Yeachan-Heo 4bae5ee132 Improve CLI visibility into runtime usage and compaction
This adds token and estimated cost reporting to runtime usage tracking and surfaces it in the CLI status and turn output. It also upgrades compaction summaries so users see a clearer resumable summary and token savings after /compact.

The verification path required cleaning existing workspace clippy and test friction in adjacent crates so cargo fmt, cargo clippy -D warnings, and cargo test succeed from the Rust workspace root in this repo state.

Constraint: Keep the change incremental and user-visible without a large CLI rewrite

Constraint: Verification must pass with cargo fmt, cargo clippy --all-targets --all-features -- -D warnings, and cargo test

Rejected: Implement a full model-pricing table now | would add more surface area than needed for this first UX slice

Confidence: high

Scope-risk: moderate

Reversibility: clean

Directive: If pricing becomes model-specific later, keep the current estimate labeling explicit rather than implying exact billing

Tested: cargo fmt; cargo clippy --all-targets --all-features -- -D warnings; cargo test -q

Not-tested: Live Anthropic API interaction and real streaming terminal sessions
2026-03-31 19:18:56 +00:00

771 lines
26 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

mod input;
mod render;
use std::env;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use api::{
AnthropicClient, ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest,
MessageResponse, OutputContentBlock, StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition,
ToolResultContentBlock,
};
use commands::handle_slash_command;
use compat_harness::{extract_manifest, UpstreamPaths};
use render::{Spinner, TerminalRenderer};
use runtime::{
estimate_session_tokens, load_system_prompt, ApiClient, ApiRequest, AssistantEvent,
CompactionConfig, ContentBlock, ConversationMessage, ConversationRuntime, MessageRole,
PermissionMode, PermissionPolicy, RuntimeError, Session, TokenUsage, ToolError, ToolExecutor,
};
use tools::{execute_tool, mvp_tool_specs};
/// Model identifier used by `parse_args` when the command line has no `--model` flag.
const DEFAULT_MODEL: &str = "claude-sonnet-4-20250514";
// NOTE(review): 32 looks very small for a response cap — confirm this is intentional.
/// Value sent as `max_tokens` on every request built by `AnthropicRuntimeClient::stream`.
const DEFAULT_MAX_TOKENS: u32 = 32;
// NOTE(review): this is a fixed literal rather than the current date — confirm that is intended.
/// Date string handed to `load_system_prompt` when no `--date` option overrides it.
const DEFAULT_DATE: &str = "2026-03-31";
fn main() {
if let Err(error) = run() {
eprintln!("{error}");
std::process::exit(1);
}
}
/// Parses the process arguments (program name excluded) and dispatches the selected action.
///
/// # Errors
///
/// Returns the argument-parsing error, or any error produced while building the live
/// CLI, running a single prompt turn, or running the REPL.
fn run() -> Result<(), Box<dyn std::error::Error>> {
    let args: Vec<String> = env::args().skip(1).collect();
    let action = parse_args(&args)?;
    match action {
        CliAction::Help => print_help(),
        CliAction::DumpManifests => dump_manifests(),
        CliAction::BootstrapPlan => print_bootstrap_plan(),
        CliAction::PrintSystemPrompt { cwd, date } => print_system_prompt(cwd, date),
        CliAction::ResumeSession {
            session_path,
            command,
        } => resume_session(&session_path, command),
        CliAction::Prompt { prompt, model } => {
            // One-shot prompts run with tools disabled.
            let mut cli = LiveCli::new(model, false)?;
            cli.run_turn(&prompt)?;
        }
        CliAction::Repl { model } => run_repl(model)?,
    }
    Ok(())
}
/// The action selected by the command line, as produced by `parse_args` and consumed by `run`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CliAction {
/// `dump-manifests` subcommand.
DumpManifests,
/// `bootstrap-plan` subcommand.
BootstrapPlan,
/// `system-prompt` subcommand with its `--cwd` / `--date` values (or their defaults).
PrintSystemPrompt {
cwd: PathBuf,
date: String,
},
/// `--resume SESSION [COMMAND]`: a session file plus an optional trailing command.
ResumeSession {
session_path: PathBuf,
command: Option<String>,
},
/// `prompt TEXT...`: the prompt words joined with spaces, and the model to use.
Prompt {
prompt: String,
model: String,
},
/// No positional arguments: start the interactive REPL with the given model.
Repl {
model: String,
},
/// `--help` / `-h` as the first positional argument.
Help,
}
/// Turns the raw command-line arguments into a [`CliAction`].
///
/// `--model VALUE` and `--model=VALUE` may appear anywhere and are removed before the
/// remaining arguments are interpreted. With nothing left the REPL is selected;
/// otherwise the first remaining argument picks the action.
///
/// # Errors
///
/// Returns a message when `--model` has no value, when the `prompt` subcommand has no
/// text, when the first remaining argument is not a known subcommand, or when a
/// delegated sub-parser rejects its arguments.
fn parse_args(args: &[String]) -> Result<CliAction, String> {
    let mut model = DEFAULT_MODEL.to_string();
    let mut rest: Vec<String> = Vec::new();

    let mut remaining = args.iter();
    while let Some(arg) = remaining.next() {
        if arg == "--model" {
            let value = remaining
                .next()
                .ok_or_else(|| "missing value for --model".to_string())?;
            model.clone_from(value);
        } else if let Some(value) = arg.strip_prefix("--model=") {
            model = value.to_string();
        } else {
            rest.push(arg.clone());
        }
    }

    let Some((head, tail)) = rest.split_first() else {
        return Ok(CliAction::Repl { model });
    };

    match head.as_str() {
        "--help" | "-h" => Ok(CliAction::Help),
        "--resume" => parse_resume_args(tail),
        "dump-manifests" => Ok(CliAction::DumpManifests),
        "bootstrap-plan" => Ok(CliAction::BootstrapPlan),
        "system-prompt" => parse_system_prompt_args(tail),
        "prompt" => {
            let prompt = tail.join(" ");
            if prompt.trim().is_empty() {
                Err("prompt subcommand requires a prompt string".to_string())
            } else {
                Ok(CliAction::Prompt { prompt, model })
            }
        }
        other => Err(format!("unknown subcommand: {other}")),
    }
}
/// Parses the options that follow the `system-prompt` subcommand.
///
/// Starts from the process working directory and `DEFAULT_DATE`, then applies any
/// `--cwd PATH` / `--date VALUE` overrides in the order given.
///
/// # Errors
///
/// Returns a message when the working directory cannot be read, when an option is
/// missing its value, or when an unrecognised option is encountered.
fn parse_system_prompt_args(args: &[String]) -> Result<CliAction, String> {
    // Resolved up front, so a failure here is reported even if `--cwd` is supplied.
    let mut cwd = env::current_dir().map_err(|error| error.to_string())?;
    let mut date = DEFAULT_DATE.to_string();

    let mut remaining = args.iter();
    while let Some(option) = remaining.next() {
        match option.as_str() {
            "--cwd" => {
                let value = remaining
                    .next()
                    .ok_or_else(|| "missing value for --cwd".to_string())?;
                cwd = PathBuf::from(value);
            }
            "--date" => {
                let value = remaining
                    .next()
                    .ok_or_else(|| "missing value for --date".to_string())?;
                date.clone_from(value);
            }
            other => return Err(format!("unknown system-prompt option: {other}")),
        }
    }

    Ok(CliAction::PrintSystemPrompt { cwd, date })
}
/// Parses the arguments that follow `--resume`: a session path and at most one
/// trailing command.
///
/// # Errors
///
/// Returns a message when the session path is missing or when more than two arguments
/// are supplied.
fn parse_resume_args(args: &[String]) -> Result<CliAction, String> {
    match args {
        [] => Err("missing session path for --resume".to_string()),
        [path] => Ok(CliAction::ResumeSession {
            session_path: PathBuf::from(path),
            command: None,
        }),
        [path, command] => Ok(CliAction::ResumeSession {
            session_path: PathBuf::from(path),
            command: Some(command.clone()),
        }),
        _ => Err("--resume accepts at most one trailing slash command".to_string()),
    }
}
/// Extracts the upstream manifest relative to this crate's manifest directory and
/// prints the number of commands, tools and bootstrap phases it contains.
///
/// Exits the process with status 1 if extraction fails.
fn dump_manifests() {
    let workspace_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
    let paths = UpstreamPaths::from_workspace_dir(&workspace_dir);
    let manifest = match extract_manifest(&paths) {
        Ok(manifest) => manifest,
        Err(error) => {
            eprintln!("failed to extract manifests: {error}");
            std::process::exit(1);
        }
    };

    println!("commands: {}", manifest.commands.entries().len());
    println!("tools: {}", manifest.tools.entries().len());
    println!("bootstrap phases: {}", manifest.bootstrap.phases().len());
}
/// Prints each phase of the default bootstrap plan on its own line, using its `Debug`
/// form.
fn print_bootstrap_plan() {
    let plan = runtime::BootstrapPlan::claude_code_default();
    for phase in plan.phases() {
        println!("- {phase:?}");
    }
}
fn print_system_prompt(cwd: PathBuf, date: String) {
match load_system_prompt(cwd, date, env::consts::OS, "unknown") {
Ok(sections) => println!("{}", sections.join("\n\n")),
Err(error) => {
eprintln!("failed to build system prompt: {error}");
std::process::exit(1);
}
}
}
fn resume_session(session_path: &Path, command: Option<String>) {
let session = match Session::load_from_path(session_path) {
Ok(session) => session,
Err(error) => {
eprintln!("failed to restore session: {error}");
std::process::exit(1);
}
};
match command {
Some(command) if command.starts_with('/') => {
let Some(result) = handle_slash_command(
&command,
&session,
CompactionConfig {
max_estimated_tokens: 0,
..CompactionConfig::default()
},
) else {
eprintln!("unknown slash command: {command}");
std::process::exit(2);
};
if let Err(error) = result.session.save_to_path(session_path) {
eprintln!("failed to persist resumed session: {error}");
std::process::exit(1);
}
println!("{}", result.message);
}
Some(other) => {
eprintln!("unsupported resumed command: {other}");
std::process::exit(2);
}
None => {
println!(
"Restored session from {} ({} messages).",
session_path.display(),
session.messages.len()
);
}
}
}
/// Runs the interactive loop: reads lines from the line editor until it yields `None`
/// or the user types `/exit` or `/quit`.
///
/// Blank lines are ignored, `/help`, `/status` and `/compact` are handled locally, and
/// every other line is sent as a conversation turn.
///
/// # Errors
///
/// Propagates failures from building the CLI, reading a line, compacting, or running
/// a turn.
fn run_repl(model: String) -> Result<(), Box<dyn std::error::Error>> {
    let mut cli = LiveCli::new(model, true)?;
    let editor = input::LineEditor::new(" ");
    println!("Rusty Claude CLI interactive mode");
    println!("Type /help for commands. Shift+Enter or Ctrl+J inserts a newline.");

    loop {
        let Some(line) = editor.read_line()? else {
            break;
        };
        match line.trim() {
            "" => continue,
            "/exit" | "/quit" => break,
            "/help" => {
                let help_lines = [
                    "Available commands:",
                    " /help Show help",
                    " /status Show session status",
                    " /compact Compact session history",
                    " /exit Quit the REPL",
                ];
                for help_line in help_lines {
                    println!("{help_line}");
                }
            }
            "/status" => cli.print_status(),
            "/compact" => cli.compact()?,
            prompt => cli.run_turn(prompt)?,
        }
    }

    Ok(())
}
/// State for a live CLI conversation: the runtime plus the inputs needed to rebuild it.
struct LiveCli {
// Kept so `compact` can rebuild the runtime with the same model.
model: String,
// Kept so `compact` can rebuild the runtime with the same system prompt.
system_prompt: Vec<String>,
// The conversation runtime that executes turns and tracks usage.
runtime: ConversationRuntime<AnthropicRuntimeClient, CliToolExecutor>,
}
impl LiveCli {
    /// Builds the system prompt and a runtime around a new, empty session.
    ///
    /// `enable_tools` is forwarded to `build_runtime`.
    ///
    /// # Errors
    ///
    /// Fails if the system prompt or the runtime cannot be built.
    fn new(model: String, enable_tools: bool) -> Result<Self, Box<dyn std::error::Error>> {
        let system_prompt = build_system_prompt()?;
        let session = Session::new();
        let runtime = build_runtime(
            session,
            model.clone(),
            system_prompt.clone(),
            enable_tools,
        )?;
        Ok(Self {
            model,
            system_prompt,
            runtime,
        })
    }

    /// Runs one conversation turn for `input`, showing a spinner status before and
    /// after, and prints the usage summary when the turn succeeds.
    ///
    /// # Errors
    ///
    /// Returns spinner I/O errors, or the runtime error after marking the spinner as
    /// failed.
    fn run_turn(&mut self, input: &str) -> Result<(), Box<dyn std::error::Error>> {
        let mut progress = Spinner::new();
        let mut out = io::stdout();
        progress.tick(
            "Waiting for Claude",
            TerminalRenderer::new().color_theme(),
            &mut out,
        )?;

        let turn = match self.runtime.run_turn(input, None) {
            Ok(turn) => turn,
            Err(error) => {
                progress.fail(
                    "Claude request failed",
                    TerminalRenderer::new().color_theme(),
                    &mut out,
                )?;
                return Err(Box::new(error));
            }
        };

        progress.finish(
            "Claude response complete",
            TerminalRenderer::new().color_theme(),
            &mut out,
        )?;
        println!();
        self.print_turn_usage(turn.usage);
        Ok(())
    }

    /// Prints a one-line session status followed by the cumulative usage summary.
    fn print_status(&self) {
        let cumulative = self.runtime.usage().cumulative_usage();
        let message_count = self.runtime.session().messages.len();
        let turn_count = self.runtime.usage().turns();
        let estimated_tokens = self.runtime.estimated_tokens();
        println!(
            "status: messages={} turns={} estimated_session_tokens={}",
            message_count, turn_count, estimated_tokens
        );
        for summary_line in cumulative.summary_lines("usage") {
            println!("{summary_line}");
        }
    }

    /// Prints the usage of the most recent turn, then the supplied cumulative usage.
    fn print_turn_usage(&self, cumulative_usage: TokenUsage) {
        let turn_usage = self.runtime.usage().current_turn_usage();

        println!("\nTurn usage:");
        for summary_line in turn_usage.summary_lines(" latest") {
            println!("{summary_line}");
        }

        println!("Cumulative usage:");
        for summary_line in cumulative_usage.summary_lines(" total") {
            println!("{summary_line}");
        }
    }

    /// Compacts the session with the default configuration, replaces the runtime with
    /// one built around the compacted session (tools enabled), and reports the outcome.
    ///
    /// # Errors
    ///
    /// Fails if the replacement runtime cannot be built.
    fn compact(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        let tokens_before = self.runtime.estimated_tokens();
        let outcome = self.runtime.compact(CompactionConfig::default());

        let removed = outcome.removed_message_count;
        let tokens_after = estimate_session_tokens(&outcome.compacted_session);
        let formatted_summary = outcome.formatted_summary.clone();

        // The runtime is rebuilt whether or not any messages were removed.
        self.runtime = build_runtime(
            outcome.compacted_session,
            self.model.clone(),
            self.system_prompt.clone(),
            true,
        )?;

        if removed == 0 {
            println!("Compaction skipped: session is below the compaction threshold.");
            return Ok(());
        }

        println!("Compacted {removed} messages into a resumable system summary.");
        if !formatted_summary.is_empty() {
            println!("\n{formatted_summary}");
        }
        let estimated_saved = tokens_before.saturating_sub(tokens_after);
        println!("Estimated tokens saved: {estimated_saved}");
        Ok(())
    }
}
/// Loads the system prompt sections for the current working directory using
/// `DEFAULT_DATE`, the compile-target OS name, and `"unknown"` as the final argument.
///
/// # Errors
///
/// Fails if the working directory cannot be read or the prompt cannot be loaded.
fn build_system_prompt() -> Result<Vec<String>, Box<dyn std::error::Error>> {
    let cwd = env::current_dir()?;
    let sections = load_system_prompt(cwd, DEFAULT_DATE, env::consts::OS, "unknown")?;
    Ok(sections)
}
/// Assembles a conversation runtime for `session` from an Anthropic-backed API client,
/// the CLI tool executor, the environment-derived permission policy, and
/// `system_prompt`.
///
/// # Errors
///
/// Fails if the API client cannot be constructed.
fn build_runtime(
    session: Session,
    model: String,
    system_prompt: Vec<String>,
    enable_tools: bool,
) -> Result<ConversationRuntime<AnthropicRuntimeClient, CliToolExecutor>, Box<dyn std::error::Error>>
{
    let api_client = AnthropicRuntimeClient::new(model, enable_tools)?;
    let tool_executor = CliToolExecutor::new();
    let policy = permission_policy_from_env();
    let runtime =
        ConversationRuntime::new(session, api_client, tool_executor, policy, system_prompt);
    Ok(runtime)
}
/// `ApiClient` implementation that drives the async `AnthropicClient` from synchronous code.
struct AnthropicRuntimeClient {
// Tokio runtime used by `stream` to `block_on` the async API calls.
runtime: tokio::runtime::Runtime,
// Client the requests are sent through.
client: AnthropicClient,
// Model name placed in every request.
model: String,
// When true, requests carry the tool definitions and an automatic tool choice.
enable_tools: bool,
}
impl AnthropicRuntimeClient {
    /// Creates a client for `model` with its own Tokio runtime and an `AnthropicClient`
    /// built by `AnthropicClient::from_env`.
    ///
    /// # Errors
    ///
    /// Fails if the Tokio runtime or the API client cannot be created.
    fn new(model: String, enable_tools: bool) -> Result<Self, Box<dyn std::error::Error>> {
        let runtime = tokio::runtime::Runtime::new()?;
        let client = AnthropicClient::from_env()?;
        Ok(Self {
            runtime,
            client,
            model,
            enable_tools,
        })
    }
}
impl ApiClient for AnthropicRuntimeClient {
#[allow(clippy::too_many_lines)]
fn stream(&mut self, request: ApiRequest) -> Result<Vec<AssistantEvent>, RuntimeError> {
let message_request = MessageRequest {
model: self.model.clone(),
max_tokens: DEFAULT_MAX_TOKENS,
messages: convert_messages(&request.messages),
system: (!request.system_prompt.is_empty()).then(|| request.system_prompt.join("\n\n")),
tools: self.enable_tools.then(|| {
mvp_tool_specs()
.into_iter()
.map(|spec| ToolDefinition {
name: spec.name.to_string(),
description: Some(spec.description.to_string()),
input_schema: spec.input_schema,
})
.collect()
}),
tool_choice: self.enable_tools.then_some(ToolChoice::Auto),
stream: true,
};
self.runtime.block_on(async {
let mut stream = self
.client
.stream_message(&message_request)
.await
.map_err(|error| RuntimeError::new(error.to_string()))?;
let mut stdout = io::stdout();
let mut events = Vec::new();
let mut pending_tool: Option<(String, String, String)> = None;
let mut saw_stop = false;
while let Some(event) = stream
.next_event()
.await
.map_err(|error| RuntimeError::new(error.to_string()))?
{
match event {
ApiStreamEvent::MessageStart(start) => {
for block in start.message.content {
push_output_block(block, &mut stdout, &mut events, &mut pending_tool)?;
}
}
ApiStreamEvent::ContentBlockStart(start) => {
push_output_block(
start.content_block,
&mut stdout,
&mut events,
&mut pending_tool,
)?;
}
ApiStreamEvent::ContentBlockDelta(delta) => match delta.delta {
ContentBlockDelta::TextDelta { text } => {
if !text.is_empty() {
write!(stdout, "{text}")
.and_then(|()| stdout.flush())
.map_err(|error| RuntimeError::new(error.to_string()))?;
events.push(AssistantEvent::TextDelta(text));
}
}
ContentBlockDelta::InputJsonDelta { partial_json } => {
if let Some((_, _, input)) = &mut pending_tool {
input.push_str(&partial_json);
}
}
},
ApiStreamEvent::ContentBlockStop(_) => {
if let Some((id, name, input)) = pending_tool.take() {
events.push(AssistantEvent::ToolUse { id, name, input });
}
}
ApiStreamEvent::MessageDelta(delta) => {
events.push(AssistantEvent::Usage(TokenUsage {
input_tokens: delta.usage.input_tokens,
output_tokens: delta.usage.output_tokens,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
}));
}
ApiStreamEvent::MessageStop(_) => {
saw_stop = true;
events.push(AssistantEvent::MessageStop);
}
}
}
if !saw_stop
&& events.iter().any(|event| {
matches!(event, AssistantEvent::TextDelta(text) if !text.is_empty())
|| matches!(event, AssistantEvent::ToolUse { .. })
})
{
events.push(AssistantEvent::MessageStop);
}
if events
.iter()
.any(|event| matches!(event, AssistantEvent::MessageStop))
{
return Ok(events);
}
let response = self
.client
.send_message(&MessageRequest {
stream: false,
..message_request.clone()
})
.await
.map_err(|error| RuntimeError::new(error.to_string()))?;
response_to_events(response, &mut stdout)
})
}
}
fn push_output_block(
block: OutputContentBlock,
out: &mut impl Write,
events: &mut Vec<AssistantEvent>,
pending_tool: &mut Option<(String, String, String)>,
) -> Result<(), RuntimeError> {
match block {
OutputContentBlock::Text { text } => {
if !text.is_empty() {
write!(out, "{text}")
.and_then(|()| out.flush())
.map_err(|error| RuntimeError::new(error.to_string()))?;
events.push(AssistantEvent::TextDelta(text));
}
}
OutputContentBlock::ToolUse { id, name, input } => {
*pending_tool = Some((id, name, input.to_string()));
}
}
Ok(())
}
/// Converts a complete (non-streamed) response into assistant events.
///
/// Each content block goes through `push_output_block`; a tool use captured by it is
/// emitted immediately as a `ToolUse` event. The response's usage counters and a final
/// `MessageStop` are appended after the content.
///
/// # Errors
///
/// Propagates the `RuntimeError` from `push_output_block` when writing to `out` fails.
fn response_to_events(
    response: MessageResponse,
    out: &mut impl Write,
) -> Result<Vec<AssistantEvent>, RuntimeError> {
    let usage = TokenUsage {
        input_tokens: response.usage.input_tokens,
        output_tokens: response.usage.output_tokens,
        cache_creation_input_tokens: response.usage.cache_creation_input_tokens,
        cache_read_input_tokens: response.usage.cache_read_input_tokens,
    };

    let mut events = Vec::new();
    for block in response.content {
        let mut captured_tool = None;
        push_output_block(block, out, &mut events, &mut captured_tool)?;
        if let Some((id, name, input)) = captured_tool {
            events.push(AssistantEvent::ToolUse { id, name, input });
        }
    }

    events.push(AssistantEvent::Usage(usage));
    events.push(AssistantEvent::MessageStop);
    Ok(events)
}
/// `ToolExecutor` implementation that runs tools and echoes their output to the terminal.
struct CliToolExecutor {
// Renderer used to stream each tool's output as markdown to stdout.
renderer: TerminalRenderer,
}
impl CliToolExecutor {
    /// Creates an executor with a newly constructed terminal renderer.
    fn new() -> Self {
        let renderer = TerminalRenderer::new();
        Self { renderer }
    }
}
impl ToolExecutor for CliToolExecutor {
    /// Parses `input` as JSON, runs the named tool, renders its output to stdout as a
    /// markdown section with a fenced JSON block, and returns the raw output.
    ///
    /// # Errors
    ///
    /// Returns a `ToolError` when `input` is not valid JSON, when the tool itself
    /// fails, or when rendering to stdout fails.
    fn execute(&mut self, tool_name: &str, input: &str) -> Result<String, ToolError> {
        let value = serde_json::from_str(input)
            .map_err(|error| ToolError::new(format!("invalid tool input JSON: {error}")))?;
        let output = execute_tool(tool_name, &value).map_err(ToolError::new)?;

        let markdown = format!("### Tool `{tool_name}`\n\n```json\n{output}\n```\n");
        let mut stdout = io::stdout();
        self.renderer
            .stream_markdown(&markdown, &mut stdout)
            .map_err(|error| ToolError::new(error.to_string()))?;
        Ok(output)
    }
}
/// Builds the permission policy from the `RUSTY_CLAUDE_PERMISSION_MODE` environment
/// variable.
///
/// The value `read-only` yields a deny-by-default policy that allows only the
/// `read_file`, `glob_search` and `grep_search` tools. Any other value, or an unset or
/// unreadable variable, yields an allow-by-default policy.
fn permission_policy_from_env() -> PermissionPolicy {
    let requested = env::var("RUSTY_CLAUDE_PERMISSION_MODE");
    if !matches!(requested.as_deref(), Ok("read-only")) {
        return PermissionPolicy::new(PermissionMode::Allow);
    }

    ["read_file", "glob_search", "grep_search"]
        .into_iter()
        .fold(PermissionPolicy::new(PermissionMode::Deny), |policy, tool| {
            policy.with_tool_mode(tool, PermissionMode::Allow)
        })
}
fn convert_messages(messages: &[ConversationMessage]) -> Vec<InputMessage> {
messages
.iter()
.filter_map(|message| {
let role = match message.role {
MessageRole::System | MessageRole::User | MessageRole::Tool => "user",
MessageRole::Assistant => "assistant",
};
let content = message
.blocks
.iter()
.map(|block| match block {
ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() },
ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse {
id: id.clone(),
name: name.clone(),
input: serde_json::from_str(input)
.unwrap_or_else(|_| serde_json::json!({ "raw": input })),
},
ContentBlock::ToolResult {
tool_use_id,
output,
is_error,
..
} => InputContentBlock::ToolResult {
tool_use_id: tool_use_id.clone(),
content: vec![ToolResultContentBlock::Text {
text: output.clone(),
}],
is_error: *is_error,
},
})
.collect::<Vec<_>>();
(!content.is_empty()).then(|| InputMessage {
role: role.to_string(),
content,
})
})
.collect()
}
/// Prints the command-line usage summary to stdout.
fn print_help() {
    let usage_lines = [
        "rusty-claude-cli",
        "",
        "Usage:",
        " rusty-claude-cli [--model MODEL] Start interactive REPL",
        " rusty-claude-cli [--model MODEL] prompt TEXT Send one prompt and stream the response",
        " rusty-claude-cli dump-manifests",
        " rusty-claude-cli bootstrap-plan",
        " rusty-claude-cli system-prompt [--cwd PATH] [--date YYYY-MM-DD]",
        " rusty-claude-cli --resume SESSION.json [/compact]",
    ];
    for usage_line in usage_lines {
        println!("{usage_line}");
    }
}
#[cfg(test)]
mod tests {
    use super::{parse_args, CliAction, DEFAULT_MODEL};
    use runtime::{ContentBlock, ConversationMessage, MessageRole};
    use std::path::PathBuf;

    /// Turns string literals into the owned argument list `parse_args` expects.
    fn owned_args(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    #[test]
    fn defaults_to_repl_when_no_args() {
        let action = parse_args(&[]).expect("args should parse");
        let expected = CliAction::Repl {
            model: DEFAULT_MODEL.to_string(),
        };
        assert_eq!(action, expected);
    }

    #[test]
    fn parses_prompt_subcommand() {
        let args = owned_args(&["prompt", "hello", "world"]);
        let action = parse_args(&args).expect("args should parse");
        let expected = CliAction::Prompt {
            prompt: "hello world".to_string(),
            model: DEFAULT_MODEL.to_string(),
        };
        assert_eq!(action, expected);
    }

    #[test]
    fn parses_system_prompt_options() {
        let args = owned_args(&[
            "system-prompt",
            "--cwd",
            "/tmp/project",
            "--date",
            "2026-04-01",
        ]);
        let action = parse_args(&args).expect("args should parse");
        let expected = CliAction::PrintSystemPrompt {
            cwd: PathBuf::from("/tmp/project"),
            date: "2026-04-01".to_string(),
        };
        assert_eq!(action, expected);
    }

    #[test]
    fn parses_resume_flag_with_slash_command() {
        let args = owned_args(&["--resume", "session.json", "/compact"]);
        let action = parse_args(&args).expect("args should parse");
        let expected = CliAction::ResumeSession {
            session_path: PathBuf::from("session.json"),
            command: Some("/compact".to_string()),
        };
        assert_eq!(action, expected);
    }

    #[test]
    fn converts_tool_roundtrip_messages() {
        // A user prompt, the assistant's tool call, and the tool's result.
        let tool_call = ConversationMessage::assistant(vec![ContentBlock::ToolUse {
            id: "tool-1".to_string(),
            name: "bash".to_string(),
            input: "{\"command\":\"pwd\"}".to_string(),
        }]);
        let tool_result = ConversationMessage {
            role: MessageRole::Tool,
            blocks: vec![ContentBlock::ToolResult {
                tool_use_id: "tool-1".to_string(),
                tool_name: "bash".to_string(),
                output: "ok".to_string(),
                is_error: false,
            }],
            usage: None,
        };
        let messages = vec![
            ConversationMessage::user_text("hello"),
            tool_call,
            tool_result,
        ];

        let converted = super::convert_messages(&messages);

        assert_eq!(converted.len(), 3);
        assert_eq!(converted[1].role, "assistant");
        assert_eq!(converted[2].role, "user");
    }
}