feat(frontend): display token usage per conversation turn (#1229)

Surface the usage_metadata that PR #1218 added to the streaming API.
A compact indicator in the chat header shows cumulative tokens consumed
per thread, with a tooltip breakdown of input/output/total counts.

Co-authored-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
Matt Van Horn
2026-03-23 17:59:35 -07:00
committed by GitHub
parent 8b0f3fe233
commit b40b05f623
7 changed files with 159 additions and 1 deletions

View File

@@ -15,6 +15,7 @@ import { MessageList } from "@/components/workspace/messages";
import { ThreadContext } from "@/components/workspace/messages/context";
import { ThreadTitle } from "@/components/workspace/thread-title";
import { TodoList } from "@/components/workspace/todo-list";
import { TokenUsageIndicator } from "@/components/workspace/token-usage-indicator";
import { Tooltip } from "@/components/workspace/tooltip";
import { useAgent } from "@/core/agents";
import { useI18n } from "@/core/i18n/hooks";
@@ -115,6 +116,7 @@ export default function AgentChatPage() {
<PlusSquare /> {t.agents.newChat}
</Button>
</Tooltip>
<TokenUsageIndicator messages={thread.messages} />
<ExportTrigger threadId={threadId} />
<ArtifactTrigger />
</div>

View File

@@ -15,6 +15,7 @@ import { MessageList } from "@/components/workspace/messages";
import { ThreadContext } from "@/components/workspace/messages/context";
import { ThreadTitle } from "@/components/workspace/thread-title";
import { TodoList } from "@/components/workspace/todo-list";
import { TokenUsageIndicator } from "@/components/workspace/token-usage-indicator";
import { Welcome } from "@/components/workspace/welcome";
import { useI18n } from "@/core/i18n/hooks";
import { useNotification } from "@/core/notification/hooks";
@@ -85,7 +86,8 @@ export default function ChatPage() {
<div className="flex w-full items-center text-sm font-medium">
<ThreadTitle threadId={threadId} thread={thread} />
</div>
<div className="flex items-center">
<div className="flex items-center gap-2">
<TokenUsageIndicator messages={thread.messages} />
<ExportTrigger threadId={threadId} />
<ArtifactTrigger />
</div>

View File

@@ -0,0 +1,74 @@
"use client";
import type { Message } from "@langchain/langgraph-sdk";
import { CoinsIcon } from "lucide-react";
import { useMemo } from "react";
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { useI18n } from "@/core/i18n/hooks";
import { accumulateUsage, formatTokenCount } from "@/core/messages/usage";
import { cn } from "@/lib/utils";
/**
 * Props accepted by the TokenUsageIndicator component.
 */
interface TokenUsageIndicatorProps {
// Thread messages; handed to accumulateUsage to compute the displayed totals.
messages: Message[];
// Optional extra class names, merged (via cn) after the trigger button's defaults.
className?: string;
}
/**
 * Compact token counter with a tooltip breakdown.
 *
 * Shows the accumulated total for `messages` next to a coin icon; hovering
 * reveals the input / output / total counts. Renders nothing when
 * `accumulateUsage` reports no usage for the given messages.
 */
export function TokenUsageIndicator({
  messages,
  className,
}: TokenUsageIndicatorProps) {
  const { t } = useI18n();
  const usage = useMemo(() => accumulateUsage(messages), [messages]);

  // Hooks above must run unconditionally, so the empty-state bail-out comes after them.
  if (!usage) {
    return null;
  }

  const labels = t.tokenUsage;
  const totalText = formatTokenCount(usage.totalTokens);
  const triggerClassName = cn(
    "text-muted-foreground flex cursor-default items-center gap-1 text-xs",
    className,
  );
  // Input and output share the same row markup; the total row is styled separately below.
  const breakdown = [
    {
      id: "input",
      label: labels.input,
      value: formatTokenCount(usage.inputTokens),
    },
    {
      id: "output",
      label: labels.output,
      value: formatTokenCount(usage.outputTokens),
    },
  ];

  return (
    <Tooltip delayDuration={200}>
      <TooltipTrigger asChild>
        <button type="button" className={triggerClassName}>
          <CoinsIcon size={14} />
          <span>{totalText}</span>
        </button>
      </TooltipTrigger>
      <TooltipContent side="bottom" align="end">
        <div className="space-y-1 text-xs">
          <div className="font-medium">{labels.title}</div>
          {breakdown.map((row) => (
            <div key={row.id} className="flex justify-between gap-4">
              <span>{row.label}</span>
              <span className="font-mono">{row.value}</span>
            </div>
          ))}
          <div className="border-t pt-1">
            <div className="flex justify-between gap-4">
              <span>{labels.total}</span>
              <span className="font-mono font-medium">{totalText}</span>
            </div>
          </div>
        </div>
      </TooltipContent>
    </Tooltip>
  );
}

View File

@@ -274,6 +274,12 @@ export const enUS: Translations = {
failed: "Subtask failed",
},
// Token Usage
tokenUsage: {
title: "Token Usage",
input: "Input",
output: "Output",
total: "Total",
// Shortcuts
shortcuts: {
searchActions: "Search actions...",

View File

@@ -211,6 +211,12 @@ export interface Translations {
failed: string;
};
// Token Usage
tokenUsage: {
title: string;
input: string;
output: string;
total: string;
// Shortcuts
shortcuts: {
searchActions: string;

View File

@@ -261,6 +261,12 @@ export const zhCN: Translations = {
failed: "子任务失败",
},
// Token Usage
tokenUsage: {
title: "Token 用量",
input: "输入",
output: "输出",
total: "总计",
// Shortcuts
shortcuts: {
searchActions: "搜索操作...",

View File

@@ -0,0 +1,62 @@
import type { Message } from "@langchain/langgraph-sdk";
/**
 * Token counts for a single AI message, or summed over a thread by
 * accumulateUsage. Each field mirrors a snake_case key of the message's
 * `usage_metadata` object.
 */
export interface TokenUsage {
// Mirrors `usage_metadata.input_tokens`.
inputTokens: number;
// Mirrors `usage_metadata.output_tokens`.
outputTokens: number;
// Mirrors `usage_metadata.total_tokens`.
totalTokens: number;
}
/**
 * Extract usage_metadata from an AI message if present.
 * The field is added by the backend (PR #1218) but not typed in the SDK,
 * so it is treated as untrusted input and validated at runtime.
 *
 * @param message - Any thread message; only those with `type === "ai"` are inspected.
 * @returns The message's token counts, or `null` when the message is not an
 *   AI message or carries no usage_metadata object. Fields that are missing
 *   or not finite numbers count as 0; a missing `total_tokens` is derived as
 *   input + output so the total never contradicts its parts.
 */
function getUsageMetadata(message: Message): TokenUsage | null {
  if (message.type !== "ai") {
    return null;
  }

  const raw: unknown = (message as Record<string, unknown>).usage_metadata;
  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
    return null;
  }
  const fields = raw as Record<string, unknown>;

  // Only finite numbers are accepted: a stray string would otherwise turn the
  // caller's `+=` accumulation into string concatenation.
  const readCount = (value: unknown): number | undefined =>
    typeof value === "number" && Number.isFinite(value) ? value : undefined;

  const inputTokens = readCount(fields.input_tokens) ?? 0;
  const outputTokens = readCount(fields.output_tokens) ?? 0;
  const totalTokens =
    readCount(fields.total_tokens) ?? inputTokens + outputTokens;

  return { inputTokens, outputTokens, totalTokens };
}
/**
 * Sum the token usage reported by every message in a thread.
 *
 * @param messages - Thread messages in any order; those for which
 *   getUsageMetadata yields nothing are skipped.
 * @returns The summed counts, or `null` when no message reported usage.
 */
export function accumulateUsage(messages: Message[]): TokenUsage | null {
  // The running total stays null until the first message with usage is seen,
  // which doubles as the "nothing to show" signal for callers.
  return messages.reduce<TokenUsage | null>((runningTotal, message) => {
    const perMessage = getUsageMetadata(message);
    if (!perMessage) {
      return runningTotal;
    }
    return {
      inputTokens: (runningTotal?.inputTokens ?? 0) + perMessage.inputTokens,
      outputTokens: (runningTotal?.outputTokens ?? 0) + perMessage.outputTokens,
      totalTokens: (runningTotal?.totalTokens ?? 0) + perMessage.totalTokens,
    };
  }, null);
}
/**
 * Format a token count for compact display.
 *
 * - below 10,000: the full number with locale grouping, e.g. 1234 -> "1,234"
 *   (separator follows the runtime's default locale)
 * - below ~1 million: thousands with one decimal, e.g. 12345 -> "12.3K"
 * - otherwise: millions with one decimal, e.g. 1234567 -> "1.2M"
 *
 * @param count - Number of tokens to format.
 * @returns The display string.
 */
export function formatTokenCount(count: number): string {
  if (count < 10_000) {
    return count.toLocaleString();
  }
  // Switch to "M" at 999,950 rather than 1,000,000: from there on, one-decimal
  // rounding of the K form would print "1000.0K".
  if (count < 999_950) {
    return `${(count / 1_000).toFixed(1)}K`;
  }
  return `${(count / 1_000_000).toFixed(1)}M`;
}