chore: 移除所有 Citations 相关逻辑,为后续重构做准备

- Backend: 删除 lead_agent / general_purpose 中的 citations_format 与引用相关 reminder;artifacts 下载不再对 markdown 做 citation 清洗,统一走 FileResponse,保留 Response 用于二进制 inline
- Frontend: 删除 core/citations 模块、inline-citation、safe-citation-content;新增 MarkdownContent 仅做 Markdown 渲染;消息/artifact 预览与复制均使用原始 content
- i18n: 移除 citations 命名空间(loadingCitations、loadingCitationsWithCount)
- 技能与 demo: 措辞改为 references,demo 数据去掉 <citations> 块
- 文档: 更新 CLAUDE/AGENTS/README 描述,新增按文件 diff 的代码变更总结

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
LofiSu
2026-02-09 16:24:01 +08:00
parent cef8d389fd
commit 46048c76ce
27 changed files with 1043 additions and 894 deletions

View File

@@ -1,13 +0,0 @@
export {
contentWithoutCitationsFromParsed,
extractDomainFromUrl,
isExternalUrl,
parseCitations,
removeAllCitations,
shouldShowCitationLoading,
syntheticCitationFromLink,
} from "./utils";
export { useParsedCitations } from "./use-parsed-citations";
export type { UseParsedCitationsResult } from "./use-parsed-citations";
export type { Citation, ParseCitationsResult } from "./utils";

View File

@@ -1,28 +0,0 @@
"use client";
import { useMemo } from "react";
import { parseCitations } from "./utils";
import type { Citation } from "./utils";
/**
 * Value returned by the useParsedCitations hook.
 */
export interface UseParsedCitationsResult {
/** Citations produced by parseCitations for the given content. */
citations: Citation[];
/** The cleanContent string produced by parseCitations for the given content. */
cleanContent: string;
/** Lookup of the parsed citations keyed by their url. */
citationMap: Map<string, Citation>;
}
/**
 * React hook that runs parseCitations over `content` and additionally exposes
 * the citations as a url-keyed Map. The result is recomputed only when
 * `content` changes.
 *
 * @param content - Raw message content; a nullish value is parsed as "".
 * @returns The parsed citations, the cleaned content and the url lookup map.
 */
export function useParsedCitations(content: string): UseParsedCitationsResult {
  return useMemo(() => {
    const { citations, cleanContent } = parseCitations(content ?? "");
    // Later entries overwrite earlier ones that share a url, as with Map#set.
    const citationMap = new Map<string, Citation>(
      citations.map((entry): [string, Citation] => [entry.url, entry]),
    );
    return { citations, cleanContent, citationMap };
  }, [content]);
}

View File

@@ -1,226 +0,0 @@
/**
* Citation parsing and display helpers.
* Display rule: never show half-finished citations. Use shouldShowCitationLoading
* and show only the loading indicator until the block is complete and all
* [cite-N] refs are replaced.
*/
/**
 * Citation data structure representing a source reference
 */
export interface Citation {
/** Identifier such as "cite-1"; parseCitations resolves [cite-N] references against it. */
id: string;
/** Display title; the parser stores "" when the source line has none. */
title: string;
/** Target URL; the parser also uses it as the de-duplication key. */
url: string;
/** Short description; the parser stores "" when the source line has none. */
snippet: string;
}
/**
 * Result of parsing citations from content
 */
export interface ParseCitationsResult {
/** Citations collected from the <citations> block(s), de-duplicated by url. */
citations: Citation[];
/**
 * Content with <citations> blocks removed and every [cite-N] reference that
 * matched a parsed citation rewritten as a markdown link.
 */
cleanContent: string;
}
/**
 * Parse citation lines (one JSON object per line) into a Citation array.
 * Deduplicates by URL. Used for both complete and incomplete (streaming) blocks.
 *
 * Each line is validated after JSON parsing: `id` and `url` must be non-empty
 * strings, otherwise the line is skipped; `title` and `snippet` fall back to ""
 * unless they are strings. This keeps non-string values out of objects that are
 * typed (and later used) as strings.
 *
 * @param blockContent - Text found inside a <citations> block.
 * @param seenUrls - URLs already collected; mutated with every accepted URL so
 *   de-duplication works across several calls.
 * @returns The citations accepted from this block, in input order.
 */
function parseCitationLines(
  blockContent: string,
  seenUrls: Set<string>,
): Citation[] {
  const out: Citation[] = [];
  for (const line of blockContent.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed.startsWith("{")) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // Skip invalid JSON lines - can happen during streaming
      continue;
    }
    if (typeof parsed !== "object" || parsed === null) continue;

    const { id, title, url, snippet } = parsed as Record<string, unknown>;
    // Only accept well-typed, non-empty identifiers and URLs.
    if (typeof id !== "string" || typeof url !== "string") continue;
    if (!id || !url || seenUrls.has(url)) continue;

    seenUrls.add(url);
    out.push({
      id,
      title: typeof title === "string" ? title : "",
      url,
      snippet: typeof snippet === "string" ? snippet : "",
    });
  }
  return out;
}
/**
 * Parse citations block(s) from message content.
 * Shared by all modes (Flash / Thinking / Pro / Ultra); supports an incomplete
 * <citations> block during SSE streaming (parses whatever complete JSON lines
 * have arrived so far so [cite-N] can be linked progressively).
 *
 * The citations block format:
 * <citations>
 * {"id": "cite-1", "title": "Page Title", "url": "https://example.com", "snippet": "Description"}
 * {"id": "cite-2", "title": "Another Page", "url": "https://example2.com", "snippet": "Description"}
 * </citations>
 *
 * A trailing unclosed block is parsed even when closed blocks precede it, so
 * the citations collected here always match the blocks that
 * removeCitationsBlocks strips from the content.
 *
 * @param content - The raw message content that may contain a citations block
 * @returns Object containing parsed citations array and content with citations block removed
 */
export function parseCitations(content: string): ParseCitationsResult {
  if (!content) {
    return { citations: [], cleanContent: content };
  }
  const citations: Citation[] = [];
  const seenUrls = new Set<string>();
  const closedBlock = /<citations>([\s\S]*?)<\/citations>/g;

  // 1) Complete blocks: <citations>...</citations>
  for (const match of content.matchAll(closedBlock)) {
    citations.push(...parseCitationLines(match[1] ?? "", seenUrls));
  }

  // 2) Incomplete block during streaming: whatever follows a <citations> tag
  //    that is still open once every complete block has been taken out.
  const openMatch = /<citations>([\s\S]*)$/.exec(
    content.replace(closedBlock, ""),
  );
  if (openMatch?.[1] != null) {
    citations.push(...parseCitationLines(openMatch[1], seenUrls));
  }

  let cleanContent = removeCitationsBlocks(content);

  // Convert [cite-N] references to markdown links
  // Example: [cite-1] -> [Title](url)
  if (citations.length > 0) {
    // Build a map from citation id to citation object
    const idMap = new Map<string, Citation>();
    for (const citation of citations) {
      idMap.set(citation.id, citation);
    }
    // Replace all [cite-N] patterns with markdown links
    cleanContent = cleanContent.replace(/\[cite-(\d+)\]/g, (match, num) => {
      const citation = idMap.get(`cite-${num}`);
      if (citation) {
        // Use title if available, otherwise use domain
        const linkText = citation.title || extractDomainFromUrl(citation.url);
        return `[${linkText}](${citation.url})`;
      }
      // If citation not found, keep the original text
      return match;
    });
  }
  return { citations, cleanContent };
}
/**
 * Whether the URL is external, i.e. starts with an http:// or https:// scheme.
 * The scheme is matched case-insensitively because URI schemes are
 * case-insensitive ("HTTPS://example.com" is as external as the lowercase form).
 *
 * @param url - URL or path to classify.
 * @returns true when `url` begins with an http or https scheme.
 */
export function isExternalUrl(url: string): boolean {
  return /^https?:\/\//i.test(url);
}
/**
 * Create a Citation for a plain link (e.g. one found in artifact markdown
 * that has no <citations> block).
 *
 * @param href - Link target; becomes the citation url and part of its id.
 * @param title - Link text; when empty the href is used as the title.
 * @returns A citation with an "artifact-cite-" prefixed id and an empty snippet.
 */
export function syntheticCitationFromLink(href: string, title: string): Citation {
  const syntheticId = `artifact-cite-${href}`;
  const displayTitle = title || href;
  return { id: syntheticId, title: displayTitle, url: href, snippet: "" };
}
/**
 * Derive a display-friendly domain from a URL.
 *
 * @param url - Full URL string
 * @returns The hostname without a leading "www.", or the input unchanged when
 *   it cannot be parsed as a URL
 */
export function extractDomainFromUrl(url: string): string {
  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not a parseable URL: hand the input back as-is.
    return url;
  }
  return hostname.replace(/^www\./, "");
}
/**
 * Strip every <citations> block from content: closed blocks first, then a
 * trailing block that has been opened but not closed. The result is trimmed.
 * [cite-N] references and markdown links are left untouched; use
 * removeAllCitations to drop those as well.
 *
 * @param content - Raw content; falsy input is returned unchanged.
 * @returns The content without any citations block.
 */
export function removeCitationsBlocks(content: string): string {
  if (!content) return content;
  const withoutClosed = content
    .replace(/<citations>[\s\S]*?<\/citations>/g, "")
    .trim();
  if (!withoutClosed.includes("<citations>")) {
    return withoutClosed;
  }
  // An opening tag is left over: drop everything from it to the end.
  return withoutClosed.replace(/<citations>[\s\S]*$/g, "").trim();
}
/**
 * Report whether content contains the opening <citations> tag.
 *
 * @param content - Text to inspect; nullish input yields false.
 * @returns true when the opening tag occurs anywhere in the text.
 */
export function hasCitationsBlock(content: string): boolean {
  return content?.includes("<citations>") === true;
}
/** Matches a [cite-N] reference (N = one or more digits) that parseCitations is expected to replace. */
const UNREPLACED_CITE_REF = /\[cite-\d+\]/;
/**
 * Report whether cleanContent still carries a [cite-N] reference that was not
 * turned into a link (a half-finished citation). Callers should show a loading
 * state instead of rendering such content.
 *
 * @param cleanContent - Content as returned by parseCitations.
 * @returns true when at least one [cite-N] reference remains.
 */
export function hasUnreplacedCitationRefs(cleanContent: string): boolean {
  if (!cleanContent) {
    return false;
  }
  return UNREPLACED_CITE_REF.test(cleanContent);
}
/**
 * Decide whether the message body must be hidden behind a loading indicator.
 * Call it after parseCitations with the raw content, the parsed cleanContent
 * and the streaming flag.
 *
 * Loading is shown when cleanContent still contains [cite-N] references
 * (e.g. the references arrived before the <citations> block), or when the
 * message is still streaming and the raw content contains a citations block.
 *
 * @param rawContent - Unparsed message content.
 * @param cleanContent - cleanContent returned by parseCitations.
 * @param isLoading - Whether the message is still streaming.
 * @returns true when the body should not be rendered yet.
 */
export function shouldShowCitationLoading(
  rawContent: string,
  cleanContent: string,
  isLoading: boolean,
): boolean {
  return (
    hasUnreplacedCitationRefs(cleanContent) ||
    (isLoading && hasCitationsBlock(rawContent))
  );
}
/**
 * Remove citation links from content that parseCitations has already cleaned.
 * Takes the ParseCitationsResult directly so the content is not parsed twice.
 *
 * Only markdown links whose target equals the url of a parsed citation are
 * removed; other links stay. Runs of three or more newlines are collapsed to
 * two and the result is trimmed.
 *
 * @param parsed - Result previously returned by parseCitations.
 * @returns cleanContent without the citation links.
 */
export function contentWithoutCitationsFromParsed(
  parsed: ParseCitationsResult,
): string {
  const knownUrls = new Set<string>();
  for (const { url } of parsed.citations) {
    knownUrls.add(url);
  }
  const stripped = parsed.cleanContent.replace(
    /\[([^\]]+)\]\(([^)]+)\)/g,
    (whole: string, _label: string, target: string) =>
      knownUrls.has(target) ? "" : whole,
  );
  const collapsed = stripped.replace(/\n{3,}/g, "\n\n");
  return collapsed.trim();
}
/**
 * Produce citation-free text for copy/download: parses the content with
 * parseCitations and then strips the resulting citation links via
 * contentWithoutCitationsFromParsed. For display, use parseCitations or
 * useParsedCitations instead.
 *
 * @param content - Raw message content; falsy input is returned unchanged.
 * @returns The content with citations removed.
 */
export function removeAllCitations(content: string): string {
  if (!content) {
    return content;
  }
  const parsed = parseCitations(content);
  return contentWithoutCitationsFromParsed(parsed);
}

View File

@@ -167,13 +167,6 @@ export const enUS: Translations = {
startConversation: "Start a conversation to see messages here",
},
// Citations
citations: {
loadingCitations: "Organizing citations...",
loadingCitationsWithCount: (count: number) =>
`Organizing ${count} citation${count === 1 ? "" : "s"}...`,
},
// Chats
chats: {
searchChats: "Search chats",

View File

@@ -115,12 +115,6 @@ export interface Translations {
startConversation: string;
};
// Citations
citations: {
loadingCitations: string;
loadingCitationsWithCount: (count: number) => string;
};
// Chats
chats: {
searchChats: string;

View File

@@ -164,12 +164,6 @@ export const zhCN: Translations = {
startConversation: "开始新的对话以查看消息",
},
// Citations
citations: {
loadingCitations: "正在整理引用...",
loadingCitationsWithCount: (count: number) => `正在整理 ${count} 个引用...`,
},
// Chats
chats: {
searchChats: "搜索对话",