feat: refine citations format and improve content presentation

Backend:
- Simplify citations prompt format and rules
- Add clear distinction between chat responses and file content
- Enforce full URL usage in markdown links, prohibit [cite-1] format
- Require content-first approach: write full content, then add citations at end

Frontend:
- Hide <citations> block in both chat messages and markdown preview
- Remove top-level Citations/Sources list for cleaner UI
- Auto-remove <citations> block in code editor view for markdown files
- Keep inline citation hover cards for reference details

This ensures citations are presented like Claude: clean content with inline reference badges.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-04-19 12:24:46 +08:00 · 2026-01-29 12:29:13 +08:00
parent ad85b72064
commit c14378a312
10 changed files with 515 additions and 185 deletions
--- a/frontend/src/core/citations/utils.ts
+++ b/frontend/src/core/citations/utils.ts
@@ -33,41 +33,42 @@ export function parseCitations(content: string): ParseCitationsResult {
    return { citations: [], cleanContent: content };
  }

-  // Match the citations block at the start of content (with possible leading whitespace)
-  const citationsRegex = /^\s*<citations>([\s\S]*?)<\/citations>/;
-  const match = citationsRegex.exec(content);
-
-  if (!match) {
-    return { citations: [], cleanContent: content };
-  }
-
-  const citationsBlock = match[1] ?? "";
+  // Match ALL citations blocks anywhere in content (not just at the start)
+  const citationsRegex = /<citations>([\s\S]*?)<\/citations>/g;
  const citations: Citation[] = [];
+  const seenUrls = new Set<string>(); // Deduplicate by URL
+  let cleanContent = content;

-  // Parse each line as JSON
-  const lines = citationsBlock.split("\n");
-  for (const line of lines) {
-    const trimmed = line.trim();
-    if (trimmed?.startsWith("{")) {
-      try {
-        const citation = JSON.parse(trimmed) as Citation;
-        // Validate required fields
-        if (citation.id && citation.url) {
-          citations.push({
-            id: citation.id,
-            title: citation.title || "",
-            url: citation.url,
-            snippet: citation.snippet || "",
-          });
+  let match;
+  while ((match = citationsRegex.exec(content)) !== null) {
+    const citationsBlock = match[1] ?? "";
+
+    // Parse each line as JSON
+    const lines = citationsBlock.split("\n");
+    for (const line of lines) {
+      const trimmed = line.trim();
+      if (trimmed?.startsWith("{")) {
+        try {
+          const citation = JSON.parse(trimmed) as Citation;
+          // Validate required fields and deduplicate
+          if (citation.id && citation.url && !seenUrls.has(citation.url)) {
+            seenUrls.add(citation.url);
+            citations.push({
+              id: citation.id,
+              title: citation.title || "",
+              url: citation.url,
+              snippet: citation.snippet || "",
+            });
+          }
+        } catch {
+          // Skip invalid JSON lines - this can happen during streaming
        }
-      } catch {
-        // Skip invalid JSON lines - this can happen during streaming
      }
    }
  }

-  // Remove the citations block from content
-  const cleanContent = content.replace(citationsRegex, "").trim();
+  // Remove ALL citations blocks from content
+  cleanContent = content.replace(/<citations>[\s\S]*?<\/citations>/g, "").trim();

  return { citations, cleanContent };
 }
--- a/frontend/src/core/messages/utils.ts
+++ b/frontend/src/core/messages/utils.ts
@@ -217,3 +217,58 @@ export function findToolCallResult(toolCallId: string, messages: Message[]) {
  }
  return undefined;
 }
+
+/**
+ * Represents an uploaded file parsed from the <uploaded_files> tag
+ */
+export interface UploadedFile {
+  filename: string;
+  size: string;
+  path: string;
+}
+
+/**
+ * Result of parsing uploaded files from message content
+ */
+export interface ParsedUploadedFiles {
+  files: UploadedFile[];
+  cleanContent: string;
+}
+
+/**
+ * Parse <uploaded_files> tag from message content and extract file information.
+ * Returns the list of uploaded files and the content with the tag removed.
+ */
+export function parseUploadedFiles(content: string): ParsedUploadedFiles {
+  // Match <uploaded_files>...</uploaded_files> tag
+  const uploadedFilesRegex = /<uploaded_files>([\s\S]*?)<\/uploaded_files>/;
+  const match = content.match(uploadedFilesRegex);
+
+  if (!match) {
+    return { files: [], cleanContent: content };
+  }
+
+  const uploadedFilesContent = match[1];
+  const cleanContent = content.replace(uploadedFilesRegex, "").trim();
+
+  // Check if it's "No files have been uploaded yet."
+  if (uploadedFilesContent.includes("No files have been uploaded yet.")) {
+    return { files: [], cleanContent };
+  }
+
+  // Parse file list
+  // Format: - filename (size)\n  Path: /path/to/file
+  const fileRegex = /- ([^\n(]+)\s*\(([^)]+)\)\s*\n\s*Path:\s*([^\n]+)/g;
+  const files: UploadedFile[] = [];
+  let fileMatch;
+
+  while ((fileMatch = fileRegex.exec(uploadedFilesContent)) !== null) {
+    files.push({
+      filename: fileMatch[1].trim(),
+      size: fileMatch[2].trim(),
+      path: fileMatch[3].trim(),
+    });
+  }
+
+  return { files, cleanContent };
+}