mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-29 16:54:47 +08:00
125 lines
3.3 KiB
TypeScript
125 lines
3.3 KiB
TypeScript
|
|
/**
 * A single source reference attached to a message.
 */
export interface Citation {
  /** Identifier of the citation, e.g. "cite-1" */
  id: string;
  /** Title of the referenced page */
  title: string;
  /** Address of the referenced page */
  url: string;
  /** Short description of the source */
  snippet: string;
}
|
||
|
|
|
||
|
|
/**
 * Outcome of extracting a citations block from message content.
 */
export interface ParseCitationsResult {
  /** Citations read from the citations block (empty when none were found) */
  citations: Citation[];
  /** The message content with the citations block removed */
  cleanContent: string;
}
|
||
|
|
|
||
|
|
/**
 * Convert one line of a citations block into a Citation.
 *
 * The JSON payload is untrusted, so every field is type-checked at runtime
 * instead of being cast straight to `Citation`.
 *
 * @param line - A single raw line taken from inside the citations block
 * @returns The citation, or `undefined` when the line is not a JSON object
 *   carrying non-empty string `id` and `url` fields
 */
function parseCitationLine(line: string): Citation | undefined {
  const trimmed = line.trim();
  if (!trimmed.startsWith("{")) {
    return undefined;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    // Skip invalid JSON lines - this can happen during streaming
    return undefined;
  }

  if (typeof parsed !== "object" || parsed === null) {
    return undefined;
  }

  // Safe to index: `parsed` was just checked to be a non-null object.
  const { id, title, url, snippet } = parsed as Record<string, unknown>;

  // `id` and `url` are required and must be non-empty strings.
  if (typeof id !== "string" || typeof url !== "string" || !id || !url) {
    return undefined;
  }

  // `title` and `snippet` are optional; anything that is not a string
  // falls back to the empty string.
  return {
    id,
    title: typeof title === "string" ? title : "",
    url,
    snippet: typeof snippet === "string" ? snippet : "",
  };
}

/**
 * Parse citations block from message content.
 *
 * The citations block format:
 * <citations>
 * {"id": "cite-1", "title": "Page Title", "url": "https://example.com", "snippet": "Description"}
 * {"id": "cite-2", "title": "Another Page", "url": "https://example2.com", "snippet": "Description"}
 * </citations>
 *
 * Only a block at the very start of the content (leading whitespace allowed)
 * is recognised. Each line inside the block is parsed independently; lines
 * that are not valid JSON objects, or whose `id` / `url` are not non-empty
 * strings, are skipped. A missing or non-string `title` / `snippet` becomes
 * an empty string.
 *
 * @param content - The raw message content that may contain a citations block
 * @returns Object containing parsed citations array and content with citations
 *   block removed. When a block is found the remaining content is trimmed;
 *   when no block is found the content is returned unchanged.
 */
export function parseCitations(content: string): ParseCitationsResult {
  if (!content) {
    return { citations: [], cleanContent: content };
  }

  // Match the citations block at the start of content (with possible leading whitespace)
  const citationsRegex = /^\s*<citations>([\s\S]*?)<\/citations>/;
  const match = citationsRegex.exec(content);

  if (!match) {
    return { citations: [], cleanContent: content };
  }

  const citations: Citation[] = [];
  for (const line of (match[1] ?? "").split("\n")) {
    const citation = parseCitationLine(line);
    if (citation) {
      citations.push(citation);
    }
  }

  // The regex is anchored at the start, so the matched text is exactly the
  // leading prefix of `content`; slicing it off removes the block without
  // running the regex a second time.
  const cleanContent = content.slice(match[0].length).trim();

  return { citations, cleanContent };
}
|
||
|
|
|
||
|
|
/**
 * Index citations by their URL so they can be looked up directly.
 *
 * When several citations share a URL, the one appearing last in the array
 * is the one stored under that key.
 *
 * @param citations - Array of citations
 * @returns Map with URL as key and Citation as value
 */
export function buildCitationMap(
  citations: Citation[],
): Map<string, Citation> {
  const entries = citations.map(
    (entry): [string, Citation] => [entry.url, entry],
  );
  return new Map<string, Citation>(entries);
}
|
||
|
|
|
||
|
|
/**
 * Derive a short display name (the host) from a URL.
 *
 * @param url - Full URL string
 * @returns The hostname without a leading "www.", or the input unchanged
 *   when it cannot be parsed as a URL
 */
export function extractDomainFromUrl(url: string): string {
  let host: string;
  try {
    host = new URL(url).hostname;
  } catch {
    // Not a parseable URL: hand the input back as-is.
    return url;
  }

  // Drop a leading 'www.' if there is one
  const prefix = "www.";
  return host.startsWith(prefix) ? host.slice(prefix.length) : host;
}
|
||
|
|
|
||
|
|
/**
 * Tell whether the citations block is still being streamed in.
 * Callers can use this to hold off parsing until the block is complete.
 *
 * @param content - The current content being streamed
 * @returns true when the content contains "<citations>" but no
 *   "</citations>"; false otherwise, including for empty content
 */
export function isCitationsBlockIncomplete(content: string): boolean {
  // Opened but not yet closed => the block is still arriving.
  return (
    !!content &&
    content.indexOf("<citations>") !== -1 &&
    content.indexOf("</citations>") === -1
  );
}
|