/**
 * Citation data structure representing a source reference.
 */
export interface Citation {
  id: string;
  title: string;
  url: string;
  snippet: string;
}

/**
 * Result of parsing citations from content.
 */
export interface ParseCitationsResult {
  citations: Citation[];
  cleanContent: string;
}

/** Literal opening tag of a citations block. */
const CITATIONS_OPEN_TAG = "<citations>";

/** Literal closing tag of a citations block. */
const CITATIONS_CLOSE_TAG = "</citations>";

/** Matches every complete citations block; group 1 is the block body. */
const COMPLETE_CITATIONS_BLOCK = /<citations>([\s\S]*?)<\/citations>/g;

/** Matches an unterminated citations block running to the end of the string. */
const TRAILING_OPEN_CITATIONS_BLOCK = /<citations>[\s\S]*$/;

/**
 * Narrow an arbitrary parsed JSON value to a Citation.
 *
 * `id` and `url` must be non-empty strings; `title` and `snippet` fall back
 * to the empty string when they are missing or not strings.
 *
 * @param value - Result of `JSON.parse` on one line of a citations block
 * @returns A normalized Citation, or null when the required fields are absent
 */
function toCitation(value: unknown): Citation | null {
  if (typeof value !== "object" || value === null) {
    return null;
  }

  const { id, title, url, snippet } = value as Record<string, unknown>;
  if (typeof id !== "string" || id === "") {
    return null;
  }
  if (typeof url !== "string" || url === "") {
    return null;
  }

  return {
    id,
    title: typeof title === "string" ? title : "",
    url,
    snippet: typeof snippet === "string" ? snippet : "",
  };
}

/**
 * Parse citations blocks from message content.
 *
 * The citations block format (one JSON object per line):
 *
 * <citations>
 * {"id": "cite-1", "title": "Page Title", "url": "https://example.com", "snippet": "Description"}
 * {"id": "cite-2", "title": "Another Page", "url": "https://example2.com", "snippet": "Description"}
 * </citations>
 *
 * Blocks may appear anywhere in the content and there may be several of them.
 * Citations are deduplicated by URL (first occurrence wins). Lines that are
 * not valid JSON objects are skipped. An unterminated block (as seen while a
 * message is still streaming) is stripped from the output but its lines are
 * not parsed.
 *
 * @param content - The raw message content that may contain citations blocks
 * @returns Object containing the parsed citations array and the content with
 *          all citations blocks removed and surrounding whitespace trimmed
 */
export function parseCitations(content: string): ParseCitationsResult {
  if (!content) {
    return { citations: [], cleanContent: content };
  }

  const citations: Citation[] = [];
  const seenUrls = new Set<string>(); // Deduplicate by URL

  for (const match of content.matchAll(COMPLETE_CITATIONS_BLOCK)) {
    const citationsBlock = match[1] ?? "";

    for (const line of citationsBlock.split("\n")) {
      const trimmed = line.trim();
      if (!trimmed.startsWith("{")) {
        continue;
      }

      let parsed: unknown;
      try {
        parsed = JSON.parse(trimmed);
      } catch {
        // Deliberately skipped: malformed or truncated JSON lines are
        // expected while the message is still streaming.
        continue;
      }

      const citation = toCitation(parsed);
      if (citation !== null && !seenUrls.has(citation.url)) {
        seenUrls.add(citation.url);
        citations.push(citation);
      }
    }
  }

  // Remove every complete citations block from the content.
  let cleanContent = content.replace(COMPLETE_CITATIONS_BLOCK, "").trim();

  // Any opening tag still present has no closing tag (streaming in progress):
  // drop it together with everything that follows.
  if (cleanContent.includes(CITATIONS_OPEN_TAG)) {
    cleanContent = cleanContent.replace(TRAILING_OPEN_CITATIONS_BLOCK, "").trim();
  }

  return { citations, cleanContent };
}

/**
 * Build a map from URL to Citation for quick lookup.
 *
 * When several citations share a URL, the last one in the array wins.
 *
 * @param citations - Array of citations
 * @returns Map with URL as key and Citation as value
 */
export function buildCitationMap(
  citations: readonly Citation[],
): Map<string, Citation> {
  const map = new Map<string, Citation>();
  for (const citation of citations) {
    map.set(citation.url, citation);
  }
  return map;
}

/**
 * Extract the domain name from a URL for display.
 *
 * @param url - Full URL string
 * @returns Hostname without a leading "www.", or the original string if it
 *          cannot be parsed as a URL
 */
export function extractDomainFromUrl(url: string): string {
  try {
    const { hostname } = new URL(url);
    // Remove 'www.' prefix if present
    return hostname.replace(/^www\./, "");
  } catch {
    // `new URL` throws on unparsable input; fall back to the raw string.
    return url;
  }
}

/**
 * Check if content is still receiving the citations block (streaming).
 * This helps determine if we should wait before parsing.
 *
 * @param content - The current content being streamed
 * @returns true if the content contains an opening citations tag but no
 *          closing citations tag
 */
export function isCitationsBlockIncomplete(content: string): boolean {
  if (!content) {
    return false;
  }

  const hasOpenTag = content.includes(CITATIONS_OPEN_TAG);
  const hasCloseTag = content.includes(CITATIONS_CLOSE_TAG);
  return hasOpenTag && !hasCloseTag;
}