feat: add citation support in research report block and markdown

* feat: add citation support in research report block and markdown

- Enhanced ResearchReportBlock to fetch citations based on researchId and pass them to the Markdown component.
- Introduced CitationLink component to display citation metadata on hover for links in markdown.
- Implemented CitationCard and CitationList components for displaying citation details and lists.
- Updated Markdown component to handle citation links and inline citations.
- Created HoverCard component for displaying citation information in a tooltip-like manner.
- Modified store to manage citations, including setting and retrieving citations for ongoing research.
- Added CitationsEvent type to handle citations in chat events and updated Message type to include citations.

* fix(log): Enable the debug logging level when the DEBUG environment variable is enabled (#793)

* fix(frontend): render all tool calls in the frontend #796 (#797)

* build(deps): bump jspdf from 3.0.4 to 4.0.0 in /web (#798)

Bumps [jspdf](https://github.com/parallax/jsPDF) from 3.0.4 to 4.0.0.
- [Release notes](https://github.com/parallax/jsPDF/releases)
- [Changelog](https://github.com/parallax/jsPDF/blob/master/RELEASE.md)
- [Commits](https://github.com/parallax/jsPDF/compare/v3.0.4...v4.0.0)

---
updated-dependencies:
- dependency-name: jspdf
  dependency-version: 4.0.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* fix(frontend): added the display of the 'analyst' message #800 (#801)

* fix: migrate from deprecated create_react_agent to langchain.agents.create_agent (#802)

* fix: migrate from deprecated create_react_agent to langchain.agents.create_agent

Fixes #799

- Replace deprecated langgraph.prebuilt.create_react_agent with
  langchain.agents.create_agent (LangGraph 1.0 migration)
- Add DynamicPromptMiddleware to handle dynamic prompt templates
  (replaces the 'prompt' callable parameter)
- Add PreModelHookMiddleware to handle pre-model hooks
  (replaces the 'pre_model_hook' parameter)
- Update AgentState import from langchain.agents in template.py
- Update tests to use the new API

* fix:update the code with review comments

* fix: Add runtime parameter to compress_messages method(#803) 

* fix: Add runtime parameter to compress_messages method(#803)

    The compress_messages method was being called by PreModelHookMiddleware
    with both state and runtime parameters, but only accepted state parameter.
    This caused a TypeError when the middleware executed the pre_model_hook.

    Added optional runtime parameter to compress_messages signature to match
    the expected interface while maintaining backward compatibility.

* Update the code with the review comments

* fix: Refactor citation handling and add comprehensive tests for citation features

* refactor: Clean up imports and formatting across citation modules

* fix: Add monkeypatch to clear AGENT_RECURSION_LIMIT in recursion limit tests

* feat: Enhance citation link handling in Markdown component

* fix: Exclude citations from finish reason handling in mergeMessage function

* fix(nodes): update message handling

* fix(citations): improve citation extraction and handling in event processing

* feat(citations): enhance citation extraction and handling with improved merging and normalization

* fix(reporter): update citation formatting instructions for clarity and consistency

* fix(reporter): prioritize using Markdown tables for data presentation and comparison

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: LoftyComet <1277173875@qq.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
LoftyComet
2026-01-24 17:49:13 +08:00
committed by GitHub
parent 612bddd3fb
commit b7f0f54aa0
22 changed files with 2125 additions and 29 deletions

28
src/citations/__init__.py Normal file
View File

@@ -0,0 +1,28 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
"""
Citation management module for DeerFlow.
This module provides structured citation/source metadata handling
for research reports, enabling proper attribution and inline citations.
"""
# Stateful collector that gathers and deduplicates sources during research.
from .collector import CitationCollector
# Functional helpers for pulling citation dicts out of agent/tool messages.
from .extractor import (
    citations_to_markdown_references,
    extract_citations_from_messages,
    merge_citations,
)
# Rendering of inline citation markers and reference sections.
from .formatter import CitationFormatter
# Data models: a numbered Citation wrapping per-source CitationMetadata.
from .models import Citation, CitationMetadata
# Public API re-exported at the package level.
__all__ = [
    "Citation",
    "CitationMetadata",
    "CitationCollector",
    "CitationFormatter",
    "extract_citations_from_messages",
    "merge_citations",
    "citations_to_markdown_references",
]

280
src/citations/collector.py Normal file
View File

@@ -0,0 +1,280 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
"""
Citation collector for gathering and managing citations during research.
"""
import logging
from typing import Any, Dict, List, Optional
from .models import Citation, CitationMetadata
logger = logging.getLogger(__name__)
class CitationCollector:
    """
    Collects and manages citations during the research process.

    Responsibilities:
    - Gather citations from search results and crawled pages
    - Deduplicate citations by URL
    - Assign stable, 1-indexed citation numbers (first-seen order)
    - Track which citations are actually referenced in the report
    """

    def __init__(self):
        # URL -> metadata for every known source.
        self._citations: Dict[str, CitationMetadata] = {}
        # URLs in first-seen order; list position determines the citation number.
        self._citation_order: List[str] = []
        # URLs that have been explicitly marked as cited in the report.
        self._used_citations: set[str] = set()

    def add_from_search_results(
        self, results: List[Dict[str, Any]], query: str = ""
    ) -> List[CitationMetadata]:
        """
        Register citations found in a batch of search results.

        Args:
            results: List of search result dictionaries
            query: The search query that produced these results

        Returns:
            The CitationMetadata objects newly added by this call
        """
        newly_added: List[CitationMetadata] = []
        for entry in results:
            # Image hits carry no citable text content.
            if entry.get("type") == "image_url":
                continue
            url = entry.get("url")
            if not url:
                continue
            metadata = CitationMetadata.from_search_result(entry, query)
            if url in self._citations:
                # Keep whichever record carries the higher relevance score.
                current = self._citations[url]
                if metadata.relevance_score > current.relevance_score:
                    self._citations[url] = metadata
                    logger.debug(f"Updated citation: {metadata.title} ({url})")
            else:
                self._citations[url] = metadata
                self._citation_order.append(url)
                newly_added.append(metadata)
                logger.debug(f"Added citation: {metadata.title} ({url})")
        return newly_added

    def add_from_crawl_result(
        self, url: str, title: str, content: Optional[str] = None, **extra_metadata
    ) -> CitationMetadata:
        """
        Register or enrich a citation from a crawled page.

        Args:
            url: The URL of the crawled page
            title: The page title
            content: The page content
            **extra_metadata: Additional metadata fields

        Returns:
            The CitationMetadata object
        """
        existing = self._citations.get(url)
        if existing is not None:
            # Crawled pages usually have better titles/content than search hits.
            if title and title != "Untitled":
                existing.title = title
            if content:
                existing.raw_content = content
                if not existing.content_snippet:
                    existing.content_snippet = content[:500]
            return existing
        created = CitationMetadata(
            url=url,
            title=title or "Untitled",
            content_snippet=content[:500] if content else None,
            raw_content=content,
            **extra_metadata,
        )
        self._citations[url] = created
        self._citation_order.append(url)
        return created

    def mark_used(self, url: str) -> Optional[int]:
        """Mark *url* as cited; return its 1-indexed number, or None if unknown."""
        if url not in self._citations:
            return None
        self._used_citations.add(url)
        return self.get_number(url)

    def get_number(self, url: str) -> Optional[int]:
        """Return the 1-indexed citation number for *url*, or None if unknown."""
        if url in self._citation_order:
            return self._citation_order.index(url) + 1
        return None

    def get_metadata(self, url: str) -> Optional[CitationMetadata]:
        """Return the stored CitationMetadata for *url*, or None if unknown."""
        return self._citations.get(url)

    def get_all_citations(self) -> List[Citation]:
        """Return every collected citation, numbered in first-seen order."""
        return [
            Citation(number=position, metadata=self._citations[url])
            for position, url in enumerate(self._citation_order, start=1)
        ]

    def get_used_citations(self) -> List[Citation]:
        """Return only citations marked as used, renumbered contiguously from 1."""
        used: List[Citation] = []
        for url in self._citation_order:
            if url not in self._used_citations:
                continue
            used.append(Citation(number=len(used) + 1, metadata=self._citations[url]))
        return used

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the collector state for persistence/transport."""
        return {
            "citations": [c.to_dict() for c in self.get_all_citations()],
            "used_urls": list(self._used_citations),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CitationCollector":
        """Rebuild a collector previously serialized with :meth:`to_dict`."""
        collector = cls()
        for payload in data.get("citations", []):
            restored = Citation.from_dict(payload)
            collector._citations[restored.url] = restored.metadata
            collector._citation_order.append(restored.url)
        collector._used_citations = set(data.get("used_urls", []))
        return collector

    def merge_with(self, other: "CitationCollector") -> None:
        """Absorb *other*'s citations; this collector's numbering takes precedence."""
        for url in other._citation_order:
            if url not in self._citations:
                self._citations[url] = other._citations[url]
                self._citation_order.append(url)
        self._used_citations.update(other._used_citations)

    @property
    def count(self) -> int:
        """Total number of distinct citations collected."""
        return len(self._citations)

    @property
    def used_count(self) -> int:
        """Number of citations marked as used."""
        return len(self._used_citations)

    def clear(self) -> None:
        """Drop all citations and usage tracking."""
        self._citations.clear()
        self._citation_order.clear()
        self._used_citations.clear()
def extract_urls_from_text(text: str) -> List[str]:
    """
    Extract URLs from markdown text.

    Markdown-link targets are collected first (in order of appearance),
    then bare URLs that are not already part of a link are appended,
    skipping any bare URL already collected.

    Args:
        text: Markdown text that may contain URLs

    Returns:
        List of URLs found in the text
    """
    import re

    found: List[str] = []
    # [label](target) style markdown links.
    for link in re.finditer(r"\[([^\]]+)\]\(([^)]+)\)", text):
        target = link.group(2)
        if target.startswith(("http://", "https://")):
            found.append(target)
    # Bare URLs; the lookbehind excludes URLs already wrapped by '(' or '['.
    for bare in re.finditer(r"(?<![\(\[])(https?://[^\s\)>\]]+)", text):
        candidate = bare.group(1)
        if candidate not in found:
            found.append(candidate)
    return found

343
src/citations/extractor.py Normal file
View File

@@ -0,0 +1,343 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
"""
Citation extraction utilities for extracting citations from tool results.
"""
import json
import logging
from typing import Any, Dict, List, Optional
from langchain_core.messages import AIMessage, ToolMessage
from .models import CitationMetadata
logger = logging.getLogger(__name__)
def extract_citations_from_messages(messages: List[Any]) -> List[Dict[str, Any]]:
    """
    Extract citation metadata from agent messages (tool calls/results).

    ToolMessage results (web_search, crawl) are the source of citations;
    AIMessage tool_calls are only inspected for logging the search query.
    Citations are deduplicated by URL, keeping the first occurrence.

    Args:
        messages: List of messages from agent execution

    Returns:
        List of citation dictionaries
    """
    collected: List[Dict[str, Any]] = []
    seen: set = set()
    logger.info(f"[Citations] Starting extraction from {len(messages)} messages")
    for msg in messages:
        # Tool results carry the actual search/crawl payloads.
        if isinstance(msg, ToolMessage):
            logger.info(
                f"[Citations] Found ToolMessage: name={getattr(msg, 'name', 'unknown')}"
            )
            for citation in _extract_from_tool_message(msg):
                url = citation.get("url", "")
                if url and url not in seen:
                    seen.add(url)
                    collected.append(citation)
        # Tool calls only reveal the query; results arrive in a later ToolMessage.
        if isinstance(msg, AIMessage) and hasattr(msg, "tool_calls"):
            for call in msg.tool_calls or []:
                if call.get("name") == "web_search":
                    query = call.get("args", {}).get("query", "")
                    logger.info(
                        "[Citations] Found web_search tool call with query=%r", query
                    )
    logger.info(
        f"[Citations] Extracted {len(collected)} unique citations from {len(messages)} messages"
    )
    return collected
def _extract_from_tool_message(message: ToolMessage) -> List[Dict[str, Any]]:
    """
    Extract citations from a tool message result.

    Recognizes search/crawl tools by name first; for unknown tool names it
    falls back to detecting the payload shape (list of url-dicts vs. a single
    crawl-result dict).

    Args:
        message: ToolMessage with tool execution result

    Returns:
        List of citation dictionaries
    """
    found: List[Dict[str, Any]] = []
    tool_name = getattr(message, "name", "") or ""
    content = getattr(message, "content", "")
    logger.info(
        f"Processing tool message: tool_name='{tool_name}', content_len={len(str(content)) if content else 0}"
    )
    if not content:
        return found
    # Payload may already be structured, or a JSON string to decode.
    try:
        data = json.loads(content) if isinstance(content, str) else content
    except (json.JSONDecodeError, TypeError):
        logger.debug(
            f"Could not parse tool message content as JSON: {str(content)[:100]}..."
        )
        return found
    logger.debug(f"Parsed tool message data type: {type(data).__name__}")
    name_key = tool_name.lower() if tool_name else ""
    search_tools = (
        "web_search",
        "tavily_search",
        "duckduckgo_search",
        "brave_search",
        "searx_search",
    )
    crawl_tools = ("crawl_tool", "crawl", "jina_crawl")
    if name_key in search_tools:
        found.extend(_extract_from_search_results(data))
        logger.debug(
            f"Extracted {len(found)} citations from search tool '{tool_name}'"
        )
    elif name_key in crawl_tools:
        hit = _extract_from_crawl_result(data)
        if hit:
            found.append(hit)
            logger.debug(f"Extracted 1 citation from crawl tool '{tool_name}'")
    else:
        # Unknown tool: infer the payload kind from its structure.
        if isinstance(data, list) and len(data) > 0:
            head = data[0]
            if isinstance(head, dict) and "url" in head:
                logger.debug(
                    f"Auto-detected search results format for tool '{tool_name}'"
                )
                found.extend(_extract_from_search_results(data))
        elif (
            isinstance(data, dict)
            and "url" in data
            and ("crawled_content" in data or "content" in data)
        ):
            logger.debug(f"Auto-detected crawl result format for tool '{tool_name}'")
            hit = _extract_from_crawl_result(data)
            if hit:
                found.append(hit)
    return found
def _extract_from_search_results(data: Any) -> List[Dict[str, Any]]:
    """
    Extract citations from web search results.

    Accepts either a bare list of result dicts or a dict with a "results"
    key; anything else yields an empty list. Image results are skipped.

    Args:
        data: Parsed JSON data from search tool

    Returns:
        List of citation dictionaries
    """
    extracted: List[Dict[str, Any]] = []
    if isinstance(data, list):
        entries = data
    elif isinstance(data, dict):
        # Error payloads have nothing to cite.
        if "error" in data:
            logger.warning(f"Search error: {data.get('error')}")
            return extracted
        entries = data.get("results", [])
    else:
        entries = []
    for entry in entries:
        if isinstance(entry, dict) and entry.get("type") != "image_url":
            citation = _result_to_citation(entry)
            if citation:
                extracted.append(citation)
    return extracted
def _result_to_citation(result: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Convert a search result to a citation dictionary.

    Args:
        result: Search result dictionary

    Returns:
        Citation dictionary, or None when the result has no URL
    """
    url = result.get("url", "")
    if not url:
        return None
    snippet_source = result.get("content", "") or ""
    return {
        "url": url,
        "title": result.get("title", "Untitled"),
        "description": result.get("content", ""),
        "content_snippet": snippet_source[:500],
        "relevance_score": result.get("score", 0.0),
        "domain": _extract_domain(url),
        "accessed_at": None,  # Will be filled by CitationMetadata
        "source_type": "web_search",
    }
def _extract_from_crawl_result(data: Any) -> Optional[Dict[str, Any]]:
    """
    Extract citation from crawl tool result.

    Args:
        data: Parsed JSON data from crawl tool

    Returns:
        Citation dictionary, or None when *data* is not a dict or has no URL
    """
    if not isinstance(data, dict):
        return None
    url = data.get("url", "")
    if not url:
        return None
    # Fix: the caller's structural auto-detection accepts payloads keyed by
    # either "crawled_content" or "content", but only "crawled_content" was
    # read here, so "content"-keyed crawl results produced empty citations.
    content = data.get("crawled_content") or data.get("content", "")
    # Derive a title from the content: prefer the first Markdown H1,
    # otherwise the first non-heading line (truncated to 100 chars).
    title = "Untitled"
    if content:
        for line in content.strip().split("\n"):
            line = line.strip()
            if line.startswith("# "):
                title = line[2:].strip()
                break
            if line and not line.startswith("#"):
                title = line[:100]
                break
    return {
        "url": url,
        "title": title,
        "description": content[:300] if content else "",
        "content_snippet": content[:500] if content else "",
        "raw_content": content,
        "domain": _extract_domain(url),
        "source_type": "crawl",
    }
def _extract_domain(url: str) -> str:
"""Extract domain from URL."""
try:
from urllib.parse import urlparse
parsed = urlparse(url)
return parsed.netloc
except Exception:
return ""
def merge_citations(
    existing: List[Dict[str, Any]], new: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """
    Merge new citations into existing list, avoiding duplicates.

    A new citation for an already-seen URL replaces selected fields of the
    stored one only when it has a strictly higher relevance_score. The input
    lists are not mutated; a new list is returned.

    Args:
        existing: Existing citations list
        new: New citations to add

    Returns:
        Merged list of citations
    """
    seen_urls = {c.get("url") for c in existing if c.get("url")}
    result = list(existing)
    for citation in new:
        url = citation.get("url", "")
        if not url:
            continue
        if url not in seen_urls:
            seen_urls.add(url)
            result.append(citation)
            continue
        # Duplicate URL: update the stored citation if the new one is better.
        for i, existing_citation in enumerate(result):
            if existing_citation.get("url") != url:
                continue
            if citation.get("relevance_score", 0) > existing_citation.get(
                "relevance_score", 0
            ):
                # Update selectively instead of blindly merging all fields.
                updated = existing_citation.copy()
                if "relevance_score" in citation:
                    updated["relevance_score"] = citation["relevance_score"]
                # Fix: key was "snippet", but citations produced by the
                # extractor use "content_snippet", so better snippets were
                # never merged. Non-empty new values win.
                for key in ("title", "description", "content_snippet"):
                    new_value = citation.get(key)
                    if new_value:
                        updated[key] = new_value
                result[i] = updated
            break
    return result
def citations_to_markdown_references(citations: List[Dict[str, Any]]) -> str:
    """
    Convert citations list to markdown references section.

    Each citation becomes a markdown link plus an HTML comment carrying
    machine-readable metadata (domain, relevance score), separated by
    blank lines.

    Args:
        citations: List of citation dictionaries

    Returns:
        Markdown formatted references section ("" when there are no citations)
    """
    if not citations:
        return ""
    lines = ["## Key Citations", ""]
    for citation in citations:
        entry_title = citation.get("title", "Untitled")
        entry_url = citation.get("url", "")
        entry_domain = citation.get("domain", "")
        lines.append(f"- [{entry_title}]({entry_url})")
        annotations = []
        if entry_domain:
            annotations.append(f"domain: {entry_domain}")
        if citation.get("relevance_score"):
            annotations.append(f"score: {citation['relevance_score']:.2f}")
        if annotations:
            lines.append(f" <!-- {', '.join(annotations)} -->")
        lines.append("")  # blank separator between citations
    return "\n".join(lines)

271
src/citations/formatter.py Normal file
View File

@@ -0,0 +1,271 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
"""
Citation formatter for generating citation sections and inline references.
"""
import re
from typing import Dict, List, Tuple
from .models import Citation, CitationMetadata
class CitationFormatter:
    """
    Formats citations for display in reports.
    Supports multiple citation styles:
    - numbered: [1], [2], etc.
    - superscript: ¹, ², etc.
    - footnote: [^1], [^2], etc.
    - inline: (Author, Year) or (Source)
    """

    # ASCII digit -> Unicode superscript digit (U+2070, U+00B9-U+00B3, U+2074-U+2079).
    # Fix: the entries for 0 and 4-9 were empty strings (mis-encoded
    # characters), so superscript markers for those digits rendered as nothing.
    SUPERSCRIPT_MAP = {
        "0": "⁰",
        "1": "¹",
        "2": "²",
        "3": "³",
        "4": "⁴",
        "5": "⁵",
        "6": "⁶",
        "7": "⁷",
        "8": "⁸",
        "9": "⁹",
    }

    def __init__(self, style: str = "numbered"):
        """
        Initialize the formatter.
        Args:
            style: Citation style ('numbered', 'superscript', 'footnote', 'inline')
        """
        self.style = style

    def format_inline_marker(self, number: int) -> str:
        """
        Format an inline citation marker.
        Args:
            number: The citation number
        Returns:
            Formatted marker string
        """
        if self.style == "superscript":
            return "".join(self.SUPERSCRIPT_MAP.get(c, c) for c in str(number))
        elif self.style == "footnote":
            return f"[^{number}]"
        else:  # numbered (also the fallback for 'inline' and unknown styles)
            return f"[{number}]"

    def format_reference(self, citation: Citation) -> str:
        """
        Format a single reference for the citations section.
        Args:
            citation: The citation to format
        Returns:
            Formatted reference string
        """
        metadata = citation.metadata
        # Build reference with available metadata
        parts = []
        # Number and title
        parts.append(f"[{citation.number}] **{metadata.title}**")
        # Author if available
        if metadata.author:
            parts.append(f" *{metadata.author}*")
        # Domain/source
        if metadata.domain:
            parts.append(f" Source: {metadata.domain}")
        # Published date if available
        if metadata.published_date:
            parts.append(f" Published: {metadata.published_date}")
        # URL
        parts.append(f" URL: {metadata.url}")
        # Description/snippet, truncated to 200 chars with ellipsis
        if metadata.description:
            snippet = metadata.description[:200]
            if len(metadata.description) > 200:
                snippet += "..."
            parts.append(f" > {snippet}")
        return "\n".join(parts)

    def format_simple_reference(self, citation: Citation) -> str:
        """
        Format a simple reference (title + URL).
        Args:
            citation: The citation to format
        Returns:
            Simple reference string
        """
        return f"- [{citation.metadata.title}]({citation.metadata.url})"

    def format_rich_reference(self, citation: Citation) -> str:
        """
        Format a rich reference with metadata as JSON-like annotation.
        Args:
            citation: The citation to format
        Returns:
            Rich reference string with metadata
        """
        metadata = citation.metadata
        parts = [f"- [{metadata.title}]({metadata.url})"]
        annotations = []
        if metadata.domain:
            annotations.append(f"domain: {metadata.domain}")
        if metadata.relevance_score > 0:
            annotations.append(f"relevance: {metadata.relevance_score:.2f}")
        if metadata.accessed_at:
            # Keep only the date part of the ISO timestamp.
            annotations.append(f"accessed: {metadata.accessed_at[:10]}")
        if annotations:
            parts.append(f" <!-- {', '.join(annotations)} -->")
        return "\n".join(parts)

    def format_citations_section(
        self, citations: List[Citation], include_metadata: bool = True
    ) -> str:
        """
        Format the full citations section for a report.
        Args:
            citations: List of citations to include
            include_metadata: Whether to include rich metadata
        Returns:
            Formatted citations section markdown ("" when empty)
        """
        if not citations:
            return ""
        lines = ["## Key Citations", ""]
        for citation in citations:
            if include_metadata:
                lines.append(self.format_rich_reference(citation))
            else:
                lines.append(self.format_simple_reference(citation))
            lines.append("")  # Empty line between citations
        return "\n".join(lines)

    def format_footnotes_section(self, citations: List[Citation]) -> str:
        """
        Format citations as footnotes (for footnote style).
        Args:
            citations: List of citations
        Returns:
            Footnotes section markdown ("" when empty)
        """
        if not citations:
            return ""
        lines = ["", "---", ""]
        for citation in citations:
            lines.append(
                f"[^{citation.number}]: {citation.metadata.title} - {citation.metadata.url}"
            )
        return "\n".join(lines)

    def add_citation_markers_to_text(
        self, text: str, citations: List[Citation], url_to_number: Dict[str, int]
    ) -> str:
        """
        Add citation markers to text where URLs are referenced.
        Args:
            text: The text to process
            citations: Available citations
            url_to_number: Mapping from URL to citation number
        Returns:
            Text with citation markers appended after each known markdown link
        """
        # Find all markdown links and append citation markers for known URLs.
        def replace_link(match):
            full_match = match.group(0)
            url = match.group(2)
            if url in url_to_number:
                number = url_to_number[url]
                marker = self.format_inline_marker(number)
                return f"{full_match}{marker}"
            return full_match

        pattern = r"\[([^\]]+)\]\(([^)]+)\)"
        return re.sub(pattern, replace_link, text)

    @staticmethod
    def build_citation_data_json(citations: List[Citation]) -> str:
        """
        Build a JSON block containing citation data for frontend use.
        Args:
            citations: List of citations
        Returns:
            JSON string with citation data
        """
        import json

        data = {
            "citations": [c.to_dict() for c in citations],
            "count": len(citations),
        }
        return json.dumps(data, ensure_ascii=False)
def parse_citations_from_report(report: str) -> List[Tuple[str, str]]:
    """
    Parse citation links from a report's Key Citations section.

    Looks for a "Key Citations", "References", or "Sources" heading and
    collects the http(s) markdown links inside that section.

    Args:
        report: The report markdown text

    Returns:
        List of (title, url) tuples (empty when no section/links are found)
    """
    heading_re = (
        r"(?:##\s*Key Citations|##\s*References|##\s*Sources)\s*\n(.*?)(?=\n##|\Z)"
    )
    match = re.search(heading_re, report, re.IGNORECASE | re.DOTALL)
    if not match:
        return []
    body = match.group(1)
    pairs: List[Tuple[str, str]] = []
    for link in re.finditer(r"\[([^\]]+)\]\(([^)]+)\)", body):
        label, target = link.group(1), link.group(2)
        if target.startswith(("http://", "https://")):
            pairs.append((label, target))
    return pairs

178
src/citations/models.py Normal file
View File

@@ -0,0 +1,178 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
"""
Citation data models for structured source metadata.
"""
import hashlib
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
@dataclass
class CitationMetadata:
    """Structured metadata describing a single cited source."""

    # Core identifiers
    url: str
    title: str
    # Content information
    description: Optional[str] = None
    content_snippet: Optional[str] = None
    raw_content: Optional[str] = None
    # Source metadata
    domain: Optional[str] = None
    author: Optional[str] = None
    published_date: Optional[str] = None
    language: Optional[str] = None
    # Media
    images: List[str] = field(default_factory=list)
    favicon: Optional[str] = None
    # Quality indicators
    relevance_score: float = 0.0
    credibility_score: float = 0.0
    # Timestamps
    accessed_at: str = field(default_factory=lambda: datetime.now().isoformat())
    # Additional metadata
    extra: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Derive `domain` from `url` when the caller did not supply one."""
        if self.domain or not self.url:
            return
        try:
            self.domain = urlparse(self.url).netloc
        except Exception:
            # Non-critical convenience field: a malformed URL must not
            # prevent citation metadata creation, so failures are ignored.
            pass

    @property
    def id(self) -> str:
        """Stable 12-hex-char identifier derived from the URL (sha256 prefix)."""
        return hashlib.sha256(self.url.encode("utf-8")).hexdigest()[:12]

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict for JSON encoding.

        Note: `raw_content` is not included — presumably to keep payloads
        small; confirm before relying on round-tripping it.
        """
        return {
            "id": self.id,
            "url": self.url,
            "title": self.title,
            "description": self.description,
            "content_snippet": self.content_snippet,
            "domain": self.domain,
            "author": self.author,
            "published_date": self.published_date,
            "language": self.language,
            "images": self.images,
            "favicon": self.favicon,
            "relevance_score": self.relevance_score,
            "credibility_score": self.credibility_score,
            "accessed_at": self.accessed_at,
            "extra": self.extra,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CitationMetadata":
        """Rebuild from a dict produced by :meth:`to_dict`."""
        # 'id' is computed from the URL, so it is not a constructor argument.
        fields_only = {k: v for k, v in data.items() if k != "id"}
        return cls(**fields_only)

    @classmethod
    def from_search_result(
        cls, result: Dict[str, Any], query: str = ""
    ) -> "CitationMetadata":
        """Create citation metadata from a raw search-result dict."""
        body = result.get("content")
        return cls(
            url=result.get("url", ""),
            title=result.get("title", "Untitled"),
            description=result.get("content", result.get("description", "")),
            content_snippet=body[:500] if body else None,
            raw_content=result.get("raw_content"),
            relevance_score=result.get("score", 0.0),
            extra={"query": query, "result_type": result.get("type", "page")},
        )
@dataclass
class Citation:
    """
    A citation reference that can be used in reports.

    Pairs a display number with the underlying source metadata, and offers
    several rendering helpers (markdown link, numbered line, footnote).
    """

    # Citation number (1-indexed for display)
    number: int
    # Reference to the source metadata
    metadata: CitationMetadata
    # Context where this citation is used
    context: Optional[str] = None
    # Specific quote or fact being cited
    cited_text: Optional[str] = None

    @property
    def id(self) -> str:
        """Citation ID, delegated to the metadata."""
        return self.metadata.id

    @property
    def url(self) -> str:
        """Source URL, delegated to the metadata."""
        return self.metadata.url

    @property
    def title(self) -> str:
        """Source title, delegated to the metadata."""
        return self.metadata.title

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict for JSON encoding."""
        return {
            "number": self.number,
            "metadata": self.metadata.to_dict(),
            "context": self.context,
            "cited_text": self.cited_text,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Citation":
        """Rebuild from a dict produced by :meth:`to_dict`."""
        return cls(
            number=data["number"],
            metadata=CitationMetadata.from_dict(data["metadata"]),
            context=data.get("context"),
            cited_text=data.get("cited_text"),
        )

    def to_markdown_reference(self) -> str:
        """Render as a markdown link: [Title](URL)"""
        return f"[{self.title}]({self.url})"

    def to_numbered_reference(self) -> str:
        """Render as a numbered line: [1] Title - URL"""
        return f"[{self.number}] {self.title} - {self.url}"

    def to_inline_marker(self) -> str:
        """Render the inline citation marker: [^1]"""
        return f"[^{self.number}]"

    def to_footnote(self) -> str:
        """Render the footnote definition: [^1]: Title - URL"""
        return f"[^{self.number}]: {self.title} - {self.url}"

View File

@@ -14,6 +14,7 @@ from langchain_mcp_adapters.client import MultiServerMCPClient
from langgraph.types import Command, interrupt
from src.agents import create_agent
from src.citations import extract_citations_from_messages, merge_citations
from src.config.agents import AGENT_LLM_MAP
from src.config.configuration import Configuration
from src.llms.llm import get_llm_by_type, get_llm_token_limit_by_type
@@ -715,6 +716,7 @@ def coordinator_node(
"clarified_research_topic": clarified_topic,
"is_clarification_complete": False,
"goto": goto,
"citations": state.get("citations", []),
"__interrupt__": [("coordinator", response.content)],
},
goto=goto,
@@ -802,6 +804,7 @@ def coordinator_node(
"clarification_history": clarification_history,
"is_clarification_complete": goto != "coordinator",
"goto": goto,
"citations": state.get("citations", []),
},
goto=goto,
)
@@ -822,14 +825,32 @@ def reporter_node(state: State, config: RunnableConfig):
}
invoke_messages = apply_prompt_template("reporter", input_, configurable, input_.get("locale", "en-US"))
observations = state.get("observations", [])
# Get collected citations for the report
citations = state.get("citations", [])
# Add a reminder about the new report format, citation style, and table usage
invoke_messages.append(
HumanMessage(
content="IMPORTANT: Structure your report according to the format in the prompt. Remember to include:\n\n1. Key Points - A bulleted list of the most important findings\n2. Overview - A brief introduction to the topic\n3. Detailed Analysis - Organized into logical sections\n4. Survey Note (optional) - For more comprehensive reports\n5. Key Citations - List all references at the end\n\nFor citations, DO NOT include inline citations in the text. Instead, place all citations in the 'Key Citations' section at the end using the format: `- [Source Title](URL)`. Include an empty line between each citation for better readability.\n\nPRIORITIZE USING MARKDOWN TABLES for data presentation and comparison. Use tables whenever presenting comparative data, statistics, features, or options. Structure tables with clear headers and aligned columns. Example table format:\n\n| Feature | Description | Pros | Cons |\n|---------|-------------|------|------|\n| Feature 1 | Description 1 | Pros 1 | Cons 1 |\n| Feature 2 | Description 2 | Pros 2 | Cons 2 |",
name="system",
# If we have collected citations, provide them to the reporter
if citations:
citation_list = "\n\n## Available Source References (use these in References section):\n\n"
for i, citation in enumerate(citations, 1):
title = citation.get("title", "Untitled")
url = citation.get("url", "")
domain = citation.get("domain", "")
description = citation.get("description", "")
desc_truncated = description[:150] if description else ""
citation_list += f"{i}. **{title}**\n - URL: {url}\n - Domain: {domain}\n"
if desc_truncated:
citation_list += f" - Summary: {desc_truncated}...\n"
citation_list += "\n"
logger.info(f"Providing {len(citations)} collected citations to reporter")
invoke_messages.append(
HumanMessage(
content=citation_list,
name="system",
)
)
)
observation_messages = []
for observation in observations:
@@ -852,7 +873,10 @@ def reporter_node(state: State, config: RunnableConfig):
response_content = response.content
logger.info(f"reporter response: {response_content}")
return {"final_report": response_content}
return {
"final_report": response_content,
"citations": citations, # Pass citations through to final state
}
def research_team_node(state: State):
@@ -1114,11 +1138,23 @@ async def _execute_agent_step(
f"All tool results will be preserved and streamed to frontend."
)
# Extract citations from tool call results (web_search, crawl)
existing_citations = state.get("citations", [])
new_citations = extract_citations_from_messages(agent_messages)
merged_citations = merge_citations(existing_citations, new_citations)
if new_citations:
logger.info(
f"Extracted {len(new_citations)} new citations from {agent_name} agent. "
f"Total citations: {len(merged_citations)}"
)
return Command(
update={
**preserve_state_meta_fields(state),
"messages": agent_messages,
"observations": observations + [response_content + validation_info],
**preserve_state_meta_fields(state),
"citations": merged_citations, # Store merged citations based on existing state and new tool results
},
goto="research_team",
)

View File

@@ -3,6 +3,7 @@
from dataclasses import field
from typing import Any
from langgraph.graph import MessagesState
@@ -27,6 +28,10 @@ class State(MessagesState):
auto_accepted_plan: bool = False
enable_background_investigation: bool = True
background_investigation_results: str = None
# Citation metadata collected during research
# Format: List of citation dictionaries with url, title, description, etc.
citations: list[dict[str, Any]] = field(default_factory=list)
# Clarification state tracking (disabled by default)
enable_clarification: bool = (

View File

@@ -372,9 +372,12 @@ Structure your report in the following format:
- If uncertain about any information, acknowledge the uncertainty.
- Only include verifiable facts from the provided source material.
- Place all citations in the "Key Citations" section at the end, not inline in the text.
- For each citation, use the format: `- [Source Title](URL)`
- Include an empty line between each citation for better readability.
- Structure your report to include: Key Points, Overview, Detailed Analysis, Survey Note (optional), and References.
- Use inline citations [n] in the text where appropriate.
- The number n must correspond to the source index in the provided 'Available Source References' list.
- Make the inline citation a link to the reference at the bottom using the format `[[n]](#ref-n)`.
- In the References section at the end, list the sources using the format `[[n]](#citation-target-n) **[Title](URL)**`.
- PRIORITIZE USING MARKDOWN TABLES for data presentation and comparison. Use tables whenever presenting comparative data, statistics, features, or options.
- Include images using `![Image Description](image_url)`. The images should be in the middle of the report, not at the end or separate section.
- The included images should **only** be from the information gathered **from the previous steps**. **Never** include images that are not from the previous steps
- Directly output the Markdown raw content without "```markdown" or "```".

View File

@@ -370,9 +370,12 @@ CURRENT_TIME: {{ CURRENT_TIME }}
- 如果对任何信息不确定,确认不确定性。
- 仅包括来自提供的源资料的可验证事实。
- 将所有引用放在末尾的"关键引文"部分,而不是文本中的内联
- 对于每个引用,使用格式:`- [来源标题](URL)`
- 在每个引文之间包括一个空行以获得更好的可读性
- 报告结构应包含:核心要点、概述、详细分析、调查说明(可选)和参考文献
- 在正文适当位置使用内联引用 [n]。
- 数字 n 必须对应提供的"可用来源参考"列表中的索引
- 将内联引用设为指向底部参考文献的链接,格式为 `[[n]](#ref-n)`
- 在末尾的参考文献部分,使用格式 `[[n]](#citation-target-n) **[标题](URL)**` 列出来源。
- 优先使用 Markdown 表格进行数据展示和比较。在展示对比数据、统计数据、特性或选项时,请务必使用表格。
- 使用`![图像说明](图像URL)`包括图像。图像应该在报告的中间,而不是末尾或单独的部分。
- 包含的图像应**仅**来自**从之前步骤中**收集的信息。**绝不**包括不来自之前步骤的图像
- 直接输出Markdown原始内容不带"```markdown"或"```"。

View File

@@ -37,6 +37,7 @@ from src.config.configuration import get_recursion_limit
from src.config.loader import get_bool_env, get_int_env, get_str_env
from src.config.report_style import ReportStyle
from src.config.tools import SELECTED_RAG_PROVIDER
from src.citations import merge_citations
from src.graph.builder import build_graph_with_memory
from src.graph.checkpoint import chat_stream_message
from src.graph.utils import (
@@ -584,14 +585,69 @@ async def _process_message_chunk(message_chunk, message_metadata, thread_id, age
yield _make_event("message_chunk", event_stream_message)
def extract_citations_from_event(event: Any, safe_thread_id: str = "unknown") -> list:
    """Extract all citations from event data using an iterative, depth-limited traversal.

    Walks the event dict breadth-first, collecting every list found under a
    "citations" key. Traversal is bounded both by depth and by total node
    count so pathological structures cannot stall the stream.
    """
    # Only dict-based event structures are supported.
    if not isinstance(event, dict):
        return []

    from collections import deque

    found: list[Any] = []
    MAX_DEPTH = 5  # Prevent excessively deep traversal
    MAX_NODES = 5000  # Safety cap to avoid pathological large structures

    # BFS frontier of (node_dict, depth) pairs, rooted at the event itself.
    pending: deque[tuple[dict[str, Any], int]] = deque([(event, 0)])
    visited = 0

    while pending:
        node, level = pending.popleft()
        visited += 1
        if visited > MAX_NODES:
            logger.warning(
                f"[{safe_thread_id}] Stopping citation extraction after visiting "
                f"{visited} nodes to avoid performance issues"
            )
            break

        # Collect a direct "citations" list at this level, if present.
        cites = node.get("citations")
        if isinstance(cites, list) and cites:
            logger.debug(
                f"[{safe_thread_id}] Found {len(cites)} citations at depth {level}"
            )
            found.extend(cites)

        # Do not descend past the depth cap.
        if level >= MAX_DEPTH:
            continue

        # Queue nested dicts, including dicts inside list values
        # (e.g. Command updates carried as lists).
        for child in node.values():
            if isinstance(child, dict):
                pending.append((child, level + 1))
            elif isinstance(child, list):
                for item in child:
                    if isinstance(item, dict):
                        pending.append((item, level + 1))
    return found
async def _stream_graph_events(
graph_instance, workflow_input, workflow_config, thread_id
):
"""Stream events from the graph and process them."""
safe_thread_id = sanitize_thread_id(thread_id)
logger.debug(f"[{safe_thread_id}] Starting graph event stream with agent nodes")
# Track citations collected during research
collected_citations = []
try:
event_count = 0
last_state_update = None # Track the last state update to get final citations
async for agent, _, event_data in graph_instance.astream(
workflow_input,
config=workflow_config,
@@ -603,6 +659,24 @@ async def _stream_graph_events(
logger.debug(f"[{safe_thread_id}] Graph event #{event_count} received from agent: {safe_agent}")
if isinstance(event_data, dict):
# Store the last state update for final citation extraction
last_state_update = event_data
# Log event keys for debugging (more verbose for citations debugging)
event_keys = list(event_data.keys())
# Check for citations in state updates (may be nested)
new_citations = extract_citations_from_event(event_data, safe_thread_id)
if new_citations:
# Accumulate citations across events instead of overwriting
# using merge_citations to avoid duplicates and preserve better metadata
collected_citations = merge_citations(collected_citations, new_citations)
# Key difference: replace string heuristic with actual extraction count for logging
logger.info(
f"[{safe_thread_id}] Event contains citations, "
f"keys: {event_keys}, count: {len(new_citations)}, total: {len(collected_citations)}"
)
if "__interrupt__" in event_data:
logger.debug(
f"[{safe_thread_id}] Processing interrupt event: "
@@ -631,6 +705,40 @@ async def _stream_graph_events(
):
yield event
# After streaming completes, try to get citations
# First check if we collected any during streaming
if not collected_citations and last_state_update:
# Try to get citations from the last state update
logger.debug(f"[{safe_thread_id}] No citations collected during streaming, checking last state update")
collected_citations = extract_citations_from_event(last_state_update, safe_thread_id)
# If still no citations, try to get from graph state directly
if not collected_citations:
try:
# Get the current state from the graph using proper config
state_config = {"configurable": {"thread_id": thread_id}}
current_state = await graph_instance.aget_state(state_config)
if current_state and hasattr(current_state, 'values'):
state_values = current_state.values
if isinstance(state_values, dict) and 'citations' in state_values:
collected_citations = state_values.get('citations', [])
logger.info(f"[{safe_thread_id}] Retrieved {len(collected_citations)} citations from final graph state")
except Exception as e:
logger.warning(
f"[{safe_thread_id}] Could not retrieve citations from graph state: {e}",
exc_info=True,
)
# Send collected citations as a separate event
if collected_citations:
logger.info(f"[{safe_thread_id}] Sending {len(collected_citations)} citations to client")
yield _make_event("citations", {
"thread_id": thread_id,
"citations": collected_citations,
})
else:
logger.debug(f"[{safe_thread_id}] No citations to send")
logger.debug(f"[{safe_thread_id}] Graph event stream completed. Total events: {event_count}")
except asyncio.CancelledError:
# User cancelled/interrupted the stream - this is normal, not an error

View File

@@ -0,0 +1,136 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
from langchain_core.messages import ToolMessage
from src.citations.collector import CitationCollector
from src.citations.extractor import (
_extract_domain,
citations_to_markdown_references,
extract_citations_from_messages,
merge_citations,
)
from src.citations.formatter import CitationFormatter
from src.citations.models import Citation, CitationMetadata
class TestCitationMetadata:
    """Tests for the CitationMetadata dataclass."""

    def test_initialization(self):
        # All explicit fields round-trip; domain is derived in __post_init__.
        meta = CitationMetadata(
            url="https://example.com/page",
            title="Example Page",
            description="An example description",
        )
        assert meta.url == "https://example.com/page"
        assert meta.title == "Example Page"
        assert meta.description == "An example description"
        assert meta.domain == "example.com"  # Auto-extracted in post_init

    def test_id_generation(self):
        # The generated id is expected to be a 12-character string.
        meta = CitationMetadata(url="https://example.com", title="Test")
        assert isinstance(meta.id, str)
        assert len(meta.id) == 12

    def test_to_dict(self):
        meta = CitationMetadata(
            url="https://example.com", title="Test", relevance_score=0.8
        )
        data = meta.to_dict()
        assert data["url"] == "https://example.com"
        assert data["title"] == "Test"
        assert data["relevance_score"] == 0.8
        assert "id" in data
class TestCitation:
    """Tests for the Citation wrapper around CitationMetadata."""

    def test_citation_wrapper(self):
        citation = Citation(
            number=1,
            metadata=CitationMetadata(url="https://example.com", title="Test"),
        )
        # url/title delegate to the underlying metadata.
        assert citation.number == 1
        assert citation.url == "https://example.com"
        assert citation.title == "Test"
        # Formatting helpers.
        assert citation.to_markdown_reference() == "[Test](https://example.com)"
        assert citation.to_numbered_reference() == "[1] Test - https://example.com"
class TestExtractor:
    """Tests for citation extraction and merging helpers."""

    def test_extract_from_tool_message_web_search(self):
        import json

        search_result = {
            "results": [
                {
                    "url": "https://example.com/1",
                    "title": "Result 1",
                    "content": "Content 1",
                    "score": 0.9,
                }
            ]
        }
        # Build the ToolMessage with proper JSON content directly. The
        # previous str(...).replace("'", '"') approximation was dead code:
        # it was immediately overwritten by json.dumps below.
        msg = ToolMessage(
            content=json.dumps(search_result),
            tool_call_id="call_1",
            name="web_search",
        )
        citations = extract_citations_from_messages([msg])
        assert len(citations) == 1
        assert citations[0]["url"] == "https://example.com/1"
        assert citations[0]["title"] == "Result 1"

    def test_extract_domain(self):
        assert _extract_domain("https://www.example.com/path") == "www.example.com"
        assert _extract_domain("http://example.org") == "example.org"

    def test_merge_citations(self):
        existing = [{"url": "https://a.com", "title": "A", "relevance_score": 0.5}]
        new = [
            {"url": "https://b.com", "title": "B", "relevance_score": 0.6},
            {
                "url": "https://a.com",
                "title": "A New",
                "relevance_score": 0.7,
            },  # Better score for A
        ]
        merged = merge_citations(existing, new)
        assert len(merged) == 2
        # A should keep the higher-scoring duplicate's score.
        a_citation = next(c for c in merged if c["url"] == "https://a.com")
        assert a_citation["relevance_score"] == 0.7
        # B should be present unchanged.
        b_citation = next(c for c in merged if c["url"] == "https://b.com")
        assert b_citation["title"] == "B"

    def test_citations_to_markdown(self):
        citations = [{"url": "https://a.com", "title": "A", "description": "Desc A"}]
        md = citations_to_markdown_references(citations)
        assert "## Key Citations" in md
        assert "- [A](https://a.com)" in md
class TestCollector:
    """Tests for CitationCollector accumulation behavior."""

    def test_add_citations(self):
        collector = CitationCollector()
        added = collector.add_from_search_results(
            [{"url": "https://example.com", "title": "Example", "content": "Test"}],
            query="test",
        )
        # One citation object is returned and the collector count reflects it.
        assert len(added) == 1
        assert added[0].url == "https://example.com"
        assert collector.count == 1
class TestFormatter:
    """Tests for inline citation marker formatting."""

    def test_format_inline(self):
        # Superscript style maps digits to unicode superscript characters.
        formatter = CitationFormatter(style="superscript")
        assert formatter.format_inline_marker(1) == "¹"
        assert formatter.format_inline_marker(12) == "¹²"

View File

@@ -135,17 +135,18 @@ def test_from_runnable_config_with_boolean_true_values():
assert config.enable_deep_thinking is True
assert config.enforce_web_search is True
def test_get_recursion_limit_default(monkeypatch):
    """Default recursion limit is 25 when AGENT_RECURSION_LIMIT is unset.

    The env var is deleted via monkeypatch so an ambient value cannot
    leak into the test.
    """
    from src.config.configuration import get_recursion_limit

    monkeypatch.delenv("AGENT_RECURSION_LIMIT", raising=False)
    result = get_recursion_limit()
    assert result == 25
def test_get_recursion_limit_custom_default(monkeypatch):
    """An explicit default is honored when AGENT_RECURSION_LIMIT is unset.

    The env var is deleted via monkeypatch so an ambient value cannot
    leak into the test.
    """
    from src.config.configuration import get_recursion_limit

    monkeypatch.delenv("AGENT_RECURSION_LIMIT", raising=False)
    result = get_recursion_limit(50)
    assert result == 50

View File

@@ -27,6 +27,7 @@
"@radix-ui/react-collapsible": "^1.1.8",
"@radix-ui/react-dialog": "^1.1.10",
"@radix-ui/react-dropdown-menu": "^2.1.11",
"@radix-ui/react-hover-card": "^1.1.6",
"@radix-ui/react-icons": "^1.3.2",
"@radix-ui/react-label": "^2.1.4",
"@radix-ui/react-popover": "^1.1.11",

201
web/pnpm-lock.yaml generated
View File

@@ -29,6 +29,9 @@ importers:
'@radix-ui/react-dropdown-menu':
specifier: ^2.1.11
version: 2.1.11(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-hover-card':
specifier: ^1.1.6
version: 1.1.15(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-icons':
specifier: ^1.3.2
version: 1.3.2(react@19.1.0)
@@ -1030,6 +1033,9 @@ packages:
'@radix-ui/primitive@1.1.2':
resolution: {integrity: sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA==}
'@radix-ui/primitive@1.1.3':
resolution: {integrity: sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==}
'@radix-ui/react-accordion@1.2.8':
resolution: {integrity: sha512-c7OKBvO36PfQIUGIjj1Wko0hH937pYFU2tR5zbIJDUsmTzHoZVHHt4bmb7OOJbzTaWJtVELKWojBHa7OcnUHmQ==}
peerDependencies:
@@ -1069,6 +1075,19 @@ packages:
'@types/react-dom':
optional: true
'@radix-ui/react-arrow@1.1.7':
resolution: {integrity: sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==}
peerDependencies:
'@types/react': '*'
'@types/react-dom': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@types/react-dom':
optional: true
'@radix-ui/react-checkbox@1.2.3':
resolution: {integrity: sha512-pHVzDYsnaDmBlAuwim45y3soIN8H4R7KbkSVirGhXO+R/kO2OLCe0eucUEbddaTcdMHHdzcIGHtZSMSQlA+apw==}
peerDependencies:
@@ -1161,6 +1180,19 @@ packages:
'@types/react':
optional: true
'@radix-ui/react-dismissable-layer@1.1.11':
resolution: {integrity: sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==}
peerDependencies:
'@types/react': '*'
'@types/react-dom': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@types/react-dom':
optional: true
'@radix-ui/react-dismissable-layer@1.1.6':
resolution: {integrity: sha512-7gpgMT2gyKym9Jz2ZhlRXSg2y6cNQIK8d/cqBZ0RBCaps8pFryCWXiUKI+uHGFrhMrbGUP7U6PWgiXzIxoyF3Q==}
peerDependencies:
@@ -1222,6 +1254,19 @@ packages:
'@types/react-dom':
optional: true
'@radix-ui/react-hover-card@1.1.15':
resolution: {integrity: sha512-qgTkjNT1CfKMoP0rcasmlH2r1DAiYicWsDsufxl940sT2wHNEWWv6FMWIQXWhVdmC1d/HYfbhQx60KYyAtKxjg==}
peerDependencies:
'@types/react': '*'
'@types/react-dom': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@types/react-dom':
optional: true
'@radix-ui/react-icons@1.3.2':
resolution: {integrity: sha512-fyQIhGDhzfc9pK2kH6Pl9c4BDJGfMkPqkyIgYDthyNYoNg3wVhoJMMh19WS4Up/1KMPFVpNsT2q3WmXn2N1m6g==}
peerDependencies:
@@ -1301,6 +1346,19 @@ packages:
'@types/react-dom':
optional: true
'@radix-ui/react-popper@1.2.8':
resolution: {integrity: sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==}
peerDependencies:
'@types/react': '*'
'@types/react-dom': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@types/react-dom':
optional: true
'@radix-ui/react-portal@1.1.5':
resolution: {integrity: sha512-ps/67ZqsFm+Mb6lSPJpfhRLrVL2i2fntgCmGMqqth4eaGUf+knAuuRtWVJrNjUhExgmdRqftSgzpf0DF0n6yXA==}
peerDependencies:
@@ -1327,6 +1385,19 @@ packages:
'@types/react-dom':
optional: true
'@radix-ui/react-portal@1.1.9':
resolution: {integrity: sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==}
peerDependencies:
'@types/react': '*'
'@types/react-dom': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@types/react-dom':
optional: true
'@radix-ui/react-presence@1.1.3':
resolution: {integrity: sha512-IrVLIhskYhH3nLvtcBLQFZr61tBG7wx7O3kEmdzcYwRGAEBmBicGGL7ATzNgruYJ3xBTbuzEEq9OXJM3PAX3tA==}
peerDependencies:
@@ -1353,6 +1424,19 @@ packages:
'@types/react-dom':
optional: true
'@radix-ui/react-presence@1.1.5':
resolution: {integrity: sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==}
peerDependencies:
'@types/react': '*'
'@types/react-dom': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@types/react-dom':
optional: true
'@radix-ui/react-primitive@2.0.3':
resolution: {integrity: sha512-Pf/t/GkndH7CQ8wE2hbkXA+WyZ83fhQQn5DDmwDiDo6AwN/fhaH8oqZ0jRjMrO2iaMhDi6P1HRx6AZwyMinY1g==}
peerDependencies:
@@ -1379,6 +1463,19 @@ packages:
'@types/react-dom':
optional: true
'@radix-ui/react-primitive@2.1.3':
resolution: {integrity: sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==}
peerDependencies:
'@types/react': '*'
'@types/react-dom': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@types/react-dom':
optional: true
'@radix-ui/react-roving-focus@1.1.3':
resolution: {integrity: sha512-ufbpLUjZiOg4iYgb2hQrWXEPYX6jOLBbR27bDyAff5GYMRrCzcze8lukjuXVUQvJ6HZe8+oL+hhswDcjmcgVyg==}
peerDependencies:
@@ -1466,6 +1563,15 @@ packages:
'@types/react':
optional: true
'@radix-ui/react-slot@1.2.3':
resolution: {integrity: sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==}
peerDependencies:
'@types/react': '*'
react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
peerDependenciesMeta:
'@types/react':
optional: true
'@radix-ui/react-switch@1.2.2':
resolution: {integrity: sha512-7Z8n6L+ifMIIYZ83f28qWSceUpkXuslI2FJ34+kDMTiyj91ENdpdQ7VCidrzj5JfwfZTeano/BnGBbu/jqa5rQ==}
peerDependencies:
@@ -6616,6 +6722,8 @@ snapshots:
'@radix-ui/primitive@1.1.2': {}
'@radix-ui/primitive@1.1.3': {}
'@radix-ui/react-accordion@1.2.8(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/primitive': 1.1.2
@@ -6651,6 +6759,15 @@ snapshots:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-arrow@1.1.7(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
react: 19.1.0
react-dom: 19.1.0(react@19.1.0)
optionalDependencies:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-checkbox@1.2.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/primitive': 1.1.2
@@ -6747,6 +6864,19 @@ snapshots:
optionalDependencies:
'@types/react': 19.1.2
'@radix-ui/react-dismissable-layer@1.1.11(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/primitive': 1.1.3
'@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-use-escape-keydown': 1.1.1(@types/react@19.1.2)(react@19.1.0)
react: 19.1.0
react-dom: 19.1.0(react@19.1.0)
optionalDependencies:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-dismissable-layer@1.1.6(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/primitive': 1.1.2
@@ -6805,6 +6935,23 @@ snapshots:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-hover-card@1.1.15(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/primitive': 1.1.3
'@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-context': 1.1.2(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-dismissable-layer': 1.1.11(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-popper': 1.2.8(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-portal': 1.1.9(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-presence': 1.1.5(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.2)(react@19.1.0)
react: 19.1.0
react-dom: 19.1.0(react@19.1.0)
optionalDependencies:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-icons@1.3.2(react@19.1.0)':
dependencies:
react: 19.1.0
@@ -6910,6 +7057,24 @@ snapshots:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-popper@1.2.8(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@floating-ui/react-dom': 2.1.2(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-arrow': 1.1.7(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-context': 1.1.2(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-use-rect': 1.1.1(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-use-size': 1.1.1(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/rect': 1.1.1
react: 19.1.0
react-dom: 19.1.0(react@19.1.0)
optionalDependencies:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-portal@1.1.5(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/react-primitive': 2.0.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
@@ -6930,6 +7095,16 @@ snapshots:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-portal@1.1.9(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.2)(react@19.1.0)
react: 19.1.0
react-dom: 19.1.0(react@19.1.0)
optionalDependencies:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-presence@1.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.2)(react@19.1.0)
@@ -6950,6 +7125,16 @@ snapshots:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-presence@1.1.5(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.2)(react@19.1.0)
'@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.2)(react@19.1.0)
react: 19.1.0
react-dom: 19.1.0(react@19.1.0)
optionalDependencies:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-primitive@2.0.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/react-slot': 1.2.0(@types/react@19.1.2)(react@19.1.0)
@@ -6968,6 +7153,15 @@ snapshots:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-primitive@2.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/react-slot': 1.2.3(@types/react@19.1.2)(react@19.1.0)
react: 19.1.0
react-dom: 19.1.0(react@19.1.0)
optionalDependencies:
'@types/react': 19.1.2
'@types/react-dom': 19.1.1(@types/react@19.1.2)
'@radix-ui/react-roving-focus@1.1.3(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/primitive': 1.1.2
@@ -7083,6 +7277,13 @@ snapshots:
optionalDependencies:
'@types/react': 19.1.2
'@radix-ui/react-slot@1.2.3(@types/react@19.1.2)(react@19.1.0)':
dependencies:
'@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.2)(react@19.1.0)
react: 19.1.0
optionalDependencies:
'@types/react': 19.1.2
'@radix-ui/react-switch@1.2.2(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@radix-ui/primitive': 1.1.2

View File

@@ -7,11 +7,12 @@ import { LoadingAnimation } from "~/components/deer-flow/loading-animation";
import { Markdown } from "~/components/deer-flow/markdown";
import ReportEditor from "~/components/editor";
import { useReplay } from "~/core/replay";
import { useMessage, useStore } from "~/core/store";
import { useCitations, useMessage, useStore } from "~/core/store";
import { cn } from "~/lib/utils";
export function ResearchReportBlock({
className,
researchId,
messageId,
editing,
}: {
@@ -21,6 +22,7 @@ export function ResearchReportBlock({
editing: boolean;
}) {
const message = useMessage(messageId);
const citations = useCitations(researchId);
const { isReplay } = useReplay();
const handleMarkdownChange = useCallback(
(markdown: string) => {
@@ -61,7 +63,7 @@ export function ResearchReportBlock({
/>
) : (
<>
<Markdown animated checkLinkCredibility>
<Markdown animated checkLinkCredibility citations={citations}>
{message?.content}
</Markdown>
{message?.isStreaming && <LoadingAnimation className="my-12" />}

View File

@@ -0,0 +1,308 @@
// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
// SPDX-License-Identifier: MIT
import { ExternalLink, Globe, Clock, Star } from "lucide-react";
import { useMemo } from "react";
import {
HoverCard,
HoverCardContent,
HoverCardTrigger,
} from "~/components/ui/hover-card";
import { cn } from "~/lib/utils";
import type { Citation } from "~/core/messages";
// Re-export Citation type as CitationData for backward compatibility
export type CitationData = Citation;

interface CitationLinkProps {
  href: string;
  children: React.ReactNode;
  citations: CitationData[];
  className?: string;
  id?: string;
}

// Lenient URL normalization used when an exact comparison fails:
// percent-decoding plus whitespace trimming.
const toComparableUrl = (url: string): string => {
  try {
    return decodeURIComponent(url).trim();
  } catch {
    return url.trim();
  }
};

/**
 * Enhanced link component that shows citation metadata on hover.
 * Used within markdown content to provide rich citation information.
 */
export function CitationLink({
  href,
  children,
  citations,
  className,
  id,
}: CitationLinkProps) {
  // Locate the citation entry (and its list position) for this URL.
  const { citation, index } = useMemo(() => {
    if (!href || !citations) {
      return { citation: null, index: -1 };
    }
    // Prefer a byte-exact URL match.
    let position = citations.findIndex((entry) => entry.url === href);
    // Otherwise compare normalized forms of both URLs.
    if (position === -1) {
      const wanted = toComparableUrl(href);
      position = citations.findIndex(
        (entry) => toComparableUrl(entry.url) === wanted
      );
    }
    return {
      citation: position !== -1 ? citations[position] : null,
      index: position,
    };
  }, [href, citations]);

  // Without citation metadata this degrades to a plain external link.
  if (!citation) {
    return (
      <a
        href={href}
        target="_blank"
        rel="noopener noreferrer"
        className={cn("text-primary hover:underline", className)}
      >
        {children}
      </a>
    );
  }

  // Clicking an inline citation scrolls to its entry in the reference
  // list when that entry exists; otherwise the default behavior
  // (opening the URL) is left untouched.
  const onAnchorClick = (event: React.MouseEvent) => {
    if (index === -1) {
      return;
    }
    const target = document.getElementById(`ref-${index + 1}`);
    if (target) {
      event.preventDefault();
      target.scrollIntoView({ behavior: "smooth", block: "start" });
    }
  };

  return (
    <HoverCard openDelay={200} closeDelay={100}>
      <HoverCardTrigger asChild>
        <a
          id={id}
          href={href}
          target="_blank"
          rel="noopener noreferrer"
          onClick={onAnchorClick}
          className={cn(
            "text-primary hover:underline inline-flex items-center gap-0.5 cursor-pointer scroll-mt-20",
            className
          )}
        >
          {children}
          <span className="text-xs text-muted-foreground ml-0.5">
            <ExternalLink className="h-3 w-3 inline" />
          </span>
        </a>
      </HoverCardTrigger>
      <HoverCardContent
        className="w-80 p-4"
        side="top"
        align="start"
        sideOffset={8}
      >
        <CitationCard citation={citation} />
      </HoverCardContent>
    </HoverCard>
  );
}
interface CitationCardProps {
  citation: CitationData;
  compact?: boolean;
}

/**
 * Card component displaying citation metadata: title, domain, access date,
 * relevance score, description snippet, source-type badge and the raw URL.
 *
 * Rendered with <span> elements (styled as blocks) so the card stays valid
 * HTML when mounted inside inline/markdown contexts.
 *
 * @param citation - The citation entry to render.
 * @param compact  - Tighter spacing and no description (used in badges).
 */
export function CitationCard({ citation, compact = false }: CitationCardProps) {
  const {
    title,
    url,
    description,
    domain,
    relevance_score,
    accessed_at,
    source_type,
  } = citation;
  // Format the access date for display.
  // Bug fix: `new Date(bad)` does NOT throw — it yields an Invalid Date —
  // so the previous try/catch fallback was dead code and the literal text
  // "Invalid Date" could be rendered. Check getTime() explicitly instead.
  const formattedDate = useMemo(() => {
    if (!accessed_at) return null;
    const date = new Date(accessed_at);
    if (Number.isNaN(date.getTime())) {
      // Fall back to the raw YYYY-MM-DD prefix of the string.
      return accessed_at.slice(0, 10);
    }
    return date.toLocaleDateString(undefined, {
      year: "numeric",
      month: "short",
      day: "numeric",
    });
  }, [accessed_at]);
  // Relevance is treated as a fraction; render it as a whole percentage.
  // Non-positive or missing scores are hidden entirely.
  const relevancePercent = useMemo(() => {
    if (relevance_score == null || relevance_score <= 0) return null;
    return Math.round(relevance_score * 100);
  }, [relevance_score]);
  return (
    <span className={cn("block space-y-2", compact && "space-y-1")}>
      {/* Title */}
      <span className="block font-semibold text-sm line-clamp-2 leading-snug">
        {title}
      </span>
      {/* Domain and metadata row */}
      <span className="flex items-center gap-3 text-xs text-muted-foreground">
        {domain && (
          <span className="flex items-center gap-1">
            <Globe className="h-3 w-3" />
            {domain}
          </span>
        )}
        {formattedDate && (
          <span className="flex items-center gap-1">
            <Clock className="h-3 w-3" />
            {formattedDate}
          </span>
        )}
        {relevancePercent != null && (
          <span className="flex items-center gap-1">
            <Star className="h-3 w-3" />
            {relevancePercent}% match
          </span>
        )}
      </span>
      {/* Description/snippet (suppressed in compact mode) */}
      {description && !compact && (
        <span className="block text-xs text-muted-foreground line-clamp-3 leading-relaxed">
          {description}
        </span>
      )}
      {/* Source type badge */}
      {source_type && (
        <span className="inline-flex items-center px-1.5 py-0.5 rounded text-[10px] bg-secondary text-secondary-foreground">
          {source_type === "web_search" ? "Web" : source_type}
        </span>
      )}
      {/* URL preview */}
      <span className="block text-[10px] text-muted-foreground truncate opacity-60">
        {url}
      </span>
    </span>
  );
}
interface CitationListProps {
citations: CitationData[];
title?: string;
className?: string;
}
/**
* List component for displaying all citations.
*/
export function CitationList({
citations,
title = "Sources",
className,
}: CitationListProps) {
if (!citations || citations.length === 0) {
return null;
}
return (
<div className={cn("space-y-3", className)}>
<h3 className="text-sm font-semibold text-foreground">{title}</h3>
<div className="space-y-2">
{citations.map((citation, index) => (
<div
key={citation.url || index}
className="p-3 rounded-lg border bg-card hover:bg-accent/50 transition-colors"
>
<div className="flex items-start gap-3">
<span className="flex-shrink-0 w-6 h-6 rounded-full bg-primary/10 text-primary text-xs font-medium flex items-center justify-center">
{index + 1}
</span>
<div className="flex-1 min-w-0">
<a
href={citation.url}
target="_blank"
rel="noopener noreferrer"
className="text-sm font-medium text-foreground hover:text-primary hover:underline line-clamp-1"
>
{citation.title}
</a>
{citation.domain && (
<p className="text-xs text-muted-foreground mt-0.5">
{citation.domain}
</p>
)}
{citation.description && (
<p className="text-xs text-muted-foreground mt-1 line-clamp-2">
{citation.description}
</p>
)}
</div>
</div>
</div>
))}
</div>
</div>
);
}
interface CitationBadgeProps {
  number: number;
  citation?: CitationData;
  onClick?: () => void;
}

/**
 * Small numbered badge for inline citations. When citation metadata is
 * available the badge also shows a compact hover card with the details.
 *
 * @param number   - 1-based citation number displayed in the badge.
 * @param citation - Optional metadata shown in the hover card.
 * @param onClick  - Invoked when the badge is clicked.
 */
export function CitationBadge({ number, citation, onClick }: CitationBadgeProps) {
  const badge = (
    <button
      // Bug fix: a button's default type is "submit", so inside a form
      // this badge would submit it. Also label the control for screen
      // readers, since its visible content is just a number.
      type="button"
      aria-label={`Citation ${number}`}
      onClick={onClick}
      className="inline-flex items-center justify-center w-4 h-4 rounded-full bg-primary/10 text-primary text-[10px] font-medium hover:bg-primary/20 transition-colors align-super ml-0.5 cursor-pointer"
    >
      {number}
    </button>
  );
  // Without metadata there is nothing to show on hover.
  if (!citation) {
    return badge;
  }
  return (
    <HoverCard openDelay={200} closeDelay={100}>
      <HoverCardTrigger asChild>{badge}</HoverCardTrigger>
      <HoverCardContent className="w-72 p-3" side="top" sideOffset={4}>
        <CitationCard citation={citation} compact />
      </HoverCardContent>
    </HoverCard>
  );
}

View File

@@ -20,6 +20,7 @@ import { cn } from "~/lib/utils";
import Image from "./image";
import { Tooltip } from "./tooltip";
import { Link } from "./link";
import { CitationLink, type CitationData } from "./citation";
export function Markdown({
className,
@@ -28,6 +29,7 @@ export function Markdown({
enableCopy,
animated = false,
checkLinkCredibility = false,
citations = [],
...props
}: ReactMarkdownOptions & {
className?: string;
@@ -35,21 +37,127 @@ export function Markdown({
style?: React.CSSProperties;
animated?: boolean;
checkLinkCredibility?: boolean;
citations?: CitationData[];
}) {
// Pre-compute normalized URL map for O(1) lookup
const citationMap = useMemo(() => {
const map = new Map<string, number>();
citations?.forEach((c, index) => {
if (!c.url) return;
// Add exact match
map.set(c.url, index);
// Add decoded match
try {
const decoded = decodeURIComponent(c.url);
if (decoded !== c.url) map.set(decoded, index);
} catch {}
// Add encoded match
try {
const encoded = encodeURI(c.url);
if (encoded !== c.url) map.set(encoded, index);
} catch {}
});
return map;
}, [citations]);
const components: ReactMarkdownOptions["components"] = useMemo(() => {
return {
a: ({ href, children }) => (
<Link href={href} checkLinkCredibility={checkLinkCredibility}>
{children}
</Link>
),
a: ({ href, children }) => {
const hrefStr = href ?? "";
// Handle citation anchor targets (rendered in Reference list)
// Format: [[n]](#citation-target-n)
const targetMatch = hrefStr.match(/^#citation-target-(\d+)$/);
if (targetMatch) {
const index = targetMatch[1];
return (
<span
id={`ref-${index}`}
className="font-bold text-primary scroll-mt-20"
>
[{index}]
</span>
);
}
// Handle inline citation links (rendered in text)
// Format: [[n]](#ref-n), [n](#ref1), [n](#1)
const linkMatch = hrefStr.match(/^#(?:ref-?)?(\d+)$/);
if (linkMatch) {
return (
<a
href={hrefStr}
className="text-primary hover:underline cursor-pointer marker-link"
onClick={(e) => {
e.preventDefault();
const targetId = `ref-${linkMatch[1]}`;
const element = document.getElementById(targetId);
if (element) {
element.scrollIntoView({ behavior: "smooth", block: "start" });
}
}}
>
{children}
</a>
);
}
// If we have citation data, use CitationLink for enhanced display
if (citations && citations.length > 0) {
// Find if this URL is one of our citations
const citationIndex = citationMap.get(hrefStr) ?? -1;
if (citationIndex !== -1) {
// Heuristic to determine if this is a citation target (in Reference list)
// vs a citation link (in text).
// Targets are usually the full title, while links are numbers like [1].
const childrenText = Array.isArray(children)
? children.join("")
: String(children);
// Heuristic: inline citation text usually looks like a numeric marker
// rather than a full title. We treat the following as "inline":
// "1", "[1]", "^1^", "[^1]" (with optional surrounding whitespace).
// This pattern matches either:
// - a bracketed number: "[1]"
// - a caret-style number: "1", "^1", "1^", "^1^"
// and ignores surrounding whitespace.
const inlineCitationPattern = /^\s*(?:\[\d+\]|\^?\d+\^?)\s*$/;
const isInline = inlineCitationPattern.test(childrenText);
return (
<CitationLink
href={hrefStr}
citations={citations}
id={!isInline ? `ref-${citationIndex + 1}` : undefined}
>
{children}
</CitationLink>
);
}
return (
<CitationLink href={hrefStr} citations={citations}>
{children}
</CitationLink>
);
}
// Otherwise fall back to regular Link
return (
<Link href={href} checkLinkCredibility={checkLinkCredibility}>
{children}
</Link>
);
},
img: ({ src, alt }) => (
<a href={src as string} target="_blank" rel="noopener noreferrer">
<Image className="rounded" src={src as string} alt={alt ?? ""} />
</a>
),
};
}, [checkLinkCredibility]);
}, [checkLinkCredibility, citations, citationMap]);
const rehypePlugins = useMemo<NonNullable<ReactMarkdownOptions["rehypePlugins"]>>(() => {
const plugins: NonNullable<ReactMarkdownOptions["rehypePlugins"]> = [[

View File

@@ -0,0 +1,34 @@
// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
// SPDX-License-Identifier: MIT
"use client";
import * as React from "react";
import * as HoverCardPrimitive from "@radix-ui/react-hover-card";
import { cn } from "~/lib/utils";
const HoverCard = HoverCardPrimitive.Root;

const HoverCardTrigger = HoverCardPrimitive.Trigger;

/**
 * Styled hover-card content. Rendered through a Radix portal so it is not
 * clipped by overflow/stacking contexts around the trigger.
 *
 * Uses React.ComponentRef instead of React.ElementRef, which is deprecated
 * in the React 19 type definitions this project uses.
 */
const HoverCardContent = React.forwardRef<
  React.ComponentRef<typeof HoverCardPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof HoverCardPrimitive.Content>
>(({ className, align = "center", sideOffset = 4, ...props }, ref) => (
  <HoverCardPrimitive.Portal>
    <HoverCardPrimitive.Content
      ref={ref}
      align={align}
      sideOffset={sideOffset}
      className={cn(
        "z-50 w-64 rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-none data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
        className
      )}
      {...props}
    />
  </HoverCardPrimitive.Portal>
));
HoverCardContent.displayName = HoverCardPrimitive.Content.displayName;

export { HoverCard, HoverCardTrigger, HoverCardContent };

View File

@@ -1,7 +1,7 @@
// Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
// SPDX-License-Identifier: MIT
import type { Option } from "../messages";
import type { Citation, Option } from "../messages";
// Tool Calls
@@ -76,9 +76,18 @@ export interface InterruptEvent
}
> {}
// Server-sent chat event carrying the citations collected during a
// research run; handled separately from message-chunk events in the store.
export interface CitationsEvent {
  type: "citations";
  data: {
    // Chat thread the citations belong to.
    thread_id: string;
    // Citation entries, in reference order.
    citations: Citation[];
  };
}
export type ChatEvent =
| MessageChunkEvent
| ToolCallsEvent
| ToolCallChunksEvent
| ToolCallResultEvent
| InterruptEvent;
| InterruptEvent
| CitationsEvent;

View File

@@ -53,7 +53,7 @@ export function mergeMessage(message: Message, event: ChatEvent) {
} else if (event.type === "interrupt") {
mergeInterruptMessage(message, event);
}
if (event.data.finish_reason) {
if (event.type !== "citations" && event.data.finish_reason) {
message.finishReason = event.data.finish_reason;
message.isStreaming = false;
if (message.toolCalls) {

View File

@@ -25,6 +25,7 @@ export interface Message {
finishReason?: "stop" | "interrupt" | "tool_calls";
interruptFeedback?: string;
resources?: Array<Resource>;
citations?: Array<Citation>;
}
export interface Option {
@@ -45,3 +46,14 @@ export interface Resource {
title: string;
description?: string;
}
// A single source reference attached to a research report. The URL doubles
// as the lookup key when matching markdown links to citations.
export interface Citation {
  // Canonical URL of the cited source.
  url: string;
  // Human-readable title of the source page.
  title: string;
  // Short summary of the source, when available.
  description?: string;
  // Excerpt of the source content that was used.
  content_snippet?: string;
  // Hostname of the source, e.g. "example.com".
  domain?: string;
  // Relevance score; the UI renders it as round(score * 100) percent,
  // so it is treated as a fraction in [0, 1] — TODO confirm with backend.
  relevance_score?: number;
  // When the source was fetched; assumed to be a parseable date string
  // (the UI falls back to its first 10 characters) — TODO confirm format.
  accessed_at?: string;
  // Origin of the citation; "web_search" is rendered as "Web".
  source_type?: string;
}

View File

@@ -7,7 +7,7 @@ import { create } from "zustand";
import { useShallow } from "zustand/react/shallow";
import { chatStream, generatePodcast } from "../api";
import type { Message, Resource } from "../messages";
import type { Citation, Message, Resource } from "../messages";
import { mergeMessage } from "../messages";
import { parseJSON } from "../utils";
@@ -25,6 +25,7 @@ export const useStore = create<{
researchReportIds: Map<string, string>;
researchActivityIds: Map<string, string[]>;
researchQueries: Map<string, string>;
researchCitations: Map<string, Citation[]>;
ongoingResearchId: string | null;
openResearchId: string | null;
@@ -34,6 +35,7 @@ export const useStore = create<{
openResearch: (researchId: string | null) => void;
closeResearch: () => void;
setOngoingResearch: (researchId: string | null) => void;
setCitations: (researchId: string, citations: Citation[]) => void;
}>((set) => ({
responding: false,
threadId: THREAD_ID,
@@ -44,6 +46,7 @@ export const useStore = create<{
researchReportIds: new Map<string, string>(),
researchActivityIds: new Map<string, string[]>(),
researchQueries: new Map<string, string>(),
researchCitations: new Map<string, Citation[]>(),
ongoingResearchId: null,
openResearchId: null,
@@ -80,6 +83,11 @@ export const useStore = create<{
setOngoingResearch(researchId: string | null) {
set({ ongoingResearchId: researchId });
},
setCitations(researchId: string, citations: Citation[]) {
set((state) => ({
researchCitations: new Map(state.researchCitations).set(researchId, citations),
}));
},
}));
export async function sendMessage(
@@ -148,6 +156,15 @@ export async function sendMessage(
const { type, data } = event;
let message: Message | undefined;
// Handle citations event: store citations for the current research
if (type === "citations") {
const ongoingResearchId = useStore.getState().ongoingResearchId;
if (ongoingResearchId && data.citations) {
useStore.getState().setCitations(ongoingResearchId, data.citations);
}
continue;
}
// Handle tool_call_result specially: use the message that contains the tool call
if (type === "tool_call_result") {
message = findMessageByToolCallId(data.tool_call_id);
@@ -496,3 +513,15 @@ export function useToolCalls() {
}),
);
}
/**
 * React hook returning the citations recorded for a research run.
 * Yields an empty array when the id is absent or no citations exist yet.
 */
export function useCitations(researchId: string | null | undefined) {
  return useStore(
    useShallow((state) => {
      if (!researchId) {
        return [];
      }
      return state.researchCitations.get(researchId) ?? [];
    }),
  );
}
/**
 * Non-reactive accessor for a research run's citations (empty if none).
 * Use the useCitations hook instead when a component must re-render.
 */
export function getCitations(researchId: string): Citation[] {
  const { researchCitations } = useStore.getState();
  return researchCitations.get(researchId) ?? [];
}