mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-05-01 17:50:44 +08:00
feat: add citation support in research report block and markdown
* feat: add citation support in research report block and markdown - Enhanced ResearchReportBlock to fetch citations based on researchId and pass them to the Markdown component. - Introduced CitationLink component to display citation metadata on hover for links in markdown. - Implemented CitationCard and CitationList components for displaying citation details and lists. - Updated Markdown component to handle citation links and inline citations. - Created HoverCard component for displaying citation information in a tooltip-like manner. - Modified store to manage citations, including setting and retrieving citations for ongoing research. - Added CitationsEvent type to handle citations in chat events and updated Message type to include citations. * fix(log): Enable the logging level when enabling the DEBUG environment variable (#793) * fix(frontend): render all tool calls in the frontend #796 (#797) * build(deps): bump jspdf from 3.0.4 to 4.0.0 in /web (#798) Bumps [jspdf](https://github.com/parallax/jsPDF) from 3.0.4 to 4.0.0. - [Release notes](https://github.com/parallax/jsPDF/releases) - [Changelog](https://github.com/parallax/jsPDF/blob/master/RELEASE.md) - [Commits](https://github.com/parallax/jsPDF/compare/v3.0.4...v4.0.0) --- updated-dependencies: - dependency-name: jspdf dependency-version: 4.0.0 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * fix(frontend):added the display of the 'analyst' message #800 (#801) * fix: migrate from deprecated create_react_agent to langchain.agents.create_agent (#802) * fix: migrate from deprecated create_react_agent to langchain.agents.create_agent Fixes #799 - Replace deprecated langgraph.prebuilt.create_react_agent with langchain.agents.create_agent (LangGraph 1.0 migration) - Add DynamicPromptMiddleware to handle dynamic prompt templates (replaces the 'prompt' callable parameter) - Add PreModelHookMiddleware to handle pre-model hooks (replaces the 'pre_model_hook' parameter) - Update AgentState import from langchain.agents in template.py - Update tests to use the new API * fix:update the code with review comments * fix: Add runtime parameter to compress_messages method(#803) * fix: Add runtime parameter to compress_messages method(#803) The compress_messages method was being called by PreModelHookMiddleware with both state and runtime parameters, but only accepted state parameter. This caused a TypeError when the middleware executed the pre_model_hook. Added optional runtime parameter to compress_messages signature to match the expected interface while maintaining backward compatibility. 
* Update the code with the review comments * fix: Refactor citation handling and add comprehensive tests for citation features * refactor: Clean up imports and formatting across citation modules * fix: Add monkeypatch to clear AGENT_RECURSION_LIMIT in recursion limit tests * feat: Enhance citation link handling in Markdown component * fix: Exclude citations from finish reason handling in mergeMessage function * fix(nodes): update message handling * fix(citations): improve citation extraction and handling in event processing * feat(citations): enhance citation extraction and handling with improved merging and normalization * fix(reporter): update citation formatting instructions for clarity and consistency * fix(reporter): prioritize using Markdown tables for data presentation and comparison --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: LoftyComet <1277173875@qq。> Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
178
src/citations/models.py
Normal file
178
src/citations/models.py
Normal file
@@ -0,0 +1,178 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
"""
|
||||
Citation data models for structured source metadata.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
@dataclass
class CitationMetadata:
    """Structured metadata describing a single cited source.

    Only ``url`` and ``title`` are required; every other field has a default.
    When ``domain`` is not supplied it is derived from ``url`` right after
    initialisation (see ``__post_init__``).
    """

    # Core identifiers
    url: str
    title: str

    # Content information
    description: Optional[str] = None
    content_snippet: Optional[str] = None
    raw_content: Optional[str] = None

    # Source metadata
    domain: Optional[str] = None
    author: Optional[str] = None
    published_date: Optional[str] = None
    language: Optional[str] = None

    # Media
    images: List[str] = field(default_factory=list)
    favicon: Optional[str] = None

    # Quality indicators
    relevance_score: float = 0.0
    credibility_score: float = 0.0

    # Timestamps: ISO-8601 string taken from a naive ``datetime.now()`` at
    # construction time.
    accessed_at: str = field(default_factory=lambda: datetime.now().isoformat())

    # Additional metadata
    extra: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Fill in ``domain`` from the URL's network location if it is unset."""
        if not self.domain and self.url:
            try:
                parsed = urlparse(self.url)
                self.domain = parsed.netloc
            except Exception:
                # Deliberate best-effort: `domain` is a non-critical
                # convenience field, so a URL that cannot be parsed must not
                # prevent the citation metadata from being created. In that
                # case `domain` keeps whatever value it already had.
                pass

    @property
    def id(self) -> str:
        """Return a stable identifier derived from the URL.

        The ID is the first 12 hex characters of the SHA-256 digest of the
        UTF-8 encoded URL, so two instances with the same URL share an ID.
        """
        return hashlib.sha256(self.url.encode("utf-8")).hexdigest()[:12]

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for JSON serialization.

        The computed ``id`` is included; ``raw_content`` is not part of the
        output.
        """
        return {
            "id": self.id,
            "url": self.url,
            "title": self.title,
            "description": self.description,
            "content_snippet": self.content_snippet,
            "domain": self.domain,
            "author": self.author,
            "published_date": self.published_date,
            "language": self.language,
            "images": self.images,
            "favicon": self.favicon,
            "relevance_score": self.relevance_score,
            "credibility_score": self.credibility_score,
            "accessed_at": self.accessed_at,
            "extra": self.extra,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CitationMetadata":
        """Create an instance from a dictionary such as ``to_dict()`` output.

        Keys that are not constructor fields are ignored. This covers the
        computed ``"id"`` entry as well as any extra keys a payload may carry,
        which would otherwise make the constructor raise ``TypeError``.
        The input mapping is not modified.

        Raises:
            TypeError: If a required field (``url`` or ``title``) is missing.
        """
        # Local import so this block does not depend on the module-level
        # import list being extended.
        from dataclasses import fields

        accepted = {f.name for f in fields(cls) if f.init}
        kwargs = {key: value for key, value in data.items() if key in accepted}
        return cls(**kwargs)

    @classmethod
    def from_search_result(
        cls, result: Dict[str, Any], query: str = ""
    ) -> "CitationMetadata":
        """Create citation metadata from a search result dictionary.

        Args:
            result: Search result mapping. The keys read are ``url``,
                ``title``, ``content``, ``description``, ``raw_content``,
                ``score`` and ``type``; all of them are optional.
            query: The search query, stored under ``extra["query"]``.

        Returns:
            A new ``CitationMetadata``. ``description`` prefers ``content``
            and falls back to ``description``; ``content_snippet`` is the
            first 500 characters of ``content`` (``None`` when there is no
            content).
        """
        content = result.get("content")
        score = result.get("score")
        return cls(
            url=result.get("url", ""),
            title=result.get("title", "Untitled"),
            # Fall back to the result's description whenever content is
            # missing *or* empty, not only when the key is absent.
            description=content or result.get("description", ""),
            content_snippet=content[:500] if content else None,
            raw_content=result.get("raw_content"),
            # An explicit None score must not leak into a float field.
            relevance_score=0.0 if score is None else score,
            extra={"query": query, "result_type": result.get("type", "page")},
        )
|
||||
|
||||
|
||||
@dataclass
class Citation:
    """
    A numbered reference to a source, for use inside reports.

    Pairs a display number with the ``CitationMetadata`` of the source and,
    optionally, the surrounding context and the exact text being cited.
    """

    # Display number of the citation (1-indexed)
    number: int

    # Metadata of the source this citation points to
    metadata: CitationMetadata

    # Where in the report the citation is used
    context: Optional[str] = None

    # The particular quote or fact that is being cited
    cited_text: Optional[str] = None

    @property
    def id(self) -> str:
        """Citation ID, delegated to the underlying metadata."""
        source = self.metadata
        return source.id

    @property
    def url(self) -> str:
        """Source URL, delegated to the underlying metadata."""
        source = self.metadata
        return source.url

    @property
    def title(self) -> str:
        """Source title, delegated to the underlying metadata."""
        source = self.metadata
        return source.title

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary; metadata is nested as a dict."""
        payload: Dict[str, Any] = {}
        payload["number"] = self.number
        payload["metadata"] = self.metadata.to_dict()
        payload["context"] = self.context
        payload["cited_text"] = self.cited_text
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Citation":
        """Rebuild a citation from a dictionary.

        ``number`` and ``metadata`` are required keys; ``context`` and
        ``cited_text`` default to ``None`` when absent.
        """
        number = data["number"]
        source = CitationMetadata.from_dict(data["metadata"])
        return cls(
            number=number,
            metadata=source,
            context=data.get("context"),
            cited_text=data.get("cited_text"),
        )

    def to_markdown_reference(self) -> str:
        """Render as a markdown link: [Title](URL)"""
        title, url = self.title, self.url
        return f"[{title}]({url})"

    def to_numbered_reference(self) -> str:
        """Render as a numbered reference: [1] Title - URL"""
        number, title, url = self.number, self.title, self.url
        return f"[{number}] {title} - {url}"

    def to_inline_marker(self) -> str:
        """Render the inline footnote marker: [^1]"""
        number = self.number
        return f"[^{number}]"

    def to_footnote(self) -> str:
        """Render the footnote definition: [^1]: Title - URL"""
        number, title, url = self.number, self.title, self.url
        return f"[^{number}]: {title} - {url}"
|
||||
Reference in New Issue
Block a user