Merge upstream/experimental into feat/citations

Resolved conflicts:
- backend/src/gateway/routers/artifacts.py: Keep citations block removal for markdown downloads
- frontend/src/components/workspace/messages/message-list-item.tsx: Keep improved citation handling with rehypePlugins, humanMessagePlugins, and CitationsLoadingIndicator

Co-authored-by: Cursor <cursoragent@cursor.com>
LofiSu
2026-02-07 00:53:16 +08:00
65 changed files with 3489 additions and 5320 deletions

View File

@@ -233,11 +233,12 @@ def make_lead_agent(config: RunnableConfig):
thinking_enabled = config.get("configurable", {}).get("thinking_enabled", True)
model_name = config.get("configurable", {}).get("model_name") or config.get("configurable", {}).get("model")
is_plan_mode = config.get("configurable", {}).get("is_plan_mode", False)
print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}, is_plan_mode: {is_plan_mode}")
subagent_enabled = config.get("configurable", {}).get("subagent_enabled", False)
print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}, is_plan_mode: {is_plan_mode}, subagent_enabled: {subagent_enabled}")
return create_agent(
model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
tools=get_available_tools(model_name=model_name),
tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled),
middleware=_build_middlewares(config),
system_prompt=apply_prompt_template(),
system_prompt=apply_prompt_template(subagent_enabled=subagent_enabled),
state_schema=ThreadState,
)
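The configurable-flag lookups in this hunk follow one pattern: read from `config["configurable"]` with a safe default. A minimal standalone sketch (a plain dict stands in for `RunnableConfig`, and `read_agent_flags` is a hypothetical helper, not part of the commit):

```python
# Sketch of the flag-extraction pattern used in make_lead_agent.
# A plain dict stands in for langchain's RunnableConfig; every flag
# falls back to a safe default when the key is absent.
def read_agent_flags(config: dict) -> dict:
    configurable = config.get("configurable", {})
    return {
        "thinking_enabled": configurable.get("thinking_enabled", True),
        # model_name falls back to the legacy "model" key, as in the diff
        "model_name": configurable.get("model_name") or configurable.get("model"),
        "is_plan_mode": configurable.get("is_plan_mode", False),
        "subagent_enabled": configurable.get("subagent_enabled", False),
    }

flags = read_agent_flags({"configurable": {"model": "gpt-4o", "subagent_enabled": True}})
# flags["model_name"] resolves via the "model" fallback; missing keys use defaults
```

The same `flags` dict would then feed `get_available_tools(...)` and `apply_prompt_template(...)` as the hunk shows.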

View File

@@ -2,6 +2,130 @@ from datetime import datetime
from src.skills import load_skills
SUBAGENT_SECTION = """<subagent_system>
**🚀 SUBAGENT MODE ACTIVE - DECOMPOSE, DELEGATE, SYNTHESIZE**
You are running with subagent capabilities enabled. Your role is to be a **task orchestrator**:
1. **DECOMPOSE**: Break complex tasks into parallel sub-tasks
2. **DELEGATE**: Launch multiple subagents simultaneously using parallel `task` calls
3. **SYNTHESIZE**: Collect and integrate results into a coherent answer
**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**
**Available Subagents:**
- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.
- **bash**: For command execution (git, build, test, deploy operations)
**Your Orchestration Strategy:**
✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**
For complex queries, break them down into multiple focused sub-tasks and execute in parallel:
**Example 1: "Why is Tencent's stock price declining?"**
→ Decompose into 4 parallel searches:
- Subagent 1: Recent financial reports and earnings data
- Subagent 2: Negative news and controversies
- Subagent 3: Industry trends and competitor performance
- Subagent 4: Macro-economic factors and market sentiment
**Example 2: "What are the latest AI trends in 2026?"**
→ Decompose into parallel research areas:
- Subagent 1: LLM and foundation model developments
- Subagent 2: AI infrastructure and hardware trends
- Subagent 3: Enterprise AI adoption patterns
- Subagent 4: Regulatory and ethical developments
**Example 3: "Refactor the authentication system"**
→ Decompose into parallel analysis:
- Subagent 1: Analyze current auth implementation
- Subagent 2: Research best practices and security patterns
- Subagent 3: Check for vulnerabilities and technical debt
- Subagent 4: Review related tests and documentation
✅ **USE Parallel Subagents (2+ subagents) when:**
- **Complex research questions**: Requires multiple information sources or perspectives
- **Multi-aspect analysis**: Task has several independent dimensions to explore
- **Large codebases**: Need to analyze different parts simultaneously
- **Comprehensive investigations**: Questions requiring thorough coverage from multiple angles
❌ **DO NOT use subagents (execute directly) when:**
- **Task cannot be decomposed**: If you can't break it into 2+ meaningful parallel sub-tasks, execute directly
- **Ultra-simple actions**: Read one file, quick edits, single commands
- **Need immediate clarification**: Must ask user before proceeding
- **Meta conversation**: Questions about conversation history
- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)
**CRITICAL WORKFLOW**:
1. In your thinking: Can I decompose this into 2+ independent parallel sub-tasks?
2. **YES** → Launch multiple `task` calls in parallel, then synthesize results
3. **NO** → Execute directly using available tools (bash, read_file, web_search, etc.)
**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**
**How It Works:**
- The task tool runs subagents asynchronously in the background
- The backend automatically polls for completion (you don't need to poll)
- The tool call will block until the subagent completes its work
- Once complete, the result is returned to you directly
**Usage Example - Parallel Decomposition:**
```python
# User asks: "Why is Tencent's stock price declining?"
# Thinking: This is complex research requiring multiple angles
# → Decompose into 4 parallel searches
# Launch 4 subagents in a SINGLE response with multiple tool calls:
# Subagent 1: Financial data
task(
subagent_type="general-purpose",
prompt="Search for Tencent's latest financial reports, quarterly earnings, and revenue trends in 2025-2026. Focus on numbers and official data.",
description="Tencent financial data"
)
# Subagent 2: Negative news
task(
subagent_type="general-purpose",
prompt="Search for recent negative news, controversies, or regulatory issues affecting Tencent in 2025-2026.",
description="Tencent negative news"
)
# Subagent 3: Industry/competitors
task(
subagent_type="general-purpose",
prompt="Search for Chinese tech industry trends and how Tencent's competitors (Alibaba, ByteDance) are performing in 2025-2026.",
description="Industry comparison"
)
# Subagent 4: Market factors
task(
subagent_type="general-purpose",
prompt="Search for macro-economic factors affecting Chinese tech stocks and overall market sentiment toward Tencent in 2025-2026.",
description="Market sentiment"
)
# All 4 subagents run in parallel, results return simultaneously
# Then synthesize findings into comprehensive analysis
```
**Counter-Example - Direct Execution (NO subagents):**
```python
# User asks: "Run the tests"
# Thinking: Cannot decompose into parallel sub-tasks
# → Execute directly
bash("npm test") # Direct execution, not task()
```
**CRITICAL**:
- Only use `task` when you can launch 2+ subagents in parallel
- Single task = No value from subagents = Execute directly
- Multiple tasks in SINGLE response = Parallel execution
</subagent_system>"""
SYSTEM_PROMPT_TEMPLATE = """
<role>
You are DeerFlow 2.0, an open-source super agent.
@@ -13,7 +137,7 @@ You are DeerFlow 2.0, an open-source super agent.
- Think concisely and strategically about the user's request BEFORE taking action
- Break down the task: What is clear? What is ambiguous? What is missing?
- **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work**
- Never write your full final answer or report in the thinking process; provide only an outline
{subagent_thinking}- Never write your full final answer or report in the thinking process; provide only an outline
- CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
- Your response must contain the actual answer, not just a reference to what you thought about
</thinking_style>
@@ -103,6 +227,8 @@ You have access to skills that provide optimized workflows for specific tasks. E
</skill_system>
{subagent_section}
<working_directory existed="true">
- User uploads: `/mnt/user-data/uploads` - Files uploaded by the user (automatically listed in context)
- User workspace: `/mnt/user-data/workspace` - Working directory for temporary files
@@ -149,7 +275,7 @@ The key AI trends for 2026 include enhanced reasoning capabilities and multimoda
<critical_reminders>
- **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess
- Skill First: Always load the relevant skill before starting **complex** tasks.
{subagent_reminder}- Skill First: Always load the relevant skill before starting **complex** tasks.
- Progressive Loading: Load resources incrementally as referenced in skills
- Output Files: Final deliverables must be in `/mnt/user-data/outputs`
- Clarity: Be direct and helpful, avoid unnecessary meta-commentary
@@ -176,9 +302,7 @@ def _get_memory_context() -> str:
return ""
memory_data = get_memory_data()
memory_content = format_memory_for_injection(
memory_data, max_tokens=config.max_injection_tokens
)
memory_content = format_memory_for_injection(memory_data, max_tokens=config.max_injection_tokens)
if not memory_content.strip():
return ""
@@ -192,29 +316,24 @@ def _get_memory_context() -> str:
return ""
def apply_prompt_template() -> str:
def apply_prompt_template(subagent_enabled: bool = False) -> str:
# Load only enabled skills
skills = load_skills(enabled_only=True)
# Get skills container path from config
# Get config
try:
from src.config import get_app_config
config = get_app_config()
container_base_path = config.skills.container_path
except Exception:
# Fallback to default if config fails
# Fallback to defaults if config fails
container_base_path = "/mnt/skills"
# Generate skills list XML with paths (path points to SKILL.md file)
if skills:
skill_items = "\n".join(
f" <skill>\n"
f" <name>{skill.name}</name>\n"
f" <description>{skill.description}</description>\n"
f" <location>{skill.get_container_file_path(container_base_path)}</location>\n"
f" </skill>"
for skill in skills
f" <skill>\n <name>{skill.name}</name>\n <description>{skill.description}</description>\n <location>{skill.get_container_file_path(container_base_path)}</location>\n </skill>" for skill in skills
)
skills_list = f"<available_skills>\n{skill_items}\n</available_skills>"
else:
@@ -223,11 +342,31 @@ def apply_prompt_template() -> str:
# Get memory context
memory_context = _get_memory_context()
# Include subagent section only if enabled (from runtime parameter)
subagent_section = SUBAGENT_SECTION if subagent_enabled else ""
# Add subagent reminder to critical_reminders if enabled
subagent_reminder = (
"- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks and launch multiple subagents simultaneously. Synthesize results, don't execute directly.\n"
if subagent_enabled
else ""
)
# Add subagent thinking guidance if enabled
subagent_thinking = (
"- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, decompose and launch multiple subagents in parallel. Your role is orchestrator, not executor.**\n"
if subagent_enabled
else ""
)
# Format the prompt with dynamic skills and memory
prompt = SYSTEM_PROMPT_TEMPLATE.format(
skills_list=skills_list,
skills_base_path=container_base_path,
memory_context=memory_context,
subagent_section=subagent_section,
subagent_reminder=subagent_reminder,
subagent_thinking=subagent_thinking,
)
return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
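The conditional-section injection that `apply_prompt_template` performs can be sketched minimally; `TEMPLATE` and `build_prompt` below are illustrative stand-ins for `SYSTEM_PROMPT_TEMPLATE` and the real function, not the commit's code:

```python
# Sketch of the pattern: optional blocks collapse to "" when disabled,
# so the template renders cleanly with or without subagent mode.
TEMPLATE = (
    "<role>agent</role>\n"
    "{subagent_section}"
    "<critical_reminders>\n"
    "{subagent_reminder}- Skill First\n"
    "</critical_reminders>"
)

def build_prompt(subagent_enabled: bool = False) -> str:
    subagent_section = "<subagent_system>...</subagent_system>\n" if subagent_enabled else ""
    subagent_reminder = (
        "- **Orchestrator Mode**: decompose, delegate, synthesize.\n"
        if subagent_enabled
        else ""
    )
    return TEMPLATE.format(
        subagent_section=subagent_section,
        subagent_reminder=subagent_reminder,
    )
```

Because disabled blocks render as empty strings, the baseline prompt is byte-identical to the pre-subagent version when the flag is off.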

View File

@@ -2,6 +2,13 @@
from typing import Any
try:
import tiktoken
TIKTOKEN_AVAILABLE = True
except ImportError:
TIKTOKEN_AVAILABLE = False
# Prompt template for updating memory based on conversation
MEMORY_UPDATE_PROMPT = """You are a memory management system. Your task is to analyze a conversation and update the user's memory profile.
@@ -17,22 +24,60 @@ New Conversation to Process:
Instructions:
1. Analyze the conversation for important information about the user
2. Extract relevant facts, preferences, and context
3. Update the memory sections as needed:
- workContext: User's work-related information (job, projects, tools, technologies)
- personalContext: Personal preferences, communication style, background
- topOfMind: Current focus areas, ongoing tasks, immediate priorities
2. Extract relevant facts, preferences, and context with specific details (numbers, names, technologies)
3. Update the memory sections as needed following the detailed length guidelines below
4. For facts extraction:
- Extract specific, verifiable facts about the user
- Assign appropriate categories: preference, knowledge, context, behavior, goal
- Estimate confidence (0.0-1.0) based on how explicit the information is
- Avoid duplicating existing facts
Memory Section Guidelines:
5. Update history sections:
- recentMonths: Summary of recent activities and discussions
- earlierContext: Important historical context
- longTermBackground: Persistent background information
**User Context** (Current state - concise summaries):
- workContext: Professional role, company, key projects, main technologies (2-3 sentences)
Example: Core contributor, project names with metrics (16k+ stars), technical stack
- personalContext: Languages, communication preferences, key interests (1-2 sentences)
Example: Bilingual capabilities, specific interest areas, expertise domains
- topOfMind: Multiple ongoing focus areas and priorities (3-5 sentences, detailed paragraph)
Example: Primary project work, parallel technical investigations, ongoing learning/tracking
Include: Active implementation work, troubleshooting issues, market/research interests
Note: This captures SEVERAL concurrent focus areas, not just one task
**History** (Temporal context - rich paragraphs):
- recentMonths: Detailed summary of recent activities (4-6 sentences or 1-2 paragraphs)
Timeline: Last 1-3 months of interactions
Include: Technologies explored, projects worked on, problems solved, interests demonstrated
- earlierContext: Important historical patterns (3-5 sentences or 1 paragraph)
Timeline: 3-12 months ago
Include: Past projects, learning journeys, established patterns
- longTermBackground: Persistent background and foundational context (2-4 sentences)
Timeline: Overall/foundational information
Include: Core expertise, longstanding interests, fundamental working style
**Facts Extraction**:
- Extract specific, quantifiable details (e.g., "16k+ GitHub stars", "200+ datasets")
- Include proper nouns (company names, project names, technology names)
- Preserve technical terminology and version numbers
- Categories:
* preference: Tools, styles, approaches user prefers/dislikes
* knowledge: Specific expertise, technologies mastered, domain knowledge
* context: Background facts (job title, projects, locations, languages)
* behavior: Working patterns, communication habits, problem-solving approaches
* goal: Stated objectives, learning targets, project ambitions
- Confidence levels:
* 0.9-1.0: Explicitly stated facts ("I work on X", "My role is Y")
* 0.7-0.8: Strongly implied from actions/discussions
* 0.5-0.6: Inferred patterns (use sparingly, only for clear patterns)
**What Goes Where**:
- workContext: Current job, active projects, primary tech stack
- personalContext: Languages, personality, interests outside direct work tasks
- topOfMind: Multiple ongoing priorities and focus areas the user has cared about recently (updated most frequently)
Should capture 3-5 concurrent themes: main work, side explorations, learning/tracking interests
- recentMonths: Detailed account of recent technical explorations and work
- earlierContext: Patterns from slightly older interactions still relevant
- longTermBackground: Unchanging foundational facts about the user
**Multilingual Content**:
- Preserve original language for proper nouns and company names
- Keep technical terms in their original form (DeepSeek, LangGraph, etc.)
- Note language capabilities in personalContext
Output Format (JSON):
{{
@@ -54,11 +99,15 @@ Output Format (JSON):
Important Rules:
- Only set shouldUpdate=true if there's meaningful new information
- Keep summaries concise (1-3 sentences each)
- Only add facts that are clearly stated or strongly implied
- Follow length guidelines: workContext/personalContext are concise (1-3 sentences), topOfMind and history sections are detailed (paragraphs)
- Include specific metrics, version numbers, and proper nouns in facts
- Only add facts that are clearly stated (0.9+) or strongly implied (0.7+)
- Remove facts that are contradicted by new information
- Preserve existing information that isn't contradicted
- Focus on information useful for future interactions
- When updating topOfMind, integrate new focus areas while removing completed/abandoned ones
Keep 3-5 concurrent focus themes that are still active and relevant
- For history sections, integrate new information chronologically into appropriate time period
- Preserve technical accuracy - keep exact names of technologies, companies, projects
- Focus on information useful for future interactions and personalization
Return ONLY valid JSON, no explanation or markdown."""
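The confidence thresholds the prompt defines (0.9+ explicit, 0.7+ strongly implied) could be enforced mechanically when merging the model's JSON output; `filter_new_facts` is a hypothetical helper for illustration, not part of this commit:

```python
# Keep only facts that meet the prompt's confidence bar; drop weak
# inferences (below 0.7) before merging into the memory profile.
def filter_new_facts(facts: list[dict], min_confidence: float = 0.7) -> list[dict]:
    return [f for f in facts if f.get("confidence", 0.0) >= min_confidence]

candidates = [
    {"content": "Core contributor to DeerFlow", "category": "context", "confidence": 0.95},
    {"content": "Might prefer dark mode", "category": "preference", "confidence": 0.5},
]
accepted = filter_new_facts(candidates)  # only the 0.95 fact survives
```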
@@ -91,12 +140,34 @@ Rules:
Return ONLY valid JSON."""
def _count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
"""Count tokens in text using tiktoken.
Args:
text: The text to count tokens for.
encoding_name: The encoding to use (default: cl100k_base for GPT-4/3.5).
Returns:
The number of tokens in the text.
"""
if not TIKTOKEN_AVAILABLE:
# Fallback to character-based estimation if tiktoken is not available
return len(text) // 4
try:
encoding = tiktoken.get_encoding(encoding_name)
return len(encoding.encode(text))
except Exception:
# Fallback to character-based estimation on error
return len(text) // 4
def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str:
"""Format memory data for injection into system prompt.
Args:
memory_data: The memory data dictionary.
max_tokens: Maximum tokens to use (approximate via character count).
max_tokens: Maximum tokens to use (counted via tiktoken for accuracy).
Returns:
Formatted memory string for system prompt injection.
@@ -142,33 +213,19 @@ def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2
if history_sections:
sections.append("History:\n" + "\n".join(f"- {s}" for s in history_sections))
# Format facts (most relevant ones)
facts = memory_data.get("facts", [])
if facts:
# Sort by confidence and take top facts
sorted_facts = sorted(facts, key=lambda f: f.get("confidence", 0), reverse=True)
# Limit to avoid too much content
top_facts = sorted_facts[:15]
fact_lines = []
for fact in top_facts:
content = fact.get("content", "")
category = fact.get("category", "")
if content:
fact_lines.append(f"- [{category}] {content}")
if fact_lines:
sections.append("Known Facts:\n" + "\n".join(fact_lines))
if not sections:
return ""
result = "\n\n".join(sections)
# Rough token limit (approximate 4 chars per token)
max_chars = max_tokens * 4
if len(result) > max_chars:
result = result[:max_chars] + "\n..."
# Use accurate token counting with tiktoken
token_count = _count_tokens(result)
if token_count > max_tokens:
# Truncate to fit within token limit
# Estimate characters to remove based on token ratio
char_per_token = len(result) / token_count
target_chars = int(max_tokens * char_per_token * 0.95) # 95% to leave margin
result = result[:target_chars] + "\n..."
return result
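The tiktoken-with-fallback counting and proportional truncation introduced in this file can be sketched as a standalone pair (names differ slightly from the diff; the 4-chars-per-token fallback and the 95% safety margin are taken from the hunks above):

```python
# Count tokens with tiktoken when available; estimate otherwise.
try:
    import tiktoken
    TIKTOKEN_AVAILABLE = True
except ImportError:
    TIKTOKEN_AVAILABLE = False

def count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
    if not TIKTOKEN_AVAILABLE:
        return len(text) // 4  # rough ~4 chars/token estimate
    try:
        return len(tiktoken.get_encoding(encoding_name).encode(text))
    except Exception:
        return len(text) // 4

def truncate_to_tokens(text: str, max_tokens: int) -> str:
    tokens = count_tokens(text)
    if tokens <= max_tokens:
        return text
    # Convert the token budget back to characters using the observed
    # chars-per-token ratio, keeping a 5% margin as in the diff.
    chars_per_token = len(text) / tokens
    target = int(max_tokens * chars_per_token * 0.95)
    return text[:target] + "\n..."
```

Note the truncation is still approximate: it cuts by estimated character count rather than re-encoding, trading exactness for a single encode pass.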

View File

@@ -273,9 +273,7 @@ class MemoryUpdater:
# Remove facts
facts_to_remove = set(update_data.get("factsToRemove", []))
if facts_to_remove:
current_memory["facts"] = [
f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove
]
current_memory["facts"] = [f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove]
# Add new facts
new_facts = update_data.get("newFacts", [])
@@ -304,9 +302,7 @@ class MemoryUpdater:
return current_memory
def update_memory_from_conversation(
messages: list[Any], thread_id: str | None = None
) -> bool:
def update_memory_from_conversation(messages: list[Any], thread_id: str | None = None) -> bool:
"""Convenience function to update memory from a conversation.
Args:

View File

@@ -151,8 +151,9 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
State updates including uploaded files list.
"""
import logging
logger = logging.getLogger(__name__)
thread_id = runtime.context.get("thread_id")
if thread_id is None:
return None
@@ -172,7 +173,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
logger.info(f"Found previously shown files: {extracted}")
logger.info(f"Total shown files from history: {shown_files}")
# List only newly uploaded files
files = self._list_newly_uploaded_files(thread_id, shown_files)
logger.info(f"Newly uploaded files to inject: {[f['filename'] for f in files]}")
@@ -189,7 +190,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
# Create files message and prepend to the last human message content
files_message = self._create_files_message(files)
# Extract original content - handle both string and list formats
original_content = ""
if isinstance(last_message.content, str):
@@ -201,9 +202,9 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
if isinstance(block, dict) and block.get("type") == "text":
text_parts.append(block.get("text", ""))
original_content = "\n".join(text_parts)
logger.info(f"Original message content: {original_content[:100] if original_content else '(empty)'}")
# Create new message with combined content
updated_message = HumanMessage(
content=f"{files_message}\n\n{original_content}",