mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-18 12:04:45 +08:00
Merge upstream/experimental into feat/citations
Resolved conflicts: - backend/src/gateway/routers/artifacts.py: Keep citations block removal for markdown downloads - frontend/src/components/workspace/messages/message-list-item.tsx: Keep improved citation handling with rehypePlugins, humanMessagePlugins, and CitationsLoadingIndicator Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -233,11 +233,12 @@ def make_lead_agent(config: RunnableConfig):
|
||||
thinking_enabled = config.get("configurable", {}).get("thinking_enabled", True)
|
||||
model_name = config.get("configurable", {}).get("model_name") or config.get("configurable", {}).get("model")
|
||||
is_plan_mode = config.get("configurable", {}).get("is_plan_mode", False)
|
||||
print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}, is_plan_mode: {is_plan_mode}")
|
||||
subagent_enabled = config.get("configurable", {}).get("subagent_enabled", False)
|
||||
print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}, is_plan_mode: {is_plan_mode}, subagent_enabled: {subagent_enabled}")
|
||||
return create_agent(
|
||||
model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
|
||||
tools=get_available_tools(model_name=model_name),
|
||||
tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled),
|
||||
middleware=_build_middlewares(config),
|
||||
system_prompt=apply_prompt_template(),
|
||||
system_prompt=apply_prompt_template(subagent_enabled=subagent_enabled),
|
||||
state_schema=ThreadState,
|
||||
)
|
||||
|
||||
@@ -2,6 +2,130 @@ from datetime import datetime
|
||||
|
||||
from src.skills import load_skills
|
||||
|
||||
SUBAGENT_SECTION = """<subagent_system>
|
||||
**🚀 SUBAGENT MODE ACTIVE - DECOMPOSE, DELEGATE, SYNTHESIZE**
|
||||
|
||||
You are running with subagent capabilities enabled. Your role is to be a **task orchestrator**:
|
||||
1. **DECOMPOSE**: Break complex tasks into parallel sub-tasks
|
||||
2. **DELEGATE**: Launch multiple subagents simultaneously using parallel `task` calls
|
||||
3. **SYNTHESIZE**: Collect and integrate results into a coherent answer
|
||||
|
||||
**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**
|
||||
|
||||
**Available Subagents:**
|
||||
- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.
|
||||
- **bash**: For command execution (git, build, test, deploy operations)
|
||||
|
||||
**Your Orchestration Strategy:**
|
||||
|
||||
✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**
|
||||
|
||||
For complex queries, break them down into multiple focused sub-tasks and execute in parallel:
|
||||
|
||||
**Example 1: "Why is Tencent's stock price declining?"**
|
||||
→ Decompose into 4 parallel searches:
|
||||
- Subagent 1: Recent financial reports and earnings data
|
||||
- Subagent 2: Negative news and controversies
|
||||
- Subagent 3: Industry trends and competitor performance
|
||||
- Subagent 4: Macro-economic factors and market sentiment
|
||||
|
||||
**Example 2: "What are the latest AI trends in 2026?"**
|
||||
→ Decompose into parallel research areas:
|
||||
- Subagent 1: LLM and foundation model developments
|
||||
- Subagent 2: AI infrastructure and hardware trends
|
||||
- Subagent 3: Enterprise AI adoption patterns
|
||||
- Subagent 4: Regulatory and ethical developments
|
||||
|
||||
**Example 3: "Refactor the authentication system"**
|
||||
→ Decompose into parallel analysis:
|
||||
- Subagent 1: Analyze current auth implementation
|
||||
- Subagent 2: Research best practices and security patterns
|
||||
- Subagent 3: Check for vulnerabilities and technical debt
|
||||
- Subagent 4: Review related tests and documentation
|
||||
|
||||
✅ **USE Parallel Subagents (2+ subagents) when:**
|
||||
- **Complex research questions**: Requires multiple information sources or perspectives
|
||||
- **Multi-aspect analysis**: Task has several independent dimensions to explore
|
||||
- **Large codebases**: Need to analyze different parts simultaneously
|
||||
- **Comprehensive investigations**: Questions requiring thorough coverage from multiple angles
|
||||
|
||||
❌ **DO NOT use subagents (execute directly) when:**
|
||||
- **Task cannot be decomposed**: If you can't break it into 2+ meaningful parallel sub-tasks, execute directly
|
||||
- **Ultra-simple actions**: Read one file, quick edits, single commands
|
||||
- **Need immediate clarification**: Must ask user before proceeding
|
||||
- **Meta conversation**: Questions about conversation history
|
||||
- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)
|
||||
|
||||
**CRITICAL WORKFLOW**:
|
||||
1. In your thinking: Can I decompose this into 2+ independent parallel sub-tasks?
|
||||
2. **YES** → Launch multiple `task` calls in parallel, then synthesize results
|
||||
3. **NO** → Execute directly using available tools (bash, read_file, web_search, etc.)
|
||||
|
||||
**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**
|
||||
|
||||
**How It Works:**
|
||||
- The task tool runs subagents asynchronously in the background
|
||||
- The backend automatically polls for completion (you don't need to poll)
|
||||
- The tool call will block until the subagent completes its work
|
||||
- Once complete, the result is returned to you directly
|
||||
|
||||
**Usage Example - Parallel Decomposition:**
|
||||
|
||||
```python
|
||||
# User asks: "Why is Tencent's stock price declining?"
|
||||
# Thinking: This is complex research requiring multiple angles
|
||||
# → Decompose into 4 parallel searches
|
||||
|
||||
# Launch 4 subagents in a SINGLE response with multiple tool calls:
|
||||
|
||||
# Subagent 1: Financial data
|
||||
task(
|
||||
subagent_type="general-purpose",
|
||||
prompt="Search for Tencent's latest financial reports, quarterly earnings, and revenue trends in 2025-2026. Focus on numbers and official data.",
|
||||
description="Tencent financial data"
|
||||
)
|
||||
|
||||
# Subagent 2: Negative news
|
||||
task(
|
||||
subagent_type="general-purpose",
|
||||
prompt="Search for recent negative news, controversies, or regulatory issues affecting Tencent in 2025-2026.",
|
||||
description="Tencent negative news"
|
||||
)
|
||||
|
||||
# Subagent 3: Industry/competitors
|
||||
task(
|
||||
subagent_type="general-purpose",
|
||||
prompt="Search for Chinese tech industry trends and how Tencent's competitors (Alibaba, ByteDance) are performing in 2025-2026.",
|
||||
description="Industry comparison"
|
||||
)
|
||||
|
||||
# Subagent 4: Market factors
|
||||
task(
|
||||
subagent_type="general-purpose",
|
||||
prompt="Search for macro-economic factors affecting Chinese tech stocks and overall market sentiment toward Tencent in 2025-2026.",
|
||||
description="Market sentiment"
|
||||
)
|
||||
|
||||
# All 4 subagents run in parallel, results return simultaneously
|
||||
# Then synthesize findings into comprehensive analysis
|
||||
```
|
||||
|
||||
**Counter-Example - Direct Execution (NO subagents):**
|
||||
|
||||
```python
|
||||
# User asks: "Run the tests"
|
||||
# Thinking: Cannot decompose into parallel sub-tasks
|
||||
# → Execute directly
|
||||
|
||||
bash("npm test") # Direct execution, not task()
|
||||
```
|
||||
|
||||
**CRITICAL**:
|
||||
- Only use `task` when you can launch 2+ subagents in parallel
|
||||
- Single task = No value from subagents = Execute directly
|
||||
- Multiple tasks in SINGLE response = Parallel execution
|
||||
</subagent_system>"""
|
||||
|
||||
SYSTEM_PROMPT_TEMPLATE = """
|
||||
<role>
|
||||
You are DeerFlow 2.0, an open-source super agent.
|
||||
@@ -13,7 +137,7 @@ You are DeerFlow 2.0, an open-source super agent.
|
||||
- Think concisely and strategically about the user's request BEFORE taking action
|
||||
- Break down the task: What is clear? What is ambiguous? What is missing?
|
||||
- **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work**
|
||||
- Never write down your full final answer or report in thinking process, but only outline
|
||||
{subagent_thinking}- Never write down your full final answer or report in thinking process, but only outline
|
||||
- CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
|
||||
- Your response must contain the actual answer, not just a reference to what you thought about
|
||||
</thinking_style>
|
||||
@@ -103,6 +227,8 @@ You have access to skills that provide optimized workflows for specific tasks. E
|
||||
|
||||
</skill_system>
|
||||
|
||||
{subagent_section}
|
||||
|
||||
<working_directory existed="true">
|
||||
- User uploads: `/mnt/user-data/uploads` - Files uploaded by the user (automatically listed in context)
|
||||
- User workspace: `/mnt/user-data/workspace` - Working directory for temporary files
|
||||
@@ -149,7 +275,7 @@ The key AI trends for 2026 include enhanced reasoning capabilities and multimoda
|
||||
|
||||
<critical_reminders>
|
||||
- **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess
|
||||
- Skill First: Always load the relevant skill before starting **complex** tasks.
|
||||
{subagent_reminder}- Skill First: Always load the relevant skill before starting **complex** tasks.
|
||||
- Progressive Loading: Load resources incrementally as referenced in skills
|
||||
- Output Files: Final deliverables must be in `/mnt/user-data/outputs`
|
||||
- Clarity: Be direct and helpful, avoid unnecessary meta-commentary
|
||||
@@ -176,9 +302,7 @@ def _get_memory_context() -> str:
|
||||
return ""
|
||||
|
||||
memory_data = get_memory_data()
|
||||
memory_content = format_memory_for_injection(
|
||||
memory_data, max_tokens=config.max_injection_tokens
|
||||
)
|
||||
memory_content = format_memory_for_injection(memory_data, max_tokens=config.max_injection_tokens)
|
||||
|
||||
if not memory_content.strip():
|
||||
return ""
|
||||
@@ -192,29 +316,24 @@ def _get_memory_context() -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def apply_prompt_template() -> str:
|
||||
def apply_prompt_template(subagent_enabled: bool = False) -> str:
|
||||
# Load only enabled skills
|
||||
skills = load_skills(enabled_only=True)
|
||||
|
||||
# Get skills container path from config
|
||||
# Get config
|
||||
try:
|
||||
from src.config import get_app_config
|
||||
|
||||
config = get_app_config()
|
||||
container_base_path = config.skills.container_path
|
||||
except Exception:
|
||||
# Fallback to default if config fails
|
||||
# Fallback to defaults if config fails
|
||||
container_base_path = "/mnt/skills"
|
||||
|
||||
# Generate skills list XML with paths (path points to SKILL.md file)
|
||||
if skills:
|
||||
skill_items = "\n".join(
|
||||
f" <skill>\n"
|
||||
f" <name>{skill.name}</name>\n"
|
||||
f" <description>{skill.description}</description>\n"
|
||||
f" <location>{skill.get_container_file_path(container_base_path)}</location>\n"
|
||||
f" </skill>"
|
||||
for skill in skills
|
||||
f" <skill>\n <name>{skill.name}</name>\n <description>{skill.description}</description>\n <location>{skill.get_container_file_path(container_base_path)}</location>\n </skill>" for skill in skills
|
||||
)
|
||||
skills_list = f"<available_skills>\n{skill_items}\n</available_skills>"
|
||||
else:
|
||||
@@ -223,11 +342,31 @@ def apply_prompt_template() -> str:
|
||||
# Get memory context
|
||||
memory_context = _get_memory_context()
|
||||
|
||||
# Include subagent section only if enabled (from runtime parameter)
|
||||
subagent_section = SUBAGENT_SECTION if subagent_enabled else ""
|
||||
|
||||
# Add subagent reminder to critical_reminders if enabled
|
||||
subagent_reminder = (
|
||||
"- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks and launch multiple subagents simultaneously. Synthesize results, don't execute directly.\n"
|
||||
if subagent_enabled
|
||||
else ""
|
||||
)
|
||||
|
||||
# Add subagent thinking guidance if enabled
|
||||
subagent_thinking = (
|
||||
"- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, decompose and launch multiple subagents in parallel. Your role is orchestrator, not executor.**\n"
|
||||
if subagent_enabled
|
||||
else ""
|
||||
)
|
||||
|
||||
# Format the prompt with dynamic skills and memory
|
||||
prompt = SYSTEM_PROMPT_TEMPLATE.format(
|
||||
skills_list=skills_list,
|
||||
skills_base_path=container_base_path,
|
||||
memory_context=memory_context,
|
||||
subagent_section=subagent_section,
|
||||
subagent_reminder=subagent_reminder,
|
||||
subagent_thinking=subagent_thinking,
|
||||
)
|
||||
|
||||
return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
|
||||
|
||||
@@ -2,6 +2,13 @@
|
||||
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import tiktoken
|
||||
|
||||
TIKTOKEN_AVAILABLE = True
|
||||
except ImportError:
|
||||
TIKTOKEN_AVAILABLE = False
|
||||
|
||||
# Prompt template for updating memory based on conversation
|
||||
MEMORY_UPDATE_PROMPT = """You are a memory management system. Your task is to analyze a conversation and update the user's memory profile.
|
||||
|
||||
@@ -17,22 +24,60 @@ New Conversation to Process:
|
||||
|
||||
Instructions:
|
||||
1. Analyze the conversation for important information about the user
|
||||
2. Extract relevant facts, preferences, and context
|
||||
3. Update the memory sections as needed:
|
||||
- workContext: User's work-related information (job, projects, tools, technologies)
|
||||
- personalContext: Personal preferences, communication style, background
|
||||
- topOfMind: Current focus areas, ongoing tasks, immediate priorities
|
||||
2. Extract relevant facts, preferences, and context with specific details (numbers, names, technologies)
|
||||
3. Update the memory sections as needed following the detailed length guidelines below
|
||||
|
||||
4. For facts extraction:
|
||||
- Extract specific, verifiable facts about the user
|
||||
- Assign appropriate categories: preference, knowledge, context, behavior, goal
|
||||
- Estimate confidence (0.0-1.0) based on how explicit the information is
|
||||
- Avoid duplicating existing facts
|
||||
Memory Section Guidelines:
|
||||
|
||||
5. Update history sections:
|
||||
- recentMonths: Summary of recent activities and discussions
|
||||
- earlierContext: Important historical context
|
||||
- longTermBackground: Persistent background information
|
||||
**User Context** (Current state - concise summaries):
|
||||
- workContext: Professional role, company, key projects, main technologies (2-3 sentences)
|
||||
Example: Core contributor, project names with metrics (16k+ stars), technical stack
|
||||
- personalContext: Languages, communication preferences, key interests (1-2 sentences)
|
||||
Example: Bilingual capabilities, specific interest areas, expertise domains
|
||||
- topOfMind: Multiple ongoing focus areas and priorities (3-5 sentences, detailed paragraph)
|
||||
Example: Primary project work, parallel technical investigations, ongoing learning/tracking
|
||||
Include: Active implementation work, troubleshooting issues, market/research interests
|
||||
Note: This captures SEVERAL concurrent focus areas, not just one task
|
||||
|
||||
**History** (Temporal context - rich paragraphs):
|
||||
- recentMonths: Detailed summary of recent activities (4-6 sentences or 1-2 paragraphs)
|
||||
Timeline: Last 1-3 months of interactions
|
||||
Include: Technologies explored, projects worked on, problems solved, interests demonstrated
|
||||
- earlierContext: Important historical patterns (3-5 sentences or 1 paragraph)
|
||||
Timeline: 3-12 months ago
|
||||
Include: Past projects, learning journeys, established patterns
|
||||
- longTermBackground: Persistent background and foundational context (2-4 sentences)
|
||||
Timeline: Overall/foundational information
|
||||
Include: Core expertise, longstanding interests, fundamental working style
|
||||
|
||||
**Facts Extraction**:
|
||||
- Extract specific, quantifiable details (e.g., "16k+ GitHub stars", "200+ datasets")
|
||||
- Include proper nouns (company names, project names, technology names)
|
||||
- Preserve technical terminology and version numbers
|
||||
- Categories:
|
||||
* preference: Tools, styles, approaches user prefers/dislikes
|
||||
* knowledge: Specific expertise, technologies mastered, domain knowledge
|
||||
* context: Background facts (job title, projects, locations, languages)
|
||||
* behavior: Working patterns, communication habits, problem-solving approaches
|
||||
* goal: Stated objectives, learning targets, project ambitions
|
||||
- Confidence levels:
|
||||
* 0.9-1.0: Explicitly stated facts ("I work on X", "My role is Y")
|
||||
* 0.7-0.8: Strongly implied from actions/discussions
|
||||
* 0.5-0.6: Inferred patterns (use sparingly, only for clear patterns)
|
||||
|
||||
**What Goes Where**:
|
||||
- workContext: Current job, active projects, primary tech stack
|
||||
- personalContext: Languages, personality, interests outside direct work tasks
|
||||
- topOfMind: Multiple ongoing priorities and focus areas user cares about recently (gets updated most frequently)
|
||||
Should capture 3-5 concurrent themes: main work, side explorations, learning/tracking interests
|
||||
- recentMonths: Detailed account of recent technical explorations and work
|
||||
- earlierContext: Patterns from slightly older interactions still relevant
|
||||
- longTermBackground: Unchanging foundational facts about the user
|
||||
|
||||
**Multilingual Content**:
|
||||
- Preserve original language for proper nouns and company names
|
||||
- Keep technical terms in their original form (DeepSeek, LangGraph, etc.)
|
||||
- Note language capabilities in personalContext
|
||||
|
||||
Output Format (JSON):
|
||||
{{
|
||||
@@ -54,11 +99,15 @@ Output Format (JSON):
|
||||
|
||||
Important Rules:
|
||||
- Only set shouldUpdate=true if there's meaningful new information
|
||||
- Keep summaries concise (1-3 sentences each)
|
||||
- Only add facts that are clearly stated or strongly implied
|
||||
- Follow length guidelines: workContext/personalContext are concise (1-3 sentences), topOfMind and history sections are detailed (paragraphs)
|
||||
- Include specific metrics, version numbers, and proper nouns in facts
|
||||
- Only add facts that are clearly stated (0.9+) or strongly implied (0.7+)
|
||||
- Remove facts that are contradicted by new information
|
||||
- Preserve existing information that isn't contradicted
|
||||
- Focus on information useful for future interactions
|
||||
- When updating topOfMind, integrate new focus areas while removing completed/abandoned ones
|
||||
Keep 3-5 concurrent focus themes that are still active and relevant
|
||||
- For history sections, integrate new information chronologically into appropriate time period
|
||||
- Preserve technical accuracy - keep exact names of technologies, companies, projects
|
||||
- Focus on information useful for future interactions and personalization
|
||||
|
||||
Return ONLY valid JSON, no explanation or markdown."""
|
||||
|
||||
@@ -91,12 +140,34 @@ Rules:
|
||||
Return ONLY valid JSON."""
|
||||
|
||||
|
||||
def _count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
|
||||
"""Count tokens in text using tiktoken.
|
||||
|
||||
Args:
|
||||
text: The text to count tokens for.
|
||||
encoding_name: The encoding to use (default: cl100k_base for GPT-4/3.5).
|
||||
|
||||
Returns:
|
||||
The number of tokens in the text.
|
||||
"""
|
||||
if not TIKTOKEN_AVAILABLE:
|
||||
# Fallback to character-based estimation if tiktoken is not available
|
||||
return len(text) // 4
|
||||
|
||||
try:
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
return len(encoding.encode(text))
|
||||
except Exception:
|
||||
# Fallback to character-based estimation on error
|
||||
return len(text) // 4
|
||||
|
||||
|
||||
def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str:
|
||||
"""Format memory data for injection into system prompt.
|
||||
|
||||
Args:
|
||||
memory_data: The memory data dictionary.
|
||||
max_tokens: Maximum tokens to use (approximate via character count).
|
||||
max_tokens: Maximum tokens to use (counted via tiktoken for accuracy).
|
||||
|
||||
Returns:
|
||||
Formatted memory string for system prompt injection.
|
||||
@@ -142,33 +213,19 @@ def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2
|
||||
if history_sections:
|
||||
sections.append("History:\n" + "\n".join(f"- {s}" for s in history_sections))
|
||||
|
||||
# Format facts (most relevant ones)
|
||||
facts = memory_data.get("facts", [])
|
||||
if facts:
|
||||
# Sort by confidence and take top facts
|
||||
sorted_facts = sorted(facts, key=lambda f: f.get("confidence", 0), reverse=True)
|
||||
# Limit to avoid too much content
|
||||
top_facts = sorted_facts[:15]
|
||||
|
||||
fact_lines = []
|
||||
for fact in top_facts:
|
||||
content = fact.get("content", "")
|
||||
category = fact.get("category", "")
|
||||
if content:
|
||||
fact_lines.append(f"- [{category}] {content}")
|
||||
|
||||
if fact_lines:
|
||||
sections.append("Known Facts:\n" + "\n".join(fact_lines))
|
||||
|
||||
if not sections:
|
||||
return ""
|
||||
|
||||
result = "\n\n".join(sections)
|
||||
|
||||
# Rough token limit (approximate 4 chars per token)
|
||||
max_chars = max_tokens * 4
|
||||
if len(result) > max_chars:
|
||||
result = result[:max_chars] + "\n..."
|
||||
# Use accurate token counting with tiktoken
|
||||
token_count = _count_tokens(result)
|
||||
if token_count > max_tokens:
|
||||
# Truncate to fit within token limit
|
||||
# Estimate characters to remove based on token ratio
|
||||
char_per_token = len(result) / token_count
|
||||
target_chars = int(max_tokens * char_per_token * 0.95) # 95% to leave margin
|
||||
result = result[:target_chars] + "\n..."
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -273,9 +273,7 @@ class MemoryUpdater:
|
||||
# Remove facts
|
||||
facts_to_remove = set(update_data.get("factsToRemove", []))
|
||||
if facts_to_remove:
|
||||
current_memory["facts"] = [
|
||||
f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove
|
||||
]
|
||||
current_memory["facts"] = [f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove]
|
||||
|
||||
# Add new facts
|
||||
new_facts = update_data.get("newFacts", [])
|
||||
@@ -304,9 +302,7 @@ class MemoryUpdater:
|
||||
return current_memory
|
||||
|
||||
|
||||
def update_memory_from_conversation(
|
||||
messages: list[Any], thread_id: str | None = None
|
||||
) -> bool:
|
||||
def update_memory_from_conversation(messages: list[Any], thread_id: str | None = None) -> bool:
|
||||
"""Convenience function to update memory from a conversation.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -151,8 +151,9 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
|
||||
State updates including uploaded files list.
|
||||
"""
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
thread_id = runtime.context.get("thread_id")
|
||||
if thread_id is None:
|
||||
return None
|
||||
@@ -172,7 +173,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
|
||||
logger.info(f"Found previously shown files: {extracted}")
|
||||
|
||||
logger.info(f"Total shown files from history: {shown_files}")
|
||||
|
||||
|
||||
# List only newly uploaded files
|
||||
files = self._list_newly_uploaded_files(thread_id, shown_files)
|
||||
logger.info(f"Newly uploaded files to inject: {[f['filename'] for f in files]}")
|
||||
@@ -189,7 +190,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
|
||||
|
||||
# Create files message and prepend to the last human message content
|
||||
files_message = self._create_files_message(files)
|
||||
|
||||
|
||||
# Extract original content - handle both string and list formats
|
||||
original_content = ""
|
||||
if isinstance(last_message.content, str):
|
||||
@@ -201,9 +202,9 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
|
||||
if isinstance(block, dict) and block.get("type") == "text":
|
||||
text_parts.append(block.get("text", ""))
|
||||
original_content = "\n".join(text_parts)
|
||||
|
||||
|
||||
logger.info(f"Original message content: {original_content[:100] if original_content else '(empty)'}")
|
||||
|
||||
|
||||
# Create new message with combined content
|
||||
updated_message = HumanMessage(
|
||||
content=f"{files_message}\n\n{original_content}",
|
||||
|
||||
@@ -32,14 +32,17 @@ IDLE_CHECK_INTERVAL = 60 # Check every 60 seconds
|
||||
|
||||
|
||||
class AioSandboxProvider(SandboxProvider):
|
||||
"""Sandbox provider that manages Docker containers running the AIO sandbox.
|
||||
"""Sandbox provider that manages containers running the AIO sandbox.
|
||||
|
||||
On macOS, automatically prefers Apple Container if available, otherwise falls back to Docker.
|
||||
On other platforms, uses Docker.
|
||||
|
||||
Configuration options in config.yaml under sandbox:
|
||||
use: src.community.aio_sandbox:AioSandboxProvider
|
||||
image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest # Docker image to use
|
||||
image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest # Container image to use (works with both runtimes)
|
||||
port: 8080 # Base port for sandbox containers
|
||||
base_url: http://localhost:8080 # If set, uses existing sandbox instead of starting new container
|
||||
auto_start: true # Whether to automatically start Docker container
|
||||
auto_start: true # Whether to automatically start container
|
||||
container_prefix: deer-flow-sandbox # Prefix for container names
|
||||
idle_timeout: 600 # Idle timeout in seconds (default: 600 = 10 minutes). Set to 0 to disable.
|
||||
mounts: # List of volume mounts
|
||||
@@ -57,11 +60,13 @@ class AioSandboxProvider(SandboxProvider):
|
||||
self._containers: dict[str, str] = {} # sandbox_id -> container_id
|
||||
self._ports: dict[str, int] = {} # sandbox_id -> port
|
||||
self._thread_sandboxes: dict[str, str] = {} # thread_id -> sandbox_id (for reusing sandbox across turns)
|
||||
self._thread_locks: dict[str, threading.Lock] = {} # thread_id -> lock (for thread-specific acquisition)
|
||||
self._last_activity: dict[str, float] = {} # sandbox_id -> last activity timestamp
|
||||
self._config = self._load_config()
|
||||
self._shutdown_called = False
|
||||
self._idle_checker_stop = threading.Event()
|
||||
self._idle_checker_thread: threading.Thread | None = None
|
||||
self._container_runtime = self._detect_container_runtime()
|
||||
|
||||
# Register shutdown handler to clean up containers on exit
|
||||
atexit.register(self.shutdown)
|
||||
@@ -184,6 +189,35 @@ class AioSandboxProvider(SandboxProvider):
|
||||
resolved[key] = str(value)
|
||||
return resolved
|
||||
|
||||
def _detect_container_runtime(self) -> str:
|
||||
"""Detect which container runtime to use.
|
||||
|
||||
On macOS, prefer Apple Container if available, otherwise fall back to Docker.
|
||||
On other platforms, use Docker.
|
||||
|
||||
Returns:
|
||||
"container" for Apple Container, "docker" for Docker.
|
||||
"""
|
||||
import platform
|
||||
|
||||
# Only try Apple Container on macOS
|
||||
if platform.system() == "Darwin":
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["container", "--version"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=True,
|
||||
timeout=5,
|
||||
)
|
||||
logger.info(f"Detected Apple Container: {result.stdout.strip()}")
|
||||
return "container"
|
||||
except (FileNotFoundError, subprocess.CalledProcessError, subprocess.TimeoutExpired):
|
||||
logger.info("Apple Container not available, falling back to Docker")
|
||||
|
||||
# Default to Docker
|
||||
return "docker"
|
||||
|
||||
def _is_sandbox_ready(self, base_url: str, timeout: int = 30) -> bool:
|
||||
"""Check if sandbox is ready to accept connections.
|
||||
|
||||
@@ -253,7 +287,10 @@ class AioSandboxProvider(SandboxProvider):
|
||||
return None
|
||||
|
||||
def _start_container(self, sandbox_id: str, port: int, extra_mounts: list[tuple[str, str, bool]] | None = None) -> str:
|
||||
"""Start a new Docker container for the sandbox.
|
||||
"""Start a new container for the sandbox.
|
||||
|
||||
On macOS, prefers Apple Container if available, otherwise uses Docker.
|
||||
On other platforms, uses Docker.
|
||||
|
||||
Args:
|
||||
sandbox_id: Unique identifier for the sandbox.
|
||||
@@ -267,18 +304,25 @@ class AioSandboxProvider(SandboxProvider):
|
||||
container_name = f"{self._config['container_prefix']}-{sandbox_id}"
|
||||
|
||||
cmd = [
|
||||
"docker",
|
||||
self._container_runtime,
|
||||
"run",
|
||||
"--security-opt",
|
||||
"seccomp=unconfined",
|
||||
"--rm",
|
||||
"-d",
|
||||
"-p",
|
||||
f"{port}:8080",
|
||||
"--name",
|
||||
container_name,
|
||||
]
|
||||
|
||||
# Add Docker-specific security options
|
||||
if self._container_runtime == "docker":
|
||||
cmd.extend(["--security-opt", "seccomp=unconfined"])
|
||||
|
||||
cmd.extend(
|
||||
[
|
||||
"--rm",
|
||||
"-d",
|
||||
"-p",
|
||||
f"{port}:8080",
|
||||
"--name",
|
||||
container_name,
|
||||
]
|
||||
)
|
||||
|
||||
# Add configured environment variables
|
||||
for key, value in self._config["environment"].items():
|
||||
cmd.extend(["-e", f"{key}={value}"])
|
||||
@@ -303,29 +347,48 @@ class AioSandboxProvider(SandboxProvider):
|
||||
|
||||
cmd.append(image)
|
||||
|
||||
logger.info(f"Starting sandbox container: {' '.join(cmd)}")
|
||||
logger.info(f"Starting sandbox container using {self._container_runtime}: {' '.join(cmd)}")
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
container_id = result.stdout.strip()
|
||||
logger.info(f"Started sandbox container {container_name} with ID {container_id}")
|
||||
logger.info(f"Started sandbox container {container_name} with ID {container_id} using {self._container_runtime}")
|
||||
return container_id
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Failed to start sandbox container: {e.stderr}")
|
||||
logger.error(f"Failed to start sandbox container using {self._container_runtime}: {e.stderr}")
|
||||
raise RuntimeError(f"Failed to start sandbox container: {e.stderr}")
|
||||
|
||||
def _stop_container(self, container_id: str) -> None:
|
||||
"""Stop and remove a Docker container.
|
||||
"""Stop and remove a container.
|
||||
|
||||
Since we use --rm flag, the container is automatically removed after stopping.
|
||||
|
||||
Args:
|
||||
container_id: The container ID to stop.
|
||||
"""
|
||||
try:
|
||||
subprocess.run(["docker", "stop", container_id], capture_output=True, text=True, check=True)
|
||||
logger.info(f"Stopped sandbox container {container_id}")
|
||||
subprocess.run([self._container_runtime, "stop", container_id], capture_output=True, text=True, check=True)
|
||||
logger.info(f"Stopped sandbox container {container_id} using {self._container_runtime} (--rm will auto-remove)")
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.warning(f"Failed to stop sandbox container {container_id}: {e.stderr}")
|
||||
|
||||
def _get_thread_lock(self, thread_id: str) -> threading.Lock:
|
||||
"""Get or create a lock for a specific thread_id.
|
||||
|
||||
This ensures that concurrent sandbox acquisition for the same thread_id
|
||||
is serialized, preventing duplicate sandbox creation.
|
||||
|
||||
Args:
|
||||
thread_id: The thread ID.
|
||||
|
||||
Returns:
|
||||
A lock specific to this thread_id.
|
||||
"""
|
||||
with self._lock:
|
||||
if thread_id not in self._thread_locks:
|
||||
self._thread_locks[thread_id] = threading.Lock()
|
||||
return self._thread_locks[thread_id]
|
||||
|
||||
def acquire(self, thread_id: str | None = None) -> str:
|
||||
"""Acquire a sandbox environment and return its ID.
|
||||
|
||||
@@ -335,7 +398,8 @@ class AioSandboxProvider(SandboxProvider):
|
||||
For the same thread_id, this method will return the same sandbox_id,
|
||||
allowing sandbox reuse across multiple turns in a conversation.
|
||||
|
||||
This method is thread-safe.
|
||||
This method is thread-safe and prevents race conditions when multiple
|
||||
concurrent requests try to acquire a sandbox for the same thread_id.
|
||||
|
||||
Args:
|
||||
thread_id: Optional thread ID for thread-specific configurations.
|
||||
@@ -343,6 +407,26 @@ class AioSandboxProvider(SandboxProvider):
|
||||
mounts for workspace, uploads, and outputs directories.
|
||||
The same thread_id will reuse the same sandbox.
|
||||
|
||||
Returns:
|
||||
The ID of the acquired sandbox environment.
|
||||
"""
|
||||
# For thread-specific acquisition, use a per-thread lock to prevent
|
||||
# concurrent creation of multiple sandboxes for the same thread
|
||||
if thread_id:
|
||||
thread_lock = self._get_thread_lock(thread_id)
|
||||
with thread_lock:
|
||||
return self._acquire_internal(thread_id)
|
||||
else:
|
||||
return self._acquire_internal(thread_id)
|
||||
|
||||
def _acquire_internal(self, thread_id: str | None) -> str:
|
||||
"""Internal implementation of sandbox acquisition.
|
||||
|
||||
This method should only be called from acquire() which handles locking.
|
||||
|
||||
Args:
|
||||
thread_id: Optional thread ID for thread-specific configurations.
|
||||
|
||||
Returns:
|
||||
The ID of the acquired sandbox environment.
|
||||
"""
|
||||
|
||||
@@ -162,7 +162,7 @@ class ExtensionsConfig(BaseModel):
|
||||
skill_config = self.skills.get(skill_name)
|
||||
if skill_config is None:
|
||||
# Default to enable for public & custom skill
|
||||
return skill_category in ('public', 'custom')
|
||||
return skill_category in ("public", "custom")
|
||||
return skill_config.enabled
|
||||
|
||||
|
||||
|
||||
@@ -93,6 +93,8 @@ def get_thread_data(runtime: ToolRuntime[ContextT, ThreadState] | None) -> Threa
|
||||
"""Extract thread_data from runtime state."""
|
||||
if runtime is None:
|
||||
return None
|
||||
if runtime.state is None:
|
||||
return None
|
||||
return runtime.state.get("thread_data")
|
||||
|
||||
|
||||
@@ -104,6 +106,8 @@ def is_local_sandbox(runtime: ToolRuntime[ContextT, ThreadState] | None) -> bool
|
||||
"""
|
||||
if runtime is None:
|
||||
return False
|
||||
if runtime.state is None:
|
||||
return False
|
||||
sandbox_state = runtime.state.get("sandbox")
|
||||
if sandbox_state is None:
|
||||
return False
|
||||
@@ -122,6 +126,8 @@ def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = No
|
||||
"""
|
||||
if runtime is None:
|
||||
raise SandboxRuntimeError("Tool runtime not available")
|
||||
if runtime.state is None:
|
||||
raise SandboxRuntimeError("Tool runtime state not available")
|
||||
sandbox_state = runtime.state.get("sandbox")
|
||||
if sandbox_state is None:
|
||||
raise SandboxRuntimeError("Sandbox state not initialized in runtime")
|
||||
@@ -155,6 +161,9 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
|
||||
if runtime is None:
|
||||
raise SandboxRuntimeError("Tool runtime not available")
|
||||
|
||||
if runtime.state is None:
|
||||
raise SandboxRuntimeError("Tool runtime state not available")
|
||||
|
||||
# Check if sandbox already exists in state
|
||||
sandbox_state = runtime.state.get("sandbox")
|
||||
if sandbox_state is not None:
|
||||
|
||||
11
backend/src/subagents/__init__.py
Normal file
11
backend/src/subagents/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from .config import SubagentConfig
|
||||
from .executor import SubagentExecutor, SubagentResult
|
||||
from .registry import get_subagent_config, list_subagents
|
||||
|
||||
__all__ = [
|
||||
"SubagentConfig",
|
||||
"SubagentExecutor",
|
||||
"SubagentResult",
|
||||
"get_subagent_config",
|
||||
"list_subagents",
|
||||
]
|
||||
15
backend/src/subagents/builtins/__init__.py
Normal file
15
backend/src/subagents/builtins/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""Built-in subagent configurations."""
|
||||
|
||||
from .bash_agent import BASH_AGENT_CONFIG
|
||||
from .general_purpose import GENERAL_PURPOSE_CONFIG
|
||||
|
||||
__all__ = [
|
||||
"GENERAL_PURPOSE_CONFIG",
|
||||
"BASH_AGENT_CONFIG",
|
||||
]
|
||||
|
||||
# Registry of built-in subagents
|
||||
BUILTIN_SUBAGENTS = {
|
||||
"general-purpose": GENERAL_PURPOSE_CONFIG,
|
||||
"bash": BASH_AGENT_CONFIG,
|
||||
}
|
||||
46
backend/src/subagents/builtins/bash_agent.py
Normal file
46
backend/src/subagents/builtins/bash_agent.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""Bash command execution subagent configuration."""
|
||||
|
||||
from src.subagents.config import SubagentConfig
|
||||
|
||||
BASH_AGENT_CONFIG = SubagentConfig(
|
||||
name="bash",
|
||||
description="""Command execution specialist for running bash commands in a separate context.
|
||||
|
||||
Use this subagent when:
|
||||
- You need to run a series of related bash commands
|
||||
- Terminal operations like git, npm, docker, etc.
|
||||
- Command output is verbose and would clutter main context
|
||||
- Build, test, or deployment operations
|
||||
|
||||
Do NOT use for simple single commands - use bash tool directly instead.""",
|
||||
system_prompt="""You are a bash command execution specialist. Execute the requested commands carefully and report results clearly.
|
||||
|
||||
<guidelines>
|
||||
- Execute commands one at a time when they depend on each other
|
||||
- Use parallel execution when commands are independent
|
||||
- Report both stdout and stderr when relevant
|
||||
- Handle errors gracefully and explain what went wrong
|
||||
- Use absolute paths for file operations
|
||||
- Be cautious with destructive operations (rm, overwrite, etc.)
|
||||
</guidelines>
|
||||
|
||||
<output_format>
|
||||
For each command or group of commands:
|
||||
1. What was executed
|
||||
2. The result (success/failure)
|
||||
3. Relevant output (summarized if verbose)
|
||||
4. Any errors or warnings
|
||||
</output_format>
|
||||
|
||||
<working_directory>
|
||||
You have access to the sandbox environment:
|
||||
- User uploads: `/mnt/user-data/uploads`
|
||||
- User workspace: `/mnt/user-data/workspace`
|
||||
- Output files: `/mnt/user-data/outputs`
|
||||
</working_directory>
|
||||
""",
|
||||
tools=["bash", "ls", "read_file", "write_file", "str_replace"], # Sandbox tools only
|
||||
disallowed_tools=["task", "ask_clarification"],
|
||||
model="inherit",
|
||||
max_turns=30,
|
||||
)
|
||||
46
backend/src/subagents/builtins/general_purpose.py
Normal file
46
backend/src/subagents/builtins/general_purpose.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""General-purpose subagent configuration."""
|
||||
|
||||
from src.subagents.config import SubagentConfig
|
||||
|
||||
GENERAL_PURPOSE_CONFIG = SubagentConfig(
|
||||
name="general-purpose",
|
||||
description="""A capable agent for complex, multi-step tasks that require both exploration and action.
|
||||
|
||||
Use this subagent when:
|
||||
- The task requires both exploration and modification
|
||||
- Complex reasoning is needed to interpret results
|
||||
- Multiple dependent steps must be executed
|
||||
- The task would benefit from isolated context management
|
||||
|
||||
Do NOT use for simple, single-step operations.""",
|
||||
system_prompt="""You are a general-purpose subagent working on a delegated task. Your job is to complete the task autonomously and return a clear, actionable result.
|
||||
|
||||
<guidelines>
|
||||
- Focus on completing the delegated task efficiently
|
||||
- Use available tools as needed to accomplish the goal
|
||||
- Think step by step but act decisively
|
||||
- If you encounter issues, explain them clearly in your response
|
||||
- Return a concise summary of what you accomplished
|
||||
- Do NOT ask for clarification - work with the information provided
|
||||
</guidelines>
|
||||
|
||||
<output_format>
|
||||
When you complete the task, provide:
|
||||
1. A brief summary of what was accomplished
|
||||
2. Key findings or results
|
||||
3. Any relevant file paths, data, or artifacts created
|
||||
4. Issues encountered (if any)
|
||||
</output_format>
|
||||
|
||||
<working_directory>
|
||||
You have access to the same sandbox environment as the parent agent:
|
||||
- User uploads: `/mnt/user-data/uploads`
|
||||
- User workspace: `/mnt/user-data/workspace`
|
||||
- Output files: `/mnt/user-data/outputs`
|
||||
</working_directory>
|
||||
""",
|
||||
tools=None, # Inherit all tools from parent
|
||||
disallowed_tools=["task", "ask_clarification"], # Prevent nesting and clarification
|
||||
model="inherit",
|
||||
max_turns=50,
|
||||
)
|
||||
28
backend/src/subagents/config.py
Normal file
28
backend/src/subagents/config.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""Subagent configuration definitions."""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
|
||||
class SubagentConfig:
|
||||
"""Configuration for a subagent.
|
||||
|
||||
Attributes:
|
||||
name: Unique identifier for the subagent.
|
||||
description: When Claude should delegate to this subagent.
|
||||
system_prompt: The system prompt that guides the subagent's behavior.
|
||||
tools: Optional list of tool names to allow. If None, inherits all tools.
|
||||
disallowed_tools: Optional list of tool names to deny.
|
||||
model: Model to use - 'inherit' uses parent's model.
|
||||
max_turns: Maximum number of agent turns before stopping.
|
||||
timeout_seconds: Maximum execution time in seconds (default: 300 = 5 minutes).
|
||||
"""
|
||||
|
||||
name: str
|
||||
description: str
|
||||
system_prompt: str
|
||||
tools: list[str] | None = None
|
||||
disallowed_tools: list[str] | None = field(default_factory=lambda: ["task"])
|
||||
model: str = "inherit"
|
||||
max_turns: int = 50
|
||||
timeout_seconds: int = 300
|
||||
368
backend/src/subagents/executor.py
Normal file
368
backend/src/subagents/executor.py
Normal file
@@ -0,0 +1,368 @@
|
||||
"""Subagent execution engine."""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import uuid
|
||||
from concurrent.futures import Future, ThreadPoolExecutor
|
||||
from concurrent.futures import TimeoutError as FuturesTimeoutError
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from langchain.agents import create_agent
|
||||
from langchain.tools import BaseTool
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
|
||||
from src.agents.thread_state import SandboxState, ThreadDataState, ThreadState
|
||||
from src.models import create_chat_model
|
||||
from src.subagents.config import SubagentConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SubagentStatus(Enum):
|
||||
"""Status of a subagent execution."""
|
||||
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
|
||||
class SubagentResult:
|
||||
"""Result of a subagent execution.
|
||||
|
||||
Attributes:
|
||||
task_id: Unique identifier for this execution.
|
||||
trace_id: Trace ID for distributed tracing (links parent and subagent logs).
|
||||
status: Current status of the execution.
|
||||
result: The final result message (if completed).
|
||||
error: Error message (if failed).
|
||||
started_at: When execution started.
|
||||
completed_at: When execution completed.
|
||||
"""
|
||||
|
||||
task_id: str
|
||||
trace_id: str
|
||||
status: SubagentStatus
|
||||
result: str | None = None
|
||||
error: str | None = None
|
||||
started_at: datetime | None = None
|
||||
completed_at: datetime | None = None
|
||||
|
||||
|
||||
# Global storage for background task results
|
||||
_background_tasks: dict[str, SubagentResult] = {}
|
||||
_background_tasks_lock = threading.Lock()
|
||||
|
||||
# Thread pool for background task scheduling and orchestration
|
||||
_scheduler_pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="subagent-scheduler-")
|
||||
|
||||
# Thread pool for actual subagent execution (with timeout support)
|
||||
# Larger pool to avoid blocking when scheduler submits execution tasks
|
||||
_execution_pool = ThreadPoolExecutor(max_workers=8, thread_name_prefix="subagent-exec-")
|
||||
|
||||
|
||||
def _filter_tools(
|
||||
all_tools: list[BaseTool],
|
||||
allowed: list[str] | None,
|
||||
disallowed: list[str] | None,
|
||||
) -> list[BaseTool]:
|
||||
"""Filter tools based on subagent configuration.
|
||||
|
||||
Args:
|
||||
all_tools: List of all available tools.
|
||||
allowed: Optional allowlist of tool names. If provided, only these tools are included.
|
||||
disallowed: Optional denylist of tool names. These tools are always excluded.
|
||||
|
||||
Returns:
|
||||
Filtered list of tools.
|
||||
"""
|
||||
filtered = all_tools
|
||||
|
||||
# Apply allowlist if specified
|
||||
if allowed is not None:
|
||||
allowed_set = set(allowed)
|
||||
filtered = [t for t in filtered if t.name in allowed_set]
|
||||
|
||||
# Apply denylist
|
||||
if disallowed is not None:
|
||||
disallowed_set = set(disallowed)
|
||||
filtered = [t for t in filtered if t.name not in disallowed_set]
|
||||
|
||||
return filtered
|
||||
|
||||
|
||||
def _get_model_name(config: SubagentConfig, parent_model: str | None) -> str | None:
|
||||
"""Resolve the model name for a subagent.
|
||||
|
||||
Args:
|
||||
config: Subagent configuration.
|
||||
parent_model: The parent agent's model name.
|
||||
|
||||
Returns:
|
||||
Model name to use, or None to use default.
|
||||
"""
|
||||
if config.model == "inherit":
|
||||
return parent_model
|
||||
return config.model
|
||||
|
||||
|
||||
class SubagentExecutor:
|
||||
"""Executor for running subagents."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: SubagentConfig,
|
||||
tools: list[BaseTool],
|
||||
parent_model: str | None = None,
|
||||
sandbox_state: SandboxState | None = None,
|
||||
thread_data: ThreadDataState | None = None,
|
||||
thread_id: str | None = None,
|
||||
trace_id: str | None = None,
|
||||
):
|
||||
"""Initialize the executor.
|
||||
|
||||
Args:
|
||||
config: Subagent configuration.
|
||||
tools: List of all available tools (will be filtered).
|
||||
parent_model: The parent agent's model name for inheritance.
|
||||
sandbox_state: Sandbox state from parent agent.
|
||||
thread_data: Thread data from parent agent.
|
||||
thread_id: Thread ID for sandbox operations.
|
||||
trace_id: Trace ID from parent for distributed tracing.
|
||||
"""
|
||||
self.config = config
|
||||
self.parent_model = parent_model
|
||||
self.sandbox_state = sandbox_state
|
||||
self.thread_data = thread_data
|
||||
self.thread_id = thread_id
|
||||
# Generate trace_id if not provided (for top-level calls)
|
||||
self.trace_id = trace_id or str(uuid.uuid4())[:8]
|
||||
|
||||
# Filter tools based on config
|
||||
self.tools = _filter_tools(
|
||||
tools,
|
||||
config.tools,
|
||||
config.disallowed_tools,
|
||||
)
|
||||
|
||||
logger.info(f"[trace={self.trace_id}] SubagentExecutor initialized: {config.name} with {len(self.tools)} tools")
|
||||
|
||||
def _create_agent(self):
|
||||
"""Create the agent instance."""
|
||||
model_name = _get_model_name(self.config, self.parent_model)
|
||||
model = create_chat_model(name=model_name, thinking_enabled=False)
|
||||
|
||||
# Subagents need minimal middlewares to ensure tools can access sandbox and thread_data
|
||||
# These middlewares will reuse the sandbox/thread_data from parent agent
|
||||
from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
|
||||
from src.sandbox.middleware import SandboxMiddleware
|
||||
|
||||
middlewares = [
|
||||
ThreadDataMiddleware(lazy_init=True), # Compute thread paths
|
||||
SandboxMiddleware(lazy_init=True), # Reuse parent's sandbox (no re-acquisition)
|
||||
]
|
||||
|
||||
return create_agent(
|
||||
model=model,
|
||||
tools=self.tools,
|
||||
middleware=middlewares,
|
||||
system_prompt=self.config.system_prompt,
|
||||
state_schema=ThreadState,
|
||||
)
|
||||
|
||||
def _build_initial_state(self, task: str) -> dict[str, Any]:
|
||||
"""Build the initial state for agent execution.
|
||||
|
||||
Args:
|
||||
task: The task description.
|
||||
|
||||
Returns:
|
||||
Initial state dictionary.
|
||||
"""
|
||||
state: dict[str, Any] = {
|
||||
"messages": [HumanMessage(content=task)],
|
||||
}
|
||||
|
||||
# Pass through sandbox and thread data from parent
|
||||
if self.sandbox_state is not None:
|
||||
state["sandbox"] = self.sandbox_state
|
||||
if self.thread_data is not None:
|
||||
state["thread_data"] = self.thread_data
|
||||
|
||||
return state
|
||||
|
||||
def execute(self, task: str) -> SubagentResult:
|
||||
"""Execute a task synchronously.
|
||||
|
||||
Args:
|
||||
task: The task description for the subagent.
|
||||
|
||||
Returns:
|
||||
SubagentResult with the execution result.
|
||||
"""
|
||||
task_id = str(uuid.uuid4())[:8]
|
||||
result = SubagentResult(
|
||||
task_id=task_id,
|
||||
trace_id=self.trace_id,
|
||||
status=SubagentStatus.RUNNING,
|
||||
started_at=datetime.now(),
|
||||
)
|
||||
|
||||
try:
|
||||
agent = self._create_agent()
|
||||
state = self._build_initial_state(task)
|
||||
|
||||
# Build config with thread_id for sandbox access and recursion limit
|
||||
run_config: RunnableConfig = {
|
||||
"recursion_limit": self.config.max_turns,
|
||||
}
|
||||
context = {}
|
||||
if self.thread_id:
|
||||
run_config["configurable"] = {"thread_id": self.thread_id}
|
||||
context["thread_id"] = self.thread_id
|
||||
|
||||
logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} starting execution with max_turns={self.config.max_turns}")
|
||||
|
||||
# Run the agent using invoke for complete result
|
||||
# Note: invoke() runs until completion or interruption
|
||||
# Timeout is handled at the execute_async level, not here
|
||||
final_state = agent.invoke(state, config=run_config, context=context) # type: ignore[arg-type]
|
||||
|
||||
logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} completed execution")
|
||||
|
||||
# Extract the final message - find the last AIMessage
|
||||
messages = final_state.get("messages", [])
|
||||
logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} final messages count: {len(messages)}")
|
||||
|
||||
# Find the last AIMessage in the conversation
|
||||
last_ai_message = None
|
||||
for msg in reversed(messages):
|
||||
if isinstance(msg, AIMessage):
|
||||
last_ai_message = msg
|
||||
break
|
||||
|
||||
if last_ai_message is not None:
|
||||
content = last_ai_message.content
|
||||
logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} last AI message content type: {type(content)}")
|
||||
|
||||
# Handle both str and list content types
|
||||
if isinstance(content, str):
|
||||
result.result = content
|
||||
elif isinstance(content, list):
|
||||
# Extract text from list of content blocks
|
||||
text_parts = []
|
||||
for block in content:
|
||||
if isinstance(block, str):
|
||||
text_parts.append(block)
|
||||
elif isinstance(block, dict) and "text" in block:
|
||||
text_parts.append(block["text"])
|
||||
result.result = "\n".join(text_parts) if text_parts else "No text content in response"
|
||||
else:
|
||||
result.result = str(content)
|
||||
elif messages:
|
||||
# Fallback: use the last message if no AIMessage found
|
||||
last_message = messages[-1]
|
||||
logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} no AIMessage found, using last message: {type(last_message)}")
|
||||
result.result = str(last_message.content) if hasattr(last_message, "content") else str(last_message)
|
||||
else:
|
||||
logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} no messages in final state")
|
||||
result.result = "No response generated"
|
||||
|
||||
result.status = SubagentStatus.COMPLETED
|
||||
result.completed_at = datetime.now()
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"[trace={self.trace_id}] Subagent {self.config.name} execution failed")
|
||||
result.status = SubagentStatus.FAILED
|
||||
result.error = str(e)
|
||||
result.completed_at = datetime.now()
|
||||
|
||||
return result
|
||||
|
||||
def execute_async(self, task: str) -> str:
|
||||
"""Start a task execution in the background.
|
||||
|
||||
Args:
|
||||
task: The task description for the subagent.
|
||||
|
||||
Returns:
|
||||
Task ID that can be used to check status later.
|
||||
"""
|
||||
task_id = str(uuid.uuid4())[:8]
|
||||
|
||||
# Create initial pending result
|
||||
result = SubagentResult(
|
||||
task_id=task_id,
|
||||
trace_id=self.trace_id,
|
||||
status=SubagentStatus.PENDING,
|
||||
)
|
||||
|
||||
logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} starting async execution, task_id={task_id}")
|
||||
|
||||
with _background_tasks_lock:
|
||||
_background_tasks[task_id] = result
|
||||
|
||||
# Submit to scheduler pool
|
||||
def run_task():
|
||||
with _background_tasks_lock:
|
||||
_background_tasks[task_id].status = SubagentStatus.RUNNING
|
||||
_background_tasks[task_id].started_at = datetime.now()
|
||||
|
||||
try:
|
||||
# Submit execution to execution pool with timeout
|
||||
execution_future: Future = _execution_pool.submit(self.execute, task)
|
||||
try:
|
||||
# Wait for execution with timeout
|
||||
exec_result = execution_future.result(timeout=self.config.timeout_seconds)
|
||||
with _background_tasks_lock:
|
||||
_background_tasks[task_id].status = exec_result.status
|
||||
_background_tasks[task_id].result = exec_result.result
|
||||
_background_tasks[task_id].error = exec_result.error
|
||||
_background_tasks[task_id].completed_at = datetime.now()
|
||||
except FuturesTimeoutError:
|
||||
logger.error(
|
||||
f"[trace={self.trace_id}] Subagent {self.config.name} execution timed out after {self.config.timeout_seconds}s"
|
||||
)
|
||||
with _background_tasks_lock:
|
||||
_background_tasks[task_id].status = SubagentStatus.FAILED
|
||||
_background_tasks[task_id].error = f"Execution timed out after {self.config.timeout_seconds} seconds"
|
||||
_background_tasks[task_id].completed_at = datetime.now()
|
||||
# Cancel the future (best effort - may not stop the actual execution)
|
||||
execution_future.cancel()
|
||||
except Exception as e:
|
||||
logger.exception(f"[trace={self.trace_id}] Subagent {self.config.name} async execution failed")
|
||||
with _background_tasks_lock:
|
||||
_background_tasks[task_id].status = SubagentStatus.FAILED
|
||||
_background_tasks[task_id].error = str(e)
|
||||
_background_tasks[task_id].completed_at = datetime.now()
|
||||
|
||||
_scheduler_pool.submit(run_task)
|
||||
return task_id
|
||||
|
||||
|
||||
def get_background_task_result(task_id: str) -> SubagentResult | None:
|
||||
"""Get the result of a background task.
|
||||
|
||||
Args:
|
||||
task_id: The task ID returned by execute_async.
|
||||
|
||||
Returns:
|
||||
SubagentResult if found, None otherwise.
|
||||
"""
|
||||
with _background_tasks_lock:
|
||||
return _background_tasks.get(task_id)
|
||||
|
||||
|
||||
def list_background_tasks() -> list[SubagentResult]:
|
||||
"""List all background tasks.
|
||||
|
||||
Returns:
|
||||
List of all SubagentResult instances.
|
||||
"""
|
||||
with _background_tasks_lock:
|
||||
return list(_background_tasks.values())
|
||||
34
backend/src/subagents/registry.py
Normal file
34
backend/src/subagents/registry.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""Subagent registry for managing available subagents."""
|
||||
|
||||
from src.subagents.builtins import BUILTIN_SUBAGENTS
|
||||
from src.subagents.config import SubagentConfig
|
||||
|
||||
|
||||
def get_subagent_config(name: str) -> SubagentConfig | None:
|
||||
"""Get a subagent configuration by name.
|
||||
|
||||
Args:
|
||||
name: The name of the subagent.
|
||||
|
||||
Returns:
|
||||
SubagentConfig if found, None otherwise.
|
||||
"""
|
||||
return BUILTIN_SUBAGENTS.get(name)
|
||||
|
||||
|
||||
def list_subagents() -> list[SubagentConfig]:
|
||||
"""List all available subagent configurations.
|
||||
|
||||
Returns:
|
||||
List of all registered SubagentConfig instances.
|
||||
"""
|
||||
return list(BUILTIN_SUBAGENTS.values())
|
||||
|
||||
|
||||
def get_subagent_names() -> list[str]:
|
||||
"""Get all available subagent names.
|
||||
|
||||
Returns:
|
||||
List of subagent names.
|
||||
"""
|
||||
return list(BUILTIN_SUBAGENTS.keys())
|
||||
@@ -1,5 +1,11 @@
|
||||
from .clarification_tool import ask_clarification_tool
|
||||
from .present_file_tool import present_file_tool
|
||||
from .task_tool import task_tool
|
||||
from .view_image_tool import view_image_tool
|
||||
|
||||
__all__ = ["present_file_tool", "ask_clarification_tool", "view_image_tool"]
|
||||
__all__ = [
|
||||
"present_file_tool",
|
||||
"ask_clarification_tool",
|
||||
"view_image_tool",
|
||||
"task_tool",
|
||||
]
|
||||
|
||||
151
backend/src/tools/builtins/task_tool.py
Normal file
151
backend/src/tools/builtins/task_tool.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""Task tool for delegating work to subagents."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from typing import Literal
|
||||
|
||||
from langchain.tools import ToolRuntime, tool
|
||||
from langgraph.typing import ContextT
|
||||
from langgraph.config import get_stream_writer
|
||||
|
||||
from src.agents.thread_state import ThreadState
|
||||
from src.subagents import SubagentExecutor, get_subagent_config
|
||||
from src.subagents.executor import SubagentStatus, get_background_task_result
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@tool("task", parse_docstring=True)
|
||||
def task_tool(
|
||||
runtime: ToolRuntime[ContextT, ThreadState],
|
||||
subagent_type: Literal["general-purpose", "bash"],
|
||||
prompt: str,
|
||||
description: str,
|
||||
max_turns: int | None = None,
|
||||
) -> str:
|
||||
"""Delegate a task to a specialized subagent that runs in its own context.
|
||||
|
||||
Subagents help you:
|
||||
- Preserve context by keeping exploration and implementation separate
|
||||
- Handle complex multi-step tasks autonomously
|
||||
- Execute commands or operations in isolated contexts
|
||||
|
||||
Available subagent types:
|
||||
- **general-purpose**: A capable agent for complex, multi-step tasks that require
|
||||
both exploration and action. Use when the task requires complex reasoning,
|
||||
multiple dependent steps, or would benefit from isolated context.
|
||||
- **bash**: Command execution specialist for running bash commands. Use for
|
||||
git operations, build processes, or when command output would be verbose.
|
||||
|
||||
When to use this tool:
|
||||
- Complex tasks requiring multiple steps or tools
|
||||
- Tasks that produce verbose output
|
||||
- When you want to isolate context from the main conversation
|
||||
- Parallel research or exploration tasks
|
||||
|
||||
When NOT to use this tool:
|
||||
- Simple, single-step operations (use tools directly)
|
||||
- Tasks requiring user interaction or clarification
|
||||
|
||||
Args:
|
||||
subagent_type: The type of subagent to use.
|
||||
prompt: The task description for the subagent. Be specific and clear about what needs to be done.
|
||||
description: A short (3-5 word) description of the task for logging/display.
|
||||
max_turns: Optional maximum number of agent turns. Defaults to subagent's configured max.
|
||||
"""
|
||||
# Get subagent configuration
|
||||
config = get_subagent_config(subagent_type)
|
||||
if config is None:
|
||||
return f"Error: Unknown subagent type '{subagent_type}'. Available: general-purpose, bash"
|
||||
|
||||
# Override max_turns if specified
|
||||
if max_turns is not None:
|
||||
# Create a copy with updated max_turns
|
||||
from dataclasses import replace
|
||||
|
||||
config = replace(config, max_turns=max_turns)
|
||||
|
||||
# Extract parent context from runtime
|
||||
sandbox_state = None
|
||||
thread_data = None
|
||||
thread_id = None
|
||||
parent_model = None
|
||||
trace_id = None
|
||||
|
||||
if runtime is not None:
|
||||
sandbox_state = runtime.state.get("sandbox")
|
||||
thread_data = runtime.state.get("thread_data")
|
||||
thread_id = runtime.context.get("thread_id")
|
||||
|
||||
# Try to get parent model from configurable
|
||||
metadata = runtime.config.get("metadata", {})
|
||||
parent_model = metadata.get("model_name")
|
||||
|
||||
# Get or generate trace_id for distributed tracing
|
||||
trace_id = metadata.get("trace_id") or str(uuid.uuid4())[:8]
|
||||
|
||||
# Get available tools (excluding task tool to prevent nesting)
|
||||
# Lazy import to avoid circular dependency
|
||||
from src.tools import get_available_tools
|
||||
|
||||
# Subagents should not have subagent tools enabled (prevent recursive nesting)
|
||||
tools = get_available_tools(model_name=parent_model, subagent_enabled=False)
|
||||
|
||||
# Create executor
|
||||
executor = SubagentExecutor(
|
||||
config=config,
|
||||
tools=tools,
|
||||
parent_model=parent_model,
|
||||
sandbox_state=sandbox_state,
|
||||
thread_data=thread_data,
|
||||
thread_id=thread_id,
|
||||
trace_id=trace_id,
|
||||
)
|
||||
|
||||
# Start background execution (always async to prevent blocking)
|
||||
task_id = executor.execute_async(prompt)
|
||||
logger.info(f"[trace={trace_id}] Started background task {task_id}, polling for completion...")
|
||||
|
||||
# Poll for task completion in backend (removes need for LLM to poll)
|
||||
poll_count = 0
|
||||
last_status = None
|
||||
|
||||
writer = get_stream_writer()
|
||||
# Send Task Started message'
|
||||
writer({"type": "task_started", "task_id": task_id, "task_type": subagent_type, "description": description})
|
||||
|
||||
|
||||
while True:
|
||||
result = get_background_task_result(task_id)
|
||||
|
||||
if result is None:
|
||||
logger.error(f"[trace={trace_id}] Task {task_id} not found in background tasks")
|
||||
writer({"type": "task_failed", "task_id": task_id, "task_type": subagent_type, "error": "Task disappeared from background tasks"})
|
||||
return f"Error: Task {task_id} disappeared from background tasks"
|
||||
|
||||
# Log status changes for debugging
|
||||
if result.status != last_status:
|
||||
logger.info(f"[trace={trace_id}] Task {task_id} status: {result.status.value}")
|
||||
last_status = result.status
|
||||
|
||||
# Check if task completed or failed
|
||||
if result.status == SubagentStatus.COMPLETED:
|
||||
writer({"type": "task_completed", "task_id": task_id, "task_type": subagent_type, "result": result.result})
|
||||
logger.info(f"[trace={trace_id}] Task {task_id} completed after {poll_count} polls")
|
||||
return f"Task Succeeded. Result: {result.result}"
|
||||
elif result.status == SubagentStatus.FAILED:
|
||||
writer({"type": "task_failed", "task_id": task_id, "task_type": subagent_type, "error": result.error})
|
||||
logger.error(f"[trace={trace_id}] Task {task_id} failed: {result.error}")
|
||||
return f"Task failed. Error: {result.error}"
|
||||
|
||||
# Still running, wait before next poll
|
||||
writer({"type": "task_running", "task_id": task_id, "task_type": subagent_type, "poll_count": poll_count})
|
||||
time.sleep(5) # Poll every 5 seconds
|
||||
poll_count += 1
|
||||
|
||||
# Optional: Add timeout protection (e.g., max 5 minutes)
|
||||
if poll_count > 60: # 60 * 5s = 5 minutes
|
||||
logger.warning(f"[trace={trace_id}] Task {task_id} timed out after {poll_count} polls")
|
||||
writer({"type": "task_timed_out", "task_id": task_id, "task_type": subagent_type})
|
||||
return f"Task timed out after 5 minutes. Status: {result.status.value}"
|
||||
@@ -4,7 +4,7 @@ from langchain.tools import BaseTool
|
||||
|
||||
from src.config import get_app_config
|
||||
from src.reflection import resolve_variable
|
||||
from src.tools.builtins import ask_clarification_tool, present_file_tool, view_image_tool
|
||||
from src.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -13,8 +13,18 @@ BUILTIN_TOOLS = [
|
||||
ask_clarification_tool,
|
||||
]
|
||||
|
||||
SUBAGENT_TOOLS = [
|
||||
task_tool,
|
||||
# task_status_tool is no longer exposed to LLM (backend handles polling internally)
|
||||
]
|
||||
|
||||
def get_available_tools(groups: list[str] | None = None, include_mcp: bool = True, model_name: str | None = None) -> list[BaseTool]:
|
||||
|
||||
def get_available_tools(
|
||||
groups: list[str] | None = None,
|
||||
include_mcp: bool = True,
|
||||
model_name: str | None = None,
|
||||
subagent_enabled: bool = False,
|
||||
) -> list[BaseTool]:
|
||||
"""Get all available tools from config.
|
||||
|
||||
Note: MCP tools should be initialized at application startup using
|
||||
@@ -24,6 +34,7 @@ def get_available_tools(groups: list[str] | None = None, include_mcp: bool = Tru
|
||||
groups: Optional list of tool groups to filter by.
|
||||
include_mcp: Whether to include tools from MCP servers (default: True).
|
||||
model_name: Optional model name to determine if vision tools should be included.
|
||||
subagent_enabled: Whether to include subagent tools (task, task_status).
|
||||
|
||||
Returns:
|
||||
List of available tools.
|
||||
@@ -52,13 +63,19 @@ def get_available_tools(groups: list[str] | None = None, include_mcp: bool = Tru
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get cached MCP tools: {e}")
|
||||
|
||||
# Conditionally add view_image_tool only if the model supports vision
|
||||
# Conditionally add tools based on config
|
||||
builtin_tools = BUILTIN_TOOLS.copy()
|
||||
|
||||
# Add subagent tools only if enabled via runtime parameter
|
||||
if subagent_enabled:
|
||||
builtin_tools.extend(SUBAGENT_TOOLS)
|
||||
logger.info("Including subagent tools (task)")
|
||||
|
||||
# If no model_name specified, use the first model (default)
|
||||
if model_name is None and config.models:
|
||||
model_name = config.models[0].name
|
||||
|
||||
# Add view_image_tool only if the model supports vision
|
||||
model_config = config.get_model_config(model_name) if model_name else None
|
||||
if model_config is not None and model_config.supports_vision:
|
||||
builtin_tools.append(view_image_tool)
|
||||
|
||||
Reference in New Issue
Block a user