feat: support sub agent mechanism

2026-04-17 19:44:45 +08:00 · 2026-02-05 19:59:25 +08:00
parent c31175defd
commit cbd2fe66de
18 changed files with 775 additions and 33 deletions
--- a/backend/src/subagents/__init__.py
+++ b/backend/src/subagents/__init__.py
@@ -0,0 +1,11 @@
+from .config import SubagentConfig
+from .executor import SubagentExecutor, SubagentResult
+from .registry import get_subagent_config, list_subagents
+
+# Public API of the subagents package: the names re-exported from the
+# config, executor and registry modules imported above.
+__all__ = [
+    "SubagentConfig",
+    "SubagentExecutor",
+    "SubagentResult",
+    "get_subagent_config",
+    "list_subagents",
+]
--- a/backend/src/subagents/builtins/__init__.py
+++ b/backend/src/subagents/builtins/__init__.py
@@ -0,0 +1,15 @@
+"""Built-in subagent configurations."""
+
+from .bash_agent import BASH_AGENT_CONFIG
+from .general_purpose import GENERAL_PURPOSE_CONFIG
+
+# Public names of this package. BUILTIN_SUBAGENTS is listed as well because
+# it is the lookup table that the subagent registry imports from here.
+__all__ = [
+    "GENERAL_PURPOSE_CONFIG",
+    "BASH_AGENT_CONFIG",
+    "BUILTIN_SUBAGENTS",
+]
+
+# Registry of built-in subagents, keyed by the name used to look a
+# configuration up. Each key equals the `name` field of its configuration.
+BUILTIN_SUBAGENTS = {
+    "general-purpose": GENERAL_PURPOSE_CONFIG,
+    "bash": BASH_AGENT_CONFIG,
+}
--- a/backend/src/subagents/builtins/bash_agent.py
+++ b/backend/src/subagents/builtins/bash_agent.py
@@ -0,0 +1,46 @@
+"""Bash command execution subagent configuration."""
+
+from src.subagents.config import SubagentConfig
+
+# Built-in "bash" subagent: a command-execution specialist that is limited to
+# an explicit allowlist of sandbox tools.
+BASH_AGENT_CONFIG = SubagentConfig(
+    name="bash",
+    # Explains when work should be delegated to this subagent
+    # (see SubagentConfig.description).
+    description="""Command execution specialist for running bash commands in a separate context.
+
+Use this subagent when:
+- You need to run a series of related bash commands
+- Terminal operations like git, npm, docker, etc.
+- Command output is verbose and would clutter main context
+- Build, test, or deployment operations
+
+Do NOT use for simple single commands - use bash tool directly instead.""",
+    # System prompt for the subagent; the paths listed under
+    # <working_directory> are the sandbox mount points it is told about.
+    system_prompt="""You are a bash command execution specialist. Execute the requested commands carefully and report results clearly.
+
+<guidelines>
+- Execute commands one at a time when they depend on each other
+- Use parallel execution when commands are independent
+- Report both stdout and stderr when relevant
+- Handle errors gracefully and explain what went wrong
+- Use absolute paths for file operations
+- Be cautious with destructive operations (rm, overwrite, etc.)
+</guidelines>
+
+<output_format>
+For each command or group of commands:
+1. What was executed
+2. The result (success/failure)
+3. Relevant output (summarized if verbose)
+4. Any errors or warnings
+</output_format>
+
+<working_directory>
+You have access to the sandbox environment:
+- User uploads: `/mnt/user-data/uploads`
+- User workspace: `/mnt/user-data/workspace`
+- Output files: `/mnt/user-data/outputs`
+</working_directory>
+""",
+    # Allowlist: only tools whose name appears here are kept.
+    tools=["bash", "ls", "read_file", "write_file", "str_replace"],  # Sandbox tools only
+    # Denylist. Neither name is in the allowlist above, so with this
+    # configuration it removes nothing beyond what the allowlist already does.
+    disallowed_tools=["task", "ask_clarification"],
+    # "inherit" means the parent agent's model is used.
+    model="inherit",
+    # Lower than the SubagentConfig default of 50.
+    max_turns=30,
+)
--- a/backend/src/subagents/builtins/general_purpose.py
+++ b/backend/src/subagents/builtins/general_purpose.py
@@ -0,0 +1,46 @@
+"""General-purpose subagent configuration."""
+
+from src.subagents.config import SubagentConfig
+
+# Built-in "general-purpose" subagent: inherits the full tool set (minus the
+# denylist below) for multi-step delegated tasks.
+GENERAL_PURPOSE_CONFIG = SubagentConfig(
+    name="general-purpose",
+    # Explains when work should be delegated to this subagent
+    # (see SubagentConfig.description).
+    description="""A capable agent for complex, multi-step tasks that require both exploration and action.
+
+Use this subagent when:
+- The task requires both exploration and modification
+- Complex reasoning is needed to interpret results
+- Multiple dependent steps must be executed
+- The task would benefit from isolated context management
+
+Do NOT use for simple, single-step operations.""",
+    # System prompt for the subagent; the paths listed under
+    # <working_directory> are the sandbox mount points it is told about.
+    system_prompt="""You are a general-purpose subagent working on a delegated task. Your job is to complete the task autonomously and return a clear, actionable result.
+
+<guidelines>
+- Focus on completing the delegated task efficiently
+- Use available tools as needed to accomplish the goal
+- Think step by step but act decisively
+- If you encounter issues, explain them clearly in your response
+- Return a concise summary of what you accomplished
+- Do NOT ask for clarification - work with the information provided
+</guidelines>
+
+<output_format>
+When you complete the task, provide:
+1. A brief summary of what was accomplished
+2. Key findings or results
+3. Any relevant file paths, data, or artifacts created
+4. Issues encountered (if any)
+</output_format>
+
+<working_directory>
+You have access to the same sandbox environment as the parent agent:
+- User uploads: `/mnt/user-data/uploads`
+- User workspace: `/mnt/user-data/workspace`
+- Output files: `/mnt/user-data/outputs`
+</working_directory>
+""",
+    # None means no allowlist: every tool handed to the executor is a candidate.
+    tools=None,  # Inherit all tools from parent
+    # These tool names are removed from the inherited set.
+    disallowed_tools=["task", "ask_clarification"],  # Prevent nesting and clarification
+    # "inherit" means the parent agent's model is used.
+    model="inherit",
+    # Same value as the SubagentConfig default, stated explicitly.
+    max_turns=50,
+)
--- a/backend/src/subagents/config.py
+++ b/backend/src/subagents/config.py
@@ -0,0 +1,26 @@
+"""Subagent configuration definitions."""
+
+from dataclasses import dataclass, field
+
+
+def _default_disallowed_tools() -> list[str]:
+    """Return a fresh default denylist so instances never share one list."""
+    return ["task"]
+
+
+@dataclass
+class SubagentConfig:
+    """Declarative settings describing a single subagent.
+
+    Attributes:
+        name: Unique identifier for the subagent.
+        description: Text explaining when work should be delegated to this subagent.
+        system_prompt: System prompt that steers the subagent's behavior.
+        tools: Names of the tools the subagent may use. ``None`` means it
+            inherits all tools.
+        disallowed_tools: Names of tools the subagent is denied. Defaults to
+            ``["task"]``; ``None`` means no denylist.
+        model: Model to use; ``"inherit"`` reuses the parent's model.
+        max_turns: Maximum number of agent turns before stopping.
+    """
+
+    name: str
+    description: str
+    system_prompt: str
+    tools: list[str] | None = None
+    disallowed_tools: list[str] | None = field(default_factory=_default_disallowed_tools)
+    model: str = "inherit"
+    max_turns: int = 50
--- a/backend/src/subagents/executor.py
+++ b/backend/src/subagents/executor.py
@@ -0,0 +1,336 @@
+"""Subagent execution engine."""
+
+import logging
+import threading
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Any
+
+from langchain.agents import create_agent
+from langchain.tools import BaseTool
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.runnables import RunnableConfig
+
+from src.agents.thread_state import SandboxState, ThreadDataState, ThreadState
+from src.models import create_chat_model
+from src.subagents.config import SubagentConfig
+
+logger = logging.getLogger(__name__)
+
+
+class SubagentStatus(Enum):
+    """Status of a subagent execution."""
+
+    # Registered for background execution; the worker has not started it yet.
+    PENDING = "pending"
+    # Execution is in progress.
+    RUNNING = "running"
+    # The agent run finished without raising an exception.
+    COMPLETED = "completed"
+    # The agent run raised; the result's `error` field holds the message.
+    FAILED = "failed"
+
+
+@dataclass
+class SubagentResult:
+    """Result of a subagent execution.
+
+    Instances are mutable and are filled in as a run progresses, so a pending
+    or running task still has `result`, `error` and `completed_at` unset.
+
+    Attributes:
+        task_id: Unique identifier for this execution.
+        trace_id: Trace ID for distributed tracing (links parent and subagent logs).
+        status: Current status of the execution.
+        result: The final result message (if completed).
+        error: Error message (if failed).
+        started_at: When execution started.
+        completed_at: When execution completed.
+    """
+
+    task_id: str
+    trace_id: str
+    status: SubagentStatus
+    # The optional fields below default to None until the executor sets them.
+    result: str | None = None
+    error: str | None = None
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+
+
+# Global storage for background task results, keyed by task ID.
+# Access to the mapping is guarded by _background_tasks_lock.
+# NOTE(review): nothing visible here ever removes entries, so this mapping
+# appears to grow for the lifetime of the process - confirm whether cleanup
+# happens elsewhere.
+_background_tasks: dict[str, SubagentResult] = {}
+_background_tasks_lock = threading.Lock()
+
+# Thread pool for background execution. At most 4 worker threads run at a
+# time; their names start with "subagent-".
+_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="subagent-")
+
+
+def _filter_tools(
+    all_tools: list[BaseTool],
+    allowed: list[str] | None,
+    disallowed: list[str] | None,
+) -> list[BaseTool]:
+    """Select the tools a subagent is permitted to use.
+
+    Args:
+        all_tools: Every tool that is available.
+        allowed: Optional allowlist of tool names. When given, a tool is kept only if its name is listed.
+        disallowed: Optional denylist of tool names. A listed tool is dropped even if it is also allowed.
+
+    Returns:
+        The tools passing both checks, in their original order. When neither
+        list is given, ``all_tools`` itself is returned.
+    """
+    # Nothing to filter: hand the input back untouched.
+    if allowed is None and disallowed is None:
+        return all_tools
+
+    permitted_names = None if allowed is None else set(allowed)
+    blocked_names = set() if disallowed is None else set(disallowed)
+
+    return [
+        tool
+        for tool in all_tools
+        if (permitted_names is None or tool.name in permitted_names) and tool.name not in blocked_names
+    ]
+
+
+def _get_model_name(config: SubagentConfig, parent_model: str | None) -> str | None:
+    """Work out which model a subagent should run on.
+
+    Args:
+        config: Subagent configuration.
+        parent_model: Name of the model used by the parent agent, if known.
+
+    Returns:
+        ``parent_model`` when the configuration says ``"inherit"`` (this may
+        be None), otherwise the model named in the configuration.
+    """
+    inherits_from_parent = config.model == "inherit"
+    return parent_model if inherits_from_parent else config.model
+
+
+class SubagentExecutor:
+    """Executor for running subagents.
+
+    An executor is bound to one subagent configuration plus the context handed
+    over by the parent agent. A task can be run synchronously with `execute`
+    or on the shared background thread pool with `execute_async`.
+    """
+
+    def __init__(
+        self,
+        config: SubagentConfig,
+        tools: list[BaseTool],
+        parent_model: str | None = None,
+        sandbox_state: SandboxState | None = None,
+        thread_data: ThreadDataState | None = None,
+        thread_id: str | None = None,
+        trace_id: str | None = None,
+    ):
+        """Initialize the executor.
+
+        Args:
+            config: Subagent configuration.
+            tools: List of all available tools (will be filtered).
+            parent_model: The parent agent's model name for inheritance.
+            sandbox_state: Sandbox state from parent agent.
+            thread_data: Thread data from parent agent.
+            thread_id: Thread ID for sandbox operations.
+            trace_id: Trace ID from parent for distributed tracing.
+        """
+        self.config = config
+        self.parent_model = parent_model
+        self.sandbox_state = sandbox_state
+        self.thread_data = thread_data
+        self.thread_id = thread_id
+        # Generate trace_id if not provided (for top-level calls)
+        self.trace_id = trace_id or str(uuid.uuid4())[:8]
+
+        # Filter tools based on config
+        self.tools = _filter_tools(
+            tools,
+            config.tools,
+            config.disallowed_tools,
+        )
+
+        logger.info(f"[trace={self.trace_id}] SubagentExecutor initialized: {config.name} with {len(self.tools)} tools")
+
+    def _create_agent(self):
+        """Create the agent instance.
+
+        Returns:
+            An agent built from the resolved model, the filtered tools and the
+            configured system prompt, using ThreadState as its state schema.
+        """
+        model_name = _get_model_name(self.config, self.parent_model)
+        model = create_chat_model(name=model_name, thinking_enabled=False)
+
+        # Create a simple agent without middlewares
+        # Subagents don't need the full middleware chain
+        return create_agent(
+            model=model,
+            tools=self.tools,
+            system_prompt=self.config.system_prompt,
+            state_schema=ThreadState,
+        )
+
+    def _build_initial_state(self, task: str) -> dict[str, Any]:
+        """Build the initial state for agent execution.
+
+        Args:
+            task: The task description.
+
+        Returns:
+            Initial state dictionary holding the task as a single human
+            message, plus the parent's sandbox and thread data when present.
+        """
+        state: dict[str, Any] = {
+            "messages": [HumanMessage(content=task)],
+        }
+
+        # Pass through sandbox and thread data from parent
+        if self.sandbox_state is not None:
+            state["sandbox"] = self.sandbox_state
+        if self.thread_data is not None:
+            state["thread_data"] = self.thread_data
+
+        return state
+
+    def _extract_result_text(self, messages: list[Any]) -> str:
+        """Turn the agent's final message list into the result text.
+
+        Args:
+            messages: The messages taken from the agent's final state.
+
+        Returns:
+            The text of the last AIMessage. If there is no AIMessage, the
+            string form of the last message is used. An empty message list
+            yields "No response generated".
+        """
+        logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} final messages count: {len(messages)}")
+
+        # Find the last AIMessage in the conversation
+        last_ai_message = next((msg for msg in reversed(messages) if isinstance(msg, AIMessage)), None)
+
+        if last_ai_message is None:
+            if not messages:
+                logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} no messages in final state")
+                return "No response generated"
+
+            # Fallback: use the last message if no AIMessage found
+            last_message = messages[-1]
+            logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} no AIMessage found, using last message: {type(last_message)}")
+            return str(last_message.content) if hasattr(last_message, "content") else str(last_message)
+
+        content = last_ai_message.content
+        logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} last AI message content type: {type(content)}")
+
+        # Handle both str and list content types
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            # Extract text from list of content blocks; blocks that are
+            # neither strings nor dicts with a "text" key are skipped.
+            text_parts = [
+                block if isinstance(block, str) else block["text"]
+                for block in content
+                if isinstance(block, str) or (isinstance(block, dict) and "text" in block)
+            ]
+            return "\n".join(text_parts) if text_parts else "No text content in response"
+        return str(content)
+
+    def execute(self, task: str) -> SubagentResult:
+        """Execute a task synchronously.
+
+        Any exception raised while creating or running the agent is caught,
+        logged and reported through the returned result instead of propagating.
+
+        Args:
+            task: The task description for the subagent.
+
+        Returns:
+            SubagentResult with status COMPLETED and `result` set, or status
+            FAILED and `error` set.
+        """
+        task_id = str(uuid.uuid4())[:8]
+        result = SubagentResult(
+            task_id=task_id,
+            trace_id=self.trace_id,
+            status=SubagentStatus.RUNNING,
+            started_at=datetime.now(),
+        )
+
+        try:
+            agent = self._create_agent()
+            state = self._build_initial_state(task)
+
+            # Build config with thread_id for sandbox access and recursion limit
+            # NOTE(review): max_turns is passed as the run's recursion limit;
+            # confirm that one agent "turn" corresponds to one counted step.
+            run_config: RunnableConfig = {
+                "recursion_limit": self.config.max_turns,
+            }
+            if self.thread_id:
+                run_config["configurable"] = {"thread_id": self.thread_id}
+
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} starting execution with max_turns={self.config.max_turns}")
+
+            # Run the agent using invoke for complete result
+            # Note: invoke() runs until completion or interruption
+            final_state = agent.invoke(state, config=run_config)  # type: ignore[arg-type]
+
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} completed execution")
+
+            # Extract the final message - find the last AIMessage
+            result.result = self._extract_result_text(final_state.get("messages", []))
+            result.completed_at = datetime.now()
+            result.status = SubagentStatus.COMPLETED
+
+        except Exception as e:
+            logger.exception(f"[trace={self.trace_id}] Subagent {self.config.name} execution failed")
+            result.error = str(e)
+            result.completed_at = datetime.now()
+            result.status = SubagentStatus.FAILED
+
+        return result
+
+    def execute_async(self, task: str) -> str:
+        """Start a task execution in the background.
+
+        A PENDING record is stored under the new task ID right away; the
+        worker thread later updates that same record in place.
+
+        Args:
+            task: The task description for the subagent.
+
+        Returns:
+            Task ID that can be used to check status later.
+        """
+        task_id = str(uuid.uuid4())[:8]
+
+        # Create initial pending result
+        result = SubagentResult(
+            task_id=task_id,
+            trace_id=self.trace_id,
+            status=SubagentStatus.PENDING,
+        )
+
+        logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} starting async execution, task_id={task_id}")
+
+        with _background_tasks_lock:
+            _background_tasks[task_id] = result
+
+        # Submit to thread pool
+        def run_task():
+            with _background_tasks_lock:
+                _background_tasks[task_id].status = SubagentStatus.RUNNING
+                _background_tasks[task_id].started_at = datetime.now()
+
+            try:
+                exec_result = self.execute(task)
+                with _background_tasks_lock:
+                    record = _background_tasks[task_id]
+                    record.result = exec_result.result
+                    record.error = exec_result.error
+                    record.completed_at = datetime.now()
+                    # Publish the terminal status last. Readers receive this
+                    # same object and inspect it after the lookup has released
+                    # the lock, so once they see a terminal status the
+                    # result/error fields must already be filled in.
+                    record.status = exec_result.status
+            except Exception as e:
+                # execute() reports its own failures through the result, so
+                # reaching this point is unexpected; log it rather than only
+                # recording the message.
+                logger.exception(f"[trace={self.trace_id}] Subagent {self.config.name} background task {task_id} failed unexpectedly")
+                with _background_tasks_lock:
+                    record = _background_tasks[task_id]
+                    record.error = str(e)
+                    record.completed_at = datetime.now()
+                    # Status last, for the same reason as above.
+                    record.status = SubagentStatus.FAILED
+
+        _executor.submit(run_task)
+        return task_id
+
+
+def get_background_task_result(task_id: str) -> SubagentResult | None:
+    """Look up a background task by its ID.
+
+    Args:
+        task_id: The task ID returned by execute_async.
+
+    Returns:
+        The stored SubagentResult for that ID, or None when the ID is unknown.
+    """
+    with _background_tasks_lock:
+        stored_result = _background_tasks.get(task_id)
+    return stored_result
+
+
+def list_background_tasks() -> list[SubagentResult]:
+    """Return every background task currently stored.
+
+    Returns:
+        A new list of the stored SubagentResult instances. The list is a
+        copy; the instances in it are the stored objects themselves.
+    """
+    with _background_tasks_lock:
+        snapshot = [*_background_tasks.values()]
+    return snapshot
--- a/backend/src/subagents/registry.py
+++ b/backend/src/subagents/registry.py
@@ -0,0 +1,34 @@
+"""Subagent registry for managing available subagents."""
+
+from src.subagents.builtins import BUILTIN_SUBAGENTS
+from src.subagents.config import SubagentConfig
+
+
+def get_subagent_config(name: str) -> SubagentConfig | None:
+    """Look up the configuration registered under a subagent name.
+
+    Args:
+        name: The name of the subagent.
+
+    Returns:
+        The matching SubagentConfig, or None when no subagent has that name.
+    """
+    try:
+        return BUILTIN_SUBAGENTS[name]
+    except KeyError:
+        return None
+
+
+def list_subagents() -> list[SubagentConfig]:
+    """Return the configurations of all registered subagents.
+
+    Returns:
+        A new list containing every registered SubagentConfig instance.
+    """
+    return [*BUILTIN_SUBAGENTS.values()]
+
+
+def get_subagent_names() -> list[str]:
+    """Return the names of all registered subagents.
+
+    Returns:
+        A new list of subagent names.
+    """
+    return list(BUILTIN_SUBAGENTS)