Merge upstream/experimental into feat/citations

Resolved conflicts: - backend/src/gateway/routers/artifacts.py: Keep citations block removal for markdown downloads - frontend/src/components/workspace/messages/message-list-item.tsx: Keep improved citation handling with rehypePlugins, humanMessagePlugins, and CitationsLoadingIndicator Co-authored-by: Cursor <cursoragent@cursor.com>
2026-04-18 12:04:45 +08:00 · 2026-02-07 00:53:16 +08:00
parent 50ced32722 c3f9089e95
commit ea543ce1f4
65 changed files with 3489 additions and 5320 deletions
--- a/backend/src/agents/lead_agent/agent.py
+++ b/backend/src/agents/lead_agent/agent.py
@@ -233,11 +233,12 @@ def make_lead_agent(config: RunnableConfig):
    thinking_enabled = config.get("configurable", {}).get("thinking_enabled", True)
    model_name = config.get("configurable", {}).get("model_name") or config.get("configurable", {}).get("model")
    is_plan_mode = config.get("configurable", {}).get("is_plan_mode", False)
-    print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}, is_plan_mode: {is_plan_mode}")
+    subagent_enabled = config.get("configurable", {}).get("subagent_enabled", False)
+    print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}, is_plan_mode: {is_plan_mode}, subagent_enabled: {subagent_enabled}")
    return create_agent(
        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
-        tools=get_available_tools(model_name=model_name),
+        tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled),
        middleware=_build_middlewares(config),
-        system_prompt=apply_prompt_template(),
+        system_prompt=apply_prompt_template(subagent_enabled=subagent_enabled),
        state_schema=ThreadState,
    )
--- a/backend/src/agents/lead_agent/prompt.py
+++ b/backend/src/agents/lead_agent/prompt.py
@@ -2,6 +2,130 @@ from datetime import datetime

 from src.skills import load_skills

+SUBAGENT_SECTION = """<subagent_system>
+**🚀 SUBAGENT MODE ACTIVE - DECOMPOSE, DELEGATE, SYNTHESIZE**
+
+You are running with subagent capabilities enabled. Your role is to be a **task orchestrator**:
+1. **DECOMPOSE**: Break complex tasks into parallel sub-tasks
+2. **DELEGATE**: Launch multiple subagents simultaneously using parallel `task` calls
+3. **SYNTHESIZE**: Collect and integrate results into a coherent answer
+
+**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**
+
+**Available Subagents:**
+- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.
+- **bash**: For command execution (git, build, test, deploy operations)
+
+**Your Orchestration Strategy:**
+
+✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**
+
+For complex queries, break them down into multiple focused sub-tasks and execute in parallel:
+
+**Example 1: "Why is Tencent's stock price declining?"**
+→ Decompose into 4 parallel searches:
+- Subagent 1: Recent financial reports and earnings data
+- Subagent 2: Negative news and controversies
+- Subagent 3: Industry trends and competitor performance
+- Subagent 4: Macro-economic factors and market sentiment
+
+**Example 2: "What are the latest AI trends in 2026?"**
+→ Decompose into parallel research areas:
+- Subagent 1: LLM and foundation model developments
+- Subagent 2: AI infrastructure and hardware trends
+- Subagent 3: Enterprise AI adoption patterns
+- Subagent 4: Regulatory and ethical developments
+
+**Example 3: "Refactor the authentication system"**
+→ Decompose into parallel analysis:
+- Subagent 1: Analyze current auth implementation
+- Subagent 2: Research best practices and security patterns
+- Subagent 3: Check for vulnerabilities and technical debt
+- Subagent 4: Review related tests and documentation
+
+✅ **USE Parallel Subagents (2+ subagents) when:**
+- **Complex research questions**: Requires multiple information sources or perspectives
+- **Multi-aspect analysis**: Task has several independent dimensions to explore
+- **Large codebases**: Need to analyze different parts simultaneously
+- **Comprehensive investigations**: Questions requiring thorough coverage from multiple angles
+
+❌ **DO NOT use subagents (execute directly) when:**
+- **Task cannot be decomposed**: If you can't break it into 2+ meaningful parallel sub-tasks, execute directly
+- **Ultra-simple actions**: Read one file, quick edits, single commands
+- **Need immediate clarification**: Must ask user before proceeding
+- **Meta conversation**: Questions about conversation history
+- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)
+
+**CRITICAL WORKFLOW**:
+1. In your thinking: Can I decompose this into 2+ independent parallel sub-tasks?
+2. **YES** → Launch multiple `task` calls in parallel, then synthesize results
+3. **NO** → Execute directly using available tools (bash, read_file, web_search, etc.)
+
+**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**
+
+**How It Works:**
+- The task tool runs subagents asynchronously in the background
+- The backend automatically polls for completion (you don't need to poll)
+- The tool call will block until the subagent completes its work
+- Once complete, the result is returned to you directly
+
+**Usage Example - Parallel Decomposition:**
+
+```python
+# User asks: "Why is Tencent's stock price declining?"
+# Thinking: This is complex research requiring multiple angles
+# → Decompose into 4 parallel searches
+
+# Launch 4 subagents in a SINGLE response with multiple tool calls:
+
+# Subagent 1: Financial data
+task(
+    subagent_type="general-purpose",
+    prompt="Search for Tencent's latest financial reports, quarterly earnings, and revenue trends in 2025-2026. Focus on numbers and official data.",
+    description="Tencent financial data"
+)
+
+# Subagent 2: Negative news
+task(
+    subagent_type="general-purpose",
+    prompt="Search for recent negative news, controversies, or regulatory issues affecting Tencent in 2025-2026.",
+    description="Tencent negative news"
+)
+
+# Subagent 3: Industry/competitors
+task(
+    subagent_type="general-purpose",
+    prompt="Search for Chinese tech industry trends and how Tencent's competitors (Alibaba, ByteDance) are performing in 2025-2026.",
+    description="Industry comparison"
+)
+
+# Subagent 4: Market factors
+task(
+    subagent_type="general-purpose",
+    prompt="Search for macro-economic factors affecting Chinese tech stocks and overall market sentiment toward Tencent in 2025-2026.",
+    description="Market sentiment"
+)
+
+# All 4 subagents run in parallel, results return simultaneously
+# Then synthesize findings into comprehensive analysis
+```
+
+**Counter-Example - Direct Execution (NO subagents):**
+
+```python
+# User asks: "Run the tests"
+# Thinking: Cannot decompose into parallel sub-tasks
+# → Execute directly
+
+bash("npm test")  # Direct execution, not task()
+```
+
+**CRITICAL**:
+- Only use `task` when you can launch 2+ subagents in parallel
+- Single task = No value from subagents = Execute directly
+- Multiple tasks in SINGLE response = Parallel execution
+</subagent_system>"""
+
 SYSTEM_PROMPT_TEMPLATE = """
 <role>
 You are DeerFlow 2.0, an open-source super agent.
@@ -13,7 +137,7 @@ You are DeerFlow 2.0, an open-source super agent.
 - Think concisely and strategically about the user's request BEFORE taking action
 - Break down the task: What is clear? What is ambiguous? What is missing?
 - **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work**
- Never write down your full final answer or report in thinking process, but only outline
+{subagent_thinking}- Never write down your full final answer or report in thinking process, but only outline
 - CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
 - Your response must contain the actual answer, not just a reference to what you thought about
 </thinking_style>
@@ -103,6 +227,8 @@ You have access to skills that provide optimized workflows for specific tasks. E

 </skill_system>

+{subagent_section}
+
 <working_directory existed="true">
 - User uploads: `/mnt/user-data/uploads` - Files uploaded by the user (automatically listed in context)
 - User workspace: `/mnt/user-data/workspace` - Working directory for temporary files
@@ -149,7 +275,7 @@ The key AI trends for 2026 include enhanced reasoning capabilities and multimoda

 <critical_reminders>
 - **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess
- Skill First: Always load the relevant skill before starting **complex** tasks.
+{subagent_reminder}- Skill First: Always load the relevant skill before starting **complex** tasks.
 - Progressive Loading: Load resources incrementally as referenced in skills
 - Output Files: Final deliverables must be in `/mnt/user-data/outputs`
 - Clarity: Be direct and helpful, avoid unnecessary meta-commentary
@@ -176,9 +302,7 @@ def _get_memory_context() -> str:
            return ""

        memory_data = get_memory_data()
-        memory_content = format_memory_for_injection(
-            memory_data, max_tokens=config.max_injection_tokens
-        )
+        memory_content = format_memory_for_injection(memory_data, max_tokens=config.max_injection_tokens)

        if not memory_content.strip():
            return ""
@@ -192,29 +316,24 @@ def _get_memory_context() -> str:
        return ""


-def apply_prompt_template() -> str:
+def apply_prompt_template(subagent_enabled: bool = False) -> str:
    # Load only enabled skills
    skills = load_skills(enabled_only=True)

-    # Get skills container path from config
+    # Get config
    try:
        from src.config import get_app_config

        config = get_app_config()
        container_base_path = config.skills.container_path
    except Exception:
-        # Fallback to default if config fails
+        # Fallback to defaults if config fails
        container_base_path = "/mnt/skills"

    # Generate skills list XML with paths (path points to SKILL.md file)
    if skills:
        skill_items = "\n".join(
-            f"    <skill>\n"
-            f"        <name>{skill.name}</name>\n"
-            f"        <description>{skill.description}</description>\n"
-            f"        <location>{skill.get_container_file_path(container_base_path)}</location>\n"
-            f"    </skill>"
-            for skill in skills
+            f"    <skill>\n        <name>{skill.name}</name>\n        <description>{skill.description}</description>\n        <location>{skill.get_container_file_path(container_base_path)}</location>\n    </skill>" for skill in skills
        )
        skills_list = f"<available_skills>\n{skill_items}\n</available_skills>"
    else:
@@ -223,11 +342,31 @@ def apply_prompt_template() -> str:
    # Get memory context
    memory_context = _get_memory_context()

+    # Include subagent section only if enabled (from runtime parameter)
+    subagent_section = SUBAGENT_SECTION if subagent_enabled else ""
+
+    # Add subagent reminder to critical_reminders if enabled
+    subagent_reminder = (
+        "- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks and launch multiple subagents simultaneously. Synthesize results, don't execute directly.\n"
+        if subagent_enabled
+        else ""
+    )
+
+    # Add subagent thinking guidance if enabled
+    subagent_thinking = (
+        "- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, decompose and launch multiple subagents in parallel. Your role is orchestrator, not executor.**\n"
+        if subagent_enabled
+        else ""
+    )
+
    # Format the prompt with dynamic skills and memory
    prompt = SYSTEM_PROMPT_TEMPLATE.format(
        skills_list=skills_list,
        skills_base_path=container_base_path,
        memory_context=memory_context,
+        subagent_section=subagent_section,
+        subagent_reminder=subagent_reminder,
+        subagent_thinking=subagent_thinking,
    )

    return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
--- a/backend/src/agents/memory/prompt.py
+++ b/backend/src/agents/memory/prompt.py
@@ -2,6 +2,13 @@

 from typing import Any

+try:
+    import tiktoken
+
+    TIKTOKEN_AVAILABLE = True
+except ImportError:
+    TIKTOKEN_AVAILABLE = False
+
 # Prompt template for updating memory based on conversation
 MEMORY_UPDATE_PROMPT = """You are a memory management system. Your task is to analyze a conversation and update the user's memory profile.

@@ -17,22 +24,60 @@ New Conversation to Process:

 Instructions:
 1. Analyze the conversation for important information about the user
-2. Extract relevant facts, preferences, and context
-3. Update the memory sections as needed:
-   - workContext: User's work-related information (job, projects, tools, technologies)
-   - personalContext: Personal preferences, communication style, background
-   - topOfMind: Current focus areas, ongoing tasks, immediate priorities
+2. Extract relevant facts, preferences, and context with specific details (numbers, names, technologies)
+3. Update the memory sections as needed following the detailed length guidelines below

-4. For facts extraction:
-   - Extract specific, verifiable facts about the user
-   - Assign appropriate categories: preference, knowledge, context, behavior, goal
-   - Estimate confidence (0.0-1.0) based on how explicit the information is
-   - Avoid duplicating existing facts
+Memory Section Guidelines:

-5. Update history sections:
-   - recentMonths: Summary of recent activities and discussions
-   - earlierContext: Important historical context
-   - longTermBackground: Persistent background information
+**User Context** (Current state - concise summaries):
+- workContext: Professional role, company, key projects, main technologies (2-3 sentences)
+  Example: Core contributor, project names with metrics (16k+ stars), technical stack
+- personalContext: Languages, communication preferences, key interests (1-2 sentences)
+  Example: Bilingual capabilities, specific interest areas, expertise domains
+- topOfMind: Multiple ongoing focus areas and priorities (3-5 sentences, detailed paragraph)
+  Example: Primary project work, parallel technical investigations, ongoing learning/tracking
+  Include: Active implementation work, troubleshooting issues, market/research interests
+  Note: This captures SEVERAL concurrent focus areas, not just one task
+
+**History** (Temporal context - rich paragraphs):
+- recentMonths: Detailed summary of recent activities (4-6 sentences or 1-2 paragraphs)
+  Timeline: Last 1-3 months of interactions
+  Include: Technologies explored, projects worked on, problems solved, interests demonstrated
+- earlierContext: Important historical patterns (3-5 sentences or 1 paragraph)
+  Timeline: 3-12 months ago
+  Include: Past projects, learning journeys, established patterns
+- longTermBackground: Persistent background and foundational context (2-4 sentences)
+  Timeline: Overall/foundational information
+  Include: Core expertise, longstanding interests, fundamental working style
+
+**Facts Extraction**:
+- Extract specific, quantifiable details (e.g., "16k+ GitHub stars", "200+ datasets")
+- Include proper nouns (company names, project names, technology names)
+- Preserve technical terminology and version numbers
+- Categories:
+  * preference: Tools, styles, approaches user prefers/dislikes
+  * knowledge: Specific expertise, technologies mastered, domain knowledge
+  * context: Background facts (job title, projects, locations, languages)
+  * behavior: Working patterns, communication habits, problem-solving approaches
+  * goal: Stated objectives, learning targets, project ambitions
+- Confidence levels:
+  * 0.9-1.0: Explicitly stated facts ("I work on X", "My role is Y")
+  * 0.7-0.8: Strongly implied from actions/discussions
+  * 0.5-0.6: Inferred patterns (use sparingly, only for clear patterns)
+
+**What Goes Where**:
+- workContext: Current job, active projects, primary tech stack
+- personalContext: Languages, personality, interests outside direct work tasks
+- topOfMind: Multiple ongoing priorities and focus areas user cares about recently (gets updated most frequently)
+  Should capture 3-5 concurrent themes: main work, side explorations, learning/tracking interests
+- recentMonths: Detailed account of recent technical explorations and work
+- earlierContext: Patterns from slightly older interactions still relevant
+- longTermBackground: Unchanging foundational facts about the user
+
+**Multilingual Content**:
+- Preserve original language for proper nouns and company names
+- Keep technical terms in their original form (DeepSeek, LangGraph, etc.)
+- Note language capabilities in personalContext

 Output Format (JSON):
 {{
@@ -54,11 +99,15 @@ Output Format (JSON):

 Important Rules:
 - Only set shouldUpdate=true if there's meaningful new information
- Keep summaries concise (1-3 sentences each)
- Only add facts that are clearly stated or strongly implied
+- Follow length guidelines: workContext/personalContext are concise (1-3 sentences), topOfMind and history sections are detailed (paragraphs)
+- Include specific metrics, version numbers, and proper nouns in facts
+- Only add facts that are clearly stated (0.9+) or strongly implied (0.7+)
 - Remove facts that are contradicted by new information
- Preserve existing information that isn't contradicted
- Focus on information useful for future interactions
+- When updating topOfMind, integrate new focus areas while removing completed/abandoned ones
+  Keep 3-5 concurrent focus themes that are still active and relevant
+- For history sections, integrate new information chronologically into appropriate time period
+- Preserve technical accuracy - keep exact names of technologies, companies, projects
+- Focus on information useful for future interactions and personalization

 Return ONLY valid JSON, no explanation or markdown."""

@@ -91,12 +140,34 @@ Rules:
 Return ONLY valid JSON."""


+def _count_tokens(text: str, encoding_name: str = "cl100k_base") -> int:
+    """Count tokens in text using tiktoken, or estimate them when it is unusable.
+
+    Args:
+        text: The text to count tokens for.
+        encoding_name: The encoding to use (default: cl100k_base for GPT-4/3.5).
+
+    Returns:
+        The tiktoken token count, or the estimate ``len(text) // 4`` on fallback.
+    """
+    if not TIKTOKEN_AVAILABLE:
+        # tiktoken could not be imported: estimate at roughly 4 characters per token
+        return len(text) // 4
+
+    try:
+        encoding = tiktoken.get_encoding(encoding_name)
+        return len(encoding.encode(text))
+    except Exception:
+        # Best-effort: any tiktoken failure falls back to the same character-based estimate
+        return len(text) // 4
+
+
 def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2000) -> str:
    """Format memory data for injection into system prompt.

    Args:
        memory_data: The memory data dictionary.
-        max_tokens: Maximum tokens to use (approximate via character count).
+        max_tokens: Maximum tokens to use (counted via tiktoken when available, otherwise estimated at ~4 characters per token).

    Returns:
        Formatted memory string for system prompt injection.
@@ -142,33 +213,19 @@ def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2
        if history_sections:
            sections.append("History:\n" + "\n".join(f"- {s}" for s in history_sections))

-    # Format facts (most relevant ones)
-    facts = memory_data.get("facts", [])
-    if facts:
-        # Sort by confidence and take top facts
-        sorted_facts = sorted(facts, key=lambda f: f.get("confidence", 0), reverse=True)
-        # Limit to avoid too much content
-        top_facts = sorted_facts[:15]
-
-        fact_lines = []
-        for fact in top_facts:
-            content = fact.get("content", "")
-            category = fact.get("category", "")
-            if content:
-                fact_lines.append(f"- [{category}] {content}")
-
-        if fact_lines:
-            sections.append("Known Facts:\n" + "\n".join(fact_lines))
-
    if not sections:
        return ""

    result = "\n\n".join(sections)

-    # Rough token limit (approximate 4 chars per token)
-    max_chars = max_tokens * 4
-    if len(result) > max_chars:
-        result = result[:max_chars] + "\n..."
+    # Use accurate token counting with tiktoken
+    token_count = _count_tokens(result)
+    if token_count > max_tokens:
+        # Truncate to fit within token limit
+        # Estimate how many characters to keep from the average characters-per-token ratio
+        char_per_token = len(result) / token_count
+        target_chars = int(max_tokens * char_per_token * 0.95)  # 95% to leave margin
+        result = result[:target_chars] + "\n..."

    return result

--- a/backend/src/agents/memory/updater.py
+++ b/backend/src/agents/memory/updater.py
@@ -273,9 +273,7 @@ class MemoryUpdater:
        # Remove facts
        facts_to_remove = set(update_data.get("factsToRemove", []))
        if facts_to_remove:
-            current_memory["facts"] = [
-                f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove
-            ]
+            current_memory["facts"] = [f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove]

        # Add new facts
        new_facts = update_data.get("newFacts", [])
@@ -304,9 +302,7 @@ class MemoryUpdater:
        return current_memory


-def update_memory_from_conversation(
-    messages: list[Any], thread_id: str | None = None
-) -> bool:
+def update_memory_from_conversation(messages: list[Any], thread_id: str | None = None) -> bool:
    """Convenience function to update memory from a conversation.

    Args:
--- a/backend/src/agents/middlewares/uploads_middleware.py
+++ b/backend/src/agents/middlewares/uploads_middleware.py
@@ -151,8 +151,9 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
            State updates including uploaded files list.
        """
        import logging
+
        logger = logging.getLogger(__name__)
-        
+
        thread_id = runtime.context.get("thread_id")
        if thread_id is None:
            return None
@@ -172,7 +173,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
                    logger.info(f"Found previously shown files: {extracted}")

        logger.info(f"Total shown files from history: {shown_files}")
-        
+
        # List only newly uploaded files
        files = self._list_newly_uploaded_files(thread_id, shown_files)
        logger.info(f"Newly uploaded files to inject: {[f['filename'] for f in files]}")
@@ -189,7 +190,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):

        # Create files message and prepend to the last human message content
        files_message = self._create_files_message(files)
-        
+
        # Extract original content - handle both string and list formats
        original_content = ""
        if isinstance(last_message.content, str):
@@ -201,9 +202,9 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
                if isinstance(block, dict) and block.get("type") == "text":
                    text_parts.append(block.get("text", ""))
            original_content = "\n".join(text_parts)
-        
+
        logger.info(f"Original message content: {original_content[:100] if original_content else '(empty)'}")
-        
+
        # Create new message with combined content
        updated_message = HumanMessage(
            content=f"{files_message}\n\n{original_content}",
--- a/backend/src/community/aio_sandbox/aio_sandbox_provider.py
+++ b/backend/src/community/aio_sandbox/aio_sandbox_provider.py
@@ -32,14 +32,17 @@ IDLE_CHECK_INTERVAL = 60  # Check every 60 seconds


 class AioSandboxProvider(SandboxProvider):
-    """Sandbox provider that manages Docker containers running the AIO sandbox.
+    """Sandbox provider that manages containers running the AIO sandbox.
+
+    On macOS, automatically prefers Apple Container if available, otherwise falls back to Docker.
+    On other platforms, uses Docker.

    Configuration options in config.yaml under sandbox:
        use: src.community.aio_sandbox:AioSandboxProvider
-        image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest  # Docker image to use
+        image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest  # Container image to use (works with both runtimes)
        port: 8080  # Base port for sandbox containers
        base_url: http://localhost:8080  # If set, uses existing sandbox instead of starting new container
-        auto_start: true  # Whether to automatically start Docker container
+        auto_start: true  # Whether to automatically start container
        container_prefix: deer-flow-sandbox  # Prefix for container names
        idle_timeout: 600  # Idle timeout in seconds (default: 600 = 10 minutes). Set to 0 to disable.
        mounts:  # List of volume mounts
@@ -57,11 +60,13 @@ class AioSandboxProvider(SandboxProvider):
        self._containers: dict[str, str] = {}  # sandbox_id -> container_id
        self._ports: dict[str, int] = {}  # sandbox_id -> port
        self._thread_sandboxes: dict[str, str] = {}  # thread_id -> sandbox_id (for reusing sandbox across turns)
+        self._thread_locks: dict[str, threading.Lock] = {}  # thread_id -> lock (for thread-specific acquisition)
        self._last_activity: dict[str, float] = {}  # sandbox_id -> last activity timestamp
        self._config = self._load_config()
        self._shutdown_called = False
        self._idle_checker_stop = threading.Event()
        self._idle_checker_thread: threading.Thread | None = None
+        self._container_runtime = self._detect_container_runtime()

        # Register shutdown handler to clean up containers on exit
        atexit.register(self.shutdown)
@@ -184,6 +189,35 @@ class AioSandboxProvider(SandboxProvider):
                resolved[key] = str(value)
        return resolved

+    def _detect_container_runtime(self) -> str:
+        """Detect which container runtime to use.
+
+        On macOS, prefer Apple Container if available, otherwise fall back to Docker.
+        On other platforms, use Docker.
+
+        Returns:
+            "container" for Apple Container, "docker" for Docker (used as the CLI executable name).
+        """
+        import platform
+
+        # Only probe for Apple Container on macOS (platform.system() reports "Darwin")
+        if platform.system() == "Darwin":
+            try:
+                result = subprocess.run(
+                    ["container", "--version"],
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                    timeout=5,
+                )
+                logger.info(f"Detected Apple Container: {result.stdout.strip()}")
+                return "container"
+            except (FileNotFoundError, subprocess.CalledProcessError, subprocess.TimeoutExpired):
+                logger.info("Apple Container not available, falling back to Docker")
+
+        # Default to Docker (not macOS, or the probe above did not succeed)
+        return "docker"
+
    def _is_sandbox_ready(self, base_url: str, timeout: int = 30) -> bool:
        """Check if sandbox is ready to accept connections.

@@ -253,7 +287,10 @@ class AioSandboxProvider(SandboxProvider):
        return None

    def _start_container(self, sandbox_id: str, port: int, extra_mounts: list[tuple[str, str, bool]] | None = None) -> str:
-        """Start a new Docker container for the sandbox.
+        """Start a new container for the sandbox.
+
+        On macOS, prefers Apple Container if available, otherwise uses Docker.
+        On other platforms, uses Docker.

        Args:
            sandbox_id: Unique identifier for the sandbox.
@@ -267,18 +304,25 @@ class AioSandboxProvider(SandboxProvider):
        container_name = f"{self._config['container_prefix']}-{sandbox_id}"

        cmd = [
-            "docker",
+            self._container_runtime,
            "run",
-            "--security-opt",
-            "seccomp=unconfined",
-            "--rm",
-            "-d",
-            "-p",
-            f"{port}:8080",
-            "--name",
-            container_name,
        ]

+        # Add Docker-specific security options
+        if self._container_runtime == "docker":
+            cmd.extend(["--security-opt", "seccomp=unconfined"])
+
+        cmd.extend(
+            [
+                "--rm",
+                "-d",
+                "-p",
+                f"{port}:8080",
+                "--name",
+                container_name,
+            ]
+        )
+
        # Add configured environment variables
        for key, value in self._config["environment"].items():
            cmd.extend(["-e", f"{key}={value}"])
@@ -303,29 +347,48 @@ class AioSandboxProvider(SandboxProvider):

        cmd.append(image)

-        logger.info(f"Starting sandbox container: {' '.join(cmd)}")
+        logger.info(f"Starting sandbox container using {self._container_runtime}: {' '.join(cmd)}")

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            container_id = result.stdout.strip()
-            logger.info(f"Started sandbox container {container_name} with ID {container_id}")
+            logger.info(f"Started sandbox container {container_name} with ID {container_id} using {self._container_runtime}")
            return container_id
        except subprocess.CalledProcessError as e:
-            logger.error(f"Failed to start sandbox container: {e.stderr}")
+            logger.error(f"Failed to start sandbox container using {self._container_runtime}: {e.stderr}")
            raise RuntimeError(f"Failed to start sandbox container: {e.stderr}")

    def _stop_container(self, container_id: str) -> None:
-        """Stop and remove a Docker container.
+        """Stop and remove a container.
+
+        Since we use --rm flag, the container is automatically removed after stopping.

        Args:
            container_id: The container ID to stop.
        """
        try:
-            subprocess.run(["docker", "stop", container_id], capture_output=True, text=True, check=True)
-            logger.info(f"Stopped sandbox container {container_id}")
+            subprocess.run([self._container_runtime, "stop", container_id], capture_output=True, text=True, check=True)
+            logger.info(f"Stopped sandbox container {container_id} using {self._container_runtime} (--rm will auto-remove)")
        except subprocess.CalledProcessError as e:
            logger.warning(f"Failed to stop sandbox container {container_id}: {e.stderr}")

+    def _get_thread_lock(self, thread_id: str) -> threading.Lock:
+        """Get or create a lock for a specific thread_id.
+
+        Lookup and creation run under self._lock, so callers asking for the same
+        thread_id get the same lock; acquire() holds it to avoid duplicate sandboxes.
+
+        Args:
+            thread_id: The thread ID.
+
+        Returns:
+            A lock specific to this thread_id.
+        """
+        with self._lock:
+            if thread_id not in self._thread_locks:
+                self._thread_locks[thread_id] = threading.Lock()
+            return self._thread_locks[thread_id]
+
    def acquire(self, thread_id: str | None = None) -> str:
        """Acquire a sandbox environment and return its ID.

@@ -335,7 +398,8 @@ class AioSandboxProvider(SandboxProvider):
        For the same thread_id, this method will return the same sandbox_id,
        allowing sandbox reuse across multiple turns in a conversation.

-        This method is thread-safe.
+        This method is thread-safe and prevents race conditions when multiple
+        concurrent requests try to acquire a sandbox for the same thread_id.

        Args:
            thread_id: Optional thread ID for thread-specific configurations.
@@ -343,6 +407,26 @@ class AioSandboxProvider(SandboxProvider):
                mounts for workspace, uploads, and outputs directories.
                The same thread_id will reuse the same sandbox.

+        Returns:
+            The ID of the acquired sandbox environment.
+        """
+        # For thread-specific acquisition, use a per-thread lock to prevent
+        # concurrent creation of multiple sandboxes for the same thread
+        if thread_id:
+            thread_lock = self._get_thread_lock(thread_id)
+            with thread_lock:
+                return self._acquire_internal(thread_id)
+        else:
+            return self._acquire_internal(thread_id)
+
+    def _acquire_internal(self, thread_id: str | None) -> str:
+        """Internal implementation of sandbox acquisition.
+
+        This method should only be called from acquire() which handles locking.
+
+        Args:
+            thread_id: Optional thread ID for thread-specific configurations.
+
        Returns:
            The ID of the acquired sandbox environment.
        """
--- a/backend/src/config/extensions_config.py
+++ b/backend/src/config/extensions_config.py
@@ -162,7 +162,7 @@ class ExtensionsConfig(BaseModel):
        skill_config = self.skills.get(skill_name)
        if skill_config is None:
            # Default to enable for public & custom skill
-            return skill_category in ('public', 'custom')
+            return skill_category in ("public", "custom")
        return skill_config.enabled


--- a/backend/src/sandbox/tools.py
+++ b/backend/src/sandbox/tools.py
@@ -93,6 +93,8 @@ def get_thread_data(runtime: ToolRuntime[ContextT, ThreadState] | None) -> Threa
    """Extract thread_data from runtime state."""
    if runtime is None:
        return None
+    if runtime.state is None:
+        return None
    return runtime.state.get("thread_data")


@@ -104,6 +106,8 @@ def is_local_sandbox(runtime: ToolRuntime[ContextT, ThreadState] | None) -> bool
    """
    if runtime is None:
        return False
+    if runtime.state is None:
+        return False
    sandbox_state = runtime.state.get("sandbox")
    if sandbox_state is None:
        return False
@@ -122,6 +126,8 @@ def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = No
    """
    if runtime is None:
        raise SandboxRuntimeError("Tool runtime not available")
+    if runtime.state is None:
+        raise SandboxRuntimeError("Tool runtime state not available")
    sandbox_state = runtime.state.get("sandbox")
    if sandbox_state is None:
        raise SandboxRuntimeError("Sandbox state not initialized in runtime")
@@ -155,6 +161,9 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
    if runtime is None:
        raise SandboxRuntimeError("Tool runtime not available")

+    if runtime.state is None:
+        raise SandboxRuntimeError("Tool runtime state not available")
+
    # Check if sandbox already exists in state
    sandbox_state = runtime.state.get("sandbox")
    if sandbox_state is not None:
--- a/backend/src/subagents/init.py
+++ b/backend/src/subagents/init.py
@@ -0,0 +1,11 @@
+from .config import SubagentConfig
+from .executor import SubagentExecutor, SubagentResult
+from .registry import get_subagent_config, list_subagents
+
+__all__ = [
+    "SubagentConfig",
+    "SubagentExecutor",
+    "SubagentResult",
+    "get_subagent_config",
+    "list_subagents",
+]
--- a/backend/src/subagents/builtins/init.py
+++ b/backend/src/subagents/builtins/init.py
@@ -0,0 +1,15 @@
+"""Built-in subagent configurations."""
+
+from .bash_agent import BASH_AGENT_CONFIG
+from .general_purpose import GENERAL_PURPOSE_CONFIG
+
+__all__ = [
+    "GENERAL_PURPOSE_CONFIG",
+    "BASH_AGENT_CONFIG",
+]
+
+# Registry of built-in subagents
+BUILTIN_SUBAGENTS = {
+    "general-purpose": GENERAL_PURPOSE_CONFIG,
+    "bash": BASH_AGENT_CONFIG,
+}
--- a/backend/src/subagents/builtins/bash_agent.py
+++ b/backend/src/subagents/builtins/bash_agent.py
@@ -0,0 +1,46 @@
+"""Bash command execution subagent configuration."""
+
+from src.subagents.config import SubagentConfig
+
+BASH_AGENT_CONFIG = SubagentConfig(
+    name="bash",
+    description="""Command execution specialist for running bash commands in a separate context.
+
+Use this subagent when:
+- You need to run a series of related bash commands
+- Terminal operations like git, npm, docker, etc.
+- Command output is verbose and would clutter main context
+- Build, test, or deployment operations
+
+Do NOT use for simple single commands - use bash tool directly instead.""",
+    system_prompt="""You are a bash command execution specialist. Execute the requested commands carefully and report results clearly.
+
+<guidelines>
+- Execute commands one at a time when they depend on each other
+- Use parallel execution when commands are independent
+- Report both stdout and stderr when relevant
+- Handle errors gracefully and explain what went wrong
+- Use absolute paths for file operations
+- Be cautious with destructive operations (rm, overwrite, etc.)
+</guidelines>
+
+<output_format>
+For each command or group of commands:
+1. What was executed
+2. The result (success/failure)
+3. Relevant output (summarized if verbose)
+4. Any errors or warnings
+</output_format>
+
+<working_directory>
+You have access to the sandbox environment:
+- User uploads: `/mnt/user-data/uploads`
+- User workspace: `/mnt/user-data/workspace`
+- Output files: `/mnt/user-data/outputs`
+</working_directory>
+""",
+    tools=["bash", "ls", "read_file", "write_file", "str_replace"],  # Sandbox tools only
+    disallowed_tools=["task", "ask_clarification"],
+    model="inherit",
+    max_turns=30,
+)
--- a/backend/src/subagents/builtins/general_purpose.py
+++ b/backend/src/subagents/builtins/general_purpose.py
@@ -0,0 +1,46 @@
+"""General-purpose subagent configuration."""
+
+from src.subagents.config import SubagentConfig
+
+GENERAL_PURPOSE_CONFIG = SubagentConfig(
+    name="general-purpose",
+    description="""A capable agent for complex, multi-step tasks that require both exploration and action.
+
+Use this subagent when:
+- The task requires both exploration and modification
+- Complex reasoning is needed to interpret results
+- Multiple dependent steps must be executed
+- The task would benefit from isolated context management
+
+Do NOT use for simple, single-step operations.""",
+    system_prompt="""You are a general-purpose subagent working on a delegated task. Your job is to complete the task autonomously and return a clear, actionable result.
+
+<guidelines>
+- Focus on completing the delegated task efficiently
+- Use available tools as needed to accomplish the goal
+- Think step by step but act decisively
+- If you encounter issues, explain them clearly in your response
+- Return a concise summary of what you accomplished
+- Do NOT ask for clarification - work with the information provided
+</guidelines>
+
+<output_format>
+When you complete the task, provide:
+1. A brief summary of what was accomplished
+2. Key findings or results
+3. Any relevant file paths, data, or artifacts created
+4. Issues encountered (if any)
+</output_format>
+
+<working_directory>
+You have access to the same sandbox environment as the parent agent:
+- User uploads: `/mnt/user-data/uploads`
+- User workspace: `/mnt/user-data/workspace`
+- Output files: `/mnt/user-data/outputs`
+</working_directory>
+""",
+    tools=None,  # Inherit all tools from parent
+    disallowed_tools=["task", "ask_clarification"],  # Prevent nesting and clarification
+    model="inherit",
+    max_turns=50,
+)
--- a/backend/src/subagents/config.py
+++ b/backend/src/subagents/config.py
@@ -0,0 +1,28 @@
+"""Subagent configuration definitions."""
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class SubagentConfig:
+    """Configuration for a subagent.
+
+    Attributes:
+        name: Unique identifier for the subagent.
+        description: When the lead agent should delegate to this subagent.
+        system_prompt: The system prompt that guides the subagent's behavior.
+        tools: Optional list of tool names to allow. If None, inherits all tools.
+        disallowed_tools: Optional list of tool names to deny.
+        model: Model to use - 'inherit' uses parent's model.
+        max_turns: Maximum number of agent turns before stopping.
+        timeout_seconds: Maximum execution time in seconds (default: 300 = 5 minutes).
+    """
+
+    name: str
+    description: str
+    system_prompt: str
+    tools: list[str] | None = None
+    disallowed_tools: list[str] | None = field(default_factory=lambda: ["task"])
+    model: str = "inherit"
+    max_turns: int = 50
+    timeout_seconds: int = 300
--- a/backend/src/subagents/executor.py
+++ b/backend/src/subagents/executor.py
@@ -0,0 +1,368 @@
+"""Subagent execution engine."""
+
+import logging
+import threading
+import uuid
+from concurrent.futures import Future, ThreadPoolExecutor
+from concurrent.futures import TimeoutError as FuturesTimeoutError
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Any
+
+from langchain.agents import create_agent
+from langchain.tools import BaseTool
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.runnables import RunnableConfig
+
+from src.agents.thread_state import SandboxState, ThreadDataState, ThreadState
+from src.models import create_chat_model
+from src.subagents.config import SubagentConfig
+
+logger = logging.getLogger(__name__)
+
+
+class SubagentStatus(Enum):
+    """Status of a subagent execution."""
+
+    PENDING = "pending"
+    RUNNING = "running"
+    COMPLETED = "completed"
+    FAILED = "failed"
+
+
+@dataclass
+class SubagentResult:
+    """Result of a subagent execution.
+
+    Attributes:
+        task_id: Unique identifier for this execution.
+        trace_id: Trace ID for distributed tracing (links parent and subagent logs).
+        status: Current status of the execution.
+        result: The final result message (if completed).
+        error: Error message (if failed).
+        started_at: When execution started.
+        completed_at: When execution completed.
+    """
+
+    task_id: str
+    trace_id: str
+    status: SubagentStatus
+    result: str | None = None
+    error: str | None = None
+    started_at: datetime | None = None
+    completed_at: datetime | None = None
+
+
+# Global storage for background task results
+_background_tasks: dict[str, SubagentResult] = {}
+_background_tasks_lock = threading.Lock()
+
+# Thread pool for background task scheduling and orchestration
+_scheduler_pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="subagent-scheduler-")
+
+# Thread pool for actual subagent execution (with timeout support)
+# Larger pool to avoid blocking when scheduler submits execution tasks
+_execution_pool = ThreadPoolExecutor(max_workers=8, thread_name_prefix="subagent-exec-")
+
+
+def _filter_tools(
+    all_tools: list[BaseTool],
+    allowed: list[str] | None,
+    disallowed: list[str] | None,
+) -> list[BaseTool]:
+    """Filter tools based on subagent configuration.
+
+    Args:
+        all_tools: List of all available tools.
+        allowed: Optional allowlist of tool names. If provided, only these tools are included.
+        disallowed: Optional denylist of tool names. These tools are always excluded.
+
+    Returns:
+        Filtered list of tools.
+    """
+    filtered = all_tools
+
+    # Apply allowlist if specified
+    if allowed is not None:
+        allowed_set = set(allowed)
+        filtered = [t for t in filtered if t.name in allowed_set]
+
+    # Apply denylist
+    if disallowed is not None:
+        disallowed_set = set(disallowed)
+        filtered = [t for t in filtered if t.name not in disallowed_set]
+
+    return filtered
+
+
+def _get_model_name(config: SubagentConfig, parent_model: str | None) -> str | None:
+    """Resolve the model name for a subagent.
+
+    Args:
+        config: Subagent configuration.
+        parent_model: The parent agent's model name.
+
+    Returns:
+        Model name to use, or None to use default.
+    """
+    if config.model == "inherit":
+        return parent_model
+    return config.model
+
+
+class SubagentExecutor:
+    """Executor for running subagents."""
+
+    def __init__(
+        self,
+        config: SubagentConfig,
+        tools: list[BaseTool],
+        parent_model: str | None = None,
+        sandbox_state: SandboxState | None = None,
+        thread_data: ThreadDataState | None = None,
+        thread_id: str | None = None,
+        trace_id: str | None = None,
+    ):
+        """Initialize the executor.
+
+        Args:
+            config: Subagent configuration.
+            tools: List of all available tools (will be filtered).
+            parent_model: The parent agent's model name for inheritance.
+            sandbox_state: Sandbox state from parent agent.
+            thread_data: Thread data from parent agent.
+            thread_id: Thread ID for sandbox operations.
+            trace_id: Trace ID from parent for distributed tracing.
+        """
+        self.config = config
+        self.parent_model = parent_model
+        self.sandbox_state = sandbox_state
+        self.thread_data = thread_data
+        self.thread_id = thread_id
+        # Generate trace_id if not provided (for top-level calls)
+        self.trace_id = trace_id or str(uuid.uuid4())[:8]
+
+        # Filter tools based on config
+        self.tools = _filter_tools(
+            tools,
+            config.tools,
+            config.disallowed_tools,
+        )
+
+        logger.info(f"[trace={self.trace_id}] SubagentExecutor initialized: {config.name} with {len(self.tools)} tools")
+
+    def _create_agent(self):
+        """Create the agent instance."""
+        model_name = _get_model_name(self.config, self.parent_model)
+        model = create_chat_model(name=model_name, thinking_enabled=False)
+
+        # Subagents need minimal middlewares to ensure tools can access sandbox and thread_data
+        # These middlewares will reuse the sandbox/thread_data from parent agent
+        from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
+        from src.sandbox.middleware import SandboxMiddleware
+
+        middlewares = [
+            ThreadDataMiddleware(lazy_init=True),  # Compute thread paths
+            SandboxMiddleware(lazy_init=True),  # Reuse parent's sandbox (no re-acquisition)
+        ]
+
+        return create_agent(
+            model=model,
+            tools=self.tools,
+            middleware=middlewares,
+            system_prompt=self.config.system_prompt,
+            state_schema=ThreadState,
+        )
+
+    def _build_initial_state(self, task: str) -> dict[str, Any]:
+        """Build the initial state for agent execution.
+
+        Args:
+            task: The task description.
+
+        Returns:
+            Initial state dictionary.
+        """
+        state: dict[str, Any] = {
+            "messages": [HumanMessage(content=task)],
+        }
+
+        # Pass through sandbox and thread data from parent
+        if self.sandbox_state is not None:
+            state["sandbox"] = self.sandbox_state
+        if self.thread_data is not None:
+            state["thread_data"] = self.thread_data
+
+        return state
+
+    def execute(self, task: str) -> SubagentResult:
+        """Execute a task synchronously.
+
+        Args:
+            task: The task description for the subagent.
+
+        Returns:
+            SubagentResult with the execution result.
+        """
+        task_id = str(uuid.uuid4())[:8]
+        result = SubagentResult(
+            task_id=task_id,
+            trace_id=self.trace_id,
+            status=SubagentStatus.RUNNING,
+            started_at=datetime.now(),
+        )
+
+        try:
+            agent = self._create_agent()
+            state = self._build_initial_state(task)
+
+            # Build config with thread_id for sandbox access and recursion limit
+            run_config: RunnableConfig = {
+                "recursion_limit": self.config.max_turns,
+            }
+            context = {}
+            if self.thread_id:
+                run_config["configurable"] = {"thread_id": self.thread_id}
+                context["thread_id"] = self.thread_id
+
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} starting execution with max_turns={self.config.max_turns}")
+
+            # Run the agent using invoke for complete result
+            # Note: invoke() runs until completion or interruption
+            # Timeout is handled at the execute_async level, not here
+            final_state = agent.invoke(state, config=run_config, context=context)  # type: ignore[arg-type]
+
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} completed execution")
+
+            # Extract the final message - find the last AIMessage
+            messages = final_state.get("messages", [])
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} final messages count: {len(messages)}")
+
+            # Find the last AIMessage in the conversation
+            last_ai_message = None
+            for msg in reversed(messages):
+                if isinstance(msg, AIMessage):
+                    last_ai_message = msg
+                    break
+
+            if last_ai_message is not None:
+                content = last_ai_message.content
+                logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} last AI message content type: {type(content)}")
+
+                # Handle both str and list content types
+                if isinstance(content, str):
+                    result.result = content
+                elif isinstance(content, list):
+                    # Extract text from list of content blocks
+                    text_parts = []
+                    for block in content:
+                        if isinstance(block, str):
+                            text_parts.append(block)
+                        elif isinstance(block, dict) and "text" in block:
+                            text_parts.append(block["text"])
+                    result.result = "\n".join(text_parts) if text_parts else "No text content in response"
+                else:
+                    result.result = str(content)
+            elif messages:
+                # Fallback: use the last message if no AIMessage found
+                last_message = messages[-1]
+                logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} no AIMessage found, using last message: {type(last_message)}")
+                result.result = str(last_message.content) if hasattr(last_message, "content") else str(last_message)
+            else:
+                logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} no messages in final state")
+                result.result = "No response generated"
+
+            result.status = SubagentStatus.COMPLETED
+            result.completed_at = datetime.now()
+
+        except Exception as e:
+            logger.exception(f"[trace={self.trace_id}] Subagent {self.config.name} execution failed")
+            result.status = SubagentStatus.FAILED
+            result.error = str(e)
+            result.completed_at = datetime.now()
+
+        return result
+
+    def execute_async(self, task: str) -> str:
+        """Start a task execution in the background.
+
+        Args:
+            task: The task description for the subagent.
+
+        Returns:
+            Task ID that can be used to check status later.
+        """
+        task_id = str(uuid.uuid4())[:8]
+
+        # Create initial pending result
+        result = SubagentResult(
+            task_id=task_id,
+            trace_id=self.trace_id,
+            status=SubagentStatus.PENDING,
+        )
+
+        logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} starting async execution, task_id={task_id}")
+
+        with _background_tasks_lock:
+            _background_tasks[task_id] = result
+
+        # Submit to scheduler pool
+        def run_task():
+            with _background_tasks_lock:
+                _background_tasks[task_id].status = SubagentStatus.RUNNING
+                _background_tasks[task_id].started_at = datetime.now()
+
+            try:
+                # Submit execution to execution pool with timeout
+                execution_future: Future = _execution_pool.submit(self.execute, task)
+                try:
+                    # Wait for execution with timeout
+                    exec_result = execution_future.result(timeout=self.config.timeout_seconds)
+                    with _background_tasks_lock:
+                        _background_tasks[task_id].status = exec_result.status
+                        _background_tasks[task_id].result = exec_result.result
+                        _background_tasks[task_id].error = exec_result.error
+                        _background_tasks[task_id].completed_at = datetime.now()
+                except FuturesTimeoutError:
+                    logger.error(
+                        f"[trace={self.trace_id}] Subagent {self.config.name} execution timed out after {self.config.timeout_seconds}s"
+                    )
+                    with _background_tasks_lock:
+                        _background_tasks[task_id].status = SubagentStatus.FAILED
+                        _background_tasks[task_id].error = f"Execution timed out after {self.config.timeout_seconds} seconds"
+                        _background_tasks[task_id].completed_at = datetime.now()
+                    # Cancel the future (best effort - may not stop the actual execution)
+                    execution_future.cancel()
+            except Exception as e:
+                logger.exception(f"[trace={self.trace_id}] Subagent {self.config.name} async execution failed")
+                with _background_tasks_lock:
+                    _background_tasks[task_id].status = SubagentStatus.FAILED
+                    _background_tasks[task_id].error = str(e)
+                    _background_tasks[task_id].completed_at = datetime.now()
+
+        _scheduler_pool.submit(run_task)
+        return task_id
+
+
+def get_background_task_result(task_id: str) -> SubagentResult | None:
+    """Get the result of a background task.
+
+    Args:
+        task_id: The task ID returned by execute_async.
+
+    Returns:
+        SubagentResult if found, None otherwise.
+    """
+    with _background_tasks_lock:
+        return _background_tasks.get(task_id)
+
+
+def list_background_tasks() -> list[SubagentResult]:
+    """List all background tasks.
+
+    Returns:
+        List of all SubagentResult instances.
+    """
+    with _background_tasks_lock:
+        return list(_background_tasks.values())
--- a/backend/src/subagents/registry.py
+++ b/backend/src/subagents/registry.py
@@ -0,0 +1,34 @@
+"""Subagent registry for managing available subagents."""
+
+from src.subagents.builtins import BUILTIN_SUBAGENTS
+from src.subagents.config import SubagentConfig
+
+
+def get_subagent_config(name: str) -> SubagentConfig | None:
+    """Get a subagent configuration by name.
+
+    Args:
+        name: The name of the subagent.
+
+    Returns:
+        SubagentConfig if found, None otherwise.
+    """
+    return BUILTIN_SUBAGENTS.get(name)
+
+
+def list_subagents() -> list[SubagentConfig]:
+    """List all available subagent configurations.
+
+    Returns:
+        List of all registered SubagentConfig instances.
+    """
+    return list(BUILTIN_SUBAGENTS.values())
+
+
+def get_subagent_names() -> list[str]:
+    """Get all available subagent names.
+
+    Returns:
+        List of subagent names.
+    """
+    return list(BUILTIN_SUBAGENTS.keys())
--- a/backend/src/tools/builtins/init.py
+++ b/backend/src/tools/builtins/init.py
@@ -1,5 +1,11 @@
 from .clarification_tool import ask_clarification_tool
 from .present_file_tool import present_file_tool
+from .task_tool import task_tool
 from .view_image_tool import view_image_tool

-__all__ = ["present_file_tool", "ask_clarification_tool", "view_image_tool"]
+__all__ = [
+    "present_file_tool",
+    "ask_clarification_tool",
+    "view_image_tool",
+    "task_tool",
+]
--- a/backend/src/tools/builtins/task_tool.py
+++ b/backend/src/tools/builtins/task_tool.py
@@ -0,0 +1,151 @@
+"""Task tool for delegating work to subagents."""
+
+import logging
+import time
+import uuid
+from typing import Literal
+
+from langchain.tools import ToolRuntime, tool
+from langgraph.typing import ContextT
+from langgraph.config import get_stream_writer
+
+from src.agents.thread_state import ThreadState
+from src.subagents import SubagentExecutor, get_subagent_config
+from src.subagents.executor import SubagentStatus, get_background_task_result
+
+logger = logging.getLogger(__name__)
+
+
+@tool("task", parse_docstring=True)
+def task_tool(
+    runtime: ToolRuntime[ContextT, ThreadState],
+    subagent_type: Literal["general-purpose", "bash"],
+    prompt: str,
+    description: str,
+    max_turns: int | None = None,
+) -> str:
+    """Delegate a task to a specialized subagent that runs in its own context.
+
+    Subagents help you:
+    - Preserve context by keeping exploration and implementation separate
+    - Handle complex multi-step tasks autonomously
+    - Execute commands or operations in isolated contexts
+
+    Available subagent types:
+    - **general-purpose**: A capable agent for complex, multi-step tasks that require
+      both exploration and action. Use when the task requires complex reasoning,
+      multiple dependent steps, or would benefit from isolated context.
+    - **bash**: Command execution specialist for running bash commands. Use for
+      git operations, build processes, or when command output would be verbose.
+
+    When to use this tool:
+    - Complex tasks requiring multiple steps or tools
+    - Tasks that produce verbose output
+    - When you want to isolate context from the main conversation
+    - Parallel research or exploration tasks
+
+    When NOT to use this tool:
+    - Simple, single-step operations (use tools directly)
+    - Tasks requiring user interaction or clarification
+
+    Args:
+        subagent_type: The type of subagent to use.
+        prompt: The task description for the subagent. Be specific and clear about what needs to be done.
+        description: A short (3-5 word) description of the task for logging/display.
+        max_turns: Optional maximum number of agent turns. Defaults to subagent's configured max.
+    """
+    # Get subagent configuration
+    config = get_subagent_config(subagent_type)
+    if config is None:
+        return f"Error: Unknown subagent type '{subagent_type}'. Available: general-purpose, bash"
+
+    # Override max_turns if specified
+    if max_turns is not None:
+        # Create a copy with updated max_turns
+        from dataclasses import replace
+
+        config = replace(config, max_turns=max_turns)
+
+    # Extract parent context from runtime
+    sandbox_state = None
+    thread_data = None
+    thread_id = None
+    parent_model = None
+    trace_id = None
+
+    if runtime is not None:
+        sandbox_state = runtime.state.get("sandbox")
+        thread_data = runtime.state.get("thread_data")
+        thread_id = runtime.context.get("thread_id")
+
+        # Try to get parent model from the run config's metadata
+        metadata = runtime.config.get("metadata", {})
+        parent_model = metadata.get("model_name")
+
+        # Get or generate trace_id for distributed tracing
+        trace_id = metadata.get("trace_id") or str(uuid.uuid4())[:8]
+
+    # Get available tools (excluding task tool to prevent nesting)
+    # Lazy import to avoid circular dependency
+    from src.tools import get_available_tools
+
+    # Subagents should not have subagent tools enabled (prevent recursive nesting)
+    tools = get_available_tools(model_name=parent_model, subagent_enabled=False)
+
+    # Create executor
+    executor = SubagentExecutor(
+        config=config,
+        tools=tools,
+        parent_model=parent_model,
+        sandbox_state=sandbox_state,
+        thread_data=thread_data,
+        thread_id=thread_id,
+        trace_id=trace_id,
+    )
+
+    # Start background execution (always async to prevent blocking)
+    task_id = executor.execute_async(prompt)
+    logger.info(f"[trace={trace_id}] Started background task {task_id}, polling for completion...")
+
+    # Poll for task completion in backend (removes need for LLM to poll)
+    poll_count = 0
+    last_status = None
+
+    writer = get_stream_writer()
+    # Send the "task_started" event to the stream writer
+    writer({"type": "task_started", "task_id": task_id, "task_type": subagent_type, "description": description})
+
+
+    while True:
+        result = get_background_task_result(task_id)
+
+        if result is None:
+            logger.error(f"[trace={trace_id}] Task {task_id} not found in background tasks")
+            writer({"type": "task_failed", "task_id": task_id, "task_type": subagent_type, "error": "Task disappeared from background tasks"})
+            return f"Error: Task {task_id} disappeared from background tasks"
+
+        # Log status changes for debugging
+        if result.status != last_status:
+            logger.info(f"[trace={trace_id}] Task {task_id} status: {result.status.value}")
+            last_status = result.status
+
+        # Check if task completed or failed
+        if result.status == SubagentStatus.COMPLETED:
+            writer({"type": "task_completed", "task_id": task_id, "task_type": subagent_type, "result": result.result})
+            logger.info(f"[trace={trace_id}] Task {task_id} completed after {poll_count} polls")
+            return f"Task Succeeded. Result: {result.result}"
+        elif result.status == SubagentStatus.FAILED:
+            writer({"type": "task_failed", "task_id": task_id, "task_type": subagent_type, "error": result.error})
+            logger.error(f"[trace={trace_id}] Task {task_id} failed: {result.error}")
+            return f"Task failed. Error: {result.error}"
+
+        # Still running, wait before next poll
+        writer({"type": "task_running", "task_id": task_id, "task_type": subagent_type, "poll_count": poll_count})
+        time.sleep(5)  # Poll every 5 seconds
+        poll_count += 1
+
+        # Timeout protection: stop polling once poll_count exceeds 60 (about 5 minutes)
+        if poll_count > 60:  # 60 * 5s = 5 minutes
+            logger.warning(f"[trace={trace_id}] Task {task_id} timed out after {poll_count} polls")
+            writer({"type": "task_timed_out", "task_id": task_id, "task_type": subagent_type})
+            return f"Task timed out after 5 minutes. Status: {result.status.value}"
--- a/backend/src/tools/tools.py
+++ b/backend/src/tools/tools.py
@@ -4,7 +4,7 @@ from langchain.tools import BaseTool

 from src.config import get_app_config
 from src.reflection import resolve_variable
-from src.tools.builtins import ask_clarification_tool, present_file_tool, view_image_tool
+from src.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool

 logger = logging.getLogger(__name__)

@@ -13,8 +13,18 @@ BUILTIN_TOOLS = [
    ask_clarification_tool,
 ]

+SUBAGENT_TOOLS = [
+    task_tool,
+    # task_status_tool is intentionally not exposed to the LLM; the backend polls task status internally
+]

-def get_available_tools(groups: list[str] | None = None, include_mcp: bool = True, model_name: str | None = None) -> list[BaseTool]:
+
+def get_available_tools(
+    groups: list[str] | None = None,
+    include_mcp: bool = True,
+    model_name: str | None = None,
+    subagent_enabled: bool = False,
+) -> list[BaseTool]:
    """Get all available tools from config.

    Note: MCP tools should be initialized at application startup using
@@ -24,6 +34,7 @@ def get_available_tools(groups: list[str] | None = None, include_mcp: bool = Tru
        groups: Optional list of tool groups to filter by.
        include_mcp: Whether to include tools from MCP servers (default: True).
        model_name: Optional model name to determine if vision tools should be included.
+        subagent_enabled: Whether to include subagent tools (currently only the task tool).

    Returns:
        List of available tools.
@@ -52,13 +63,19 @@ def get_available_tools(groups: list[str] | None = None, include_mcp: bool = Tru
        except Exception as e:
            logger.error(f"Failed to get cached MCP tools: {e}")

-    # Conditionally add view_image_tool only if the model supports vision
+    # Conditionally add tools based on runtime parameters and model config
    builtin_tools = BUILTIN_TOOLS.copy()

+    # Add subagent tools only if enabled via runtime parameter
+    if subagent_enabled:
+        builtin_tools.extend(SUBAGENT_TOOLS)
+        logger.info("Including subagent tools (task)")
+
    # If no model_name specified, use the first model (default)
    if model_name is None and config.models:
        model_name = config.models[0].name

+    # Add view_image_tool only if the model supports vision
    model_config = config.get_model_config(model_name) if model_name else None
    if model_config is not None and model_config.supports_vision:
        builtin_tools.append(view_image_tool)