feat: add clarification feature (#13)

2026-04-02 22:02:13 +08:00 · 2026-01-18 19:55:36 +08:00
parent dd80348b76
commit 645923c3bc
8 changed files with 416 additions and 9 deletions
--- a/backend/src/agents/lead_agent/agent.py
+++ b/backend/src/agents/lead_agent/agent.py
@@ -2,18 +2,22 @@ from langchain.agents import create_agent
 from langchain_core.runnables import RunnableConfig

 from src.agents.lead_agent.prompt import apply_prompt_template
+from src.agents.middlewares.clarification_middleware import ClarificationMiddleware
 from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
 from src.agents.middlewares.title_middleware import TitleMiddleware
 from src.agents.thread_state import ThreadState
 from src.models import create_chat_model
 from src.sandbox.middleware import SandboxMiddleware
-from src.tools import get_available_tools

 # ThreadDataMiddleware must be before SandboxMiddleware to ensure thread_id is available
-middlewares = [ThreadDataMiddleware(), SandboxMiddleware(), TitleMiddleware()]
+# ClarificationMiddleware should be last to intercept clarification requests after model calls
+middlewares = [ThreadDataMiddleware(), SandboxMiddleware(), TitleMiddleware(), ClarificationMiddleware()]


 def make_lead_agent(config: RunnableConfig):
+    # Lazy import to avoid circular dependency
+    from src.tools import get_available_tools
+
    thinking_enabled = config.get("configurable", {}).get("thinking_enabled", True)
    model_name = config.get("configurable", {}).get("model_name") or config.get("configurable", {}).get("model")
    print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}")
--- a/backend/src/agents/lead_agent/prompt.py
+++ b/backend/src/agents/lead_agent/prompt.py
@@ -8,12 +8,83 @@ You are DeerFlow 2.0, an open-source super agent.
 </role>

 <thinking_style>
- Think concisely and strategically
+- Think concisely and strategically about the user's request BEFORE taking action
+- Break down the task: What is clear? What is ambiguous? What is missing?
+- **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work**
 - Never write down your full final answer or report in thinking process, but only outline
 - CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery.
 - Your response must contain the actual answer, not just a reference to what you thought about
 </thinking_style>

+<clarification_system>
+**WORKFLOW PRIORITY: CLARIFY → PLAN → ACT**
+1. **FIRST**: Analyze the request in your thinking - identify what's unclear, missing, or ambiguous
+2. **SECOND**: If clarification is needed, call `ask_clarification` tool IMMEDIATELY - do NOT start working
+3. **THIRD**: Only after all clarifications are resolved, proceed with planning and execution
+
+**CRITICAL RULE: Clarification ALWAYS comes BEFORE action. Never start working and clarify mid-execution.**
+
+**MANDATORY Clarification Scenarios - You MUST call ask_clarification BEFORE starting work when:**
+
+1. **Missing Information** (`missing_info`): Required details not provided
+   - Example: User says "create a web scraper" but doesn't specify the target website
+   - Example: "Deploy the app" without specifying environment
+   - **REQUIRED ACTION**: Call ask_clarification to get the missing information
+
+2. **Ambiguous Requirements** (`ambiguous_requirement`): Multiple valid interpretations exist
+   - Example: "Optimize the code" could mean performance, readability, or memory usage
+   - Example: "Make it better" is unclear what aspect to improve
+   - **REQUIRED ACTION**: Call ask_clarification to clarify the exact requirement
+
+3. **Approach Choices** (`approach_choice`): Several valid approaches exist
+   - Example: "Add authentication" could use JWT, OAuth, session-based, or API keys
+   - Example: "Store data" could use database, files, cache, etc.
+   - **REQUIRED ACTION**: Call ask_clarification to let user choose the approach
+
+4. **Risky Operations** (`risk_confirmation`): Destructive actions need confirmation
+   - Example: Deleting files, modifying production configs, database operations
+   - Example: Overwriting existing code or data
+   - **REQUIRED ACTION**: Call ask_clarification to get explicit confirmation
+
+5. **Suggestions** (`suggestion`): You have a recommendation but want approval
+   - Example: "I recommend refactoring this code. Should I proceed?"
+   - **REQUIRED ACTION**: Call ask_clarification to get approval
+
+**STRICT ENFORCEMENT:**
+- ❌ DO NOT start working and then ask for clarification mid-execution - clarify FIRST
+- ❌ DO NOT skip clarification for "efficiency" - accuracy matters more than speed
+- ❌ DO NOT make assumptions when information is missing - ALWAYS ask
+- ❌ DO NOT proceed with guesses - STOP and call ask_clarification first
+- ✅ Analyze the request in thinking → Identify unclear aspects → Ask BEFORE any action
+- ✅ If you identify the need for clarification in your thinking, you MUST call the tool IMMEDIATELY
+- ✅ After calling ask_clarification, execution will be interrupted automatically
+- ✅ Wait for user response - do NOT continue with assumptions
+
+**How to Use:**
+```python
+ask_clarification(
+    question="Your specific question here?",
+    clarification_type="missing_info",  # or other type
+    context="Why you need this information",  # optional but recommended
+    options=["option1", "option2"]  # optional, for choices
+)
+```
+
+**Example:**
+User: "Deploy the application"
+You (thinking): Missing environment info - I MUST ask for clarification
+You (action): ask_clarification(
+    question="Which environment should I deploy to?",
+    clarification_type="missing_info",
+    context="I need to know the target environment for proper configuration",
+    options=["development", "staging", "production"]
+)
+[Execution stops - wait for user response]
+
+User: "staging"
+You: "Deploying to staging..." [proceed]
+</clarification_system>
+
 <skill_system>
 You have access to skills that provide optimized workflows for specific tasks. Each skill contains best practices, frameworks, and references to additional resources.

@@ -48,6 +119,7 @@ All temporary work happens in `/mnt/user-data/workspace`. Final deliverables mus
 </response_style>

 <critical_reminders>
+- **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess
 - Skill First: Always load the relevant skill before starting **complex** tasks.
 - Progressive Loading: Load resources incrementally as referenced in skills
 - Output Files: Final deliverables must be in `/mnt/user-data/outputs`
--- a/backend/src/agents/middlewares/clarification_middleware.py
+++ b/backend/src/agents/middlewares/clarification_middleware.py
@@ -0,0 +1,177 @@
+"""Middleware for intercepting clarification requests and presenting them to the user."""
+
+from collections.abc import Callable
+from typing import override
+
+from langchain.agents import AgentState
+from langchain.agents.middleware import AgentMiddleware
+from langchain_core.messages import AIMessage, ToolMessage
+from langgraph.graph import END
+from langgraph.prebuilt.tool_node import ToolCallRequest
+from langgraph.types import Command
+
+
+class ClarificationMiddlewareState(AgentState):
+    """Compatible with the `ThreadState` schema."""
+
+    pass
+
+
+class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]):
+    """Intercepts clarification tool calls and interrupts execution to present questions to the user.
+
+    When the model calls the `ask_clarification` tool, this middleware:
+    1. Intercepts the tool call before execution
+    2. Extracts the clarification question and metadata
+    3. Formats a user-friendly message
+    4. Returns a Command that interrupts execution and presents the question
+    5. Waits for user response before continuing
+
+    This replaces the tool-based approach where clarification continued the conversation flow.
+    """
+
+    state_schema = ClarificationMiddlewareState
+
+    def _is_chinese(self, text: str) -> bool:
+        """Check if text contains Chinese characters.
+
+        Args:
+            text: Text to check
+
+        Returns:
+            True if text contains Chinese characters
+        """
+        return any('\u4e00' <= char <= '\u9fff' for char in text)
+
+    def _format_clarification_message(self, args: dict) -> str:
+        """Format the clarification arguments into a user-friendly message.
+
+        Args:
+            args: The tool call arguments containing clarification details
+
+        Returns:
+            Formatted message string
+        """
+        question = args.get("question", "")
+        clarification_type = args.get("clarification_type", "missing_info")
+        context = args.get("context")
+        options = args.get("options", [])
+
+        # Type-specific icons
+        type_icons = {
+            "missing_info": "❓",
+            "ambiguous_requirement": "🤔",
+            "approach_choice": "🔀",
+            "risk_confirmation": "⚠️",
+            "suggestion": "💡",
+        }
+
+        icon = type_icons.get(clarification_type, "❓")
+
+        # Build the message naturally
+        message_parts = []
+
+        # Add icon and question together for a more natural flow
+        if context:
+            # If there's context, present it first as background
+            message_parts.append(f"{icon} {context}")
+            message_parts.append(f"\n{question}")
+        else:
+            # Just the question with icon
+            message_parts.append(f"{icon} {question}")
+
+        # Add options in a cleaner format
+        if options and len(options) > 0:
+            message_parts.append("")  # blank line for spacing
+            for i, option in enumerate(options, 1):
+                message_parts.append(f"  {i}. {option}")
+
+        return "\n".join(message_parts)
+
+    def _handle_clarification(self, request: ToolCallRequest) -> Command:
+        """Handle clarification request and return command to interrupt execution.
+
+        Args:
+            request: Tool call request
+
+        Returns:
+            Command that interrupts execution with the formatted clarification message
+        """
+        # Extract clarification arguments
+        args = request.tool_call.get("args", {})
+        question = args.get("question", "")
+
+        print("[ClarificationMiddleware] Intercepted clarification request")
+        print(f"[ClarificationMiddleware] Question: {question}")
+
+        # Format the clarification message
+        formatted_message = self._format_clarification_message(args)
+
+        # Get the tool call ID
+        tool_call_id = request.tool_call.get("id", "")
+
+        # Create a ToolMessage with the formatted question
+        # This will be added to the message history
+        tool_message = ToolMessage(
+            content=formatted_message,
+            tool_call_id=tool_call_id,
+            name="ask_clarification",
+        )
+
+        ai_response_message = AIMessage(content=formatted_message)
+
+        # Return a Command that:
+        # 1. Appends the formatted ToolMessage plus a new AIMessage repeating the question
+        # 2. Interrupts execution by going to __end__
+        # Note: The AI message that issued the tool call is not modified, preserving all its fields
+        # (reasoning_content, tool_calls, etc.); important for thinking mode, which requires reasoning_content
+
+        # Return Command to append both messages and interrupt
+        return Command(
+            update={"messages": [tool_message, ai_response_message]},
+            goto=END,
+        )
+
+    @override
+    def wrap_tool_call(
+        self,
+        request: ToolCallRequest,
+        handler: Callable[[ToolCallRequest], ToolMessage | Command],
+    ) -> ToolMessage | Command:
+        """Intercept ask_clarification tool calls and interrupt execution (sync version).
+
+        Args:
+            request: Tool call request
+            handler: Original tool execution handler
+
+        Returns:
+            Command that interrupts execution with the formatted clarification message
+        """
+        # Check if this is an ask_clarification tool call
+        if request.tool_call.get("name") != "ask_clarification":
+            # Not a clarification call, execute normally
+            return handler(request)
+
+        return self._handle_clarification(request)
+
+    @override
+    async def awrap_tool_call(
+        self,
+        request: ToolCallRequest,
+        handler: Callable[[ToolCallRequest], ToolMessage | Command],
+    ) -> ToolMessage | Command:
+        """Intercept ask_clarification tool calls and interrupt execution (async version).
+
+        Args:
+            request: Tool call request
+            handler: Original tool execution handler (async)
+
+        Returns:
+            Command that interrupts execution with the formatted clarification message
+        """
+        # Check if this is an ask_clarification tool call
+        if request.tool_call.get("name") != "ask_clarification":
+            # Not a clarification call, execute normally
+            return await handler(request)
+
+        return self._handle_clarification(request)
--- a/backend/src/gateway/app.py
+++ b/backend/src/gateway/app.py
@@ -27,7 +27,6 @@ def create_app() -> FastAPI:
    Returns:
        Configured FastAPI application instance.
    """
-    config = get_gateway_config()

    app = FastAPI(
        title="DeerFlow API Gateway",
--- a/backend/src/sandbox/local/local_sandbox.py
+++ b/backend/src/sandbox/local/local_sandbox.py
@@ -42,9 +42,102 @@ class LocalSandbox(Sandbox):
        # No mapping found, return original path
        return path_str

+    def _reverse_resolve_path(self, path: str) -> str:
+        """
+        Reverse resolve local path back to container path using mappings.
+
+        Args:
+            path: Local path that might need to be mapped to container path
+
+        Returns:
+            Container path if a mapping covers it, otherwise the resolved absolute path
+        """
+        path_str = str(Path(path).resolve())
+
+        # Try each mapping (longest local path first for more specific matches)
+        for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True):
+            local_path_resolved = str(Path(local_path).resolve())
+            # Match only on a path-component boundary so /a/data does not capture /a/data2
+            if path_str == local_path_resolved or path_str.startswith(local_path_resolved.rstrip("/") + "/"):
+                # Replace the local path prefix with container path
+                relative = path_str[len(local_path_resolved) :].lstrip("/")
+                return f"{container_path}/{relative}" if relative else container_path
+
+        # No mapping found, return the resolved path unchanged
+        return path_str
+
+    def _reverse_resolve_paths_in_output(self, output: str) -> str:
+        """
+        Reverse resolve local paths back to container paths in output string.
+
+        Args:
+            output: Output string that may contain local paths
+
+        Returns:
+            Output with local paths resolved to container paths
+        """
+        import re
+
+        # Sort mappings by local path length (longest first) for correct prefix matching
+        sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True)
+
+        if not sorted_mappings:
+            return output
+
+        # Rewrite each mapped local directory (with any trailing path components) found in the output
+        result = output
+        for container_path, local_path in sorted_mappings:
+            local_path_resolved = str(Path(local_path).resolve())
+            escaped_local = re.escape(local_path_resolved)
+            # Optional tail of path components; the lookahead rejects a prefix that continues with a
+            # name character, so a sibling such as "<local>2" or "<local>-old" is left untouched
+            pattern = re.compile(escaped_local + r"(?:/[^\s\"';&|<>()]*)?(?![\w-])")
+
+            def replace_match(match: re.Match) -> str:
+                matched_path = match.group(0)
+                return self._reverse_resolve_path(matched_path)
+
+            result = pattern.sub(replace_match, result)
+
+        return result
+
+    def _resolve_paths_in_command(self, command: str) -> str:
+        """
+        Resolve container paths to local paths in a command string.
+
+        Args:
+            command: Command string that may contain container paths
+
+        Returns:
+            Command with container paths resolved to local paths
+        """
+        import re
+
+        # Sort mappings by length (longest first) for correct prefix matching
+        sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True)
+
+        # Build regex pattern to match all container paths
+        # NOTE(review): the tail group is lazy ("??") and ends the pattern, so only the container prefix is matched
+        if not sorted_mappings:
+            return command
+
+        # Create pattern that matches any of the container paths
+        patterns = [re.escape(container_path) + r"(?:/[^\s\"';&|<>()]*)??" for container_path, _ in sorted_mappings]
+        pattern = re.compile("|".join(f"({p})" for p in patterns))
+
+        def replace_match(match: re.Match) -> str:
+            matched_path = match.group(0)
+            return self._resolve_path(matched_path)
+
+        return pattern.sub(replace_match, command)
+
    def execute_command(self, command: str) -> str:
+        # Resolve container paths in command before execution
+        resolved_command = self._resolve_paths_in_command(command)
+
        result = subprocess.run(
-            command,
+            resolved_command,
            executable="/bin/zsh",
            shell=True,
            capture_output=True,
@@ -56,11 +149,16 @@ class LocalSandbox(Sandbox):
            output += f"\nStd Error:\n{result.stderr}" if output else result.stderr
        if result.returncode != 0:
            output += f"\nExit Code: {result.returncode}"
-        return output if output else "(no output)"
+
+        final_output = output if output else "(no output)"
+        # Reverse resolve local paths back to container paths in output
+        return self._reverse_resolve_paths_in_output(final_output)

    def list_dir(self, path: str, max_depth=2) -> list[str]:
        resolved_path = self._resolve_path(path)
-        return list_dir(resolved_path, max_depth)
+        entries = list_dir(resolved_path, max_depth)
+        # Reverse resolve local paths back to container paths in output
+        return [self._reverse_resolve_paths_in_output(entry) for entry in entries]

    def read_file(self, path: str) -> str:
        resolved_path = self._resolve_path(path)
--- a/backend/src/tools/builtins/__init__.py
+++ b/backend/src/tools/builtins/__init__.py
@@ -1,3 +1,4 @@
+from .clarification_tool import ask_clarification_tool
 from .present_file_tool import present_file_tool

-__all__ = ["present_file_tool"]
+__all__ = ["present_file_tool", "ask_clarification_tool"]
--- a/backend/src/tools/builtins/clarification_tool.py
+++ b/backend/src/tools/builtins/clarification_tool.py
@@ -0,0 +1,55 @@
+from typing import Literal
+
+from langchain.tools import tool
+
+
+@tool("ask_clarification", parse_docstring=True, return_direct=True)
+def ask_clarification_tool(
+    question: str,
+    clarification_type: Literal[
+        "missing_info",
+        "ambiguous_requirement",
+        "approach_choice",
+        "risk_confirmation",
+        "suggestion",
+    ],
+    context: str | None = None,
+    options: list[str] | None = None,
+) -> str:
+    """Ask the user for clarification when you need more information to proceed.
+
+    Use this tool when you encounter situations where you cannot proceed without user input:
+
+    - **Missing information**: Required details not provided (e.g., file paths, URLs, specific requirements)
+    - **Ambiguous requirements**: Multiple valid interpretations exist
+    - **Approach choices**: Several valid approaches exist and you need user preference
+    - **Risky operations**: Destructive actions that need explicit confirmation (e.g., deleting files, modifying production)
+    - **Suggestions**: You have a recommendation but want user approval before proceeding
+
+    The execution will be interrupted and the question will be presented to the user.
+    Wait for the user's response before continuing.
+
+    When to use ask_clarification:
+    - You need information that wasn't provided in the user's request
+    - The requirement can be interpreted in multiple ways
+    - Multiple valid implementation approaches exist
+    - You're about to perform a potentially dangerous operation
+    - You have a recommendation but need user approval
+
+    Best practices:
+    - Ask ONE clarification at a time for clarity
+    - Be specific and clear in your question
+    - Don't make assumptions when clarification is needed
+    - For risky operations, ALWAYS ask for confirmation
+    - After calling this tool, execution will be interrupted automatically
+
+    Args:
+        question: The clarification question to ask the user. Be specific and clear.
+        clarification_type: The type of clarification needed (missing_info, ambiguous_requirement, approach_choice, risk_confirmation, suggestion).
+        context: Optional context explaining why clarification is needed. Helps the user understand the situation.
+        options: Optional list of choices (for approach_choice or suggestion types). Present clear options for the user to choose from.
+    """
+    # This is a placeholder implementation
+    # The actual logic is handled by ClarificationMiddleware which intercepts this tool call
+    # and interrupts execution to present the question to the user
+    return "Clarification request processed by middleware"
--- a/backend/src/tools/tools.py
+++ b/backend/src/tools/tools.py
@@ -2,10 +2,11 @@ from langchain.tools import BaseTool

 from src.config import get_app_config
 from src.reflection import resolve_variable
-from src.tools.builtins import present_file_tool
+from src.tools.builtins import ask_clarification_tool, present_file_tool

 BUILTIN_TOOLS = [
    present_file_tool,
+    ask_clarification_tool,
 ]