diff --git a/backend/src/agents/lead_agent/agent.py b/backend/src/agents/lead_agent/agent.py index aef5841..5d4f94d 100644 --- a/backend/src/agents/lead_agent/agent.py +++ b/backend/src/agents/lead_agent/agent.py @@ -2,18 +2,22 @@ from langchain.agents import create_agent from langchain_core.runnables import RunnableConfig from src.agents.lead_agent.prompt import apply_prompt_template +from src.agents.middlewares.clarification_middleware import ClarificationMiddleware from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware from src.agents.middlewares.title_middleware import TitleMiddleware from src.agents.thread_state import ThreadState from src.models import create_chat_model from src.sandbox.middleware import SandboxMiddleware -from src.tools import get_available_tools # ThreadDataMiddleware must be before SandboxMiddleware to ensure thread_id is available -middlewares = [ThreadDataMiddleware(), SandboxMiddleware(), TitleMiddleware()] +# ClarificationMiddleware should be last to intercept clarification requests after model calls +middlewares = [ThreadDataMiddleware(), SandboxMiddleware(), TitleMiddleware(), ClarificationMiddleware()] def make_lead_agent(config: RunnableConfig): + # Lazy import to avoid circular dependency + from src.tools import get_available_tools + thinking_enabled = config.get("configurable", {}).get("thinking_enabled", True) model_name = config.get("configurable", {}).get("model_name") or config.get("configurable", {}).get("model") print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}") diff --git a/backend/src/agents/lead_agent/prompt.py b/backend/src/agents/lead_agent/prompt.py index 94fea0c..472bfdd 100644 --- a/backend/src/agents/lead_agent/prompt.py +++ b/backend/src/agents/lead_agent/prompt.py @@ -8,12 +8,83 @@ You are DeerFlow 2.0, an open-source super agent. 
-- Think concisely and strategically +- Think concisely and strategically about the user's request BEFORE taking action +- Break down the task: What is clear? What is ambiguous? What is missing? +- **PRIORITY CHECK: If anything is unclear, missing, or has multiple interpretations, you MUST ask for clarification FIRST - do NOT proceed with work** - Never write down your full final answer or report in thinking process, but only outline - CRITICAL: After thinking, you MUST provide your actual response to the user. Thinking is for planning, the response is for delivery. - Your response must contain the actual answer, not just a reference to what you thought about + +**WORKFLOW PRIORITY: CLARIFY → PLAN → ACT** +1. **FIRST**: Analyze the request in your thinking - identify what's unclear, missing, or ambiguous +2. **SECOND**: If clarification is needed, call `ask_clarification` tool IMMEDIATELY - do NOT start working +3. **THIRD**: Only after all clarifications are resolved, proceed with planning and execution + +**CRITICAL RULE: Clarification ALWAYS comes BEFORE action. Never start working and clarify mid-execution.** + +**MANDATORY Clarification Scenarios - You MUST call ask_clarification BEFORE starting work when:** + +1. **Missing Information** (`missing_info`): Required details not provided + - Example: User says "create a web scraper" but doesn't specify the target website + - Example: "Deploy the app" without specifying environment + - **REQUIRED ACTION**: Call ask_clarification to get the missing information + +2. **Ambiguous Requirements** (`ambiguous_requirement`): Multiple valid interpretations exist + - Example: "Optimize the code" could mean performance, readability, or memory usage + - Example: "Make it better" is unclear what aspect to improve + - **REQUIRED ACTION**: Call ask_clarification to clarify the exact requirement + +3. 
**Approach Choices** (`approach_choice`): Several valid approaches exist + - Example: "Add authentication" could use JWT, OAuth, session-based, or API keys + - Example: "Store data" could use database, files, cache, etc. + - **REQUIRED ACTION**: Call ask_clarification to let user choose the approach + +4. **Risky Operations** (`risk_confirmation`): Destructive actions need confirmation + - Example: Deleting files, modifying production configs, database operations + - Example: Overwriting existing code or data + - **REQUIRED ACTION**: Call ask_clarification to get explicit confirmation + +5. **Suggestions** (`suggestion`): You have a recommendation but want approval + - Example: "I recommend refactoring this code. Should I proceed?" + - **REQUIRED ACTION**: Call ask_clarification to get approval + +**STRICT ENFORCEMENT:** +- ❌ DO NOT start working and then ask for clarification mid-execution - clarify FIRST +- ❌ DO NOT skip clarification for "efficiency" - accuracy matters more than speed +- ❌ DO NOT make assumptions when information is missing - ALWAYS ask +- ❌ DO NOT proceed with guesses - STOP and call ask_clarification first +- ✅ Analyze the request in thinking → Identify unclear aspects → Ask BEFORE any action +- ✅ If you identify the need for clarification in your thinking, you MUST call the tool IMMEDIATELY +- ✅ After calling ask_clarification, execution will be interrupted automatically +- ✅ Wait for user response - do NOT continue with assumptions + +**How to Use:** +```python +ask_clarification( + question="Your specific question here?", + clarification_type="missing_info", # or other type + context="Why you need this information", # optional but recommended + options=["option1", "option2"] # optional, for choices +) +``` + +**Example:** +User: "Deploy the application" +You (thinking): Missing environment info - I MUST ask for clarification +You (action): ask_clarification( + question="Which environment should I deploy to?", + 
clarification_type="approach_choice", + context="I need to know the target environment for proper configuration", + options=["development", "staging", "production"] +) +[Execution stops - wait for user response] + +User: "staging" +You: "Deploying to staging..." [proceed] + + You have access to skills that provide optimized workflows for specific tasks. Each skill contains best practices, frameworks, and references to additional resources. @@ -48,6 +119,7 @@ All temporary work happens in `/mnt/user-data/workspace`. Final deliverables mus +- **Clarification First**: ALWAYS clarify unclear/missing/ambiguous requirements BEFORE starting work - never assume or guess - Skill First: Always load the relevant skill before starting **complex** tasks. - Progressive Loading: Load resources incrementally as referenced in skills - Output Files: Final deliverables must be in `/mnt/user-data/outputs` diff --git a/backend/src/agents/middlewares/clarification_middleware.py b/backend/src/agents/middlewares/clarification_middleware.py new file mode 100644 index 0000000..2202bb0 --- /dev/null +++ b/backend/src/agents/middlewares/clarification_middleware.py @@ -0,0 +1,177 @@ +"""Middleware for intercepting clarification requests and presenting them to the user.""" + +from collections.abc import Callable +from typing import override + +from langchain.agents import AgentState +from langchain.agents.middleware import AgentMiddleware +from langchain_core.messages import AIMessage, ToolMessage +from langgraph.graph import END +from langgraph.prebuilt.tool_node import ToolCallRequest +from langgraph.types import Command + + +class ClarificationMiddlewareState(AgentState): + """Compatible with the `ThreadState` schema.""" + + pass + + +class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]): + """Intercepts clarification tool calls and interrupts execution to present questions to the user. + + When the model calls the `ask_clarification` tool, this middleware: + 1. 
def _format_clarification_message(self, args: dict) -> str:
    """Render ``ask_clarification`` tool-call arguments as user-facing text.

    The arguments are taken from the model's raw tool call, so missing keys,
    explicit ``None`` values and a single option passed as a bare string are
    all tolerated instead of leaking ``"None"`` into the message or listing a
    string character by character.

    Args:
        args: The tool call arguments. Reads ``question``,
            ``clarification_type``, ``context`` and ``options``.

    Returns:
        The icon-prefixed question (preceded by the context when one is
        given), followed by a numbered list of options when any are supplied.
    """
    question = args.get("question") or ""
    clarification_type = args.get("clarification_type") or "missing_info"
    context = args.get("context")
    options = args.get("options") or []
    if not isinstance(options, (list, tuple)):
        # A lone value (typically a string) is one option, not a sequence.
        options = [options]

    # Type-specific icons
    type_icons = {
        "missing_info": "❓",
        "ambiguous_requirement": "🤔",
        "approach_choice": "🔀",
        "risk_confirmation": "⚠️",
        "suggestion": "💡",
    }

    # Unknown or malformed (non-string) types fall back to the generic icon.
    icon = type_icons.get(clarification_type, "❓") if isinstance(clarification_type, str) else "❓"

    message_parts = []

    if context:
        # Context goes first as background; the leading "\n" on the question
        # leaves a blank line between the two once the parts are joined.
        message_parts.append(f"{icon} {context}")
        message_parts.append(f"\n{question}")
    else:
        message_parts.append(f"{icon} {question}")

    if options:
        message_parts.append("")  # blank line for spacing
        message_parts.extend(f" {i}. {option}" for i, option in enumerate(options, 1))

    return "\n".join(message_parts)
def _reverse_resolve_path(self, path: str) -> str:
    """Map a local filesystem path back to its container path.

    A mapping applies only when ``path`` is the mapped local directory itself
    or lies beneath it. A sibling that merely shares the same leading
    characters (``<root>/data-backup`` versus a mapped ``<root>/data``) is
    left alone.

    Args:
        path: Local path that might need to be mapped to a container path.

    Returns:
        The container path if a mapping applies, otherwise the resolved
        (absolute) form of ``path``.
    """
    path_str = str(Path(path).resolve())

    # Resolve each mapped local root once, then try the longest root first so
    # a nested mapping wins over its parent.
    local_roots = sorted(
        ((container_path, str(Path(local_path).resolve())) for container_path, local_path in self.path_mappings.items()),
        key=lambda item: len(item[1]),
        reverse=True,
    )

    for container_path, local_root in local_roots:
        if path_str == local_root:
            return container_path

        # Require a separator right after the root so that only real
        # descendants match, never look-alike sibling names.
        prefix = local_root.rstrip("/") + "/"
        if path_str.startswith(prefix):
            relative = path_str[len(prefix) :].lstrip("/")
            return f"{container_path}/{relative}" if relative else container_path

    # No mapping found, return the resolved path unchanged
    return path_str
def _resolve_paths_in_command(self, command: str) -> str:
    """Rewrite container paths inside a shell command to local paths.

    Every occurrence of a mapped container path is handed to
    ``self._resolve_path`` and replaced by the result. Only the container
    prefix is replaced, so whatever follows it in the command (sub-directories,
    file names, quotes, operators) is left exactly as written.

    A container path is matched only when it is not immediately followed by
    another file-name character, so ``/mnt/user-data`` is not rewritten inside
    ``/mnt/user-database`` and a nested mapping cannot capture a longer
    sibling name.

    Args:
        command: Command string that may contain container paths.

    Returns:
        The command with container paths resolved to local paths; the command
        unchanged when no mappings are configured.
    """
    import re

    # Longest container path first so nested mappings win in the alternation.
    container_paths = sorted(self.path_mappings, key=len, reverse=True)
    if not container_paths:
        return command

    def as_pattern(container_path: str) -> str:
        escaped = re.escape(container_path)
        if container_path.endswith("/"):
            # The key already ends at a separator; any continuation is a child.
            return escaped
        # Reject matches that stop in the middle of a longer file name.
        return escaped + r"(?![\w.-])"

    pattern = re.compile("|".join(f"(?:{as_pattern(p)})" for p in container_paths))

    return pattern.sub(lambda match: self._resolve_path(match.group(0)), command)
# NOTE: the decorator is given parse_docstring=True, so the docstring below is
# presumably consumed at runtime to build the tool description and argument
# schema shown to the model (confirm against the LangChain `tool` docs).
# Treat its wording as behavior, not as ordinary documentation.
@tool("ask_clarification", parse_docstring=True, return_direct=True)
def ask_clarification_tool(
    question: str,
    clarification_type: Literal[
        "missing_info",
        "ambiguous_requirement",
        "approach_choice",
        "risk_confirmation",
        "suggestion",
    ],
    context: str | None = None,
    options: list[str] | None = None,
) -> str:
    """Ask the user for clarification when you need more information to proceed.

    Use this tool when you encounter situations where you cannot proceed without user input:

    - **Missing information**: Required details not provided (e.g., file paths, URLs, specific requirements)
    - **Ambiguous requirements**: Multiple valid interpretations exist
    - **Approach choices**: Several valid approaches exist and you need user preference
    - **Risky operations**: Destructive actions that need explicit confirmation (e.g., deleting files, modifying production)
    - **Suggestions**: You have a recommendation but want user approval before proceeding

    The execution will be interrupted and the question will be presented to the user.
    Wait for the user's response before continuing.

    When to use ask_clarification:
    - You need information that wasn't provided in the user's request
    - The requirement can be interpreted in multiple ways
    - Multiple valid implementation approaches exist
    - You're about to perform a potentially dangerous operation
    - You have a recommendation but need user approval

    Best practices:
    - Ask ONE clarification at a time for clarity
    - Be specific and clear in your question
    - Don't make assumptions when clarification is needed
    - For risky operations, ALWAYS ask for confirmation
    - After calling this tool, execution will be interrupted automatically

    Args:
        question: The clarification question to ask the user. Be specific and clear.
        clarification_type: The type of clarification needed (missing_info, ambiguous_requirement, approach_choice, risk_confirmation, suggestion).
        context: Optional context explaining why clarification is needed. Helps the user understand the situation.
        options: Optional list of choices (for approach_choice or suggestion types). Present clear options for the user to choose from.
    """
    # Placeholder body: ClarificationMiddleware intercepts tool calls named
    # "ask_clarification" and returns its own Command without invoking the
    # handler, so this return value is only a fallback for when the tool runs
    # without that middleware in place.
    return "Clarification request processed by middleware"