Fix: clarification bugs - max rounds, locale passing, and over-clarification (#647)

Fixes: Max rounds bug, locale passing bug, over-clarification issue

* resolve Copilot spelling comments

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
jimmyuconn1982
2025-10-24 16:43:39 +08:00
committed by GitHub
parent 5eada04f50
commit 2001a7c223
6 changed files with 119 additions and 40 deletions

View File

@@ -201,17 +201,20 @@ def planner_node(
configurable = Configuration.from_runnable_config(config)
plan_iterations = state["plan_iterations"] if state.get("plan_iterations", 0) else 0
# For clarification feature: only send the final clarified question to planner
if state.get("enable_clarification", False) and state.get("clarified_question"):
# Create a clean state with only the clarified question
clean_state = {
"messages": [{"role": "user", "content": state["clarified_question"]}],
"locale": state.get("locale", "en-US"),
"research_topic": state["clarified_question"],
}
messages = apply_prompt_template("planner", clean_state, configurable, state.get("locale", "en-US"))
# For clarification feature: use the clarified research topic (complete history)
if state.get("enable_clarification", False) and state.get(
"clarified_research_topic"
):
# Modify state to use clarified research topic instead of full conversation
modified_state = state.copy()
modified_state["messages"] = [
{"role": "user", "content": state["clarified_research_topic"]}
]
modified_state["research_topic"] = state["clarified_research_topic"]
messages = apply_prompt_template("planner", modified_state, configurable, state.get("locale", "en-US"))
logger.info(
f"Clarification mode: Using clarified question: {state['clarified_question']}"
f"Clarification mode: Using clarified research topic: {state['clarified_research_topic']}"
)
else:
# Normal mode: use full conversation history
@@ -435,24 +438,38 @@ def coordinator_node(
}
)
current_response = latest_user_content or "No response"
logger.info(
"Clarification round %s/%s | topic: %s | latest user content: %s",
"Clarification round %s/%s | topic: %s | current user response: %s",
clarification_rounds,
max_clarification_rounds,
clarified_topic or initial_topic,
latest_user_content or "N/A",
current_response,
)
current_response = latest_user_content or "No response"
clarification_context = f"""Continuing clarification (round {clarification_rounds}/{max_clarification_rounds}):
User's latest response: {current_response}
Ask for remaining missing dimensions. Do NOT repeat questions or start new topics."""
messages.append({"role": "system", "content": clarification_context})
# Bind both clarification tools
# Bind both clarification tools - let LLM choose the appropriate one
tools = [handoff_to_planner, handoff_after_clarification]
# Check if we've already reached max rounds
if clarification_rounds >= max_clarification_rounds:
# Max rounds reached - force handoff by adding system instruction
logger.warning(
f"Max clarification rounds ({max_clarification_rounds}) reached. Forcing handoff to planner. Using prepared clarified topic: {clarified_topic}"
)
# Add system instruction to force handoff - let LLM choose the right tool
messages.append(
{
"role": "system",
"content": f"MAX ROUNDS REACHED. You MUST call handoff_after_clarification (not handoff_to_planner) with the appropriate locale based on the user's language and research_topic='{clarified_topic}'. Do not ask any more questions.",
}
)
response = (
get_llm_by_type(AGENT_LLM_MAP["coordinator"])
.bind_tools(tools)
@@ -474,7 +491,15 @@ def coordinator_node(
# --- Process LLM response ---
# No tool calls - LLM is asking a clarifying question
if not response.tool_calls and response.content:
if clarification_rounds < max_clarification_rounds:
# Check if we've reached max rounds - if so, force handoff to planner
if clarification_rounds >= max_clarification_rounds:
logger.warning(
f"Max clarification rounds ({max_clarification_rounds}) reached. "
"LLM didn't call handoff tool, forcing handoff to planner."
)
goto = "planner"
# Continue to final section instead of early return
else:
# Continue clarification process
clarification_rounds += 1
# Do NOT add LLM response to clarification_history - only user responses
@@ -499,20 +524,11 @@ def coordinator_node(
"clarification_history": clarification_history,
"clarified_research_topic": clarified_topic,
"is_clarification_complete": False,
"clarified_question": "",
"goto": goto,
"__interrupt__": [("coordinator", response.content)],
},
goto=goto,
)
else:
# Max rounds reached - no more questions allowed
logger.warning(
f"Max clarification rounds ({max_clarification_rounds}) reached. Handing off to planner. Using prepared clarified topic: {clarified_topic}"
)
goto = "planner"
if state.get("enable_background_investigation"):
goto = "background_investigator"
else:
# LLM called a tool (handoff) or has no content - clarification complete
if response.tool_calls:
@@ -583,11 +599,7 @@ def coordinator_node(
clarified_research_topic_value = clarified_topic or research_topic
if enable_clarification:
handoff_topic = clarified_topic or research_topic
else:
handoff_topic = research_topic
# clarified_research_topic: Complete clarified topic with all clarification rounds
return Command(
update={
"messages": messages,
@@ -598,7 +610,6 @@ def coordinator_node(
"clarification_rounds": clarification_rounds,
"clarification_history": clarification_history,
"is_clarification_complete": goto != "coordinator",
"clarified_question": handoff_topic if goto != "coordinator" else "",
"goto": goto,
},
goto=goto,

View File

@@ -16,7 +16,9 @@ class State(MessagesState):
# Runtime Variables
locale: str = "en-US"
research_topic: str = ""
clarified_research_topic: str = ""
clarified_research_topic: str = (
"" # Complete/final clarified topic with all clarification rounds
)
observations: list[str] = []
resources: list[Resource] = []
plan_iterations: int = 0
@@ -33,7 +35,6 @@ class State(MessagesState):
clarification_rounds: int = 0
clarification_history: list[str] = field(default_factory=list)
is_clarification_complete: bool = False
clarified_question: str = ""
max_clarification_rounds: int = (
3 # Default: 3 rounds (only used when enable_clarification=True)
)

View File

@@ -64,18 +64,32 @@ Your primary responsibilities are:
Goal: Get 2+ dimensions before handing off to planner.
## Three Key Dimensions
## Smart Clarification Rules
A specific research question needs at least 2 of these 3 dimensions:
**DO NOT clarify if the topic already contains:**
- Complete research plan/title (e.g., "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model")
- Specific technology + application + goal (e.g., "Using deep learning to optimize recommendation algorithms")
- Clear research scope (e.g., "Blockchain applications in financial services research")
**ONLY clarify if the topic is genuinely vague:**
- Too broad: "AI", "cloud computing", "market analysis"
- Missing key elements: "research technology" (what technology?), "analyze market" (which market?)
- Ambiguous: "development trends" (trends of what?)
## Three Key Dimensions (Only for vague topics)
A vague research question needs at least 2 of these 3 dimensions:
1. Specific Tech/App: "Kubernetes", "GPT model" vs "cloud computing", "AI"
2. Clear Focus: "architecture design", "performance optimization" vs "technology aspect"
2. Clear Focus: "architecture design", "performance optimization" vs "technology aspect"
3. Scope: "2024 China e-commerce", "financial sector"
## When to Continue vs. Handoff
- 0-1 dimensions: Ask for missing ones with 3-5 concrete examples
- 2+ dimensions: Call handoff_to_planner() or handoff_after_clarification()
**If the topic is already specific enough, hand off directly to planner.**
- Max rounds reached: Must call handoff_after_clarification() regardless
## Response Guidelines

View File

@@ -54,8 +54,8 @@ def get_web_search_tool(max_search_results: int):
search_depth: str = search_config.get("search_depth", "advanced")
include_raw_content: bool = search_config.get("include_raw_content", True)
include_images: bool = search_config.get("include_images", True)
include_image_descriptions: bool = (
include_images and search_config.get("include_image_descriptions", True)
include_image_descriptions: bool = include_images and search_config.get(
"include_image_descriptions", True
)
logger.info(

View File

@@ -1644,7 +1644,6 @@ def test_clarification_handoff_combines_history():
)
assert update["research_topic"] == "Research artificial intelligence"
assert update["clarified_research_topic"] == expected_topic
assert update["clarified_question"] == expected_topic
def test_clarification_history_reconstructed_from_messages():
@@ -1863,3 +1862,55 @@ def test_clarification_no_history_defaults_to_topic():
assert hasattr(result, "update")
assert result.update["research_topic"] == "What is quantum computing?"
assert result.update["clarified_research_topic"] == "What is quantum computing?"
def test_clarification_skips_specific_topics():
    """Coordinator should skip clarification for already specific topics."""
    # Local imports keep these test-only dependencies out of module scope.
    from langchain_core.messages import AIMessage
    from langchain_core.runnables import RunnableConfig
    # Clarification is enabled but the topic is already fully specified,
    # so the coordinator is expected to hand off without asking questions.
    test_state = {
        "messages": [
            {
                "role": "user",
                "content": "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model",
            }
        ],
        "enable_clarification": True,
        "clarification_rounds": 0,
        "clarification_history": [],
        "max_clarification_rounds": 3,
        "research_topic": "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model",
        "locale": "en-US",
    }
    config = RunnableConfig(configurable={"thread_id": "specific-topic-test"})
    # Simulated LLM response: it immediately calls handoff_to_planner
    # (the direct-handoff tool) rather than asking a clarifying question.
    mock_response = AIMessage(
        content="I understand you want to research AI e-commerce video synthesis technology. Let me hand this off to the planner.",
        tool_calls=[
            {
                "name": "handoff_to_planner",
                "args": {
                    "locale": "en-US",
                    "research_topic": "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model",
                },
                "id": "tool-call-handoff",
                "type": "tool_call",
            }
        ],
    )
    # Patch the LLM factory so coordinator_node receives the canned response.
    with patch("src.graph.nodes.get_llm_by_type") as mock_get_llm:
        mock_llm = MagicMock()
        mock_llm.bind_tools.return_value.invoke.return_value = mock_response
        mock_get_llm.return_value = mock_llm
        result = coordinator_node(test_state, config)
    # The node should route straight to the planner with the topic unchanged.
    assert hasattr(result, "update")
    assert result.goto == "planner"
    assert (
        result.update["research_topic"]
        == "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model"
    )

View File

@@ -267,7 +267,9 @@ class TestGetWebSearchTool:
tool = get_web_search_tool(max_search_results=5)
assert tool.include_answer is True
assert tool.include_images is False
assert tool.include_image_descriptions is False # should be False since include_images is False
assert (
tool.include_image_descriptions is False
) # should be False since include_images is False
assert tool.search_depth == "advanced" # default
assert tool.include_raw_content is True # default
assert tool.include_domains == [] # default