From 2001a7c223886eef6177a6c5177ecef7846289dc Mon Sep 17 00:00:00 2001
From: jimmyuconn1982 <jimmy.cdradio@gmail.com>
Date: Fri, 24 Oct 2025 16:43:39 +0800
Subject: [PATCH] Fix: clarification bugs - max rounds, locale passing, and
 over-clarification (#647)

Fixes: Max rounds bug, locale passing bug, over-clarification issue

* reslove Copilot spelling comments

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
---
 src/graph/nodes.py              | 73 +++++++++++++++++++--------------
 src/graph/types.py              |  5 ++-
 src/prompts/coordinator.md      | 20 +++++++--
 src/tools/search.py             |  4 +-
 tests/integration/test_nodes.py | 53 +++++++++++++++++++++++-
 tests/unit/tools/test_search.py |  4 +-
 6 files changed, 119 insertions(+), 40 deletions(-)

diff --git a/src/graph/nodes.py b/src/graph/nodes.py
index f436dc1..990864b 100644
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -201,17 +201,20 @@ def planner_node(
     configurable = Configuration.from_runnable_config(config)
     plan_iterations = state["plan_iterations"] if state.get("plan_iterations", 0) else 0
 
-    # For clarification feature: only send the final clarified question to planner
-    if state.get("enable_clarification", False) and state.get("clarified_question"):
-        # Create a clean state with only the clarified question
-        clean_state = {
-            "messages": [{"role": "user", "content": state["clarified_question"]}],
-            "locale": state.get("locale", "en-US"),
-            "research_topic": state["clarified_question"],
-        }
-        messages = apply_prompt_template("planner", clean_state, configurable, state.get("locale", "en-US"))
+    # For clarification feature: use the clarified research topic (complete history)
+    if state.get("enable_clarification", False) and state.get(
+        "clarified_research_topic"
+    ):
+        # Modify state to use clarified research topic instead of full conversation
+        modified_state = state.copy()
+        modified_state["messages"] = [
+            {"role": "user", "content": state["clarified_research_topic"]}
+        ]
+        modified_state["research_topic"] = state["clarified_research_topic"]
+        messages = apply_prompt_template("planner", modified_state, configurable, state.get("locale", "en-US"))
+
         logger.info(
-            f"Clarification mode: Using clarified question: {state['clarified_question']}"
+            f"Clarification mode: Using clarified research topic: {state['clarified_research_topic']}"
         )
     else:
         # Normal mode: use full conversation history
@@ -435,24 +438,38 @@ def coordinator_node(
                 }
             )
 
+        current_response = latest_user_content or "No response"
         logger.info(
-            "Clarification round %s/%s | topic: %s | latest user content: %s",
+            "Clarification round %s/%s | topic: %s | current user response: %s",
             clarification_rounds,
             max_clarification_rounds,
             clarified_topic or initial_topic,
-            latest_user_content or "N/A",
+            current_response,
         )
 
-        current_response = latest_user_content or "No response"
-
         clarification_context = f"""Continuing clarification (round {clarification_rounds}/{max_clarification_rounds}):
             User's latest response: {current_response}
             Ask for remaining missing dimensions. Do NOT repeat questions or start new topics."""
 
         messages.append({"role": "system", "content": clarification_context})
 
-        # Bind both clarification tools
+        # Bind both clarification tools - let LLM choose the appropriate one
         tools = [handoff_to_planner, handoff_after_clarification]
+
+        # Check if we've already reached max rounds
+        if clarification_rounds >= max_clarification_rounds:
+            # Max rounds reached - force handoff by adding system instruction
+            logger.warning(
+                f"Max clarification rounds ({max_clarification_rounds}) reached. Forcing handoff to planner. Using prepared clarified topic: {clarified_topic}"
+            )
+            # Add system instruction to force handoff - let LLM choose the right tool
+            messages.append(
+                {
+                    "role": "system",
+                    "content": f"MAX ROUNDS REACHED. You MUST call handoff_after_clarification (not handoff_to_planner) with the appropriate locale based on the user's language and research_topic='{clarified_topic}'. Do not ask any more questions.",
+                }
+            )
+
         response = (
             get_llm_by_type(AGENT_LLM_MAP["coordinator"])
             .bind_tools(tools)
@@ -474,7 +491,15 @@ def coordinator_node(
         # --- Process LLM response ---
         # No tool calls - LLM is asking a clarifying question
         if not response.tool_calls and response.content:
-            if clarification_rounds < max_clarification_rounds:
+            # Check if we've reached max rounds - if so, force handoff to planner
+            if clarification_rounds >= max_clarification_rounds:
+                logger.warning(
+                    f"Max clarification rounds ({max_clarification_rounds}) reached. "
+                    "LLM didn't call handoff tool, forcing handoff to planner."
+                )
+                goto = "planner"
+                # Continue to final section instead of early return
+            else:
                 # Continue clarification process
                 clarification_rounds += 1
                 # Do NOT add LLM response to clarification_history - only user responses
@@ -499,20 +524,11 @@ def coordinator_node(
                         "clarification_history": clarification_history,
                         "clarified_research_topic": clarified_topic,
                         "is_clarification_complete": False,
-                        "clarified_question": "",
                         "goto": goto,
                         "__interrupt__": [("coordinator", response.content)],
                     },
                     goto=goto,
                 )
-            else:
-                # Max rounds reached - no more questions allowed
-                logger.warning(
-                    f"Max clarification rounds ({max_clarification_rounds}) reached. Handing off to planner. Using prepared clarified topic: {clarified_topic}"
-                )
-                goto = "planner"
-                if state.get("enable_background_investigation"):
-                    goto = "background_investigator"
         else:
             # LLM called a tool (handoff) or has no content - clarification complete
             if response.tool_calls:
@@ -583,11 +599,7 @@ def coordinator_node(
 
     clarified_research_topic_value = clarified_topic or research_topic
 
-    if enable_clarification:
-        handoff_topic = clarified_topic or research_topic
-    else:
-        handoff_topic = research_topic
-
+    # clarified_research_topic: Complete clarified topic with all clarification rounds
     return Command(
         update={
             "messages": messages,
@@ -598,7 +610,6 @@ def coordinator_node(
             "clarification_rounds": clarification_rounds,
             "clarification_history": clarification_history,
             "is_clarification_complete": goto != "coordinator",
-            "clarified_question": handoff_topic if goto != "coordinator" else "",
             "goto": goto,
         },
         goto=goto,
diff --git a/src/graph/types.py b/src/graph/types.py
index 8bac1c0..a977de7 100644
--- a/src/graph/types.py
+++ b/src/graph/types.py
@@ -16,7 +16,9 @@ class State(MessagesState):
     # Runtime Variables
     locale: str = "en-US"
     research_topic: str = ""
-    clarified_research_topic: str = ""
+    clarified_research_topic: str = (
+        ""  # Complete/final clarified topic with all clarification rounds
+    )
     observations: list[str] = []
     resources: list[Resource] = []
     plan_iterations: int = 0
@@ -33,7 +35,6 @@ class State(MessagesState):
     clarification_rounds: int = 0
     clarification_history: list[str] = field(default_factory=list)
     is_clarification_complete: bool = False
-    clarified_question: str = ""
     max_clarification_rounds: int = (
         3  # Default: 3 rounds (only used when enable_clarification=True)
     )
diff --git a/src/prompts/coordinator.md b/src/prompts/coordinator.md
index 617fdc3..49ece4a 100644
--- a/src/prompts/coordinator.md
+++ b/src/prompts/coordinator.md
@@ -64,18 +64,32 @@ Your primary responsibilities are:
 
 Goal: Get 2+ dimensions before handing off to planner.
 
-## Three Key Dimensions
+## Smart Clarification Rules
 
-A specific research question needs at least 2 of these 3 dimensions:
+**DO NOT clarify if the topic already contains:**
+- Complete research plan/title (e.g., "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model")
+- Specific technology + application + goal (e.g., "Using deep learning to optimize recommendation algorithms")
+- Clear research scope (e.g., "Blockchain applications in financial services research")
+
+**ONLY clarify if the topic is genuinely vague:**
+- Too broad: "AI", "cloud computing", "market analysis"
+- Missing key elements: "research technology" (what technology?), "analyze market" (which market?)
+- Ambiguous: "development trends" (trends of what?)
+
+## Three Key Dimensions (Only for vague topics)
+
+A vague research question needs at least 2 of these 3 dimensions:
 
 1. Specific Tech/App: "Kubernetes", "GPT model" vs "cloud computing", "AI"
-2. Clear Focus: "architecture design", "performance optimization" vs "technology aspect"
+2. Clear Focus: "architecture design", "performance optimization" vs "technology aspect"  
 3. Scope: "2024 China e-commerce", "financial sector"
 
 ## When to Continue vs. Handoff
 
 - 0-1 dimensions: Ask for missing ones with 3-5 concrete examples
 - 2+ dimensions: Call handoff_to_planner() or handoff_after_clarification()
+
+**If the topic is already specific enough, hand off directly to planner.**
 - Max rounds reached: Must call handoff_after_clarification() regardless
 
 ## Response Guidelines
diff --git a/src/tools/search.py b/src/tools/search.py
index 11421d0..747519a 100644
--- a/src/tools/search.py
+++ b/src/tools/search.py
@@ -54,8 +54,8 @@ def get_web_search_tool(max_search_results: int):
         search_depth: str = search_config.get("search_depth", "advanced")
         include_raw_content: bool = search_config.get("include_raw_content", True)
         include_images: bool = search_config.get("include_images", True)
-        include_image_descriptions: bool = (
-            include_images and search_config.get("include_image_descriptions", True)
+        include_image_descriptions: bool = include_images and search_config.get(
+            "include_image_descriptions", True
         )
 
         logger.info(
diff --git a/tests/integration/test_nodes.py b/tests/integration/test_nodes.py
index 7496019..f7de25b 100644
--- a/tests/integration/test_nodes.py
+++ b/tests/integration/test_nodes.py
@@ -1644,7 +1644,6 @@ def test_clarification_handoff_combines_history():
     )
     assert update["research_topic"] == "Research artificial intelligence"
     assert update["clarified_research_topic"] == expected_topic
-    assert update["clarified_question"] == expected_topic
 
 
 def test_clarification_history_reconstructed_from_messages():
@@ -1863,3 +1862,55 @@ def test_clarification_no_history_defaults_to_topic():
     assert hasattr(result, "update")
     assert result.update["research_topic"] == "What is quantum computing?"
     assert result.update["clarified_research_topic"] == "What is quantum computing?"
+
+
+def test_clarification_skips_specific_topics():
+    """Coordinator should skip clarification for already specific topics."""
+    from langchain_core.messages import AIMessage
+    from langchain_core.runnables import RunnableConfig
+
+    test_state = {
+        "messages": [
+            {
+                "role": "user",
+                "content": "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model",
+            }
+        ],
+        "enable_clarification": True,
+        "clarification_rounds": 0,
+        "clarification_history": [],
+        "max_clarification_rounds": 3,
+        "research_topic": "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model",
+        "locale": "en-US",
+    }
+
+    config = RunnableConfig(configurable={"thread_id": "specific-topic-test"})
+
+    mock_response = AIMessage(
+        content="I understand you want to research AI e-commerce video synthesis technology. Let me hand this off to the planner.",
+        tool_calls=[
+            {
+                "name": "handoff_to_planner",
+                "args": {
+                    "locale": "en-US",
+                    "research_topic": "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model",
+                },
+                "id": "tool-call-handoff",
+                "type": "tool_call",
+            }
+        ],
+    )
+
+    with patch("src.graph.nodes.get_llm_by_type") as mock_get_llm:
+        mock_llm = MagicMock()
+        mock_llm.bind_tools.return_value.invoke.return_value = mock_response
+        mock_get_llm.return_value = mock_llm
+
+        result = coordinator_node(test_state, config)
+
+    assert hasattr(result, "update")
+    assert result.goto == "planner"
+    assert (
+        result.update["research_topic"]
+        == "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model"
+    )
diff --git a/tests/unit/tools/test_search.py b/tests/unit/tools/test_search.py
index a719bda..c13d455 100644
--- a/tests/unit/tools/test_search.py
+++ b/tests/unit/tools/test_search.py
@@ -267,7 +267,9 @@ class TestGetWebSearchTool:
         tool = get_web_search_tool(max_search_results=5)
         assert tool.include_answer is True
         assert tool.include_images is False
-        assert tool.include_image_descriptions is False  # should be False since include_images is False
+        assert (
+            tool.include_image_descriptions is False
+        )  # should be False since include_images is False
         assert tool.search_depth == "advanced"  # default
         assert tool.include_raw_content is True  # default
         assert tool.include_domains == []  # default