diff --git a/src/config/configuration.py b/src/config/configuration.py index e7845d5..890e0b5 100644 --- a/src/config/configuration.py +++ b/src/config/configuration.py @@ -51,6 +51,7 @@ class Configuration: mcp_settings: dict = None # MCP settings, including dynamic loaded tools report_style: str = ReportStyle.ACADEMIC.value # Report style enable_deep_thinking: bool = False # Whether to enable deep thinking + enforce_web_search: bool = False # Enforce at least one web search step in every plan @classmethod def from_runnable_config( diff --git a/src/graph/nodes.py b/src/graph/nodes.py index 9665524..e6eaa56 100644 --- a/src/graph/nodes.py +++ b/src/graph/nodes.py @@ -75,6 +75,54 @@ def needs_clarification(state: dict) -> bool: ) +def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict: + """ + Validate and fix a plan to ensure it meets requirements. + + Args: + plan: The plan dict to validate + enforce_web_search: If True, ensure at least one step has need_search=true + + Returns: + The validated/fixed plan dict + """ + if not isinstance(plan, dict): + return plan + + steps = plan.get("steps", []) + + if enforce_web_search: + # Check if any step has need_search=true + has_search_step = any(step.get("need_search", False) for step in steps) + + if not has_search_step and steps: + # Ensure first research step has web search enabled + for idx, step in enumerate(steps): + if step.get("step_type") == "research": + step["need_search"] = True + logger.info(f"Enforced web search on research step at index {idx}") + break + else: + # Fallback: If no research step exists, convert the first step to a research step with web search enabled. + # This ensures that at least one step will perform a web search as required. + steps[0]["step_type"] = "research" + steps[0]["need_search"] = True + logger.info("Converted first step to research with web search enforcement") + elif not has_search_step and not steps: + # Add a default research step if no steps exist + logger.warning("Plan has no steps. Adding default research step.") + plan["steps"] = [ + { + "need_search": True, + "title": "Initial Research", + "description": "Gather information about the topic", + "step_type": "research", + } + ] + + return plan + + def background_investigation_node(state: State, config: RunnableConfig): logger.info("background investigation node is running.") configurable = Configuration.from_runnable_config(config) @@ -182,6 +230,11 @@ def planner_node( return Command(goto="reporter") else: return Command(goto="__end__") + + # Validate and fix plan to ensure web search requirements are met + if isinstance(curr_plan, dict): + curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search) + if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"): logger.info("Planner response has enough context.") new_plan = Plan.model_validate(curr_plan) @@ -234,6 +287,9 @@ def human_feedback_node( plan_iterations += 1 # parse the plan new_plan = json.loads(current_plan) + # Validate and fix plan to ensure web search requirements are met + configurable = Configuration.from_runnable_config(config) + new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search) except json.JSONDecodeError: logger.warning("Planner response is not a valid JSON") if plan_iterations > 1: # the plan_iterations is increased before this check @@ -508,9 +564,16 @@ def coordinator_node( logger.error(f"Error processing tool calls: {e}") goto = "planner" else: - # No tool calls - both modes should goto __end__ - logger.warning("LLM didn't call any tools. Staying at __end__.") - goto = "__end__" + # No tool calls detected - fallback to planner instead of ending + logger.warning( + "LLM didn't call any tools. This may indicate tool calling issues with the model. " + "Falling back to planner to ensure research proceeds." + ) + # Log full response for debugging + logger.debug(f"Coordinator response content: {response.content}") + logger.debug(f"Coordinator response object: {response}") + # Fallback to planner to ensure workflow continues + goto = "planner" # Apply background_investigation routing if enabled (unified logic) if goto == "planner" and state.get("enable_background_investigation"): diff --git a/src/prompts/coordinator.md b/src/prompts/coordinator.md index a4d8881..617fdc3 100644 --- a/src/prompts/coordinator.md +++ b/src/prompts/coordinator.md @@ -51,6 +51,15 @@ Your primary responsibilities are: - For all other inputs (category 3 - which includes most questions): - call `handoff_to_planner()` tool to handoff to planner for research without ANY thoughts. +# Tool Calling Requirements + +**CRITICAL**: You MUST call one of the available tools for research requests. This is mandatory: +- Do NOT respond to research questions without calling a tool +- For research questions, ALWAYS use either `handoff_to_planner()` or `handoff_after_clarification()` +- Tool calling is required to ensure the workflow proceeds correctly +- Never skip tool calling even if you think you can answer the question directly +- Responding with text alone for research requests will cause the workflow to fail + # Clarification Process (When Enabled) Goal: Get 2+ dimensions before handing off to planner. diff --git a/src/prompts/planner.md b/src/prompts/planner.md index f4a8d64..12be23b 100644 --- a/src/prompts/planner.md +++ b/src/prompts/planner.md @@ -64,6 +64,8 @@ Different types of steps have different web search requirements: - Collecting competitor analysis - Researching current events or news - Finding statistical data or reports + - **CRITICAL**: Research plans MUST include at least one step with `need_search: true` to gather real information + - Without web search, the report will contain hallucinated/fabricated data 2. **Data Processing Steps** (`need_search: false`): - API calls and data extraction @@ -71,6 +73,15 @@ Different types of steps have different web search requirements: - Raw data collection from existing sources - Mathematical calculations and analysis - Statistical computations and data processing + - **NOTE**: Processing steps alone are insufficient - you must include research steps with web search + +## Web Search Requirement + +**MANDATORY**: Every research plan MUST include at least one step with `need_search: true`. This is critical because: +- Without web search, models generate hallucinated data +- Research steps must gather real information from external sources +- Pure processing steps cannot generate credible information for the final report +- At least one research step must search the web for factual data ## Exclusions @@ -143,6 +154,7 @@ When planning information gathering, consider these key aspects and ensure COMPR - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects - Ensure each step is substantial and covers related information categories - Prioritize breadth and depth within the {{ max_step_num }}-step constraint + - **MANDATORY**: Include at least ONE research step with `need_search: true` to avoid hallucinated data - For each step, carefully assess if web search is needed: - Research and external data gathering: Set `need_search: true` - Internal data processing: Set `need_search: false` @@ -150,6 +162,7 @@ When planning information gathering, consider these key aspects and ensure COMPR - Prioritize depth and volume of relevant information - limited information is not acceptable. - Use the same language as the user to generate the plan. - Do not include steps for summarizing or consolidating the gathered information. +- **CRITICAL**: Verify that your plan includes at least one step with `need_search: true` before finalizing # Output Format diff --git a/tests/integration/test_nodes.py b/tests/integration/test_nodes.py index 0f6a268..56ebe2e 100644 --- a/tests/integration/test_nodes.py +++ b/tests/integration/test_nodes.py @@ -568,7 +568,7 @@ def test_coordinator_node_no_tool_calls( patch_handoff_to_planner, patch_logger, ): - # No tool calls, should goto __end__ + # No tool calls, should fallback to planner (fix for issue #535) with ( patch("src.graph.nodes.AGENT_LLM_MAP", {"coordinator": "basic"}), patch("src.graph.nodes.get_llm_by_type") as mock_get_llm, @@ -579,7 +579,8 @@ def test_coordinator_node_no_tool_calls( mock_get_llm.return_value = mock_llm result = coordinator_node(mock_state_coordinator, MagicMock()) - assert result.goto == "__end__" + # Should fallback to planner instead of __end__ to ensure workflow continues + assert result.goto == "planner" assert result.update["locale"] == "en-US" assert result.update["resources"] == ["resource1", "resource2"] @@ -1535,7 +1536,7 @@ def test_coordinator_empty_llm_response_corner_case(mock_get_llm): This tests error handling when LLM fails to return any content or tool calls in the initial state (clarification_rounds=0). The system should gracefully - handle this by going to __end__ instead of crashing. + handle this by going to planner instead of crashing (fix for issue #535). Note: This is NOT a typical clarification workflow test, but rather tests fault tolerance when LLM misbehaves. @@ -1563,6 +1564,6 @@ def test_coordinator_empty_llm_response_corner_case(mock_get_llm): # Call coordinator_node - should not crash result = coordinator_node(state, config) - # Should gracefully handle empty response by going to __end__ - assert result.goto == "__end__" + # Should gracefully handle empty response by going to planner to ensure workflow continues + assert result.goto == "planner" assert result.update["locale"] == "en-US"