mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-28 08:14:48 +08:00
* fix: ensure web search is performed for research plans to fix #535 When using certain models (DeepSeek-V3, Qwen3, or local deployments), the agent framework failed to trigger web search tools, resulting in hallucinated data. This fix implements multiple safeguards: 1. Add enforce_web_search configuration flag: - New config option to mandate web search in research plans - Defaults to False for backward compatibility 2. Add plan validation function validate_and_fix_plan(): - Validates that plans include at least one research step with web search - Enforces web search requirement when enabled - Adds default research step if plan has no steps 3. Enhance coordinator_node fallback logic: - When model fails to call tools, fallback to planner instead of __end__ - Ensures workflow continues even when tool calling fails - Logs detailed diagnostic info for debugging 4. Update prompts for stricter requirements: - planner.md: Add MANDATORY web search requirement and clear warnings - coordinator.md: Add CRITICAL tool calling requirement - Emphasize consequences of missing web search (hallucinated data) 5. Update tests to reflect new behavior: - test_coordinator_node_no_tool_calls: Expect planner instead of __end__ - test_coordinator_empty_llm_response_corner_case: Same expectation Fixes #535 by ensuring: - Web search is always performed for research tasks - Workflow doesn't terminate on tool calling failures - Models with poor tool calling support can still proceed - No hallucinated data without real information gathering * Update src/graph/nodes.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/graph/nodes.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * accept the review suggestion of getting configuration --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -51,6 +51,7 @@ class Configuration:
|
|||||||
mcp_settings: dict = None # MCP settings, including dynamic loaded tools
|
mcp_settings: dict = None # MCP settings, including dynamic loaded tools
|
||||||
report_style: str = ReportStyle.ACADEMIC.value # Report style
|
report_style: str = ReportStyle.ACADEMIC.value # Report style
|
||||||
enable_deep_thinking: bool = False # Whether to enable deep thinking
|
enable_deep_thinking: bool = False # Whether to enable deep thinking
|
||||||
|
enforce_web_search: bool = False # Enforce at least one web search step in every plan
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_runnable_config(
|
def from_runnable_config(
|
||||||
|
|||||||
@@ -75,6 +75,54 @@ def needs_clarification(state: dict) -> bool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
|
||||||
|
"""
|
||||||
|
Validate and fix a plan to ensure it meets requirements.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
plan: The plan dict to validate
|
||||||
|
enforce_web_search: If True, ensure at least one step has need_search=true
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The validated/fixed plan dict
|
||||||
|
"""
|
||||||
|
if not isinstance(plan, dict):
|
||||||
|
return plan
|
||||||
|
|
||||||
|
steps = plan.get("steps", [])
|
||||||
|
|
||||||
|
if enforce_web_search:
|
||||||
|
# Check if any step has need_search=true
|
||||||
|
has_search_step = any(step.get("need_search", False) for step in steps)
|
||||||
|
|
||||||
|
if not has_search_step and steps:
|
||||||
|
# Ensure first research step has web search enabled
|
||||||
|
for idx, step in enumerate(steps):
|
||||||
|
if step.get("step_type") == "research":
|
||||||
|
step["need_search"] = True
|
||||||
|
logger.info(f"Enforced web search on research step at index {idx}")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Fallback: If no research step exists, convert the first step to a research step with web search enabled.
|
||||||
|
# This ensures that at least one step will perform a web search as required.
|
||||||
|
steps[0]["step_type"] = "research"
|
||||||
|
steps[0]["need_search"] = True
|
||||||
|
logger.info("Converted first step to research with web search enforcement")
|
||||||
|
elif not has_search_step and not steps:
|
||||||
|
# Add a default research step if no steps exist
|
||||||
|
logger.warning("Plan has no steps. Adding default research step.")
|
||||||
|
plan["steps"] = [
|
||||||
|
{
|
||||||
|
"need_search": True,
|
||||||
|
"title": "Initial Research",
|
||||||
|
"description": "Gather information about the topic",
|
||||||
|
"step_type": "research",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
return plan
|
||||||
|
|
||||||
|
|
||||||
def background_investigation_node(state: State, config: RunnableConfig):
|
def background_investigation_node(state: State, config: RunnableConfig):
|
||||||
logger.info("background investigation node is running.")
|
logger.info("background investigation node is running.")
|
||||||
configurable = Configuration.from_runnable_config(config)
|
configurable = Configuration.from_runnable_config(config)
|
||||||
@@ -182,6 +230,11 @@ def planner_node(
|
|||||||
return Command(goto="reporter")
|
return Command(goto="reporter")
|
||||||
else:
|
else:
|
||||||
return Command(goto="__end__")
|
return Command(goto="__end__")
|
||||||
|
|
||||||
|
# Validate and fix plan to ensure web search requirements are met
|
||||||
|
if isinstance(curr_plan, dict):
|
||||||
|
curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search)
|
||||||
|
|
||||||
if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"):
|
if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"):
|
||||||
logger.info("Planner response has enough context.")
|
logger.info("Planner response has enough context.")
|
||||||
new_plan = Plan.model_validate(curr_plan)
|
new_plan = Plan.model_validate(curr_plan)
|
||||||
@@ -234,6 +287,9 @@ def human_feedback_node(
|
|||||||
plan_iterations += 1
|
plan_iterations += 1
|
||||||
# parse the plan
|
# parse the plan
|
||||||
new_plan = json.loads(current_plan)
|
new_plan = json.loads(current_plan)
|
||||||
|
# Validate and fix plan to ensure web search requirements are met
|
||||||
|
configurable = Configuration.from_runnable_config(config)
|
||||||
|
new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning("Planner response is not a valid JSON")
|
logger.warning("Planner response is not a valid JSON")
|
||||||
if plan_iterations > 1: # the plan_iterations is increased before this check
|
if plan_iterations > 1: # the plan_iterations is increased before this check
|
||||||
@@ -508,9 +564,16 @@ def coordinator_node(
|
|||||||
logger.error(f"Error processing tool calls: {e}")
|
logger.error(f"Error processing tool calls: {e}")
|
||||||
goto = "planner"
|
goto = "planner"
|
||||||
else:
|
else:
|
||||||
# No tool calls - both modes should goto __end__
|
# No tool calls detected - fallback to planner instead of ending
|
||||||
logger.warning("LLM didn't call any tools. Staying at __end__.")
|
logger.warning(
|
||||||
goto = "__end__"
|
"LLM didn't call any tools. This may indicate tool calling issues with the model. "
|
||||||
|
"Falling back to planner to ensure research proceeds."
|
||||||
|
)
|
||||||
|
# Log full response for debugging
|
||||||
|
logger.debug(f"Coordinator response content: {response.content}")
|
||||||
|
logger.debug(f"Coordinator response object: {response}")
|
||||||
|
# Fallback to planner to ensure workflow continues
|
||||||
|
goto = "planner"
|
||||||
|
|
||||||
# Apply background_investigation routing if enabled (unified logic)
|
# Apply background_investigation routing if enabled (unified logic)
|
||||||
if goto == "planner" and state.get("enable_background_investigation"):
|
if goto == "planner" and state.get("enable_background_investigation"):
|
||||||
|
|||||||
@@ -51,6 +51,15 @@ Your primary responsibilities are:
|
|||||||
- For all other inputs (category 3 - which includes most questions):
|
- For all other inputs (category 3 - which includes most questions):
|
||||||
- call `handoff_to_planner()` tool to handoff to planner for research without ANY thoughts.
|
- call `handoff_to_planner()` tool to handoff to planner for research without ANY thoughts.
|
||||||
|
|
||||||
|
# Tool Calling Requirements
|
||||||
|
|
||||||
|
**CRITICAL**: You MUST call one of the available tools for research requests. This is mandatory:
|
||||||
|
- Do NOT respond to research questions without calling a tool
|
||||||
|
- For research questions, ALWAYS use either `handoff_to_planner()` or `handoff_after_clarification()`
|
||||||
|
- Tool calling is required to ensure the workflow proceeds correctly
|
||||||
|
- Never skip tool calling even if you think you can answer the question directly
|
||||||
|
- Responding with text alone for research requests will cause the workflow to fail
|
||||||
|
|
||||||
# Clarification Process (When Enabled)
|
# Clarification Process (When Enabled)
|
||||||
|
|
||||||
Goal: Get 2+ dimensions before handing off to planner.
|
Goal: Get 2+ dimensions before handing off to planner.
|
||||||
|
|||||||
@@ -64,6 +64,8 @@ Different types of steps have different web search requirements:
|
|||||||
- Collecting competitor analysis
|
- Collecting competitor analysis
|
||||||
- Researching current events or news
|
- Researching current events or news
|
||||||
- Finding statistical data or reports
|
- Finding statistical data or reports
|
||||||
|
- **CRITICAL**: Research plans MUST include at least one step with `need_search: true` to gather real information
|
||||||
|
- Without web search, the report will contain hallucinated/fabricated data
|
||||||
|
|
||||||
2. **Data Processing Steps** (`need_search: false`):
|
2. **Data Processing Steps** (`need_search: false`):
|
||||||
- API calls and data extraction
|
- API calls and data extraction
|
||||||
@@ -71,6 +73,15 @@ Different types of steps have different web search requirements:
|
|||||||
- Raw data collection from existing sources
|
- Raw data collection from existing sources
|
||||||
- Mathematical calculations and analysis
|
- Mathematical calculations and analysis
|
||||||
- Statistical computations and data processing
|
- Statistical computations and data processing
|
||||||
|
- **NOTE**: Processing steps alone are insufficient - you must include research steps with web search
|
||||||
|
|
||||||
|
## Web Search Requirement
|
||||||
|
|
||||||
|
**MANDATORY**: Every research plan MUST include at least one step with `need_search: true`. This is critical because:
|
||||||
|
- Without web search, models generate hallucinated data
|
||||||
|
- Research steps must gather real information from external sources
|
||||||
|
- Pure processing steps cannot generate credible information for the final report
|
||||||
|
- At least one research step must search the web for factual data
|
||||||
|
|
||||||
## Exclusions
|
## Exclusions
|
||||||
|
|
||||||
@@ -143,6 +154,7 @@ When planning information gathering, consider these key aspects and ensure COMPR
|
|||||||
- Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
|
- Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
|
||||||
- Ensure each step is substantial and covers related information categories
|
- Ensure each step is substantial and covers related information categories
|
||||||
- Prioritize breadth and depth within the {{ max_step_num }}-step constraint
|
- Prioritize breadth and depth within the {{ max_step_num }}-step constraint
|
||||||
|
- **MANDATORY**: Include at least ONE research step with `need_search: true` to avoid hallucinated data
|
||||||
- For each step, carefully assess if web search is needed:
|
- For each step, carefully assess if web search is needed:
|
||||||
- Research and external data gathering: Set `need_search: true`
|
- Research and external data gathering: Set `need_search: true`
|
||||||
- Internal data processing: Set `need_search: false`
|
- Internal data processing: Set `need_search: false`
|
||||||
@@ -150,6 +162,7 @@ When planning information gathering, consider these key aspects and ensure COMPR
|
|||||||
- Prioritize depth and volume of relevant information - limited information is not acceptable.
|
- Prioritize depth and volume of relevant information - limited information is not acceptable.
|
||||||
- Use the same language as the user to generate the plan.
|
- Use the same language as the user to generate the plan.
|
||||||
- Do not include steps for summarizing or consolidating the gathered information.
|
- Do not include steps for summarizing or consolidating the gathered information.
|
||||||
|
- **CRITICAL**: Verify that your plan includes at least one step with `need_search: true` before finalizing
|
||||||
|
|
||||||
# Output Format
|
# Output Format
|
||||||
|
|
||||||
|
|||||||
@@ -568,7 +568,7 @@ def test_coordinator_node_no_tool_calls(
|
|||||||
patch_handoff_to_planner,
|
patch_handoff_to_planner,
|
||||||
patch_logger,
|
patch_logger,
|
||||||
):
|
):
|
||||||
# No tool calls, should goto __end__
|
# No tool calls, should fallback to planner (fix for issue #535)
|
||||||
with (
|
with (
|
||||||
patch("src.graph.nodes.AGENT_LLM_MAP", {"coordinator": "basic"}),
|
patch("src.graph.nodes.AGENT_LLM_MAP", {"coordinator": "basic"}),
|
||||||
patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
|
patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
|
||||||
@@ -579,7 +579,8 @@ def test_coordinator_node_no_tool_calls(
|
|||||||
mock_get_llm.return_value = mock_llm
|
mock_get_llm.return_value = mock_llm
|
||||||
|
|
||||||
result = coordinator_node(mock_state_coordinator, MagicMock())
|
result = coordinator_node(mock_state_coordinator, MagicMock())
|
||||||
assert result.goto == "__end__"
|
# Should fallback to planner instead of __end__ to ensure workflow continues
|
||||||
|
assert result.goto == "planner"
|
||||||
assert result.update["locale"] == "en-US"
|
assert result.update["locale"] == "en-US"
|
||||||
assert result.update["resources"] == ["resource1", "resource2"]
|
assert result.update["resources"] == ["resource1", "resource2"]
|
||||||
|
|
||||||
@@ -1535,7 +1536,7 @@ def test_coordinator_empty_llm_response_corner_case(mock_get_llm):
|
|||||||
|
|
||||||
This tests error handling when LLM fails to return any content or tool calls
|
This tests error handling when LLM fails to return any content or tool calls
|
||||||
in the initial state (clarification_rounds=0). The system should gracefully
|
in the initial state (clarification_rounds=0). The system should gracefully
|
||||||
handle this by going to __end__ instead of crashing.
|
handle this by going to planner instead of crashing (fix for issue #535).
|
||||||
|
|
||||||
Note: This is NOT a typical clarification workflow test, but rather tests
|
Note: This is NOT a typical clarification workflow test, but rather tests
|
||||||
fault tolerance when LLM misbehaves.
|
fault tolerance when LLM misbehaves.
|
||||||
@@ -1563,6 +1564,6 @@ def test_coordinator_empty_llm_response_corner_case(mock_get_llm):
|
|||||||
# Call coordinator_node - should not crash
|
# Call coordinator_node - should not crash
|
||||||
result = coordinator_node(state, config)
|
result = coordinator_node(state, config)
|
||||||
|
|
||||||
# Should gracefully handle empty response by going to __end__
|
# Should gracefully handle empty response by going to planner to ensure workflow continues
|
||||||
assert result.goto == "__end__"
|
assert result.goto == "planner"
|
||||||
assert result.update["locale"] == "en-US"
|
assert result.update["locale"] == "en-US"
|
||||||
|
|||||||
Reference in New Issue
Block a user