mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-21 21:24:46 +08:00
* feat: add enable_web_search config to disable web search (#681) * fix: skip enforce_researcher_search validation when web search is disabled - Return json.dumps([]) instead of empty string for consistency in background_investigation_node - Add enable_web_search check to skip validation warning when user intentionally disabled web search - Add warning log when researcher has no tools available - Update tests to include new enable_web_search parameter * fix: address Copilot review feedback - Coordinate enforce_web_search with enable_web_search in validate_and_fix_plan - Fix misleading comment in background_investigation_node * docs: add warning about local RAG setup when disabling web search * docs: add web search toggle section to configuration guide
This commit is contained in:
@@ -61,6 +61,14 @@ BASIC_MODEL:
|
|||||||
# # When interrupt is triggered, user will be prompted to approve/reject
|
# # When interrupt is triggered, user will be prompted to approve/reject
|
||||||
# # Approved keywords: "approved", "approve", "yes", "proceed", "continue", "ok", "okay", "accepted", "accept"
|
# # Approved keywords: "approved", "approve", "yes", "proceed", "continue", "ok", "okay", "accepted", "accept"
|
||||||
|
|
||||||
|
# Web search toggle (Issue #681)
|
||||||
|
# Set to false to disable web search and use only local RAG knowledge base.
|
||||||
|
# This is useful for environments without internet access.
|
||||||
|
# WARNING: If you disable web search, make sure to configure local RAG resources;
|
||||||
|
# otherwise, the researcher will operate in pure LLM reasoning mode without external data.
|
||||||
|
# Note: This can be overridden per-request via the API parameter `enable_web_search`.
|
||||||
|
# ENABLE_WEB_SEARCH: true
|
||||||
|
|
||||||
# Search engine configuration
|
# Search engine configuration
|
||||||
# Supported engines: tavily, infoquest
|
# Supported engines: tavily, infoquest
|
||||||
# SEARCH_ENGINE:
|
# SEARCH_ENGINE:
|
||||||
|
|||||||
@@ -254,6 +254,39 @@ SEARCH_ENGINE:
|
|||||||
```
|
```
|
||||||
This means that the search results will be filtered based on the minimum relevance score threshold, and each search result's content will be truncated to the maximum length limit.
|
This means that the search results will be filtered based on the minimum relevance score threshold, and each search result's content will be truncated to the maximum length limit.
|
||||||
|
|
||||||
|
## Web Search Toggle
|
||||||
|
|
||||||
|
DeerFlow allows you to disable web search functionality, which is useful for environments without internet access or when you want to use only local RAG knowledge bases.
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
You can disable web search in your `conf.yaml` file:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Disable web search (use only local RAG)
|
||||||
|
ENABLE_WEB_SEARCH: false
|
||||||
|
```
|
||||||
|
|
||||||
|
Or via API request parameter:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"messages": [{"role": "user", "content": "Research topic"}],
|
||||||
|
"enable_web_search": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> If you disable web search, make sure to configure local RAG resources; otherwise, the researcher will operate in pure LLM reasoning mode without external data sources.
|
||||||
|
|
||||||
|
### Behavior When Web Search is Disabled
|
||||||
|
|
||||||
|
- **Background investigation**: Skipped entirely (relies on web search)
|
||||||
|
- **Researcher node**: Will use only RAG retriever tools if configured
|
||||||
|
- **Pure reasoning mode**: If no RAG resources are available, the researcher will rely solely on LLM reasoning
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## RAG (Retrieval-Augmented Generation) Configuration
|
## RAG (Retrieval-Augmented Generation) Configuration
|
||||||
|
|
||||||
DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables.
|
DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables.
|
||||||
|
|||||||
@@ -57,6 +57,9 @@ class Configuration:
|
|||||||
enforce_researcher_search: bool = (
|
enforce_researcher_search: bool = (
|
||||||
True # Enforce that researcher must use web search tool at least once
|
True # Enforce that researcher must use web search tool at least once
|
||||||
)
|
)
|
||||||
|
enable_web_search: bool = (
|
||||||
|
True # Whether to enable web search, set to False to use only local RAG
|
||||||
|
)
|
||||||
interrupt_before_tools: list[str] = field(
|
interrupt_before_tools: list[str] = field(
|
||||||
default_factory=list
|
default_factory=list
|
||||||
) # List of tool names to interrupt before execution
|
) # List of tool names to interrupt before execution
|
||||||
|
|||||||
@@ -118,13 +118,14 @@ def preserve_state_meta_fields(state: State) -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
|
def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False, enable_web_search: bool = True) -> dict:
|
||||||
"""
|
"""
|
||||||
Validate and fix a plan to ensure it meets requirements.
|
Validate and fix a plan to ensure it meets requirements.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
plan: The plan dict to validate
|
plan: The plan dict to validate
|
||||||
enforce_web_search: If True, ensure at least one step has need_search=true
|
enforce_web_search: If True, ensure at least one step has need_search=true
|
||||||
|
enable_web_search: If False, skip web search enforcement (takes precedence)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The validated/fixed plan dict
|
The validated/fixed plan dict
|
||||||
@@ -154,8 +155,9 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
|
|||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# SECTION 2: Enforce web search requirements
|
# SECTION 2: Enforce web search requirements
|
||||||
|
# Skip enforcement if web search is disabled (enable_web_search=False takes precedence)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
if enforce_web_search:
|
if enforce_web_search and enable_web_search:
|
||||||
# Check if any step has need_search=true (only check dict steps)
|
# Check if any step has need_search=true (only check dict steps)
|
||||||
has_search_step = any(
|
has_search_step = any(
|
||||||
step.get("need_search", False)
|
step.get("need_search", False)
|
||||||
@@ -197,6 +199,12 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
|
|||||||
def background_investigation_node(state: State, config: RunnableConfig):
|
def background_investigation_node(state: State, config: RunnableConfig):
|
||||||
logger.info("background investigation node is running.")
|
logger.info("background investigation node is running.")
|
||||||
configurable = Configuration.from_runnable_config(config)
|
configurable = Configuration.from_runnable_config(config)
|
||||||
|
|
||||||
|
# Background investigation relies on web search; skip entirely when web search is disabled
|
||||||
|
if not configurable.enable_web_search:
|
||||||
|
logger.info("Web search is disabled, skipping background investigation.")
|
||||||
|
return {"background_investigation_results": json.dumps([], ensure_ascii=False)}
|
||||||
|
|
||||||
query = state.get("clarified_research_topic") or state.get("research_topic")
|
query = state.get("clarified_research_topic") or state.get("research_topic")
|
||||||
background_investigation_results = []
|
background_investigation_results = []
|
||||||
|
|
||||||
@@ -357,7 +365,7 @@ def planner_node(
|
|||||||
|
|
||||||
# Validate and fix plan to ensure web search requirements are met
|
# Validate and fix plan to ensure web search requirements are met
|
||||||
if isinstance(curr_plan, dict):
|
if isinstance(curr_plan, dict):
|
||||||
curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search)
|
curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search, configurable.enable_web_search)
|
||||||
|
|
||||||
if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"):
|
if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"):
|
||||||
logger.info("Planner response has enough context.")
|
logger.info("Planner response has enough context.")
|
||||||
@@ -480,7 +488,7 @@ def human_feedback_node(
|
|||||||
new_plan = json.loads(repair_json_output(current_plan_content))
|
new_plan = json.loads(repair_json_output(current_plan_content))
|
||||||
# Validate and fix plan to ensure web search requirements are met
|
# Validate and fix plan to ensure web search requirements are met
|
||||||
configurable = Configuration.from_runnable_config(config)
|
configurable = Configuration.from_runnable_config(config)
|
||||||
new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search)
|
new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search, configurable.enable_web_search)
|
||||||
except (json.JSONDecodeError, AttributeError) as e:
|
except (json.JSONDecodeError, AttributeError) as e:
|
||||||
logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}")
|
logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}")
|
||||||
if isinstance(current_plan, dict) and "content" in original_plan:
|
if isinstance(current_plan, dict) and "content" in original_plan:
|
||||||
@@ -1072,7 +1080,8 @@ async def _execute_agent_step(
|
|||||||
if should_validate:
|
if should_validate:
|
||||||
# Check if enforcement is enabled in configuration
|
# Check if enforcement is enabled in configuration
|
||||||
configurable = Configuration.from_runnable_config(config) if config else Configuration()
|
configurable = Configuration.from_runnable_config(config) if config else Configuration()
|
||||||
if configurable.enforce_researcher_search:
|
# Skip validation if web search is disabled (user intentionally disabled it)
|
||||||
|
if configurable.enforce_researcher_search and configurable.enable_web_search:
|
||||||
web_search_validated = validate_web_search_usage(result["messages"], agent_name)
|
web_search_validated = validate_web_search_usage(result["messages"], agent_name)
|
||||||
|
|
||||||
# If web search was not used, add a warning to the response
|
# If web search was not used, add a warning to the response
|
||||||
@@ -1210,15 +1219,30 @@ async def researcher_node(
|
|||||||
configurable = Configuration.from_runnable_config(config)
|
configurable = Configuration.from_runnable_config(config)
|
||||||
logger.debug(f"[researcher_node] Max search results: {configurable.max_search_results}")
|
logger.debug(f"[researcher_node] Max search results: {configurable.max_search_results}")
|
||||||
|
|
||||||
tools = [get_web_search_tool(configurable.max_search_results), crawl_tool]
|
# Build tools list based on configuration
|
||||||
|
tools = []
|
||||||
|
|
||||||
|
# Add web search and crawl tools only if web search is enabled
|
||||||
|
if configurable.enable_web_search:
|
||||||
|
tools.extend([get_web_search_tool(configurable.max_search_results), crawl_tool])
|
||||||
|
else:
|
||||||
|
logger.info("[researcher_node] Web search is disabled, using only local RAG")
|
||||||
|
|
||||||
|
# Add retriever tool if resources are available (always add, higher priority)
|
||||||
retriever_tool = get_retriever_tool(state.get("resources", []))
|
retriever_tool = get_retriever_tool(state.get("resources", []))
|
||||||
if retriever_tool:
|
if retriever_tool:
|
||||||
logger.debug(f"[researcher_node] Adding retriever tool to tools list")
|
logger.debug(f"[researcher_node] Adding retriever tool to tools list")
|
||||||
tools.insert(0, retriever_tool)
|
tools.insert(0, retriever_tool)
|
||||||
|
|
||||||
|
# Warn if no tools are available
|
||||||
|
if not tools:
|
||||||
|
logger.warning("[researcher_node] No tools available (web search disabled, no resources). "
|
||||||
|
"Researcher will operate in pure reasoning mode.")
|
||||||
|
|
||||||
logger.info(f"[researcher_node] Researcher tools count: {len(tools)}")
|
logger.info(f"[researcher_node] Researcher tools count: {len(tools)}")
|
||||||
logger.debug(f"[researcher_node] Researcher tools: {[tool.name if hasattr(tool, 'name') else str(tool) for tool in tools]}")
|
logger.debug(f"[researcher_node] Researcher tools: {[tool.name if hasattr(tool, 'name') else str(tool) for tool in tools]}")
|
||||||
logger.info(f"[researcher_node] enforce_researcher_search is set to: {configurable.enforce_researcher_search}")
|
logger.info(f"[researcher_node] enforce_researcher_search={configurable.enforce_researcher_search}, "
|
||||||
|
f"enable_web_search={configurable.enable_web_search}")
|
||||||
|
|
||||||
return await _setup_and_execute_agent_step(
|
return await _setup_and_execute_agent_step(
|
||||||
state,
|
state,
|
||||||
|
|||||||
@@ -132,6 +132,7 @@ async def chat_stream(request: ChatRequest):
|
|||||||
request.interrupt_feedback,
|
request.interrupt_feedback,
|
||||||
request.mcp_settings if mcp_enabled else {},
|
request.mcp_settings if mcp_enabled else {},
|
||||||
request.enable_background_investigation,
|
request.enable_background_investigation,
|
||||||
|
request.enable_web_search,
|
||||||
request.report_style,
|
request.report_style,
|
||||||
request.enable_deep_thinking,
|
request.enable_deep_thinking,
|
||||||
request.enable_clarification,
|
request.enable_clarification,
|
||||||
@@ -517,6 +518,7 @@ async def _astream_workflow_generator(
|
|||||||
interrupt_feedback: str,
|
interrupt_feedback: str,
|
||||||
mcp_settings: dict,
|
mcp_settings: dict,
|
||||||
enable_background_investigation: bool,
|
enable_background_investigation: bool,
|
||||||
|
enable_web_search: bool,
|
||||||
report_style: ReportStyle,
|
report_style: ReportStyle,
|
||||||
enable_deep_thinking: bool,
|
enable_deep_thinking: bool,
|
||||||
enable_clarification: bool,
|
enable_clarification: bool,
|
||||||
@@ -594,6 +596,7 @@ async def _astream_workflow_generator(
|
|||||||
"max_step_num": max_step_num,
|
"max_step_num": max_step_num,
|
||||||
"max_search_results": max_search_results,
|
"max_search_results": max_search_results,
|
||||||
"mcp_settings": mcp_settings,
|
"mcp_settings": mcp_settings,
|
||||||
|
"enable_web_search": enable_web_search,
|
||||||
"report_style": report_style.value,
|
"report_style": report_style.value,
|
||||||
"enable_deep_thinking": enable_deep_thinking,
|
"enable_deep_thinking": enable_deep_thinking,
|
||||||
"interrupt_before_tools": interrupt_before_tools,
|
"interrupt_before_tools": interrupt_before_tools,
|
||||||
|
|||||||
@@ -62,6 +62,9 @@ class ChatRequest(BaseModel):
|
|||||||
enable_background_investigation: Optional[bool] = Field(
|
enable_background_investigation: Optional[bool] = Field(
|
||||||
True, description="Whether to get background investigation before plan"
|
True, description="Whether to get background investigation before plan"
|
||||||
)
|
)
|
||||||
|
enable_web_search: Optional[bool] = Field(
|
||||||
|
True, description="Whether to enable web search, set to False to use only local RAG"
|
||||||
|
)
|
||||||
report_style: Optional[ReportStyle] = Field(
|
report_style: Optional[ReportStyle] = Field(
|
||||||
ReportStyle.ACADEMIC, description="The style of the report"
|
ReportStyle.ACADEMIC, description="The style of the report"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -103,6 +103,7 @@ async def test_astream_workflow_generator_preserves_clarification_history():
|
|||||||
interrupt_feedback="",
|
interrupt_feedback="",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=True,
|
enable_background_investigation=True,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=True,
|
enable_clarification=True,
|
||||||
@@ -608,6 +609,7 @@ class TestAstreamWorkflowGenerator:
|
|||||||
interrupt_feedback="",
|
interrupt_feedback="",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=False,
|
enable_background_investigation=False,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=False,
|
enable_clarification=False,
|
||||||
@@ -649,6 +651,7 @@ class TestAstreamWorkflowGenerator:
|
|||||||
interrupt_feedback="edit_plan",
|
interrupt_feedback="edit_plan",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=False,
|
enable_background_investigation=False,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=False,
|
enable_clarification=False,
|
||||||
@@ -685,6 +688,7 @@ class TestAstreamWorkflowGenerator:
|
|||||||
interrupt_feedback="",
|
interrupt_feedback="",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=False,
|
enable_background_investigation=False,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=False,
|
enable_clarification=False,
|
||||||
@@ -723,6 +727,7 @@ class TestAstreamWorkflowGenerator:
|
|||||||
interrupt_feedback="",
|
interrupt_feedback="",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=False,
|
enable_background_investigation=False,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=False,
|
enable_clarification=False,
|
||||||
@@ -766,6 +771,7 @@ class TestAstreamWorkflowGenerator:
|
|||||||
interrupt_feedback="",
|
interrupt_feedback="",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=False,
|
enable_background_investigation=False,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=False,
|
enable_clarification=False,
|
||||||
@@ -809,6 +815,7 @@ class TestAstreamWorkflowGenerator:
|
|||||||
interrupt_feedback="",
|
interrupt_feedback="",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=False,
|
enable_background_investigation=False,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=False,
|
enable_clarification=False,
|
||||||
@@ -849,6 +856,7 @@ class TestAstreamWorkflowGenerator:
|
|||||||
interrupt_feedback="",
|
interrupt_feedback="",
|
||||||
mcp_settings={},
|
mcp_settings={},
|
||||||
enable_background_investigation=False,
|
enable_background_investigation=False,
|
||||||
|
enable_web_search=True,
|
||||||
report_style=ReportStyle.ACADEMIC,
|
report_style=ReportStyle.ACADEMIC,
|
||||||
enable_deep_thinking=False,
|
enable_deep_thinking=False,
|
||||||
enable_clarification=False,
|
enable_clarification=False,
|
||||||
|
|||||||
Reference in New Issue
Block a user