feat: add enable_web_search config to disable web search (#681) (#760)

* feat: add enable_web_search config to disable web search (#681)

* fix: skip enforce_researcher_search validation when web search is disabled

- Return json.dumps([]) instead of empty string for consistency in background_investigation_node

- Add enable_web_search check to skip validation warning when user intentionally disabled web search

- Add warning log when researcher has no tools available

- Update tests to include new enable_web_search parameter

* fix: address Copilot review feedback

- Coordinate enforce_web_search with enable_web_search in validate_and_fix_plan

- Fix misleading comment in background_investigation_node

* docs: add warning about local RAG setup when disabling web search

* docs: add web search toggle section to configuration guide
This commit is contained in:
Jiahe Wu
2025-12-15 19:17:24 +08:00
committed by GitHub
parent c686ab7016
commit 93d81d450d
7 changed files with 89 additions and 7 deletions

View File

@@ -61,6 +61,14 @@ BASIC_MODEL:
# # When interrupt is triggered, user will be prompted to approve/reject # # When interrupt is triggered, user will be prompted to approve/reject
# # Approved keywords: "approved", "approve", "yes", "proceed", "continue", "ok", "okay", "accepted", "accept" # # Approved keywords: "approved", "approve", "yes", "proceed", "continue", "ok", "okay", "accepted", "accept"
# Web search toggle (Issue #681)
# Set to false to disable web search and use only local RAG knowledge base.
# This is useful for environments without internet access.
# WARNING: If you disable web search, make sure to configure local RAG resources;
# otherwise, the researcher will operate in pure LLM reasoning mode without external data.
# Note: This can be overridden per-request via the API parameter `enable_web_search`.
# ENABLE_WEB_SEARCH: true
# Search engine configuration # Search engine configuration
# Supported engines: tavily, infoquest # Supported engines: tavily, infoquest
# SEARCH_ENGINE: # SEARCH_ENGINE:

View File

@@ -254,6 +254,39 @@ SEARCH_ENGINE:
``` ```
That means that the search results will be filtered based on the minimum relevance score threshold and truncated to the maximum length limit for each search result content. That means that the search results will be filtered based on the minimum relevance score threshold and truncated to the maximum length limit for each search result content.
## Web Search Toggle
DeerFlow allows you to disable web search functionality, which is useful for environments without internet access or when you want to use only local RAG knowledge bases.
### Configuration
You can disable web search in your `conf.yaml` file:
```yaml
# Disable web search (use only local RAG)
ENABLE_WEB_SEARCH: false
```
Or via API request parameter:
```json
{
"messages": [{"role": "user", "content": "Research topic"}],
"enable_web_search": false
}
```
> [!WARNING]
> If you disable web search, make sure to configure local RAG resources; otherwise, the researcher will operate in pure LLM reasoning mode without external data sources.
### Behavior When Web Search is Disabled
- **Background investigation**: Skipped entirely (relies on web search)
- **Researcher node**: Will use only RAG retriever tools if configured
- **Pure reasoning mode**: If no RAG resources are available, the researcher will rely solely on LLM reasoning
---
## RAG (Retrieval-Augmented Generation) Configuration ## RAG (Retrieval-Augmented Generation) Configuration
DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables. DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables.

View File

@@ -57,6 +57,9 @@ class Configuration:
enforce_researcher_search: bool = ( enforce_researcher_search: bool = (
True # Enforce that researcher must use web search tool at least once True # Enforce that researcher must use web search tool at least once
) )
enable_web_search: bool = (
True # Whether to enable web search, set to False to use only local RAG
)
interrupt_before_tools: list[str] = field( interrupt_before_tools: list[str] = field(
default_factory=list default_factory=list
) # List of tool names to interrupt before execution ) # List of tool names to interrupt before execution

View File

@@ -118,13 +118,14 @@ def preserve_state_meta_fields(state: State) -> dict:
} }
def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict: def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False, enable_web_search: bool = True) -> dict:
""" """
Validate and fix a plan to ensure it meets requirements. Validate and fix a plan to ensure it meets requirements.
Args: Args:
plan: The plan dict to validate plan: The plan dict to validate
enforce_web_search: If True, ensure at least one step has need_search=true enforce_web_search: If True, ensure at least one step has need_search=true
enable_web_search: If False, skip web search enforcement (takes precedence)
Returns: Returns:
The validated/fixed plan dict The validated/fixed plan dict
@@ -154,8 +155,9 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
# ============================================================ # ============================================================
# SECTION 2: Enforce web search requirements # SECTION 2: Enforce web search requirements
# Skip enforcement if web search is disabled (enable_web_search=False takes precedence)
# ============================================================ # ============================================================
if enforce_web_search: if enforce_web_search and enable_web_search:
# Check if any step has need_search=true (only check dict steps) # Check if any step has need_search=true (only check dict steps)
has_search_step = any( has_search_step = any(
step.get("need_search", False) step.get("need_search", False)
@@ -197,6 +199,12 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
def background_investigation_node(state: State, config: RunnableConfig): def background_investigation_node(state: State, config: RunnableConfig):
logger.info("background investigation node is running.") logger.info("background investigation node is running.")
configurable = Configuration.from_runnable_config(config) configurable = Configuration.from_runnable_config(config)
# Background investigation relies on web search; skip entirely when web search is disabled
if not configurable.enable_web_search:
logger.info("Web search is disabled, skipping background investigation.")
return {"background_investigation_results": json.dumps([], ensure_ascii=False)}
query = state.get("clarified_research_topic") or state.get("research_topic") query = state.get("clarified_research_topic") or state.get("research_topic")
background_investigation_results = [] background_investigation_results = []
@@ -357,7 +365,7 @@ def planner_node(
# Validate and fix plan to ensure web search requirements are met # Validate and fix plan to ensure web search requirements are met
if isinstance(curr_plan, dict): if isinstance(curr_plan, dict):
curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search) curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search, configurable.enable_web_search)
if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"): if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"):
logger.info("Planner response has enough context.") logger.info("Planner response has enough context.")
@@ -480,7 +488,7 @@ def human_feedback_node(
new_plan = json.loads(repair_json_output(current_plan_content)) new_plan = json.loads(repair_json_output(current_plan_content))
# Validate and fix plan to ensure web search requirements are met # Validate and fix plan to ensure web search requirements are met
configurable = Configuration.from_runnable_config(config) configurable = Configuration.from_runnable_config(config)
new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search) new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search, configurable.enable_web_search)
except (json.JSONDecodeError, AttributeError) as e: except (json.JSONDecodeError, AttributeError) as e:
logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}") logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}")
if isinstance(current_plan, dict) and "content" in original_plan: if isinstance(current_plan, dict) and "content" in original_plan:
@@ -1072,7 +1080,8 @@ async def _execute_agent_step(
if should_validate: if should_validate:
# Check if enforcement is enabled in configuration # Check if enforcement is enabled in configuration
configurable = Configuration.from_runnable_config(config) if config else Configuration() configurable = Configuration.from_runnable_config(config) if config else Configuration()
if configurable.enforce_researcher_search: # Skip validation if web search is disabled (user intentionally disabled it)
if configurable.enforce_researcher_search and configurable.enable_web_search:
web_search_validated = validate_web_search_usage(result["messages"], agent_name) web_search_validated = validate_web_search_usage(result["messages"], agent_name)
# If web search was not used, add a warning to the response # If web search was not used, add a warning to the response
@@ -1210,15 +1219,30 @@ async def researcher_node(
configurable = Configuration.from_runnable_config(config) configurable = Configuration.from_runnable_config(config)
logger.debug(f"[researcher_node] Max search results: {configurable.max_search_results}") logger.debug(f"[researcher_node] Max search results: {configurable.max_search_results}")
tools = [get_web_search_tool(configurable.max_search_results), crawl_tool] # Build tools list based on configuration
tools = []
# Add web search and crawl tools only if web search is enabled
if configurable.enable_web_search:
tools.extend([get_web_search_tool(configurable.max_search_results), crawl_tool])
else:
logger.info("[researcher_node] Web search is disabled, using only local RAG")
# Add retriever tool if resources are available (always add, higher priority)
retriever_tool = get_retriever_tool(state.get("resources", [])) retriever_tool = get_retriever_tool(state.get("resources", []))
if retriever_tool: if retriever_tool:
logger.debug(f"[researcher_node] Adding retriever tool to tools list") logger.debug(f"[researcher_node] Adding retriever tool to tools list")
tools.insert(0, retriever_tool) tools.insert(0, retriever_tool)
# Warn if no tools are available
if not tools:
logger.warning("[researcher_node] No tools available (web search disabled, no resources). "
"Researcher will operate in pure reasoning mode.")
logger.info(f"[researcher_node] Researcher tools count: {len(tools)}") logger.info(f"[researcher_node] Researcher tools count: {len(tools)}")
logger.debug(f"[researcher_node] Researcher tools: {[tool.name if hasattr(tool, 'name') else str(tool) for tool in tools]}") logger.debug(f"[researcher_node] Researcher tools: {[tool.name if hasattr(tool, 'name') else str(tool) for tool in tools]}")
logger.info(f"[researcher_node] enforce_researcher_search is set to: {configurable.enforce_researcher_search}") logger.info(f"[researcher_node] enforce_researcher_search={configurable.enforce_researcher_search}, "
f"enable_web_search={configurable.enable_web_search}")
return await _setup_and_execute_agent_step( return await _setup_and_execute_agent_step(
state, state,

View File

@@ -132,6 +132,7 @@ async def chat_stream(request: ChatRequest):
request.interrupt_feedback, request.interrupt_feedback,
request.mcp_settings if mcp_enabled else {}, request.mcp_settings if mcp_enabled else {},
request.enable_background_investigation, request.enable_background_investigation,
request.enable_web_search,
request.report_style, request.report_style,
request.enable_deep_thinking, request.enable_deep_thinking,
request.enable_clarification, request.enable_clarification,
@@ -517,6 +518,7 @@ async def _astream_workflow_generator(
interrupt_feedback: str, interrupt_feedback: str,
mcp_settings: dict, mcp_settings: dict,
enable_background_investigation: bool, enable_background_investigation: bool,
enable_web_search: bool,
report_style: ReportStyle, report_style: ReportStyle,
enable_deep_thinking: bool, enable_deep_thinking: bool,
enable_clarification: bool, enable_clarification: bool,
@@ -594,6 +596,7 @@ async def _astream_workflow_generator(
"max_step_num": max_step_num, "max_step_num": max_step_num,
"max_search_results": max_search_results, "max_search_results": max_search_results,
"mcp_settings": mcp_settings, "mcp_settings": mcp_settings,
"enable_web_search": enable_web_search,
"report_style": report_style.value, "report_style": report_style.value,
"enable_deep_thinking": enable_deep_thinking, "enable_deep_thinking": enable_deep_thinking,
"interrupt_before_tools": interrupt_before_tools, "interrupt_before_tools": interrupt_before_tools,

View File

@@ -62,6 +62,9 @@ class ChatRequest(BaseModel):
enable_background_investigation: Optional[bool] = Field( enable_background_investigation: Optional[bool] = Field(
True, description="Whether to get background investigation before plan" True, description="Whether to get background investigation before plan"
) )
enable_web_search: Optional[bool] = Field(
True, description="Whether to enable web search, set to False to use only local RAG"
)
report_style: Optional[ReportStyle] = Field( report_style: Optional[ReportStyle] = Field(
ReportStyle.ACADEMIC, description="The style of the report" ReportStyle.ACADEMIC, description="The style of the report"
) )

View File

@@ -103,6 +103,7 @@ async def test_astream_workflow_generator_preserves_clarification_history():
interrupt_feedback="", interrupt_feedback="",
mcp_settings={}, mcp_settings={},
enable_background_investigation=True, enable_background_investigation=True,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=True, enable_clarification=True,
@@ -608,6 +609,7 @@ class TestAstreamWorkflowGenerator:
interrupt_feedback="", interrupt_feedback="",
mcp_settings={}, mcp_settings={},
enable_background_investigation=False, enable_background_investigation=False,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=False, enable_clarification=False,
@@ -649,6 +651,7 @@ class TestAstreamWorkflowGenerator:
interrupt_feedback="edit_plan", interrupt_feedback="edit_plan",
mcp_settings={}, mcp_settings={},
enable_background_investigation=False, enable_background_investigation=False,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=False, enable_clarification=False,
@@ -685,6 +688,7 @@ class TestAstreamWorkflowGenerator:
interrupt_feedback="", interrupt_feedback="",
mcp_settings={}, mcp_settings={},
enable_background_investigation=False, enable_background_investigation=False,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=False, enable_clarification=False,
@@ -723,6 +727,7 @@ class TestAstreamWorkflowGenerator:
interrupt_feedback="", interrupt_feedback="",
mcp_settings={}, mcp_settings={},
enable_background_investigation=False, enable_background_investigation=False,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=False, enable_clarification=False,
@@ -766,6 +771,7 @@ class TestAstreamWorkflowGenerator:
interrupt_feedback="", interrupt_feedback="",
mcp_settings={}, mcp_settings={},
enable_background_investigation=False, enable_background_investigation=False,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=False, enable_clarification=False,
@@ -809,6 +815,7 @@ class TestAstreamWorkflowGenerator:
interrupt_feedback="", interrupt_feedback="",
mcp_settings={}, mcp_settings={},
enable_background_investigation=False, enable_background_investigation=False,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=False, enable_clarification=False,
@@ -849,6 +856,7 @@ class TestAstreamWorkflowGenerator:
interrupt_feedback="", interrupt_feedback="",
mcp_settings={}, mcp_settings={},
enable_background_investigation=False, enable_background_investigation=False,
enable_web_search=True,
report_style=ReportStyle.ACADEMIC, report_style=ReportStyle.ACADEMIC,
enable_deep_thinking=False, enable_deep_thinking=False,
enable_clarification=False, enable_clarification=False,