From 93d81d450dd8dbb95d9aabc63055bdb666914cea Mon Sep 17 00:00:00 2001 From: Jiahe Wu <117764480+blueberrycongee@users.noreply.github.com> Date: Mon, 15 Dec 2025 19:17:24 +0800 Subject: [PATCH] feat: add enable_web_search config to disable web search (#681) (#760) * feat: add enable_web_search config to disable web search (#681) * fix: skip enforce_researcher_search validation when web search is disabled - Return json.dumps([]) instead of empty string for consistency in background_investigation_node - Add enable_web_search check to skip validation warning when user intentionally disabled web search - Add warning log when researcher has no tools available - Update tests to include new enable_web_search parameter * fix: address Copilot review feedback - Coordinate enforce_web_search with enable_web_search in validate_and_fix_plan - Fix misleading comment in background_investigation_node * docs: add warning about local RAG setup when disabling web search * docs: add web search toggle section to configuration guide --- conf.yaml.example | 8 ++++++++ docs/configuration_guide.md | 33 ++++++++++++++++++++++++++++++ src/config/configuration.py | 3 +++ src/graph/nodes.py | 38 ++++++++++++++++++++++++++++------- src/server/app.py | 3 +++ src/server/chat_request.py | 3 +++ tests/unit/server/test_app.py | 8 ++++++++ 7 files changed, 89 insertions(+), 7 deletions(-) diff --git a/conf.yaml.example b/conf.yaml.example index 914c506..45ce0da 100644 --- a/conf.yaml.example +++ b/conf.yaml.example @@ -61,6 +61,14 @@ BASIC_MODEL: # # When interrupt is triggered, user will be prompted to approve/reject # # Approved keywords: "approved", "approve", "yes", "proceed", "continue", "ok", "okay", "accepted", "accept" +# Web search toggle (Issue #681) +# Set to false to disable web search and use only local RAG knowledge base. +# This is useful for environments without internet access. +# WARNING: If you disable web search, make sure to configure local RAG resources; +# otherwise, the researcher will operate in pure LLM reasoning mode without external data. +# Note: This can be overridden per-request via the API parameter `enable_web_search`. +# ENABLE_WEB_SEARCH: true + # Search engine configuration # Supported engines: tavily, infoquest # SEARCH_ENGINE: diff --git a/docs/configuration_guide.md b/docs/configuration_guide.md index 9698ce4..c2f7669 100644 --- a/docs/configuration_guide.md +++ b/docs/configuration_guide.md @@ -254,6 +254,39 @@ SEARCH_ENGINE: ``` That's meaning that the search results will be filtered based on the minimum relevance score threshold and truncated to the maximum length limit for each search result content. +## Web Search Toggle + +DeerFlow allows you to disable web search functionality, which is useful for environments without internet access or when you want to use only local RAG knowledge bases. + +### Configuration + +You can disable web search in your `conf.yaml` file: + +```yaml +# Disable web search (use only local RAG) +ENABLE_WEB_SEARCH: false +``` + +Or via API request parameter: + +```json +{ + "messages": [{"role": "user", "content": "Research topic"}], + "enable_web_search": false +} +``` + +> [!WARNING] +> If you disable web search, make sure to configure local RAG resources; otherwise, the researcher will operate in pure LLM reasoning mode without external data sources. + +### Behavior When Web Search is Disabled + +- **Background investigation**: Skipped entirely (relies on web search) +- **Researcher node**: Will use only RAG retriever tools if configured +- **Pure reasoning mode**: If no RAG resources are available, the researcher will rely solely on LLM reasoning + +--- + ## RAG (Retrieval-Augmented Generation) Configuration DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables. diff --git a/src/config/configuration.py b/src/config/configuration.py index 611618c..34694ad 100644 --- a/src/config/configuration.py +++ b/src/config/configuration.py @@ -57,6 +57,9 @@ class Configuration: enforce_researcher_search: bool = ( True # Enforce that researcher must use web search tool at least once ) + enable_web_search: bool = ( + True # Whether to enable web search, set to False to use only local RAG + ) interrupt_before_tools: list[str] = field( default_factory=list ) # List of tool names to interrupt before execution diff --git a/src/graph/nodes.py b/src/graph/nodes.py index 7941ddb..829ab24 100644 --- a/src/graph/nodes.py +++ b/src/graph/nodes.py @@ -118,13 +118,14 @@ def preserve_state_meta_fields(state: State) -> dict: } -def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict: +def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False, enable_web_search: bool = True) -> dict: """ Validate and fix a plan to ensure it meets requirements. Args: plan: The plan dict to validate enforce_web_search: If True, ensure at least one step has need_search=true + enable_web_search: If False, skip web search enforcement (takes precedence) Returns: The validated/fixed plan dict @@ -154,8 +155,9 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict: # ============================================================ # SECTION 2: Enforce web search requirements + # Skip enforcement if web search is disabled (enable_web_search=False takes precedence) # ============================================================ - if enforce_web_search: + if enforce_web_search and enable_web_search: # Check if any step has need_search=true (only check dict steps) has_search_step = any( step.get("need_search", False) @@ -197,6 +199,12 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict: def background_investigation_node(state: State, config: RunnableConfig): logger.info("background investigation node is running.") configurable = Configuration.from_runnable_config(config) + + # Background investigation relies on web search; skip entirely when web search is disabled + if not configurable.enable_web_search: + logger.info("Web search is disabled, skipping background investigation.") + return {"background_investigation_results": json.dumps([], ensure_ascii=False)} + query = state.get("clarified_research_topic") or state.get("research_topic") background_investigation_results = [] @@ -357,7 +365,7 @@ def planner_node( # Validate and fix plan to ensure web search requirements are met if isinstance(curr_plan, dict): - curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search) + curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search, configurable.enable_web_search) if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"): logger.info("Planner response has enough context.") @@ -480,7 +488,7 @@ def human_feedback_node( new_plan = json.loads(repair_json_output(current_plan_content)) # Validate and fix plan to ensure web search requirements are met configurable = Configuration.from_runnable_config(config) - new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search) + new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search, configurable.enable_web_search) except (json.JSONDecodeError, AttributeError) as e: logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}") if isinstance(current_plan, dict) and "content" in original_plan: @@ -1072,7 +1080,8 @@ async def _execute_agent_step( if should_validate: # Check if enforcement is enabled in configuration configurable = Configuration.from_runnable_config(config) if config else Configuration() - if configurable.enforce_researcher_search: + # Skip validation if web search is disabled (user intentionally disabled it) + if configurable.enforce_researcher_search and configurable.enable_web_search: web_search_validated = validate_web_search_usage(result["messages"], agent_name) # If web search was not used, add a warning to the response @@ -1210,15 +1219,30 @@ async def researcher_node( configurable = Configuration.from_runnable_config(config) logger.debug(f"[researcher_node] Max search results: {configurable.max_search_results}") - tools = [get_web_search_tool(configurable.max_search_results), crawl_tool] + # Build tools list based on configuration + tools = [] + + # Add web search and crawl tools only if web search is enabled + if configurable.enable_web_search: + tools.extend([get_web_search_tool(configurable.max_search_results), crawl_tool]) + else: + logger.info("[researcher_node] Web search is disabled, using only local RAG") + + # Add retriever tool if resources are available (always add, higher priority) retriever_tool = get_retriever_tool(state.get("resources", [])) if retriever_tool: logger.debug(f"[researcher_node] Adding retriever tool to tools list") tools.insert(0, retriever_tool) + # Warn if no tools are available + if not tools: + logger.warning("[researcher_node] No tools available (web search disabled, no resources). " + "Researcher will operate in pure reasoning mode.") + logger.info(f"[researcher_node] Researcher tools count: {len(tools)}") logger.debug(f"[researcher_node] Researcher tools: {[tool.name if hasattr(tool, 'name') else str(tool) for tool in tools]}") - logger.info(f"[researcher_node] enforce_researcher_search is set to: {configurable.enforce_researcher_search}") + logger.info(f"[researcher_node] enforce_researcher_search={configurable.enforce_researcher_search}, " + f"enable_web_search={configurable.enable_web_search}") return await _setup_and_execute_agent_step( state, diff --git a/src/server/app.py b/src/server/app.py index 9181079..fa9ff77 100644 --- a/src/server/app.py +++ b/src/server/app.py @@ -132,6 +132,7 @@ async def chat_stream(request: ChatRequest): request.interrupt_feedback, request.mcp_settings if mcp_enabled else {}, request.enable_background_investigation, + request.enable_web_search, request.report_style, request.enable_deep_thinking, request.enable_clarification, @@ -517,6 +518,7 @@ async def _astream_workflow_generator( interrupt_feedback: str, mcp_settings: dict, enable_background_investigation: bool, + enable_web_search: bool, report_style: ReportStyle, enable_deep_thinking: bool, enable_clarification: bool, @@ -594,6 +596,7 @@ async def _astream_workflow_generator( "max_step_num": max_step_num, "max_search_results": max_search_results, "mcp_settings": mcp_settings, + "enable_web_search": enable_web_search, "report_style": report_style.value, "enable_deep_thinking": enable_deep_thinking, "interrupt_before_tools": interrupt_before_tools, diff --git a/src/server/chat_request.py b/src/server/chat_request.py index 8fe81ce..7cad7b8 100644 --- a/src/server/chat_request.py +++ b/src/server/chat_request.py @@ -62,6 +62,9 @@ class ChatRequest(BaseModel): enable_background_investigation: Optional[bool] = Field( True, description="Whether to get background investigation before plan" ) + enable_web_search: Optional[bool] = Field( + True, description="Whether to enable web search, set to False to use only local RAG" + ) report_style: Optional[ReportStyle] = Field( ReportStyle.ACADEMIC, description="The style of the report" ) diff --git a/tests/unit/server/test_app.py b/tests/unit/server/test_app.py index 3c63103..3a3588d 100644 --- a/tests/unit/server/test_app.py +++ b/tests/unit/server/test_app.py @@ -103,6 +103,7 @@ async def test_astream_workflow_generator_preserves_clarification_history(): interrupt_feedback="", mcp_settings={}, enable_background_investigation=True, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=True, @@ -608,6 +609,7 @@ class TestAstreamWorkflowGenerator: interrupt_feedback="", mcp_settings={}, enable_background_investigation=False, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=False, @@ -649,6 +651,7 @@ class TestAstreamWorkflowGenerator: interrupt_feedback="edit_plan", mcp_settings={}, enable_background_investigation=False, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=False, @@ -685,6 +688,7 @@ class TestAstreamWorkflowGenerator: interrupt_feedback="", mcp_settings={}, enable_background_investigation=False, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=False, @@ -723,6 +727,7 @@ class TestAstreamWorkflowGenerator: interrupt_feedback="", mcp_settings={}, enable_background_investigation=False, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=False, @@ -766,6 +771,7 @@ class TestAstreamWorkflowGenerator: interrupt_feedback="", mcp_settings={}, enable_background_investigation=False, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=False, @@ -809,6 +815,7 @@ class TestAstreamWorkflowGenerator: interrupt_feedback="", mcp_settings={}, enable_background_investigation=False, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=False, @@ -849,6 +856,7 @@ class TestAstreamWorkflowGenerator: interrupt_feedback="", mcp_settings={}, enable_background_investigation=False, + enable_web_search=True, report_style=ReportStyle.ACADEMIC, enable_deep_thinking=False, enable_clarification=False,