From 2a97170b6c6743290780cc77eb2f07781c4da28e Mon Sep 17 00:00:00 2001 From: Willem Jiang Date: Mon, 15 Dec 2025 23:04:26 +0800 Subject: [PATCH] feat: add Serper search engine support (#762) * feat: add Serper search engine support * docs: update configuration guide and env example for Serper * test: add test case for Serper with missing API key --- .env.example | 3 ++- docs/configuration_guide.md | 18 ++++++++++++++++++ src/config/tools.py | 1 + src/tools/search.py | 11 +++++++++++ tests/unit/tools/test_search.py | 15 +++++++++++++++ 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index a41ef6b..23a609a 100644 --- a/.env.example +++ b/.env.example @@ -24,10 +24,11 @@ ENABLE_MCP_SERVER_CONFIGURATION=false # Otherwise, you system could be compromised. ENABLE_PYTHON_REPL=false -# Search Engine, Supported values: tavily, infoquest (recommended), duckduckgo, brave_search, arxiv, searx +# Search Engine, Supported values: tavily, infoquest (recommended), duckduckgo, brave_search, arxiv, searx, serper SEARCH_API=tavily TAVILY_API_KEY=tvly-xxx INFOQUEST_API_KEY="infoquest-xxx" +# SERPER_API_KEY=xxx # Required only if SEARCH_API is serper # SEARX_HOST=xxx # Required only if SEARCH_API is searx.(compatible with both Searx and SearxNG) # BRAVE_SEARCH_API_KEY=xxx # Required only if SEARCH_API is brave_search # JINA_API_KEY=jina_xxx # Optional, default is None diff --git a/docs/configuration_guide.md b/docs/configuration_guide.md index c2f7669..8979cc7 100644 --- a/docs/configuration_guide.md +++ b/docs/configuration_guide.md @@ -204,6 +204,24 @@ The context management doesn't work if the token_limit is not set. ## About Search Engine +### Supported Search Engines +DeerFlow supports the following search engines: +- Tavily +- InfoQuest +- DuckDuckGo +- Brave Search +- Arxiv +- Searx +- Serper +- Wikipedia + +### How to use Serper Search? + +To use Serper as your search engine, you need to: +1. Get your API key from [Serper](https://serper.dev/) +2. Set `SEARCH_API=serper` in your `.env` file +3. Set `SERPER_API_KEY=your_api_key` in your `.env` file + ### How to control search domains for Tavily? DeerFlow allows you to control which domains are included or excluded in Tavily search results through the configuration file. This helps improve search result quality and reduce hallucinations by focusing on trusted sources. diff --git a/src/config/tools.py b/src/config/tools.py index 87465f0..5e435b6 100644 --- a/src/config/tools.py +++ b/src/config/tools.py @@ -17,6 +17,7 @@ class SearchEngine(enum.Enum): ARXIV = "arxiv" SEARX = "searx" WIKIPEDIA = "wikipedia" + SERPER = "serper" class CrawlerEngine(enum.Enum): diff --git a/src/tools/search.py b/src/tools/search.py index 5c08e1f..3e45c27 100644 --- a/src/tools/search.py +++ b/src/tools/search.py @@ -8,6 +8,7 @@ from typing import List, Optional from langchain_community.tools import ( BraveSearch, DuckDuckGoSearchResults, + GoogleSerperRun, SearxSearchRun, WikipediaQueryRun, ) @@ -15,6 +16,7 @@ from langchain_community.tools.arxiv import ArxivQueryRun from langchain_community.utilities import ( ArxivAPIWrapper, BraveSearchWrapper, + GoogleSerperAPIWrapper, SearxSearchWrapper, WikipediaAPIWrapper, ) @@ -33,6 +35,7 @@ LoggedTavilySearch = create_logged_tool(TavilySearchWithImages) LoggedInfoQuestSearch = create_logged_tool(InfoQuestSearchResults) LoggedDuckDuckGoSearch = create_logged_tool(DuckDuckGoSearchResults) LoggedBraveSearch = create_logged_tool(BraveSearch) +LoggedSerperSearch = create_logged_tool(GoogleSerperRun) LoggedArxivSearch = create_logged_tool(ArxivQueryRun) LoggedSearxSearch = create_logged_tool(SearxSearchRun) LoggedWikipediaSearch = create_logged_tool(WikipediaQueryRun) @@ -102,6 +105,14 @@ def get_web_search_tool(max_search_results: int): search_kwargs={"count": max_search_results}, ), ) + elif SELECTED_SEARCH_ENGINE == SearchEngine.SERPER.value: + return LoggedSerperSearch( + name="web_search", + api_wrapper=GoogleSerperAPIWrapper( + k=max_search_results, + serper_api_key=os.getenv("SERPER_API_KEY", ""), + ), + ) elif SELECTED_SEARCH_ENGINE == SearchEngine.ARXIV.value: return LoggedArxivSearch( name="web_search", diff --git a/tests/unit/tools/test_search.py b/tests/unit/tools/test_search.py index 2a26036..3b58a74 100644 --- a/tests/unit/tools/test_search.py +++ b/tests/unit/tools/test_search.py @@ -5,6 +5,7 @@ import os from unittest.mock import patch import pytest +from pydantic import ValidationError from src.config import SearchEngine from src.tools.search import get_web_search_tool @@ -56,6 +57,20 @@ class TestGetWebSearchTool: tool = get_web_search_tool(max_search_results=1) assert tool.search_wrapper.api_key.get_secret_value() == "" + @patch("src.tools.search.SELECTED_SEARCH_ENGINE", SearchEngine.SERPER.value) + @patch.dict(os.environ, {"SERPER_API_KEY": "test_serper_key"}) + def test_get_web_search_tool_serper(self): + tool = get_web_search_tool(max_search_results=6) + assert tool.name == "web_search" + assert tool.api_wrapper.k == 6 + assert tool.api_wrapper.serper_api_key == "test_serper_key" + + @patch("src.tools.search.SELECTED_SEARCH_ENGINE", SearchEngine.SERPER.value) + @patch.dict(os.environ, {}, clear=True) + def test_get_web_search_tool_serper_no_api_key(self): + with pytest.raises(ValidationError): + get_web_search_tool(max_search_results=1) + @patch("src.tools.search.SELECTED_SEARCH_ENGINE", SearchEngine.TAVILY.value) @patch("src.tools.search.load_yaml_config") def test_get_web_search_tool_tavily_with_custom_config(self, mock_config):