From bedf7d4af2a19f32288387a9519cc91f9fdb0451 Mon Sep 17 00:00:00 2001 From: HansleCho Date: Tue, 29 Jul 2025 14:58:08 +0900 Subject: [PATCH] Feat: Add Wikipedia search engine (#478) * feat: add Wikipedia search engine * wikipedia * make format --- pyproject.toml | 1 + src/config/tools.py | 1 + src/tools/search.py | 27 +++++++++++++++++++++++++-- uv.lock | 34 +++++++++++++++++++++++----------- 4 files changed, 50 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 00b77d5..7d83ae6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "mcp>=1.6.0", "langchain-mcp-adapters>=0.0.9", "langchain-deepseek>=0.1.3", + "wikipedia>=1.4.0", ] [project.optional-dependencies] diff --git a/src/config/tools.py b/src/config/tools.py index ca0f300..fef0111 100644 --- a/src/config/tools.py +++ b/src/config/tools.py @@ -13,6 +13,7 @@ class SearchEngine(enum.Enum): DUCKDUCKGO = "duckduckgo" BRAVE_SEARCH = "brave_search" ARXIV = "arxiv" + WIKIPEDIA = "wikipedia" # Tool configuration diff --git a/src/tools/search.py b/src/tools/search.py index 8852ea0..54a86b9 100644 --- a/src/tools/search.py +++ b/src/tools/search.py @@ -5,9 +5,17 @@ import logging import os from typing import List, Optional -from langchain_community.tools import BraveSearch, DuckDuckGoSearchResults +from langchain_community.tools import ( + BraveSearch, + DuckDuckGoSearchResults, + WikipediaQueryRun, +) from langchain_community.tools.arxiv import ArxivQueryRun -from langchain_community.utilities import ArxivAPIWrapper, BraveSearchWrapper +from langchain_community.utilities import ( + ArxivAPIWrapper, + BraveSearchWrapper, + WikipediaAPIWrapper, +) from src.config import SearchEngine, SELECTED_SEARCH_ENGINE from src.config import load_yaml_config @@ -24,6 +32,7 @@ LoggedTavilySearch = create_logged_tool(TavilySearchResultsWithImages) LoggedDuckDuckGoSearch = create_logged_tool(DuckDuckGoSearchResults) LoggedBraveSearch = create_logged_tool(BraveSearch) LoggedArxivSearch = create_logged_tool(ArxivQueryRun) +LoggedWikipediaSearch = create_logged_tool(WikipediaQueryRun) def get_search_config(): @@ -76,5 +85,19 @@ def get_web_search_tool(max_search_results: int): load_all_available_meta=True, ), ) + elif SELECTED_SEARCH_ENGINE == SearchEngine.WIKIPEDIA.value: + wiki_lang = search_config.get("wikipedia_lang", "en") + wiki_doc_content_chars_max = search_config.get( + "wikipedia_doc_content_chars_max", 4000 + ) + return LoggedWikipediaSearch( + name="web_search", + api_wrapper=WikipediaAPIWrapper( + lang=wiki_lang, + top_k_results=max_search_results, + load_all_available_meta=True, + doc_content_chars_max=wiki_doc_content_chars_max, + ), + ) else: raise ValueError(f"Unsupported search engine: {SELECTED_SEARCH_ENGINE}") diff --git a/uv.lock b/uv.lock index b83292b..6976837 100644 --- a/uv.lock +++ b/uv.lock @@ -393,6 +393,7 @@ dependencies = [ { name = "socksio" }, { name = "sse-starlette" }, { name = "uvicorn" }, + { name = "wikipedia" }, { name = "yfinance" }, ] @@ -439,6 +440,7 @@ requires-dist = [ { name = "socksio", specifier = ">=1.0.0" }, { name = "sse-starlette", specifier = ">=1.6.5" }, { name = "uvicorn", specifier = ">=0.27.1" }, + { name = "wikipedia", specifier = ">=1.4.0" }, { name = "yfinance", specifier = ">=0.2.54" }, ] provides-extras = ["dev", "test"] @@ -974,10 +976,10 @@ name = "langgraph" version = "0.4.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "langchain-core", marker = "python_full_version < '4.0'" }, + { name = "langchain-core", marker = "python_full_version < '4'" }, { name = "langgraph-checkpoint" }, - { name = "langgraph-prebuilt", marker = "python_full_version < '4.0'" }, - { name = "langgraph-sdk", marker = "python_full_version < '4.0'" }, + { name = "langgraph-prebuilt", marker = "python_full_version < '4'" }, + { name = "langgraph-sdk", marker = "python_full_version < '4'" }, { name = "pydantic" }, { name = "xxhash" }, ] @@ -995,11 +997,11 @@ dependencies = [ { name = "cryptography" }, { name = "httpx" }, { name = "jsonschema-rs" }, - { name = "langchain-core", marker = "python_full_version < '4.0'" }, - { name = "langgraph", marker = "python_full_version < '4.0'" }, - { name = "langgraph-checkpoint", marker = "python_full_version < '4.0'" }, + { name = "langchain-core", marker = "python_full_version < '4'" }, + { name = "langgraph", marker = "python_full_version < '4'" }, + { name = "langgraph-checkpoint", marker = "python_full_version < '4'" }, { name = "langgraph-runtime-inmem" }, - { name = "langgraph-sdk", marker = "python_full_version < '4.0'" }, + { name = "langgraph-sdk", marker = "python_full_version < '4'" }, { name = "langsmith" }, { name = "orjson" }, { name = "pyjwt" }, @@ -1043,8 +1045,8 @@ wheels = [ [package.optional-dependencies] inmem = [ - { name = "langgraph-api", marker = "python_full_version < '4.0'" }, - { name = "langgraph-runtime-inmem", marker = "python_full_version < '4.0'" }, + { name = "langgraph-api", marker = "python_full_version < '4'" }, + { name = "langgraph-runtime-inmem", marker = "python_full_version < '4'" }, { name = "python-dotenv" }, ] @@ -1067,8 +1069,8 @@ version = "0.0.11" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "blockbuster" }, - { name = "langgraph", marker = "python_full_version < '4.0'" }, - { name = "langgraph-checkpoint", marker = "python_full_version < '4.0'" }, + { name = "langgraph", marker = "python_full_version < '4'" }, + { name = "langgraph-checkpoint", marker = "python_full_version < '4'" }, { name = "sse-starlette" }, { name = "starlette" }, { name = "structlog" }, @@ -2235,6 +2237,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774, upload-time = "2017-04-05T20:21:32.581Z" }, ] +[[package]] +name = "wikipedia" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2", size = 27748, upload-time = "2014-11-15T15:59:49.808Z" } + [[package]] name = "xxhash" version = "3.5.0"