feat: add Domain Control Features for Tavily Search Engine (#401)

* feat: add Domain Control Features for Tavily Search Engine

* fixed

* chore: update config.md
This commit is contained in:
HagonChan
2025-07-12 08:53:51 +08:00
committed by GitHub
parent 859c6e3c5d
commit dfd4712d9f
3 changed files with 57 additions and 0 deletions

View File

@@ -4,12 +4,14 @@
import json
import logging
import os
from typing import List, Optional
from langchain_community.tools import BraveSearch, DuckDuckGoSearchResults
from langchain_community.tools.arxiv import ArxivQueryRun
from langchain_community.utilities import ArxivAPIWrapper, BraveSearchWrapper
from src.config import SearchEngine, SELECTED_SEARCH_ENGINE
from src.config import load_yaml_config
from src.tools.tavily_search.tavily_search_results_with_images import (
TavilySearchResultsWithImages,
)
@@ -25,15 +27,33 @@ LoggedBraveSearch = create_logged_tool(BraveSearch)
LoggedArxivSearch = create_logged_tool(ArxivQueryRun)
def get_search_config():
    """Return the ``SEARCH_ENGINE`` section of ``conf.yaml`` as a dict.

    The result is always safe to call ``.get(...)`` on. An empty dict is
    returned when the section is missing *or* present but empty: in YAML a
    bare ``SEARCH_ENGINE:`` key with no children parses to ``None``, and
    ``dict.get(key, default)`` only applies its default for absent keys.

    Returns:
        The ``SEARCH_ENGINE`` mapping from the loaded configuration, or
        ``{}`` when there is nothing configured.
    """
    # Defensive: treat a falsy loader result (e.g. an empty config) as
    # "no settings" rather than failing on ``.get`` below.
    config = load_yaml_config("conf.yaml") or {}
    # ``or {}`` covers the explicit-null case that a ``.get`` default misses.
    return config.get("SEARCH_ENGINE") or {}
# Get the selected search tool
def get_web_search_tool(max_search_results: int):
search_config = get_search_config()
if SELECTED_SEARCH_ENGINE == SearchEngine.TAVILY.value:
# Only get and apply include/exclude domains for Tavily
include_domains: Optional[List[str]] = search_config.get("include_domains", [])
exclude_domains: Optional[List[str]] = search_config.get("exclude_domains", [])
logger.info(
f"Tavily search configuration loaded: include_domains={include_domains}, exclude_domains={exclude_domains}"
)
return LoggedTavilySearch(
name="web_search",
max_results=max_search_results,
include_raw_content=True,
include_images=True,
include_image_descriptions=True,
include_domains=include_domains,
exclude_domains=exclude_domains,
)
elif SELECTED_SEARCH_ENGINE == SearchEngine.DUCKDUCKGO.value:
return LoggedDuckDuckGoSearch(