mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
feat: add Domain Control Features for Tavily Search Engine (#401)
* feat: add Domain Control Features for Tavily Search Engine * fixed * chore: update config.md
This commit is contained in:
@@ -20,3 +20,18 @@ BASIC_MODEL:
|
||||
# base_url: https://ark-cn-beijing.bytedance.net/api/v3
|
||||
# model: "doubao-1-5-thinking-pro-m-250428"
|
||||
# api_key: xxxx
|
||||
|
||||
# OTHER SETTINGS:
|
||||
# Search engine configuration (currently only Tavily is supported)
|
||||
# SEARCH_ENGINE:
|
||||
# engine: tavily
|
||||
# # Only include results from these domains
|
||||
# include_domains:
|
||||
# - example.com
|
||||
# - trusted-news.com
|
||||
# - reliable-source.org
|
||||
# - gov.cn
|
||||
# - edu.cn
|
||||
# # Exclude results from these domains
|
||||
# exclude_domains:
|
||||
# - example.com
|
||||
|
||||
@@ -115,3 +115,25 @@ BASIC_MODEL:
|
||||
api_version: $AZURE_API_VERSION
|
||||
api_key: $AZURE_API_KEY
|
||||
```
|
||||
## About Search Engine
|
||||
|
||||
### How to control search domains for Tavily?
|
||||
|
||||
DeerFlow allows you to control which domains are included or excluded in Tavily search results through the configuration file. This helps improve search result quality and reduce hallucinations by focusing on trusted sources.
|
||||
|
||||
`Tips`: Domain filtering is currently supported only for the Tavily search engine.
|
||||
|
||||
You can configure domain filtering in your `conf.yaml` file as follows:
|
||||
|
||||
```yaml
|
||||
SEARCH_ENGINE:
|
||||
engine: tavily
|
||||
# Only include results from these domains (whitelist)
|
||||
include_domains:
|
||||
- trusted-news.com
|
||||
- gov.org
|
||||
- reliable-source.edu
|
||||
# Exclude results from these domains (blacklist)
|
||||
exclude_domains:
|
||||
- unreliable-site.com
|
||||
- spam-domain.net
|
||||
@@ -4,12 +4,14 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain_community.tools import BraveSearch, DuckDuckGoSearchResults
|
||||
from langchain_community.tools.arxiv import ArxivQueryRun
|
||||
from langchain_community.utilities import ArxivAPIWrapper, BraveSearchWrapper
|
||||
|
||||
from src.config import SearchEngine, SELECTED_SEARCH_ENGINE
|
||||
from src.config import load_yaml_config
|
||||
from src.tools.tavily_search.tavily_search_results_with_images import (
|
||||
TavilySearchResultsWithImages,
|
||||
)
|
||||
@@ -25,15 +27,33 @@ LoggedBraveSearch = create_logged_tool(BraveSearch)
|
||||
LoggedArxivSearch = create_logged_tool(ArxivQueryRun)
|
||||
|
||||
|
||||
def get_search_config():
    """Return the ``SEARCH_ENGINE`` section of ``conf.yaml`` as a dict.

    The result is always a dict, so callers can call ``.get(...)`` on it
    without checking for ``None`` first. An empty dict is returned when the
    section is absent or has no value.
    """
    # NOTE(review): defensively treat a falsy loader result as an empty
    # config; confirm whether load_yaml_config can actually return None.
    config = load_yaml_config("conf.yaml") or {}
    # ``dict.get(key, {})`` only substitutes the default for a *missing* key.
    # A bare ``SEARCH_ENGINE:`` line whose children are all commented out is
    # parsed by YAML as null, so fall back to ``{}`` for that case as well.
    return config.get("SEARCH_ENGINE") or {}
|
||||
|
||||
|
||||
# Get the selected search tool
|
||||
def get_web_search_tool(max_search_results: int):
|
||||
search_config = get_search_config()
|
||||
|
||||
if SELECTED_SEARCH_ENGINE == SearchEngine.TAVILY.value:
|
||||
# Only get and apply include/exclude domains for Tavily
|
||||
include_domains: Optional[List[str]] = search_config.get("include_domains", [])
|
||||
exclude_domains: Optional[List[str]] = search_config.get("exclude_domains", [])
|
||||
|
||||
logger.info(
|
||||
f"Tavily search configuration loaded: include_domains={include_domains}, exclude_domains={exclude_domains}"
|
||||
)
|
||||
|
||||
return LoggedTavilySearch(
|
||||
name="web_search",
|
||||
max_results=max_search_results,
|
||||
include_raw_content=True,
|
||||
include_images=True,
|
||||
include_image_descriptions=True,
|
||||
include_domains=include_domains,
|
||||
exclude_domains=exclude_domains,
|
||||
)
|
||||
elif SELECTED_SEARCH_ENGINE == SearchEngine.DUCKDUCKGO.value:
|
||||
return LoggedDuckDuckGoSearch(
|
||||
|
||||
Reference in New Issue
Block a user