2026-01-14 07:17:22 +08:00
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
from langchain.tools import tool
|
|
|
|
|
from tavily import TavilyClient
|
|
|
|
|
|
|
|
|
|
from src.config import get_app_config
|
|
|
|
|
|
|
|
|
|
# Module-level Tavily client shared by the web_search and web_fetch tools
# defined in this file. It is constructed once, with no arguments, when the
# module is imported.
# NOTE(review): presumably the SDK reads its API key from the environment
# when none is passed — confirm against the Tavily client documentation.
tavily_client = TavilyClient()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@tool("web_search", parse_docstring=True)
def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
    # NOTE: the decorator is called with parse_docstring=True, so the
    # docstring above is consumed at runtime and its wording is kept verbatim.
    # Implementation notes therefore live in comments.
    #
    # Returns a JSON array (pretty-printed, non-ASCII preserved) of objects
    # with the keys "title", "url" and "snippet", one per search hit.
    default_max_results = 5

    config = get_app_config().get_tool_config("web_search")

    # `model_extra` can be None (pydantic returns None when a model does not
    # allow extra fields); normalise it to an empty mapping so the lookup
    # below cannot raise TypeError.
    extra = {}
    if config is not None:
        extra = config.model_extra or {}

    # A single lookup with a default replaces the `in` check followed by `.get`.
    max_results = extra.get("max_results", default_max_results)

    response = tavily_client.search(query, max_results=max_results)

    # Keep only the fields the caller needs; Tavily's "content" is exposed
    # under the name "snippet".
    hits = [
        {
            "title": hit["title"],
            "url": hit["url"],
            "snippet": hit["content"],
        }
        for hit in response["results"]
    ]
    return json.dumps(hits, indent=2, ensure_ascii=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@tool("web_fetch", parse_docstring=True)
def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
    Do NOT add www. to URLs that do NOT have them.
    URLs must include the schema: https://example.com is a valid URL while example.com is an invalid URL.

    Args:
        url: The URL to fetch the contents of.
    """
    # NOTE: the decorator is called with parse_docstring=True, so the
    # docstring above is consumed at runtime and its wording is kept verbatim.
    #
    # Returns one of:
    #   * "Error: <message>"         - the first entry of "failed_results"
    #   * "# <title>\n\n<content>"   - the first entry of "results"
    #   * "Error: No results found"  - neither list had an entry
    response = tavily_client.extract([url])

    # Failures take precedence over successes, as in the original ordering.
    failed = response.get("failed_results")
    if failed:
        return f"Error: {failed[0]['error']}"

    results = response.get("results")
    if results:
        page = results[0]
        # NOTE(review): extract results may not carry a "title" field —
        # confirm against the Tavily Extract response schema. Fall back to
        # the requested URL as the heading instead of raising KeyError.
        title = page.get("title", url)
        return f"# {title}\n\n{page['raw_content']}"

    return "Error: No results found"
|