mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
support infoquest (#960)
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
committed by
GitHub
parent
3e4a24f48b
commit
28e1257e1e
@@ -4,6 +4,8 @@ TAVILY_API_KEY=your-tavily-api-key
|
|||||||
# Jina API Key
|
# Jina API Key
|
||||||
JINA_API_KEY=your-jina-api-key
|
JINA_API_KEY=your-jina-api-key
|
||||||
|
|
||||||
|
# InfoQuest API Key
|
||||||
|
INFOQUEST_API_KEY=your-infoquest-api-key
|
||||||
# CORS Origins (comma-separated) - e.g., http://localhost:3000,http://localhost:3001
|
# CORS Origins (comma-separated) - e.g., http://localhost:3000,http://localhost:3001
|
||||||
# CORS_ORIGINS=http://localhost:3000
|
# CORS_ORIGINS=http://localhost:3000
|
||||||
|
|
||||||
|
|||||||
11
README.md
11
README.md
@@ -16,6 +16,16 @@ Learn more and see **real demos** on our official website.
|
|||||||
|
|
||||||
**[deerflow.tech](https://deerflow.tech/)**
|
**[deerflow.tech](https://deerflow.tech/)**
|
||||||
|
|
||||||
|
## InfoQuest
|
||||||
|
|
||||||
|
DeerFlow now integrates the intelligent search and crawling toolset independently developed by BytePlus — [InfoQuest (free online experience available)](https://docs.byteplus.com/en/docs/InfoQuest/What_is_Info_Quest)
|
||||||
|
|
||||||
|
<a href="https://docs.byteplus.com/en/docs/InfoQuest/What_is_Info_Quest" target="_blank">
|
||||||
|
<img
|
||||||
|
src="https://sf16-sg.tiktokcdn.com/obj/eden-sg/hubseh7bsbps/20251208-160108.png" alt="InfoQuest_banner"
|
||||||
|
/>
|
||||||
|
</a>
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Table of Contents
|
## Table of Contents
|
||||||
@@ -94,6 +104,7 @@ Learn more and see **real demos** on our official website.
|
|||||||
TAVILY_API_KEY=your-tavily-api-key
|
TAVILY_API_KEY=your-tavily-api-key
|
||||||
OPENAI_API_KEY=your-openai-api-key
|
OPENAI_API_KEY=your-openai-api-key
|
||||||
# Add other provider keys as needed
|
# Add other provider keys as needed
|
||||||
|
INFOQUEST_API_KEY=your-infoquest-api-key
|
||||||
```
|
```
|
||||||
|
|
||||||
- Option B: Export environment variables in your shell
|
- Option B: Export environment variables in your shell
|
||||||
|
|||||||
312
backend/src/community/infoquest/infoquest_client.py
Normal file
312
backend/src/community/infoquest/infoquest_client.py
Normal file
@@ -0,0 +1,312 @@
|
|||||||
|
"""Util that calls InfoQuest Search And Fetch API.
|
||||||
|
|
||||||
|
In order to set this up, follow instructions at:
|
||||||
|
https://docs.byteplus.com/en/docs/InfoQuest/What_is_Info_Quest
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class InfoQuestClient:
    """Client for interacting with the InfoQuest web search and fetch API."""

    def __init__(self, fetch_time: int = -1, fetch_timeout: int = -1, fetch_navigation_timeout: int = -1, search_time_range: int = -1):
        """Initialize the client.

        Args:
            fetch_time: Wait time applied to crawl requests; -1 disables it.
            fetch_timeout: Overall crawl timeout; -1 disables it.
            fetch_navigation_timeout: Page-navigation timeout; -1 disables it.
            search_time_range: Time-range filter for searches; -1 disables it.
        """
        logger.info("\n============================================\n🚀 BytePlus InfoQuest Client Initialization 🚀\n============================================")

        self.fetch_time = fetch_time
        self.fetch_timeout = fetch_timeout
        self.fetch_navigation_timeout = fetch_navigation_timeout
        self.search_time_range = search_time_range
        # Only records whether the key exists; the value itself is re-read
        # from the environment when headers are built.
        self.api_key_set = bool(os.getenv("INFOQUEST_API_KEY"))
        if logger.isEnabledFor(logging.DEBUG):
            config_details = (
                f"\n📋 Configuration Details:\n"
                f"├── Fetch time: {fetch_time} {'(Default: No fetch time)' if fetch_time == -1 else '(Custom)'}\n"
                f"├── Fetch Timeout: {fetch_timeout} {'(Default: No fetch timeout)' if fetch_timeout == -1 else '(Custom)'}\n"
                f"├── Navigation Timeout: {fetch_navigation_timeout} {'(Default: No Navigation Timeout)' if fetch_navigation_timeout == -1 else '(Custom)'}\n"
                f"├── Search Time Range: {search_time_range} {'(Default: No Search Time Range)' if search_time_range == -1 else '(Custom)'}\n"
                f"└── API Key: {'✅ Configured' if self.api_key_set else '❌ Not set'}"
            )

            logger.debug(config_details)
            logger.debug("\n" + "*" * 70 + "\n")

    def fetch(self, url: str, return_format: str = "html") -> str:
        """Crawl *url* via the InfoQuest reader API and return its content.

        Returns the extracted ``reader_result`` (or ``content`` as a
        fallback) when the response is JSON, otherwise the raw response
        text. Failures are reported as a string starting with ``"Error: "``
        rather than raised.
        """
        if logger.isEnabledFor(logging.DEBUG):
            url_truncated = url[:50] + "..." if len(url) > 50 else url
            logger.debug(
                f"InfoQuest - Fetch API request initiated | "
                f"operation=crawl url | "
                f"url_truncated={url_truncated} | "
                f"has_timeout_filter={self.fetch_timeout > 0} | timeout_filter={self.fetch_timeout} | "
                f"has_fetch_time_filter={self.fetch_time > 0} | fetch_time_filter={self.fetch_time} | "
                f"has_navigation_timeout_filter={self.fetch_navigation_timeout > 0} | navi_timeout_filter={self.fetch_navigation_timeout} | "
                f"request_type=sync"
            )

        # Prepare headers
        headers = self._prepare_headers()

        # Prepare request data
        data = self._prepare_crawl_request_data(url, return_format)

        logger.debug("Sending crawl request to InfoQuest API")
        try:
            # NOTE(review): no timeout= is passed to requests.post, so a
            # stalled server can hang this call indefinitely — confirm
            # whether a client-side timeout should be added.
            response = requests.post("https://reader.infoquest.bytepluses.com", headers=headers, json=data)

            # Check if status code is not 200
            if response.status_code != 200:
                error_message = f"fetch API returned status {response.status_code}: {response.text}"
                logger.debug("InfoQuest Crawler fetch API return status %d: %s for URL: %s", response.status_code, response.text, url)
                return f"Error: {error_message}"

            # Check for empty response
            if not response.text or not response.text.strip():
                error_message = "no result found"
                logger.debug("InfoQuest Crawler returned empty response for URL: %s", url)
                return f"Error: {error_message}"

            # Try to parse response as JSON and extract reader_result
            try:
                response_data = json.loads(response.text)
                # Extract reader_result if it exists
                if "reader_result" in response_data:
                    logger.debug("Successfully extracted reader_result from JSON response")
                    return response_data["reader_result"]
                elif "content" in response_data:
                    # Fallback to content field if reader_result is not available
                    logger.debug("reader_result missing in JSON response, falling back to content field: %s",
                                 response_data["content"])
                    return response_data["content"]
                else:
                    # If neither field exists, return the original response
                    # (falls through to the raw-text return below).
                    logger.warning("Neither reader_result nor content field found in JSON response")
            except json.JSONDecodeError:
                # If response is not JSON, return the original text
                logger.debug("Response is not in JSON format, returning as-is")
                return response.text

            # Print partial response for debugging
            if logger.isEnabledFor(logging.DEBUG):
                response_sample = response.text[:200] + ("..." if len(response.text) > 200 else "")
                logger.debug("Successfully received response, content length: %d bytes, first 200 chars: %s", len(response.text), response_sample)
            return response.text
        except Exception as e:
            # Network errors and any other unexpected failure are folded
            # into the string error contract rather than propagated.
            error_message = f"fetch API failed: {str(e)}"
            logger.error(error_message)
            return f"Error: {error_message}"

    @staticmethod
    def _prepare_headers() -> dict[str, str]:
        """Prepare request headers."""
        headers = {
            "Content-Type": "application/json",
        }

        # Add API key if available
        if os.getenv("INFOQUEST_API_KEY"):
            headers["Authorization"] = f"Bearer {os.getenv('INFOQUEST_API_KEY')}"
            logger.debug("API key added to request headers")
        else:
            # The request is still sent; the server decides how to handle
            # unauthenticated calls.
            logger.warning("InfoQuest API key is not set. Provide your own key for authentication.")

        return headers

    def _prepare_crawl_request_data(self, url: str, return_format: str) -> dict[str, Any]:
        """Prepare request data with formatted parameters."""
        # Normalize return_format: the API expects the uppercase "HTML"
        # spelling; any other format string is passed through unchanged.
        if return_format and return_format.lower() == "html":
            normalized_format = "HTML"
        else:
            normalized_format = return_format

        data = {"url": url, "format": normalized_format}

        # Add timeout parameters if set to positive values
        timeout_params = {}
        if self.fetch_time > 0:
            timeout_params["fetch_time"] = self.fetch_time
        if self.fetch_timeout > 0:
            timeout_params["timeout"] = self.fetch_timeout
        if self.fetch_navigation_timeout > 0:
            timeout_params["navi_timeout"] = self.fetch_navigation_timeout

        # Log applied timeout parameters
        if timeout_params:
            logger.debug("Applying timeout parameters: %s", timeout_params)
            data.update(timeout_params)

        return data

    def web_search_raw_results(
        self,
        query: str,
        site: str,
        output_format: str = "JSON",
    ) -> dict:
        """Get results from the InfoQuest Web-Search API synchronously.

        Args:
            query: Search query string.
            site: Optional site filter; an empty string means no filter.
            output_format: Response format requested from the API.

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        headers = self._prepare_headers()

        params = {"format": output_format, "query": query}
        if self.search_time_range > 0:
            params["time_range"] = self.search_time_range

        if site != "":
            params["site"] = site

        # NOTE(review): no timeout= here either — see fetch() above.
        response = requests.post("https://search.infoquest.bytepluses.com", headers=headers, json=params)
        response.raise_for_status()

        # Print partial response for debugging
        response_json = response.json()
        if logger.isEnabledFor(logging.DEBUG):
            response_sample = json.dumps(response_json)[:200] + ("..." if len(json.dumps(response_json)) > 200 else "")
            logger.debug(f"Search API request completed successfully | service=InfoQuest | status=success | response_sample={response_sample}")

        return response_json

    @staticmethod
    def clean_results(raw_results: list[dict[str, dict[str, dict[str, Any]]]]) -> list[dict]:
        """Clean results from InfoQuest Web-Search API.

        Flattens organic page hits and top-story news items into a single
        list of dicts, de-duplicated by URL across both categories.
        """
        logger.debug("Processing web-search results")

        seen_urls = set()
        clean_results = []
        counts = {"pages": 0, "news": 0}

        for content_list in raw_results:
            content = content_list["content"]
            results = content["results"]

            if results.get("organic"):
                organic_results = results["organic"]
                for result in organic_results:
                    clean_result = {
                        "type": "page",
                    }
                    if "title" in result:
                        clean_result["title"] = result["title"]
                    if "desc" in result:
                        clean_result["desc"] = result["desc"]
                        # "snippet" mirrors "desc" for downstream consumers.
                        clean_result["snippet"] = result["desc"]
                    if "url" in result:
                        clean_result["url"] = result["url"]
                        url = clean_result["url"]
                        # Only results with a non-empty, unseen string URL
                        # are kept.
                        if isinstance(url, str) and url and url not in seen_urls:
                            seen_urls.add(url)
                            clean_results.append(clean_result)
                            counts["pages"] += 1

            if results.get("top_stories"):
                news = results["top_stories"]
                for obj in news["items"]:
                    clean_result = {
                        "type": "news",
                    }
                    if "time_frame" in obj:
                        clean_result["time_frame"] = obj["time_frame"]
                    if "source" in obj:
                        clean_result["source"] = obj["source"]
                    title = obj.get("title")
                    url = obj.get("url")
                    if title:
                        clean_result["title"] = title
                    if url:
                        clean_result["url"] = url
                    # News items additionally require a title to be kept.
                    if title and isinstance(url, str) and url and url not in seen_urls:
                        seen_urls.add(url)
                        clean_results.append(clean_result)
                        counts["news"] += 1
        logger.debug(f"Results processing completed | total_results={len(clean_results)} | pages={counts['pages']} | news_items={counts['news']} | unique_urls={len(seen_urls)}")

        return clean_results

    def web_search(
        self,
        query: str,
        site: str = "",
        output_format: str = "JSON",
    ) -> str:
        """Run a web search and return the cleaned results as a JSON string.

        On success, returns ``clean_results`` serialized with indent=2; on
        failure, returns a string starting with ``"Error: "``. When the
        response has neither ``search_result`` nor ``content``, the raw
        response is serialized and returned as-is.
        """
        if logger.isEnabledFor(logging.DEBUG):
            query_truncated = query[:50] + "..." if len(query) > 50 else query
            logger.debug(
                f"InfoQuest - Search API request initiated | "
                f"operation=search webs | "
                f"query_truncated={query_truncated} | "
                f"has_time_filter={self.search_time_range > 0} | time_filter={self.search_time_range} | "
                f"has_site_filter={bool(site)} | site={site} | "
                f"request_type=sync"
            )

        try:
            logger.debug("InfoQuest Web-Search - Executing search with parameters")
            raw_results = self.web_search_raw_results(
                query,
                site,
                output_format,
            )
            if "search_result" in raw_results:
                logger.debug("InfoQuest Web-Search - Successfully extracted search_result from JSON response")
                results = raw_results["search_result"]

                logger.debug("InfoQuest Web-Search - Processing raw search results")
                cleaned_results = self.clean_results(results["results"])

                result_json = json.dumps(cleaned_results, indent=2, ensure_ascii=False)

                logger.debug(f"InfoQuest Web-Search - Search tool execution completed | mode=synchronous | results_count={len(cleaned_results)}")
                return result_json

            elif "content" in raw_results:
                # Fallback to content field if search_result is not available
                error_message = "web search API return wrong format"
                logger.error("web search API return wrong format, no search_result nor content field found in JSON response, content: %s", raw_results["content"])
                return f"Error: {error_message}"
            else:
                # If neither field exists, return the original response
                logger.warning("InfoQuest Web-Search - Neither search_result nor content field found in JSON response")
                return json.dumps(raw_results, indent=2, ensure_ascii=False)

        except Exception as e:
            error_message = f"InfoQuest Web-Search - Search tool execution failed | mode=synchronous | error={str(e)}"
            logger.error(error_message)
            return f"Error: {error_message}"

    @staticmethod
    def clean_results_with_image_search(raw_results: list[dict[str, dict[str, dict[str, Any]]]]) -> list[dict]:
        """Clean results from InfoQuest Web-Search API.

        Image-search variant: keeps entries with a non-empty, unseen
        ``image_url`` and copies over ``thumbnail_url`` / ``url`` when
        present (the dict is appended first and mutated afterwards, which
        is safe because it is the same object).
        """
        logger.debug("Processing web-search results")

        seen_urls = set()
        clean_results = []
        counts = {"images": 0}

        for content_list in raw_results:
            content = content_list["content"]
            results = content["results"]

            if results.get("images_results"):
                images_results = results["images_results"]
                for result in images_results:
                    clean_result = {}
                    if "image_url" in result:
                        clean_result["image_url"] = result["image_url"]
                        url = clean_result["image_url"]
                        # De-duplicate by image URL.
                        if isinstance(url, str) and url and url not in seen_urls:
                            seen_urls.add(url)
                            clean_results.append(clean_result)
                            counts["images"] += 1
                    if "thumbnail_url" in result:
                        clean_result["thumbnail_url"] = result["thumbnail_url"]
                    if "url" in result:
                        clean_result["url"] = result["url"]

        logger.debug(f"Results processing completed | total_results={len(clean_results)} | images={counts['images']} | unique_urls={len(seen_urls)}")

        return clean_results
||||||
63
backend/src/community/infoquest/tools.py
Normal file
63
backend/src/community/infoquest/tools.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
from langchain.tools import tool
|
||||||
|
|
||||||
|
from src.config import get_app_config
|
||||||
|
from src.utils.readability import ReadabilityExtractor
|
||||||
|
|
||||||
|
from .infoquest_client import InfoQuestClient
|
||||||
|
|
||||||
|
readability_extractor = ReadabilityExtractor()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_infoquest_client() -> InfoQuestClient:
    """Build an InfoQuestClient from the app's tool configuration.

    Optional overrides are read from the ``model_extra`` of the
    "web_search" and "web_fetch" tool configs; any value that is not
    configured falls back to -1, which the client treats as "disabled".

    Returns:
        A configured InfoQuestClient instance.
    """

    def _extra_int(config, key: str) -> int:
        # model_extra holds undeclared fields on the pydantic tool config;
        # -1 is the client's sentinel for "not configured".
        if config is not None and key in config.model_extra:
            return config.model_extra.get(key)
        return -1

    app_config = get_app_config()
    search_config = app_config.get_tool_config("web_search")
    fetch_config = app_config.get_tool_config("web_fetch")

    return InfoQuestClient(
        search_time_range=_extra_int(search_config, "search_time_range"),
        fetch_timeout=_extra_int(fetch_config, "timeout"),
        fetch_navigation_timeout=_extra_int(fetch_config, "navigation_timeout"),
        fetch_time=_extra_int(fetch_config, "fetch_time"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@tool("web_search", parse_docstring=True)
|
||||||
|
def web_search_tool(query: str) -> str:
|
||||||
|
"""Search the web.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: The query to search for.
|
||||||
|
"""
|
||||||
|
|
||||||
|
client = _get_infoquest_client()
|
||||||
|
return client.web_search(query)
|
||||||
|
|
||||||
|
|
||||||
|
@tool("web_fetch", parse_docstring=True)
|
||||||
|
def web_fetch_tool(url: str) -> str:
|
||||||
|
"""Fetch the contents of a web page at a given URL.
|
||||||
|
Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
|
||||||
|
This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
|
||||||
|
Do NOT add www. to URLs that do NOT have them.
|
||||||
|
URLs must include the schema: https://example.com is a valid URL while example.com is an invalid URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL to fetch the contents of.
|
||||||
|
"""
|
||||||
|
client = _get_infoquest_client()
|
||||||
|
result = client.fetch(url)
|
||||||
|
if result.startswith("Error: "):
|
||||||
|
return result
|
||||||
|
article = readability_extractor.extract_article(result)
|
||||||
|
return article.to_markdown()[:4096]
|
||||||
184
backend/tests/test_infoquest_client.py
Normal file
184
backend/tests/test_infoquest_client.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
"""Tests for InfoQuest client and tools."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from src.community.infoquest import tools
|
||||||
|
from src.community.infoquest.infoquest_client import InfoQuestClient
|
||||||
|
|
||||||
|
|
||||||
|
class TestInfoQuestClient:
    """Unit tests for InfoQuestClient and the infoquest tool wrappers."""

    def test_infoquest_client_initialization(self):
        """Test InfoQuestClient initialization with different parameters."""
        # Test with default parameters
        client = InfoQuestClient()
        assert client.fetch_time == -1
        assert client.fetch_timeout == -1
        assert client.fetch_navigation_timeout == -1
        assert client.search_time_range == -1

        # Test with custom parameters
        client = InfoQuestClient(fetch_time=10, fetch_timeout=30, fetch_navigation_timeout=60, search_time_range=24)
        assert client.fetch_time == 10
        assert client.fetch_timeout == 30
        assert client.fetch_navigation_timeout == 60
        assert client.search_time_range == 24

    @patch("src.community.infoquest.infoquest_client.requests.post")
    def test_fetch_success(self, mock_post):
        """Test successful fetch operation."""
        # fetch() should unwrap the reader_result field from a JSON body.
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.text = json.dumps({"reader_result": "<html><body>Test content</body></html>"})
        mock_post.return_value = mock_response

        client = InfoQuestClient()
        result = client.fetch("https://example.com")

        assert result == "<html><body>Test content</body></html>"
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        # Verify the request target and that "html" was normalized to "HTML".
        assert args[0] == "https://reader.infoquest.bytepluses.com"
        assert kwargs["json"]["url"] == "https://example.com"
        assert kwargs["json"]["format"] == "HTML"

    @patch("src.community.infoquest.infoquest_client.requests.post")
    def test_fetch_non_200_status(self, mock_post):
        """Test fetch operation with non-200 status code."""
        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_response.text = "Not Found"
        mock_post.return_value = mock_response

        client = InfoQuestClient()
        result = client.fetch("https://example.com")

        # Errors are returned as strings, not raised.
        assert result == "Error: fetch API returned status 404: Not Found"

    @patch("src.community.infoquest.infoquest_client.requests.post")
    def test_fetch_empty_response(self, mock_post):
        """Test fetch operation with empty response."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.text = ""
        mock_post.return_value = mock_response

        client = InfoQuestClient()
        result = client.fetch("https://example.com")

        assert result == "Error: no result found"

    @patch("src.community.infoquest.infoquest_client.requests.post")
    def test_web_search_raw_results_success(self, mock_post):
        """Test successful web_search_raw_results operation."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"search_result": {"results": [{"content": {"results": {"organic": [{"title": "Test Result", "desc": "Test description", "url": "https://example.com"}]}}}], "images_results": []}}
        mock_post.return_value = mock_response

        client = InfoQuestClient()
        result = client.web_search_raw_results("test query", "")

        assert "search_result" in result
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        assert args[0] == "https://search.infoquest.bytepluses.com"
        assert kwargs["json"]["query"] == "test query"

    @patch("src.community.infoquest.infoquest_client.requests.post")
    def test_web_search_success(self, mock_post):
        """Test successful web_search operation."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"search_result": {"results": [{"content": {"results": {"organic": [{"title": "Test Result", "desc": "Test description", "url": "https://example.com"}]}}}], "images_results": []}}
        mock_post.return_value = mock_response

        client = InfoQuestClient()
        result = client.web_search("test query")

        # Check if result is a valid JSON string with expected content
        result_data = json.loads(result)
        assert len(result_data) == 1
        assert result_data[0]["title"] == "Test Result"
        assert result_data[0]["url"] == "https://example.com"

    def test_clean_results(self):
        """Test clean_results method with sample raw results."""
        # One organic page hit plus one top-story news item.
        raw_results = [
            {
                "content": {
                    "results": {
                        "organic": [{"title": "Test Page", "desc": "Page description", "url": "https://example.com/page1"}],
                        "top_stories": {"items": [{"title": "Test News", "source": "Test Source", "time_frame": "2 hours ago", "url": "https://example.com/news1"}]},
                    }
                }
            }
        ]

        cleaned = InfoQuestClient.clean_results(raw_results)

        assert len(cleaned) == 2
        assert cleaned[0]["type"] == "page"
        assert cleaned[0]["title"] == "Test Page"
        assert cleaned[1]["type"] == "news"
        assert cleaned[1]["title"] == "Test News"

    def test_clean_results_with_image_search(self):
        """Test clean_results_with_image_search method with sample raw results."""
        raw_results = [{"content": {"results": {"images_results": [{"image_url": "https://example.com/image1.jpg", "thumbnail_url": "https://example.com/thumb1.jpg","url": "https://example.com/page1"}]}}}]
        cleaned = InfoQuestClient.clean_results_with_image_search(raw_results)

        assert len(cleaned) == 1
        assert cleaned[0]["image_url"] == "https://example.com/image1.jpg"
        assert cleaned[0]["thumbnail_url"] == "https://example.com/thumb1.jpg"
        assert cleaned[0]["url"] == "https://example.com/page1"

    @patch("src.community.infoquest.tools._get_infoquest_client")
    def test_web_search_tool(self, mock_get_client):
        """Test web_search_tool function."""
        mock_client = MagicMock()
        mock_client.web_search.return_value = json.dumps([])
        mock_get_client.return_value = mock_client

        result = tools.web_search_tool.run("test query")

        assert result == json.dumps([])
        mock_get_client.assert_called_once()
        mock_client.web_search.assert_called_once_with("test query")

    @patch("src.community.infoquest.tools._get_infoquest_client")
    def test_web_fetch_tool(self, mock_get_client):
        """Test web_fetch_tool function."""
        mock_client = MagicMock()
        mock_client.fetch.return_value = "<html><body>Test content</body></html>"
        mock_get_client.return_value = mock_client

        result = tools.web_fetch_tool.run("https://example.com")

        # The readability extractor reduces the page to markdown.
        assert result == "# Untitled\n\nTest content"
        mock_get_client.assert_called_once()
        mock_client.fetch.assert_called_once_with("https://example.com")

    @patch("src.community.infoquest.tools.get_app_config")
    def test_get_infoquest_client(self, mock_get_app_config):
        """Test _get_infoquest_client function with config."""
        mock_config = MagicMock()
        # side_effect order matches the lookup order: web_search, then web_fetch.
        mock_config.get_tool_config.side_effect = [MagicMock(model_extra={"search_time_range": 24}), MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60})]
        mock_get_app_config.return_value = mock_config

        client = tools._get_infoquest_client()

        assert client.search_time_range == 24
        assert client.fetch_time == 10
        assert client.fetch_timeout == 30
        assert client.fetch_navigation_timeout == 60

    @patch("src.community.infoquest.infoquest_client.requests.post")
    def test_web_search_api_error(self, mock_post):
        """Test web_search operation with API error."""
        mock_post.side_effect = Exception("Connection error")

        client = InfoQuestClient()
        result = client.web_search("test query")

        assert "Error" in result
|
||||||
@@ -127,12 +127,30 @@ tools:
|
|||||||
max_results: 5
|
max_results: 5
|
||||||
# api_key: $TAVILY_API_KEY # Set if needed
|
# api_key: $TAVILY_API_KEY # Set if needed
|
||||||
|
|
||||||
|
# Web search tool (requires InfoQuest API key)
|
||||||
|
# - name: web_search
|
||||||
|
# group: web
|
||||||
|
# use: src.community.infoquest.tools:web_search_tool
|
||||||
|
# # Limits the scope of search results, returning only content within the specified time range. Set to -1 to disable time filtering
|
||||||
|
# search_time_range: 10
|
||||||
|
|
||||||
# Web fetch tool (uses Jina AI reader)
|
# Web fetch tool (uses Jina AI reader)
|
||||||
- name: web_fetch
|
- name: web_fetch
|
||||||
group: web
|
group: web
|
||||||
use: src.community.jina_ai.tools:web_fetch_tool
|
use: src.community.jina_ai.tools:web_fetch_tool
|
||||||
timeout: 10
|
timeout: 10
|
||||||
|
|
||||||
|
# Web fetch tool (uses InfoQuest AI reader)
|
||||||
|
# - name: web_fetch
|
||||||
|
# group: web
|
||||||
|
# use: src.community.infoquest.tools:web_fetch_tool
|
||||||
|
# # Overall timeout for the entire crawling process (in seconds). Set to positive value to enable, -1 to disable
|
||||||
|
# timeout: 10
|
||||||
|
# # Waiting time after page loading (in seconds). Set to positive value to enable, -1 to disable
|
||||||
|
# fetch_time: 10
|
||||||
|
# # Timeout for navigating to the page (in seconds). Set to positive value to enable, -1 to disable
|
||||||
|
# navigation_timeout: 30
|
||||||
|
|
||||||
# Image search tool (uses DuckDuckGo)
|
# Image search tool (uses DuckDuckGo)
|
||||||
# Use this to find reference images before image generation
|
# Use this to find reference images before image generation
|
||||||
- name: image_search
|
- name: image_search
|
||||||
|
|||||||
Reference in New Issue
Block a user