mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-02 22:02:13 +08:00
infoquest support image-search (#1255)
This commit is contained in:
committed by
GitHub
parent
38ace61617
commit
f6c54e0308
@@ -17,13 +17,15 @@ logger = logging.getLogger(__name__)
|
||||
class InfoQuestClient:
|
||||
"""Client for interacting with the InfoQuest web search and fetch API."""
|
||||
|
||||
def __init__(self, fetch_time: int = -1, fetch_timeout: int = -1, fetch_navigation_timeout: int = -1, search_time_range: int = -1):
|
||||
def __init__(self, fetch_time: int = -1, fetch_timeout: int = -1, fetch_navigation_timeout: int = -1, search_time_range: int = -1, image_search_time_range: int = -1, image_size: str = "i"):
|
||||
logger.info("\n============================================\n🚀 BytePlus InfoQuest Client Initialization 🚀\n============================================")
|
||||
|
||||
self.fetch_time = fetch_time
|
||||
self.fetch_timeout = fetch_timeout
|
||||
self.fetch_navigation_timeout = fetch_navigation_timeout
|
||||
self.search_time_range = search_time_range
|
||||
self.image_search_time_range = image_search_time_range
|
||||
self.image_size = image_size
|
||||
self.api_key_set = bool(os.getenv("INFOQUEST_API_KEY"))
|
||||
if logger.isEnabledFor(logging.DEBUG):
|
||||
config_details = (
|
||||
@@ -32,6 +34,8 @@ class InfoQuestClient:
|
||||
f"├── Fetch Timeout: {fetch_timeout} {'(Default: No fetch timeout)' if fetch_timeout == -1 else '(Custom)'}\n"
|
||||
f"├── Navigation Timeout: {fetch_navigation_timeout} {'(Default: No Navigation Timeout)' if fetch_navigation_timeout == -1 else '(Custom)'}\n"
|
||||
f"├── Search Time Range: {search_time_range} {'(Default: No Search Time Range)' if search_time_range == -1 else '(Custom)'}\n"
|
||||
f"├── Image Search Time Range: {image_search_time_range} {'(Default: No Image Search Time Range)' if image_search_time_range == -1 else '(Custom)'}\n"
|
||||
f"├── Image Size: {image_size} {'(Default: Medium)' if image_size == 'm' else '(Custom)'}\n"
|
||||
f"└── API Key: {'✅ Configured' if self.api_key_set else '❌ Not set'}"
|
||||
)
|
||||
|
||||
@@ -295,17 +299,106 @@ class InfoQuestClient:
|
||||
images_results = results["images_results"]
|
||||
for result in images_results:
|
||||
clean_result = {}
|
||||
if "image_url" in result:
|
||||
clean_result["image_url"] = result["image_url"]
|
||||
if "original" in result:
|
||||
clean_result["image_url"] = result["original"]
|
||||
url = clean_result["image_url"]
|
||||
if isinstance(url, str) and url and url not in seen_urls:
|
||||
seen_urls.add(url)
|
||||
clean_results.append(clean_result)
|
||||
counts["images"] += 1
|
||||
if "thumbnail_url" in result:
|
||||
clean_result["thumbnail_url"] = result["thumbnail_url"]
|
||||
if "url" in result:
|
||||
clean_result["url"] = result["url"]
|
||||
if "title" in result:
|
||||
clean_result["title"] = result["title"]
|
||||
logger.debug(f"Results processing completed | total_results={len(clean_results)} | images={counts['images']} | unique_urls={len(seen_urls)}")
|
||||
|
||||
return clean_results
|
||||
|
||||
def image_search_raw_results(
    self,
    query: str,
    site: str = "",
    output_format: str = "JSON",
) -> dict:
    """Get image search results from the InfoQuest Web-Search API synchronously.

    Args:
        query: Search phrase sent in the ``query`` request field.
        site: Optional site filter; only sent when non-empty.
        output_format: Value sent in the ``format`` request field.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        Whatever ``requests.post`` / ``response.raise_for_status()`` raise
        (connection errors, timeouts, HTTP error statuses) and any error
        from decoding the response body as JSON.
    """
    headers = self._prepare_headers()

    params = {"format": output_format, "query": query, "search_type": "Images"}

    # Add time_range filter if specified (1-365).
    # The value may come straight from configuration, so guard against
    # None / non-numeric values instead of letting the comparison raise.
    time_range = self.image_search_time_range
    if isinstance(time_range, (int, float)):
        if 1 <= time_range <= 365:
            params["time_range"] = time_range
        elif time_range > 0:
            logger.warning(f"time_range {time_range} is out of valid range (1-365), ignoring")
    elif time_range is not None:
        logger.warning(f"time_range {time_range!r} is not a number, ignoring")

    # Add site filter if specified
    if site:
        params["site"] = site

    # Add image_size filter if specified
    image_size = self.image_size
    if image_size and image_size in ("l", "m", "i"):
        params["image_size"] = image_size
    elif image_size:
        logger.warning(f"image_size {self.image_size} is not valid, must be 'l', 'm', or 'i'")

    # Always bound the request: without a timeout a stalled connection
    # would block the calling tool indefinitely. (connect, read) seconds.
    response = requests.post(
        "https://search.infoquest.bytepluses.com",
        headers=headers,
        json=params,
        timeout=(10, 60),
    )
    response.raise_for_status()

    response_json = response.json()
    if logger.isEnabledFor(logging.DEBUG):
        # Log only a short prefix of the payload; serialize once and reuse it.
        serialized = json.dumps(response_json)
        response_sample = serialized[:200] + ("..." if len(serialized) > 200 else "")
        logger.debug(f"Image Search API request completed successfully | service=InfoQuest | status=success | response_sample={response_sample}")

    return response_json
|
||||
|
||||
def image_search(
    self,
    query: str,
    site: str = "",
    output_format: str = "JSON",
) -> str:
    """Run an image search and return the outcome as a string.

    Args:
        query: Search phrase forwarded to ``image_search_raw_results``.
        site: Optional site filter forwarded unchanged.
        output_format: Response format forwarded unchanged.

    Returns:
        - A JSON string of the cleaned results when the response contains
          a ``search_result`` field.
        - ``"Error: ..."`` when the response only carries a ``content``
          field, or when any exception is raised while searching.
        - The raw response dumped as JSON when neither field is present.
    """
    if logger.isEnabledFor(logging.DEBUG):
        query_truncated = query[:50] + "..." if len(query) > 50 else query
        # Guard the range check so a None / non-numeric configured value
        # cannot raise here, outside the try block below.
        time_range = self.image_search_time_range
        time_range_in_bounds = isinstance(time_range, (int, float)) and 1 <= time_range <= 365
        logger.debug(
            f"InfoQuest - Image Search API request initiated | "
            f"operation=search images | "
            f"query_truncated={query_truncated} | "
            f"has_site_filter={bool(site)} | site={site} | "
            f"image_search_time_range={time_range if time_range_in_bounds else 'default'} | "
            f"image_size={self.image_size} | "
            f"request_type=sync"
        )

    try:
        logger.info("InfoQuest Image Search - Executing search with parameters")
        raw_results = self.image_search_raw_results(
            query,
            site,
            output_format,
        )

        if "search_result" in raw_results:
            logger.debug("InfoQuest Image Search - Successfully extracted search_result from JSON response")
            results = raw_results["search_result"]

            logger.debug(f"InfoQuest Image Search - Processing raw image search results: {results}")
            cleaned_results = self.clean_results_with_image_search(results["results"])

            result_json = json.dumps(cleaned_results, indent=2, ensure_ascii=False)

            logger.debug(f"InfoQuest Image Search - Image search tool execution completed | mode=synchronous | results_count={len(cleaned_results)}")
            return result_json

        elif "content" in raw_results:
            # The response carries only a content field, which is not the
            # expected shape for image search: log it and report an error.
            error_message = "image search API return wrong format"
            logger.error("image search API return wrong format, search_result field missing from JSON response, content: %s", raw_results["content"])
            return f"Error: {error_message}"
        else:
            # If neither field exists, return the original response
            logger.warning("InfoQuest Image Search - Neither search_result nor content field found in JSON response")
            return json.dumps(raw_results, indent=2, ensure_ascii=False)

    except Exception as e:
        # Boundary for the tool call: every failure is turned into an
        # "Error: ..." string rather than propagated to the caller.
        error_message = f"InfoQuest Image Search - Image search tool execution failed | mode=synchronous | error={str(e)}"
        logger.error(error_message)
        return f"Error: {error_message}"
|
||||
|
||||
@@ -13,6 +13,7 @@ def _get_infoquest_client() -> InfoQuestClient:
|
||||
search_time_range = -1
|
||||
if search_config is not None and "search_time_range" in search_config.model_extra:
|
||||
search_time_range = search_config.model_extra.get("search_time_range")
|
||||
|
||||
fetch_config = get_app_config().get_tool_config("web_fetch")
|
||||
fetch_time = -1
|
||||
if fetch_config is not None and "fetch_time" in fetch_config.model_extra:
|
||||
@@ -23,12 +24,24 @@ def _get_infoquest_client() -> InfoQuestClient:
|
||||
navigation_timeout = -1
|
||||
if fetch_config is not None and "navigation_timeout" in fetch_config.model_extra:
|
||||
navigation_timeout = fetch_config.model_extra.get("navigation_timeout")
|
||||
|
||||
image_search_config = get_app_config().get_tool_config("image_search")
|
||||
image_search_time_range = -1
|
||||
if image_search_config is not None and "image_search_time_range" in image_search_config.model_extra:
|
||||
image_search_time_range = image_search_config.model_extra.get("image_search_time_range")
|
||||
image_size = "i"
|
||||
if image_search_config is not None and "image_size" in image_search_config.model_extra:
|
||||
image_size = image_search_config.model_extra.get("image_size")
|
||||
|
||||
|
||||
|
||||
return InfoQuestClient(
|
||||
search_time_range=search_time_range,
|
||||
fetch_timeout=fetch_timeout,
|
||||
fetch_navigation_timeout=navigation_timeout,
|
||||
fetch_time=fetch_time,
|
||||
image_search_time_range=image_search_time_range,
|
||||
image_size=image_size,
|
||||
)
|
||||
|
||||
|
||||
@@ -61,3 +74,22 @@ def web_fetch_tool(url: str) -> str:
|
||||
return result
|
||||
article = readability_extractor.extract_article(result)
|
||||
return article.to_markdown()[:4096]
|
||||
|
||||
|
||||
@tool("image_search", parse_docstring=True)
def image_search_tool(query: str) -> str:
    """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.

    **When to use:**
    - Before generating character/portrait images: search for similar poses, expressions, styles
    - Before generating specific objects/products: search for accurate visual references
    - Before generating scenes/locations: search for architectural or environmental references
    - Before generating fashion/clothing: search for style and detail references

    The returned image URLs can be used as reference images in image generation to significantly improve quality.

    Args:
        query: The query to search for images.
    """
    # NOTE: the docstring above is handed to the ``tool`` decorator
    # (parse_docstring=True), so its text is kept verbatim.
    # A client is built per call; only the query is forwarded to it.
    return _get_infoquest_client().image_search(query)
|
||||
|
||||
@@ -123,16 +123,6 @@ class TestInfoQuestClient:
|
||||
assert cleaned[1]["type"] == "news"
|
||||
assert cleaned[1]["title"] == "Test News"
|
||||
|
||||
def test_clean_results_with_image_search(self):
|
||||
"""Test clean_results_with_image_search method with sample raw results."""
|
||||
raw_results = [{"content": {"results": {"images_results": [{"image_url": "https://example.com/image1.jpg", "thumbnail_url": "https://example.com/thumb1.jpg", "url": "https://example.com/page1"}]}}}]
|
||||
cleaned = InfoQuestClient.clean_results_with_image_search(raw_results)
|
||||
|
||||
assert len(cleaned) == 1
|
||||
assert cleaned[0]["image_url"] == "https://example.com/image1.jpg"
|
||||
assert cleaned[0]["thumbnail_url"] == "https://example.com/thumb1.jpg"
|
||||
assert cleaned[0]["url"] == "https://example.com/page1"
|
||||
|
||||
@patch("deerflow.community.infoquest.tools._get_infoquest_client")
|
||||
def test_web_search_tool(self, mock_get_client):
|
||||
"""Test web_search_tool function."""
|
||||
@@ -163,7 +153,12 @@ class TestInfoQuestClient:
|
||||
def test_get_infoquest_client(self, mock_get_app_config):
|
||||
"""Test _get_infoquest_client function with config."""
|
||||
mock_config = MagicMock()
|
||||
mock_config.get_tool_config.side_effect = [MagicMock(model_extra={"search_time_range": 24}), MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60})]
|
||||
# Add image_search config to the side_effect
|
||||
mock_config.get_tool_config.side_effect = [
|
||||
MagicMock(model_extra={"search_time_range": 24}), # web_search config
|
||||
MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60}), # web_fetch config
|
||||
MagicMock(model_extra={"image_search_time_range": 7, "image_size": "l"}) # image_search config
|
||||
]
|
||||
mock_get_app_config.return_value = mock_config
|
||||
|
||||
client = tools._get_infoquest_client()
|
||||
@@ -172,6 +167,8 @@ class TestInfoQuestClient:
|
||||
assert client.fetch_time == 10
|
||||
assert client.fetch_timeout == 30
|
||||
assert client.fetch_navigation_timeout == 60
|
||||
assert client.image_search_time_range == 7
|
||||
assert client.image_size == "l"
|
||||
|
||||
@patch("deerflow.community.infoquest.infoquest_client.requests.post")
|
||||
def test_web_search_api_error(self, mock_post):
|
||||
@@ -182,3 +179,170 @@ class TestInfoQuestClient:
|
||||
result = client.web_search("test query")
|
||||
|
||||
assert "Error" in result
|
||||
|
||||
def test_clean_results_with_image_search(self):
    """An ``original`` link must come back as ``image_url`` together with the title."""
    image_entry = {
        "original": "https://example.com/image1.jpg",
        "title": "Test Image 1",
        "url": "https://example.com/page1",
    }
    payload = [{"content": {"results": {"images_results": [image_entry]}}}]

    cleaned = InfoQuestClient.clean_results_with_image_search(payload)

    assert len(cleaned) == 1
    first = cleaned[0]
    assert first["image_url"] == "https://example.com/image1.jpg"
    assert first["title"] == "Test Image 1"
||||
|
||||
def test_clean_results_with_image_search_empty(self):
    """An empty ``images_results`` list must produce no cleaned entries."""
    payload = [{"content": {"results": {"images_results": []}}}]

    assert len(InfoQuestClient.clean_results_with_image_search(payload)) == 0
|
||||
|
||||
def test_clean_results_with_image_search_no_images(self):
    """A payload without an ``images_results`` field must produce no cleaned entries."""
    organic_only = {"organic": [{"title": "Test Page"}]}
    payload = [{"content": {"results": organic_only}}]

    assert len(InfoQuestClient.clean_results_with_image_search(payload)) == 0
|
||||
|
||||
|
||||
class TestImageSearch:
    """Tests for the InfoQuest image search client methods and the ``image_search`` tool."""

    @staticmethod
    def _ok_response(images_results):
        """Build a mocked HTTP 200 response wrapping ``images_results`` in the search_result envelope."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"search_result": {"results": [{"content": {"results": {"images_results": images_results}}}]}}
        return mock_response

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_success(self, mock_post):
        """Test successful image_search_raw_results operation."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg", "title": "Test Image", "url": "https://example.com/page1"}])

        client = InfoQuestClient()
        result = client.image_search_raw_results("test query")

        assert "search_result" in result
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        assert args[0] == "https://search.infoquest.bytepluses.com"
        assert kwargs["json"]["query"] == "test query"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_with_parameters(self, mock_post):
        """Test image_search_raw_results with all parameters."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg"}])

        client = InfoQuestClient(image_search_time_range=30, image_size="l")
        client.image_search_raw_results(query="cat", site="unsplash.com", output_format="JSON")

        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["query"] == "cat"
        assert kwargs["json"]["time_range"] == 30
        assert kwargs["json"]["site"] == "unsplash.com"
        assert kwargs["json"]["image_size"] == "l"
        assert kwargs["json"]["format"] == "JSON"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_invalid_time_range(self, mock_post):
        """Test image_search_raw_results with invalid time_range and image_size settings."""
        mock_post.return_value = self._ok_response([])

        # Both settings are outside their accepted values and must be
        # left out of the request payload.
        client = InfoQuestClient(image_search_time_range=400, image_size="x")
        client.image_search_raw_results(
            query="test",
            site="",
        )

        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["query"] == "test"
        assert "time_range" not in kwargs["json"]
        assert "image_size" not in kwargs["json"]

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_success(self, mock_post):
        """Test successful image_search operation."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg", "title": "Test Image", "url": "https://example.com/page1"}])

        client = InfoQuestClient()
        result = client.image_search("cat")

        # Check if result is a valid JSON string with expected content
        result_data = json.loads(result)

        assert len(result_data) == 1
        assert result_data[0]["image_url"] == "https://example.com/image1.jpg"
        assert result_data[0]["title"] == "Test Image"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_with_all_parameters(self, mock_post):
        """Test image_search with all optional parameters."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg"}])

        # Create client with image search parameters
        client = InfoQuestClient(image_search_time_range=7, image_size="m")
        client.image_search(query="dog", site="flickr.com", output_format="JSON")

        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["query"] == "dog"
        assert kwargs["json"]["time_range"] == 7
        assert kwargs["json"]["site"] == "flickr.com"
        assert kwargs["json"]["image_size"] == "m"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_api_error(self, mock_post):
        """Test image_search operation with API error."""
        mock_post.side_effect = Exception("Connection error")

        client = InfoQuestClient()
        result = client.image_search("test query")

        assert "Error" in result

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_image_search_tool(self, mock_get_client):
        """Test image_search_tool function."""
        mock_client = MagicMock()
        mock_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}])
        mock_get_client.return_value = mock_client

        result = tools.image_search_tool.run({"query": "test query"})

        # Check if result is a valid JSON string
        result_data = json.loads(result)
        assert len(result_data) == 1
        assert result_data[0]["image_url"] == "https://example.com/image1.jpg"
        mock_get_client.assert_called_once()
        mock_client.image_search.assert_called_once_with("test query")

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_image_search_tool_with_parameters(self, mock_get_client):
        """Test image_search_tool function with all parameters (extra parameters will be ignored)."""
        mock_client = MagicMock()
        mock_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}])
        mock_get_client.return_value = mock_client

        # Pass all parameters as a dictionary (extra parameters will be ignored)
        tools.image_search_tool.run({"query": "sunset", "time_range": 30, "site": "unsplash.com", "image_size": "l"})

        mock_get_client.assert_called_once()
        # image_search_tool only forwards the query; the extra keys must
        # not reach client.image_search.
        mock_client.image_search.assert_called_once_with("sunset")
|
||||
|
||||
@@ -181,7 +181,7 @@ tools:
|
||||
max_results: 5
|
||||
# api_key: $TAVILY_API_KEY # Set if needed
|
||||
|
||||
# Web search tool (requires InfoQuest API key)
|
||||
# Web search tool (uses InfoQuest, requires InfoQuest API key)
|
||||
# - name: web_search
|
||||
# group: web
|
||||
# use: deerflow.community.infoquest.tools:web_search_tool
|
||||
@@ -194,7 +194,7 @@ tools:
|
||||
use: deerflow.community.jina_ai.tools:web_fetch_tool
|
||||
timeout: 10
|
||||
|
||||
# Web fetch tool (uses InfoQuest AI reader)
|
||||
# Web fetch tool (uses InfoQuest)
|
||||
# - name: web_fetch
|
||||
# group: web
|
||||
# use: deerflow.community.infoquest.tools:web_fetch_tool
|
||||
@@ -212,6 +212,15 @@ tools:
|
||||
use: deerflow.community.image_search.tools:image_search_tool
|
||||
max_results: 5
|
||||
|
||||
# Image search tool (uses InfoQuest)
|
||||
# - name: image_search
|
||||
# group: web
|
||||
# use: deerflow.community.infoquest.tools:image_search_tool
|
||||
# # Restricts image search results to the specified time range (valid values: 1-365). Set to -1 to disable time filtering.
|
||||
# image_search_time_range: 10
|
||||
# # Image size filter. Options: "l" (large), "m" (medium), "i" (icon).
|
||||
# image_size: "i"
|
||||
|
||||
# File operations tools
|
||||
- name: ls
|
||||
group: file:read
|
||||
|
||||
Reference in New Issue
Block a user