infoquest support image-search (#1255)

This commit is contained in:
infoquest-byteplus
2026-03-23 17:06:56 +08:00
committed by GitHub
parent 38ace61617
commit f6c54e0308
4 changed files with 318 additions and 20 deletions

View File

@@ -17,13 +17,15 @@ logger = logging.getLogger(__name__)
class InfoQuestClient:
"""Client for interacting with the InfoQuest web search and fetch API."""
def __init__(self, fetch_time: int = -1, fetch_timeout: int = -1, fetch_navigation_timeout: int = -1, search_time_range: int = -1):
def __init__(self, fetch_time: int = -1, fetch_timeout: int = -1, fetch_navigation_timeout: int = -1, search_time_range: int = -1, image_search_time_range: int = -1, image_size: str = "i"):
logger.info("\n============================================\n🚀 BytePlus InfoQuest Client Initialization 🚀\n============================================")
self.fetch_time = fetch_time
self.fetch_timeout = fetch_timeout
self.fetch_navigation_timeout = fetch_navigation_timeout
self.search_time_range = search_time_range
self.image_search_time_range = image_search_time_range
self.image_size = image_size
self.api_key_set = bool(os.getenv("INFOQUEST_API_KEY"))
if logger.isEnabledFor(logging.DEBUG):
config_details = (
@@ -32,6 +34,8 @@ class InfoQuestClient:
f"├── Fetch Timeout: {fetch_timeout} {'(Default: No fetch timeout)' if fetch_timeout == -1 else '(Custom)'}\n"
f"├── Navigation Timeout: {fetch_navigation_timeout} {'(Default: No Navigation Timeout)' if fetch_navigation_timeout == -1 else '(Custom)'}\n"
f"├── Search Time Range: {search_time_range} {'(Default: No Search Time Range)' if search_time_range == -1 else '(Custom)'}\n"
f"├── Image Search Time Range: {image_search_time_range} {'(Default: No Image Search Time Range)' if image_search_time_range == -1 else '(Custom)'}\n"
f"├── Image Size: {image_size} {'(Default: Medium)' if image_size == 'm' else '(Custom)'}\n"
f"└── API Key: {'✅ Configured' if self.api_key_set else '❌ Not set'}"
)
@@ -295,17 +299,106 @@ class InfoQuestClient:
images_results = results["images_results"]
for result in images_results:
clean_result = {}
if "image_url" in result:
clean_result["image_url"] = result["image_url"]
if "original" in result:
clean_result["image_url"] = result["original"]
url = clean_result["image_url"]
if isinstance(url, str) and url and url not in seen_urls:
seen_urls.add(url)
clean_results.append(clean_result)
counts["images"] += 1
if "thumbnail_url" in result:
clean_result["thumbnail_url"] = result["thumbnail_url"]
if "url" in result:
clean_result["url"] = result["url"]
if "title" in result:
clean_result["title"] = result["title"]
logger.debug(f"Results processing completed | total_results={len(clean_results)} | images={counts['images']} | unique_urls={len(seen_urls)}")
return clean_results
def image_search_raw_results(
    self,
    query: str,
    site: str = "",
    output_format: str = "JSON",
) -> dict:
    """Send an image search request to the InfoQuest search endpoint and return the decoded JSON.

    The time-range and image-size filters are read from the client instance
    (``self.image_search_time_range`` and ``self.image_size``). Values that
    fail validation are left out of the request (with a warning) instead of
    raising.

    Args:
        query: Search query text.
        site: Optional site filter; omitted from the request when empty.
        output_format: Value sent as the ``format`` field of the request.

    Returns:
        The JSON-decoded response body.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        plus any exception from the HTTP call or JSON decoding.
    """
    headers = self._prepare_headers()
    params = {"format": output_format, "query": query, "search_type": "Images"}

    # Only forward time_range when it is inside the accepted 1-365 window.
    # Non-positive values (e.g. the -1 default) mean "no filter" and are skipped silently.
    time_range = self.image_search_time_range
    if 1 <= time_range <= 365:
        params["time_range"] = time_range
    elif time_range > 0:
        logger.warning("time_range %s is out of valid range (1-365), ignoring", time_range)

    if site:
        params["site"] = site

    # Only the three known size codes are forwarded; anything else non-empty is reported and dropped.
    image_size = self.image_size
    if image_size and image_size in ("l", "m", "i"):
        params["image_size"] = image_size
    elif image_size:
        logger.warning("image_size %s is not valid, must be 'l', 'm', or 'i'", image_size)

    # NOTE(review): no timeout is passed to requests.post, so a stalled connection can block
    # the caller indefinitely — consider wiring a configurable timeout.
    response = requests.post("https://search.infoquest.bytepluses.com", headers=headers, json=params)
    response.raise_for_status()
    response_json = response.json()

    if logger.isEnabledFor(logging.DEBUG):
        # Serialise once and reuse it for both the truncation and the length check.
        serialized = json.dumps(response_json)
        response_sample = serialized[:200] + ("..." if len(serialized) > 200 else "")
        logger.debug(
            "Image Search API request completed successfully | service=InfoQuest | status=success | response_sample=%s",
            response_sample,
        )
    return response_json
def image_search(
    self,
    query: str,
    site: str = "",
    output_format: str = "JSON",
) -> str:
    """Run an image search and return the outcome as a string.

    Exceptions raised while requesting or processing results are caught,
    logged, and reported through the returned string instead of propagating.

    Args:
        query: Search query text.
        site: Optional site filter forwarded to ``image_search_raw_results``.
        output_format: Response format forwarded to ``image_search_raw_results``.

    Returns:
        - A JSON array (as text) of cleaned image results when the response
          contains ``search_result``.
        - ``"Error: ..."`` when the response has ``content`` but no
          ``search_result``, or when an exception occurred.
        - The raw response dumped as JSON when neither field is present.
    """
    if logger.isEnabledFor(logging.DEBUG):
        query_truncated = query[:50] + "..." if len(query) > 50 else query
        time_range = self.image_search_time_range
        time_range_label = time_range if 1 <= time_range <= 365 else "default"
        logger.debug(
            "InfoQuest - Image Search API request initiated | "
            "operation=search images | "
            f"query_truncated={query_truncated} | "
            f"has_site_filter={bool(site)} | site={site} | "
            f"image_search_time_range={time_range_label} | "
            # The separator keeps its trailing space so it does not fuse with the next field.
            f"image_size={self.image_size} | "
            "request_type=sync"
        )

    try:
        logger.info("InfoQuest Image Search - Executing search with parameters")
        raw_results = self.image_search_raw_results(query, site, output_format)

        if "search_result" in raw_results:
            logger.debug("InfoQuest Image Search - Successfully extracted search_result from JSON response")
            results = raw_results["search_result"]
            # Lazy %-args: the payload can be large, so it is only formatted when DEBUG is enabled.
            logger.debug("InfoQuest Image Search - Processing raw image search results: %s", results)
            cleaned_results = self.clean_results_with_image_search(results["results"])
            result_json = json.dumps(cleaned_results, indent=2, ensure_ascii=False)
            logger.debug(
                "InfoQuest Image Search - Image search tool execution completed | mode=synchronous | results_count=%s",
                len(cleaned_results),
            )
            return result_json

        if "content" in raw_results:
            # A response carrying only `content` is treated as malformed for image search:
            # log what was received and return an error string (return text kept as-is for callers).
            error_message = "image search API return wrong format"
            logger.error(
                "image search API returned wrong format: no search_result field found in JSON response, content: %s",
                raw_results["content"],
            )
            return f"Error: {error_message}"

        # Neither field exists: hand the original response back unchanged.
        logger.warning("InfoQuest Image Search - Neither search_result nor content field found in JSON response")
        return json.dumps(raw_results, indent=2, ensure_ascii=False)
    except Exception as e:
        # Tool boundary: failures are converted into an error string rather than raised.
        error_message = f"InfoQuest Image Search - Image search tool execution failed | mode=synchronous | error={str(e)}"
        logger.error(error_message)
        return f"Error: {error_message}"

View File

@@ -13,6 +13,7 @@ def _get_infoquest_client() -> InfoQuestClient:
search_time_range = -1
if search_config is not None and "search_time_range" in search_config.model_extra:
search_time_range = search_config.model_extra.get("search_time_range")
fetch_config = get_app_config().get_tool_config("web_fetch")
fetch_time = -1
if fetch_config is not None and "fetch_time" in fetch_config.model_extra:
@@ -23,12 +24,24 @@ def _get_infoquest_client() -> InfoQuestClient:
navigation_timeout = -1
if fetch_config is not None and "navigation_timeout" in fetch_config.model_extra:
navigation_timeout = fetch_config.model_extra.get("navigation_timeout")
image_search_config = get_app_config().get_tool_config("image_search")
image_search_time_range = -1
if image_search_config is not None and "image_search_time_range" in image_search_config.model_extra:
image_search_time_range = image_search_config.model_extra.get("image_search_time_range")
image_size = "i"
if image_search_config is not None and "image_size" in image_search_config.model_extra:
image_size = image_search_config.model_extra.get("image_size")
return InfoQuestClient(
search_time_range=search_time_range,
fetch_timeout=fetch_timeout,
fetch_navigation_timeout=navigation_timeout,
fetch_time=fetch_time,
image_search_time_range=image_search_time_range,
image_size=image_size,
)
@@ -61,3 +74,22 @@ def web_fetch_tool(url: str) -> str:
return result
article = readability_extractor.extract_article(result)
return article.to_markdown()[:4096]
@tool("image_search", parse_docstring=True)
def image_search_tool(query: str) -> str:
    """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.

    **When to use:**
    - Before generating character/portrait images: search for similar poses, expressions, styles
    - Before generating specific objects/products: search for accurate visual references
    - Before generating scenes/locations: search for architectural or environmental references
    - Before generating fashion/clothing: search for style and detail references

    The returned image URLs can be used as reference images in image generation to significantly improve quality.

    Args:
        query: The query to search for images.
    """
    # NOTE(review): the decorator is given parse_docstring=True, so the docstring above is
    # presumably consumed as the tool description / argument schema — keep its wording and
    # the Args section intact when editing.
    # A client is built from the current app config on each call; only `query` is forwarded,
    # so the site filter and output format stay at image_search's defaults.
    client = _get_infoquest_client()
    return client.image_search(query)

View File

@@ -123,16 +123,6 @@ class TestInfoQuestClient:
assert cleaned[1]["type"] == "news"
assert cleaned[1]["title"] == "Test News"
def test_clean_results_with_image_search(self):
    """clean_results_with_image_search keeps image_url, thumbnail_url and url from a raw image entry."""
    image_entry = {
        "image_url": "https://example.com/image1.jpg",
        "thumbnail_url": "https://example.com/thumb1.jpg",
        "url": "https://example.com/page1",
    }
    payload = [{"content": {"results": {"images_results": [image_entry]}}}]

    outcome = InfoQuestClient.clean_results_with_image_search(payload)

    assert len(outcome) == 1
    first = outcome[0]
    assert first["image_url"] == "https://example.com/image1.jpg"
    assert first["thumbnail_url"] == "https://example.com/thumb1.jpg"
    assert first["url"] == "https://example.com/page1"
@patch("deerflow.community.infoquest.tools._get_infoquest_client")
def test_web_search_tool(self, mock_get_client):
"""Test web_search_tool function."""
@@ -163,7 +153,12 @@ class TestInfoQuestClient:
def test_get_infoquest_client(self, mock_get_app_config):
"""Test _get_infoquest_client function with config."""
mock_config = MagicMock()
mock_config.get_tool_config.side_effect = [MagicMock(model_extra={"search_time_range": 24}), MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60})]
# Add image_search config to the side_effect
mock_config.get_tool_config.side_effect = [
MagicMock(model_extra={"search_time_range": 24}), # web_search config
MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60}), # web_fetch config
MagicMock(model_extra={"image_search_time_range": 7, "image_size": "l"}) # image_search config
]
mock_get_app_config.return_value = mock_config
client = tools._get_infoquest_client()
@@ -172,6 +167,8 @@ class TestInfoQuestClient:
assert client.fetch_time == 10
assert client.fetch_timeout == 30
assert client.fetch_navigation_timeout == 60
assert client.image_search_time_range == 7
assert client.image_size == "l"
@patch("deerflow.community.infoquest.infoquest_client.requests.post")
def test_web_search_api_error(self, mock_post):
@@ -182,3 +179,170 @@ class TestInfoQuestClient:
result = client.web_search("test query")
assert "Error" in result
def test_clean_results_with_image_search(self):
    """clean_results_with_image_search exposes the raw ``original`` field as ``image_url`` and keeps the title."""
    image_entry = {
        "original": "https://example.com/image1.jpg",
        "title": "Test Image 1",
        "url": "https://example.com/page1",
    }
    payload = [{"content": {"results": {"images_results": [image_entry]}}}]

    outcome = InfoQuestClient.clean_results_with_image_search(payload)

    assert len(outcome) == 1
    first = outcome[0]
    assert first["image_url"] == "https://example.com/image1.jpg"
    assert first["title"] == "Test Image 1"
def test_clean_results_with_image_search_empty(self):
    """An empty ``images_results`` list produces no cleaned results."""
    payload = [{"content": {"results": {"images_results": []}}}]

    outcome = InfoQuestClient.clean_results_with_image_search(payload)

    assert len(outcome) == 0
def test_clean_results_with_image_search_no_images(self):
    """A result without an ``images_results`` field produces no cleaned results."""
    organic_only = {"organic": [{"title": "Test Page"}]}
    payload = [{"content": {"results": organic_only}}]

    outcome = InfoQuestClient.clean_results_with_image_search(payload)

    assert len(outcome) == 0
class TestImageSearch:
    """Tests for InfoQuestClient image search methods and the image_search tool wrapper."""

    @staticmethod
    def _ok_response(images_results):
        """Return a mocked 200 response whose JSON wraps *images_results* in the search_result envelope."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {"search_result": {"results": [{"content": {"results": {"images_results": images_results}}}]}}
        return response

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_success(self, mock_post):
        """Test successful image_search_raw_results operation."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg", "title": "Test Image", "url": "https://example.com/page1"}])

        client = InfoQuestClient()
        result = client.image_search_raw_results("test query")

        assert "search_result" in result
        mock_post.assert_called_once()
        args, kwargs = mock_post.call_args
        assert args[0] == "https://search.infoquest.bytepluses.com"
        assert kwargs["json"]["query"] == "test query"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_with_parameters(self, mock_post):
        """Test image_search_raw_results with all parameters."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg"}])

        client = InfoQuestClient(image_search_time_range=30, image_size="l")
        client.image_search_raw_results(query="cat", site="unsplash.com", output_format="JSON")

        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["query"] == "cat"
        assert kwargs["json"]["time_range"] == 30
        assert kwargs["json"]["site"] == "unsplash.com"
        assert kwargs["json"]["image_size"] == "l"
        assert kwargs["json"]["format"] == "JSON"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_raw_results_invalid_time_range(self, mock_post):
        """Test image_search_raw_results drops an out-of-range time_range and an unknown image_size."""
        mock_post.return_value = self._ok_response([])

        # Both values are invalid, so neither should reach the request payload.
        client = InfoQuestClient(image_search_time_range=400, image_size="x")
        client.image_search_raw_results(
            query="test",
            site="",
        )

        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["query"] == "test"
        assert "time_range" not in kwargs["json"]
        assert "image_size" not in kwargs["json"]

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_success(self, mock_post):
        """Test successful image_search operation."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg", "title": "Test Image", "url": "https://example.com/page1"}])

        client = InfoQuestClient()
        result = client.image_search("cat")

        # The result must be a valid JSON string with the expected content.
        result_data = json.loads(result)
        assert len(result_data) == 1
        assert result_data[0]["image_url"] == "https://example.com/image1.jpg"
        assert result_data[0]["title"] == "Test Image"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_with_all_parameters(self, mock_post):
        """Test image_search with all optional parameters."""
        mock_post.return_value = self._ok_response([{"original": "https://example.com/image1.jpg"}])

        # Time range and image size are client-level settings, not per-call arguments.
        client = InfoQuestClient(image_search_time_range=7, image_size="m")
        client.image_search(query="dog", site="flickr.com", output_format="JSON")

        mock_post.assert_called_once()
        _, kwargs = mock_post.call_args
        assert kwargs["json"]["query"] == "dog"
        assert kwargs["json"]["time_range"] == 7
        assert kwargs["json"]["site"] == "flickr.com"
        assert kwargs["json"]["image_size"] == "m"

    @patch("deerflow.community.infoquest.infoquest_client.requests.post")
    def test_image_search_api_error(self, mock_post):
        """Test image_search operation with API error."""
        mock_post.side_effect = Exception("Connection error")

        client = InfoQuestClient()
        result = client.image_search("test query")

        assert "Error" in result

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_image_search_tool(self, mock_get_client):
        """Test image_search_tool function."""
        mock_client = MagicMock()
        mock_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}])
        mock_get_client.return_value = mock_client

        result = tools.image_search_tool.run({"query": "test query"})

        # The result must be a valid JSON string.
        result_data = json.loads(result)
        assert len(result_data) == 1
        assert result_data[0]["image_url"] == "https://example.com/image1.jpg"
        mock_get_client.assert_called_once()
        mock_client.image_search.assert_called_once_with("test query")

    @patch("deerflow.community.infoquest.tools._get_infoquest_client")
    def test_image_search_tool_with_parameters(self, mock_get_client):
        """Test image_search_tool function with all parameters (extra parameters will be ignored)."""
        mock_client = MagicMock()
        mock_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}])
        mock_get_client.return_value = mock_client

        # Pass all parameters as a dictionary (extra parameters will be ignored).
        tools.image_search_tool.run({"query": "sunset", "time_range": 30, "site": "unsplash.com", "image_size": "l"})

        mock_get_client.assert_called_once()
        # image_search_tool only passes query to client.image_search.
        mock_client.image_search.assert_called_once_with("sunset")

View File

@@ -181,7 +181,7 @@ tools:
max_results: 5
# api_key: $TAVILY_API_KEY # Set if needed
# Web search tool (requires InfoQuest API key)
# Web search tool (uses InfoQuest, requires InfoQuest API key)
# - name: web_search
# group: web
# use: deerflow.community.infoquest.tools:web_search_tool
@@ -194,7 +194,7 @@ tools:
use: deerflow.community.jina_ai.tools:web_fetch_tool
timeout: 10
# Web fetch tool (uses InfoQuest AI reader)
# Web fetch tool (uses InfoQuest)
# - name: web_fetch
# group: web
# use: deerflow.community.infoquest.tools:web_fetch_tool
@@ -212,6 +212,15 @@ tools:
use: deerflow.community.image_search.tools:image_search_tool
max_results: 5
# Image search tool (uses InfoQuest)
# - name: image_search
# group: web
# use: deerflow.community.infoquest.tools:image_search_tool
# # Limits image search results to content within the specified time range (valid values: 1-365). Set to -1 to disable time filtering.
# image_search_time_range: 10
# # Image size filter. Options: "l" (large), "m" (medium), "i" (icon).
# image_size: "i"
# File operations tools
- name: ls
group: file:read