diff --git a/backend/packages/harness/deerflow/community/infoquest/infoquest_client.py b/backend/packages/harness/deerflow/community/infoquest/infoquest_client.py index 88269bc..0fd6e8d 100644 --- a/backend/packages/harness/deerflow/community/infoquest/infoquest_client.py +++ b/backend/packages/harness/deerflow/community/infoquest/infoquest_client.py @@ -17,13 +17,15 @@ logger = logging.getLogger(__name__) class InfoQuestClient: """Client for interacting with the InfoQuest web search and fetch API.""" - def __init__(self, fetch_time: int = -1, fetch_timeout: int = -1, fetch_navigation_timeout: int = -1, search_time_range: int = -1): + def __init__(self, fetch_time: int = -1, fetch_timeout: int = -1, fetch_navigation_timeout: int = -1, search_time_range: int = -1, image_search_time_range: int = -1, image_size: str = "i"): logger.info("\n============================================\n🚀 BytePlus InfoQuest Client Initialization 🚀\n============================================") self.fetch_time = fetch_time self.fetch_timeout = fetch_timeout self.fetch_navigation_timeout = fetch_navigation_timeout self.search_time_range = search_time_range + self.image_search_time_range = image_search_time_range + self.image_size = image_size self.api_key_set = bool(os.getenv("INFOQUEST_API_KEY")) if logger.isEnabledFor(logging.DEBUG): config_details = ( @@ -32,6 +34,8 @@ class InfoQuestClient: f"├── Fetch Timeout: {fetch_timeout} {'(Default: No fetch timeout)' if fetch_timeout == -1 else '(Custom)'}\n" f"├── Navigation Timeout: {fetch_navigation_timeout} {'(Default: No Navigation Timeout)' if fetch_navigation_timeout == -1 else '(Custom)'}\n" f"├── Search Time Range: {search_time_range} {'(Default: No Search Time Range)' if search_time_range == -1 else '(Custom)'}\n" + f"├── Image Search Time Range: {image_search_time_range} {'(Default: No Image Search Time Range)' if image_search_time_range == -1 else '(Custom)'}\n" + f"├── Image Size: {image_size} 
{'(Default: Icon)' if image_size == 'i' else '(Custom)'}\n" f"└── API Key: {'✅ Configured' if self.api_key_set else '❌ Not set'}" ) @@ -295,17 +299,106 @@ class InfoQuestClient: images_results = results["images_results"] for result in images_results: clean_result = {} - if "image_url" in result: - clean_result["image_url"] = result["image_url"] + if "original" in result: + clean_result["image_url"] = result["original"] url = clean_result["image_url"] if isinstance(url, str) and url and url not in seen_urls: seen_urls.add(url) clean_results.append(clean_result) counts["images"] += 1 - if "thumbnail_url" in result: - clean_result["thumbnail_url"] = result["thumbnail_url"] - if "url" in result: - clean_result["url"] = result["url"] + if "title" in result: + clean_result["title"] = result["title"] logger.debug(f"Results processing completed | total_results={len(clean_results)} | images={counts['images']} | unique_urls={len(seen_urls)}") return clean_results + + def image_search_raw_results( + self, + query: str, + site: str = "", + output_format: str = "JSON", + ) -> dict: + """Get image search results from the InfoQuest Web-Search API synchronously.""" + headers = self._prepare_headers() + + params = {"format": output_format, "query": query, "search_type": "Images"} + + # Add time_range filter if specified (1-365) + if 1 <= self.image_search_time_range <= 365: + params["time_range"] = self.image_search_time_range + elif self.image_search_time_range > 0: + logger.warning(f"time_range {self.image_search_time_range} is out of valid range (1-365), ignoring") + + # Add site filter if specified + if site: + params["site"] = site + + # Add image_size filter if specified + if self.image_size and self.image_size in ["l", "m", "i"]: + params["image_size"] = self.image_size + elif self.image_size: + logger.warning(f"image_size {self.image_size} is not valid, must be 'l', 'm', or 'i'") + + response = requests.post("https://search.infoquest.bytepluses.com", headers=headers, 
json=params) + response.raise_for_status() + + # Log a truncated sample of the response for debugging + response_json = response.json() + if logger.isEnabledFor(logging.DEBUG): + response_sample = json.dumps(response_json)[:200] + ("..." if len(json.dumps(response_json)) > 200 else "") + logger.debug(f"Image Search API request completed successfully | service=InfoQuest | status=success | response_sample={response_sample}") + + return response_json + + def image_search( + self, + query: str, + site: str = "", + output_format: str = "JSON", + ) -> str: + if logger.isEnabledFor(logging.DEBUG): + query_truncated = query[:50] + "..." if len(query) > 50 else query + logger.debug( + f"InfoQuest - Image Search API request initiated | " + f"operation=search images | " + f"query_truncated={query_truncated} | " + f"has_site_filter={bool(site)} | site={site} | " + f"image_search_time_range={self.image_search_time_range if self.image_search_time_range >= 1 and self.image_search_time_range <= 365 else 'default'} | " + f"image_size={self.image_size} | " + f"request_type=sync" + ) + + try: + logger.info("InfoQuest Image Search - Executing search with parameters") + raw_results = self.image_search_raw_results( + query, + site, + output_format, + ) + + if "search_result" in raw_results: + logger.debug("InfoQuest Image Search - Successfully extracted search_result from JSON response") + results = raw_results["search_result"] + + logger.debug(f"InfoQuest Image Search - Processing raw image search results: {results}") + cleaned_results = self.clean_results_with_image_search(results["results"]) + + result_json = json.dumps(cleaned_results, indent=2, ensure_ascii=False) + + logger.debug(f"InfoQuest Image Search - Image search tool execution completed | mode=synchronous | results_count={len(cleaned_results)}") + return result_json + + elif "content" in raw_results: + # search_result is missing but content is present: treat as an unexpected response format + error_message = "image search API return wrong format" + logger.error("image 
search API return wrong format, search_result field not found in JSON response, content: %s", raw_results["content"]) + return f"Error: {error_message}" + else: + # If neither field exists, return the original response + logger.warning("InfoQuest Image Search - Neither search_result nor content field found in JSON response") + return json.dumps(raw_results, indent=2, ensure_ascii=False) + + except Exception as e: + error_message = f"InfoQuest Image Search - Image search tool execution failed | mode=synchronous | error={str(e)}" + logger.error(error_message) + return f"Error: {error_message}" diff --git a/backend/packages/harness/deerflow/community/infoquest/tools.py b/backend/packages/harness/deerflow/community/infoquest/tools.py index bf1d77e..d05b5a8 100644 --- a/backend/packages/harness/deerflow/community/infoquest/tools.py +++ b/backend/packages/harness/deerflow/community/infoquest/tools.py @@ -13,6 +13,7 @@ def _get_infoquest_client() -> InfoQuestClient: search_time_range = -1 if search_config is not None and "search_time_range" in search_config.model_extra: search_time_range = search_config.model_extra.get("search_time_range") + fetch_config = get_app_config().get_tool_config("web_fetch") fetch_time = -1 if fetch_config is not None and "fetch_time" in fetch_config.model_extra: @@ -23,12 +24,24 @@ def _get_infoquest_client() -> InfoQuestClient: navigation_timeout = -1 if fetch_config is not None and "navigation_timeout" in fetch_config.model_extra: navigation_timeout = fetch_config.model_extra.get("navigation_timeout") + + image_search_config = get_app_config().get_tool_config("image_search") + image_search_time_range = -1 + if image_search_config is not None and "image_search_time_range" in image_search_config.model_extra: + image_search_time_range = image_search_config.model_extra.get("image_search_time_range") + image_size = "i" + if image_search_config is not None and "image_size" in image_search_config.model_extra: + image_size = 
image_search_config.model_extra.get("image_size") + + return InfoQuestClient( search_time_range=search_time_range, fetch_timeout=fetch_timeout, fetch_navigation_timeout=navigation_timeout, fetch_time=fetch_time, + image_search_time_range=image_search_time_range, + image_size=image_size, ) @@ -61,3 +74,22 @@ def web_fetch_tool(url: str) -> str: return result article = readability_extractor.extract_article(result) return article.to_markdown()[:4096] + + +@tool("image_search", parse_docstring=True) +def image_search_tool(query: str) -> str: + """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy. + + **When to use:** + - Before generating character/portrait images: search for similar poses, expressions, styles + - Before generating specific objects/products: search for accurate visual references + - Before generating scenes/locations: search for architectural or environmental references + - Before generating fashion/clothing: search for style and detail references + + The returned image URLs can be used as reference images in image generation to significantly improve quality. + + Args: + query: The query to search for images. 
+ """ + client = _get_infoquest_client() + return client.image_search(query) diff --git a/backend/tests/test_infoquest_client.py b/backend/tests/test_infoquest_client.py index 190444d..b5f09a2 100644 --- a/backend/tests/test_infoquest_client.py +++ b/backend/tests/test_infoquest_client.py @@ -123,16 +123,6 @@ class TestInfoQuestClient: assert cleaned[1]["type"] == "news" assert cleaned[1]["title"] == "Test News" - def test_clean_results_with_image_search(self): - """Test clean_results_with_image_search method with sample raw results.""" - raw_results = [{"content": {"results": {"images_results": [{"image_url": "https://example.com/image1.jpg", "thumbnail_url": "https://example.com/thumb1.jpg", "url": "https://example.com/page1"}]}}}] - cleaned = InfoQuestClient.clean_results_with_image_search(raw_results) - - assert len(cleaned) == 1 - assert cleaned[0]["image_url"] == "https://example.com/image1.jpg" - assert cleaned[0]["thumbnail_url"] == "https://example.com/thumb1.jpg" - assert cleaned[0]["url"] == "https://example.com/page1" - @patch("deerflow.community.infoquest.tools._get_infoquest_client") def test_web_search_tool(self, mock_get_client): """Test web_search_tool function.""" @@ -163,7 +153,12 @@ class TestInfoQuestClient: def test_get_infoquest_client(self, mock_get_app_config): """Test _get_infoquest_client function with config.""" mock_config = MagicMock() - mock_config.get_tool_config.side_effect = [MagicMock(model_extra={"search_time_range": 24}), MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60})] + # Add image_search config to the side_effect + mock_config.get_tool_config.side_effect = [ + MagicMock(model_extra={"search_time_range": 24}), # web_search config + MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60}), # web_fetch config + MagicMock(model_extra={"image_search_time_range": 7, "image_size": "l"}) # image_search config + ] mock_get_app_config.return_value = mock_config client = 
tools._get_infoquest_client() @@ -172,6 +167,8 @@ class TestInfoQuestClient: assert client.fetch_time == 10 assert client.fetch_timeout == 30 assert client.fetch_navigation_timeout == 60 + assert client.image_search_time_range == 7 + assert client.image_size == "l" @patch("deerflow.community.infoquest.infoquest_client.requests.post") def test_web_search_api_error(self, mock_post): @@ -182,3 +179,170 @@ class TestInfoQuestClient: result = client.web_search("test query") assert "Error" in result + + def test_clean_results_with_image_search(self): + """Test clean_results_with_image_search method with sample raw results.""" + raw_results = [{"content": {"results": {"images_results": [{"original": "https://example.com/image1.jpg", "title": "Test Image 1", "url": "https://example.com/page1"}]}}}] + cleaned = InfoQuestClient.clean_results_with_image_search(raw_results) + + assert len(cleaned) == 1 + assert cleaned[0]["image_url"] == "https://example.com/image1.jpg" + assert cleaned[0]["title"] == "Test Image 1" + + def test_clean_results_with_image_search_empty(self): + """Test clean_results_with_image_search method with empty results.""" + raw_results = [{"content": {"results": {"images_results": []}}}] + cleaned = InfoQuestClient.clean_results_with_image_search(raw_results) + + assert len(cleaned) == 0 + + def test_clean_results_with_image_search_no_images(self): + """Test clean_results_with_image_search method with no images_results field.""" + raw_results = [{"content": {"results": {"organic": [{"title": "Test Page"}]}}}] + cleaned = InfoQuestClient.clean_results_with_image_search(raw_results) + + assert len(cleaned) == 0 + + +class TestImageSearch: + @patch("deerflow.community.infoquest.infoquest_client.requests.post") + def test_image_search_raw_results_success(self, mock_post): + """Test successful image_search_raw_results operation.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"search_result": {"results": 
[{"content": {"results": {"images_results": [{"original": "https://example.com/image1.jpg", "title": "Test Image", "url": "https://example.com/page1"}]}}}]}} + mock_post.return_value = mock_response + + client = InfoQuestClient() + result = client.image_search_raw_results("test query") + + assert "search_result" in result + mock_post.assert_called_once() + args, kwargs = mock_post.call_args + assert args[0] == "https://search.infoquest.bytepluses.com" + assert kwargs["json"]["query"] == "test query" + + @patch("deerflow.community.infoquest.infoquest_client.requests.post") + def test_image_search_raw_results_with_parameters(self, mock_post): + """Test image_search_raw_results with all parameters.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"search_result": {"results": [{"content": {"results": {"images_results": [{"original": "https://example.com/image1.jpg"}]}}}]}} + mock_post.return_value = mock_response + + client = InfoQuestClient(image_search_time_range=30, image_size="l") + client.image_search_raw_results(query="cat", site="unsplash.com", output_format="JSON") + + mock_post.assert_called_once() + args, kwargs = mock_post.call_args + assert kwargs["json"]["query"] == "cat" + assert kwargs["json"]["time_range"] == 30 + assert kwargs["json"]["site"] == "unsplash.com" + assert kwargs["json"]["image_size"] == "l" + assert kwargs["json"]["format"] == "JSON" + + @patch("deerflow.community.infoquest.infoquest_client.requests.post") + def test_image_search_raw_results_invalid_time_range(self, mock_post): + """Test image_search_raw_results with invalid time_range parameter.""" + mock_response = MagicMock() + mock_response.status_code = 200 + + mock_response.json.return_value = {"search_result": {"results": [{"content": {"results": {"images_results": []}}}]}} + mock_post.return_value = mock_response + + # Create client with invalid time_range (should be ignored) + client = 
InfoQuestClient(image_search_time_range=400, image_size="x") + client.image_search_raw_results( + query="test", + site="", + ) + + mock_post.assert_called_once() + args, kwargs = mock_post.call_args + assert kwargs["json"]["query"] == "test" + assert "time_range" not in kwargs["json"] + assert "image_size" not in kwargs["json"] + + @patch("deerflow.community.infoquest.infoquest_client.requests.post") + def test_image_search_success(self, mock_post): + """Test successful image_search operation.""" + mock_response = MagicMock() + mock_response.status_code = 200 + + mock_response.json.return_value = {"search_result": {"results": [{"content": {"results": {"images_results": [{"original": "https://example.com/image1.jpg", "title": "Test Image", "url": "https://example.com/page1"}]}}}]}} + mock_post.return_value = mock_response + + client = InfoQuestClient() + result = client.image_search("cat") + + # Check if result is a valid JSON string with expected content + result_data = json.loads(result) + + assert len(result_data) == 1 + + assert result_data[0]["image_url"] == "https://example.com/image1.jpg" + + assert result_data[0]["title"] == "Test Image" + + @patch("deerflow.community.infoquest.infoquest_client.requests.post") + def test_image_search_with_all_parameters(self, mock_post): + """Test image_search with all optional parameters.""" + mock_response = MagicMock() + mock_response.status_code = 200 + + mock_response.json.return_value = {"search_result": {"results": [{"content": {"results": {"images_results": [{"original": "https://example.com/image1.jpg"}]}}}]}} + mock_post.return_value = mock_response + + # Create client with image search parameters + client = InfoQuestClient(image_search_time_range=7, image_size="m") + client.image_search(query="dog", site="flickr.com", output_format="JSON") + + mock_post.assert_called_once() + args, kwargs = mock_post.call_args + assert kwargs["json"]["query"] == "dog" + assert kwargs["json"]["time_range"] == 7 + assert 
kwargs["json"]["site"] == "flickr.com" + assert kwargs["json"]["image_size"] == "m" + + @patch("deerflow.community.infoquest.infoquest_client.requests.post") + def test_image_search_api_error(self, mock_post): + """Test image_search operation with API error.""" + mock_post.side_effect = Exception("Connection error") + + client = InfoQuestClient() + result = client.image_search("test query") + + assert "Error" in result + + @patch("deerflow.community.infoquest.tools._get_infoquest_client") + def test_image_search_tool(self, mock_get_client): + """Test image_search_tool function.""" + mock_client = MagicMock() + mock_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}]) + mock_get_client.return_value = mock_client + + result = tools.image_search_tool.run({"query": "test query"}) + + # Check if result is a valid JSON string + result_data = json.loads(result) + assert len(result_data) == 1 + assert result_data[0]["image_url"] == "https://example.com/image1.jpg" + mock_get_client.assert_called_once() + mock_client.image_search.assert_called_once_with("test query") + + # Tool-level check: only the query should be forwarded to the client + + @patch("deerflow.community.infoquest.tools._get_infoquest_client") + def test_image_search_tool_with_parameters(self, mock_get_client): + """Test image_search_tool function with all parameters (extra parameters will be ignored).""" + mock_client = MagicMock() + mock_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}]) + mock_get_client.return_value = mock_client + + # Pass all parameters as a dictionary (extra parameters will be ignored) + tools.image_search_tool.run({"query": "sunset", "time_range": 30, "site": "unsplash.com", "image_size": "l"}) + + mock_get_client.assert_called_once() + # image_search_tool only passes query to client.image_search + # site parameter is empty string by default + 
mock_client.image_search.assert_called_once_with("sunset") diff --git a/config.example.yaml b/config.example.yaml index 0d79cf7..44ebb0d 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -181,7 +181,7 @@ tools: max_results: 5 # api_key: $TAVILY_API_KEY # Set if needed - # Web search tool (requires InfoQuest API key) + # Web search tool (uses InfoQuest, requires InfoQuest API key) # - name: web_search # group: web # use: deerflow.community.infoquest.tools:web_search_tool @@ -194,7 +194,7 @@ tools: use: deerflow.community.jina_ai.tools:web_fetch_tool timeout: 10 - # Web fetch tool (uses InfoQuest AI reader) + # Web fetch tool (uses InfoQuest) # - name: web_fetch # group: web # use: deerflow.community.infoquest.tools:web_fetch_tool @@ -212,6 +212,15 @@ tools: use: deerflow.community.image_search.tools:image_search_tool max_results: 5 + # Image search tool (uses InfoQuest, requires InfoQuest API key) + # - name: image_search + # group: web + # use: deerflow.community.infoquest.tools:image_search_tool + # # Restricts image search results to the specified time range. Valid values are 1-365; set to -1 to disable time filtering. + # image_search_time_range: 10 + # # Image size filter. Options: "l" (large), "m" (medium), "i" (icon). + # image_size: "i" + # File operations tools - name: ls group: file:read