Mirror of https://gitee.com/wanwujie/deer-flow, synced 2026-04-15 03:04:44 +08:00
feat: support infoquest (#708)
* support infoquest
* support html checker
* support html checker
* change line break format
* change line break format
* change line break format
* change line break format
* change line break format
* change line break format
* change line break format
* change line break format
* Fix several critical issues in the codebase
  - Resolve crawler panic by improving error handling
  - Fix plan validation to prevent invalid configurations
  - Correct InfoQuest crawler JSON conversion logic
* add test for infoquest
* add test for infoquest
* Add InfoQuest introduction to the README
* add test for infoquest
* fix readme for infoquest
* fix readme for infoquest
* resolve the conflict
* resolve the conflict
* resolve the conflict
* Fix formatting of INFOQUEST in SearchEngine enum
* Apply suggestions from code review
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Willem Jiang <143703838+willem-bd@users.noreply.github.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Committed by GitHub
parent e179fb1632
commit 7ec9e45702
@@ -11,6 +11,7 @@ load_dotenv()
class SearchEngine(enum.Enum):
    TAVILY = "tavily"
    INFOQUEST = "infoquest"
    DUCKDUCKGO = "duckduckgo"
    BRAVE_SEARCH = "brave_search"
    ARXIV = "arxiv"
@@ -18,10 +19,14 @@ class SearchEngine(enum.Enum):
    WIKIPEDIA = "wikipedia"


class CrawlerEngine(enum.Enum):
    JINA = "jina"
    INFOQUEST = "infoquest"


# Tool configuration
SELECTED_SEARCH_ENGINE = os.getenv("SEARCH_API", SearchEngine.TAVILY.value)


class RAGProvider(enum.Enum):
    DIFY = "dify"
    RAGFLOW = "ragflow"
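To illustrate how the new enum value is selected, here is a small usage sketch; the SEARCH_API variable, the Tavily default, and the load_dotenv() call are taken from the hunk above, the rest is illustrative only:

    import os

    # Illustrative only: choose InfoQuest before src.config.tools is imported
    # (in practice SEARCH_API is usually set in .env, which load_dotenv() reads).
    os.environ["SEARCH_API"] = "infoquest"

    from src.config.tools import SELECTED_SEARCH_ENGINE, SearchEngine

    assert SELECTED_SEARCH_ENGINE == SearchEngine.INFOQUEST.value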
@@ -4,9 +4,12 @@
import re
import logging

from .article import Article
from .jina_client import JinaClient
from .readability_extractor import ReadabilityExtractor
from src.config.tools import CrawlerEngine
from src.config import load_yaml_config
from src.crawler.article import Article
from src.crawler.infoquest_client import InfoQuestClient
from src.crawler.jina_client import JinaClient
from src.crawler.readability_extractor import ReadabilityExtractor

logger = logging.getLogger(__name__)

@@ -14,11 +17,11 @@ logger = logging.getLogger(__name__)
def safe_truncate(text: str, max_length: int = 500) -> str:
    """
    Safely truncate text to a maximum length without breaking multi-byte characters.

    Args:
        text: The text to truncate
        max_length: Maximum number of characters to keep

    Returns:
        Truncated text that is safe to use without encoding issues
    """
@@ -49,7 +52,7 @@ def safe_truncate(text: str, max_length: int = 500) -> str:
def is_html_content(content: str) -> bool:
    """
    Check if the provided content is HTML.

    Uses a more robust detection method that checks for common HTML patterns
    including DOCTYPE declarations, HTML tags, and other HTML markers.
    """
@@ -138,17 +141,21 @@ class Crawler:
        # them into text and image blocks for one single and unified
        # LLM message.
        #
        # Jina is not the best crawler on readability, however it's
        # much easier and free to use.
        # The system supports multiple crawler engines:
        # - Jina: an accessible solution, though with some limitations in readability extraction.
        # - InfoQuest: a BytePlus product offering advanced capabilities with configurable
        #   parameters such as fetch_time, timeout, and navi_timeout.
        #
        # Instead of using Jina's own markdown converter, we'll use
        # our own solution to get better readability results.
        try:
            jina_client = JinaClient()
            html = jina_client.crawl(url, return_format="html")
        except Exception as e:
            logger.error(f"Failed to fetch URL {url} from Jina: {repr(e)}")
            raise

        # Get crawler configuration
        config = load_yaml_config("conf.yaml")
        crawler_config = config.get("CRAWLER_ENGINE", {})

        # Get the selected crawler tool based on configuration
        crawler_client = self._select_crawler_tool(crawler_config)
        html = self._crawl_with_tool(crawler_client, url)

        # Check if we got valid HTML content
        if not html or not html.strip():
@@ -186,3 +193,44 @@ class Crawler:

        article.url = url
        return article

    def _select_crawler_tool(self, crawler_config: dict):
        # Only check the engine from the configuration file
        engine = crawler_config.get("engine", CrawlerEngine.JINA.value)

        if engine == CrawlerEngine.JINA.value:
            logger.info("Selecting Jina crawler engine")
            return JinaClient()
        elif engine == CrawlerEngine.INFOQUEST.value:
            logger.info("Selecting InfoQuest crawler engine")
            # Read timeout parameters directly from the crawler_config root level.
            # These parameters are only effective when the engine is set to "infoquest".
            fetch_time = crawler_config.get("fetch_time", -1)
            timeout = crawler_config.get("timeout", -1)
            navi_timeout = crawler_config.get("navi_timeout", -1)

            # Log the configuration being used
            if fetch_time > 0 or timeout > 0 or navi_timeout > 0:
                logger.debug(
                    f"Initializing InfoQuestClient with parameters: "
                    f"fetch_time={fetch_time}, "
                    f"timeout={timeout}, "
                    f"navi_timeout={navi_timeout}"
                )

            # Initialize InfoQuestClient with the parameters from configuration
            return InfoQuestClient(
                fetch_time=fetch_time,
                timeout=timeout,
                navi_timeout=navi_timeout
            )
        else:
            raise ValueError(f"Unsupported crawler engine: {engine}")

    def _crawl_with_tool(self, crawler_client, url: str) -> str:
        logger.info(f"Crawling URL: {url} using {crawler_client.__class__.__name__}")
        try:
            return crawler_client.crawl(url, return_format="html")
        except Exception as e:
            logger.error(f"Failed to fetch URL {url} using {crawler_client.__class__.__name__}: {repr(e)}")
            raise
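The crawler engine and its timeouts are read from the CRAWLER_ENGINE block of conf.yaml. Below is a minimal sketch of the parsed configuration that _select_crawler_tool() expects; the key names come from the code above, the concrete values are made up for illustration:

    # A sketch of the CRAWLER_ENGINE block that _select_crawler_tool() receives
    # after load_yaml_config("conf.yaml") has parsed it. Key names come from the
    # code above; the values below are illustrative only.
    crawler_config = {
        "engine": "infoquest",  # "jina" is the default when the key is absent
        "fetch_time": 30,       # only forwarded to InfoQuestClient when > 0
        "timeout": 60,
        "navi_timeout": 10,
    }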
153  src/crawler/infoquest_client.py  Normal file
@@ -0,0 +1,153 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT

"""Util that calls InfoQuest Crawler API.

In order to set this up, follow instructions at:
https://docs.byteplus.com/en/docs/InfoQuest/What_is_Info_Quest
"""

import json
import logging
import os
from typing import Dict, Any

import requests

logger = logging.getLogger(__name__)


class InfoQuestClient:
    """Client for interacting with the InfoQuest web crawling API."""

    def __init__(self, fetch_time: int = -1, timeout: int = -1, navi_timeout: int = -1):
        logger.info(
            "\n============================================\n"
            "🚀 BytePlus InfoQuest Crawler Initialization 🚀\n"
            "============================================"
        )

        self.fetch_time = fetch_time
        self.timeout = timeout
        self.navi_timeout = navi_timeout
        self.api_key_set = bool(os.getenv("INFOQUEST_API_KEY"))

        config_details = (
            f"\n📋 Configuration Details:\n"
            f"├── Fetch Timeout: {fetch_time} {'(Default: No timeout)' if fetch_time == -1 else '(Custom)'}\n"
            f"├── Timeout: {timeout} {'(Default: No timeout)' if timeout == -1 else '(Custom)'}\n"
            f"├── Navigation Timeout: {navi_timeout} {'(Default: No timeout)' if navi_timeout == -1 else '(Custom)'}\n"
            f"└── API Key: {'✅ Configured' if self.api_key_set else '❌ Not set'}"
        )

        logger.info(config_details)
        logger.info("\n" + "*" * 70 + "\n")

    def crawl(self, url: str, return_format: str = "html") -> str:
        logger.debug("Preparing request for URL: %s", url)

        # Prepare headers
        headers = self._prepare_headers()

        # Prepare request data
        data = self._prepare_request_data(url, return_format)

        # Log request details
        logger.debug(
            "InfoQuest Crawler request prepared: endpoint=https://reader.infoquest.bytepluses.com, "
            "format=%s, has_api_key=%s",
            data.get("format"), self.api_key_set
        )

        logger.debug("Sending crawl request to InfoQuest API")
        try:
            response = requests.post(
                "https://reader.infoquest.bytepluses.com",
                headers=headers,
                json=data
            )

            # Check if status code is not 200
            if response.status_code != 200:
                error_message = f"InfoQuest API returned status {response.status_code}: {response.text}"
                logger.error(error_message)
                return f"Error: {error_message}"

            # Check for empty response
            if not response.text or not response.text.strip():
                error_message = "InfoQuest Crawler API returned empty response"
                logger.error("BytePlus InfoQuest Crawler returned empty response for URL: %s", url)
                return f"Error: {error_message}"

            # Try to parse response as JSON and extract reader_result
            try:
                response_data = json.loads(response.text)
                # Extract reader_result if it exists
                if "reader_result" in response_data:
                    logger.debug("Successfully extracted reader_result from JSON response")
                    return response_data["reader_result"]
                elif "content" in response_data:
                    # Fallback to content field if reader_result is not available
                    logger.debug("Using content field as fallback")
                    return response_data["content"]
                else:
                    # If neither field exists, return the original response
                    logger.warning("Neither reader_result nor content field found in JSON response")
            except json.JSONDecodeError:
                # If response is not JSON, return the original text
                logger.debug("Response is not in JSON format, returning as-is")

            # Print partial response for debugging
            if logger.isEnabledFor(logging.DEBUG):
                response_sample = response.text[:200] + ("..." if len(response.text) > 200 else "")
                logger.debug(
                    "Successfully received response, content length: %d bytes, first 200 chars: %s",
                    len(response.text), response_sample
                )
            return response.text
        except Exception as e:
            error_message = f"Request to InfoQuest API failed: {str(e)}"
            logger.error(error_message)
            return f"Error: {error_message}"

    def _prepare_headers(self) -> Dict[str, str]:
        """Prepare request headers."""
        headers = {
            "Content-Type": "application/json",
        }

        # Add API key if available
        if os.getenv("INFOQUEST_API_KEY"):
            headers["Authorization"] = f"Bearer {os.getenv('INFOQUEST_API_KEY')}"
            logger.debug("API key added to request headers")
        else:
            logger.warning(
                "InfoQuest API key is not set. Provide your own key for authentication."
            )

        return headers

    def _prepare_request_data(self, url: str, return_format: str) -> Dict[str, Any]:
        """Prepare request data with formatted parameters."""
        # Normalize return_format
        if return_format and return_format.lower() == "html":
            normalized_format = "HTML"
        else:
            normalized_format = return_format

        data = {"url": url, "format": normalized_format}

        # Add timeout parameters if set to positive values
        timeout_params = {}
        if self.fetch_time > 0:
            timeout_params["fetch_time"] = self.fetch_time
        if self.timeout > 0:
            timeout_params["timeout"] = self.timeout
        if self.navi_timeout > 0:
            timeout_params["navi_timeout"] = self.navi_timeout

        # Log applied timeout parameters
        if timeout_params:
            logger.debug("Applying timeout parameters: %s", timeout_params)
            data.update(timeout_params)

        return data
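A short, hedged usage sketch of the client added above; the constructor parameters, the INFOQUEST_API_KEY variable, and the "Error: " return convention come from this file, while the key value, the timeouts, and the URL are placeholders:

    import os

    from src.crawler.infoquest_client import InfoQuestClient

    os.environ.setdefault("INFOQUEST_API_KEY", "your-api-key")  # placeholder key

    client = InfoQuestClient(fetch_time=30, timeout=60, navi_timeout=10)
    html = client.crawl("https://example.com", return_format="html")

    # On failure the client returns a string starting with "Error: " instead of raising.
    if html.startswith("Error:"):
        print(html)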
@@ -22,12 +22,21 @@ class JinaClient:
                "Jina API key is not set. Provide your own key to access a higher rate limit. See https://jina.ai/reader for more information."
            )
        data = {"url": url}
        response = requests.post("https://r.jina.ai/", headers=headers, json=data)

        if response.status_code != 200:
            raise ValueError(f"Jina API returned status {response.status_code}: {response.text}")

        if not response.text or not response.text.strip():
            raise ValueError("Jina API returned empty response")

        return response.text
        try:
            response = requests.post("https://r.jina.ai/", headers=headers, json=data)

            if response.status_code != 200:
                error_message = f"Jina API returned status {response.status_code}: {response.text}"
                logger.error(error_message)
                return f"Error: {error_message}"

            if not response.text or not response.text.strip():
                error_message = "Jina API returned empty response"
                logger.error(error_message)
                return f"Error: {error_message}"

            return response.text
        except Exception as e:
            error_message = f"Request to Jina API failed: {str(e)}"
            logger.error(error_message)
            return f"Error: {error_message}"
4  src/tools/infoquest_search/__init__.py  Normal file
@@ -0,0 +1,4 @@
from .infoquest_search_api import InfoQuestAPIWrapper
from .infoquest_search_results import InfoQuestSearchResults

__all__ = ["InfoQuestAPIWrapper", "InfoQuestSearchResults"]
232  src/tools/infoquest_search/infoquest_search_api.py  Normal file
@@ -0,0 +1,232 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT

"""Util that calls InfoQuest Search API.

In order to set this up, follow instructions at:
https://docs.byteplus.com/en/docs/InfoQuest/What_is_Info_Quest
"""

import json
from typing import Any, Dict, List

import aiohttp
import requests
from langchain_core.utils import get_from_dict_or_env
from pydantic import BaseModel, ConfigDict, SecretStr, model_validator
from src.config import load_yaml_config
import logging

logger = logging.getLogger(__name__)

INFOQUEST_API_URL = "https://search.infoquest.bytepluses.com"


def get_search_config():
    config = load_yaml_config("conf.yaml")
    search_config = config.get("SEARCH_ENGINE", {})
    return search_config


class InfoQuestAPIWrapper(BaseModel):
    """Wrapper for InfoQuest Search API."""

    infoquest_api_key: SecretStr
    model_config = ConfigDict(
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Validate that api key and endpoint exist in environment."""
        logger.info("Initializing BytePlus InfoQuest Product - Search API client")

        infoquest_api_key = get_from_dict_or_env(
            values, "infoquest_api_key", "INFOQUEST_API_KEY"
        )
        values["infoquest_api_key"] = infoquest_api_key

        logger.info("BytePlus InfoQuest Product - Environment validation successful")
        return values

    def raw_results(
        self,
        query: str,
        time_range: int,
        site: str,
        output_format: str = "JSON",
    ) -> Dict:
        """Get results from the InfoQuest Search API synchronously."""
        if logger.isEnabledFor(logging.DEBUG):
            query_truncated = query[:50] + "..." if len(query) > 50 else query
            logger.debug(
                f"InfoQuest - Search API request initiated | "
                f"operation=search | "
                f"query_truncated={query_truncated} | "
                f"has_time_filter={time_range > 0} | "
                f"has_site_filter={bool(site)} | "
                f"request_type=sync"
            )

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.infoquest_api_key.get_secret_value()}",
        }

        params = {
            "format": output_format,
            "query": query
        }
        if time_range > 0:
            params["time_range"] = time_range
            logger.debug(f"InfoQuest - Applying time range filter: time_range_days={time_range}")

        if site != "":
            params["site"] = site
            logger.debug(f"InfoQuest - Applying site filter: site={site}")

        response = requests.post(
            f"{INFOQUEST_API_URL}",
            headers=headers,
            json=params
        )
        response.raise_for_status()

        # Print partial response for debugging
        response_json = response.json()
        if logger.isEnabledFor(logging.DEBUG):
            response_sample = json.dumps(response_json)[:200] + ("..." if len(json.dumps(response_json)) > 200 else "")
            logger.debug(
                f"Search API request completed successfully | "
                f"service=InfoQuest | "
                f"status=success | "
                f"response_sample={response_sample}"
            )

        return response_json["search_result"]

    async def raw_results_async(
        self,
        query: str,
        time_range: int,
        site: str,
        output_format: str = "JSON",
    ) -> Dict:
        """Get results from the InfoQuest Search API asynchronously."""

        if logger.isEnabledFor(logging.DEBUG):
            query_truncated = query[:50] + "..." if len(query) > 50 else query
            logger.debug(
                f"BytePlus InfoQuest - Search API async request initiated | "
                f"operation=search | "
                f"query_truncated={query_truncated} | "
                f"has_time_filter={time_range > 0} | "
                f"has_site_filter={bool(site)} | "
                f"request_type=async"
            )

        # Function to perform the API call
        async def fetch() -> str:
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.infoquest_api_key.get_secret_value()}",
            }
            params = {
                "format": output_format,
                "query": query,
            }
            if time_range > 0:
                params["time_range"] = time_range
                logger.debug(f"Applying time range filter in async request: {time_range} days")
            if site != "":
                params["site"] = site
                logger.debug(f"Applying site filter in async request: {site}")

            async with aiohttp.ClientSession(trust_env=True) as session:
                async with session.post(f"{INFOQUEST_API_URL}", headers=headers, json=params) as res:
                    if res.status == 200:
                        data = await res.text()
                        return data
                    else:
                        raise Exception(f"Error {res.status}: {res.reason}")

        results_json_str = await fetch()

        # Print partial response for debugging
        if logger.isEnabledFor(logging.DEBUG):
            response_sample = results_json_str[:200] + ("..." if len(results_json_str) > 200 else "")
            logger.debug(
                f"Async search API request completed successfully | "
                f"service=InfoQuest | "
                f"status=success | "
                f"response_sample={response_sample}"
            )
        return json.loads(results_json_str)["search_result"]

    def clean_results_with_images(
        self, raw_results: List[Dict[str, Dict[str, Dict[str, Any]]]]
    ) -> List[Dict]:
        """Clean results from InfoQuest Search API."""
        logger.debug("Processing search results")

        seen_urls = set()
        clean_results = []
        counts = {"pages": 0, "news": 0, "images": 0}

        for content_list in raw_results:
            content = content_list["content"]
            results = content["results"]

            if results.get("organic"):
                organic_results = results["organic"]
                for result in organic_results:
                    clean_result = {
                        "type": "page",
                        "title": result["title"],
                        "url": result["url"],
                        "desc": result["desc"],
                    }
                    url = clean_result["url"]
                    if isinstance(url, str) and url and url not in seen_urls:
                        seen_urls.add(url)
                        clean_results.append(clean_result)
                        counts["pages"] += 1

            if results.get("top_stories"):
                news = results["top_stories"]
                for obj in news["items"]:
                    clean_result = {
                        "type": "news",
                        "time_frame": obj["time_frame"],
                        "title": obj["title"],
                        "url": obj["url"],
                        "source": obj["source"],
                    }
                    url = clean_result["url"]
                    if isinstance(url, str) and url and url not in seen_urls:
                        seen_urls.add(url)
                        clean_results.append(clean_result)
                        counts["news"] += 1

            if results.get("images"):
                images = results["images"]
                for image in images["items"]:
                    clean_result = {
                        "type": "image_url",
                        "image_url": image["url"],
                        "image_description": image["alt"],
                    }
                    url = clean_result["image_url"]
                    if isinstance(url, str) and url and url not in seen_urls:
                        seen_urls.add(url)
                        clean_results.append(clean_result)
                        counts["images"] += 1

        logger.debug(
            f"Results processing completed | "
            f"total_results={len(clean_results)} | "
            f"pages={counts['pages']} | "
            f"news_items={counts['news']} | "
            f"images={counts['images']} | "
            f"unique_urls={len(seen_urls)}"
        )

        return clean_results
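For reference, a hedged sketch of how the wrapper is driven (the same call pattern the tool below uses); the argument order and the "results" key follow the code above, while the query string is only an example:

    from src.tools.infoquest_search.infoquest_search_api import InfoQuestAPIWrapper

    # The validator pulls INFOQUEST_API_KEY from the environment when no key is passed.
    wrapper = InfoQuestAPIWrapper()

    raw = wrapper.raw_results("who won the last french open", time_range=-1, site="")
    cleaned = wrapper.clean_results_with_images(raw["results"])
    for item in cleaned:
        print(item["type"], item.get("url") or item.get("image_url"))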
236  src/tools/infoquest_search/infoquest_search_results.py  Normal file
@@ -0,0 +1,236 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT

"""Tool for the InfoQuest search API."""

import json
import logging
from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union
from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field

from src.tools.infoquest_search.infoquest_search_api import InfoQuestAPIWrapper

logger = logging.getLogger(__name__)


class InfoQuestInput(BaseModel):
    """Input for the InfoQuest tool."""

    query: str = Field(description="search query to look up")


class InfoQuestSearchResults(BaseTool):
    """Tool that queries the InfoQuest Search API and returns processed results with images.

    Setup:
        Install required packages and set environment variable ``INFOQUEST_API_KEY``.

        .. code-block:: bash

            pip install -U langchain-community aiohttp
            export INFOQUEST_API_KEY="your-api-key"

    Instantiate:
        .. code-block:: python

            from src.tools.infoquest_search import InfoQuestSearchResults

            tool = InfoQuestSearchResults(
                output_format="json",
                time_range=10,
                site="nytimes.com"
            )

    Invoke directly with args:
        .. code-block:: python

            tool.invoke({
                'query': 'who won the last french open'
            })

        .. code-block:: json

            [
                {
                    "type": "page",
                    "title": "Djokovic Claims French Open Title...",
                    "url": "https://www.nytimes.com/...",
                    "desc": "Novak Djokovic won the 2024 French Open by defeating Casper Ruud..."
                },
                {
                    "type": "news",
                    "time_frame": "2 days ago",
                    "title": "French Open Finals Recap",
                    "url": "https://www.nytimes.com/...",
                    "source": "New York Times"
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "https://www.nytimes.com/.../djokovic.jpg"},
                    "image_description": "Novak Djokovic celebrating his French Open victory"
                }
            ]

    Invoke with tool call:
        .. code-block:: python

            tool.invoke({
                "args": {
                    'query': 'who won the last french open',
                },
                "type": "tool_call",
                "id": "foo",
                "name": "infoquest"
            })

        .. code-block:: python

            ToolMessage(
                content='[
                    {"type": "page", "title": "Djokovic Claims...", "url": "https://www.nytimes.com/...", "desc": "Novak Djokovic won..."},
                    {"type": "news", "time_frame": "2 days ago", "title": "French Open Finals...", "url": "https://www.nytimes.com/...", "source": "New York Times"},
                    {"type": "image_url", "image_url": {"url": "https://www.nytimes.com/.../djokovic.jpg"}, "image_description": "Novak Djokovic celebrating..."}
                ]',
                tool_call_id='1',
                name='infoquest_search_results_json',
            )

    """  # noqa: E501

    name: str = "infoquest_search_results_json"
    description: str = (
        "A search engine optimized for comprehensive, accurate, and trusted results. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    )
    args_schema: Type[BaseModel] = InfoQuestInput
    """The tool response format."""

    time_range: int = -1
    """Time range for filtering search results, in days.

    If set to a positive integer (e.g., 30), only results from the last N days will be included.
    Default is -1, which means no time range filter is applied.
    """

    site: str = ""
    """Specific domain to restrict search results to (e.g., "nytimes.com").

    If provided, only results from the specified domain will be returned.
    Default is an empty string, which means no domain restriction is applied.
    """

    api_wrapper: InfoQuestAPIWrapper = Field(default_factory=InfoQuestAPIWrapper)  # type: ignore[arg-type]
    response_format: Literal["content_and_artifact"] = "content_and_artifact"

    def __init__(self, **kwargs: Any) -> None:
        # Create api_wrapper with infoquest_api_key if provided
        if "infoquest_api_key" in kwargs:
            kwargs["api_wrapper"] = InfoQuestAPIWrapper(
                infoquest_api_key=kwargs["infoquest_api_key"]
            )
            logger.debug("API wrapper initialized with provided key")

        super().__init__(**kwargs)

        logger.info(
            "\n============================================\n"
            "🚀 BytePlus InfoQuest Search Initialization 🚀\n"
            "============================================"
        )

        # Prepare initialization details
        time_range_status = f"{self.time_range} days" if hasattr(self, 'time_range') and self.time_range > 0 else "Disabled"
        site_filter = f"'{self.site}'" if hasattr(self, 'site') and self.site else "Disabled"

        initialization_details = (
            f"\n🔧 Tool Information:\n"
            f"├── Tool Name: {self.name}\n"
            f"├── Time Range Filter: {time_range_status}\n"
            f"└── Site Filter: {site_filter}\n"
            f"📊 Configuration Summary:\n"
            f"├── Response Format: {self.response_format}\n"
        )

        logger.info(initialization_details)
        logger.info("\n" + "*" * 70 + "\n")

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Tuple[Union[List[Dict[str, str]], str], Dict]:
        """Use the tool."""
        try:
            logger.debug(f"Executing search with parameters: time_range={self.time_range}, site={self.site}")
            raw_results = self.api_wrapper.raw_results(
                query,
                self.time_range,
                self.site
            )
            logger.debug("Processing raw search results")
            cleaned_results = self.api_wrapper.clean_results_with_images(raw_results["results"])

            result_json = json.dumps(cleaned_results, ensure_ascii=False)

            logger.info(
                f"Search tool execution completed | "
                f"mode=synchronous | "
                f"results_count={len(cleaned_results)}"
            )
            return result_json, raw_results
        except Exception as e:
            logger.error(
                f"Search tool execution failed | "
                f"mode=synchronous | "
                f"error={str(e)}"
            )
            error_result = json.dumps({"error": repr(e)}, ensure_ascii=False)
            return error_result, {}

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Tuple[Union[List[Dict[str, str]], str], Dict]:
        """Use the tool asynchronously."""
        if logger.isEnabledFor(logging.DEBUG):
            query_truncated = query[:50] + "..." if len(query) > 50 else query
            logger.debug(
                f"Search tool execution started | "
                f"mode=asynchronous | "
                f"query={query_truncated}"
            )
        try:
            logger.debug(f"Executing async search with parameters: time_range={self.time_range}, site={self.site}")

            raw_results = await self.api_wrapper.raw_results_async(
                query,
                self.time_range,
                self.site
            )

            logger.debug("Processing raw async search results")
            cleaned_results = self.api_wrapper.clean_results_with_images(raw_results["results"])

            result_json = json.dumps(cleaned_results, ensure_ascii=False)

            logger.debug(
                f"Search tool execution completed | "
                f"mode=asynchronous | "
                f"results_count={len(cleaned_results)}"
            )

            return result_json, raw_results
        except Exception as e:
            logger.error(
                f"Search tool execution failed | "
                f"mode=asynchronous | "
                f"error={str(e)}"
            )
            error_result = json.dumps({"error": repr(e)}, ensure_ascii=False)
            return error_result, {}
@@ -21,6 +21,7 @@ from langchain_community.utilities import (

from src.config import SELECTED_SEARCH_ENGINE, SearchEngine, load_yaml_config
from src.tools.decorators import create_logged_tool
from src.tools.infoquest_search.infoquest_search_results import InfoQuestSearchResults
from src.tools.tavily_search.tavily_search_results_with_images import (
    TavilySearchWithImages,
)
@@ -29,6 +30,7 @@ logger = logging.getLogger(__name__)

# Create logged versions of the search tools
LoggedTavilySearch = create_logged_tool(TavilySearchWithImages)
LoggedInfoQuestSearch = create_logged_tool(InfoQuestSearchResults)
LoggedDuckDuckGoSearch = create_logged_tool(DuckDuckGoSearchResults)
LoggedBraveSearch = create_logged_tool(BraveSearch)
LoggedArxivSearch = create_logged_tool(ArxivQueryRun)
@@ -76,6 +78,17 @@ def get_web_search_tool(max_search_results: int):
            include_domains=include_domains,
            exclude_domains=exclude_domains,
        )
    elif SELECTED_SEARCH_ENGINE == SearchEngine.INFOQUEST.value:
        time_range = search_config.get("time_range", -1)
        site = search_config.get("site", "")
        logger.info(
            f"InfoQuest search configuration loaded: time_range={time_range}, site={site}"
        )
        return LoggedInfoQuestSearch(
            name="web_search",
            time_range=time_range,
            site=site,
        )
    elif SELECTED_SEARCH_ENGINE == SearchEngine.DUCKDUCKGO.value:
        return LoggedDuckDuckGoSearch(
            name="web_search",
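Finally, a hedged end-to-end sketch of the InfoQuest path through get_web_search_tool; the time_range and site keys come from the hunk above, while the module path, the conf.yaml layout, and the query are assumptions for illustration:

    from src.tools.search import get_web_search_tool  # module path assumed

    # Assuming conf.yaml contains a block along these lines (illustrative values):
    #   SEARCH_ENGINE:
    #     time_range: 30      # only results from the last 30 days
    #     site: nytimes.com   # restrict results to a single domain
    # and SEARCH_API=infoquest, the factory returns the logged InfoQuest tool:
    web_search = get_web_search_tool(max_search_results=3)
    results_json = web_search.invoke({"query": "who won the last french open"})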