feat: support infoquest (#708)

* support infoquest

* support html checker

* support html checker

* change line break format

* change line break format

* change line break format

* change line break format

* change line break format

* change line break format

* change line break format

* change line break format

* Fix several critical issues in the codebase
- Resolve crawler panic by improving error handling
- Fix plan validation to prevent invalid configurations
- Correct InfoQuest crawler JSON conversion logic

* add test for infoquest

* add test for infoquest

* Add InfoQuest introduction to the README

* add test for infoquest

* fix readme for infoquest

* fix readme for infoquest

* resolve the conflict

* resolve the conflict

* resolve the conflict

* Fix formatting of INFOQUEST in SearchEngine enum

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Willem Jiang <143703838+willem-bd@users.noreply.github.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
infoquest-byteplus
2025-12-02 08:16:35 +08:00
committed by GitHub
parent e179fb1632
commit 7ec9e45702
22 changed files with 2103 additions and 94 deletions

View File

@@ -0,0 +1,218 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
from unittest.mock import Mock, patch
import pytest
import requests
from src.tools.infoquest_search.infoquest_search_api import InfoQuestAPIWrapper
class TestInfoQuestAPIWrapper:
    """Unit tests for ``InfoQuestAPIWrapper``.

    The synchronous tests patch ``requests.post`` as referenced from the
    module under test, so no network traffic is generated.

    NOTE(review): the two async tests replace ``raw_results_async`` with a
    stub and then call that stub, so they only pin the call signature and the
    propagation of the stub's result/exception, not the real implementation.
    """

    # Check if pytest-asyncio is available, otherwise mark for conditional skipping
    try:
        import pytest_asyncio  # noqa: F401  (imported only to probe availability)

        _asyncio_available = True
    except ImportError:
        _asyncio_available = False

    @staticmethod
    def _ok_response(payload):
        """Build a mock HTTP response whose ``json()`` returns *payload*."""
        response = Mock()
        response.json.return_value = payload
        response.raise_for_status.return_value = None
        return response

    @pytest.fixture
    def wrapper(self):
        """Return a wrapper instance configured with a dummy API key."""
        return InfoQuestAPIWrapper(infoquest_api_key="dummy-key")

    @pytest.fixture
    def mock_response_data(self):
        """Return a response payload with one page, one news and one image hit."""
        return {
            "search_result": {
                "results": [
                    {
                        "content": {
                            "results": {
                                "organic": [
                                    {
                                        "title": "Test Title",
                                        "url": "https://example.com",
                                        "desc": "Test description",
                                    }
                                ],
                                "top_stories": {
                                    "items": [
                                        {
                                            "time_frame": "2 days ago",
                                            "title": "Test News",
                                            "url": "https://example.com/news",
                                            "source": "Test Source",
                                        }
                                    ]
                                },
                                "images": {
                                    "items": [
                                        {
                                            "url": "https://example.com/image.jpg",
                                            "alt": "Test image description",
                                        }
                                    ]
                                },
                            }
                        }
                    }
                ]
            }
        }

    @patch("src.tools.infoquest_search.infoquest_search_api.requests.post")
    def test_raw_results_success(self, mock_post, wrapper, mock_response_data):
        """A plain query returns ``search_result`` and sends no optional filters."""
        mock_post.return_value = self._ok_response(mock_response_data)

        result = wrapper.raw_results("test query", time_range=0, site="")

        assert result == mock_response_data["search_result"]
        mock_post.assert_called_once()
        call_args = mock_post.call_args
        assert "json" in call_args.kwargs
        payload = call_args.kwargs["json"]
        assert payload["query"] == "test query"
        # time_range=0 and site="" are expected to be left out of the request body.
        assert "time_range" not in payload
        assert "site" not in payload

    @patch("src.tools.infoquest_search.infoquest_search_api.requests.post")
    def test_raw_results_with_time_range_and_site(
        self, mock_post, wrapper, mock_response_data
    ):
        """Non-empty ``time_range`` and ``site`` are forwarded in the request body."""
        mock_post.return_value = self._ok_response(mock_response_data)

        result = wrapper.raw_results("test query", time_range=30, site="example.com")

        assert result == mock_response_data["search_result"]
        params = mock_post.call_args.kwargs["json"]
        assert params["time_range"] == 30
        assert params["site"] == "example.com"

    @patch("src.tools.infoquest_search.infoquest_search_api.requests.post")
    def test_raw_results_http_error(self, mock_post, wrapper):
        """An ``HTTPError`` raised by ``raise_for_status`` propagates to the caller."""
        failing_response = Mock()
        failing_response.raise_for_status.side_effect = requests.HTTPError("API Error")
        mock_post.return_value = failing_response

        with pytest.raises(requests.HTTPError):
            wrapper.raw_results("test query", time_range=0, site="")

    @pytest.mark.asyncio
    async def test_raw_results_async_success(self, wrapper, mock_response_data):
        """The (stubbed) async search returns the ``search_result`` payload."""
        # Skip only if pytest-asyncio is not installed
        if not self._asyncio_available:
            pytest.skip("pytest-asyncio is not installed")

        async def mock_raw_results_async(
            self, query, time_range=0, site="", output_format="json"
        ):
            return mock_response_data["search_result"]

        # patch.object restores the real method even if the assertion fails, and
        # avoids the previous process-wide patch of ``json.loads``, which the
        # stub never used.
        with patch.object(
            InfoQuestAPIWrapper, "raw_results_async", new=mock_raw_results_async
        ):
            result = await wrapper.raw_results_async(
                "test query", time_range=0, site=""
            )

        assert result == mock_response_data["search_result"]

    @pytest.mark.asyncio
    async def test_raw_results_async_error(self, wrapper):
        """An exception raised by the (stubbed) async search reaches the caller."""
        if not self._asyncio_available:
            pytest.skip("pytest-asyncio is not installed")

        async def mock_raw_results_async_error(
            self, query, time_range=0, site="", output_format="json"
        ):
            raise Exception("Error 400: Bad Request")

        with patch.object(
            InfoQuestAPIWrapper, "raw_results_async", new=mock_raw_results_async_error
        ):
            with pytest.raises(Exception, match="Error 400: Bad Request"):
                await wrapper.raw_results_async("test query", time_range=0, site="")

    def test_clean_results_with_images(self, wrapper, mock_response_data):
        """Page, news and image hits are flattened in that order with typed fields."""
        raw_results = mock_response_data["search_result"]["results"]

        cleaned_results = wrapper.clean_results_with_images(raw_results)

        assert len(cleaned_results) == 3
        page_result, news_result, image_result = cleaned_results

        # Test page result
        assert page_result["type"] == "page"
        assert page_result["title"] == "Test Title"
        assert page_result["url"] == "https://example.com"
        assert page_result["desc"] == "Test description"

        # Test news result
        assert news_result["type"] == "news"
        assert news_result["time_frame"] == "2 days ago"
        assert news_result["title"] == "Test News"
        assert news_result["url"] == "https://example.com/news"
        assert news_result["source"] == "Test Source"

        # Test image result
        assert image_result["type"] == "image_url"
        assert image_result["image_url"] == "https://example.com/image.jpg"
        assert image_result["image_description"] == "Test image description"

    def test_clean_results_empty_categories(self, wrapper):
        """Empty result categories produce an empty cleaned list."""
        data = [
            {
                "content": {
                    "results": {
                        "organic": [],
                        "top_stories": {"items": []},
                        "images": {"items": []},
                    }
                }
            }
        ]

        result = wrapper.clean_results_with_images(data)

        assert len(result) == 0

    def test_clean_results_url_deduplication(self, wrapper):
        """Two hits sharing a URL collapse to the first one encountered."""
        data = [
            {
                "content": {
                    "results": {
                        "organic": [
                            {
                                "title": "Test Title 1",
                                "url": "https://example.com",
                                "desc": "Description 1",
                            },
                            {
                                "title": "Test Title 2",
                                "url": "https://example.com",
                                "desc": "Description 2",
                            },
                        ]
                    }
                }
            }
        ]

        result = wrapper.clean_results_with_images(data)

        assert len(result) == 1
        assert result[0]["title"] == "Test Title 1"

View File

@@ -0,0 +1,226 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
import json
from unittest.mock import Mock, patch
import pytest
class TestInfoQuestSearchResults:
    """Tests for the ``InfoQuestSearchResults`` tool's run/init contract.

    NOTE(review): ``search_tool`` is a ``Mock`` whose ``_run``/``_arun`` are
    local stubs, so the run tests pin the expected ``(content, raw)`` return
    shape rather than exercising the real tool implementation. Likewise the
    init tests patch ``__init__`` itself and only confirm it is invoked once.
    """

    # Dotted path of the module whose names are patched by the init tests.
    _MODULE = "src.tools.infoquest_search.infoquest_search_results"

    @pytest.fixture
    def sample_raw_results(self):
        """Sample raw results from InfoQuest API."""
        return {
            "results": [
                {
                    "content": {
                        "results": {
                            "organic": [
                                {
                                    "title": "Test Title",
                                    "url": "https://example.com",
                                    "desc": "Test description",
                                }
                            ]
                        }
                    }
                }
            ]
        }

    @pytest.fixture
    def sample_cleaned_results(self):
        """Sample cleaned results."""
        return [
            {
                "type": "page",
                "title": "Test Title",
                "url": "https://example.com",
                "desc": "Test description",
            }
        ]

    @pytest.fixture
    def search_tool(self, sample_raw_results, sample_cleaned_results):
        """Create a mock InfoQuestSearchResults instance.

        The stubbed ``_run``/``_arun`` return the shared sample fixtures (as a
        JSON string plus the raw dict) instead of private copies of the same
        data, so tests can compare against the fixtures directly.
        """
        mock_tool = Mock()
        mock_tool.time_range = 30
        mock_tool.site = "example.com"

        def mock_run(query, **kwargs):
            return (
                json.dumps(sample_cleaned_results, ensure_ascii=False),
                sample_raw_results,
            )

        async def mock_arun(query, **kwargs):
            return mock_run(query, **kwargs)

        mock_tool._run = mock_run
        mock_tool._arun = mock_arun
        return mock_tool

    def _assert_init_called_once(self, **init_kwargs):
        """Instantiate the tool with *init_kwargs* and check ``__init__`` ran once.

        ``InfoQuestAPIWrapper`` is patched in the tool's module and the tool's
        ``__init__`` is replaced by a mock returning ``None``.
        """
        with patch(f"{self._MODULE}.InfoQuestAPIWrapper") as mock_wrapper_class:
            mock_wrapper_class.return_value = Mock()
            from src.tools.infoquest_search.infoquest_search_results import (
                InfoQuestSearchResults,
            )

            with patch.object(
                InfoQuestSearchResults, "__init__", return_value=None
            ) as mock_init:
                InfoQuestSearchResults(**init_kwargs)
                mock_init.assert_called_once()

    def test_init_default_values(self):
        """Test initialization with default values using patch."""
        self._assert_init_called_once(infoquest_api_key="dummy-key")

    def test_init_custom_values(self):
        """Test initialization with custom values using patch."""
        self._assert_init_called_once(
            time_range=10,
            site="test.com",
            infoquest_api_key="dummy-key",
        )

    def test_run_success(
        self,
        search_tool,
        sample_raw_results,
        sample_cleaned_results,
    ):
        """Test successful synchronous run."""
        result, raw = search_tool._run("test query")

        assert isinstance(result, str)
        assert isinstance(raw, dict)
        assert "results" in raw
        assert raw == sample_raw_results

        result_data = json.loads(result)
        assert isinstance(result_data, list)
        assert len(result_data) > 0
        assert result_data == sample_cleaned_results

    def test_run_exception(self, search_tool):
        """Test synchronous run with exception."""

        def mock_run_with_error(query, **kwargs):
            return json.dumps({"error": "API Error"}, ensure_ascii=False), {}

        # The fixture is rebuilt for every test, so no manual restore is needed.
        search_tool._run = mock_run_with_error

        result, raw = search_tool._run("test query")

        result_dict = json.loads(result)
        assert "error" in result_dict
        assert "API Error" in result_dict["error"]
        assert raw == {}

    @pytest.mark.asyncio
    async def test_arun_success(
        self,
        search_tool,
        sample_raw_results,
        sample_cleaned_results,
    ):
        """Test successful asynchronous run."""
        result, raw = await search_tool._arun("test query")

        assert isinstance(result, str)
        assert isinstance(raw, dict)
        assert "results" in raw
        assert raw == sample_raw_results
        assert json.loads(result) == sample_cleaned_results

    @pytest.mark.asyncio
    async def test_arun_exception(self, search_tool):
        """Test asynchronous run with exception."""

        async def mock_arun_with_error(query, **kwargs):
            return json.dumps({"error": "Async API Error"}, ensure_ascii=False), {}

        # The fixture is rebuilt for every test, so no manual restore is needed.
        search_tool._arun = mock_arun_with_error

        result, raw = await search_tool._arun("test query")

        result_dict = json.loads(result)
        assert "error" in result_dict
        assert "Async API Error" in result_dict["error"]
        assert raw == {}

    def test_run_with_run_manager(self, search_tool):
        """Test run with callback manager."""
        mock_run_manager = Mock()

        result, raw = search_tool._run("test query", run_manager=mock_run_manager)

        assert isinstance(result, str)
        assert isinstance(raw, dict)

    @pytest.mark.asyncio
    async def test_arun_with_run_manager(self, search_tool):
        """Test async run with callback manager."""
        mock_run_manager = Mock()

        result, raw = await search_tool._arun(
            "test query", run_manager=mock_run_manager
        )

        assert isinstance(result, str)
        assert isinstance(raw, dict)

    def test_api_wrapper_initialization_with_key(self):
        """Test API wrapper initialization with key."""
        self._assert_init_called_once(infoquest_api_key="test-key")