Files
deer-flow/tests/unit/tools/test_search_postprocessor.py

264 lines
9.8 KiB
Python
Raw Permalink Normal View History

import pytest
feat: Add intelligent clarification feature in coordinate step for research queries (#613) * fix: support local models by making thought field optional in Plan model - Make thought field optional in Plan model to fix Pydantic validation errors with local models - Add Ollama configuration example to conf.yaml.example - Update documentation to include local model support - Improve planner prompt with better JSON format requirements Fixes local model integration issues where models like qwen3:14b would fail due to missing thought field in JSON output. * feat: Add intelligent clarification feature for research queries - Add multi-turn clarification process to refine vague research questions - Implement three-dimension clarification standard (Tech/App, Focus, Scope) - Add clarification state management in coordinator node - Update coordinator prompt with detailed clarification guidelines - Add UI settings to enable/disable clarification feature (disabled by default) - Update workflow to handle clarification rounds recursively - Add comprehensive test coverage for clarification functionality - Update documentation with clarification feature usage guide Key components: - src/graph/nodes.py: Core clarification logic and state management - src/prompts/coordinator.md: Detailed clarification guidelines - src/workflow.py: Recursive clarification handling - web/: UI settings integration - tests/: Comprehensive test coverage - docs/: Updated configuration guide * fix: Improve clarification conversation continuity - Add comprehensive conversation history to clarification context - Include previous exchanges summary in system messages - Add explicit guidelines for continuing rounds in coordinator prompt - Prevent LLM from starting new topics during clarification - Ensure topic continuity across clarification rounds Fixes issue where LLM would restart clarification instead of building upon previous exchanges. * fix: Add conversation history to clarification context * fix: resolve clarification feature message to planer, prompt, test issues - Optimize coordinator.md prompt template for better clarification flow - Simplify final message sent to planner after clarification - Fix API key assertion issues in test_search.py * fix: Add configurable max_clarification_rounds and comprehensive tests - Add max_clarification_rounds parameter for external configuration - Add comprehensive test cases for clarification feature in test_app.py - Fixes issues found during interactive mode testing where: - Recursive call failed due to missing initial_state parameter - Clarification exited prematurely at max rounds - Incorrect logging of max rounds reached * Move clarification tests to test_nodes.py and add max_clarification_rounds to zh.json
2025-10-13 22:35:57 -07:00
from src.tools.search_postprocessor import SearchResultPostProcessor
class TestSearchResultPostProcessor:
"""Test cases for SearchResultPostProcessor"""
@pytest.fixture
def post_processor(self):
"""Create a SearchResultPostProcessor instance for testing"""
return SearchResultPostProcessor(
min_score_threshold=0.5, max_content_length_per_page=100
)
def test_process_results_empty_input(self, post_processor):
"""Test processing empty results"""
results = []
processed = post_processor.process_results(results)
assert processed == []
def test_process_results_with_valid_page_results(self, post_processor):
"""Test processing valid page results"""
results = [
{
"type": "page",
"title": "Test Page",
"url": "https://example.com",
"content": "Test content",
"score": 0.8,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert processed[0]["title"] == "Test Page"
assert processed[0]["url"] == "https://example.com"
assert processed[0]["content"] == "Test content"
assert processed[0]["score"] == 0.8
def test_process_results_filter_low_score(self, post_processor):
"""Test filtering out low score results"""
results = [
{
"type": "page",
"title": "Low Score Page",
"url": "https://example.com/low",
"content": "Low score content",
"score": 0.3, # Below threshold of 0.5
},
{
"type": "page",
"title": "High Score Page",
"url": "https://example.com/high",
"content": "High score content",
"score": 0.9,
},
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert processed[0]["title"] == "High Score Page"
def test_process_results_remove_duplicates(self, post_processor):
"""Test removing duplicate URLs"""
results = [
{
"type": "page",
"title": "Page 1",
"url": "https://example.com",
"content": "Content 1",
"score": 0.8,
},
{
"type": "page",
"title": "Page 2",
"url": "https://example.com", # Duplicate URL
"content": "Content 2",
"score": 0.7,
},
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert processed[0]["title"] == "Page 1" # First one should be kept
def test_process_results_sort_by_score(self, post_processor):
"""Test sorting results by score in descending order"""
results = [
{
"type": "page",
"title": "Low Score",
"url": "https://example.com/low",
"content": "Low score content",
"score": 0.3,
},
{
"type": "page",
"title": "High Score",
"url": "https://example.com/high",
"content": "High score content",
"score": 0.9,
},
{
"type": "page",
"title": "Medium Score",
"url": "https://example.com/medium",
"content": "Medium score content",
"score": 0.6,
},
]
processed = post_processor.process_results(results)
assert len(processed) == 2 # Low score filtered out
# Should be sorted by score descending
assert processed[0]["title"] == "High Score"
assert processed[1]["title"] == "Medium Score"
def test_process_results_truncate_long_content(self, post_processor):
"""Test truncating long content"""
long_content = "A" * 150 # Longer than max_content_length of 100
results = [
{
"type": "page",
"title": "Long Content Page",
"url": "https://example.com",
"content": long_content,
"score": 0.8,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert len(processed[0]["content"]) == 103 # 100 + "..."
assert processed[0]["content"].endswith("...")
def test_process_results_remove_base64_images(self, post_processor):
"""Test removing base64 images from content"""
content_with_base64 = (
"Content with image "
+ "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="
)
results = [
{
"type": "page",
"title": "Page with Base64",
"url": "https://example.com",
"content": content_with_base64,
"score": 0.8,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert processed[0]["content"] == "Content with image "
def test_process_results_with_image_type(self, post_processor):
"""Test processing image type results"""
results = [
{
"type": "image",
"image_url": "https://example.com/image.jpg",
"image_description": "Test image",
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert processed[0]["type"] == "image"
assert processed[0]["image_url"] == "https://example.com/image.jpg"
assert processed[0]["image_description"] == "Test image"
def test_process_results_filter_base64_image_urls(self, post_processor):
"""Test filtering out image results with base64 URLs"""
results = [
{
"type": "image",
"image_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==",
"image_description": "Base64 image",
},
{
"type": "image",
"image_url": "https://example.com/image.jpg",
"image_description": "Regular image",
},
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert processed[0]["image_url"] == "https://example.com/image.jpg"
def test_process_results_truncate_long_image_description(self, post_processor):
"""Test truncating long image descriptions"""
long_description = "A" * 150 # Longer than max_content_length of 100
results = [
{
"type": "image",
"image_url": "https://example.com/image.jpg",
"image_description": long_description,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert len(processed[0]["image_description"]) == 103 # 100 + "..."
assert processed[0]["image_description"].endswith("...")
def test_process_results_other_types_passthrough(self, post_processor):
"""Test that other result types pass through unchanged"""
results = [
{
"type": "video",
"title": "Test Video",
"url": "https://example.com/video.mp4",
"score": 0.8,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert processed[0]["type"] == "video"
assert processed[0]["title"] == "Test Video"
def test_process_results_truncate_long_content_with_no_config(self):
"""Test truncating long content"""
post_processor = SearchResultPostProcessor(None, None)
long_content = "A" * 150 # Longer than max_content_length of 100
results = [
{
"type": "page",
"title": "Long Content Page",
"url": "https://example.com",
"content": long_content,
"score": 0.8,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert len(processed[0]["content"]) == len("A" * 150)
def test_process_results_truncate_long_content_with_max_content_length_config(self):
"""Test truncating long content"""
post_processor = SearchResultPostProcessor(None, 100)
long_content = "A" * 150 # Longer than max_content_length of 100
results = [
{
"type": "page",
"title": "Long Content Page",
"url": "https://example.com",
"content": long_content,
"score": 0.8,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 1
assert len(processed[0]["content"]) == 103
assert processed[0]["content"].endswith("...")
def test_process_results_truncate_long_content_with_min_score_config(self):
"""Test truncating long content"""
post_processor = SearchResultPostProcessor(0.8, None)
long_content = "A" * 150 # Longer than max_content_length of 100
results = [
{
"type": "page",
"title": "Long Content Page",
"url": "https://example.com",
"content": long_content,
"score": 0.3,
}
]
processed = post_processor.process_results(results)
assert len(processed) == 0