fix: the exception of plan validation (#714)

* fix: Missing Required Fields in Plan Validation

* fix: the exception of plan validation

* Fixed the test errors

* Addressed the comments of the PR reviews
This commit is contained in:
Willem Jiang
2025-11-27 19:39:25 +08:00
committed by GitHub
parent bec97f02ae
commit 667916959b
3 changed files with 228 additions and 12 deletions

View File

@@ -146,24 +146,29 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
# SECTION 2: Enforce web search requirements
# ============================================================
if enforce_web_search:
# Check if any step has need_search=true
has_search_step = any(step.get("need_search", False) for step in steps)
# Check if any step has need_search=true (only check dict steps)
has_search_step = any(
step.get("need_search", False)
for step in steps
if isinstance(step, dict)
)
if not has_search_step and steps:
# Ensure first research step has web search enabled
for idx, step in enumerate(steps):
if step.get("step_type") == "research":
if isinstance(step, dict) and step.get("step_type") == "research":
step["need_search"] = True
logger.info(f"Enforced web search on research step at index {idx}")
break
else:
# Fallback: If no research step exists, convert the first step to a research step with web search enabled.
# This ensures that at least one step will perform a web search as required.
steps[0]["step_type"] = "research"
steps[0]["need_search"] = True
logger.info(
"Converted first step to research with web search enforcement"
)
if isinstance(steps[0], dict):
steps[0]["step_type"] = "research"
steps[0]["need_search"] = True
logger.info(
"Converted first step to research with web search enforcement"
)
elif not has_search_step and not steps:
# Add a default research step if no steps exist
logger.warning("Plan has no steps. Adding default research step.")
@@ -176,6 +181,29 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
}
]
# ============================================================
# SECTION 3: Ensure required Plan fields are present (Issue #710 fix)
# ============================================================
# Set locale from state if not present
if "locale" not in plan or not plan.get("locale"):
plan["locale"] = "en-US" # Default locale
logger.info("Added missing locale field with default value 'en-US'")
# Ensure has_enough_context is present
if "has_enough_context" not in plan:
plan["has_enough_context"] = False # Default value
logger.info("Added missing has_enough_context field with default value 'False'")
# Ensure title is present
if "title" not in plan or not plan.get("title"):
# Try to infer title from steps or use a default
if steps and isinstance(steps[0], dict) and "title" in steps[0]:
plan["title"] = steps[0]["title"]
logger.info(f"Inferred missing title from first step: {plan['title']}")
else:
plan["title"] = "Research Plan" # Default title
logger.info("Added missing title field with default value 'Research Plan'")
return plan
@@ -381,8 +409,20 @@ def extract_plan_content(plan_data: str | dict | Any) -> str:
return plan_data.content
elif isinstance(plan_data, dict):
# If it's already a dictionary, convert to JSON string
logger.debug("Converting plan dictionary to JSON string")
return json.dumps(plan_data)
# Need to check if it's dict with content field (AIMessage-like)
if "content" in plan_data:
if isinstance(plan_data["content"], str):
logger.debug("Extracting plan content from dict with content field")
return plan_data["content"]
if isinstance(plan_data["content"], dict):
logger.debug("Converting content field dict to JSON string")
return json.dumps(plan_data["content"], ensure_ascii=False)
else:
logger.warning(f"Unexpected type for 'content' field in plan_data dict: {type(plan_data['content']).__name__}, converting to string")
return str(plan_data["content"])
else:
logger.debug("Converting plan dictionary to JSON string")
return json.dumps(plan_data)
else:
# For any other type, try to convert to string
logger.warning(f"Unexpected plan data type {type(plan_data).__name__}, attempting to convert to string")

View File

@@ -116,6 +116,34 @@ class TestExtractPlanContent:
result = extract_plan_content(empty_dict)
assert result == expected_json
def test_extract_plan_content_with_content_dict(self):
"""Test that extract_plan_content handles dictionaries with content."""
content_dict = {"content": {
"locale": "zh-CN",
"has_enough_context": False,
"title": "埃菲尔铁塔与世界最高建筑高度比较研究计划",
"thought": "要回答埃菲尔铁塔比世界最高建筑高多少倍的问题,我们需要知道埃菲尔铁塔的高度以及当前世界最高建筑的高度。",
"steps": [
{
"need_search": True,
"title": "收集埃菲尔铁塔和世界最高建筑的高度数据",
"description": "从可靠来源检索埃菲尔铁塔的确切高度以及目前被公认为世界最高建筑的建筑物及其高度数据。",
"step_type": "research"
}
]
}
}
result = extract_plan_content(content_dict)
# Verify the result can be parsed back to a dictionary
parsed_result = json.loads(result)
assert parsed_result == content_dict["content"]
def test_extract_plan_content_with_content_string(self):
content_dict = {"content": '{"locale": "en-US", "title": "Test"}'}
result = extract_plan_content(content_dict)
assert result == '{"locale": "en-US", "title": "Test"}'
def test_extract_plan_content_issue_703_case(self):
"""Test that extract_plan_content handles the specific case from issue #703."""
# This is the exact structure that was causing the error in issue #703

View File

@@ -167,8 +167,8 @@ class TestValidateAndFixPlanStepTypeRepair:
validate_and_fix_plan(plan)
# Should log repair operation
mock_logger.info.assert_called()
call_args = str(mock_logger.info.call_args)
assert "Repaired missing step_type" in call_args
# Check that any of the info calls contains "Repaired missing step_type"
assert any("Repaired missing step_type" in str(call) for call in mock_logger.info.call_args_list)
def test_non_dict_plan_returns_unchanged(self):
"""Test that non-dict plans are returned unchanged."""
@@ -363,6 +363,154 @@ class TestValidateAndFixPlanIntegration:
assert result["steps"][1]["need_search"] is False
class TestValidateAndFixPlanIssue710:
"""Specific tests for Issue #710 scenarios - missing required Plan fields."""
def test_missing_locale_field_added(self):
"""Test that missing locale field is added with default value."""
plan = {
"has_enough_context": True,
"title": "Test Plan",
"steps": []
}
result = validate_and_fix_plan(plan)
assert "locale" in result
assert result["locale"] == "en-US"
def test_empty_locale_field_replaced(self):
"""Test that empty locale field is replaced with default value."""
plan = {
"locale": "",
"has_enough_context": True,
"title": "Test Plan",
"steps": []
}
result = validate_and_fix_plan(plan)
assert "locale" in result
assert result["locale"] == "en-US"
def test_missing_has_enough_context_field_added(self):
"""Test that missing has_enough_context field is added with default value."""
plan = {
"locale": "en-US",
"title": "Test Plan",
"steps": []
}
result = validate_and_fix_plan(plan)
assert "has_enough_context" in result
assert result["has_enough_context"] is False
def test_missing_title_field_added_from_step(self):
"""Test that missing title field is inferred from first step."""
plan = {
"locale": "en-US",
"has_enough_context": True,
"steps": [
{
"need_search": True,
"title": "Step Title",
"description": "Step description",
"step_type": "research"
}
]
}
result = validate_and_fix_plan(plan)
assert "title" in result
assert result["title"] == "Step Title"
def test_missing_title_field_added_default(self):
"""Test that missing title field is added with default when no steps available."""
plan = {
"locale": "en-US",
"has_enough_context": True,
"steps": []
}
result = validate_and_fix_plan(plan)
assert "title" in result
assert result["title"] == "Research Plan"
def test_all_required_fields_missing(self):
"""Test that all missing required fields are added."""
plan = {
"steps": [
{
"need_search": True,
"title": "Step 1",
"description": "Description",
"step_type": "research"
}
]
}
result = validate_and_fix_plan(plan)
# All required fields should be present
assert "locale" in result
assert "has_enough_context" in result
assert "title" in result
# With appropriate defaults
assert result["locale"] == "en-US"
assert result["has_enough_context"] is False
assert result["title"] == "Step 1" # Inferred from first step
def test_issue_710_scenario_passes_pydantic_validation(self):
"""Test that fixed plan can be validated by Pydantic schema (reproduces issue #710 fix)."""
from src.prompts.planner_model import Plan as PlanModel
# Simulate the problematic plan from issue #710 that's missing required fields
plan = {
"content": '{\n "locale": "en-US",\n "has_enough_context": false,\n "title": "Test Plan",\n "steps": [\n {\n "need_search": true,\n "title": "Research Step",\n "description": "Gather data",\n "step_type": "research"\n }\n ]\n}',
"reasoning": 2368
}
# Extract just the JSON part (simulating what would happen in the actual flow)
import json
json_content = json.loads(plan["content"])
# Remove required fields to simulate the issue
del json_content["locale"]
del json_content["has_enough_context"]
del json_content["title"]
# First validate and fix
fixed_plan = validate_and_fix_plan(json_content)
# Then try Pydantic validation (should not raise)
validated = PlanModel.model_validate(fixed_plan)
# Validation should succeed
assert validated.locale == "en-US"
assert validated.has_enough_context is False
assert validated.title == "Research Step" # Inferred from first step
def test_existing_fields_preserved(self):
"""Test that existing valid fields are preserved."""
plan = {
"locale": "zh-CN",
"has_enough_context": True,
"title": "Existing Title",
"steps": []
}
result = validate_and_fix_plan(plan)
# Existing values should be preserved
assert result["locale"] == "zh-CN"
assert result["has_enough_context"] is True
assert result["title"] == "Existing Title"
class TestValidateAndFixPlanIssue650:
"""Specific tests for Issue #650 scenarios."""