fix: the exception of plan validation (#714)

* fix: Missing Required Fields in Plan Validation * fix: the exception of plan validation * Fixed the test errors * Addressed the comments of the PR reviews
2026-04-03 06:12:14 +08:00 · 2025-11-27 19:39:25 +08:00
parent bec97f02ae
commit 667916959b
3 changed files with 228 additions and 12 deletions
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -146,24 +146,29 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
    # SECTION 2: Enforce web search requirements
    # ============================================================
    if enforce_web_search:
-        # Check if any step has need_search=true
-        has_search_step = any(step.get("need_search", False) for step in steps)
+        # Check if any step has need_search=true (only check dict steps)
+        has_search_step = any(
+            step.get("need_search", False) 
+            for step in steps 
+            if isinstance(step, dict)
+        )

        if not has_search_step and steps:
            # Ensure first research step has web search enabled
            for idx, step in enumerate(steps):
-                if step.get("step_type") == "research":
+                if isinstance(step, dict) and step.get("step_type") == "research":
                    step["need_search"] = True
                    logger.info(f"Enforced web search on research step at index {idx}")
                    break
            else:
                # Fallback: If no research step exists, convert the first step to a research step with web search enabled.
                # This ensures that at least one step will perform a web search as required.
-                steps[0]["step_type"] = "research"
-                steps[0]["need_search"] = True
-                logger.info(
-                    "Converted first step to research with web search enforcement"
-                )
+                if isinstance(steps[0], dict):
+                    steps[0]["step_type"] = "research"
+                    steps[0]["need_search"] = True
+                    logger.info(
+                        "Converted first step to research with web search enforcement"
+                    )
        elif not has_search_step and not steps:
            # Add a default research step if no steps exist
            logger.warning("Plan has no steps. Adding default research step.")
@@ -176,6 +181,29 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
                }
            ]

+    # ============================================================
+    # SECTION 3: Ensure required Plan fields are present (Issue #710 fix)
+    # ============================================================
+    # Set locale from state if not present
+    if "locale" not in plan or not plan.get("locale"):
+        plan["locale"] = "en-US"  # Default locale
+        logger.info("Added missing locale field with default value 'en-US'")
+
+    # Ensure has_enough_context is present
+    if "has_enough_context" not in plan:
+        plan["has_enough_context"] = False  # Default value
+        logger.info("Added missing has_enough_context field with default value 'False'")
+
+    # Ensure title is present
+    if "title" not in plan or not plan.get("title"):
+        # Try to infer title from steps or use a default
+        if steps and isinstance(steps[0], dict) and "title" in steps[0]:
+            plan["title"] = steps[0]["title"]
+            logger.info(f"Inferred missing title from first step: {plan['title']}")
+        else:
+            plan["title"] = "Research Plan"  # Default title
+            logger.info("Added missing title field with default value 'Research Plan'")
+
    return plan


@@ -381,8 +409,20 @@ def extract_plan_content(plan_data: str | dict | Any) -> str:
        return plan_data.content
    elif isinstance(plan_data, dict):
        # If it's already a dictionary, convert to JSON string
-        logger.debug("Converting plan dictionary to JSON string")
-        return json.dumps(plan_data)
+        # Need to check if it's dict with content field (AIMessage-like)
+        if "content" in plan_data:
+            if isinstance(plan_data["content"], str):
+                logger.debug("Extracting plan content from dict with content field")
+                return plan_data["content"]
+            if isinstance(plan_data["content"], dict):
+                logger.debug("Converting content field dict to JSON string")
+                return json.dumps(plan_data["content"], ensure_ascii=False)
+            else:
+                logger.warning(f"Unexpected type for 'content' field in plan_data dict: {type(plan_data['content']).__name__}, converting to string")
+                return str(plan_data["content"])
+        else:
+            logger.debug("Converting plan dictionary to JSON string")
+            return json.dumps(plan_data)
    else:
        # For any other type, try to convert to string
        logger.warning(f"Unexpected plan data type {type(plan_data).__name__}, attempting to convert to string")
--- a/tests/integration/test_nodes.py
+++ b/tests/integration/test_nodes.py
@@ -116,6 +116,34 @@ class TestExtractPlanContent:
        result = extract_plan_content(empty_dict)
        assert result == expected_json

+    def test_extract_plan_content_with_content_dict(self):
+        """Test that extract_plan_content handles dictionaries with content."""
+        content_dict = {"content": {
+                "locale": "zh-CN",
+                "has_enough_context": False,
+                "title": "埃菲尔铁塔与世界最高建筑高度比较研究计划",
+                "thought": "要回答埃菲尔铁塔比世界最高建筑高多少倍的问题，我们需要知道埃菲尔铁塔的高度以及当前世界最高建筑的高度。",
+                "steps": [
+                    {
+                        "need_search": True,
+                        "title": "收集埃菲尔铁塔和世界最高建筑的高度数据",
+                        "description": "从可靠来源检索埃菲尔铁塔的确切高度以及目前被公认为世界最高建筑的建筑物及其高度数据。",
+                        "step_type": "research"
+                    }
+                ]
+            }
+        }
+        
+        result = extract_plan_content(content_dict)
+        # Verify the result can be parsed back to a dictionary
+        parsed_result = json.loads(result)
+        assert parsed_result == content_dict["content"]
+
+    def test_extract_plan_content_with_content_string(self):
+        content_dict = {"content": '{"locale": "en-US", "title": "Test"}'}
+        result = extract_plan_content(content_dict)
+        assert result == '{"locale": "en-US", "title": "Test"}'
+
    def test_extract_plan_content_issue_703_case(self):
        """Test that extract_plan_content handles the specific case from issue #703."""
        # This is the exact structure that was causing the error in issue #703
--- a/tests/unit/graph/test_plan_validation.py
+++ b/tests/unit/graph/test_plan_validation.py
@@ -167,8 +167,8 @@ class TestValidateAndFixPlanStepTypeRepair:
            validate_and_fix_plan(plan)
            # Should log repair operation
            mock_logger.info.assert_called()
-            call_args = str(mock_logger.info.call_args)
-            assert "Repaired missing step_type" in call_args
+            # Check that any of the info calls contains "Repaired missing step_type"
+            assert any("Repaired missing step_type" in str(call) for call in mock_logger.info.call_args_list)

    def test_non_dict_plan_returns_unchanged(self):
        """Test that non-dict plans are returned unchanged."""
@@ -363,6 +363,154 @@ class TestValidateAndFixPlanIntegration:
        assert result["steps"][1]["need_search"] is False


+class TestValidateAndFixPlanIssue710:
+    """Specific tests for Issue #710 scenarios - missing required Plan fields."""
+
+    def test_missing_locale_field_added(self):
+        """Test that missing locale field is added with default value."""
+        plan = {
+            "has_enough_context": True,
+            "title": "Test Plan",
+            "steps": []
+        }
+
+        result = validate_and_fix_plan(plan)
+
+        assert "locale" in result
+        assert result["locale"] == "en-US"
+
+    def test_empty_locale_field_replaced(self):
+        """Test that empty locale field is replaced with default value."""
+        plan = {
+            "locale": "",
+            "has_enough_context": True,
+            "title": "Test Plan",
+            "steps": []
+        }
+
+        result = validate_and_fix_plan(plan)
+
+        assert "locale" in result
+        assert result["locale"] == "en-US"
+
+    def test_missing_has_enough_context_field_added(self):
+        """Test that missing has_enough_context field is added with default value."""
+        plan = {
+            "locale": "en-US",
+            "title": "Test Plan",
+            "steps": []
+        }
+
+        result = validate_and_fix_plan(plan)
+
+        assert "has_enough_context" in result
+        assert result["has_enough_context"] is False
+
+    def test_missing_title_field_added_from_step(self):
+        """Test that missing title field is inferred from first step."""
+        plan = {
+            "locale": "en-US",
+            "has_enough_context": True,
+            "steps": [
+                {
+                    "need_search": True,
+                    "title": "Step Title",
+                    "description": "Step description",
+                    "step_type": "research"
+                }
+            ]
+        }
+
+        result = validate_and_fix_plan(plan)
+
+        assert "title" in result
+        assert result["title"] == "Step Title"
+
+    def test_missing_title_field_added_default(self):
+        """Test that missing title field is added with default when no steps available."""
+        plan = {
+            "locale": "en-US",
+            "has_enough_context": True,
+            "steps": []
+        }
+
+        result = validate_and_fix_plan(plan)
+
+        assert "title" in result
+        assert result["title"] == "Research Plan"
+
+    def test_all_required_fields_missing(self):
+        """Test that all missing required fields are added."""
+        plan = {
+            "steps": [
+                {
+                    "need_search": True,
+                    "title": "Step 1",
+                    "description": "Description",
+                    "step_type": "research"
+                }
+            ]
+        }
+
+        result = validate_and_fix_plan(plan)
+
+        # All required fields should be present
+        assert "locale" in result
+        assert "has_enough_context" in result
+        assert "title" in result
+
+        # With appropriate defaults
+        assert result["locale"] == "en-US"
+        assert result["has_enough_context"] is False
+        assert result["title"] == "Step 1"  # Inferred from first step
+
+    def test_issue_710_scenario_passes_pydantic_validation(self):
+        """Test that fixed plan can be validated by Pydantic schema (reproduces issue #710 fix)."""
+        from src.prompts.planner_model import Plan as PlanModel
+
+        # Simulate the problematic plan from issue #710 that's missing required fields
+        plan = {
+            "content": '{\n  "locale": "en-US",\n  "has_enough_context": false,\n  "title": "Test Plan",\n  "steps": [\n    {\n      "need_search": true,\n      "title": "Research Step",\n      "description": "Gather data",\n      "step_type": "research"\n    }\n  ]\n}',
+            "reasoning": 2368
+        }
+
+        # Extract just the JSON part (simulating what would happen in the actual flow)
+        import json
+        json_content = json.loads(plan["content"])
+
+        # Remove required fields to simulate the issue
+        del json_content["locale"]
+        del json_content["has_enough_context"]
+        del json_content["title"]
+
+        # First validate and fix
+        fixed_plan = validate_and_fix_plan(json_content)
+
+        # Then try Pydantic validation (should not raise)
+        validated = PlanModel.model_validate(fixed_plan)
+
+        # Validation should succeed
+        assert validated.locale == "en-US"
+        assert validated.has_enough_context is False
+        assert validated.title == "Research Step"  # Inferred from first step
+
+    def test_existing_fields_preserved(self):
+        """Test that existing valid fields are preserved."""
+        plan = {
+            "locale": "zh-CN",
+            "has_enough_context": True,
+            "title": "Existing Title",
+            "steps": []
+        }
+
+        result = validate_and_fix_plan(plan)
+
+        # Existing values should be preserved
+        assert result["locale"] == "zh-CN"
+        assert result["has_enough_context"] is True
+        assert result["title"] == "Existing Title"
+
+
 class TestValidateAndFixPlanIssue650:
    """Specific tests for Issue #650 scenarios."""