From e3e7a83f40ac852e6b5befc93466d2d4b0cf3821 Mon Sep 17 00:00:00 2001 From: Willem Jiang Date: Mon, 2 Feb 2026 20:31:58 +0800 Subject: [PATCH] fix(node):deal with the plan_data content with multimodal message (#846) * fix(node):deal with the plan_data content with multimodal message * Update the code with review comments --- src/graph/nodes.py | 22 ++++- tests/integration/test_nodes.py | 156 ++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 1 deletion(-) diff --git a/src/graph/nodes.py b/src/graph/nodes.py index 54f41d8..f0f8ae4 100644 --- a/src/graph/nodes.py +++ b/src/graph/nodes.py @@ -421,6 +421,26 @@ def extract_plan_content(plan_data: str | dict | Any) -> str: if isinstance(plan_data["content"], dict): logger.debug("Converting content field dict to JSON string") return json.dumps(plan_data["content"], ensure_ascii=False) + if isinstance(plan_data["content"], list): + # Handle multimodal message format where content is a list + # Extract text content from the list structure + logger.debug(f"Extracting plan content from multimodal list format with {len(plan_data['content'])} elements") + for item in plan_data["content"]: + if isinstance(item, str) and item.strip(): + # Return the first valid text content found + # We only take the first one because plan content should be a single JSON object + # Joining multiple text parts with newlines would produce invalid JSON + return item + elif isinstance(item, dict): + # Handle content block format like {"type": "text", "text": "..."} + if item.get("type") == "text" and "text" in item: + return item["text"] + elif "content" in item and isinstance(item["content"], str): + return item["content"] + # No valid text content found - raise ValueError to trigger error handling + # Do NOT use json.dumps() here as it would produce a JSON array that causes + # Plan.model_validate() to fail with ValidationError (issue #845) + raise ValueError(f"No valid text content found in multimodal list: 
{plan_data['content']}") else: logger.warning(f"Unexpected type for 'content' field in plan_data dict: {type(plan_data['content']).__name__}, converting to string") return str(plan_data["content"]) @@ -494,7 +514,7 @@ def human_feedback_node( # Validate and fix plan to ensure web search requirements are met configurable = Configuration.from_runnable_config(config) new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search, configurable.enable_web_search) - except (json.JSONDecodeError, AttributeError) as e: + except (json.JSONDecodeError, AttributeError, ValueError) as e: logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}") if isinstance(current_plan, dict) and "content" in original_plan: logger.warning(f"Plan appears to be an AIMessage object with content field") diff --git a/tests/integration/test_nodes.py b/tests/integration/test_nodes.py index 5ff23f5..8445bfa 100644 --- a/tests/integration/test_nodes.py +++ b/tests/integration/test_nodes.py @@ -14,6 +14,7 @@ from src.graph.nodes import ( researcher_node, extract_plan_content, ) +from src.prompts.planner_model import Plan class TestExtractPlanContent: @@ -188,6 +189,161 @@ class TestExtractPlanContent: assert len(parsed_result["steps"]) == 1 assert parsed_result["steps"][0]["title"] == "收集埃菲尔铁塔和世界最高建筑的高度数据" + def test_extract_plan_content_with_multimodal_list_issue_845(self): + """Test that extract_plan_content handles multimodal message format (list type) from issue #845.""" + # This is the structure that causes ValidationError in issue #845 + # When content is a list like ['', ['XXXXXXXX']] from multimodal LLM models + plan_json = '{"locale": "en-US", "has_enough_context": false, "title": "Test Plan", "steps": []}' + content_dict_simple_list = {"content": [plan_json]} + + result = extract_plan_content(content_dict_simple_list) + # Should extract the text content from the list + assert result == plan_json + # Verify it can be parsed as JSON + 
parsed_result = json.loads(result) + assert parsed_result["locale"] == "en-US" + + def test_extract_plan_content_with_multimodal_list_mixed_content(self): + """Test multimodal list with mixed content (text and references).""" + plan_json = '{"locale": "zh-CN", "title": "测试计划", "steps": []}' + # Simulate multimodal format: ['text_content', ['reference1', 'reference2']] + content_dict_mixed = {"content": [plan_json, ["ref1", "ref2"]]} + + result = extract_plan_content(content_dict_mixed) + # Should extract only the text content, ignoring nested lists + assert result == plan_json + parsed_result = json.loads(result) + assert parsed_result["title"] == "测试计划" + + def test_extract_plan_content_with_multimodal_content_blocks(self): + """Test multimodal list with content block format.""" + plan_json = '{"locale": "en-US", "title": "Block Test", "steps": []}' + # Simulate content block format: [{"type": "text", "text": "..."}] + content_dict_blocks = {"content": [{"type": "text", "text": plan_json}]} + + result = extract_plan_content(content_dict_blocks) + assert result == plan_json + parsed_result = json.loads(result) + assert parsed_result["title"] == "Block Test" + + def test_extract_plan_content_with_generic_content_dict_format(self): + """Test multimodal list with generic {"content": "..."} dict format. + + Some LLM providers may use a simpler content block format where the dict + has a "content" field directly instead of {"type": "text", "text": "..."}. + This test ensures that format is also handled correctly. 
+ """ + plan_json = '{"locale": "en-US", "title": "Generic Content Test", "has_enough_context": true, "steps": []}' + # Simulate generic content dict format: [{"content": "..."}] + content_dict = {"content": [{"content": plan_json}]} + + result = extract_plan_content(content_dict) + assert result == plan_json + parsed_result = json.loads(result) + assert parsed_result["title"] == "Generic Content Test" + + def test_extract_plan_content_with_empty_multimodal_list(self): + """Test multimodal list with empty or whitespace-only content raises ValueError.""" + # Simulate the case from issue #845: ['', ['XXXXXXXX']] + content_dict_empty = {"content": ["", ["XXXXXXXX"]]} + + # Should raise ValueError since no valid text content found + # This prevents the original bug where json.dumps would create a JSON array + # that causes Plan.model_validate() to fail + with pytest.raises(ValueError) as exc_info: + extract_plan_content(content_dict_empty) + assert "No valid text content found in multimodal list" in str(exc_info.value) + + def test_extract_plan_content_multimodal_uses_first_text_only(self): + """Test that only the first valid text element is used from multimodal list. + + When multiple text parts are present, joining them with newlines would produce + invalid JSON. Therefore, we only use the first valid text element. 
+ """ + first_json = '{"locale": "en-US", "title": "First Plan", "has_enough_context": true, "steps": []}' + second_json = '{"locale": "zh-CN", "title": "Second Plan", "has_enough_context": false, "steps": []}' + + # Multiple JSON strings in the list - only the first should be used + content_dict = {"content": [first_json, second_json]} + result = extract_plan_content(content_dict) + + # Should return only the first JSON, not joined with newlines + assert result == first_json + assert "\n" not in result # Ensure no newline joining occurred + + # Verify the result is valid JSON + parsed_result = json.loads(result) + assert parsed_result["title"] == "First Plan" + assert parsed_result["locale"] == "en-US" + + def test_extract_plan_content_multimodal_full_flow_issue_845(self): + """Test complete flow: multimodal content -> extract -> parse -> Plan.model_validate(). + + This is a comprehensive end-to-end test for issue #845 that validates: + 1. The extracted result can be successfully parsed as JSON + 2. The parsed result is a dict (not a list) + 3. The parsed dict can be validated by Plan.model_validate() without raising ValidationError + + Note: This test uses the real Plan.model_validate to verify the fix, bypassing the + autouse fixture that patches Plan.model_validate globally for other tests. 
+ """ + # Import Plan directly and get the real model_validate method + from src.prompts.planner_model import Plan as PlanModel + # Get the real model_validate method (bypass any patches) + real_model_validate = PlanModel.__pydantic_validator__.validate_python + + # Create a valid plan JSON that matches the Plan model schema + valid_plan = { + "locale": "en-US", + "has_enough_context": True, + "thought": "Test thought", + "title": "Test Plan Title", + "steps": [ + { + "need_search": True, + "title": "Step 1", + "description": "Step 1 description", + "step_type": "research" + } + ] + } + plan_json = json.dumps(valid_plan, ensure_ascii=False) + + # Test case 1: Multimodal list with valid text content + content_dict = {"content": [plan_json]} + result = extract_plan_content(content_dict) + + # Verify result can be parsed as JSON + parsed_result = json.loads(result) + + # Verify parsed result is a dict, not a list - this is the KEY assertion for issue #845 + # The original bug caused parsed_result to be a list, which fails Plan.model_validate() + assert isinstance(parsed_result, dict), f"Expected dict but got {type(parsed_result).__name__}" + + # Verify it can be validated by the real Plan.model_validate() without raising ValidationError + # This is the key assertion - if parsed_result was a list (the original bug), + # this would raise: ValidationError: 1 validation error for PlanInput should be a valid dictionary + validated_plan = real_model_validate(parsed_result) + assert validated_plan.title == "Test Plan Title" + assert validated_plan.locale == "en-US" + assert len(validated_plan.steps) == 1 + + # Test case 2: Multimodal list with content block format + content_dict_blocks = {"content": [{"type": "text", "text": plan_json}]} + result_blocks = extract_plan_content(content_dict_blocks) + parsed_blocks = json.loads(result_blocks) + assert isinstance(parsed_blocks, dict), f"Expected dict but got {type(parsed_blocks).__name__}" + validated_blocks = 
real_model_validate(parsed_blocks) + assert validated_blocks.title == "Test Plan Title" + + # Test case 3: Mixed content - should extract only valid text + content_dict_mixed = {"content": [plan_json, ["reference1", "reference2"]]} + result_mixed = extract_plan_content(content_dict_mixed) + parsed_mixed = json.loads(result_mixed) + assert isinstance(parsed_mixed, dict), f"Expected dict but got {type(parsed_mixed).__name__}" + validated_mixed = real_model_validate(parsed_mixed) + assert validated_mixed.title == "Test Plan Title" + # 在这里 mock 掉 get_llm_by_type,避免 ValueError with patch("src.llms.llm.get_llm_by_type", return_value=MagicMock()):