fix(node): deal with the plan_data content with multimodal message (#846)

* fix(node): deal with the plan_data content with multimodal message

* Update the code with review comments
This commit is contained in:
Willem Jiang
2026-02-02 20:31:58 +08:00
committed by GitHub
parent 3adb4e90cb
commit e3e7a83f40
2 changed files with 177 additions and 1 deletions

View File

@@ -421,6 +421,26 @@ def extract_plan_content(plan_data: str | dict | Any) -> str:
if isinstance(plan_data["content"], dict):
logger.debug("Converting content field dict to JSON string")
return json.dumps(plan_data["content"], ensure_ascii=False)
if isinstance(plan_data["content"], list):
# Handle multimodal message format where content is a list
# Extract text content from the list structure
logger.debug(f"Extracting plan content from multimodal list format with {len(plan_data['content'])} elements")
for item in plan_data["content"]:
if isinstance(item, str) and item.strip():
# Return the first valid text content found
# We only take the first one because plan content should be a single JSON object
# Joining multiple text parts with newlines would produce invalid JSON
return item
elif isinstance(item, dict):
# Handle content block format like {"type": "text", "text": "..."}
if item.get("type") == "text" and "text" in item:
return item["text"]
elif "content" in item and isinstance(item["content"], str):
return item["content"]
# No valid text content found - raise ValueError to trigger error handling
# Do NOT use json.dumps() here as it would produce a JSON array that causes
# Plan.model_validate() to fail with ValidationError (issue #845)
raise ValueError(f"No valid text content found in multimodal list: {plan_data['content']}")
else:
logger.warning(f"Unexpected type for 'content' field in plan_data dict: {type(plan_data['content']).__name__}, converting to string")
return str(plan_data["content"])
@@ -494,7 +514,7 @@ def human_feedback_node(
# Validate and fix plan to ensure web search requirements are met
configurable = Configuration.from_runnable_config(config)
new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search, configurable.enable_web_search)
except (json.JSONDecodeError, AttributeError) as e:
except (json.JSONDecodeError, AttributeError, ValueError) as e:
logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}")
if isinstance(current_plan, dict) and "content" in original_plan:
logger.warning(f"Plan appears to be an AIMessage object with content field")

View File

@@ -14,6 +14,7 @@ from src.graph.nodes import (
researcher_node,
extract_plan_content,
)
from src.prompts.planner_model import Plan
class TestExtractPlanContent:
@@ -188,6 +189,161 @@ class TestExtractPlanContent:
assert len(parsed_result["steps"]) == 1
assert parsed_result["steps"][0]["title"] == "收集埃菲尔铁塔和世界最高建筑的高度数据"
def test_extract_plan_content_with_multimodal_list_issue_845(self):
    """Check that a one-element list under "content" yields its text (issue #845).

    The payload wraps a single JSON string in a list; the extractor is
    expected to hand that string back unchanged so it can be decoded as JSON.
    """
    expected_text = '{"locale": "en-US", "has_enough_context": false, "title": "Test Plan", "steps": []}'

    extracted = extract_plan_content({"content": [expected_text]})

    # The list wrapper must be stripped, leaving the original string.
    assert extracted == expected_text
    # The extracted string must decode as JSON.
    decoded = json.loads(extracted)
    assert decoded["locale"] == "en-US"
def test_extract_plan_content_with_multimodal_list_mixed_content(self):
    """Check that a nested list placed after the text element is ignored."""
    expected_text = '{"locale": "zh-CN", "title": "测试计划", "steps": []}'
    # Shape under test: [text, [reference, reference]].
    payload = {"content": [expected_text, ["ref1", "ref2"]]}

    extracted = extract_plan_content(payload)

    # Only the leading text element should come back.
    assert extracted == expected_text
    decoded = json.loads(extracted)
    assert decoded["title"] == "测试计划"
def test_extract_plan_content_with_multimodal_content_blocks(self):
    """Check extraction from a typed text block: [{"type": "text", "text": ...}]."""
    expected_text = '{"locale": "en-US", "title": "Block Test", "steps": []}'
    text_block = {"type": "text", "text": expected_text}

    extracted = extract_plan_content({"content": [text_block]})

    assert extracted == expected_text
    decoded = json.loads(extracted)
    assert decoded["title"] == "Block Test"
def test_extract_plan_content_with_generic_content_dict_format(self):
    """Check extraction from a list item shaped like {"content": "..."}.

    This covers list items that carry their text under a "content" key
    rather than the {"type": "text", "text": "..."} block layout.
    """
    expected_text = '{"locale": "en-US", "title": "Generic Content Test", "has_enough_context": true, "steps": []}'
    # Shape under test: [{"content": "..."}].
    inner_item = {"content": expected_text}

    extracted = extract_plan_content({"content": [inner_item]})

    assert extracted == expected_text
    decoded = json.loads(extracted)
    assert decoded["title"] == "Generic Content Test"
def test_extract_plan_content_with_empty_multimodal_list(self):
    """Test that a multimodal list with no usable text raises ValueError.

    Exercises both an empty string and whitespace-only strings as the leading
    element, each followed by a nested list (the shape reported in issue #845:
    ['', ['XXXXXXXX']]). None of these items is a non-blank string or a dict,
    so the extractor has nothing to return.
    """
    for blank_text in ("", "   ", "\n\t"):
        content_dict_empty = {"content": [blank_text, ["XXXXXXXX"]]}
        # A ValueError is expected instead of a serialized JSON array, which
        # Plan.model_validate() would reject.
        with pytest.raises(ValueError) as exc_info:
            extract_plan_content(content_dict_empty)
        assert "No valid text content found in multimodal list" in str(exc_info.value)
def test_extract_plan_content_multimodal_uses_first_text_only(self):
    """Check that only the first non-blank text element of the list is returned.

    Two JSON documents joined by a newline would not decode as one JSON
    object, so the extractor is expected to return the first and drop the rest.
    """
    leading_json = '{"locale": "en-US", "title": "First Plan", "has_enough_context": true, "steps": []}'
    trailing_json = '{"locale": "zh-CN", "title": "Second Plan", "has_enough_context": false, "steps": []}'

    extracted = extract_plan_content({"content": [leading_json, trailing_json]})

    # Exactly the first string, with nothing appended via newline joining.
    assert extracted == leading_json
    assert "\n" not in extracted

    # The returned text must still decode as a single JSON object.
    decoded = json.loads(extracted)
    assert decoded["title"] == "First Plan"
    assert decoded["locale"] == "en-US"
def test_extract_plan_content_multimodal_full_flow_issue_845(self):
    """End-to-end check for issue #845: extract -> json.loads -> Plan validation.

    For each payload shape this verifies that:
    1. the extracted text decodes as JSON,
    2. the decoded value is a dict (not a list), and
    3. the dict passes the Plan model's validator.

    Note: validation goes through ``__pydantic_validator__.validate_python``
    rather than ``Plan.model_validate``; this is intended to sidestep a
    fixture that patches ``Plan.model_validate`` for other tests (the fixture
    itself is outside this view).
    """
    # Local import so the validator is taken straight from the model class.
    from src.prompts.planner_model import Plan as PlanModel

    validate_plan = PlanModel.__pydantic_validator__.validate_python

    # A plan document intended to match the Plan model schema.
    plan_json = json.dumps(
        {
            "locale": "en-US",
            "has_enough_context": True,
            "thought": "Test thought",
            "title": "Test Plan Title",
            "steps": [
                {
                    "need_search": True,
                    "title": "Step 1",
                    "description": "Step 1 description",
                    "step_type": "research",
                }
            ],
        },
        ensure_ascii=False,
    )

    def extract_and_validate(payload):
        # Run the full pipeline for one payload and return the validated plan.
        decoded = json.loads(extract_plan_content(payload))
        # Key assertion for issue #845: a list here would fail model validation.
        assert isinstance(decoded, dict), f"Expected dict but got {type(decoded).__name__}"
        return validate_plan(decoded)

    # Case 1: list holding the plain JSON string.
    from_plain_text = extract_and_validate({"content": [plan_json]})
    assert from_plain_text.title == "Test Plan Title"
    assert from_plain_text.locale == "en-US"
    assert len(from_plain_text.steps) == 1

    # Case 2: list holding a typed text block.
    from_text_block = extract_and_validate({"content": [{"type": "text", "text": plan_json}]})
    assert from_text_block.title == "Test Plan Title"

    # Case 3: text followed by a nested list of references.
    from_mixed = extract_and_validate({"content": [plan_json, ["reference1", "reference2"]]})
    assert from_mixed.title == "Test Plan Title"
# Mock out get_llm_by_type here to avoid a ValueError
with patch("src.llms.llm.get_llm_by_type", return_value=MagicMock()):