mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-12 01:54:45 +08:00
fix(node):deal with the plan_data content with multipmodal message (#846)
* fix(node):deal with the plan_data content with multipmodal message * Update the code with review comments
This commit is contained in:
@@ -421,6 +421,26 @@ def extract_plan_content(plan_data: str | dict | Any) -> str:
|
||||
if isinstance(plan_data["content"], dict):
|
||||
logger.debug("Converting content field dict to JSON string")
|
||||
return json.dumps(plan_data["content"], ensure_ascii=False)
|
||||
if isinstance(plan_data["content"], list):
|
||||
# Handle multimodal message format where content is a list
|
||||
# Extract text content from the list structure
|
||||
logger.debug(f"Extracting plan content from multimodal list format with {len(plan_data['content'])} elements")
|
||||
for item in plan_data["content"]:
|
||||
if isinstance(item, str) and item.strip():
|
||||
# Return the first valid text content found
|
||||
# We only take the first one because plan content should be a single JSON object
|
||||
# Joining multiple text parts with newlines would produce invalid JSON
|
||||
return item
|
||||
elif isinstance(item, dict):
|
||||
# Handle content block format like {"type": "text", "text": "..."}
|
||||
if item.get("type") == "text" and "text" in item:
|
||||
return item["text"]
|
||||
elif "content" in item and isinstance(item["content"], str):
|
||||
return item["content"]
|
||||
# No valid text content found - raise ValueError to trigger error handling
|
||||
# Do NOT use json.dumps() here as it would produce a JSON array that causes
|
||||
# Plan.model_validate() to fail with ValidationError (issue #845)
|
||||
raise ValueError(f"No valid text content found in multimodal list: {plan_data['content']}")
|
||||
else:
|
||||
logger.warning(f"Unexpected type for 'content' field in plan_data dict: {type(plan_data['content']).__name__}, converting to string")
|
||||
return str(plan_data["content"])
|
||||
@@ -494,7 +514,7 @@ def human_feedback_node(
|
||||
# Validate and fix plan to ensure web search requirements are met
|
||||
configurable = Configuration.from_runnable_config(config)
|
||||
new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search, configurable.enable_web_search)
|
||||
except (json.JSONDecodeError, AttributeError) as e:
|
||||
except (json.JSONDecodeError, AttributeError, ValueError) as e:
|
||||
logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}")
|
||||
if isinstance(current_plan, dict) and "content" in original_plan:
|
||||
logger.warning(f"Plan appears to be an AIMessage object with content field")
|
||||
|
||||
@@ -14,6 +14,7 @@ from src.graph.nodes import (
|
||||
researcher_node,
|
||||
extract_plan_content,
|
||||
)
|
||||
from src.prompts.planner_model import Plan
|
||||
|
||||
|
||||
class TestExtractPlanContent:
|
||||
@@ -188,6 +189,161 @@ class TestExtractPlanContent:
|
||||
assert len(parsed_result["steps"]) == 1
|
||||
assert parsed_result["steps"][0]["title"] == "收集埃菲尔铁塔和世界最高建筑的高度数据"
|
||||
|
||||
def test_extract_plan_content_with_multimodal_list_issue_845(self):
|
||||
"""Test that extract_plan_content handles multimodal message format (list type) from issue #845."""
|
||||
# This is the structure that causes ValidationError in issue #845
|
||||
# When content is a list like ['', ['XXXXXXXX']] from multimodal LLM models
|
||||
plan_json = '{"locale": "en-US", "has_enough_context": false, "title": "Test Plan", "steps": []}'
|
||||
content_dict_simple_list = {"content": [plan_json]}
|
||||
|
||||
result = extract_plan_content(content_dict_simple_list)
|
||||
# Should extract the text content from the list
|
||||
assert result == plan_json
|
||||
# Verify it can be parsed as JSON
|
||||
parsed_result = json.loads(result)
|
||||
assert parsed_result["locale"] == "en-US"
|
||||
|
||||
def test_extract_plan_content_with_multimodal_list_mixed_content(self):
|
||||
"""Test multimodal list with mixed content (text and references)."""
|
||||
plan_json = '{"locale": "zh-CN", "title": "测试计划", "steps": []}'
|
||||
# Simulate multimodal format: ['text_content', ['reference1', 'reference2']]
|
||||
content_dict_mixed = {"content": [plan_json, ["ref1", "ref2"]]}
|
||||
|
||||
result = extract_plan_content(content_dict_mixed)
|
||||
# Should extract only the text content, ignoring nested lists
|
||||
assert result == plan_json
|
||||
parsed_result = json.loads(result)
|
||||
assert parsed_result["title"] == "测试计划"
|
||||
|
||||
def test_extract_plan_content_with_multimodal_content_blocks(self):
|
||||
"""Test multimodal list with content block format."""
|
||||
plan_json = '{"locale": "en-US", "title": "Block Test", "steps": []}'
|
||||
# Simulate content block format: [{"type": "text", "text": "..."}]
|
||||
content_dict_blocks = {"content": [{"type": "text", "text": plan_json}]}
|
||||
|
||||
result = extract_plan_content(content_dict_blocks)
|
||||
assert result == plan_json
|
||||
parsed_result = json.loads(result)
|
||||
assert parsed_result["title"] == "Block Test"
|
||||
|
||||
def test_extract_plan_content_with_generic_content_dict_format(self):
|
||||
"""Test multimodal list with generic {"content": "..."} dict format.
|
||||
|
||||
Some LLM providers may use a simpler content block format where the dict
|
||||
has a "content" field directly instead of {"type": "text", "text": "..."}.
|
||||
This test ensures that format is also handled correctly.
|
||||
"""
|
||||
plan_json = '{"locale": "en-US", "title": "Generic Content Test", "has_enough_context": true, "steps": []}'
|
||||
# Simulate generic content dict format: [{"content": "..."}]
|
||||
content_dict = {"content": [{"content": plan_json}]}
|
||||
|
||||
result = extract_plan_content(content_dict)
|
||||
assert result == plan_json
|
||||
parsed_result = json.loads(result)
|
||||
assert parsed_result["title"] == "Generic Content Test"
|
||||
|
||||
def test_extract_plan_content_with_empty_multimodal_list(self):
|
||||
"""Test multimodal list with empty or whitespace-only content raises ValueError."""
|
||||
# Simulate the case from issue #845: ['', ['XXXXXXXX']]
|
||||
content_dict_empty = {"content": ["", ["XXXXXXXX"]]}
|
||||
|
||||
# Should raise ValueError since no valid text content found
|
||||
# This prevents the original bug where json.dumps would create a JSON array
|
||||
# that causes Plan.model_validate() to fail
|
||||
with pytest.raises(ValueError) as exc_info:
|
||||
extract_plan_content(content_dict_empty)
|
||||
assert "No valid text content found in multimodal list" in str(exc_info.value)
|
||||
|
||||
def test_extract_plan_content_multimodal_uses_first_text_only(self):
|
||||
"""Test that only the first valid text element is used from multimodal list.
|
||||
|
||||
When multiple text parts are present, joining them with newlines would produce
|
||||
invalid JSON. Therefore, we only use the first valid text element.
|
||||
"""
|
||||
first_json = '{"locale": "en-US", "title": "First Plan", "has_enough_context": true, "steps": []}'
|
||||
second_json = '{"locale": "zh-CN", "title": "Second Plan", "has_enough_context": false, "steps": []}'
|
||||
|
||||
# Multiple JSON strings in the list - only the first should be used
|
||||
content_dict = {"content": [first_json, second_json]}
|
||||
result = extract_plan_content(content_dict)
|
||||
|
||||
# Should return only the first JSON, not joined with newlines
|
||||
assert result == first_json
|
||||
assert "\n" not in result # Ensure no newline joining occurred
|
||||
|
||||
# Verify the result is valid JSON
|
||||
parsed_result = json.loads(result)
|
||||
assert parsed_result["title"] == "First Plan"
|
||||
assert parsed_result["locale"] == "en-US"
|
||||
|
||||
def test_extract_plan_content_multimodal_full_flow_issue_845(self):
|
||||
"""Test complete flow: multimodal content -> extract -> parse -> Plan.model_validate().
|
||||
|
||||
This is a comprehensive end-to-end test for issue #845 that validates:
|
||||
1. The extracted result can be successfully parsed as JSON
|
||||
2. The parsed result is a dict (not a list)
|
||||
3. The parsed dict can be validated by Plan.model_validate() without raising ValidationError
|
||||
|
||||
Note: This test uses the real Plan.model_validate to verify the fix, bypassing the
|
||||
autouse fixture that patches Plan.model_validate globally for other tests.
|
||||
"""
|
||||
# Import Plan directly and get the real model_validate method
|
||||
from src.prompts.planner_model import Plan as PlanModel
|
||||
# Get the real model_validate method (bypass any patches)
|
||||
real_model_validate = PlanModel.__pydantic_validator__.validate_python
|
||||
|
||||
# Create a valid plan JSON that matches the Plan model schema
|
||||
valid_plan = {
|
||||
"locale": "en-US",
|
||||
"has_enough_context": True,
|
||||
"thought": "Test thought",
|
||||
"title": "Test Plan Title",
|
||||
"steps": [
|
||||
{
|
||||
"need_search": True,
|
||||
"title": "Step 1",
|
||||
"description": "Step 1 description",
|
||||
"step_type": "research"
|
||||
}
|
||||
]
|
||||
}
|
||||
plan_json = json.dumps(valid_plan, ensure_ascii=False)
|
||||
|
||||
# Test case 1: Multimodal list with valid text content
|
||||
content_dict = {"content": [plan_json]}
|
||||
result = extract_plan_content(content_dict)
|
||||
|
||||
# Verify result can be parsed as JSON
|
||||
parsed_result = json.loads(result)
|
||||
|
||||
# Verify parsed result is a dict, not a list - this is the KEY assertion for issue #845
|
||||
# The original bug caused parsed_result to be a list, which fails Plan.model_validate()
|
||||
assert isinstance(parsed_result, dict), f"Expected dict but got {type(parsed_result).__name__}"
|
||||
|
||||
# Verify it can be validated by the real Plan.model_validate() without raising ValidationError
|
||||
# This is the key assertion - if parsed_result was a list (the original bug),
|
||||
# this would raise: ValidationError: 1 validation error for PlanInput should be a valid dictionary
|
||||
validated_plan = real_model_validate(parsed_result)
|
||||
assert validated_plan.title == "Test Plan Title"
|
||||
assert validated_plan.locale == "en-US"
|
||||
assert len(validated_plan.steps) == 1
|
||||
|
||||
# Test case 2: Multimodal list with content block format
|
||||
content_dict_blocks = {"content": [{"type": "text", "text": plan_json}]}
|
||||
result_blocks = extract_plan_content(content_dict_blocks)
|
||||
parsed_blocks = json.loads(result_blocks)
|
||||
assert isinstance(parsed_blocks, dict), f"Expected dict but got {type(parsed_blocks).__name__}"
|
||||
validated_blocks = real_model_validate(parsed_blocks)
|
||||
assert validated_blocks.title == "Test Plan Title"
|
||||
|
||||
# Test case 3: Mixed content - should extract only valid text
|
||||
content_dict_mixed = {"content": [plan_json, ["reference1", "reference2"]]}
|
||||
result_mixed = extract_plan_content(content_dict_mixed)
|
||||
parsed_mixed = json.loads(result_mixed)
|
||||
assert isinstance(parsed_mixed, dict), f"Expected dict but got {type(parsed_mixed).__name__}"
|
||||
validated_mixed = real_model_validate(parsed_mixed)
|
||||
assert validated_mixed.title == "Test Plan Title"
|
||||
|
||||
|
||||
# 在这里 mock 掉 get_llm_by_type,避免 ValueError
|
||||
with patch("src.llms.llm.get_llm_by_type", return_value=MagicMock()):
|
||||
|
||||
Reference in New Issue
Block a user