mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-18 12:04:45 +08:00
fix: parsed json with extra tokens issue (#656)
Fixes #598

* fix: parsed json with extra tokens issue
* Added unit test for json.ts
* fix the json unit test running issue
* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update the code with code review suggestion

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Willem Jiang <143703838+willem-bd@users.noreply.github.com>
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
|
||||
import json
|
||||
|
||||
from src.utils.json_utils import repair_json_output
|
||||
from src.utils.json_utils import repair_json_output, sanitize_tool_response, _extract_json_from_content
|
||||
|
||||
|
||||
class TestRepairJsonOutput:
|
||||
@@ -106,3 +106,119 @@ class TestRepairJsonOutput:
|
||||
# Should attempt to process as JSON since it contains ```json
|
||||
assert isinstance(result, str)
|
||||
assert result == '{"key": "value"}'
|
||||
|
||||
|
||||
class TestExtractJsonFromContent:
    """Expectations for ``_extract_json_from_content``.

    Each case feeds a string to the helper and pins the exact string it is
    expected to hand back: the leading JSON value when one is present, or the
    untouched input when the text starts with an unmatched closer.
    """

    def test_json_with_extra_tokens_after_closing_brace(self):
        """An object followed by stray words is reduced to the object alone."""
        raw = '{"key": "value"} extra tokens here'
        assert _extract_json_from_content(raw) == '{"key": "value"}'

    def test_json_with_extra_tokens_after_closing_bracket(self):
        """An array followed by stray words is reduced to the array alone."""
        raw = '[1, 2, 3] garbage data'
        assert _extract_json_from_content(raw) == '[1, 2, 3]'

    def test_nested_json_with_extra_tokens(self):
        """Nested object/array structure is kept whole; the trailing text goes."""
        raw = '{"nested": {"inner": [1, 2, 3]}} invalid text'
        assert _extract_json_from_content(raw) == '{"nested": {"inner": [1, 2, 3]}}'

    def test_json_with_string_containing_braces(self):
        """Braces inside a string value must not end the object early."""
        raw = '{"text": "this has {braces} in it"} extra'
        assert _extract_json_from_content(raw) == '{"text": "this has {braces} in it"}'

    def test_json_with_escaped_quotes(self):
        """Escaped quotes inside a string value must not end the string early."""
        raw = '{"text": "quote \\"here\\""} junk'
        assert _extract_json_from_content(raw) == '{"text": "quote \\"here\\""}'

    def test_clean_json_no_extra_tokens(self):
        """Input that is already a bare object comes back unchanged."""
        raw = '{"key": "value"}'
        assert _extract_json_from_content(raw) == '{"key": "value"}'

    def test_empty_object(self):
        """An empty object followed by text is reduced to ``{}``."""
        raw = '{} extra'
        assert _extract_json_from_content(raw) == '{}'

    def test_empty_array(self):
        """An empty array followed by text is reduced to ``[]``."""
        raw = '[] more stuff'
        assert _extract_json_from_content(raw) == '[]'

    def test_extra_closing_brace_no_opening(self):
        """A leading ``}`` with no opener is not treated as the end of a value."""
        raw = '} garbage data'
        # No opening brace was seen, so the input is expected back as-is.
        assert _extract_json_from_content(raw) == raw

    def test_extra_closing_bracket_no_opening(self):
        """A leading ``]`` with no opener is not treated as the end of a value."""
        raw = '] garbage data'
        # No opening bracket was seen, so the input is expected back as-is.
        assert _extract_json_from_content(raw) == raw
|
||||
|
||||
|
||||
class TestSanitizeToolResponse:
    """Expectations for ``sanitize_tool_response``.

    Covers pass-through of ordinary text, removal of text trailing a JSON
    value, length capping with a ``"..."`` suffix, control-character removal,
    empty input, and trimming of surrounding whitespace.
    """

    def test_basic_sanitization(self):
        """Ordinary text passes through unchanged."""
        assert sanitize_tool_response("normal response") == "normal response"

    def test_json_with_extra_tokens(self):
        """Text after a JSON object is dropped from the response."""
        raw = '{"data": "value"} some garbage'
        assert sanitize_tool_response(raw) == '{"data": "value"}'

    def test_very_long_response_truncation(self):
        """A response over the default limit is cut and ends with ``...``."""
        oversized = "a" * 60000  # Exceeds default max of 50000
        shortened = sanitize_tool_response(oversized)
        assert len(shortened) <= 50003  # 50000 + "..."
        assert shortened.endswith("...")

    def test_custom_max_length(self):
        """An explicit ``max_length`` is honoured the same way as the default."""
        oversized = "a" * 1000
        shortened = sanitize_tool_response(oversized, max_length=100)
        assert len(shortened) <= 103  # 100 + "..."
        assert shortened.endswith("...")

    def test_control_character_removal(self):
        """NUL and SOH characters do not survive sanitization."""
        cleaned = sanitize_tool_response("text with \x00 null \x01 chars")
        assert "\x00" not in cleaned
        assert "\x01" not in cleaned

    def test_none_content(self):
        """An empty string comes back as an empty string.

        Despite the test name, the input exercised here is ``""``; ``None``
        itself is not passed to the function.
        """
        assert sanitize_tool_response("") == ""

    def test_whitespace_handling(self):
        """Leading and trailing spaces are stripped from the response."""
        padded = " text with spaces "
        assert sanitize_tool_response(padded) == "text with spaces"

    def test_json_array_with_extra_tokens(self):
        """Text after a JSON array of objects is dropped from the response."""
        raw = '[{"id": 1}, {"id": 2}] invalid stuff'
        assert sanitize_tool_response(raw) == '[{"id": 1}, {"id": 2}]'
|
||||
|
||||
Reference in New Issue
Block a user