fix: parsed json with extra tokens issue (#656)

Fixes #598 

* fix: parsed json with extra tokens issue

* Added unit test for json.ts

* Fix the issue that prevented the JSON unit test from running

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update the code per the code review suggestions

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Willem Jiang <143703838+willem-bd@users.noreply.github.com>
This commit is contained in:
Willem Jiang
2025-10-26 07:24:25 +08:00
committed by GitHub
parent fd5a9aeae4
commit c7a82b82b4
7 changed files with 779 additions and 7 deletions

View File

@@ -3,7 +3,7 @@
import json
from src.utils.json_utils import repair_json_output
from src.utils.json_utils import repair_json_output, sanitize_tool_response, _extract_json_from_content
class TestRepairJsonOutput:
@@ -106,3 +106,119 @@ class TestRepairJsonOutput:
# Should attempt to process as JSON since it contains ```json
assert isinstance(result, str)
assert result == '{"key": "value"}'
class TestExtractJsonFromContent:
    """Expectations for ``_extract_json_from_content`` on input with trailing text."""

    def test_json_with_extra_tokens_after_closing_brace(self):
        """An object followed by stray text is cut right after its closing brace."""
        raw = '{"key": "value"} extra tokens here'
        assert _extract_json_from_content(raw) == '{"key": "value"}'

    def test_json_with_extra_tokens_after_closing_bracket(self):
        """An array followed by stray text is cut right after its closing bracket."""
        raw = '[1, 2, 3] garbage data'
        assert _extract_json_from_content(raw) == '[1, 2, 3]'

    def test_nested_json_with_extra_tokens(self):
        """Nested containers are kept whole; only the trailing text is dropped."""
        raw = '{"nested": {"inner": [1, 2, 3]}} invalid text'
        assert _extract_json_from_content(raw) == '{"nested": {"inner": [1, 2, 3]}}'

    def test_json_with_string_containing_braces(self):
        """Braces inside a string value do not end the object early."""
        raw = '{"text": "this has {braces} in it"} extra'
        assert _extract_json_from_content(raw) == '{"text": "this has {braces} in it"}'

    def test_json_with_escaped_quotes(self):
        """Escaped quotes inside a string value do not terminate that string."""
        raw = '{"text": "quote \\"here\\""} junk'
        assert _extract_json_from_content(raw) == '{"text": "quote \\"here\\""}'

    def test_clean_json_no_extra_tokens(self):
        """Input that is already clean JSON comes back unchanged."""
        raw = '{"key": "value"}'
        assert _extract_json_from_content(raw) == '{"key": "value"}'

    def test_empty_object(self):
        """An empty object followed by text yields just ``{}``."""
        raw = '{} extra'
        assert _extract_json_from_content(raw) == '{}'

    def test_empty_array(self):
        """An empty array followed by text yields just ``[]``."""
        raw = '[] more stuff'
        assert _extract_json_from_content(raw) == '[]'

    def test_extra_closing_brace_no_opening(self):
        """A closing brace with no matching opener is not treated as a JSON end."""
        raw = '} garbage data'
        # No opening brace was ever seen, so the input is expected back as-is.
        assert _extract_json_from_content(raw) == raw

    def test_extra_closing_bracket_no_opening(self):
        """A closing bracket with no matching opener is not treated as a JSON end."""
        raw = '] garbage data'
        # No opening bracket was ever seen, so the input is expected back as-is.
        assert _extract_json_from_content(raw) == raw
class TestSanitizeToolResponse:
    """Expectations for ``sanitize_tool_response`` on plain, JSON, and oversized input."""

    def test_basic_sanitization(self):
        """Ordinary text passes through unchanged."""
        assert sanitize_tool_response("normal response") == "normal response"

    def test_json_with_extra_tokens(self):
        """Text trailing a JSON object is removed."""
        raw = '{"data": "value"} some garbage'
        assert sanitize_tool_response(raw) == '{"data": "value"}'

    def test_very_long_response_truncation(self):
        """Oversized input is shortened and marked with a trailing ellipsis."""
        oversized = "a" * 60000  # longer than the 50000 default limit
        sanitized = sanitize_tool_response(oversized)
        # Allow for the limit plus the three-character "..." suffix.
        assert len(sanitized) <= 50003
        assert sanitized.endswith("...")

    def test_custom_max_length(self):
        """An explicit ``max_length`` is used as the truncation limit."""
        oversized = "a" * 1000
        sanitized = sanitize_tool_response(oversized, max_length=100)
        # Allow for the limit plus the three-character "..." suffix.
        assert len(sanitized) <= 103
        assert sanitized.endswith("...")

    def test_control_character_removal(self):
        """NUL and SOH control characters do not appear in the output."""
        sanitized = sanitize_tool_response("text with \x00 null \x01 chars")
        for control_char in ("\x00", "\x01"):
            assert control_char not in sanitized

    def test_none_content(self):
        """Empty content comes back as an empty string.

        Despite the method name, this passes ``""`` rather than ``None``.
        """
        assert sanitize_tool_response("") == ""

    def test_whitespace_handling(self):
        """Leading and trailing whitespace is stripped."""
        raw = " text with spaces "
        assert sanitize_tool_response(raw) == "text with spaces"

    def test_json_array_with_extra_tokens(self):
        """Text trailing a JSON array of objects is removed."""
        raw = '[{"id": 1}, {"id": 2}] invalid stuff'
        assert sanitize_tool_response(raw) == '[{"id": 1}, {"id": 2}]'