fix: parsed json with extra tokens issue (#656)

Fixes #598 

* fix: parsed json with extra tokens issue

* Added unit test for json.ts

* fix the json unit test running issue

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update the code with code review suggestion

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Willem Jiang <143703838+willem-bd@users.noreply.github.com>
This commit is contained in:
Willem Jiang
2025-10-26 07:24:25 +08:00
committed by GitHub
parent fd5a9aeae4
commit c7a82b82b4
7 changed files with 779 additions and 7 deletions

View File

@@ -27,7 +27,7 @@ from src.tools import (
)
from src.tools.search import LoggedTavilySearch
from src.utils.context_manager import ContextManager, validate_message_content
from src.utils.json_utils import repair_json_output
from src.utils.json_utils import repair_json_output, sanitize_tool_response
from ..config import SELECTED_SEARCH_ENGINE, SearchEngine
from .types import State
@@ -834,6 +834,10 @@ async def _execute_agent_step(
# Process the result
response_content = result["messages"][-1].content
# Sanitize response to remove extra tokens and truncate if needed
response_content = sanitize_tool_response(str(response_content))
logger.debug(f"{agent_name.capitalize()} full response: {response_content}")
# Update the step with the execution result

View File

@@ -266,7 +266,7 @@ class ContextManager:
pass
def validate_message_content(messages: List[BaseMessage]) -> List[BaseMessage]:
def validate_message_content(messages: List[BaseMessage], max_content_length: int = 100000) -> List[BaseMessage]:
"""
Validate and fix all messages to ensure they have valid content before sending to LLM.
@@ -274,9 +274,11 @@ def validate_message_content(messages: List[BaseMessage]) -> List[BaseMessage]:
1. All messages have a content field
2. No message has None or empty string content (except for legitimate empty responses)
3. Complex objects (lists, dicts) are converted to JSON strings
4. Content is truncated if too long to prevent token overflow
Args:
messages: List of messages to validate
max_content_length: Maximum allowed content length per message (default 100000)
Returns:
List of validated messages with fixed content
@@ -304,6 +306,11 @@ def validate_message_content(messages: List[BaseMessage]) -> List[BaseMessage]:
logger.debug(f"Message {i} ({type(msg).__name__}) has non-string content type {type(msg.content).__name__}, converting to string")
msg.content = str(msg.content)
# Validate content length
if isinstance(msg.content, str) and len(msg.content) > max_content_length:
logger.warning(f"Message {i} content truncated from {len(msg.content)} to {max_content_length} chars")
msg.content = msg.content[:max_content_length].rstrip() + "..."
validated.append(msg)
except Exception as e:
logger.error(f"Error validating message {i}: {e}")

View File

@@ -3,6 +3,7 @@
import json
import logging
import re
from typing import Any
import json_repair
@@ -31,10 +32,84 @@ def sanitize_args(args: Any) -> str:
)
def _extract_json_from_content(content: str) -> str:
"""
Extract valid JSON from content that may have extra tokens.
Attempts to find the last valid JSON closing bracket and truncate there.
Handles both objects {} and arrays [].
Args:
content: String that may contain JSON with extra tokens
Returns:
String with potential JSON extracted or original content
"""
content = content.strip()
# Try to find a complete JSON object or array
# Look for the last closing brace/bracket that could be valid JSON
# Track counters and whether we've seen opening brackets
brace_count = 0
bracket_count = 0
seen_opening_brace = False
seen_opening_bracket = False
in_string = False
escape_next = False
last_valid_end = -1
for i, char in enumerate(content):
if escape_next:
escape_next = False
continue
if char == '\\':
escape_next = True
continue
if char == '"' and not escape_next:
in_string = not in_string
continue
if in_string:
continue
if char == '{':
brace_count += 1
seen_opening_brace = True
elif char == '}':
brace_count -= 1
# Only mark as valid end if we started with opening brace and reached balanced state
if brace_count == 0 and seen_opening_brace:
last_valid_end = i
elif char == '[':
bracket_count += 1
seen_opening_bracket = True
elif char == ']':
bracket_count -= 1
# Only mark as valid end if we started with opening bracket and reached balanced state
if bracket_count == 0 and seen_opening_bracket:
last_valid_end = i
if last_valid_end > 0:
truncated = content[:last_valid_end + 1]
if truncated != content:
logger.debug(f"Truncated content from {len(content)} to {len(truncated)} chars")
return truncated
return content
def repair_json_output(content: str) -> str:
"""
Repair and normalize JSON output.
Handles:
- JSON with extra tokens after closing brackets
- Incomplete JSON structures
- Malformed JSON from quantized models
Args:
content (str): String content that may contain JSON
@@ -42,6 +117,12 @@ def repair_json_output(content: str) -> str:
str: Repaired JSON string, or original content if not JSON
"""
content = content.strip()
if not content:
return content
# First attempt: try to extract valid JSON if there are extra tokens
content = _extract_json_from_content(content)
try:
# Try to repair and parse JSON
@@ -53,6 +134,49 @@ def repair_json_output(content: str) -> str:
return content
content = json.dumps(repaired_content, ensure_ascii=False)
except Exception as e:
logger.warning(f"JSON repair failed: {e}")
logger.debug(f"JSON repair failed: {e}")
return content
def sanitize_tool_response(content: str, max_length: int = 50000) -> str:
    """Clean up a raw tool response before it is handed to the LLM.

    Steps, in order:
      1. Strip surrounding whitespace.
      2. For JSON-looking payloads ('{' / '[' prefix), drop trailing tokens
         after the last balanced bracket via _extract_json_from_content.
      3. Truncate to *max_length* characters, appending "...".
      4. Remove ASCII/C1 control characters sometimes emitted by quantized
         models (tabs, newlines and carriage returns are preserved).

    Args:
        content: Raw tool response text.
        max_length: Upper bound on the returned length (default 50000).

    Returns:
        The sanitized string; falsy input is returned as-is.
    """
    if not content:
        return content

    text = content.strip()

    # Trailing-garbage removal only makes sense for JSON-shaped payloads.
    if text.startswith(("{", "[")):
        text = _extract_json_from_content(text)

    # Keep the response within the token budget.
    if len(text) > max_length:
        logger.warning(f"Tool response truncated from {len(text)} to {max_length} chars")
        text = text[:max_length].rstrip() + "..."

    # Single pass over the control-character class the original listed;
    # often seen from quantized models with output corruption.
    return re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x9F]', '', text)

View File

@@ -3,7 +3,7 @@
import json
from src.utils.json_utils import repair_json_output
from src.utils.json_utils import repair_json_output, sanitize_tool_response, _extract_json_from_content
class TestRepairJsonOutput:
@@ -106,3 +106,119 @@ class TestRepairJsonOutput:
# Should attempt to process as JSON since it contains ```json
assert isinstance(result, str)
assert result == '{"key": "value"}'
class TestExtractJsonFromContent:
    """Behavioural tests for the _extract_json_from_content helper."""

    def test_json_with_extra_tokens_after_closing_brace(self):
        """Tokens following a closed object are stripped."""
        assert (
            _extract_json_from_content('{"key": "value"} extra tokens here')
            == '{"key": "value"}'
        )

    def test_json_with_extra_tokens_after_closing_bracket(self):
        """Tokens following a closed array are stripped."""
        assert _extract_json_from_content('[1, 2, 3] garbage data') == '[1, 2, 3]'

    def test_nested_json_with_extra_tokens(self):
        """Nesting does not confuse the balance tracking."""
        assert (
            _extract_json_from_content('{"nested": {"inner": [1, 2, 3]}} invalid text')
            == '{"nested": {"inner": [1, 2, 3]}}'
        )

    def test_json_with_string_containing_braces(self):
        """Braces inside string literals are ignored."""
        assert (
            _extract_json_from_content('{"text": "this has {braces} in it"} extra')
            == '{"text": "this has {braces} in it"}'
        )

    def test_json_with_escaped_quotes(self):
        """Escaped quotes do not terminate the string scan."""
        assert (
            _extract_json_from_content('{"text": "quote \\"here\\""} junk')
            == '{"text": "quote \\"here\\""}'
        )

    def test_clean_json_no_extra_tokens(self):
        """Already-clean JSON is returned unchanged."""
        assert _extract_json_from_content('{"key": "value"}') == '{"key": "value"}'

    def test_empty_object(self):
        """An empty object is a valid extraction target."""
        assert _extract_json_from_content('{} extra') == '{}'

    def test_empty_array(self):
        """An empty array is a valid extraction target."""
        assert _extract_json_from_content('[] more stuff') == '[]'

    def test_extra_closing_brace_no_opening(self):
        """A stray '}' with no opener leaves the input untouched."""
        payload = '} garbage data'
        # No opening brace was ever seen, so no cut point is recorded.
        assert _extract_json_from_content(payload) == payload

    def test_extra_closing_bracket_no_opening(self):
        """A stray ']' with no opener leaves the input untouched."""
        payload = '] garbage data'
        # No opening bracket was ever seen, so no cut point is recorded.
        assert _extract_json_from_content(payload) == payload
class TestSanitizeToolResponse:
    """Behavioural tests for the sanitize_tool_response helper."""

    def test_basic_sanitization(self):
        """Plain text passes through unchanged."""
        assert sanitize_tool_response("normal response") == "normal response"

    def test_json_with_extra_tokens(self):
        """Trailing garbage after a JSON object is removed."""
        assert (
            sanitize_tool_response('{"data": "value"} some garbage')
            == '{"data": "value"}'
        )

    def test_very_long_response_truncation(self):
        """Responses beyond the default cap are cut and ellipsized."""
        sanitized = sanitize_tool_response("a" * 60000)  # exceeds default 50000
        assert len(sanitized) <= 50003  # 50000 + "..."
        assert sanitized.endswith("...")

    def test_custom_max_length(self):
        """A caller-supplied cap is honoured."""
        sanitized = sanitize_tool_response("a" * 1000, max_length=100)
        assert len(sanitized) <= 103  # 100 + "..."
        assert sanitized.endswith("...")

    def test_control_character_removal(self):
        """Control characters are scrubbed from the response."""
        sanitized = sanitize_tool_response("text with \x00 null \x01 chars")
        assert "\x00" not in sanitized
        assert "\x01" not in sanitized

    def test_none_content(self):
        """Empty input is returned as-is."""
        assert sanitize_tool_response("") == ""

    def test_whitespace_handling(self):
        """Surrounding whitespace is stripped."""
        assert sanitize_tool_response(" text with spaces ") == "text with spaces"

    def test_json_array_with_extra_tokens(self):
        """Trailing garbage after a JSON array is removed."""
        assert (
            sanitize_tool_response('[{"id": 1}, {"id": 2}] invalid stuff')
            == '[{"id": 1}, {"id": 2}]'
        )

View File

@@ -147,11 +147,18 @@ function WebSearchToolCall({ toolCall }: { toolCall: ToolCallRuntime }) {
}, [toolCall.result]);
const searchResults = useMemo<SearchResult[]>(() => {
let results: SearchResult[] | undefined = undefined;
let parseError = false;
try {
results = toolCall.result ? parseJSON(toolCall.result, []) : undefined;
} catch {
if (toolCall.result) {
results = parseJSON(toolCall.result, []);
}
} catch (error) {
parseError = true;
console.warn("Failed to parse search results:", error);
results = undefined;
}
if (Array.isArray(results)) {
results.forEach((result) => {
if (result.type === "page") {
@@ -159,8 +166,10 @@ function WebSearchToolCall({ toolCall }: { toolCall: ToolCallRuntime }) {
}
});
} else {
// If parsing failed, still try to show something useful
results = [];
}
return results;
}, [toolCall.result]);
const pageResults = useMemo(

View File

@@ -1,11 +1,72 @@
import { parse } from "best-effort-json-parser";
/**
 * Truncate `content` at the last position where brace/bracket nesting
 * returns to zero — i.e. the end of the last balanced JSON object or
 * array. Double-quoted strings (with backslash escapes) are skipped, and
 * unmatched closers are ignored rather than driving the counters
 * negative. When no balanced value is found, the input is returned
 * unchanged.
 */
function extractValidJSON(content: string): string {
  let objDepth = 0; // open '{' not yet matched
  let arrDepth = 0; // open '[' not yet matched
  let quoted = false; // inside a double-quoted string
  let skip = false; // previous char was a backslash
  let cut = -1; // index of the last balanced closer

  for (let i = 0; i < content.length; i++) {
    const ch = content[i];
    if (skip) {
      skip = false;
    } else if (ch === "\\") {
      skip = true;
    } else if (ch === '"') {
      quoted = !quoted;
    } else if (!quoted) {
      if (ch === "{") {
        objDepth += 1;
      } else if (ch === "}" && objDepth > 0) {
        objDepth -= 1;
        if (objDepth === 0) {
          cut = i;
        }
      } else if (ch === "[") {
        arrDepth += 1;
      } else if (ch === "]" && arrDepth > 0) {
        arrDepth -= 1;
        if (arrDepth === 0) {
          cut = i;
        }
      }
    }
  }

  return cut > 0 ? content.slice(0, cut + 1) : content;
}
export function parseJSON<T>(json: string | null | undefined, fallback: T) {
if (!json) {
return fallback;
}
try {
const raw = json
let raw = json
.trim()
.replace(/^```json\s*/, "")
.replace(/^```js\s*/, "")
@@ -13,8 +74,17 @@ export function parseJSON<T>(json: string | null | undefined, fallback: T) {
.replace(/^```plaintext\s*/, "")
.replace(/^```\s*/, "")
.replace(/\s*```$/, "");
// First attempt: try to extract valid JSON to remove extra tokens
if (raw.startsWith("{") || raw.startsWith("[")) {
raw = extractValidJSON(raw);
}
// Parse the cleaned content
return parse(raw) as T;
} catch {
// Fallback: try to extract meaningful content from malformed JSON
// This is a last-resort attempt to salvage partial data
return fallback;
}
}

442
web/tests/json.test.ts Normal file
View File

@@ -0,0 +1,442 @@
import { parseJSON } from "../src/core/utils/json";
// Suite: verifies that parseJSON strips extra tokens after a balanced JSON
// value (the extractValidJSON fast path) across objects, arrays, nesting,
// string escapes, and common scalar types.
describe("parseJSON - extractValidJSON helper", () => {
it("extracts JSON object with extra tokens after closing brace", () => {
const input = '{"key": "value"} extra tokens here';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("extracts JSON array with extra tokens after closing bracket", () => {
const input = '[1, 2, 3] garbage data here';
const result = parseJSON(input, []);
expect(result).toEqual([1, 2, 3]);
});
it("handles nested JSON with extra tokens", () => {
const input = '{"nested": {"inner": [1, 2, 3]}} invalid text';
const result = parseJSON(input, null);
expect(result).toEqual({
nested: {
inner: [1, 2, 3],
},
});
});
it("handles JSON with strings containing braces", () => {
// Braces inside string literals must not affect nesting counters.
const input = '{"text": "this has {braces} in it"} extra';
const result = parseJSON(input, null);
expect(result.text).toBe("this has {braces} in it");
});
it("handles JSON with escaped quotes in strings", () => {
// Escaped quotes must not terminate the in-string scan.
const input = '{"text": "quote \\"here\\""} junk';
const result = parseJSON(input, null);
expect(result.text).toBe('quote "here"');
});
it("handles clean JSON without extra tokens", () => {
const input = '{"key": "value"}';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("handles empty object", () => {
const input = '{} extra';
const result = parseJSON(input, {});
expect(result).toEqual({});
});
it("handles empty array", () => {
const input = '[] more stuff';
const result = parseJSON(input, []);
expect(result).toEqual([]);
});
it("handles JSON with null values", () => {
const input = '{"value": null} trash';
const result = parseJSON(input, {});
expect(result.value).toBeNull();
});
it("handles JSON with boolean values", () => {
const input = '{"active": true, "deleted": false} garbage';
const result = parseJSON(input, {});
expect(result.active).toBe(true);
expect(result.deleted).toBe(false);
});
it("handles JSON with numbers", () => {
const input = '{"int": 42, "float": 3.14, "negative": -7} data';
const result = parseJSON(input, {});
expect(result.int).toBe(42);
expect(result.float).toBe(3.14);
expect(result.negative).toBe(-7);
});
it("handles JSON with unicode characters", () => {
const input = '{"name": "测试", "emoji": "🎯"} extra';
const result = parseJSON(input, {});
expect(result.name).toBe("测试");
expect(result.emoji).toBe("🎯");
});
it("handles multiple levels of nesting", () => {
const input = '{"a": {"b": {"c": {"d": "value"}}}} junk';
const result = parseJSON(input, {});
expect(result.a.b.c.d).toBe("value");
});
it("handles arrays of objects", () => {
const input = '[{"id": 1, "name": "test1"}, {"id": 2, "name": "test2"}] garbage';
const result = parseJSON(input, []);
expect(result.length).toBe(2);
expect(result[0].id).toBe(1);
expect(result[1].name).toBe("test2");
});
});
// Suite: verifies the markdown fence stripping (```json / ```js / ```ts /
// ```plaintext / bare ```) that parseJSON applies before parsing.
describe("parseJSON - with code block markers", () => {
it("strips json code block markers", () => {
const input = '```json\n{"key": "value"}\n```';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("strips js code block markers", () => {
const input = '```js\n{"key": "value"}\n```';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("strips ts code block markers", () => {
const input = '```ts\n{"key": "value"}\n```';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("strips plaintext code block markers", () => {
const input = '```plaintext\n{"key": "value"}\n```';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("strips generic code block markers", () => {
const input = '```\n{"key": "value"}\n```';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("handles code block without closing marker", () => {
const input = '```json\n{"key": "value"}';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
it("handles code block with extra whitespace", () => {
const input = '```json \n{"key": "value"}\n``` ';
const result = parseJSON(input, null);
expect(result.key).toBe("value");
});
});
// Suite: reproduces the concrete failure modes reported in issue #598
// (quantized-model output with trailing tokens) plus null/undefined/empty
// fallback behaviour.
describe("parseJSON - issue #598 specific cases", () => {
it("handles JSON with extra tokens from quantized models", () => {
// This is similar to what Qwen3 235B returns
const input =
'{"text": "Published: 2010-01-07\\nTitle: Photon Counting OTDR", "data": "Published:", "reminding": " 2010-01-07\\nTitle: Photon"} some garbage tokens';
const result = parseJSON(input, {});
expect(result.text).toBeTruthy();
expect(result.text).toContain("Published");
expect(result.data).toBeTruthy();
expect(result.reminding).toBeTruthy();
});
it("handles search results JSON with extra tokens", () => {
const input = `[
{"type": "page", "title": "Example", "url": "https://example.com", "content": "Example content"},
{"type": "page", "title": "Test", "url": "https://test.com", "content": "Test content"}
] trailing garbage`;
const result = parseJSON(input, []);
expect(result.length).toBe(2);
expect(result[0].type).toBe("page");
expect(result[1].title).toBe("Test");
});
it("handles crawler response with extra tokens", () => {
const input = `{
"title": "Article Title",
"content": "Article content here..."
} [incomplete json or garbage`;
const result = parseJSON(input, {});
expect(result.title).toBe("Article Title");
expect(result.content).toContain("Article content");
});
it("handles non-JSON content gracefully", () => {
const input = "This is just plain text, not JSON";
const fallback = { default: true };
const result = parseJSON(input, fallback);
// best-effort-json-parser may parse plain text as key-value pairs
// Just ensure we get some result (not throwing an error)
expect(result).toBeDefined();
expect(result).not.toBeNull();
});
it("returns fallback for null input", () => {
const fallback = [{ default: true }];
const result = parseJSON(null, fallback);
expect(result).toEqual(fallback);
});
it("returns fallback for undefined input", () => {
const fallback = [];
const result = parseJSON(undefined, fallback);
expect(result).toEqual(fallback);
});
it("returns fallback for empty string input", () => {
const fallback = {};
const result = parseJSON("", fallback);
expect(result).toEqual(fallback);
});
});
// Suite: edge cases around string content (special chars, escapes), deep
// nesting, large payloads, numeric precision, and embedded whitespace.
describe("parseJSON - edge cases", () => {
it("handles JSON with special characters in strings", () => {
const input = '{"text": "Special chars: @#$%^&*()"} extra';
const result = parseJSON(input, {});
expect(result.text).toBe("Special chars: @#$%^&*()");
});
it("handles JSON with newlines in strings", () => {
const input = '{"text": "Line 1\\nLine 2\\nLine 3"} junk';
const result = parseJSON(input, {});
expect(result.text).toContain("Line");
});
it("handles JSON with tabs in strings", () => {
const input = '{"text": "Col1\\tCol2\\tCol3"} trash';
const result = parseJSON(input, {});
expect(result.text).toContain("Col");
});
it("handles deeply nested objects", () => {
// NOTE(review): input has one more '}' than '{' before " extra" — the
// extractor/parser is expected to tolerate the surplus closer.
const input = '{"a":{"b":{"c":{"d":{"e":{"f":"deep"}}}}}}} extra';
const result = parseJSON(input, {});
expect(result.a.b.c.d.e.f).toBe("deep");
});
it("handles large arrays", () => {
const largeArray = Array.from({ length: 100 }, (_, i) => ({ id: i }));
const input = JSON.stringify(largeArray) + " garbage text";
const result = parseJSON(input, []);
expect(result.length).toBe(100);
expect(result[99].id).toBe(99);
});
it("handles whitespace in JSON", () => {
const input = `{
"key" : "value" ,
"number" : 42
} extra`;
const result = parseJSON(input, {});
expect(result.key).toBe("value");
expect(result.number).toBe(42);
});
it("handles JSON with escaped slashes", () => {
const input = '{"url": "https:\\/\\/example.com"} junk';
const result = parseJSON(input, {});
expect(result.url).toContain("example.com");
});
it("preserves numeric precision", () => {
const input = '{"value": 1.23456789} extra';
const result = parseJSON(input, {});
expect(result.value).toBe(1.23456789);
});
it("handles JSON with very long strings", () => {
const longString = "A".repeat(10000);
const input = `{"text": "${longString}"} garbage`;
const result = parseJSON(input, {});
expect(result.text.length).toBe(10000);
});
});
// Suite: checks that the generic parameter of parseJSON<T> flows through to
// the returned value for both object and array shapes.
describe("parseJSON - type safety", () => {
it("properly types object results", () => {
interface TestObject {
id: number;
name: string;
active: boolean;
}
const input = '{"id": 1, "name": "test", "active": true} junk';
const fallback: TestObject = { id: 0, name: "", active: false };
const result = parseJSON<TestObject>(input, fallback);
expect(result.id).toBe(1);
expect(result.name).toBe("test");
expect(result.active).toBe(true);
});
it("properly types array results", () => {
interface Item {
id: number;
label: string;
}
const input = '[{"id": 1, "label": "a"}, {"id": 2, "label": "b"}] extra';
const fallback: Item[] = [];
const result = parseJSON<Item[]>(input, fallback);
expect(result[0].id).toBe(1);
expect(result[1].label).toBe("b");
});
});
// Suite: malformed input recovery — truncated or over-closed JSON should
// yield either a best-effort parse or the fallback, never a throw.
describe("parseJSON - malformed JSON recovery", () => {
it("handles missing closing braces", () => {
const input = '{"key": "value"';
const result = parseJSON(input, { key: "default" });
// Should return something (either fixed JSON or fallback)
expect(result).toBeDefined();
});
it("handles extra closing braces", () => {
const input = '{"key": "value"}}}';
const result = parseJSON(input, {});
expect(result.key).toBe("value");
});
it("handles mixed quotes", () => {
const input = '{"key": "value"} extra';
const result = parseJSON(input, {});
expect(result.key).toBe("value");
});
it("handles unquoted keys (not valid JSON, uses fallback)", () => {
const input = "{key: 'value'} extra";
const fallback = { key: "default" };
const result = parseJSON(input, fallback);
// Should return something
expect(result).toBeDefined();
});
});
// Suite: realistic tool-output shapes (Tavily search, crawler, local
// search, Python REPL, MCP) each followed by trailing non-JSON tokens.
describe("parseJSON - real-world scenarios", () => {
it("handles Tavily search results format", () => {
const input = `[
{
"type": "page",
"title": "Sample Article",
"url": "https://example.com/article",
"content": "This is sample content..."
}
] processing complete`;
const result = parseJSON(input, []);
expect(result[0].type).toBe("page");
expect(result[0].title).toBe("Sample Article");
});
it("handles crawler article format", () => {
const input = `{
"title": "News Article",
"content": "Article body text...",
"author": "John Doe",
"date": "2024-01-01"
} [incomplete extra`;
const result = parseJSON(input, {});
expect(result.title).toBe("News Article");
expect(result.content).toBeDefined();
});
it("handles local search tool results", () => {
const input = `[
{
"id": "doc-1",
"title": "Document 1",
"content": "Document content here"
},
{
"id": "doc-2",
"title": "Document 2",
"content": "Another document"
}
] extra garbage`;
const result = parseJSON(input, []);
expect(result.length).toBe(2);
expect(result[0].id).toBe("doc-1");
});
it("handles Python REPL output with JSON", () => {
const input = `{"result": 42, "error": null, "stdout": "Output here"} [process ended]`;
const result = parseJSON(input, {});
expect(result.result).toBe(42);
expect(result.error).toBeNull();
});
it("handles MCP tool response format", () => {
const input = `{
"tool": "web_search",
"status": "success",
"data": [{"title": "Result", "url": "https://example.com"}]
} additional text`;
const result = parseJSON(input, {});
expect(result.tool).toBe("web_search");
expect(result.data[0].title).toBe("Result");
});
});
// Suite: regression guards for issue #598 — no data loss when trimming,
// stable behaviour across consecutive calls, bounded runtime on large
// inputs, and tolerance of bracket noise / unicode in the trailing junk.
describe("parseJSON - issue #598 regression tests", () => {
it("does not lose data when removing extra tokens", () => {
const input = `{
"research": "Complete research data here with lots of information",
"sources": [
{"title": "Source 1", "url": "https://source1.com"},
{"title": "Source 2", "url": "https://source2.com"}
]
} garbage tokens that should be removed`;
const result = parseJSON(input, {});
expect(result.research).toBeDefined();
expect(result.sources.length).toBe(2);
expect(result.sources[0].title).toBe("Source 1");
});
it("handles consecutive tool calls with JSON", () => {
const firstResult = '{"step": 1, "data": "first"} extra';
const secondResult = '{"step": 2, "data": "second"} junk';
const result1 = parseJSON(firstResult, {});
const result2 = parseJSON(secondResult, {});
expect(result1.step).toBe(1);
expect(result2.step).toBe(2);
});
it("maintains performance with large responses", () => {
const largeContent = "A".repeat(50000);
const input = `{"content": "${largeContent}", "status": "ok"} extra data`;
const startTime = Date.now();
const result = parseJSON(input, {});
const duration = Date.now() - startTime;
expect(result.content).toBeDefined();
expect(result.status).toBe("ok");
// Should complete quickly (< 2 seconds for this size)
expect(duration).toBeLessThan(2000);
});
it("handles multiple consecutive extra tokens", () => {
const input =
'{"data": "value"}} } ] unexpected tokens here } { [ ) ] incomplete';
const result = parseJSON(input, {});
expect(result.data).toBe("value");
});
it("handles unicode garbage after JSON", () => {
const input = '{"text": "测试"} 乱码数据 🎯 garbage';
const result = parseJSON(input, {});
expect(result.text).toBe("测试");
});
});