mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-11 17:44:45 +08:00
fix: improve JSON repair handling for markdown code blocks (#841)
* fix: improve JSON repair handling for markdown code blocks * unified import path * compress_crawl_udf * fix * reverse
This commit is contained in:
@@ -332,9 +332,12 @@ def planner_node(
|
||||
logger.debug(f"Current state messages: {state['messages']}")
|
||||
logger.info(f"Planner response: {full_response}")
|
||||
|
||||
# Clean the response first to handle markdown code blocks (```json, ```ts, etc.)
|
||||
cleaned_response = repair_json_output(full_response)
|
||||
|
||||
# Validate explicitly that response content is valid JSON before proceeding to parse it
|
||||
if not full_response.strip().startswith('{') and not full_response.strip().startswith('['):
|
||||
logger.warning("Planner response does not appear to be valid JSON")
|
||||
if not cleaned_response.strip().startswith('{') and not cleaned_response.strip().startswith('['):
|
||||
logger.warning("Planner response does not appear to be valid JSON after cleanup")
|
||||
if plan_iterations > 0:
|
||||
return Command(
|
||||
update=preserve_state_meta_fields(state),
|
||||
@@ -347,7 +350,7 @@ def planner_node(
|
||||
)
|
||||
|
||||
try:
|
||||
curr_plan = json.loads(repair_json_output(full_response))
|
||||
curr_plan = json.loads(cleaned_response)
|
||||
# Need to extract the plan from the full_response
|
||||
curr_plan_content = extract_plan_content(curr_plan)
|
||||
# load the current_plan
|
||||
@@ -1428,4 +1431,4 @@ async def analyst_node(
|
||||
config,
|
||||
"analyst",
|
||||
[], # No tools - pure reasoning
|
||||
)
|
||||
)
|
||||
|
||||
@@ -8,8 +8,8 @@ from urllib.parse import urlparse
|
||||
|
||||
from langchain_core.tools import tool
|
||||
|
||||
from src.crawler.article import Article
|
||||
from src.crawler import Crawler
|
||||
|
||||
from .decorators import log_io
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -43,8 +43,18 @@ def crawl_tool(
|
||||
try:
|
||||
crawler = Crawler()
|
||||
article = crawler.crawl(url)
|
||||
return json.dumps({"url": url, "crawled_content": article.to_markdown()[:1000]}, ensure_ascii=False)
|
||||
article_content = compress_crawl_content(article)
|
||||
return json.dumps({"url": url, "crawled_content": article_content}, ensure_ascii=False)
|
||||
except BaseException as e:
|
||||
error_msg = f"Failed to crawl. Error: {repr(e)}"
|
||||
logger.error(error_msg)
|
||||
return error_msg
|
||||
|
||||
|
||||
def compress_crawl_content(article: Article, max_chars: int = 1000) -> str:
    """
    User-defined compression hook for crawled article content.

    Customize this function to implement different compression strategies
    (e.g. summarization or keyword extraction). The default strategy simply
    truncates the article's markdown rendering to the first ``max_chars``
    characters, matching the previous hard-coded behavior.

    Args:
        article: The crawled article to compress.
        max_chars: Maximum number of characters to keep (default: 1000).

    Returns:
        The (possibly truncated) markdown representation of the article.
    """
    return article.to_markdown()[:max_chars]
|
||||
|
||||
@@ -7,6 +7,7 @@ import re
|
||||
from typing import Any
|
||||
|
||||
import json_repair
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -121,6 +122,27 @@ def repair_json_output(content: str) -> str:
|
||||
if not content:
|
||||
return content
|
||||
|
||||
# Handle markdown code blocks (```json, ```ts, or ```)
|
||||
# This must be checked first, as content may start with ``` instead of { or [
|
||||
if "```" in content:
|
||||
# Remove opening markdown code block markers (```json, ```ts, or ```), allowing
|
||||
# optional leading spaces and multiple blank lines after the fence.
|
||||
content = re.sub(
|
||||
r'^[ \t]*```(?:json|ts)?[ \t]*\n+',
|
||||
'',
|
||||
content,
|
||||
flags=re.IGNORECASE | re.MULTILINE,
|
||||
)
|
||||
# Remove closing markdown code block markers (```), allowing optional
|
||||
# leading newlines and trailing spaces.
|
||||
content = re.sub(
|
||||
r'\n*```[ \t]*$',
|
||||
'',
|
||||
content,
|
||||
flags=re.MULTILINE,
|
||||
)
|
||||
content = content.strip()
|
||||
|
||||
# First attempt: try to extract valid JSON if there are extra tokens
|
||||
content = _extract_json_from_content(content)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user