mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
fix: issue 1138 windows encoding (#1139)
* fix(windows): use utf-8 for text file operations
* fix(windows): normalize sandbox path masking
* fix(windows): preserve utf-8 handling after backend split
This commit is contained in:
@@ -63,7 +63,7 @@ def _extract_file_from_skill_archive(zip_path: Path, internal_path: str) -> byte
|
|||||||
summary="Get Artifact File",
|
summary="Get Artifact File",
|
||||||
description="Retrieve an artifact file generated by the AI agent. Supports text, HTML, and binary files.",
|
description="Retrieve an artifact file generated by the AI agent. Supports text, HTML, and binary files.",
|
||||||
)
|
)
|
||||||
async def get_artifact(thread_id: str, path: str, request: Request) -> FileResponse:
|
async def get_artifact(thread_id: str, path: str, request: Request) -> Response:
|
||||||
"""Get an artifact file by its path.
|
"""Get an artifact file by its path.
|
||||||
|
|
||||||
The endpoint automatically detects file types and returns appropriate content types.
|
The endpoint automatically detects file types and returns appropriate content types.
|
||||||
@@ -147,12 +147,12 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> FileRespo
|
|||||||
return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"})
|
return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"})
|
||||||
|
|
||||||
if mime_type and mime_type == "text/html":
|
if mime_type and mime_type == "text/html":
|
||||||
return HTMLResponse(content=actual_path.read_text())
|
return HTMLResponse(content=actual_path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
if mime_type and mime_type.startswith("text/"):
|
if mime_type and mime_type.startswith("text/"):
|
||||||
return PlainTextResponse(content=actual_path.read_text(), media_type=mime_type)
|
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
|
||||||
|
|
||||||
if is_text_file_by_content(actual_path):
|
if is_text_file_by_content(actual_path):
|
||||||
return PlainTextResponse(content=actual_path.read_text(), media_type=mime_type)
|
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
|
||||||
|
|
||||||
return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": f"inline; filename*=UTF-8''{encoded_filename}"})
|
return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": f"inline; filename*=UTF-8''{encoded_filename}"})
|
||||||
|
|||||||
@@ -152,7 +152,7 @@ async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfig
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Write the configuration to file
|
# Write the configuration to file
|
||||||
with open(config_path, "w") as f:
|
with open(config_path, "w", encoding="utf-8") as f:
|
||||||
json.dump(config_data, f, indent=2)
|
json.dump(config_data, f, indent=2)
|
||||||
|
|
||||||
logger.info(f"MCP configuration updated and saved to: {config_path}")
|
logger.info(f"MCP configuration updated and saved to: {config_path}")
|
||||||
|
|||||||
@@ -307,7 +307,7 @@ async def update_skill(skill_name: str, request: SkillUpdateRequest) -> SkillRes
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Write the configuration to file
|
# Write the configuration to file
|
||||||
with open(config_path, "w") as f:
|
with open(config_path, "w", encoding="utf-8") as f:
|
||||||
json.dump(config_data, f, indent=2)
|
json.dump(config_data, f, indent=2)
|
||||||
|
|
||||||
logger.info(f"Skills configuration updated and saved to: {config_path}")
|
logger.info(f"Skills configuration updated and saved to: {config_path}")
|
||||||
|
|||||||
@@ -401,7 +401,7 @@ class AioSandboxProvider(SandboxProvider):
|
|||||||
paths.ensure_thread_dirs(thread_id)
|
paths.ensure_thread_dirs(thread_id)
|
||||||
lock_path = paths.thread_dir(thread_id) / f"{sandbox_id}.lock"
|
lock_path = paths.thread_dir(thread_id) / f"{sandbox_id}.lock"
|
||||||
|
|
||||||
with open(lock_path, "a") as lock_file:
|
with open(lock_path, "a", encoding="utf-8") as lock_file:
|
||||||
try:
|
try:
|
||||||
fcntl.flock(lock_file, fcntl.LOCK_EX)
|
fcntl.flock(lock_file, fcntl.LOCK_EX)
|
||||||
# Re-check in-process caches under the file lock in case another
|
# Re-check in-process caches under the file lock in case another
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ class LocalSandbox(Sandbox):
|
|||||||
def read_file(self, path: str) -> str:
|
def read_file(self, path: str) -> str:
|
||||||
resolved_path = self._resolve_path(path)
|
resolved_path = self._resolve_path(path)
|
||||||
try:
|
try:
|
||||||
with open(resolved_path) as f:
|
with open(resolved_path, encoding="utf-8") as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||||
@@ -193,7 +193,7 @@ class LocalSandbox(Sandbox):
|
|||||||
if dir_path:
|
if dir_path:
|
||||||
os.makedirs(dir_path, exist_ok=True)
|
os.makedirs(dir_path, exist_ok=True)
|
||||||
mode = "a" if append else "w"
|
mode = "a" if append else "w"
|
||||||
with open(resolved_path, mode) as f:
|
with open(resolved_path, mode, encoding="utf-8") as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||||
|
|||||||
@@ -25,6 +25,10 @@ _LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _path_variants(path: str) -> set[str]:
|
||||||
|
return {path, path.replace("\\", "/"), path.replace("/", "\\")}
|
||||||
|
|
||||||
|
|
||||||
def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
|
def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
|
||||||
"""Replace virtual /mnt/user-data paths with actual thread data paths.
|
"""Replace virtual /mnt/user-data paths with actual thread data paths.
|
||||||
|
|
||||||
@@ -101,15 +105,15 @@ def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None)
|
|||||||
for actual_base, virtual_base in sorted(mappings.items(), key=lambda item: len(item[0]), reverse=True):
|
for actual_base, virtual_base in sorted(mappings.items(), key=lambda item: len(item[0]), reverse=True):
|
||||||
raw_base = str(Path(actual_base))
|
raw_base = str(Path(actual_base))
|
||||||
resolved_base = str(Path(actual_base).resolve())
|
resolved_base = str(Path(actual_base).resolve())
|
||||||
for base in {raw_base, resolved_base}:
|
for base in _path_variants(raw_base) | _path_variants(resolved_base):
|
||||||
escaped_actual = re.escape(base)
|
escaped_actual = re.escape(base).replace(r"\\", r"[/\\]")
|
||||||
pattern = re.compile(escaped_actual + r"(?:/[^\s\"';&|<>()]*)?")
|
pattern = re.compile(escaped_actual + r"(?:[/\\][^\s\"';&|<>()]*)?")
|
||||||
|
|
||||||
def replace_match(match: re.Match) -> str:
|
def replace_match(match: re.Match) -> str:
|
||||||
matched_path = match.group(0)
|
matched_path = match.group(0)
|
||||||
if matched_path == base:
|
if matched_path == base:
|
||||||
return virtual_base
|
return virtual_base
|
||||||
relative = matched_path[len(base) :].lstrip("/")
|
relative = matched_path[len(base) :].lstrip("/\\")
|
||||||
return f"{virtual_base}/{relative}" if relative else virtual_base
|
return f"{virtual_base}/{relative}" if relative else virtual_base
|
||||||
|
|
||||||
result = pattern.sub(replace_match, result)
|
result = pattern.sub(replace_match, result)
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ def _validate_skill_frontmatter(skill_dir: Path) -> tuple[bool, str, str | None]
|
|||||||
if not skill_md.exists():
|
if not skill_md.exists():
|
||||||
return False, "SKILL.md not found", None
|
return False, "SKILL.md not found", None
|
||||||
|
|
||||||
content = skill_md.read_text()
|
content = skill_md.read_text(encoding="utf-8")
|
||||||
if not content.startswith("---"):
|
if not content.startswith("---"):
|
||||||
return False, "No YAML frontmatter found", None
|
return False, "No YAML frontmatter found", None
|
||||||
|
|
||||||
|
|||||||
27
backend/tests/test_artifacts_router.py
Normal file
27
backend/tests/test_artifacts_router.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from starlette.requests import Request
|
||||||
|
|
||||||
|
import app.gateway.routers.artifacts as artifacts_router
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None:
|
||||||
|
artifact_path = tmp_path / "note.txt"
|
||||||
|
text = "Curly quotes: \u201cutf8\u201d"
|
||||||
|
artifact_path.write_text(text, encoding="utf-8")
|
||||||
|
|
||||||
|
original_read_text = Path.read_text
|
||||||
|
|
||||||
|
def read_text_with_gbk_default(self, *args, **kwargs):
|
||||||
|
kwargs.setdefault("encoding", "gbk")
|
||||||
|
return original_read_text(self, *args, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default)
|
||||||
|
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
|
||||||
|
|
||||||
|
request = Request({"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": b""})
|
||||||
|
response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", request))
|
||||||
|
|
||||||
|
assert bytes(response.body).decode("utf-8") == text
|
||||||
|
assert response.media_type == "text/plain"
|
||||||
33
backend/tests/test_local_sandbox_encoding.py
Normal file
33
backend/tests/test_local_sandbox_encoding.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
import builtins
|
||||||
|
|
||||||
|
import deerflow.sandbox.local.local_sandbox as local_sandbox
|
||||||
|
from deerflow.sandbox.local.local_sandbox import LocalSandbox
|
||||||
|
|
||||||
|
|
||||||
|
def _open(base, file, mode="r", *args, **kwargs):
|
||||||
|
if "b" in mode:
|
||||||
|
return base(file, mode, *args, **kwargs)
|
||||||
|
return base(file, mode, *args, encoding=kwargs.pop("encoding", "gbk"), **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
|
||||||
|
path = tmp_path / "utf8.txt"
|
||||||
|
text = "\u201cutf8\u201d"
|
||||||
|
path.write_text(text, encoding="utf-8")
|
||||||
|
base = builtins.open
|
||||||
|
|
||||||
|
monkeypatch.setattr(local_sandbox, "open", lambda file, mode="r", *args, **kwargs: _open(base, file, mode, *args, **kwargs), raising=False)
|
||||||
|
|
||||||
|
assert LocalSandbox("t").read_file(str(path)) == text
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
|
||||||
|
path = tmp_path / "utf8.txt"
|
||||||
|
text = "emoji \U0001F600"
|
||||||
|
base = builtins.open
|
||||||
|
|
||||||
|
monkeypatch.setattr(local_sandbox, "open", lambda file, mode="r", *args, **kwargs: _open(base, file, mode, *args, **kwargs), raising=False)
|
||||||
|
|
||||||
|
LocalSandbox("t").write_file(str(path), text)
|
||||||
|
|
||||||
|
assert path.read_text(encoding="utf-8") == text
|
||||||
@@ -18,8 +18,8 @@ def test_replace_virtual_path_maps_virtual_root_and_subpaths() -> None:
|
|||||||
"outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs",
|
"outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs",
|
||||||
}
|
}
|
||||||
|
|
||||||
assert replace_virtual_path("/mnt/user-data/workspace/a.txt", thread_data) == "/tmp/deer-flow/threads/t1/user-data/workspace/a.txt"
|
assert Path(replace_virtual_path("/mnt/user-data/workspace/a.txt", thread_data)).as_posix() == "/tmp/deer-flow/threads/t1/user-data/workspace/a.txt"
|
||||||
assert replace_virtual_path("/mnt/user-data", thread_data) == "/tmp/deer-flow/threads/t1/user-data"
|
assert Path(replace_virtual_path("/mnt/user-data", thread_data)).as_posix() == "/tmp/deer-flow/threads/t1/user-data"
|
||||||
|
|
||||||
|
|
||||||
def test_mask_local_paths_in_output_hides_host_paths() -> None:
|
def test_mask_local_paths_in_output_hides_host_paths() -> None:
|
||||||
|
|||||||
@@ -58,3 +58,31 @@ unsupported: true
|
|||||||
assert valid is False
|
assert valid is False
|
||||||
assert "unsupported" in message
|
assert "unsupported" in message
|
||||||
assert skill_name is None
|
assert skill_name is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_skill_frontmatter_reads_utf8_on_windows_locale(tmp_path, monkeypatch) -> None:
|
||||||
|
skill_dir = tmp_path / "demo-skill"
|
||||||
|
_write_skill(
|
||||||
|
skill_dir,
|
||||||
|
"""---
|
||||||
|
name: demo-skill
|
||||||
|
description: "Curly quotes: \u201cutf8\u201d"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Demo Skill
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
|
||||||
|
original_read_text = Path.read_text
|
||||||
|
|
||||||
|
def read_text_with_gbk_default(self, *args, **kwargs):
|
||||||
|
kwargs.setdefault("encoding", "gbk")
|
||||||
|
return original_read_text(self, *args, **kwargs)
|
||||||
|
|
||||||
|
monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default)
|
||||||
|
|
||||||
|
valid, message, skill_name = VALIDATE_SKILL_FRONTMATTER(skill_dir)
|
||||||
|
|
||||||
|
assert valid is True
|
||||||
|
assert message == "Skill is valid!"
|
||||||
|
assert skill_name == "demo-skill"
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ def generate_image(
|
|||||||
output_file: str,
|
output_file: str,
|
||||||
aspect_ratio: str = "16:9",
|
aspect_ratio: str = "16:9",
|
||||||
) -> str:
|
) -> str:
|
||||||
with open(prompt_file, "r") as f:
|
with open(prompt_file, "r", encoding="utf-8") as f:
|
||||||
prompt = f.read()
|
prompt = f.read()
|
||||||
parts = []
|
parts = []
|
||||||
i = 0
|
i = 0
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ def generate_ppt(
|
|||||||
Status message
|
Status message
|
||||||
"""
|
"""
|
||||||
# Load presentation plan
|
# Load presentation plan
|
||||||
with open(plan_file, "r") as f:
|
with open(plan_file, "r", encoding="utf-8") as f:
|
||||||
plan = json.load(f)
|
plan = json.load(f)
|
||||||
|
|
||||||
# Determine slide dimensions based on aspect ratio
|
# Determine slide dimensions based on aspect ratio
|
||||||
|
|||||||
@@ -87,7 +87,7 @@ def load_run_results(benchmark_dir: Path) -> dict:
|
|||||||
metadata_path = eval_dir / "eval_metadata.json"
|
metadata_path = eval_dir / "eval_metadata.json"
|
||||||
if metadata_path.exists():
|
if metadata_path.exists():
|
||||||
try:
|
try:
|
||||||
with open(metadata_path) as mf:
|
with open(metadata_path, encoding="utf-8") as mf:
|
||||||
eval_id = json.load(mf).get("eval_id", eval_idx)
|
eval_id = json.load(mf).get("eval_id", eval_idx)
|
||||||
except (json.JSONDecodeError, OSError):
|
except (json.JSONDecodeError, OSError):
|
||||||
eval_id = eval_idx
|
eval_id = eval_idx
|
||||||
@@ -117,7 +117,7 @@ def load_run_results(benchmark_dir: Path) -> dict:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(grading_file) as f:
|
with open(grading_file, encoding="utf-8") as f:
|
||||||
grading = json.load(f)
|
grading = json.load(f)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
print(f"Warning: Invalid JSON in {grading_file}: {e}")
|
print(f"Warning: Invalid JSON in {grading_file}: {e}")
|
||||||
@@ -139,7 +139,7 @@ def load_run_results(benchmark_dir: Path) -> dict:
|
|||||||
timing_file = run_dir / "timing.json"
|
timing_file = run_dir / "timing.json"
|
||||||
if result["time_seconds"] == 0.0 and timing_file.exists():
|
if result["time_seconds"] == 0.0 and timing_file.exists():
|
||||||
try:
|
try:
|
||||||
with open(timing_file) as tf:
|
with open(timing_file, encoding="utf-8") as tf:
|
||||||
timing_data = json.load(tf)
|
timing_data = json.load(tf)
|
||||||
result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
|
result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
|
||||||
result["tokens"] = timing_data.get("total_tokens", 0)
|
result["tokens"] = timing_data.get("total_tokens", 0)
|
||||||
@@ -374,13 +374,13 @@ def main():
|
|||||||
output_md = output_json.with_suffix(".md")
|
output_md = output_json.with_suffix(".md")
|
||||||
|
|
||||||
# Write benchmark.json
|
# Write benchmark.json
|
||||||
with open(output_json, "w") as f:
|
with open(output_json, "w", encoding="utf-8") as f:
|
||||||
json.dump(benchmark, f, indent=2)
|
json.dump(benchmark, f, indent=2)
|
||||||
print(f"Generated: {output_json}")
|
print(f"Generated: {output_json}")
|
||||||
|
|
||||||
# Write benchmark.md
|
# Write benchmark.md
|
||||||
markdown = generate_markdown(benchmark)
|
markdown = generate_markdown(benchmark)
|
||||||
with open(output_md, "w") as f:
|
with open(output_md, "w", encoding="utf-8") as f:
|
||||||
f.write(markdown)
|
f.write(markdown)
|
||||||
print(f"Generated: {output_md}")
|
print(f"Generated: {output_md}")
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ def generate_video(
|
|||||||
output_file: str,
|
output_file: str,
|
||||||
aspect_ratio: str = "16:9",
|
aspect_ratio: str = "16:9",
|
||||||
) -> str:
|
) -> str:
|
||||||
with open(prompt_file, "r") as f:
|
with open(prompt_file, "r", encoding="utf-8") as f:
|
||||||
prompt = f.read()
|
prompt = f.read()
|
||||||
referenceImages = []
|
referenceImages = []
|
||||||
i = 0
|
i = 0
|
||||||
|
|||||||
Reference in New Issue
Block a user