fix: issue 1138 windows encoding (#1139)

* fix(windows): use utf-8 for text file operations

* fix(windows): normalize sandbox path masking

* fix(windows): preserve utf-8 handling after backend split
This commit is contained in:
-Astraia-
2026-03-16 16:53:12 +08:00
committed by GitHub
parent 76803b826f
commit 191b60a326
15 changed files with 116 additions and 24 deletions

View File

@@ -63,7 +63,7 @@ def _extract_file_from_skill_archive(zip_path: Path, internal_path: str) -> byte
summary="Get Artifact File", summary="Get Artifact File",
description="Retrieve an artifact file generated by the AI agent. Supports text, HTML, and binary files.", description="Retrieve an artifact file generated by the AI agent. Supports text, HTML, and binary files.",
) )
async def get_artifact(thread_id: str, path: str, request: Request) -> FileResponse: async def get_artifact(thread_id: str, path: str, request: Request) -> Response:
"""Get an artifact file by its path. """Get an artifact file by its path.
The endpoint automatically detects file types and returns appropriate content types. The endpoint automatically detects file types and returns appropriate content types.
@@ -147,12 +147,12 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> FileRespo
return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"}) return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"})
if mime_type and mime_type == "text/html": if mime_type and mime_type == "text/html":
return HTMLResponse(content=actual_path.read_text()) return HTMLResponse(content=actual_path.read_text(encoding="utf-8"))
if mime_type and mime_type.startswith("text/"): if mime_type and mime_type.startswith("text/"):
return PlainTextResponse(content=actual_path.read_text(), media_type=mime_type) return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
if is_text_file_by_content(actual_path): if is_text_file_by_content(actual_path):
return PlainTextResponse(content=actual_path.read_text(), media_type=mime_type) return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": f"inline; filename*=UTF-8''{encoded_filename}"}) return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": f"inline; filename*=UTF-8''{encoded_filename}"})

View File

@@ -152,7 +152,7 @@ async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfig
} }
# Write the configuration to file # Write the configuration to file
with open(config_path, "w") as f: with open(config_path, "w", encoding="utf-8") as f:
json.dump(config_data, f, indent=2) json.dump(config_data, f, indent=2)
logger.info(f"MCP configuration updated and saved to: {config_path}") logger.info(f"MCP configuration updated and saved to: {config_path}")

View File

@@ -307,7 +307,7 @@ async def update_skill(skill_name: str, request: SkillUpdateRequest) -> SkillRes
} }
# Write the configuration to file # Write the configuration to file
with open(config_path, "w") as f: with open(config_path, "w", encoding="utf-8") as f:
json.dump(config_data, f, indent=2) json.dump(config_data, f, indent=2)
logger.info(f"Skills configuration updated and saved to: {config_path}") logger.info(f"Skills configuration updated and saved to: {config_path}")

View File

@@ -401,7 +401,7 @@ class AioSandboxProvider(SandboxProvider):
paths.ensure_thread_dirs(thread_id) paths.ensure_thread_dirs(thread_id)
lock_path = paths.thread_dir(thread_id) / f"{sandbox_id}.lock" lock_path = paths.thread_dir(thread_id) / f"{sandbox_id}.lock"
with open(lock_path, "a") as lock_file: with open(lock_path, "a", encoding="utf-8") as lock_file:
try: try:
fcntl.flock(lock_file, fcntl.LOCK_EX) fcntl.flock(lock_file, fcntl.LOCK_EX)
# Re-check in-process caches under the file lock in case another # Re-check in-process caches under the file lock in case another

View File

@@ -180,7 +180,7 @@ class LocalSandbox(Sandbox):
def read_file(self, path: str) -> str: def read_file(self, path: str) -> str:
resolved_path = self._resolve_path(path) resolved_path = self._resolve_path(path)
try: try:
with open(resolved_path) as f: with open(resolved_path, encoding="utf-8") as f:
return f.read() return f.read()
except OSError as e: except OSError as e:
# Re-raise with the original path for clearer error messages, hiding internal resolved paths # Re-raise with the original path for clearer error messages, hiding internal resolved paths
@@ -193,7 +193,7 @@ class LocalSandbox(Sandbox):
if dir_path: if dir_path:
os.makedirs(dir_path, exist_ok=True) os.makedirs(dir_path, exist_ok=True)
mode = "a" if append else "w" mode = "a" if append else "w"
with open(resolved_path, mode) as f: with open(resolved_path, mode, encoding="utf-8") as f:
f.write(content) f.write(content)
except OSError as e: except OSError as e:
# Re-raise with the original path for clearer error messages, hiding internal resolved paths # Re-raise with the original path for clearer error messages, hiding internal resolved paths

View File

@@ -25,6 +25,10 @@ _LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
) )
def _path_variants(path: str) -> set[str]:
return {path, path.replace("\\", "/"), path.replace("/", "\\")}
def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str: def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
"""Replace virtual /mnt/user-data paths with actual thread data paths. """Replace virtual /mnt/user-data paths with actual thread data paths.
@@ -101,15 +105,15 @@ def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None)
for actual_base, virtual_base in sorted(mappings.items(), key=lambda item: len(item[0]), reverse=True): for actual_base, virtual_base in sorted(mappings.items(), key=lambda item: len(item[0]), reverse=True):
raw_base = str(Path(actual_base)) raw_base = str(Path(actual_base))
resolved_base = str(Path(actual_base).resolve()) resolved_base = str(Path(actual_base).resolve())
for base in {raw_base, resolved_base}: for base in _path_variants(raw_base) | _path_variants(resolved_base):
escaped_actual = re.escape(base) escaped_actual = re.escape(base).replace(r"\\", r"[/\\]")
pattern = re.compile(escaped_actual + r"(?:/[^\s\"';&|<>()]*)?") pattern = re.compile(escaped_actual + r"(?:[/\\][^\s\"';&|<>()]*)?")
def replace_match(match: re.Match) -> str: def replace_match(match: re.Match) -> str:
matched_path = match.group(0) matched_path = match.group(0)
if matched_path == base: if matched_path == base:
return virtual_base return virtual_base
relative = matched_path[len(base) :].lstrip("/") relative = matched_path[len(base) :].lstrip("/\\")
return f"{virtual_base}/{relative}" if relative else virtual_base return f"{virtual_base}/{relative}" if relative else virtual_base
result = pattern.sub(replace_match, result) result = pattern.sub(replace_match, result)

View File

@@ -25,7 +25,7 @@ def _validate_skill_frontmatter(skill_dir: Path) -> tuple[bool, str, str | None]
if not skill_md.exists(): if not skill_md.exists():
return False, "SKILL.md not found", None return False, "SKILL.md not found", None
content = skill_md.read_text() content = skill_md.read_text(encoding="utf-8")
if not content.startswith("---"): if not content.startswith("---"):
return False, "No YAML frontmatter found", None return False, "No YAML frontmatter found", None

View File

@@ -0,0 +1,27 @@
import asyncio
from pathlib import Path
from starlette.requests import Request
import app.gateway.routers.artifacts as artifacts_router
def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None:
    """Artifact text files must decode as UTF-8 even when the platform default is GBK."""
    note_path = tmp_path / "note.txt"
    expected = "Curly quotes: \u201cutf8\u201d"
    note_path.write_text(expected, encoding="utf-8")

    # Simulate a Windows locale where Path.read_text() defaults to GBK.
    real_read_text = Path.read_text

    def gbk_defaulting_read_text(self, *args, **kwargs):
        kwargs.setdefault("encoding", "gbk")
        return real_read_text(self, *args, **kwargs)

    monkeypatch.setattr(Path, "read_text", gbk_defaulting_read_text)
    # Route virtual-path resolution straight to the fixture file.
    monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: note_path)

    scope = {"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": b""}
    response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", Request(scope)))

    assert bytes(response.body).decode("utf-8") == expected
    assert response.media_type == "text/plain"

View File

@@ -0,0 +1,33 @@
import builtins
import deerflow.sandbox.local.local_sandbox as local_sandbox
from deerflow.sandbox.local.local_sandbox import LocalSandbox
def _open(base, file, mode="r", *args, **kwargs):
if "b" in mode:
return base(file, mode, *args, **kwargs)
return base(file, mode, *args, encoding=kwargs.pop("encoding", "gbk"), **kwargs)
def test_read_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
    """LocalSandbox.read_file must decode UTF-8 content even under a GBK default locale."""
    source = tmp_path / "utf8.txt"
    expected = "\u201cutf8\u201d"
    source.write_text(expected, encoding="utf-8")

    # Patch the module-level open() so bare text opens default to GBK,
    # the way they would on a Chinese-locale Windows host.
    real_open = builtins.open
    monkeypatch.setattr(
        local_sandbox,
        "open",
        lambda file, mode="r", *args, **kwargs: _open(real_open, file, mode, *args, **kwargs),
        raising=False,
    )

    assert LocalSandbox("t").read_file(str(source)) == expected
def test_write_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
    """LocalSandbox.write_file must emit UTF-8 bytes even under a GBK default locale."""
    target = tmp_path / "utf8.txt"
    payload = "emoji \U0001F600"

    # Patch the module-level open() so bare text opens default to GBK,
    # the way they would on a Chinese-locale Windows host.
    real_open = builtins.open
    monkeypatch.setattr(
        local_sandbox,
        "open",
        lambda file, mode="r", *args, **kwargs: _open(real_open, file, mode, *args, **kwargs),
        raising=False,
    )

    LocalSandbox("t").write_file(str(target), payload)
    assert target.read_text(encoding="utf-8") == payload

View File

@@ -18,8 +18,8 @@ def test_replace_virtual_path_maps_virtual_root_and_subpaths() -> None:
"outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs", "outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs",
} }
assert replace_virtual_path("/mnt/user-data/workspace/a.txt", thread_data) == "/tmp/deer-flow/threads/t1/user-data/workspace/a.txt" assert Path(replace_virtual_path("/mnt/user-data/workspace/a.txt", thread_data)).as_posix() == "/tmp/deer-flow/threads/t1/user-data/workspace/a.txt"
assert replace_virtual_path("/mnt/user-data", thread_data) == "/tmp/deer-flow/threads/t1/user-data" assert Path(replace_virtual_path("/mnt/user-data", thread_data)).as_posix() == "/tmp/deer-flow/threads/t1/user-data"
def test_mask_local_paths_in_output_hides_host_paths() -> None: def test_mask_local_paths_in_output_hides_host_paths() -> None:

View File

@@ -58,3 +58,31 @@ unsupported: true
assert valid is False assert valid is False
assert "unsupported" in message assert "unsupported" in message
assert skill_name is None assert skill_name is None
def test_validate_skill_frontmatter_reads_utf8_on_windows_locale(tmp_path, monkeypatch) -> None:
    """Frontmatter validation must read SKILL.md as UTF-8 even under a GBK default locale."""
    demo_dir = tmp_path / "demo-skill"
    _write_skill(
        demo_dir,
        """---
name: demo-skill
description: "Curly quotes: \u201cutf8\u201d"
---
# Demo Skill
""",
    )

    # Simulate a Windows locale where Path.read_text() defaults to GBK.
    real_read_text = Path.read_text

    def gbk_defaulting_read_text(self, *args, **kwargs):
        kwargs.setdefault("encoding", "gbk")
        return real_read_text(self, *args, **kwargs)

    monkeypatch.setattr(Path, "read_text", gbk_defaulting_read_text)

    valid, message, skill_name = VALIDATE_SKILL_FRONTMATTER(demo_dir)
    assert valid is True
    assert message == "Skill is valid!"
    assert skill_name == "demo-skill"

View File

@@ -33,7 +33,7 @@ def generate_image(
output_file: str, output_file: str,
aspect_ratio: str = "16:9", aspect_ratio: str = "16:9",
) -> str: ) -> str:
with open(prompt_file, "r") as f: with open(prompt_file, "r", encoding="utf-8") as f:
prompt = f.read() prompt = f.read()
parts = [] parts = []
i = 0 i = 0

View File

@@ -24,7 +24,7 @@ def generate_ppt(
Status message Status message
""" """
# Load presentation plan # Load presentation plan
with open(plan_file, "r") as f: with open(plan_file, "r", encoding="utf-8") as f:
plan = json.load(f) plan = json.load(f)
# Determine slide dimensions based on aspect ratio # Determine slide dimensions based on aspect ratio

View File

@@ -87,7 +87,7 @@ def load_run_results(benchmark_dir: Path) -> dict:
metadata_path = eval_dir / "eval_metadata.json" metadata_path = eval_dir / "eval_metadata.json"
if metadata_path.exists(): if metadata_path.exists():
try: try:
with open(metadata_path) as mf: with open(metadata_path, encoding="utf-8") as mf:
eval_id = json.load(mf).get("eval_id", eval_idx) eval_id = json.load(mf).get("eval_id", eval_idx)
except (json.JSONDecodeError, OSError): except (json.JSONDecodeError, OSError):
eval_id = eval_idx eval_id = eval_idx
@@ -117,7 +117,7 @@ def load_run_results(benchmark_dir: Path) -> dict:
continue continue
try: try:
with open(grading_file) as f: with open(grading_file, encoding="utf-8") as f:
grading = json.load(f) grading = json.load(f)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
print(f"Warning: Invalid JSON in {grading_file}: {e}") print(f"Warning: Invalid JSON in {grading_file}: {e}")
@@ -139,7 +139,7 @@ def load_run_results(benchmark_dir: Path) -> dict:
timing_file = run_dir / "timing.json" timing_file = run_dir / "timing.json"
if result["time_seconds"] == 0.0 and timing_file.exists(): if result["time_seconds"] == 0.0 and timing_file.exists():
try: try:
with open(timing_file) as tf: with open(timing_file, encoding="utf-8") as tf:
timing_data = json.load(tf) timing_data = json.load(tf)
result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0) result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
result["tokens"] = timing_data.get("total_tokens", 0) result["tokens"] = timing_data.get("total_tokens", 0)
@@ -374,13 +374,13 @@ def main():
output_md = output_json.with_suffix(".md") output_md = output_json.with_suffix(".md")
# Write benchmark.json # Write benchmark.json
with open(output_json, "w") as f: with open(output_json, "w", encoding="utf-8") as f:
json.dump(benchmark, f, indent=2) json.dump(benchmark, f, indent=2)
print(f"Generated: {output_json}") print(f"Generated: {output_json}")
# Write benchmark.md # Write benchmark.md
markdown = generate_markdown(benchmark) markdown = generate_markdown(benchmark)
with open(output_md, "w") as f: with open(output_md, "w", encoding="utf-8") as f:
f.write(markdown) f.write(markdown)
print(f"Generated: {output_md}") print(f"Generated: {output_md}")

View File

@@ -11,7 +11,7 @@ def generate_video(
output_file: str, output_file: str,
aspect_ratio: str = "16:9", aspect_ratio: str = "16:9",
) -> str: ) -> str:
with open(prompt_file, "r") as f: with open(prompt_file, "r", encoding="utf-8") as f:
prompt = f.read() prompt = f.read()
referenceImages = [] referenceImages = []
i = 0 i = 0