fix: issue 1138 windows encoding (#1139)

* fix(windows): use utf-8 for text file operations

* fix(windows): normalize sandbox path masking

* fix(windows): preserve utf-8 handling after backend split
This commit is contained in:
-Astraia-
2026-03-16 16:53:12 +08:00
committed by GitHub
parent 76803b826f
commit 191b60a326
15 changed files with 116 additions and 24 deletions

View File

@@ -0,0 +1,27 @@
import asyncio
from pathlib import Path
from starlette.requests import Request
import app.gateway.routers.artifacts as artifacts_router
def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None:
artifact_path = tmp_path / "note.txt"
text = "Curly quotes: \u201cutf8\u201d"
artifact_path.write_text(text, encoding="utf-8")
original_read_text = Path.read_text
def read_text_with_gbk_default(self, *args, **kwargs):
kwargs.setdefault("encoding", "gbk")
return original_read_text(self, *args, **kwargs)
monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default)
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
request = Request({"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": b""})
response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", request))
assert bytes(response.body).decode("utf-8") == text
assert response.media_type == "text/plain"

View File

@@ -0,0 +1,33 @@
import builtins
import deerflow.sandbox.local.local_sandbox as local_sandbox
from deerflow.sandbox.local.local_sandbox import LocalSandbox
def _open(base, file, mode="r", *args, **kwargs):
if "b" in mode:
return base(file, mode, *args, **kwargs)
return base(file, mode, *args, encoding=kwargs.pop("encoding", "gbk"), **kwargs)
def test_read_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
path = tmp_path / "utf8.txt"
text = "\u201cutf8\u201d"
path.write_text(text, encoding="utf-8")
base = builtins.open
monkeypatch.setattr(local_sandbox, "open", lambda file, mode="r", *args, **kwargs: _open(base, file, mode, *args, **kwargs), raising=False)
assert LocalSandbox("t").read_file(str(path)) == text
def test_write_file_uses_utf8_on_windows_locale(tmp_path, monkeypatch):
path = tmp_path / "utf8.txt"
text = "emoji \U0001F600"
base = builtins.open
monkeypatch.setattr(local_sandbox, "open", lambda file, mode="r", *args, **kwargs: _open(base, file, mode, *args, **kwargs), raising=False)
LocalSandbox("t").write_file(str(path), text)
assert path.read_text(encoding="utf-8") == text

View File

@@ -18,8 +18,8 @@ def test_replace_virtual_path_maps_virtual_root_and_subpaths() -> None:
"outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs",
}
assert replace_virtual_path("/mnt/user-data/workspace/a.txt", thread_data) == "/tmp/deer-flow/threads/t1/user-data/workspace/a.txt"
assert replace_virtual_path("/mnt/user-data", thread_data) == "/tmp/deer-flow/threads/t1/user-data"
assert Path(replace_virtual_path("/mnt/user-data/workspace/a.txt", thread_data)).as_posix() == "/tmp/deer-flow/threads/t1/user-data/workspace/a.txt"
assert Path(replace_virtual_path("/mnt/user-data", thread_data)).as_posix() == "/tmp/deer-flow/threads/t1/user-data"
def test_mask_local_paths_in_output_hides_host_paths() -> None:

View File

@@ -58,3 +58,31 @@ unsupported: true
assert valid is False
assert "unsupported" in message
assert skill_name is None
def test_validate_skill_frontmatter_reads_utf8_on_windows_locale(tmp_path, monkeypatch) -> None:
skill_dir = tmp_path / "demo-skill"
_write_skill(
skill_dir,
"""---
name: demo-skill
description: "Curly quotes: \u201cutf8\u201d"
---
# Demo Skill
""",
)
original_read_text = Path.read_text
def read_text_with_gbk_default(self, *args, **kwargs):
kwargs.setdefault("encoding", "gbk")
return original_read_text(self, *args, **kwargs)
monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default)
valid, message, skill_name = VALIDATE_SKILL_FRONTMATTER(skill_dir)
assert valid is True
assert message == "Skill is valid!"
assert skill_name == "demo-skill"