fix: normalize presented artifact paths (#998)

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
Xinmin Zeng
2026-03-06 22:51:27 +08:00
committed by GitHub
parent 9d2144d431
commit 09325ca28f
2 changed files with 145 additions and 1 deletions

View File

@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Annotated
from langchain.tools import InjectedToolCallId, ToolRuntime, tool
@@ -6,6 +7,58 @@ from langgraph.types import Command
from langgraph.typing import ContextT
from src.agents.thread_state import ThreadState
from src.config.paths import VIRTUAL_PATH_PREFIX, get_paths
# Virtual directory prefix that every presented artifact path is normalized to;
# built from the shared VIRTUAL_PATH_PREFIX plus the "outputs" sub-directory.
OUTPUTS_VIRTUAL_PREFIX = f"{VIRTUAL_PATH_PREFIX}/outputs"
def _normalize_presented_filepath(
    runtime: ToolRuntime[ContextT, ThreadState],
    filepath: str,
) -> str:
    """Normalize a presented file path to the `/mnt/user-data/outputs/*` contract.

    Accepts either:
    - A virtual sandbox path such as `/mnt/user-data/outputs/report.md`
    - A host-side thread outputs path such as
      `/app/backend/.deer-flow/threads/<thread>/user-data/outputs/report.md`

    Args:
        runtime: Tool runtime. Its ``context`` must provide ``thread_id`` and
            its ``state`` must provide ``thread_data["outputs_path"]``.
        filepath: The path the caller wants to present.

    Returns:
        The normalized virtual path.

    Raises:
        ValueError: If runtime metadata is missing, the path cannot be
            resolved, or the path is outside the current thread's outputs
            directory.
    """
    if runtime.state is None:
        raise ValueError("Thread runtime state is not available")

    # The runtime context can be None when no context was supplied; treat that
    # exactly like a missing thread id so callers only ever see ValueError.
    context = runtime.context
    thread_id = context.get("thread_id") if context else None
    if not thread_id:
        raise ValueError("Thread ID is not available in runtime context")

    thread_data = runtime.state.get("thread_data") or {}
    outputs_path = thread_data.get("outputs_path")
    if not outputs_path:
        raise ValueError("Thread outputs path is not available in runtime state")

    outputs_dir = Path(outputs_path).resolve()

    # Compare without leading slashes so "mnt/..." and "/mnt/..." are both
    # recognized as virtual sandbox paths.
    stripped = filepath.lstrip("/")
    virtual_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")

    if stripped == virtual_prefix or stripped.startswith(virtual_prefix + "/"):
        actual_path = get_paths().resolve_virtual_path(thread_id, filepath)
    else:
        try:
            actual_path = Path(filepath).expanduser().resolve()
        except (RuntimeError, OSError) as exc:
            # expanduser() raises RuntimeError for an unresolvable "~user";
            # surface it as ValueError so the tool reports it instead of crashing.
            raise ValueError(f"Invalid file path: {filepath}") from exc

    try:
        relative_path = actual_path.relative_to(outputs_dir)
    except ValueError as exc:
        raise ValueError(
            f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}"
        ) from exc

    return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
@tool("present_files", parse_docstring=True)
@@ -33,7 +86,23 @@ def present_file_tool(
Args:
filepaths: List of absolute file paths to present to the user. **Only** files in `/mnt/user-data/outputs` can be presented.
"""
try:
normalized_paths = [
_normalize_presented_filepath(runtime, filepath) for filepath in filepaths
]
except ValueError as exc:
return Command(
update={
"messages": [ToolMessage(f"Error: {exc}", tool_call_id=tool_call_id)]
},
)
# The merge_artifacts reducer will handle merging and deduplication
return Command(
update={"artifacts": filepaths, "messages": [ToolMessage("Successfully presented files", tool_call_id=tool_call_id)]},
update={
"artifacts": normalized_paths,
"messages": [
ToolMessage("Successfully presented files", tool_call_id=tool_call_id)
],
},
)

View File

@@ -0,0 +1,75 @@
"""Core behavior tests for present_files path normalization."""
import importlib
from types import SimpleNamespace
# Load the tool module by dotted name and keep a module handle, so tests can
# both call `present_file_tool` and monkeypatch `get_paths` on the module.
present_file_tool_module = importlib.import_module(
"src.tools.builtins.present_file_tool"
)
def _make_runtime(outputs_path: str) -> SimpleNamespace:
return SimpleNamespace(
state={"thread_data": {"outputs_path": outputs_path}},
context={"thread_id": "thread-1"},
)
def test_present_files_normalizes_host_outputs_path(tmp_path):
    """A host-side path inside the outputs dir is reported as its virtual path."""
    host_outputs = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    host_outputs.mkdir(parents=True)
    report = host_outputs / "report.md"
    report.write_text("ok")

    command = present_file_tool_module.present_file_tool.func(
        runtime=_make_runtime(str(host_outputs)),
        filepaths=[str(report)],
        tool_call_id="tc-1",
    )

    update = command.update
    assert update["artifacts"] == ["/mnt/user-data/outputs/report.md"]
    first_message = update["messages"][0]
    assert first_message.content == "Successfully presented files"
def test_present_files_keeps_virtual_outputs_path(tmp_path, monkeypatch):
    """A path already in virtual form comes back unchanged."""
    host_outputs = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    host_outputs.mkdir(parents=True)
    summary = host_outputs / "summary.json"
    summary.write_text("{}")

    # Stub the path service so the virtual path resolves to the temp file
    # created above, regardless of the thread id or path passed in.
    def _resolve_virtual_path(thread_id, path):
        return summary

    fake_paths = SimpleNamespace(resolve_virtual_path=_resolve_virtual_path)
    monkeypatch.setattr(present_file_tool_module, "get_paths", lambda: fake_paths)

    command = present_file_tool_module.present_file_tool.func(
        runtime=_make_runtime(str(host_outputs)),
        filepaths=["/mnt/user-data/outputs/summary.json"],
        tool_call_id="tc-2",
    )

    expected_artifacts = ["/mnt/user-data/outputs/summary.json"]
    assert command.update["artifacts"] == expected_artifacts
def test_present_files_rejects_paths_outside_outputs(tmp_path):
    """A file in a sibling directory of outputs yields an error and no artifacts."""
    user_data = tmp_path / "threads" / "thread-1" / "user-data"
    host_outputs = user_data / "outputs"
    host_workspace = user_data / "workspace"
    host_outputs.mkdir(parents=True)
    host_workspace.mkdir(parents=True)
    leaked_path = host_workspace / "notes.txt"
    leaked_path.write_text("leak")

    command = present_file_tool_module.present_file_tool.func(
        runtime=_make_runtime(str(host_outputs)),
        filepaths=[str(leaked_path)],
        tool_call_id="tc-3",
    )

    update = command.update
    assert "artifacts" not in update
    expected_error = (
        f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}"
    )
    assert update["messages"][0].content == expected_error