mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-02 22:02:13 +08:00
fix: normalize presented artifact paths (#998)
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
from langchain.tools import InjectedToolCallId, ToolRuntime, tool
|
||||
@@ -6,6 +7,58 @@ from langgraph.types import Command
|
||||
from langgraph.typing import ContextT
|
||||
|
||||
from src.agents.thread_state import ThreadState
|
||||
from src.config.paths import VIRTUAL_PATH_PREFIX, get_paths
|
||||
|
||||
OUTPUTS_VIRTUAL_PREFIX = f"{VIRTUAL_PATH_PREFIX}/outputs"
|
||||
|
||||
|
||||
def _normalize_presented_filepath(
    runtime: ToolRuntime[ContextT, ThreadState],
    filepath: str,
) -> str:
    """Normalize a presented file path to the `/mnt/user-data/outputs/*` contract.

    Accepts either:
    - A virtual sandbox path such as `/mnt/user-data/outputs/report.md`
    - A host-side thread outputs path such as
      `/app/backend/.deer-flow/threads/<thread>/user-data/outputs/report.md`

    Args:
        runtime: Tool runtime; its ``context`` supplies ``thread_id`` and its
            ``state["thread_data"]["outputs_path"]`` supplies the host-side
            outputs directory of the current thread.
        filepath: The path to normalize (virtual or host-side).

    Returns:
        The normalized virtual path.

    Raises:
        ValueError: If runtime metadata is missing or the path is outside the
            current thread's outputs directory.
    """
    if runtime.state is None:
        raise ValueError("Thread runtime state is not available")

    # A missing context must surface as ValueError (the documented contract),
    # not as an AttributeError from calling `.get` on None.
    context = runtime.context
    thread_id = context.get("thread_id") if context is not None else None
    if not thread_id:
        raise ValueError("Thread ID is not available in runtime context")

    thread_data = runtime.state.get("thread_data") or {}
    outputs_path = thread_data.get("outputs_path")
    if not outputs_path:
        raise ValueError("Thread outputs path is not available in runtime state")

    outputs_dir = Path(outputs_path).resolve()

    # Compare without leading slashes so "mnt/user-data/..." and
    # "/mnt/user-data/..." are both recognised as virtual paths.
    stripped = filepath.lstrip("/")
    virtual_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")

    if stripped == virtual_prefix or stripped.startswith(virtual_prefix + "/"):
        # Canonicalise the mapped path as well: `outputs_dir` is resolved, so
        # both sides of `relative_to` must be in the same (symlink-free) form.
        actual_path = Path(
            get_paths().resolve_virtual_path(thread_id, filepath)
        ).resolve()
    else:
        actual_path = Path(filepath).expanduser().resolve()

    try:
        relative_path = actual_path.relative_to(outputs_dir)
    except ValueError as exc:
        raise ValueError(
            f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}"
        ) from exc

    return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
|
||||
|
||||
|
||||
@tool("present_files", parse_docstring=True)
|
||||
@@ -33,7 +86,23 @@ def present_file_tool(
|
||||
Args:
|
||||
filepaths: List of absolute file paths to present to the user. **Only** files in `/mnt/user-data/outputs` can be presented.
|
||||
"""
|
||||
try:
|
||||
normalized_paths = [
|
||||
_normalize_presented_filepath(runtime, filepath) for filepath in filepaths
|
||||
]
|
||||
except ValueError as exc:
|
||||
return Command(
|
||||
update={
|
||||
"messages": [ToolMessage(f"Error: {exc}", tool_call_id=tool_call_id)]
|
||||
},
|
||||
)
|
||||
|
||||
# The merge_artifacts reducer will handle merging and deduplication
|
||||
return Command(
|
||||
update={"artifacts": filepaths, "messages": [ToolMessage("Successfully presented files", tool_call_id=tool_call_id)]},
|
||||
update={
|
||||
"artifacts": normalized_paths,
|
||||
"messages": [
|
||||
ToolMessage("Successfully presented files", tool_call_id=tool_call_id)
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
75
backend/tests/test_present_file_tool_core_logic.py
Normal file
75
backend/tests/test_present_file_tool_core_logic.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Core behavior tests for present_files path normalization."""
|
||||
|
||||
import importlib
|
||||
from types import SimpleNamespace
|
||||
|
||||
present_file_tool_module = importlib.import_module(
|
||||
"src.tools.builtins.present_file_tool"
|
||||
)
|
||||
|
||||
|
||||
def _make_runtime(outputs_path: str) -> SimpleNamespace:
|
||||
return SimpleNamespace(
|
||||
state={"thread_data": {"outputs_path": outputs_path}},
|
||||
context={"thread_id": "thread-1"},
|
||||
)
|
||||
|
||||
|
||||
def test_present_files_normalizes_host_outputs_path(tmp_path):
    """A host-side path inside the thread outputs dir is reported as a virtual path."""
    thread_outputs = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    thread_outputs.mkdir(parents=True)
    report = thread_outputs / "report.md"
    report.write_text("ok")

    runtime = _make_runtime(str(thread_outputs))
    command = present_file_tool_module.present_file_tool.func(
        runtime=runtime,
        filepaths=[str(report)],
        tool_call_id="tc-1",
    )

    update = command.update
    assert update["artifacts"] == ["/mnt/user-data/outputs/report.md"]
    assert update["messages"][0].content == "Successfully presented files"
|
||||
|
||||
|
||||
def test_present_files_keeps_virtual_outputs_path(tmp_path, monkeypatch):
    """A path already in virtual form is returned unchanged in the artifacts."""
    thread_outputs = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    thread_outputs.mkdir(parents=True)
    summary = thread_outputs / "summary.json"
    summary.write_text("{}")

    # Stub the path service so the virtual path maps onto the temp file.
    def fake_resolve_virtual_path(thread_id, path):
        return summary

    def fake_get_paths():
        return SimpleNamespace(resolve_virtual_path=fake_resolve_virtual_path)

    monkeypatch.setattr(present_file_tool_module, "get_paths", fake_get_paths)

    runtime = _make_runtime(str(thread_outputs))
    command = present_file_tool_module.present_file_tool.func(
        runtime=runtime,
        filepaths=["/mnt/user-data/outputs/summary.json"],
        tool_call_id="tc-2",
    )

    assert command.update["artifacts"] == ["/mnt/user-data/outputs/summary.json"]
|
||||
|
||||
|
||||
def test_present_files_rejects_paths_outside_outputs(tmp_path):
    """A file in the sibling workspace dir yields an error and no artifacts."""
    user_data = tmp_path / "threads" / "thread-1" / "user-data"
    thread_outputs = user_data / "outputs"
    thread_workspace = user_data / "workspace"
    for directory in (thread_outputs, thread_workspace):
        directory.mkdir(parents=True)

    leaked_path = thread_workspace / "notes.txt"
    leaked_path.write_text("leak")

    command = present_file_tool_module.present_file_tool.func(
        runtime=_make_runtime(str(thread_outputs)),
        filepaths=[str(leaked_path)],
        tool_call_id="tc-3",
    )

    update = command.update
    assert "artifacts" not in update

    expected_message = (
        f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}"
    )
    assert update["messages"][0].content == expected_message
|
||||
Reference in New Issue
Block a user