mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
fix: normalize presented artifact paths (#998)
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
from pathlib import Path
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
from langchain.tools import InjectedToolCallId, ToolRuntime, tool
|
from langchain.tools import InjectedToolCallId, ToolRuntime, tool
|
||||||
@@ -6,6 +7,58 @@ from langgraph.types import Command
|
|||||||
from langgraph.typing import ContextT
|
from langgraph.typing import ContextT
|
||||||
|
|
||||||
from src.agents.thread_state import ThreadState
|
from src.agents.thread_state import ThreadState
|
||||||
|
from src.config.paths import VIRTUAL_PATH_PREFIX, get_paths
|
||||||
|
|
||||||
|
# Virtual directory prefix under which presented artifact paths are reported;
# it is prepended to normalized paths and quoted in the rejection error message.
OUTPUTS_VIRTUAL_PREFIX = f"{VIRTUAL_PATH_PREFIX}/outputs"
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_presented_filepath(
    runtime: ToolRuntime[ContextT, ThreadState],
    filepath: str,
) -> str:
    """Normalize a presented file path to the `/mnt/user-data/outputs/*` contract.

    Accepts either:
    - A virtual sandbox path such as `/mnt/user-data/outputs/report.md`
    - A host-side thread outputs path such as
      `/app/backend/.deer-flow/threads/<thread>/user-data/outputs/report.md`

    Args:
        runtime: Tool runtime. ``runtime.context`` must provide ``thread_id``
            and ``runtime.state["thread_data"]["outputs_path"]`` must be set.
        filepath: The path supplied by the caller, in either accepted form.

    Returns:
        The normalized virtual path.

    Raises:
        ValueError: If runtime metadata is missing or the path is outside the
            current thread's outputs directory.
    """
    if runtime.state is None:
        raise ValueError("Thread runtime state is not available")

    # A missing context must surface as the documented ValueError (which the
    # tool turns into an error message) rather than an AttributeError.
    context = runtime.context
    thread_id = context.get("thread_id") if context else None
    if not thread_id:
        raise ValueError("Thread ID is not available in runtime context")

    thread_data = runtime.state.get("thread_data") or {}
    outputs_path = thread_data.get("outputs_path")
    if not outputs_path:
        raise ValueError("Thread outputs path is not available in runtime state")

    outputs_dir = Path(outputs_path).resolve()

    # Compare without leading slashes so the prefix check does not depend on
    # how many leading "/" either side carries.
    stripped = filepath.lstrip("/")
    virtual_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")

    if stripped == virtual_prefix or stripped.startswith(virtual_prefix + "/"):
        candidate = get_paths().resolve_virtual_path(thread_id, filepath)
    else:
        candidate = Path(filepath).expanduser()

    # Canonicalize both branches the same way as outputs_dir; otherwise a
    # symlinked base directory makes relative_to() reject a valid file.
    actual_path = Path(candidate).resolve()

    try:
        relative_path = actual_path.relative_to(outputs_dir)
    except ValueError as exc:
        raise ValueError(
            f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}"
        ) from exc

    return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
|
||||||
|
|
||||||
|
|
||||||
@tool("present_files", parse_docstring=True)
|
@tool("present_files", parse_docstring=True)
|
||||||
@@ -33,7 +86,23 @@ def present_file_tool(
|
|||||||
Args:
|
Args:
|
||||||
filepaths: List of absolute file paths to present to the user. **Only** files in `/mnt/user-data/outputs` can be presented.
|
filepaths: List of absolute file paths to present to the user. **Only** files in `/mnt/user-data/outputs` can be presented.
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
|
normalized_paths = [
|
||||||
|
_normalize_presented_filepath(runtime, filepath) for filepath in filepaths
|
||||||
|
]
|
||||||
|
except ValueError as exc:
|
||||||
|
return Command(
|
||||||
|
update={
|
||||||
|
"messages": [ToolMessage(f"Error: {exc}", tool_call_id=tool_call_id)]
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
# The merge_artifacts reducer will handle merging and deduplication
|
# The merge_artifacts reducer will handle merging and deduplication
|
||||||
return Command(
|
return Command(
|
||||||
update={"artifacts": filepaths, "messages": [ToolMessage("Successfully presented files", tool_call_id=tool_call_id)]},
|
update={
|
||||||
|
"artifacts": normalized_paths,
|
||||||
|
"messages": [
|
||||||
|
ToolMessage("Successfully presented files", tool_call_id=tool_call_id)
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
75
backend/tests/test_present_file_tool_core_logic.py
Normal file
75
backend/tests/test_present_file_tool_core_logic.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
"""Core behavior tests for present_files path normalization."""
|
||||||
|
|
||||||
|
import importlib
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
present_file_tool_module = importlib.import_module(
|
||||||
|
"src.tools.builtins.present_file_tool"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_runtime(outputs_path: str) -> SimpleNamespace:
|
||||||
|
return SimpleNamespace(
|
||||||
|
state={"thread_data": {"outputs_path": outputs_path}},
|
||||||
|
context={"thread_id": "thread-1"},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_present_files_normalizes_host_outputs_path(tmp_path):
    """A host-side path inside the thread outputs dir is reported as a virtual path."""
    host_outputs = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    host_outputs.mkdir(parents=True)
    report = host_outputs / "report.md"
    report.write_text("ok")

    # Call the undecorated function so no tool-invocation machinery is involved.
    tool_fn = present_file_tool_module.present_file_tool.func
    command = tool_fn(
        runtime=_make_runtime(str(host_outputs)),
        filepaths=[str(report)],
        tool_call_id="tc-1",
    )

    update = command.update
    assert update["artifacts"] == ["/mnt/user-data/outputs/report.md"]
    assert update["messages"][0].content == "Successfully presented files"
|
||||||
|
|
||||||
|
|
||||||
|
def test_present_files_keeps_virtual_outputs_path(tmp_path, monkeypatch):
    """A path already in virtual form is returned unchanged."""
    host_outputs = tmp_path.joinpath("threads", "thread-1", "user-data", "outputs")
    host_outputs.mkdir(parents=True)
    summary = host_outputs / "summary.json"
    summary.write_text("{}")

    def _resolve_virtual_path(thread_id, path):
        # Arguments are ignored: every virtual path maps to the file created above.
        return summary

    def _fake_get_paths():
        return SimpleNamespace(resolve_virtual_path=_resolve_virtual_path)

    monkeypatch.setattr(present_file_tool_module, "get_paths", _fake_get_paths)

    tool_fn = present_file_tool_module.present_file_tool.func
    command = tool_fn(
        runtime=_make_runtime(str(host_outputs)),
        filepaths=["/mnt/user-data/outputs/summary.json"],
        tool_call_id="tc-2",
    )

    assert command.update["artifacts"] == ["/mnt/user-data/outputs/summary.json"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_present_files_rejects_paths_outside_outputs(tmp_path):
    """A file in the sibling workspace dir yields an error message and no artifacts."""
    user_data = tmp_path / "threads" / "thread-1" / "user-data"
    outputs_dir = user_data / "outputs"
    workspace_dir = user_data / "workspace"
    outputs_dir.mkdir(parents=True)
    workspace_dir.mkdir(parents=True)

    leaked_path = workspace_dir / "notes.txt"
    leaked_path.write_text("leak")

    tool_fn = present_file_tool_module.present_file_tool.func
    command = tool_fn(
        runtime=_make_runtime(str(outputs_dir)),
        filepaths=[str(leaked_path)],
        tool_call_id="tc-3",
    )

    expected_error = f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}"
    update = command.update
    assert "artifacts" not in update
    assert update["messages"][0].content == expected_error
|
||||||
Reference in New Issue
Block a user