diff --git a/backend/src/tools/builtins/present_file_tool.py b/backend/src/tools/builtins/present_file_tool.py
index de5c41a..8c3ff89 100644
--- a/backend/src/tools/builtins/present_file_tool.py
+++ b/backend/src/tools/builtins/present_file_tool.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Annotated
 
 from langchain.tools import InjectedToolCallId, ToolRuntime, tool
@@ -6,6 +7,73 @@ from langgraph.types import Command
 from langgraph.typing import ContextT
 
 from src.agents.thread_state import ThreadState
+from src.config.paths import VIRTUAL_PATH_PREFIX, get_paths
+
+OUTPUTS_VIRTUAL_PREFIX = f"{VIRTUAL_PATH_PREFIX}/outputs"
+
+
+def _normalize_presented_filepath(
+    runtime: ToolRuntime[ContextT, ThreadState],
+    filepath: str,
+) -> str:
+    """Normalize a presented file path to the `/mnt/user-data/outputs/*` contract.
+
+    Accepts either:
+    - A virtual sandbox path such as `/mnt/user-data/outputs/report.md`
+    - A host-side thread outputs path such as
+      `/app/backend/.deer-flow/threads/<thread_id>/user-data/outputs/report.md`
+
+    Returns:
+        The normalized virtual path.
+
+    Raises:
+        ValueError: If runtime metadata is missing, the path is outside the
+            current thread's outputs directory, or the path is the outputs
+            directory itself.
+    """
+    if runtime.state is None:
+        raise ValueError("Thread runtime state is not available")
+
+    # The runtime context may be unset; treat that like a missing thread ID so
+    # the caller sees a ValueError instead of an AttributeError.
+    context = runtime.context or {}
+    thread_id = context.get("thread_id")
+    if not thread_id:
+        raise ValueError("Thread ID is not available in runtime context")
+
+    thread_data = runtime.state.get("thread_data") or {}
+    outputs_path = thread_data.get("outputs_path")
+    if not outputs_path:
+        raise ValueError("Thread outputs path is not available in runtime state")
+
+    outputs_dir = Path(outputs_path).resolve()
+    stripped = filepath.lstrip("/")
+    virtual_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
+
+    if stripped == virtual_prefix or stripped.startswith(virtual_prefix + "/"):
+        # Canonicalise the mapped path so it is compared against the resolved
+        # outputs directory on equal terms (symlinks, `..` segments).
+        actual_path = Path(
+            get_paths().resolve_virtual_path(thread_id, filepath)
+        ).resolve()
+    else:
+        actual_path = Path(filepath).expanduser().resolve()
+
+    try:
+        relative_path = actual_path.relative_to(outputs_dir)
+    except ValueError as exc:
+        raise ValueError(
+            f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}"
+        ) from exc
+
+    # `relative_to` yields "." for the outputs directory itself, which would
+    # otherwise become the malformed artifact path `<prefix>/outputs/.`.
+    if not relative_path.parts:
+        raise ValueError(
+            f"Only files in {OUTPUTS_VIRTUAL_PREFIX} can be presented: {filepath}"
+        )
+
+    return f"{OUTPUTS_VIRTUAL_PREFIX}/{relative_path.as_posix()}"
 
 
 @tool("present_files", parse_docstring=True)
@@ -33,7 +101,25 @@ def present_file_tool(
     Args:
         filepaths: List of absolute file paths to present to the user. **Only** files in `/mnt/user-data/outputs` can be presented.
     """
+    # Normalize every path up front so a single invalid entry rejects the whole
+    # call; the error is returned as a tool message rather than raised.
+    try:
+        normalized_paths = [
+            _normalize_presented_filepath(runtime, filepath) for filepath in filepaths
+        ]
+    except ValueError as exc:
+        return Command(
+            update={
+                "messages": [ToolMessage(f"Error: {exc}", tool_call_id=tool_call_id)]
+            },
+        )
+
     # The merge_artifacts reducer will handle merging and deduplication
     return Command(
-        update={"artifacts": filepaths, "messages": [ToolMessage("Successfully presented files", tool_call_id=tool_call_id)]},
+        update={
+            "artifacts": normalized_paths,
+            "messages": [
+                ToolMessage("Successfully presented files", tool_call_id=tool_call_id)
+            ],
+        },
     )
diff --git a/backend/tests/test_present_file_tool_core_logic.py b/backend/tests/test_present_file_tool_core_logic.py
new file mode 100644
index 0000000..104931f
--- /dev/null
+++ b/backend/tests/test_present_file_tool_core_logic.py
@@ -0,0 +1,109 @@
+"""Core behavior tests for present_files path normalization."""
+
+import importlib
+from types import SimpleNamespace
+
+present_file_tool_module = importlib.import_module(
+    "src.tools.builtins.present_file_tool"
+)
+
+
+def _make_runtime(outputs_path: str) -> SimpleNamespace:
+    return SimpleNamespace(
+        state={"thread_data": {"outputs_path": outputs_path}},
+        context={"thread_id": "thread-1"},
+    )
+
+
+def test_present_files_normalizes_host_outputs_path(tmp_path):
+    outputs_dir = tmp_path / "threads" / "thread-1" / "user-data" / "outputs"
+    outputs_dir.mkdir(parents=True)
+    artifact_path = outputs_dir / "report.md"
+    artifact_path.write_text("ok")
+
+    result = present_file_tool_module.present_file_tool.func(
+        runtime=_make_runtime(str(outputs_dir)),
+        filepaths=[str(artifact_path)],
+        tool_call_id="tc-1",
+    )
+
+    assert result.update["artifacts"] == ["/mnt/user-data/outputs/report.md"]
+    assert result.update["messages"][0].content == "Successfully presented files"
+
+
+def test_present_files_keeps_virtual_outputs_path(tmp_path, monkeypatch):
+    outputs_dir = tmp_path / "threads" / "thread-1" / "user-data" / "outputs"
+    outputs_dir.mkdir(parents=True)
+    artifact_path = outputs_dir / "summary.json"
+    artifact_path.write_text("{}")
+
+    monkeypatch.setattr(
+        present_file_tool_module,
+        "get_paths",
+        lambda: SimpleNamespace(
+            resolve_virtual_path=lambda thread_id, path: artifact_path
+        ),
+    )
+
+    result = present_file_tool_module.present_file_tool.func(
+        runtime=_make_runtime(str(outputs_dir)),
+        filepaths=["/mnt/user-data/outputs/summary.json"],
+        tool_call_id="tc-2",
+    )
+
+    assert result.update["artifacts"] == ["/mnt/user-data/outputs/summary.json"]
+
+
+def test_present_files_rejects_paths_outside_outputs(tmp_path):
+    outputs_dir = tmp_path / "threads" / "thread-1" / "user-data" / "outputs"
+    workspace_dir = tmp_path / "threads" / "thread-1" / "user-data" / "workspace"
+    outputs_dir.mkdir(parents=True)
+    workspace_dir.mkdir(parents=True)
+    leaked_path = workspace_dir / "notes.txt"
+    leaked_path.write_text("leak")
+
+    result = present_file_tool_module.present_file_tool.func(
+        runtime=_make_runtime(str(outputs_dir)),
+        filepaths=[str(leaked_path)],
+        tool_call_id="tc-3",
+    )
+
+    assert "artifacts" not in result.update
+    assert (
+        result.update["messages"][0].content
+        == f"Error: Only files in /mnt/user-data/outputs can be presented: {leaked_path}"
+    )
+
+
+def test_present_files_rejects_outputs_directory_itself(tmp_path):
+    outputs_dir = tmp_path / "threads" / "thread-1" / "user-data" / "outputs"
+    outputs_dir.mkdir(parents=True)
+
+    result = present_file_tool_module.present_file_tool.func(
+        runtime=_make_runtime(str(outputs_dir)),
+        filepaths=[str(outputs_dir)],
+        tool_call_id="tc-4",
+    )
+
+    assert "artifacts" not in result.update
+    assert (
+        result.update["messages"][0].content
+        == f"Error: Only files in /mnt/user-data/outputs can be presented: {outputs_dir}"
+    )
+
+
+def test_present_files_reports_missing_runtime_context(tmp_path):
+    runtime = _make_runtime(str(tmp_path))
+    runtime.context = None
+
+    result = present_file_tool_module.present_file_tool.func(
+        runtime=runtime,
+        filepaths=[str(tmp_path / "report.md")],
+        tool_call_id="tc-5",
+    )
+
+    assert "artifacts" not in result.update
+    assert (
+        result.update["messages"][0].content
+        == "Error: Thread ID is not available in runtime context"
+    )