mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-21 21:24:46 +08:00
feat(sandbox): harden local file access and mask host paths
- enforce local sandbox file tools to only accept /mnt/user-data paths - add path traversal checks against thread workspace/uploads/outputs roots - preserve requested virtual paths in tool error messages (no host path leaks) - mask local absolute paths in bash output back to virtual sandbox paths - update bash tool guidance to prefer thread-local venv + python -m pip - add regression tests for path mapping, masking, and access restrictions Fixes #968
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from langchain.tools import ToolRuntime, tool
|
from langchain.tools import ToolRuntime, tool
|
||||||
from langgraph.typing import ContextT
|
from langgraph.typing import ContextT
|
||||||
@@ -29,36 +30,118 @@ def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
|
|||||||
Returns:
|
Returns:
|
||||||
The path with virtual prefix replaced by actual path.
|
The path with virtual prefix replaced by actual path.
|
||||||
"""
|
"""
|
||||||
if not path.startswith(VIRTUAL_PATH_PREFIX):
|
|
||||||
return path
|
|
||||||
|
|
||||||
if thread_data is None:
|
if thread_data is None:
|
||||||
return path
|
return path
|
||||||
|
|
||||||
# Map virtual subdirectories to thread_data keys
|
mappings = _thread_virtual_to_actual_mappings(thread_data)
|
||||||
path_mapping = {
|
if not mappings:
|
||||||
"workspace": thread_data.get("workspace_path"),
|
|
||||||
"uploads": thread_data.get("uploads_path"),
|
|
||||||
"outputs": thread_data.get("outputs_path"),
|
|
||||||
}
|
|
||||||
|
|
||||||
# Extract the subdirectory after /mnt/user-data/
|
|
||||||
relative_path = path[len(VIRTUAL_PATH_PREFIX) :].lstrip("/")
|
|
||||||
if not relative_path:
|
|
||||||
return path
|
return path
|
||||||
|
|
||||||
# Find which subdirectory this path belongs to
|
# Longest-prefix-first replacement with segment-boundary checks.
|
||||||
parts = relative_path.split("/", 1)
|
for virtual_base, actual_base in sorted(mappings.items(), key=lambda item: len(item[0]), reverse=True):
|
||||||
subdir = parts[0]
|
if path == virtual_base:
|
||||||
rest = parts[1] if len(parts) > 1 else ""
|
return actual_base
|
||||||
|
if path.startswith(f"{virtual_base}/"):
|
||||||
|
rest = path[len(virtual_base) :].lstrip("/")
|
||||||
|
return str(Path(actual_base) / rest) if rest else actual_base
|
||||||
|
|
||||||
actual_base = path_mapping.get(subdir)
|
return path
|
||||||
if actual_base is None:
|
|
||||||
return path
|
|
||||||
|
|
||||||
if rest:
|
|
||||||
return f"{actual_base}/{rest}"
|
def _thread_virtual_to_actual_mappings(thread_data: ThreadDataState) -> dict[str, str]:
|
||||||
return actual_base
|
"""Build virtual-to-actual path mappings for a thread."""
|
||||||
|
mappings: dict[str, str] = {}
|
||||||
|
|
||||||
|
workspace = thread_data.get("workspace_path")
|
||||||
|
uploads = thread_data.get("uploads_path")
|
||||||
|
outputs = thread_data.get("outputs_path")
|
||||||
|
|
||||||
|
if workspace:
|
||||||
|
mappings[f"{VIRTUAL_PATH_PREFIX}/workspace"] = workspace
|
||||||
|
if uploads:
|
||||||
|
mappings[f"{VIRTUAL_PATH_PREFIX}/uploads"] = uploads
|
||||||
|
if outputs:
|
||||||
|
mappings[f"{VIRTUAL_PATH_PREFIX}/outputs"] = outputs
|
||||||
|
|
||||||
|
# Also map the virtual root when all known dirs share the same parent.
|
||||||
|
actual_dirs = [Path(p) for p in (workspace, uploads, outputs) if p]
|
||||||
|
if actual_dirs:
|
||||||
|
common_parent = str(Path(actual_dirs[0]).parent)
|
||||||
|
if all(str(path.parent) == common_parent for path in actual_dirs):
|
||||||
|
mappings[VIRTUAL_PATH_PREFIX] = common_parent
|
||||||
|
|
||||||
|
return mappings
|
||||||
|
|
||||||
|
|
||||||
|
def _thread_actual_to_virtual_mappings(thread_data: ThreadDataState) -> dict[str, str]:
|
||||||
|
"""Build actual-to-virtual mappings for output masking."""
|
||||||
|
return {actual: virtual for virtual, actual in _thread_virtual_to_actual_mappings(thread_data).items()}
|
||||||
|
|
||||||
|
|
||||||
|
def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None) -> str:
|
||||||
|
"""Mask host absolute paths from local sandbox output using virtual paths."""
|
||||||
|
if thread_data is None:
|
||||||
|
return output
|
||||||
|
|
||||||
|
mappings = _thread_actual_to_virtual_mappings(thread_data)
|
||||||
|
if not mappings:
|
||||||
|
return output
|
||||||
|
|
||||||
|
result = output
|
||||||
|
for actual_base, virtual_base in sorted(mappings.items(), key=lambda item: len(item[0]), reverse=True):
|
||||||
|
raw_base = str(Path(actual_base))
|
||||||
|
resolved_base = str(Path(actual_base).resolve())
|
||||||
|
for base in {raw_base, resolved_base}:
|
||||||
|
escaped_actual = re.escape(base)
|
||||||
|
pattern = re.compile(escaped_actual + r"(?:/[^\s\"';&|<>()]*)?")
|
||||||
|
|
||||||
|
def replace_match(match: re.Match) -> str:
|
||||||
|
matched_path = match.group(0)
|
||||||
|
if matched_path == base:
|
||||||
|
return virtual_base
|
||||||
|
relative = matched_path[len(base) :].lstrip("/")
|
||||||
|
return f"{virtual_base}/{relative}" if relative else virtual_base
|
||||||
|
|
||||||
|
result = pattern.sub(replace_match, result)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_local_tool_path(path: str, thread_data: ThreadDataState | None) -> str:
|
||||||
|
"""Resolve and validate a local-sandbox tool path.
|
||||||
|
|
||||||
|
Only virtual paths under /mnt/user-data are allowed in local mode.
|
||||||
|
"""
|
||||||
|
if thread_data is None:
|
||||||
|
raise SandboxRuntimeError("Thread data not available for local sandbox")
|
||||||
|
|
||||||
|
if path != VIRTUAL_PATH_PREFIX and not path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
|
||||||
|
raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX} are allowed")
|
||||||
|
|
||||||
|
resolved_path = replace_virtual_path(path, thread_data)
|
||||||
|
resolved = Path(resolved_path).resolve()
|
||||||
|
|
||||||
|
allowed_roots = [
|
||||||
|
Path(p).resolve()
|
||||||
|
for p in (
|
||||||
|
thread_data.get("workspace_path"),
|
||||||
|
thread_data.get("uploads_path"),
|
||||||
|
thread_data.get("outputs_path"),
|
||||||
|
)
|
||||||
|
if p is not None
|
||||||
|
]
|
||||||
|
|
||||||
|
if not allowed_roots:
|
||||||
|
raise SandboxRuntimeError("No allowed local sandbox directories configured")
|
||||||
|
|
||||||
|
for root in allowed_roots:
|
||||||
|
try:
|
||||||
|
resolved.relative_to(root)
|
||||||
|
return str(resolved)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
raise PermissionError("Access denied: path traversal detected")
|
||||||
|
|
||||||
|
|
||||||
def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState | None) -> str:
|
def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState | None) -> str:
|
||||||
@@ -235,7 +318,8 @@ def bash_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, com
|
|||||||
|
|
||||||
|
|
||||||
- Use `python` to run Python code.
|
- Use `python` to run Python code.
|
||||||
- Use `pip install` to install Python packages.
|
- Prefer a thread-local virtual environment in `/mnt/user-data/workspace/.venv`.
|
||||||
|
- Use `python -m pip` (inside the virtual environment) to install Python packages.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
description: Explain why you are running this command in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
|
description: Explain why you are running this command in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
|
||||||
@@ -244,9 +328,11 @@ def bash_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, com
|
|||||||
try:
|
try:
|
||||||
sandbox = ensure_sandbox_initialized(runtime)
|
sandbox = ensure_sandbox_initialized(runtime)
|
||||||
ensure_thread_directories_exist(runtime)
|
ensure_thread_directories_exist(runtime)
|
||||||
|
thread_data = get_thread_data(runtime)
|
||||||
if is_local_sandbox(runtime):
|
if is_local_sandbox(runtime):
|
||||||
thread_data = get_thread_data(runtime)
|
|
||||||
command = replace_virtual_paths_in_command(command, thread_data)
|
command = replace_virtual_paths_in_command(command, thread_data)
|
||||||
|
output = sandbox.execute_command(command)
|
||||||
|
return mask_local_paths_in_output(output, thread_data)
|
||||||
return sandbox.execute_command(command)
|
return sandbox.execute_command(command)
|
||||||
except SandboxError as e:
|
except SandboxError as e:
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
@@ -265,9 +351,10 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
|
|||||||
try:
|
try:
|
||||||
sandbox = ensure_sandbox_initialized(runtime)
|
sandbox = ensure_sandbox_initialized(runtime)
|
||||||
ensure_thread_directories_exist(runtime)
|
ensure_thread_directories_exist(runtime)
|
||||||
|
requested_path = path
|
||||||
if is_local_sandbox(runtime):
|
if is_local_sandbox(runtime):
|
||||||
thread_data = get_thread_data(runtime)
|
thread_data = get_thread_data(runtime)
|
||||||
path = replace_virtual_path(path, thread_data)
|
path = resolve_local_tool_path(path, thread_data)
|
||||||
children = sandbox.list_dir(path)
|
children = sandbox.list_dir(path)
|
||||||
if not children:
|
if not children:
|
||||||
return "(empty)"
|
return "(empty)"
|
||||||
@@ -275,9 +362,9 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
|
|||||||
except SandboxError as e:
|
except SandboxError as e:
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return f"Error: Directory not found: {path}"
|
return f"Error: Directory not found: {requested_path}"
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
return f"Error: Permission denied: {path}"
|
return f"Error: Permission denied: {requested_path}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error: Unexpected error listing directory: {type(e).__name__}: {e}"
|
return f"Error: Unexpected error listing directory: {type(e).__name__}: {e}"
|
||||||
|
|
||||||
@@ -301,9 +388,10 @@ def read_file_tool(
|
|||||||
try:
|
try:
|
||||||
sandbox = ensure_sandbox_initialized(runtime)
|
sandbox = ensure_sandbox_initialized(runtime)
|
||||||
ensure_thread_directories_exist(runtime)
|
ensure_thread_directories_exist(runtime)
|
||||||
|
requested_path = path
|
||||||
if is_local_sandbox(runtime):
|
if is_local_sandbox(runtime):
|
||||||
thread_data = get_thread_data(runtime)
|
thread_data = get_thread_data(runtime)
|
||||||
path = replace_virtual_path(path, thread_data)
|
path = resolve_local_tool_path(path, thread_data)
|
||||||
content = sandbox.read_file(path)
|
content = sandbox.read_file(path)
|
||||||
if not content:
|
if not content:
|
||||||
return "(empty)"
|
return "(empty)"
|
||||||
@@ -313,11 +401,11 @@ def read_file_tool(
|
|||||||
except SandboxError as e:
|
except SandboxError as e:
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return f"Error: File not found: {path}"
|
return f"Error: File not found: {requested_path}"
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
return f"Error: Permission denied reading file: {path}"
|
return f"Error: Permission denied reading file: {requested_path}"
|
||||||
except IsADirectoryError:
|
except IsADirectoryError:
|
||||||
return f"Error: Path is a directory, not a file: {path}"
|
return f"Error: Path is a directory, not a file: {requested_path}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error: Unexpected error reading file: {type(e).__name__}: {e}"
|
return f"Error: Unexpected error reading file: {type(e).__name__}: {e}"
|
||||||
|
|
||||||
@@ -340,19 +428,20 @@ def write_file_tool(
|
|||||||
try:
|
try:
|
||||||
sandbox = ensure_sandbox_initialized(runtime)
|
sandbox = ensure_sandbox_initialized(runtime)
|
||||||
ensure_thread_directories_exist(runtime)
|
ensure_thread_directories_exist(runtime)
|
||||||
|
requested_path = path
|
||||||
if is_local_sandbox(runtime):
|
if is_local_sandbox(runtime):
|
||||||
thread_data = get_thread_data(runtime)
|
thread_data = get_thread_data(runtime)
|
||||||
path = replace_virtual_path(path, thread_data)
|
path = resolve_local_tool_path(path, thread_data)
|
||||||
sandbox.write_file(path, content, append)
|
sandbox.write_file(path, content, append)
|
||||||
return "OK"
|
return "OK"
|
||||||
except SandboxError as e:
|
except SandboxError as e:
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
return f"Error: Permission denied writing to file: {path}"
|
return f"Error: Permission denied writing to file: {requested_path}"
|
||||||
except IsADirectoryError:
|
except IsADirectoryError:
|
||||||
return f"Error: Path is a directory, not a file: {path}"
|
return f"Error: Path is a directory, not a file: {requested_path}"
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
return f"Error: Failed to write file '{path}': {e}"
|
return f"Error: Failed to write file '{requested_path}': {e}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error: Unexpected error writing file: {type(e).__name__}: {e}"
|
return f"Error: Unexpected error writing file: {type(e).__name__}: {e}"
|
||||||
|
|
||||||
@@ -379,14 +468,15 @@ def str_replace_tool(
|
|||||||
try:
|
try:
|
||||||
sandbox = ensure_sandbox_initialized(runtime)
|
sandbox = ensure_sandbox_initialized(runtime)
|
||||||
ensure_thread_directories_exist(runtime)
|
ensure_thread_directories_exist(runtime)
|
||||||
|
requested_path = path
|
||||||
if is_local_sandbox(runtime):
|
if is_local_sandbox(runtime):
|
||||||
thread_data = get_thread_data(runtime)
|
thread_data = get_thread_data(runtime)
|
||||||
path = replace_virtual_path(path, thread_data)
|
path = resolve_local_tool_path(path, thread_data)
|
||||||
content = sandbox.read_file(path)
|
content = sandbox.read_file(path)
|
||||||
if not content:
|
if not content:
|
||||||
return "OK"
|
return "OK"
|
||||||
if old_str not in content:
|
if old_str not in content:
|
||||||
return f"Error: String to replace not found in file: {path}"
|
return f"Error: String to replace not found in file: {requested_path}"
|
||||||
if replace_all:
|
if replace_all:
|
||||||
content = content.replace(old_str, new_str)
|
content = content.replace(old_str, new_str)
|
||||||
else:
|
else:
|
||||||
@@ -396,8 +486,8 @@ def str_replace_tool(
|
|||||||
except SandboxError as e:
|
except SandboxError as e:
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return f"Error: File not found: {path}"
|
return f"Error: File not found: {requested_path}"
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
return f"Error: Permission denied accessing file: {path}"
|
return f"Error: Permission denied accessing file: {requested_path}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"Error: Unexpected error replacing string: {type(e).__name__}: {e}"
|
return f"Error: Unexpected error replacing string: {type(e).__name__}: {e}"
|
||||||
|
|||||||
58
backend/tests/test_sandbox_tools_security.py
Normal file
58
backend/tests/test_sandbox_tools_security.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.sandbox.tools import (
|
||||||
|
VIRTUAL_PATH_PREFIX,
|
||||||
|
mask_local_paths_in_output,
|
||||||
|
replace_virtual_path,
|
||||||
|
resolve_local_tool_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_replace_virtual_path_maps_virtual_root_and_subpaths() -> None:
|
||||||
|
thread_data = {
|
||||||
|
"workspace_path": "/tmp/deer-flow/threads/t1/user-data/workspace",
|
||||||
|
"uploads_path": "/tmp/deer-flow/threads/t1/user-data/uploads",
|
||||||
|
"outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs",
|
||||||
|
}
|
||||||
|
|
||||||
|
assert replace_virtual_path("/mnt/user-data/workspace/a.txt", thread_data) == "/tmp/deer-flow/threads/t1/user-data/workspace/a.txt"
|
||||||
|
assert replace_virtual_path("/mnt/user-data", thread_data) == "/tmp/deer-flow/threads/t1/user-data"
|
||||||
|
|
||||||
|
|
||||||
|
def test_mask_local_paths_in_output_hides_host_paths() -> None:
|
||||||
|
thread_data = {
|
||||||
|
"workspace_path": "/tmp/deer-flow/threads/t1/user-data/workspace",
|
||||||
|
"uploads_path": "/tmp/deer-flow/threads/t1/user-data/uploads",
|
||||||
|
"outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs",
|
||||||
|
}
|
||||||
|
|
||||||
|
output = "Created: /tmp/deer-flow/threads/t1/user-data/workspace/result.txt"
|
||||||
|
masked = mask_local_paths_in_output(output, thread_data)
|
||||||
|
|
||||||
|
assert "/tmp/deer-flow/threads/t1/user-data" not in masked
|
||||||
|
assert "/mnt/user-data/workspace/result.txt" in masked
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_local_tool_path_rejects_non_virtual_path() -> None:
|
||||||
|
thread_data = {
|
||||||
|
"workspace_path": "/tmp/deer-flow/threads/t1/user-data/workspace",
|
||||||
|
"uploads_path": "/tmp/deer-flow/threads/t1/user-data/uploads",
|
||||||
|
"outputs_path": "/tmp/deer-flow/threads/t1/user-data/outputs",
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(PermissionError, match="Only paths under"):
|
||||||
|
resolve_local_tool_path("/Users/someone/config.yaml", thread_data)
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_local_tool_path_rejects_path_traversal() -> None:
|
||||||
|
base = Path("/tmp/deer-flow/threads/t1/user-data")
|
||||||
|
thread_data = {
|
||||||
|
"workspace_path": str(base / "workspace"),
|
||||||
|
"uploads_path": str(base / "uploads"),
|
||||||
|
"outputs_path": str(base / "outputs"),
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(PermissionError, match="path traversal"):
|
||||||
|
resolve_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/../../../../etc/passwd", thread_data)
|
||||||
Reference in New Issue
Block a user