mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-05-04 11:00:44 +08:00
refactor: split backend into harness (deerflow.*) and app (app.*) (#1131)
* refactor: extract shared utils to break harness→app cross-layer imports Move _validate_skill_frontmatter to src/skills/validation.py and CONVERTIBLE_EXTENSIONS + convert_file_to_markdown to src/utils/file_conversion.py. This eliminates the two reverse dependencies from client.py (harness layer) into gateway/routers/ (app layer), preparing for the harness/app package split. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor: split backend/src into harness (deerflow.*) and app (app.*) Physically split the monolithic backend/src/ package into two layers: - **Harness** (`packages/harness/deerflow/`): publishable agent framework package with import prefix `deerflow.*`. Contains agents, sandbox, tools, models, MCP, skills, config, and all core infrastructure. - **App** (`app/`): unpublished application code with import prefix `app.*`. Contains gateway (FastAPI REST API) and channels (IM integrations). Key changes: - Move 13 harness modules to packages/harness/deerflow/ via git mv - Move gateway + channels to app/ via git mv - Rename all imports: src.* → deerflow.* (harness) / app.* (app layer) - Set up uv workspace with deerflow-harness as workspace member - Update langgraph.json, config.example.yaml, all scripts, Docker files - Add build-system (hatchling) to harness pyproject.toml - Add PYTHONPATH=. to gateway startup commands for app.* resolution - Update ruff.toml with known-first-party for import sorting - Update all documentation to reflect new directory structure Boundary rule enforced: harness code never imports from app. All 429 tests pass. Lint clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: add harness→app boundary check test and update docs Add test_harness_boundary.py that scans all Python files in packages/harness/deerflow/ and fails if any `from app.*` or `import app.*` statement is found. This enforces the architectural rule that the harness layer never depends on the app layer. 
Update CLAUDE.md to document the harness/app split architecture, import conventions, and the boundary enforcement test. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: add config versioning with auto-upgrade on startup When config.example.yaml schema changes, developers' local config.yaml files can silently become outdated. This adds a config_version field and auto-upgrade mechanism so breaking changes (like src.* → deerflow.* renames) are applied automatically before services start. - Add config_version: 1 to config.example.yaml - Add startup version check warning in AppConfig.from_file() - Add scripts/config-upgrade.sh with migration registry for value replacements - Add `make config-upgrade` target - Auto-run config-upgrade in serve.sh and start-daemon.sh before starting services - Add config error hints in service failure messages Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix comments * fix: update src.* import in test_sandbox_tools_security to deerflow.* Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: handle empty config and search parent dirs for config.example.yaml Address Copilot review comments on PR #1131: - Guard against yaml.safe_load() returning None for empty config files - Search parent directories for config.example.yaml instead of only looking next to config.yaml, fixing detection in common setups Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: correct skills root path depth and config_version type coercion - loader.py: fix get_skills_root_path() to use 5 parent levels (was 3) after harness split, file lives at packages/harness/deerflow/skills/ so parent×3 resolved to backend/packages/harness/ instead of backend/ - app_config.py: coerce config_version to int() before comparison in _check_config_version() to prevent TypeError when YAML stores value as string (e.g. 
config_version: "1") - tests: add regression tests for both fixes Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix: update test imports from src.* to deerflow.*/app.* after harness refactor Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
8
backend/packages/harness/deerflow/sandbox/__init__.py
Normal file
8
backend/packages/harness/deerflow/sandbox/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from .sandbox import Sandbox
|
||||
from .sandbox_provider import SandboxProvider, get_sandbox_provider
|
||||
|
||||
__all__ = [
|
||||
"Sandbox",
|
||||
"SandboxProvider",
|
||||
"get_sandbox_provider",
|
||||
]
|
||||
71
backend/packages/harness/deerflow/sandbox/exceptions.py
Normal file
71
backend/packages/harness/deerflow/sandbox/exceptions.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Sandbox-related exceptions with structured error information."""
|
||||
|
||||
|
||||
class SandboxError(Exception):
|
||||
"""Base exception for all sandbox-related errors."""
|
||||
|
||||
def __init__(self, message: str, details: dict | None = None):
|
||||
super().__init__(message)
|
||||
self.message = message
|
||||
self.details = details or {}
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.details:
|
||||
detail_str = ", ".join(f"{k}={v}" for k, v in self.details.items())
|
||||
return f"{self.message} ({detail_str})"
|
||||
return self.message
|
||||
|
||||
|
||||
class SandboxNotFoundError(SandboxError):
    """Signals that a sandbox could not be found or is not available."""

    def __init__(self, message: str = "Sandbox not found", sandbox_id: str | None = None):
        """Build the error, recording the sandbox id when one is given.

        Args:
            message: Human-readable description of the failure.
            sandbox_id: Identifier of the missing sandbox. A falsy value
                (``None`` or ``""``) adds nothing to ``details``.
        """
        if sandbox_id:
            context = {"sandbox_id": sandbox_id}
        else:
            context = None
        super().__init__(message, context)
        self.sandbox_id = sandbox_id
|
||||
|
||||
|
||||
class SandboxRuntimeError(SandboxError):
    """Signals that the sandbox runtime is unavailable or misconfigured.

    Adds no behaviour of its own; it exists so callers can catch this
    failure category separately from other sandbox errors.
    """
|
||||
|
||||
|
||||
class SandboxCommandError(SandboxError):
    """Signals that executing a command inside the sandbox failed."""

    def __init__(self, message: str, command: str | None = None, exit_code: int | None = None):
        """Build the error with optional command and exit-code context.

        Args:
            message: Human-readable description of the failure.
            command: The command that failed. Only its first 100 characters
                (plus ``...``) are placed in ``details``; the full string is
                kept on ``self.command``. A falsy command adds no entry.
            exit_code: Exit status of the command; recorded in ``details``
                whenever it is not ``None`` (so ``0`` is recorded).
        """
        context = {}
        if command:
            too_long = len(command) > 100
            context["command"] = f"{command[:100]}..." if too_long else command
        if exit_code is not None:
            context["exit_code"] = exit_code
        super().__init__(message, context)
        self.command = command
        self.exit_code = exit_code
|
||||
|
||||
|
||||
class SandboxFileError(SandboxError):
    """Signals that a file operation inside the sandbox failed."""

    def __init__(self, message: str, path: str | None = None, operation: str | None = None):
        """Build the error with optional path and operation context.

        Args:
            message: Human-readable description of the failure.
            path: Path the operation targeted; omitted from ``details`` when falsy.
            operation: Name of the operation; omitted from ``details`` when falsy.
        """
        # Keep insertion order ("path" before "operation") and drop falsy values.
        candidates = (("path", path), ("operation", operation))
        context = {key: value for key, value in candidates if value}
        super().__init__(message, context)
        self.path = path
        self.operation = operation
|
||||
|
||||
|
||||
class SandboxPermissionError(SandboxFileError):
    """Signals a permission failure during a sandbox file operation.

    Adds no behaviour beyond its parent; it only narrows the error category.
    """
|
||||
|
||||
|
||||
class SandboxFileNotFoundError(SandboxFileError):
    """Signals that a file or directory does not exist in the sandbox.

    Adds no behaviour beyond its parent; it only narrows the error category.
    """
|
||||
@@ -0,0 +1,3 @@
|
||||
from .local_sandbox_provider import LocalSandboxProvider
|
||||
|
||||
__all__ = ["LocalSandboxProvider"]
|
||||
112
backend/packages/harness/deerflow/sandbox/local/list_dir.py
Normal file
112
backend/packages/harness/deerflow/sandbox/local/list_dir.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import fnmatch
|
||||
from pathlib import Path
|
||||
|
||||
# fnmatch-style patterns for entry names that directory listings skip.
# Each pattern is matched against a single file/directory name, not a full path.
IGNORE_PATTERNS = [
    # --- version control ---
    ".git",
    ".svn",
    ".hg",
    ".bzr",
    # --- dependency / environment directories ---
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    ".env",
    "env",
    ".tox",
    ".nox",
    ".eggs",
    "*.egg-info",
    "site-packages",
    # --- build outputs ---
    "dist",
    "build",
    ".next",
    ".nuxt",
    ".output",
    ".turbo",
    "target",
    "out",
    # --- IDE and editor files ---
    ".idea",
    ".vscode",
    "*.swp",
    "*.swo",
    "*~",
    ".project",
    ".classpath",
    ".settings",
    # --- OS-generated files ---
    ".DS_Store",
    "Thumbs.db",
    "desktop.ini",
    "*.lnk",
    # --- logs and temporary files ---
    "*.log",
    "*.tmp",
    "*.temp",
    "*.bak",
    "*.cache",
    ".cache",
    "logs",
    # --- coverage and test artifacts ---
    ".coverage",
    "coverage",
    ".nyc_output",
    "htmlcov",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
]
|
||||
|
||||
|
||||
def _should_ignore(name: str) -> bool:
    """Return True when *name* matches at least one entry of IGNORE_PATTERNS.

    Matching uses ``fnmatch.fnmatch`` on the bare file/directory name.
    """
    return any(fnmatch.fnmatch(name, candidate) for candidate in IGNORE_PATTERNS)
|
||||
|
||||
|
||||
def list_dir(path: str, max_depth: int = 2) -> list[str]:
    """Collect files and directories beneath *path*, down to *max_depth* levels.

    Args:
        path: Root directory to list.
        max_depth: How many levels to descend (default: 2).
            1 = direct children only, 2 = children and grandchildren, etc.

    Returns:
        A sorted list of resolved absolute paths. Directories carry a
        trailing ``/``. Entries whose name matches IGNORE_PATTERNS are left
        out (and not descended into). An empty list is returned when *path*
        is not a directory.
    """
    collected: list[str] = []
    root = Path(path).resolve()
    if not root.is_dir():
        return collected

    # Explicit work list instead of recursion; visiting order is irrelevant
    # because the result is sorted before returning.
    pending: list[tuple[Path, int]] = [(root, 1)]
    while pending:
        directory, depth = pending.pop()
        if depth > max_depth:
            continue
        try:
            for child in directory.iterdir():
                if _should_ignore(child.name):
                    continue
                child_is_dir = child.is_dir()
                suffix = "/" if child_is_dir else ""
                collected.append(str(child.resolve()) + suffix)
                # Only queue subdirectories that are still within the depth budget.
                if child_is_dir and depth < max_depth:
                    pending.append((child, depth + 1))
        except PermissionError:
            # Best effort: give up on the rest of this directory, keep what we have.
            continue

    return sorted(collected)
|
||||
212
backend/packages/harness/deerflow/sandbox/local/local_sandbox.py
Normal file
212
backend/packages/harness/deerflow/sandbox/local/local_sandbox.py
Normal file
@@ -0,0 +1,212 @@
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from deerflow.sandbox.local.list_dir import list_dir
|
||||
from deerflow.sandbox.sandbox import Sandbox
|
||||
|
||||
|
||||
class LocalSandbox(Sandbox):
    """Sandbox implementation that runs commands and file I/O on the host.

    Container-style paths (e.g. ``/mnt/skills``) are translated to host paths
    through ``path_mappings`` before an operation, and host paths appearing in
    results are translated back so callers only see container paths.
    """

    def __init__(self, id: str, path_mappings: dict[str, str] | None = None):
        """
        Initialize local sandbox with optional path mappings.

        Args:
            id: Sandbox identifier
            path_mappings: Dictionary mapping container paths to local paths
                Example: {"/mnt/skills": "/absolute/path/to/skills"}
        """
        super().__init__(id)
        self.path_mappings = path_mappings or {}

    @staticmethod
    def _has_path_prefix(path: str, prefix: str) -> bool:
        """Return True when *path* equals *prefix* or lies beneath it.

        A plain ``startswith`` would treat ``/mnt/skills-extra`` as being
        inside ``/mnt/skills``; this check requires the match to end on a
        ``/`` segment boundary (or to be exact).
        """
        if not path.startswith(prefix):
            return False
        if prefix.endswith("/") or len(path) == len(prefix):
            return True
        return path[len(prefix)] == "/"

    def _resolve_path(self, path: str) -> str:
        """
        Resolve container path to actual local path using mappings.

        Args:
            path: Path that might be a container path

        Returns:
            Resolved local path, or the input unchanged when no mapping
            covers it.
        """
        path_str = str(path)

        # Try each mapping (longest prefix first for more specific matches)
        for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True):
            if self._has_path_prefix(path_str, container_path):
                # Replace the container path prefix with local path
                relative = path_str[len(container_path) :].lstrip("/")
                return str(Path(local_path) / relative) if relative else local_path

        # No mapping found, return original path
        return path_str

    def _reverse_resolve_path(self, path: str) -> str:
        """
        Reverse resolve local path back to container path using mappings.

        Args:
            path: Local path that might need to be mapped to container path

        Returns:
            Container path if a mapping covers the path, otherwise the
            resolved (``Path.resolve()``) form of the input.
        """
        path_str = str(Path(path).resolve())

        # Try each mapping (longest local path first for more specific matches)
        for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True):
            local_path_resolved = str(Path(local_path).resolve())
            if self._has_path_prefix(path_str, local_path_resolved):
                # Replace the local path prefix with container path
                relative = path_str[len(local_path_resolved) :].lstrip("/")
                return f"{container_path}/{relative}" if relative else container_path

        # No mapping found, return original path
        return path_str

    def _reverse_resolve_paths_in_output(self, output: str) -> str:
        """
        Reverse resolve local paths back to container paths in output string.

        Args:
            output: Output string that may contain local paths

        Returns:
            Output with local paths resolved to container paths
        """
        import re

        # Sort mappings by local path length (longest first) for correct prefix matching
        sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True)

        if not sorted_mappings:
            return output

        def replace_match(match: re.Match) -> str:
            matched_path = match.group(0)
            mapped = self._reverse_resolve_path(matched_path)
            # Path.resolve() drops a trailing "/", which callers such as
            # list_dir() use as the directory marker, so put it back.
            if matched_path.endswith("/") and not mapped.endswith("/"):
                mapped += "/"
            return mapped

        result = output
        for _container_path, local_path in sorted_mappings:
            local_path_resolved = str(Path(local_path).resolve())
            # Match the local path followed by optional path components.
            # NOTE(review): this free-text match is prefix-based and has no
            # segment-boundary check, unlike _resolve_path().
            pattern = re.compile(re.escape(local_path_resolved) + r"(?:/[^\s\"';&|<>()]*)?")
            result = pattern.sub(replace_match, result)

        return result

    def _resolve_paths_in_command(self, command: str) -> str:
        """
        Resolve container paths to local paths in a command string.

        Args:
            command: Command string that may contain container paths

        Returns:
            Command with container paths resolved to local paths
        """
        import re

        # Sort mappings by length (longest first) for correct prefix matching
        sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True)

        if not sorted_mappings:
            return command

        # One alternative per container path. The trailing group uses the lazy
        # "??" quantifier, so each match covers just the container prefix and
        # the rest of the path is left in the command verbatim.
        patterns = [re.escape(container_path) + r"(?:/[^\s\"';&|<>()]*)??" for container_path, _ in sorted_mappings]
        pattern = re.compile("|".join(f"({p})" for p in patterns))

        def replace_match(match: re.Match) -> str:
            return self._resolve_path(match.group(0))

        return pattern.sub(replace_match, command)

    @staticmethod
    def _get_shell() -> str:
        """Detect available shell executable with fallback.

        Returns the first available shell in order of preference:
        /bin/zsh → /bin/bash → /bin/sh → first `sh` found on PATH.
        Raises a RuntimeError if no suitable shell is found.
        """
        for shell in ("/bin/zsh", "/bin/bash", "/bin/sh"):
            if os.path.isfile(shell) and os.access(shell, os.X_OK):
                return shell
        shell_from_path = shutil.which("sh")
        if shell_from_path is not None:
            return shell_from_path
        raise RuntimeError("No suitable shell executable found. Tried /bin/zsh, /bin/bash, /bin/sh, and `sh` on PATH.")

    def execute_command(self, command: str) -> str:
        """Run *command* through the host shell and return its combined output.

        Container paths in the command are translated to host paths first,
        and host paths in the output are translated back. stderr and a
        non-zero exit code are appended to the returned text.

        Raises:
            subprocess.TimeoutExpired: If the command runs longer than 600 seconds.
            RuntimeError: If no shell executable can be found.
        """
        # Resolve container paths in command before execution
        resolved_command = self._resolve_paths_in_command(command)

        result = subprocess.run(
            resolved_command,
            executable=self._get_shell(),
            shell=True,
            capture_output=True,
            text=True,
            timeout=600,
        )
        output = result.stdout
        if result.stderr:
            output += f"\nStd Error:\n{result.stderr}" if output else result.stderr
        if result.returncode != 0:
            output += f"\nExit Code: {result.returncode}"

        final_output = output if output else "(no output)"
        # Reverse resolve local paths back to container paths in output
        return self._reverse_resolve_paths_in_output(final_output)

    def list_dir(self, path: str, max_depth=2) -> list[str]:
        """List *path* up to *max_depth* levels, reporting container paths.

        Directory entries keep their trailing ``/`` marker after translation.
        """
        resolved_path = self._resolve_path(path)
        entries = list_dir(resolved_path, max_depth)
        # Reverse resolve local paths back to container paths in output
        return [self._reverse_resolve_paths_in_output(entry) for entry in entries]

    def read_file(self, path: str) -> str:
        """Return the text content of *path*, decoded as UTF-8.

        Raises:
            OSError: Re-raised with the caller-supplied path so the host
                path is not exposed in the error message.
        """
        resolved_path = self._resolve_path(path)
        try:
            # Explicit encoding: do not depend on the host locale default.
            with open(resolved_path, encoding="utf-8") as f:
                return f.read()
        except OSError as e:
            # Re-raise with the original path for clearer error messages, hiding internal resolved paths
            raise type(e)(e.errno, e.strerror, path) from None

    def write_file(self, path: str, content: str, append: bool = False) -> None:
        """Write (or append) UTF-8 text to *path*, creating parent directories.

        Raises:
            OSError: Re-raised with the caller-supplied path so the host
                path is not exposed in the error message.
        """
        resolved_path = self._resolve_path(path)
        try:
            dir_path = os.path.dirname(resolved_path)
            if dir_path:
                os.makedirs(dir_path, exist_ok=True)
            mode = "a" if append else "w"
            with open(resolved_path, mode, encoding="utf-8") as f:
                f.write(content)
        except OSError as e:
            # Re-raise with the original path for clearer error messages, hiding internal resolved paths
            raise type(e)(e.errno, e.strerror, path) from None

    def update_file(self, path: str, content: bytes) -> None:
        """Overwrite *path* with binary *content*, creating parent directories.

        Raises:
            OSError: Re-raised with the caller-supplied path so the host
                path is not exposed in the error message.
        """
        resolved_path = self._resolve_path(path)
        try:
            dir_path = os.path.dirname(resolved_path)
            if dir_path:
                os.makedirs(dir_path, exist_ok=True)
            with open(resolved_path, "wb") as f:
                f.write(content)
        except OSError as e:
            # Re-raise with the original path for clearer error messages, hiding internal resolved paths
            raise type(e)(e.errno, e.strerror, path) from None
|
||||
@@ -0,0 +1,60 @@
|
||||
from deerflow.sandbox.local.local_sandbox import LocalSandbox
|
||||
from deerflow.sandbox.sandbox import Sandbox
|
||||
from deerflow.sandbox.sandbox_provider import SandboxProvider
|
||||
|
||||
# Process-wide LocalSandbox instance shared by every LocalSandboxProvider.
_singleton: LocalSandbox | None = None


class LocalSandboxProvider(SandboxProvider):
    """Provider that hands out a single shared LocalSandbox with id ``"local"``."""

    def __init__(self):
        """Initialize the local sandbox provider with path mappings."""
        self._path_mappings = self._setup_path_mappings()

    def _setup_path_mappings(self) -> dict[str, str]:
        """
        Setup path mappings for local sandbox.

        Maps container paths to actual local paths, including skills directory.

        Returns:
            Dictionary of path mappings. Empty when the skills directory does
            not exist or the configuration could not be loaded.
        """
        mappings: dict[str, str] = {}

        # Map skills container path to local skills directory
        try:
            from deerflow.config import get_app_config

            config = get_app_config()
            skills_path = config.skills.get_skills_path()
            container_path = config.skills.container_path

            # Only add mapping if skills directory exists
            if skills_path.exists():
                mappings[container_path] = str(skills_path)
        except Exception as e:
            # Deliberate best effort: report through logging (not stdout)
            # and continue without the skills mapping.
            import logging

            logging.getLogger(__name__).warning("Could not setup skills path mapping: %s", e)

        return mappings

    def acquire(self, thread_id: str | None = None) -> str:
        """Return the id of the shared sandbox, creating it on first use.

        ``thread_id`` is accepted for interface compatibility and not used.
        """
        global _singleton
        if _singleton is None:
            _singleton = LocalSandbox("local", path_mappings=self._path_mappings)
        return _singleton.id

    def get(self, sandbox_id: str) -> Sandbox | None:
        """Return the shared sandbox for id ``"local"``; ``None`` for any other id."""
        if sandbox_id == "local":
            if _singleton is None:
                self.acquire()
            return _singleton
        return None

    def release(self, sandbox_id: str) -> None:
        """Do nothing: the shared local sandbox needs no cleanup."""
        # LocalSandbox uses singleton pattern - no cleanup needed, so the
        # sandbox stays reusable across multiple turns in a thread.
        # For Docker-based providers (e.g., AioSandboxProvider), cleanup
        # happens at application shutdown via the shutdown() method.
        pass
|
||||
81
backend/packages/harness/deerflow/sandbox/middleware.py
Normal file
81
backend/packages/harness/deerflow/sandbox/middleware.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import logging
|
||||
from typing import NotRequired, override
|
||||
|
||||
from langchain.agents import AgentState
|
||||
from langchain.agents.middleware import AgentMiddleware
|
||||
from langgraph.runtime import Runtime
|
||||
|
||||
from deerflow.agents.thread_state import SandboxState, ThreadDataState
|
||||
from deerflow.sandbox import get_sandbox_provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SandboxMiddlewareState(AgentState):
    """State slice read by the sandbox middleware; compatible with the `ThreadState` schema."""

    # Sandbox assignment for the thread; the key may be missing or None.
    sandbox: NotRequired[SandboxState | None]
    # Per-thread data; the key may be missing or None.
    thread_data: NotRequired[ThreadDataState | None]
|
||||
|
||||
|
||||
class SandboxMiddleware(AgentMiddleware[SandboxMiddlewareState]):
    """Create a sandbox environment and assign it to an agent.

    Lifecycle Management:
    - With lazy_init=True (default): before_agent() acquires nothing; the
      sandbox is expected to be acquired later, on first tool call
      (NOTE(review): that acquisition happens outside this class — confirm).
    - With lazy_init=False: Sandbox is acquired in before_agent() when the
      state does not already carry one.
    - after_agent() calls ``release()`` on the provider for the sandbox id
      found in the state or, failing that, in the runtime context. What
      release means is up to the provider; a provider may keep the sandbox
      alive for reuse and defer cleanup to its own shutdown.
    """

    state_schema = SandboxMiddlewareState

    def __init__(self, lazy_init: bool = True):
        """Initialize sandbox middleware.

        Args:
            lazy_init: If True, defer sandbox acquisition until first tool call.
                If False, acquire sandbox eagerly in before_agent().
                Default is True.
        """
        super().__init__()
        self._lazy_init = lazy_init

    def _acquire_sandbox(self, thread_id: str) -> str:
        """Acquire a sandbox for *thread_id* from the provider and return its id."""
        provider = get_sandbox_provider()
        sandbox_id = provider.acquire(thread_id)
        logger.info(f"Acquiring sandbox {sandbox_id}")
        return sandbox_id

    @override
    def before_agent(self, state: SandboxMiddlewareState, runtime: Runtime) -> dict | None:
        """Eagerly assign a sandbox when lazy_init is off and none is set.

        Returns:
            ``{"sandbox": {"sandbox_id": ...}}`` when a sandbox was acquired,
            otherwise whatever the base implementation returns.
        """
        # Skip acquisition if lazy_init is enabled
        if self._lazy_init:
            return super().before_agent(state, runtime)

        # Eager initialization (original behavior)
        if "sandbox" not in state or state["sandbox"] is None:
            thread_id = runtime.context["thread_id"]
            sandbox_id = self._acquire_sandbox(thread_id)
            logger.info(f"Assigned sandbox {sandbox_id} to thread {thread_id}")
            return {"sandbox": {"sandbox_id": sandbox_id}}
        return super().before_agent(state, runtime)

    @override
    def after_agent(self, state: SandboxMiddlewareState, runtime: Runtime) -> dict | None:
        """Release the sandbox referenced by the state or the runtime context.

        Returns:
            ``None`` after a release; otherwise the base implementation's result.
        """
        sandbox = state.get("sandbox")
        if sandbox is not None:
            sandbox_id = sandbox["sandbox_id"]
            logger.info(f"Releasing sandbox {sandbox_id}")
            get_sandbox_provider().release(sandbox_id)
            return None

        # The runtime context can be None when the agent is invoked without
        # one; treat that the same as "no sandbox_id supplied" instead of
        # failing with AttributeError after the agent has already finished.
        context = runtime.context or {}
        sandbox_id = context.get("sandbox_id")
        if sandbox_id is not None:
            logger.info(f"Releasing sandbox {sandbox_id} from context")
            get_sandbox_provider().release(sandbox_id)
            return None

        # No sandbox to release
        return super().after_agent(state, runtime)
|
||||
72
backend/packages/harness/deerflow/sandbox/sandbox.py
Normal file
72
backend/packages/harness/deerflow/sandbox/sandbox.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class Sandbox(ABC):
    """Interface every sandbox environment implements.

    A sandbox has an immutable identifier and exposes command execution and
    basic file operations.
    """

    # Identifier supplied at construction; exposed read-only through ``id``.
    _id: str

    def __init__(self, id: str):
        """Remember the sandbox identifier."""
        self._id = id

    @property
    def id(self) -> str:
        """The identifier this sandbox was created with."""
        return self._id

    @abstractmethod
    def execute_command(self, command: str) -> str:
        """Run a bash command inside the sandbox.

        Args:
            command: The command to execute.

        Returns:
            The standard or error output of the command.
        """

    @abstractmethod
    def read_file(self, path: str) -> str:
        """Return the content of a file.

        Args:
            path: The absolute path of the file to read.

        Returns:
            The content of the file.
        """

    @abstractmethod
    def list_dir(self, path: str, max_depth=2) -> list[str]:
        """Return the contents of a directory.

        Args:
            path: The absolute path of the directory to list.
            max_depth: The maximum depth to traverse. Default is 2.

        Returns:
            The contents of the directory.
        """

    @abstractmethod
    def write_file(self, path: str, content: str, append: bool = False) -> None:
        """Write text to a file.

        Args:
            path: The absolute path of the file to write to.
            content: The text content to write to the file.
            append: Whether to append the content to the file. If False,
                the file will be created or overwritten.
        """

    @abstractmethod
    def update_file(self, path: str, content: bytes) -> None:
        """Write binary content to a file.

        Args:
            path: The absolute path of the file to update.
            content: The binary content to write to the file.
        """
|
||||
@@ -0,0 +1,96 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from deerflow.config import get_app_config
|
||||
from deerflow.reflection import resolve_class
|
||||
from deerflow.sandbox.sandbox import Sandbox
|
||||
|
||||
|
||||
class SandboxProvider(ABC):
    """Interface for objects that hand out and manage sandbox environments."""

    @abstractmethod
    def acquire(self, thread_id: str | None = None) -> str:
        """Acquire a sandbox environment.

        Args:
            thread_id: Optional identifier of the thread the sandbox is for.

        Returns:
            The ID of the acquired sandbox environment.
        """

    @abstractmethod
    def get(self, sandbox_id: str) -> Sandbox | None:
        """Look up a sandbox environment by ID.

        Args:
            sandbox_id: The ID of the sandbox environment to retrieve.

        Returns:
            The matching sandbox, or None.
        """

    @abstractmethod
    def release(self, sandbox_id: str) -> None:
        """Release a sandbox environment.

        Args:
            sandbox_id: The ID of the sandbox environment to release.
        """
|
||||
|
||||
|
||||
_default_sandbox_provider: SandboxProvider | None = None
|
||||
|
||||
|
||||
def get_sandbox_provider(**kwargs) -> SandboxProvider:
    """Return the process-wide sandbox provider, creating it on first use.

    The provider class is resolved from ``config.sandbox.use`` and
    instantiated with ``**kwargs``. Once an instance is cached, later calls
    return it as-is and their ``kwargs`` are not used. Use
    `reset_sandbox_provider()` to clear the cache, or
    `shutdown_sandbox_provider()` to shut down and clear.

    Returns:
        A sandbox provider instance.
    """
    global _default_sandbox_provider
    if _default_sandbox_provider is not None:
        return _default_sandbox_provider

    app_config = get_app_config()
    provider_cls = resolve_class(app_config.sandbox.use, SandboxProvider)
    _default_sandbox_provider = provider_cls(**kwargs)
    return _default_sandbox_provider
|
||||
|
||||
|
||||
def reset_sandbox_provider() -> None:
    """Forget the cached sandbox provider without shutting it down.

    After this call the next `get_sandbox_provider()` builds a new instance.
    Handy for tests or when switching configurations.

    Note: sandboxes still held by the discarded provider are orphaned.
    Use `shutdown_sandbox_provider()` when proper cleanup is needed.
    """
    global _default_sandbox_provider
    _default_sandbox_provider = None
|
||||
|
||||
|
||||
def shutdown_sandbox_provider() -> None:
    """Shut down the cached sandbox provider, then forget it.

    If the provider exposes a ``shutdown`` attribute it is called before the
    singleton is cleared. Does nothing when no provider is cached. Call this
    at application shutdown or to fully reset the sandbox system.
    """
    global _default_sandbox_provider
    provider = _default_sandbox_provider
    if provider is None:
        return
    if hasattr(provider, "shutdown"):
        provider.shutdown()
    _default_sandbox_provider = None
|
||||
|
||||
|
||||
def set_sandbox_provider(provider: SandboxProvider) -> None:
    """Install *provider* as the process-wide sandbox provider.

    Intended for injecting a custom or mock provider, e.g. in tests. Any
    previously cached provider is replaced without being shut down.

    Args:
        provider: The SandboxProvider instance to use.
    """
    global _default_sandbox_provider
    _default_sandbox_provider = provider
|
||||
538
backend/packages/harness/deerflow/sandbox/tools.py
Normal file
538
backend/packages/harness/deerflow/sandbox/tools.py
Normal file
@@ -0,0 +1,538 @@
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from langchain.tools import ToolRuntime, tool
|
||||
from langgraph.typing import ContextT
|
||||
|
||||
from deerflow.agents.thread_state import ThreadDataState, ThreadState
|
||||
from deerflow.config.paths import VIRTUAL_PATH_PREFIX
|
||||
from deerflow.sandbox.exceptions import (
|
||||
SandboxError,
|
||||
SandboxNotFoundError,
|
||||
SandboxRuntimeError,
|
||||
)
|
||||
from deerflow.sandbox.sandbox import Sandbox
|
||||
from deerflow.sandbox.sandbox_provider import get_sandbox_provider
|
||||
|
||||
_ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])/(?:[^\s\"'`;&|<>()]+)")
|
||||
_LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
|
||||
"/bin/",
|
||||
"/usr/bin/",
|
||||
"/usr/sbin/",
|
||||
"/sbin/",
|
||||
"/opt/homebrew/bin/",
|
||||
"/dev/",
|
||||
)
|
||||
|
||||
|
||||
def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
    """Translate a virtual /mnt/user-data path into the thread's real path.

    Mapping:
        /mnt/user-data/workspace/* -> thread_data['workspace_path']/*
        /mnt/user-data/uploads/* -> thread_data['uploads_path']/*
        /mnt/user-data/outputs/* -> thread_data['outputs_path']/*

    Args:
        path: The path that may start with a virtual prefix.
        thread_data: The thread data holding the actual directories.

    Returns:
        The path with its virtual prefix swapped for the actual directory,
        or the input unchanged when there is no thread data or no virtual
        base matches.
    """
    if thread_data is None:
        return path

    mappings = _thread_virtual_to_actual_mappings(thread_data)

    # Most specific (longest) virtual base first. A base matches only when
    # the path equals it or continues with "/", i.e. on a segment boundary.
    for virtual_base in sorted(mappings, key=len, reverse=True):
        actual_base = mappings[virtual_base]
        if path == virtual_base:
            return actual_base
        if not path.startswith(f"{virtual_base}/"):
            continue
        remainder = path[len(virtual_base) :].lstrip("/")
        if not remainder:
            return actual_base
        return str(Path(actual_base) / remainder)

    return path
|
||||
|
||||
|
||||
def _thread_virtual_to_actual_mappings(thread_data: ThreadDataState) -> dict[str, str]:
    """Return the virtual-directory -> actual-directory table for a thread.

    One entry is produced for each of the workspace / uploads / outputs
    directories that has a non-empty path in ``thread_data``. When every
    configured directory has the same parent directory, the bare
    ``VIRTUAL_PATH_PREFIX`` is additionally mapped to that parent.
    """
    configured = (
        ("workspace", thread_data.get("workspace_path")),
        ("uploads", thread_data.get("uploads_path")),
        ("outputs", thread_data.get("outputs_path")),
    )

    mappings: dict[str, str] = {
        f"{VIRTUAL_PATH_PREFIX}/{name}": actual for name, actual in configured if actual
    }

    # The virtual root is only meaningful when all known dirs are siblings.
    parents = {str(Path(actual).parent) for _, actual in configured if actual}
    if len(parents) == 1:
        mappings[VIRTUAL_PATH_PREFIX] = next(iter(parents))

    return mappings
|
||||
|
||||
|
||||
def _thread_actual_to_virtual_mappings(thread_data: ThreadDataState) -> dict[str, str]:
    """Invert the virtual -> actual table so output can be masked."""
    inverted: dict[str, str] = {}
    for virtual, actual in _thread_virtual_to_actual_mappings(thread_data).items():
        inverted[actual] = virtual
    return inverted
|
||||
|
||||
|
||||
def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None) -> str:
    """Rewrite actual thread directories found in ``output`` to virtual paths.

    Args:
        output: Text produced by a local sandbox command.
        thread_data: Thread data holding the actual directories, or ``None``.

    Returns:
        ``output`` with every occurrence of a mapped actual directory (and
        the path characters following it) replaced by the matching virtual
        path. Returned unchanged when there is no thread data or no mapping.
    """
    if thread_data is None:
        return output

    mappings = _thread_actual_to_virtual_mappings(thread_data)
    if not mappings:
        return output

    masked = output
    # Longest actual directory first so nested directories are rewritten
    # before the directories that contain them.
    longest_first = sorted(mappings.items(), key=lambda pair: len(pair[0]), reverse=True)
    for host_dir, virtual_dir in longest_first:
        host_path = Path(host_dir)
        # Mask both the path as configured and its resolved form.
        for candidate in {str(host_path), str(host_path.resolve())}:
            finder = re.compile(re.escape(candidate) + r"(?:/[^\s\"';&|<>()]*)?")

            def to_virtual(found: re.Match, candidate: str = candidate, virtual_dir: str = virtual_dir) -> str:
                tail = found.group(0)[len(candidate) :].lstrip("/")
                return f"{virtual_dir}/{tail}" if tail else virtual_dir

            masked = finder.sub(to_virtual, masked)

    return masked
|
||||
|
||||
|
||||
def resolve_local_tool_path(path: str, thread_data: ThreadDataState | None) -> str:
    """Resolve a virtual tool path and confirm it stays inside the thread's directories.

    Args:
        path: Path supplied to a file tool; must start with
            ``VIRTUAL_PATH_PREFIX`` followed by ``/``.
        thread_data: Thread data holding the actual workspace / uploads /
            outputs directories.

    Returns:
        The resolved actual path as a string.

    Raises:
        SandboxRuntimeError: If ``thread_data`` is ``None`` or none of the
            three directories is configured.
        PermissionError: If ``path`` is not under the virtual prefix, or the
            resolved path lies outside every configured directory.
    """
    if thread_data is None:
        raise SandboxRuntimeError("Thread data not available for local sandbox")

    if not path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
        raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/ are allowed")

    # resolve() collapses ".." segments and symlinks, so the containment check
    # below is made against the location that would really be accessed.
    resolved = Path(replace_virtual_path(path, thread_data)).resolve()

    configured = (
        thread_data.get("workspace_path"),
        thread_data.get("uploads_path"),
        thread_data.get("outputs_path"),
    )
    # Filter on truthiness (as _thread_virtual_to_actual_mappings does): an
    # empty string would otherwise resolve to the current working directory
    # and silently become an allowed root.
    allowed_roots = [Path(root).resolve() for root in configured if root]

    if not allowed_roots:
        raise SandboxRuntimeError("No allowed local sandbox directories configured")

    if any(resolved.is_relative_to(root) for root in allowed_roots):
        return str(resolved)

    raise PermissionError("Access denied: path traversal detected")
|
||||
|
||||
|
||||
def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None:
    """Reject local-sandbox bash commands that reference disallowed absolute paths.

    Every match of ``_ABSOLUTE_PATH_PATTERN`` in ``command`` must either be
    the virtual prefix itself, lie under it, or start with one of
    ``_LOCAL_BASH_SYSTEM_PATH_PREFIXES`` (e.g. /bin/sh, /dev/null). Any
    absolute path containing a ``..`` segment is rejected regardless of its
    prefix, because the prefix checks are purely textual.

    This is a best-effort textual check on absolute paths only; relative
    paths in the command are not inspected here.

    Args:
        command: The bash command about to be executed.
        thread_data: Thread data for the current thread.

    Raises:
        SandboxRuntimeError: If ``thread_data`` is ``None``.
        PermissionError: If the command contains a disallowed absolute path.
    """
    if thread_data is None:
        raise SandboxRuntimeError("Thread data not available for local sandbox")

    unsafe_paths: list[str] = []

    for absolute_path in _ABSOLUTE_PATH_PATTERN.findall(command):
        # "<allowed prefix>/../.." would pass the startswith() checks below
        # while pointing somewhere else entirely.
        if ".." in absolute_path.split("/"):
            unsafe_paths.append(absolute_path)
            continue

        if absolute_path == VIRTUAL_PATH_PREFIX or absolute_path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
            continue

        if any(
            absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix)
            for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES
        ):
            continue

        unsafe_paths.append(absolute_path)

    if unsafe_paths:
        # De-duplicate, then sort for a stable error message.
        unsafe = ", ".join(sorted(dict.fromkeys(unsafe_paths)))
        raise PermissionError(f"Unsafe absolute paths in command: {unsafe}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
|
||||
|
||||
def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState | None) -> str:
    """Translate every virtual user-data path occurring in a command string.

    Args:
        command: Command text that may mention ``VIRTUAL_PATH_PREFIX`` paths.
        thread_data: Thread data holding the actual directories, or ``None``.

    Returns:
        The command with each virtual path passed through
        ``replace_virtual_path``; unchanged when the prefix does not occur
        in the command or there is no thread data.
    """
    if VIRTUAL_PATH_PREFIX not in command or thread_data is None:
        return command

    # The virtual prefix, optionally followed by "/" and further path characters.
    virtual_path_re = re.compile(rf"{re.escape(VIRTUAL_PATH_PREFIX)}(/[^\s\"';&|<>()]*)?")
    return virtual_path_re.sub(lambda found: replace_virtual_path(found.group(0), thread_data), command)
|
||||
|
||||
|
||||
def get_thread_data(runtime: ToolRuntime[ContextT, ThreadState] | None) -> ThreadDataState | None:
    """Return ``runtime.state['thread_data']``, or ``None`` if runtime or state is missing."""
    if runtime is None or runtime.state is None:
        return None
    return runtime.state.get("thread_data")
|
||||
|
||||
|
||||
def is_local_sandbox(runtime: ToolRuntime[ContextT, ThreadState] | None) -> bool:
    """Report whether the sandbox recorded in runtime state has the id ``"local"``.

    Callers use this to decide whether virtual paths must be translated;
    per the original author, the aio sandbox already has /mnt/user-data
    mounted in its container and needs no translation.

    Returns:
        ``False`` when the runtime, its state, or the sandbox entry is
        missing; otherwise whether ``sandbox_id`` equals ``"local"``.
    """
    if runtime is None or runtime.state is None:
        return False
    sandbox_state = runtime.state.get("sandbox")
    return sandbox_state is not None and sandbox_state.get("sandbox_id") == "local"
|
||||
|
||||
|
||||
def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = None) -> Sandbox:
    """Look up the already-initialized sandbox referenced by the tool runtime.

    DEPRECATED: prefer ensure_sandbox_initialized(), which can acquire a
    sandbox lazily. This function never acquires one; it fails if the
    runtime state does not already reference a sandbox.

    Raises:
        SandboxRuntimeError: If the runtime, its state, the sandbox entry or
            the sandbox id is missing.
        SandboxNotFoundError: If the provider has no sandbox for the id.
    """
    if runtime is None:
        raise SandboxRuntimeError("Tool runtime not available")

    state = runtime.state
    if state is None:
        raise SandboxRuntimeError("Tool runtime state not available")

    sandbox_entry = state.get("sandbox")
    if sandbox_entry is None:
        raise SandboxRuntimeError("Sandbox state not initialized in runtime")

    sandbox_id = sandbox_entry.get("sandbox_id")
    if sandbox_id is None:
        raise SandboxRuntimeError("Sandbox ID not found in state")

    found = get_sandbox_provider().get(sandbox_id)
    if found is None:
        raise SandboxNotFoundError(f"Sandbox with ID '{sandbox_id}' not found", sandbox_id=sandbox_id)

    # Expose the id through the context for downstream consumers.
    runtime.context["sandbox_id"] = sandbox_id
    return found
|
||||
|
||||
|
||||
def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | None = None) -> Sandbox:
    """Return the thread's sandbox, acquiring one from the provider if needed.

    If runtime state already references a sandbox id that the provider still
    knows, that sandbox is returned. Otherwise a sandbox is acquired for the
    thread id found in the runtime context and its id is written to
    ``runtime.state["sandbox"]``.

    NOTE(review): the original docstring states that thread-safety is
    guaranteed by the provider's internal locking; nothing in this function
    adds locking of its own — confirm against the provider.

    Args:
        runtime: Tool runtime containing state and context.

    Returns:
        Initialized sandbox instance.

    Raises:
        SandboxRuntimeError: If runtime or its state is not available, or
            the context has no thread id.
        SandboxNotFoundError: If the provider cannot return the sandbox it
            just acquired.
    """
    if runtime is None:
        raise SandboxRuntimeError("Tool runtime not available")

    if runtime.state is None:
        raise SandboxRuntimeError("Tool runtime state not available")

    # Fast path: state already points at a sandbox the provider still holds.
    recorded = runtime.state.get("sandbox")
    recorded_id = recorded.get("sandbox_id") if recorded is not None else None
    if recorded_id is not None:
        existing = get_sandbox_provider().get(recorded_id)
        if existing is not None:
            # Keep the id in context so it can be released in after_agent.
            runtime.context["sandbox_id"] = recorded_id
            return existing
        # The recorded sandbox is gone (released); acquire a fresh one below.

    thread_id = runtime.context.get("thread_id")
    if thread_id is None:
        raise SandboxRuntimeError("Thread ID not available in runtime context")

    provider = get_sandbox_provider()
    new_id = provider.acquire(thread_id)

    # Record the id in state before looking the sandbox up, as the original does.
    runtime.state["sandbox"] = {"sandbox_id": new_id}

    acquired = provider.get(new_id)
    if acquired is None:
        raise SandboxNotFoundError("Sandbox not found after acquisition", sandbox_id=new_id)

    # Keep the id in context so it can be released in after_agent.
    runtime.context["sandbox_id"] = new_id
    return acquired
|
||||
|
||||
|
||||
def ensure_thread_directories_exist(runtime: ToolRuntime[ContextT, ThreadState] | None) -> None:
    """Create the thread's workspace / uploads / outputs directories once.

    Does nothing unless the runtime refers to a local sandbox and carries
    thread data. After the directories are created, the flag
    ``thread_directories_created`` is set in runtime state so later calls
    return immediately.

    Args:
        runtime: Tool runtime containing state and context.
    """
    if runtime is None or not is_local_sandbox(runtime):
        return

    thread_data = get_thread_data(runtime)
    if thread_data is None:
        return

    if runtime.state.get("thread_directories_created"):
        return

    import os

    for directory in map(thread_data.get, ("workspace_path", "uploads_path", "outputs_path")):
        if directory:
            os.makedirs(directory, exist_ok=True)

    # Remember that creation already happened for this thread.
    runtime.state["thread_directories_created"] = True
|
||||
|
||||
|
||||
# The docstring is consumed by @tool(..., parse_docstring=True); its text is kept verbatim.
@tool("bash", parse_docstring=True)
def bash_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, command: str) -> str:
    """Execute a bash command in a Linux environment.


    - Use `python` to run Python code.
    - Prefer a thread-local virtual environment in `/mnt/user-data/workspace/.venv`.
    - Use `python -m pip` (inside the virtual environment) to install Python packages.

    Args:
        description: Explain why you are running this command in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        command: The bash command to execute. Always use absolute paths for files and directories.
    """
    try:
        sandbox = ensure_sandbox_initialized(runtime)
        ensure_thread_directories_exist(runtime)
        thread_data = get_thread_data(runtime)

        if not is_local_sandbox(runtime):
            # Non-local sandboxes run the command exactly as given.
            return sandbox.execute_command(command)

        # Local sandbox: validate, translate virtual paths to actual ones,
        # run, then hide the actual paths again in whatever was printed.
        validate_local_bash_command_paths(command, thread_data)
        command = replace_virtual_paths_in_command(command, thread_data)
        raw_output = sandbox.execute_command(command)
        return mask_local_paths_in_output(raw_output, thread_data)
    except (SandboxError, PermissionError) as e:
        return f"Error: {e}"
    except Exception as e:
        return f"Error: Unexpected error executing command: {type(e).__name__}: {e}"
|
||||
|
||||
|
||||
# The docstring is consumed by @tool(..., parse_docstring=True); its text is kept verbatim.
# Returns the directory entries joined by newlines, "(empty)" when there are
# none, or an "Error: ..." string; exceptions are never propagated to the caller.
@tool("ls", parse_docstring=True)
def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path: str) -> str:
    """List the contents of a directory up to 2 levels deep in tree format.

    Args:
        description: Explain why you are listing this directory in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        path: The **absolute** path to the directory to list.
    """
    # Bound before the try block: the setup calls below can themselves raise
    # FileNotFoundError / PermissionError (e.g. while creating the thread
    # directories), and the handlers need this name for their message.
    requested_path = path
    try:
        sandbox = ensure_sandbox_initialized(runtime)
        ensure_thread_directories_exist(runtime)
        if is_local_sandbox(runtime):
            # Local sandbox works on actual paths; translate and validate first.
            thread_data = get_thread_data(runtime)
            path = resolve_local_tool_path(path, thread_data)
        children = sandbox.list_dir(path)
        if not children:
            return "(empty)"
        return "\n".join(children)
    except SandboxError as e:
        return f"Error: {e}"
    except FileNotFoundError:
        return f"Error: Directory not found: {requested_path}"
    except PermissionError:
        return f"Error: Permission denied: {requested_path}"
    except Exception as e:
        return f"Error: Unexpected error listing directory: {type(e).__name__}: {e}"
|
||||
|
||||
|
||||
# The docstring is consumed by @tool(..., parse_docstring=True); its text is kept verbatim.
# Returns the file text (optionally restricted to a line range), "(empty)" for
# an empty file, or an "Error: ..." string; exceptions are never propagated.
@tool("read_file", parse_docstring=True)
def read_file_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    path: str,
    start_line: int | None = None,
    end_line: int | None = None,
) -> str:
    """Read the contents of a text file. Use this to examine source code, configuration files, logs, or any text-based file.

    Args:
        description: Explain why you are reading this file in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        path: The **absolute** path to the file to read.
        start_line: Optional starting line number (1-indexed, inclusive). Use with end_line to read a specific range.
        end_line: Optional ending line number (1-indexed, inclusive). Use with start_line to read a specific range.
    """
    # Bound before the try block: the setup calls below can themselves raise
    # the exceptions handled at the bottom, whose messages use this name.
    requested_path = path
    try:
        sandbox = ensure_sandbox_initialized(runtime)
        ensure_thread_directories_exist(runtime)
        if is_local_sandbox(runtime):
            # Local sandbox works on actual paths; translate and validate first.
            thread_data = get_thread_data(runtime)
            path = resolve_local_tool_path(path, thread_data)
        content = sandbox.read_file(path)
        if not content:
            return "(empty)"
        if start_line is not None or end_line is not None:
            # Either bound may be given alone. Clamp so a start below 1 or a
            # negative end cannot turn into a from-the-end slice index.
            first = 0 if start_line is None else max(start_line, 1) - 1
            last = None if end_line is None else max(end_line, 0)
            content = "\n".join(content.splitlines()[first:last])
        return content
    except SandboxError as e:
        return f"Error: {e}"
    except FileNotFoundError:
        return f"Error: File not found: {requested_path}"
    except PermissionError:
        return f"Error: Permission denied reading file: {requested_path}"
    except IsADirectoryError:
        return f"Error: Path is a directory, not a file: {requested_path}"
    except Exception as e:
        return f"Error: Unexpected error reading file: {type(e).__name__}: {e}"
|
||||
|
||||
|
||||
# The docstring is consumed by @tool(..., parse_docstring=True); its text is kept verbatim.
# Returns "OK" on success or an "Error: ..." string; exceptions are never propagated.
# NOTE(review): `append` has no entry in the Args section and is forwarded
# as-is to sandbox.write_file(); presumably it selects append-vs-overwrite —
# confirm against the Sandbox implementation before documenting it for the model.
@tool("write_file", parse_docstring=True)
def write_file_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    path: str,
    content: str,
    append: bool = False,
) -> str:
    """Write text content to a file.

    Args:
        description: Explain why you are writing to this file in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        path: The **absolute** path to the file to write to. ALWAYS PROVIDE THIS PARAMETER SECOND.
        content: The content to write to the file. ALWAYS PROVIDE THIS PARAMETER THIRD.
    """
    # Bound before the try block: the setup calls below can themselves raise
    # PermissionError / OSError, and the handlers need this name.
    requested_path = path
    try:
        sandbox = ensure_sandbox_initialized(runtime)
        ensure_thread_directories_exist(runtime)
        if is_local_sandbox(runtime):
            # Local sandbox works on actual paths; translate and validate first.
            thread_data = get_thread_data(runtime)
            path = resolve_local_tool_path(path, thread_data)
        sandbox.write_file(path, content, append)
        return "OK"
    except SandboxError as e:
        return f"Error: {e}"
    except PermissionError:
        return f"Error: Permission denied writing to file: {requested_path}"
    except IsADirectoryError:
        return f"Error: Path is a directory, not a file: {requested_path}"
    except OSError as e:
        return f"Error: Failed to write file '{requested_path}': {e}"
    except Exception as e:
        return f"Error: Unexpected error writing file: {type(e).__name__}: {e}"
|
||||
|
||||
|
||||
# The docstring is consumed by @tool(..., parse_docstring=True); its text is kept verbatim.
# Returns "OK" on success (or when the file is empty) or an "Error: ..."
# string; exceptions are never propagated.
# NOTE(review): the docstring says the substring must appear exactly once when
# replace_all is False, but the code only replaces the first occurrence and
# does not check uniqueness — confirm which contract is intended.
@tool("str_replace", parse_docstring=True)
def str_replace_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    path: str,
    old_str: str,
    new_str: str,
    replace_all: bool = False,
) -> str:
    """Replace a substring in a file with another substring.
    If `replace_all` is False (default), the substring to replace must appear **exactly once** in the file.

    Args:
        description: Explain why you are replacing the substring in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        path: The **absolute** path to the file to replace the substring in. ALWAYS PROVIDE THIS PARAMETER SECOND.
        old_str: The substring to replace. ALWAYS PROVIDE THIS PARAMETER THIRD.
        new_str: The new substring. ALWAYS PROVIDE THIS PARAMETER FOURTH.
        replace_all: Whether to replace all occurrences of the substring. If False, only the first occurrence will be replaced. Default is False.
    """
    # Bound before the try block: the setup calls below can themselves raise
    # FileNotFoundError / PermissionError, and the handlers need this name.
    requested_path = path
    try:
        sandbox = ensure_sandbox_initialized(runtime)
        ensure_thread_directories_exist(runtime)
        if is_local_sandbox(runtime):
            # Local sandbox works on actual paths; translate and validate first.
            thread_data = get_thread_data(runtime)
            path = resolve_local_tool_path(path, thread_data)
        content = sandbox.read_file(path)
        if not content:
            # Empty file: nothing is written back.
            return "OK"
        if old_str not in content:
            return f"Error: String to replace not found in file: {requested_path}"
        # A count of -1 tells str.replace to replace every occurrence.
        content = content.replace(old_str, new_str, -1 if replace_all else 1)
        sandbox.write_file(path, content)
        return "OK"
    except SandboxError as e:
        return f"Error: {e}"
    except FileNotFoundError:
        return f"Error: File not found: {requested_path}"
    except PermissionError:
        return f"Error: Permission denied accessing file: {requested_path}"
    except Exception as e:
        return f"Error: Unexpected error replacing string: {type(e).__name__}: {e}"
|
||||
Reference in New Issue
Block a user