chore(docker): Refactor sandbox state management and improve Docker integration (#1068)

* Refactor sandbox state management and improve Docker integration

- Removed FileSandboxStateStore and SandboxStateStore classes for a cleaner architecture.
- Enhanced LocalContainerBackend to handle port allocation retries and introduced environment variable support for sandbox host configuration.
- Updated Paths class to include host_base_dir for Docker volume mounts and ensured proper permissions for sandbox directories.
- Modified ExtensionsConfig to improve error handling when loading configuration files and adjusted environment variable resolution.
- Updated sandbox configuration to include a replicas option for managing concurrent sandbox containers.
- Improved logging and context management in SandboxMiddleware for better sandbox lifecycle handling.
- Enhanced network port allocation logic to bind to 0.0.0.0 for compatibility with Docker.
- Updated Docker Compose files to ensure proper volume management and environment variable configuration.
- Created scripts to ensure necessary configuration files are present before starting services.
- Cleaned up unused MCP server configurations in extensions_config.example.json.

* Address Copilot review suggestions from PR #1068 (#9)

---------

Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
JeffJiang
2026-03-11 10:03:01 +08:00
committed by GitHub
parent 6ae7f0c0ee
commit f836d8e17c
18 changed files with 455 additions and 384 deletions

View File

@@ -133,11 +133,15 @@ class ExtensionsConfig(BaseModel):
# Return empty config if extensions config file is not found
return cls(mcp_servers={}, skills={})
with open(resolved_path, encoding="utf-8") as f:
config_data = json.load(f)
cls.resolve_env_variables(config_data)
return cls.model_validate(config_data)
try:
with open(resolved_path, encoding="utf-8") as f:
config_data = json.load(f)
cls.resolve_env_variables(config_data)
return cls.model_validate(config_data)
except json.JSONDecodeError as e:
raise ValueError(f"Extensions config file at {resolved_path} is not valid JSON: {e}") from e
except Exception as e:
raise RuntimeError(f"Failed to load extensions config from {resolved_path}: {e}") from e
@classmethod
def resolve_env_variables(cls, config: dict[str, Any]) -> dict[str, Any]:
@@ -156,8 +160,12 @@ class ExtensionsConfig(BaseModel):
if value.startswith("$"):
env_value = os.getenv(value[1:])
if env_value is None:
raise ValueError(f"Environment variable {value[1:]} not found for config value {value}")
config[key] = env_value
# Unresolved placeholder — store empty string so downstream
# consumers (e.g. MCP servers) don't receive the literal "$VAR"
# token as an actual environment value.
config[key] = ""
else:
config[key] = env_value
else:
config[key] = value
elif isinstance(value, dict):

View File

@@ -38,6 +38,21 @@ class Paths:
def __init__(self, base_dir: str | Path | None = None) -> None:
self._base_dir = Path(base_dir).resolve() if base_dir is not None else None
@property
def host_base_dir(self) -> Path:
    """Return the base directory as seen from the Docker host.

    In a Docker-out-of-Docker setup (this process runs in a container but
    talks to the host's Docker daemon through a mounted socket), volume
    mount sources are resolved against the *host* filesystem. The
    ``DEER_FLOW_HOST_BASE_DIR`` environment variable should then hold the
    host-side path that corresponds to this container's ``base_dir`` so
    that sandbox container volume mounts point at the right place.

    Returns:
        ``Path`` built from ``DEER_FLOW_HOST_BASE_DIR`` when that variable
        is set to a non-empty value; otherwise ``self.base_dir``
        (native/local execution).
    """
    host_override = os.getenv("DEER_FLOW_HOST_BASE_DIR")
    if not host_override:
        # Unset or empty: host and process share the same filesystem view.
        return self.base_dir
    return Path(host_override)
@property
def base_dir(self) -> Path:
"""Root directory for all application data."""
@@ -124,10 +139,21 @@ class Paths:
return self.thread_dir(thread_id) / "user-data"
def ensure_thread_dirs(self, thread_id: str) -> None:
"""Create all standard sandbox directories for a thread."""
self.sandbox_work_dir(thread_id).mkdir(parents=True, exist_ok=True)
self.sandbox_uploads_dir(thread_id).mkdir(parents=True, exist_ok=True)
self.sandbox_outputs_dir(thread_id).mkdir(parents=True, exist_ok=True)
"""Create all standard sandbox directories for a thread.
Directories are created with mode 0o777 so that sandbox containers
(which may run as a different UID than the host backend process) can
write to the volume-mounted paths without "Permission denied" errors.
The explicit chmod() call is necessary because Path.mkdir(mode=...) is
subject to the process umask and may not yield the intended permissions.
"""
for d in [
self.sandbox_work_dir(thread_id),
self.sandbox_uploads_dir(thread_id),
self.sandbox_outputs_dir(thread_id),
]:
d.mkdir(parents=True, exist_ok=True)
d.chmod(0o777)
def resolve_virtual_path(self, thread_id: str, virtual_path: str) -> Path:
"""Resolve a sandbox virtual path to the actual host filesystem path.

View File

@@ -18,8 +18,7 @@ class SandboxConfig(BaseModel):
AioSandboxProvider specific options:
image: Docker image to use (default: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest)
port: Base port for sandbox containers (default: 8080)
base_url: If set, uses existing sandbox instead of starting new container
auto_start: Whether to automatically start Docker container (default: true)
replicas: Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.
container_prefix: Prefix for container names (default: deer-flow-sandbox)
idle_timeout: Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.
mounts: List of volume mounts to share directories with the container
@@ -38,13 +37,9 @@ class SandboxConfig(BaseModel):
default=None,
description="Base port for sandbox containers",
)
base_url: str | None = Field(
replicas: int | None = Field(
default=None,
description="If set, uses existing sandbox at this URL instead of starting new container",
)
auto_start: bool | None = Field(
default=None,
description="Whether to automatically start Docker container",
description="Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.",
)
container_prefix: str | None = Field(
default=None,