Adds Kubernetes sandbox provisioner support (#35)

* Adds Kubernetes sandbox provisioner support

* Improves Docker dev setup by standardizing host paths

Replaces hardcoded host paths with a configurable root directory,
making the development environment more portable and easier to use
across different machines. Automatically sets the root path if not
already defined, reducing manual setup steps.
This commit is contained in:
JeffJiang
2026-02-12 11:02:09 +08:00
committed by GitHub
parent e87fd74e17
commit 300e5a519a
36 changed files with 2136 additions and 1286 deletions

View File

@@ -6,11 +6,56 @@
# - frontend: Frontend Next.js dev server (port 3000)
# - gateway: Backend Gateway API (port 8001)
# - langgraph: LangGraph server (port 2024)
# - provisioner: Sandbox provisioner (creates Pods in host Kubernetes)
#
# Prerequisites:
# - Host machine must have a running Kubernetes cluster (Docker Desktop K8s,
# minikube, kind, etc.) with kubectl configured (~/.kube/config).
#
# Access: http://localhost:2026
services:
# Nginx Reverse Proxy
# ── Sandbox Provisioner ────────────────────────────────────────────────
# Manages per-sandbox Pod + Service lifecycle in the host Kubernetes
# cluster via the K8s API.
# Backend accesses sandboxes directly via host.docker.internal:{NodePort}.
provisioner:
  # Sandbox provisioner: talks to the host Kubernetes API and creates one
  # Pod + Service per sandbox. Built from ./provisioner (relative to this file).
  build:
    context: ./provisioner
    dockerfile: Dockerfile
  container_name: deer-flow-provisioner
  volumes:
    # Host kubeconfig, mounted read-only at the path KUBECONFIG_PATH points to.
    - ~/.kube/config:/root/.kube/config:ro
  environment:
    - K8S_NAMESPACE=deer-flow
    # Overridable from the shell (or the compose project .env); values listed
    # under `environment` take precedence over `env_file`, so without the
    # ${VAR:-default} form these could not be changed without editing this file.
    - SANDBOX_IMAGE=${SANDBOX_IMAGE:-enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest}
    # Host paths for K8s HostPath volumes (must be absolute paths accessible by K8s node)
    # On Docker Desktop/OrbStack, use your actual host paths like /Users/username/...
    # Set these in your shell before running docker-compose:
    #   export DEER_FLOW_ROOT=/absolute/path/to/deer-flow
    - SKILLS_HOST_PATH=${DEER_FLOW_ROOT}/skills
    - THREADS_HOST_PATH=${DEER_FLOW_ROOT}/backend/.deer-flow/threads
    - KUBECONFIG_PATH=/root/.kube/config
    - NODE_HOST=host.docker.internal
    # Override K8S API server URL since kubeconfig uses 127.0.0.1
    # which is unreachable from inside the container.
    # The port is cluster-specific; check yours with
    #   kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'
    # and export K8S_API_SERVER if it differs from the default below.
    - K8S_API_SERVER=${K8S_API_SERVER:-https://host.docker.internal:26443}
  env_file:
    - ../.env
  extra_hosts:
    # Maps host.docker.internal to the host gateway (needed on Linux).
    - "host.docker.internal:host-gateway"
  networks:
    - deer-flow-dev
  restart: unless-stopped
  healthcheck:
    # Relies on curl being installed in the provisioner image.
    test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
    interval: 10s
    timeout: 5s
    retries: 6
    start_period: 15s
# ── Reverse Proxy ──────────────────────────────────────────────────────
# Routes API traffic to gateway, langgraph, and provisioner services.
nginx:
image: nginx:alpine
container_name: deer-flow-nginx
@@ -22,6 +67,7 @@ services:
- frontend
- gateway
- langgraph
- provisioner
networks:
- deer-flow-dev
restart: unless-stopped
@@ -58,6 +104,8 @@ services:
build:
context: ../
dockerfile: backend/Dockerfile
cache_from:
- type=local,src=/tmp/docker-cache-gateway
container_name: deer-flow-gateway
command: sh -c "cd backend && uv run uvicorn src.gateway.app:app --host 0.0.0.0 --port 8001 --reload --reload-include='*.yaml .env' > /app/logs/gateway.log 2>&1"
volumes:
@@ -66,11 +114,14 @@ services:
- ../config.yaml:/app/config.yaml
- ../skills:/app/skills
- ../logs:/app/logs
- ../backend/.deer-flow:/app/backend/.deer-flow
# Mount uv cache for faster dependency installation
- ~/.cache/uv:/root/.cache/uv
working_dir: /app
environment:
- CI=true
env_file:
- ../backend/.env
- ../.env
extra_hosts:
# For Linux: map host.docker.internal to host gateway
- "host.docker.internal:host-gateway"
@@ -83,6 +134,8 @@ services:
build:
context: ../
dockerfile: backend/Dockerfile
cache_from:
- type=local,src=/tmp/docker-cache-langgraph
container_name: deer-flow-langgraph
command: sh -c "cd backend && uv run langgraph dev --no-browser --allow-blocking --host 0.0.0.0 --port 2024 > /app/logs/langgraph.log 2>&1"
volumes:
@@ -91,15 +144,23 @@ services:
- ../config.yaml:/app/config.yaml
- ../skills:/app/skills
- ../logs:/app/logs
- ../backend/.deer-flow:/app/backend/.deer-flow
# Mount uv cache for faster dependency installation
- ~/.cache/uv:/root/.cache/uv
working_dir: /app
environment:
- CI=true
env_file:
- ../backend/.env
- ../.env
networks:
- deer-flow-dev
restart: unless-stopped
volumes: {}
networks:
deer-flow-dev:
driver: bridge
ipam:
config:
- subnet: 192.168.200.0/24

View File

@@ -1,427 +0,0 @@
# Kubernetes Sandbox Setup
This guide explains how to deploy and configure the DeerFlow sandbox execution environment on Kubernetes.
## Overview
The Kubernetes sandbox deployment allows you to run DeerFlow's code execution sandbox in a Kubernetes cluster, providing:
- **Isolated Execution**: Sandbox runs in dedicated Kubernetes pods
- **Scalability**: Easy horizontal scaling with replica configuration
- **Cluster Integration**: Seamless integration with existing Kubernetes infrastructure
- **Persistent Skills**: Skills directory mounted from host or PersistentVolume
## Prerequisites
Before you begin, ensure you have:
1. **Kubernetes Cluster**: One of the following:
- Docker Desktop with Kubernetes enabled
- OrbStack with Kubernetes enabled
- Minikube
- Any production Kubernetes cluster
2. **kubectl**: Kubernetes command-line tool
```bash
# macOS
brew install kubectl
# Linux
# See: https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/
```
3. **Docker**: For pulling the sandbox image (optional, but recommended)
```bash
# Verify installation
docker version
```
## Quick Start
### 1. Enable Kubernetes
**Docker Desktop:**
```
Settings → Kubernetes → Enable Kubernetes → Apply & Restart
```
**OrbStack:**
```
Settings → Enable Kubernetes
```
**Minikube:**
```bash
minikube start
```
### 2. Run Setup Script
The easiest way to get started:
```bash
cd docker/k8s
./setup.sh
```
This will:
- ✅ Check kubectl installation and cluster connectivity
- ✅ Pull the sandbox Docker image (optional, can be skipped)
- ✅ Create the `deer-flow` namespace
- ✅ Deploy the sandbox service and deployment
- ✅ Verify the deployment is running
### 3. Configure Backend
Add the following to `backend/config.yaml`:
```yaml
sandbox:
use: src.community.aio_sandbox:AioSandboxProvider
base_url: http://deer-flow-sandbox.deer-flow.svc.cluster.local:8080
```
### 4. Verify Deployment
Check that the sandbox pod is running:
```bash
kubectl get pods -n deer-flow
```
You should see:
```
NAME READY STATUS RESTARTS AGE
deer-flow-sandbox-xxxxxxxxxx-xxxxx 1/1 Running 0 1m
```
## Advanced Configuration
### Custom Skills Path
By default, the setup script uses `PROJECT_ROOT/skills`. You can specify a custom path:
**Using command-line argument:**
```bash
./setup.sh --skills-path /custom/path/to/skills
```
**Using environment variable:**
```bash
SKILLS_PATH=/custom/path/to/skills ./setup.sh
```
### Custom Sandbox Image
To use a different sandbox image:
**Using command-line argument:**
```bash
./setup.sh --image your-registry/sandbox:tag
```
**Using environment variable:**
```bash
SANDBOX_IMAGE=your-registry/sandbox:tag ./setup.sh
```
### Skip Image Pull
If you already have the image locally or want to pull it manually later:
```bash
./setup.sh --skip-pull
```
### Combined Options
```bash
./setup.sh --skip-pull --skills-path /custom/skills --image custom/sandbox:latest
```
## Manual Deployment
If you prefer manual deployment or need more control:
### 1. Create Namespace
```bash
kubectl apply -f namespace.yaml
```
### 2. Create Service
```bash
kubectl apply -f sandbox-service.yaml
```
### 3. Deploy Sandbox
First, update the skills path in `sandbox-deployment.yaml`:
```bash
# Replace __SKILLS_PATH__ with your actual path
sed 's|__SKILLS_PATH__|/absolute/path/to/deer-flow/skills|g' \
sandbox-deployment.yaml | kubectl apply -f -
```
Or manually edit `sandbox-deployment.yaml` and replace `__SKILLS_PATH__` with your skills directory path.
### 4. Verify Deployment
```bash
# Check all resources
kubectl get all -n deer-flow
# Check pod status
kubectl get pods -n deer-flow
# Check pod logs
kubectl logs -n deer-flow -l app=deer-flow-sandbox
# Describe pod for detailed info
kubectl describe pod -n deer-flow -l app=deer-flow-sandbox
```
## Configuration Options
### Resource Limits
Edit `sandbox-deployment.yaml` to adjust resource limits:
```yaml
resources:
requests:
cpu: 100m # Minimum CPU
memory: 256Mi # Minimum memory
limits:
cpu: 1000m # Maximum CPU (1 core)
memory: 1Gi # Maximum memory
```
### Scaling
Adjust the number of replicas:
```yaml
spec:
replicas: 3 # Run 3 sandbox pods
```
Or scale dynamically:
```bash
kubectl scale deployment deer-flow-sandbox -n deer-flow --replicas=3
```
### Health Checks
The deployment includes readiness and liveness probes:
- **Readiness Probe**: Checks if the pod is ready to serve traffic
- **Liveness Probe**: Restarts the pod if it becomes unhealthy
Configure in `sandbox-deployment.yaml`:
```yaml
readinessProbe:
httpGet:
path: /v1/sandbox
port: 8080
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 3
failureThreshold: 3
```
## Troubleshooting
### Pod Not Starting
Check pod status and events:
```bash
kubectl describe pod -n deer-flow -l app=deer-flow-sandbox
```
Common issues:
- **ImagePullBackOff**: Docker image cannot be pulled
- Solution: Pre-pull image with `docker pull <image>`
- **Skills path not found**: HostPath doesn't exist
- Solution: Verify the skills path exists on the host
- **Resource constraints**: Not enough CPU/memory
- Solution: Adjust resource requests/limits
### Service Not Accessible
Verify the service is running:
```bash
kubectl get service -n deer-flow
kubectl describe service deer-flow-sandbox -n deer-flow
```
Test connectivity from another pod:
```bash
kubectl run test-pod -n deer-flow --rm -it --image=curlimages/curl -- \
curl http://deer-flow-sandbox.deer-flow.svc.cluster.local:8080/v1/sandbox
```
### Check Logs
View sandbox logs:
```bash
# Follow logs in real-time
kubectl logs -n deer-flow -l app=deer-flow-sandbox -f
# View logs from previous container (if crashed)
kubectl logs -n deer-flow -l app=deer-flow-sandbox --previous
```
### Health Check Failures
If pods show as not ready:
```bash
# Check readiness probe
kubectl get events -n deer-flow --sort-by='.lastTimestamp'
# Exec into pod to debug
kubectl exec -it -n deer-flow <pod-name> -- /bin/sh
```
## Cleanup
### Remove All Resources
Using the setup script:
```bash
./setup.sh --cleanup
```
Or manually:
```bash
kubectl delete -f sandbox-deployment.yaml
kubectl delete -f sandbox-service.yaml
kubectl delete namespace deer-flow
```
### Remove Specific Resources
```bash
# Delete only the deployment (keeps namespace and service)
kubectl delete deployment deer-flow-sandbox -n deer-flow
# Delete pods (they will be recreated by deployment)
kubectl delete pods -n deer-flow -l app=deer-flow-sandbox
```
## Architecture
```
┌─────────────────────────────────────────────┐
│ DeerFlow Backend │
│ (config.yaml: base_url configured) │
└────────────────┬────────────────────────────┘
│ HTTP requests
┌─────────────────────────────────────────────┐
│ Kubernetes Service (ClusterIP) │
│ deer-flow-sandbox.deer-flow.svc:8080 │
└────────────────┬────────────────────────────┘
│ Load balancing
┌─────────────────────────────────────────────┐
│ Sandbox Pods (replicas) │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ │
│ │ Port 8080│ │ Port 8080│ │ Port 8080│ │
│ └──────────┘ └──────────┘ └──────────┘ │
└────────────────┬────────────────────────────┘
│ Volume mount
┌─────────────────────────────────────────────┐
│ Host Skills Directory │
│ /path/to/deer-flow/skills │
└─────────────────────────────────────────────┘
```
## Setup Script Reference
### Command-Line Options
```bash
./setup.sh [options]
Options:
-h, --help Show help message
-c, --cleanup Remove all Kubernetes resources
-p, --skip-pull Skip pulling sandbox image
--image <image> Use custom sandbox image
--skills-path <path> Custom skills directory path
Environment Variables:
SANDBOX_IMAGE Custom sandbox image
SKILLS_PATH Custom skills path
Examples:
./setup.sh # Use default settings
./setup.sh --skills-path /custom/path # Use custom skills path
./setup.sh --skip-pull --image custom:tag # Custom image, skip pull
SKILLS_PATH=/custom/path ./setup.sh # Use env variable
```
## Production Considerations
### Security
1. **Network Policies**: Restrict pod-to-pod communication
2. **RBAC**: Configure appropriate service account permissions
3. **Pod Security**: Enable pod security standards
4. **Image Security**: Scan images for vulnerabilities
### High Availability
1. **Multiple Replicas**: Run at least 3 replicas
2. **Pod Disruption Budget**: Prevent all pods from being evicted
3. **Node Affinity**: Distribute pods across nodes
4. **Resource Quotas**: Set namespace resource limits
### Monitoring
1. **Prometheus**: Scrape metrics from pods
2. **Logging**: Centralized log aggregation
3. **Alerting**: Set up alerts for pod failures
4. **Tracing**: Distributed tracing for requests
### Storage
For production, consider using PersistentVolume instead of hostPath:
1. **Create PersistentVolume**: Define storage backend
2. **Create PersistentVolumeClaim**: Request storage
3. **Update Deployment**: Use PVC instead of hostPath
See `skills-pv-pvc.yaml.bak` for a reference implementation.
## Next Steps
After successful deployment:
1. **Start Backend**: `make dev` or `make docker-start`
2. **Test Sandbox**: Create a conversation and execute code
3. **Monitor**: Watch pod logs and resource usage
4. **Scale**: Adjust replicas based on workload
## Support
For issues and questions:
- Check troubleshooting section above
- Review pod logs: `kubectl logs -n deer-flow -l app=deer-flow-sandbox`
- See main project documentation: [../../README.md](../../README.md)
- Report issues on GitHub

View File

@@ -1,7 +0,0 @@
---
# Namespace that holds all DeerFlow sandbox resources.
apiVersion: v1
kind: Namespace
metadata:
  name: deer-flow
  labels:
    app.kubernetes.io/name: deer-flow
    app.kubernetes.io/component: sandbox

View File

@@ -1,65 +0,0 @@
---
# Deployment that runs the sandbox container in the deer-flow namespace.
# The Pod template label `app: deer-flow-sandbox` must keep matching
# spec.selector.matchLabels below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deer-flow-sandbox
  namespace: deer-flow
  labels:
    app.kubernetes.io/name: deer-flow
    app.kubernetes.io/component: sandbox
spec:
  replicas: 1
  selector:
    matchLabels:
      app: deer-flow-sandbox
  template:
    metadata:
      labels:
        app: deer-flow-sandbox
        app.kubernetes.io/name: deer-flow
        app.kubernetes.io/component: sandbox
    spec:
      containers:
        - name: sandbox
          image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          # Both probes hit the same HTTP endpoint; liveness starts later and
          # polls less often than readiness.
          readinessProbe:
            httpGet:
              path: /v1/sandbox
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /v1/sandbox
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 3
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          volumeMounts:
            - name: skills
              mountPath: /mnt/skills
              readOnly: true
          securityContext:
            privileged: false
            # NOTE(review): privilege escalation is allowed for a container
            # that executes untrusted code. Presumably the sandbox image
            # needs it (e.g. sudo); confirm, and set to false if not.
            allowPrivilegeEscalation: true
      volumes:
        - name: skills
          hostPath:
            # Path to skills directory on the host machine
            # This will be replaced by setup.sh with the actual path
            path: __SKILLS_PATH__
            type: Directory
      restartPolicy: Always

View File

@@ -1,21 +0,0 @@
---
# Headless Service in front of the sandbox Pods (selected by
# `app: deer-flow-sandbox`).
apiVersion: v1
kind: Service
metadata:
  name: deer-flow-sandbox
  namespace: deer-flow
  labels:
    app.kubernetes.io/name: deer-flow
    app.kubernetes.io/component: sandbox
spec:
  type: ClusterIP
  clusterIP: None  # Headless service for direct Pod DNS access
  ports:
    - name: http
      port: 8080
      targetPort: 8080
      protocol: TCP
  selector:
    app: deer-flow-sandbox
  # Enable DNS-based service discovery
  # Pods will be accessible at: {pod-name}.deer-flow-sandbox.deer-flow.svc.cluster.local:8080
  # NOTE(review): per-Pod DNS names of this form normally require the Pod to
  # set hostname/subdomain (or to be managed by a StatefulSet). The sandbox
  # Deployment sets neither, so confirm that these names actually resolve.
  publishNotReadyAddresses: false

View File

@@ -1,245 +0,0 @@
#!/bin/bash
# Kubernetes Sandbox Initialization Script for Deer-Flow
# This script sets up the Kubernetes environment for the sandbox provider
# Abort on the first command that exits non-zero.
set -e
# Absolute directory containing this script; the manifests applied below are
# looked up relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Repository root, two directory levels above the script directory.
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
# Default sandbox image
# Used when neither --image nor the SANDBOX_IMAGE environment variable is set.
DEFAULT_SANDBOX_IMAGE="enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest"
# Colors for output
# Stored as literal backslash sequences (single-quoted); they only become
# terminal escapes when printed with escape interpretation (e.g. `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Startup banner, printed before argument parsing.
echo -e "${BLUE}╔════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ Deer-Flow Kubernetes Sandbox Setup ║${NC}"
echo -e "${BLUE}╚════════════════════════════════════════════╝${NC}"
echo
# Function to print status messages
# Logging helpers: print "$1" prefixed with a colored severity tag.
#
# The color variables hold backslash sequences, so they are expanded with
# printf's %b. The message itself is printed with %s so that backslashes in
# paths or image names are emitted verbatim instead of being interpreted as
# escapes (which `echo -e` would do).
info() {
    printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$1"
}
success() {
    printf '%b[SUCCESS]%b %s\n' "${GREEN}" "${NC}" "$1"
}
# Warnings and errors go to stderr so they stay visible when stdout is
# redirected or piped.
warn() {
    printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1" >&2
}
error() {
    printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" >&2
}
# Check if kubectl is installed
check_kubectl() {
    # Verify that a `kubectl` executable is on PATH.
    # Prints install hints and exits with status 1 when it is missing.
    info "Checking kubectl installation..."
    if command -v kubectl > /dev/null 2>&1; then
        success "kubectl is installed"
        return
    fi
    error "kubectl is not installed. Please install kubectl first."
    echo " - macOS: brew install kubectl"
    echo " - Linux: https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/"
    exit 1
}
# Check if Kubernetes cluster is accessible
check_cluster() {
    # Verify that `kubectl cluster-info` succeeds against the current context.
    # Prints per-platform hints and exits with status 1 when it does not.
    info "Checking Kubernetes cluster connection..."
    if kubectl cluster-info > /dev/null 2>&1; then
        success "Connected to Kubernetes cluster"
        return
    fi
    error "Cannot connect to Kubernetes cluster."
    echo "Please ensure:"
    echo " - Docker Desktop: Settings → Kubernetes → Enable Kubernetes"
    echo " - Or OrbStack: Enable Kubernetes in settings"
    echo " - Or Minikube: minikube start"
    exit 1
}
# Apply Kubernetes resources
apply_resources() {
    # Apply the namespace, service and deployment manifests found next to this
    # script. The __SKILLS_PATH__ placeholder in sandbox-deployment.yaml is
    # replaced with the skills directory before the manifest is piped to kubectl.
    #
    # Reads:  SKILLS_PATH (optional override), PROJECT_ROOT, SCRIPT_DIR
    # Writes: SKILLS_PATH (resolved to an absolute path)
    info "Applying Kubernetes resources..."

    # Determine skills path
    SKILLS_PATH="${SKILLS_PATH:-${PROJECT_ROOT}/skills}"
    info "Using skills path: ${SKILLS_PATH}"

    # Validate skills path exists
    if [[ ! -d "${SKILLS_PATH}" ]]; then
        warn "Skills path does not exist: ${SKILLS_PATH}"
        warn "Creating directory..."
        mkdir -p "${SKILLS_PATH}"
    fi

    # hostPath volumes require an absolute path, so resolve a relative
    # --skills-path / SKILLS_PATH now that the directory is known to exist.
    SKILLS_PATH="$(cd "${SKILLS_PATH}" && pwd)"

    echo " → Creating namespace..."
    kubectl apply -f "${SCRIPT_DIR}/namespace.yaml"

    echo " → Creating sandbox service..."
    kubectl apply -f "${SCRIPT_DIR}/sandbox-service.yaml"

    echo " → Creating sandbox deployment with skills path: ${SKILLS_PATH}"
    # Escape the characters that are special on the replacement side of the
    # sed expression below ('\', '&', and the '|' delimiter) so that unusual
    # paths are substituted literally.
    local escaped_path
    escaped_path="$(printf '%s' "${SKILLS_PATH}" | sed 's/[\\&|]/\\&/g')"

    # Replace __SKILLS_PATH__ placeholder with actual path. The same sed
    # invocation works on both macOS and Linux because nothing is edited in place.
    sed "s|__SKILLS_PATH__|${escaped_path}|g" "${SCRIPT_DIR}/sandbox-deployment.yaml" | kubectl apply -f -

    success "All Kubernetes resources applied"
}
# Verify deployment
verify_deployment() {
    # List the namespace and the service, deployment and pod resources in it.
    # With `set -e` active, any failing `kubectl get` aborts the script.
    local ns="deer-flow"
    local kind

    info "Verifying deployment..."
    echo " → Checking namespace..."
    kubectl get namespace "${ns}"
    for kind in service deployment pods; do
        echo " → Checking ${kind}..."
        kubectl get "${kind}" -n "${ns}"
    done
    success "Deployment verified"
}
# Pull sandbox image
pull_image() {
    # Best-effort pre-pull of the sandbox image with the local Docker CLI.
    # Never fails the script: a missing Docker CLI or a failed pull only
    # produces a warning.
    #
    # Reads:  SANDBOX_IMAGE (optional override), DEFAULT_SANDBOX_IMAGE
    # Writes: IMAGE
    info "Checking sandbox image..."
    IMAGE="${SANDBOX_IMAGE:-$DEFAULT_SANDBOX_IMAGE}"

    # Docker is optional for this setup; without it, skip cleanly instead of
    # surfacing "command not found" noise from the calls below.
    if ! command -v docker > /dev/null 2>&1; then
        warn "docker is not installed; skipping image pull."
        warn "Pod startup may be slow on first run while the cluster pulls: $IMAGE"
        return 0
    fi

    # Check if image already exists locally
    if docker image inspect "$IMAGE" &> /dev/null; then
        success "Image already exists locally: $IMAGE"
        return 0
    fi

    info "Pulling sandbox image (this may take a few minutes on first run)..."
    echo " → Image: $IMAGE"
    echo
    if docker pull "$IMAGE"; then
        success "Image pulled successfully"
    else
        warn "Failed to pull image. Pod startup may be slow on first run."
        echo " You can manually pull the image later with:"
        echo " docker pull $IMAGE"
    fi
}
# Print next steps
print_next_steps() {
    # Print the completion banner, the config.yaml snippet that enables the
    # Kubernetes sandbox, and the follow-up make targets.
    # Each argument becomes one output line; %b expands the backslash
    # sequences stored in the color variables.
    printf '%b\n' \
        "" \
        "${BLUE}╔════════════════════════════════════════════╗${NC}" \
        "${BLUE}║ Setup Complete! ║${NC}" \
        "${BLUE}╚════════════════════════════════════════════╝${NC}" \
        "" \
        "${YELLOW}To enable Kubernetes sandbox, add the following to backend/config.yaml:${NC}" \
        "" \
        "${GREEN}sandbox:${NC}" \
        "${GREEN} use: src.community.aio_sandbox:AioSandboxProvider${NC}" \
        "${GREEN} base_url: http://deer-flow-sandbox.deer-flow.svc.cluster.local:8080${NC}" \
        "" \
        "" \
        "${GREEN}Next steps:${NC}" \
        " make dev # Start backend and frontend in development mode" \
        " make docker-start # Start backend and frontend in Docker containers" \
        ""
}
# Cleanup function
cleanup() {
    # When called with --cleanup or -c, delete the deployment, service and
    # namespace manifests (in that order) and exit 0.
    # Any other argument makes this a no-op that returns 0.
    case "$1" in
        --cleanup | -c) ;;
        *) return 0 ;;
    esac

    info "Cleaning up Kubernetes resources..."
    local manifest
    for manifest in sandbox-deployment.yaml sandbox-service.yaml namespace.yaml; do
        kubectl delete -f "${SCRIPT_DIR}/${manifest}" --ignore-not-found=true
    done
    success "Cleanup complete"
    exit 0
}
# Show help
show_help() {
    # Print usage, options, environment variables and examples, then exit 0.
    # The here-document is unquoted so $0 and $DEFAULT_SANDBOX_IMAGE expand.
    cat << EOF
Usage: $0 [options]

Options:
 -h, --help Show this help message
 -c, --cleanup Remove all Kubernetes resources
 -p, --skip-pull Skip pulling sandbox image
 --image <image> Use custom sandbox image
 --skills-path <path> Custom skills directory path

Environment variables:
 SANDBOX_IMAGE Custom sandbox image (default: $DEFAULT_SANDBOX_IMAGE)
 SKILLS_PATH Custom skills path (default: PROJECT_ROOT/skills)

Examples:
 $0 # Use default settings
 $0 --skills-path /custom/path # Use custom skills path
 SKILLS_PATH=/custom/path $0 # Use env variable

EOF
    exit 0
}
# Parse arguments
# Command-line parsing.
#   -h/--help and -c/--cleanup run immediately and exit the script.
#   --image / --skills-path each consume the following argument as their value.
SKIP_PULL=false
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            show_help
            ;;
        -c|--cleanup)
            cleanup "$1"
            ;;
        -p|--skip-pull)
            SKIP_PULL=true
            shift
            ;;
        --image)
            # Without this check, `shift 2` fails when the value is missing
            # and `set -e` ends the script without any explanation.
            if [[ $# -lt 2 ]]; then
                error "Option --image requires a value"
                exit 1
            fi
            SANDBOX_IMAGE="$2"
            shift 2
            ;;
        --skills-path)
            if [[ $# -lt 2 ]]; then
                error "Option --skills-path requires a value"
                exit 1
            fi
            SKILLS_PATH="$2"
            shift 2
            ;;
        *)
            # Unknown arguments are still skipped (as before), but no longer
            # silently, so typos such as --skip-pul become visible.
            warn "Ignoring unknown option: $1"
            shift
            ;;
    esac
done
# Main execution
main() {
    # Run the setup steps in order: preflight checks, optional image pre-pull,
    # manifest application, verification, and the closing instructions.
    check_kubectl
    check_cluster

    # Pull image first to avoid Pod startup timeout
    case "$SKIP_PULL" in
        false) pull_image ;;
    esac

    apply_resources
    verify_deployment
    print_next_steps
}
main

View File

@@ -14,6 +14,9 @@ http {
access_log /dev/stdout;
error_log /dev/stderr;
# Docker internal DNS (for resolving k3s hostname)
resolver 127.0.0.11 valid=10s ipv6=off;
# Upstream servers (using Docker service names)
upstream gateway {
server gateway:8001;
@@ -27,9 +30,14 @@ http {
server frontend:3000;
}
# Sandbox provisioner backend, addressed by its Docker service name.
# Port 8002 matches the port uvicorn listens on in the provisioner image.
upstream provisioner {
server provisioner:8002;
}
# ── Main server (path-based routing) ─────────────────────────────────
server {
listen 2026;
listen [::]:2026;
listen 2026 default_server;
listen [::]:2026 default_server;
server_name _;
# Hide CORS headers from upstream to prevent duplicates
@@ -180,6 +188,16 @@ http {
proxy_set_header X-Forwarded-Proto $scheme;
}
# ── Provisioner API (sandbox management) ────────────────────────
# Prefix match: forwards /api/sandboxes and everything below it
# (e.g. /api/sandboxes/{sandbox_id}) to the provisioner upstream.
# NOTE(review): this block only sets forwarding headers and adds no access
# control of its own; confirm the sandbox create/delete API is meant to be
# reachable through the public listener.
location /api/sandboxes {
proxy_pass http://provisioner;
proxy_http_version 1.1;
# Pass the original host and client address information upstream.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# All other requests go to frontend
location / {
proxy_pass http://frontend;

View File

@@ -0,0 +1,19 @@
# Image for the DeerFlow sandbox provisioner (FastAPI app served by uvicorn).
FROM python:3.12-slim
# Install system dependencies
# curl is what the docker-compose healthcheck runs inside this container
# (`curl -f http://localhost:8002/health`). The apt lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
# NOTE(review): versions are unpinned, so rebuilds may pick up different
# releases. app.py also imports pydantic and urllib3, which are not listed
# here; presumably they arrive as transitive dependencies of
# fastapi/kubernetes. Confirm, and consider pinning.
RUN pip install --no-cache-dir \
fastapi \
"uvicorn[standard]" \
kubernetes
WORKDIR /app
# Only the single-file application is copied into the image.
COPY app.py .
# Port the API listens on; must match --port in CMD below.
EXPOSE 8002
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8002"]

View File

@@ -0,0 +1,318 @@
# DeerFlow Sandbox Provisioner
The **Sandbox Provisioner** is a FastAPI service that dynamically manages sandbox Pods in Kubernetes. It provides a REST API for the DeerFlow backend to create, monitor, and destroy isolated sandbox environments for code execution.
## Architecture
```
┌────────────┐ HTTP ┌─────────────┐ K8s API ┌──────────────┐
│ Backend │ ─────▸ │ Provisioner │ ────────▸ │ Host K8s │
│ (gateway/ │ │ :8002 │ │ API Server │
│ langgraph) │ └─────────────┘ └──────┬───────┘
└────────────┘ │ creates
┌─────────────┐ ┌────▼─────┐
│ Backend │ ──────▸ │ Sandbox │
│ (via Docker │ NodePort│ Pod(s) │
│ network) │ └──────────┘
└─────────────┘
```
### How It Works
1. **Backend Request**: When the backend needs to execute code, it sends a `POST /api/sandboxes` request with a `sandbox_id` and `thread_id`.
2. **Pod Creation**: The provisioner creates a dedicated Pod in the `deer-flow` namespace with:
- The sandbox container image (all-in-one-sandbox)
- HostPath volumes mounted for:
- `/mnt/skills` → Read-only access to public skills
- `/mnt/user-data` → Read-write access to thread-specific data
- Resource limits (CPU, memory, ephemeral storage)
- Readiness/liveness probes
3. **Service Creation**: A NodePort Service is created to expose the Pod, with Kubernetes auto-allocating a port from the NodePort range (typically 30000-32767).
4. **Access URL**: The provisioner returns `http://host.docker.internal:{NodePort}` to the backend, which the backend containers can reach directly.
5. **Cleanup**: When the session ends, `DELETE /api/sandboxes/{sandbox_id}` removes both the Pod and Service.
## Requirements
A host machine with a running Kubernetes cluster (Docker Desktop K8s, OrbStack, minikube, kind, etc.) is required.
### Enable Kubernetes in Docker Desktop
1. Open Docker Desktop settings
2. Go to "Kubernetes" tab
3. Check "Enable Kubernetes"
4. Click "Apply & Restart"
### Enable Kubernetes in OrbStack
1. Open OrbStack settings
2. Go to "Kubernetes" tab
3. Check "Enable Kubernetes"
## API Endpoints
### `GET /health`
Health check endpoint.
**Response**:
```json
{
"status": "ok"
}
```
### `POST /api/sandboxes`
Create a new sandbox Pod + Service.
**Request**:
```json
{
"sandbox_id": "abc-123",
"thread_id": "thread-456"
}
```
**Response**:
```json
{
"sandbox_id": "abc-123",
"sandbox_url": "http://host.docker.internal:32123",
"status": "Pending"
}
```
**Idempotent**: Calling with the same `sandbox_id` returns the existing sandbox info.
### `GET /api/sandboxes/{sandbox_id}`
Get status and URL of a specific sandbox.
**Response**:
```json
{
"sandbox_id": "abc-123",
"sandbox_url": "http://host.docker.internal:32123",
"status": "Running"
}
```
**Status Values**: `Pending`, `Running`, `Succeeded`, `Failed`, `Unknown`, `NotFound`
### `DELETE /api/sandboxes/{sandbox_id}`
Destroy a sandbox Pod + Service.
**Response**:
```json
{
"ok": true,
"sandbox_id": "abc-123"
}
```
### `GET /api/sandboxes`
List all sandboxes currently managed.
**Response**:
```json
{
"sandboxes": [
{
"sandbox_id": "abc-123",
"sandbox_url": "http://host.docker.internal:32123",
"status": "Running"
}
],
"count": 1
}
```
## Configuration
The provisioner is configured via environment variables (set in [docker-compose-dev.yaml](../docker-compose-dev.yaml)):
| Variable | Default | Description |
|----------|---------|-------------|
| `K8S_NAMESPACE` | `deer-flow` | Kubernetes namespace for sandbox resources |
| `SANDBOX_IMAGE` | `enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest` | Container image for sandbox Pods |
| `SKILLS_HOST_PATH` | `/skills` | **Host machine** path to skills directory (must be absolute; docker-compose-dev.yaml sets it to `${DEER_FLOW_ROOT}/skills`) |
| `THREADS_HOST_PATH` | - | **Host machine** path to threads data directory (must be absolute) |
| `KUBECONFIG_PATH` | `/root/.kube/config` | Path to kubeconfig **inside** the provisioner container |
| `NODE_HOST` | `host.docker.internal` | Hostname that backend containers use to reach host NodePorts |
| `K8S_API_SERVER` | (from kubeconfig) | Override K8s API server URL (e.g., `https://host.docker.internal:26443`) |
### Important: K8S_API_SERVER Override
If your kubeconfig uses `localhost`, `127.0.0.1`, or `0.0.0.0` as the API server address (common with OrbStack, minikube, kind), the provisioner **cannot** reach it from inside the Docker container.
**Solution**: Set `K8S_API_SERVER` to use `host.docker.internal`:
```yaml
# docker-compose-dev.yaml
provisioner:
environment:
- K8S_API_SERVER=https://host.docker.internal:26443 # Replace 26443 with your API port
```
Check your kubeconfig API server:
```bash
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'
```
## Prerequisites
### Host Machine Requirements
1. **Kubernetes Cluster**:
- Docker Desktop with Kubernetes enabled, or
- OrbStack (built-in K8s), or
- minikube, kind, k3s, etc.
2. **kubectl Configured**:
- `~/.kube/config` must exist and be valid
- Current context should point to your local cluster
3. **Kubernetes Access**:
- The provisioner needs permissions to:
- Create/read/delete Pods in the `deer-flow` namespace
- Create/read/delete Services in the `deer-flow` namespace
- Read Namespaces (to create `deer-flow` if missing)
4. **Host Paths**:
- The `SKILLS_HOST_PATH` and `THREADS_HOST_PATH` must be **absolute paths on the host machine**
- These paths are mounted into sandbox Pods via K8s HostPath volumes
- The paths must exist and be readable by the K8s node
### Docker Compose Setup
The provisioner runs as part of the docker-compose-dev stack:
```bash
# Start all services including provisioner
make docker-start
# Or start just the provisioner
docker compose -p deer-flow-dev -f docker/docker-compose-dev.yaml up -d provisioner
```
The compose file:
- Mounts your host's `~/.kube/config` into the container
- Adds `extra_hosts` entry for `host.docker.internal` (required on Linux)
- Configures environment variables for K8s access
## Testing
### Manual API Testing
```bash
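# Health check (port 8002 is not published to the host, so run curl inside the container)
docker exec deer-flow-provisioner curl http://localhost:8002/health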
# Create a sandbox (via provisioner container for internal DNS)
docker exec deer-flow-provisioner curl -X POST http://localhost:8002/api/sandboxes \
-H "Content-Type: application/json" \
-d '{"sandbox_id":"test-001","thread_id":"thread-001"}'
# Check sandbox status
docker exec deer-flow-provisioner curl http://localhost:8002/api/sandboxes/test-001
# List all sandboxes
docker exec deer-flow-provisioner curl http://localhost:8002/api/sandboxes
# Verify Pod and Service in K8s
kubectl get pod,svc -n deer-flow -l sandbox-id=test-001
# Delete sandbox
docker exec deer-flow-provisioner curl -X DELETE http://localhost:8002/api/sandboxes/test-001
```
### Verify from Backend Containers
Once a sandbox is created, the backend containers (gateway, langgraph) can access it:
```bash
# Get sandbox URL from provisioner
SANDBOX_URL=$(docker exec deer-flow-provisioner curl -s http://localhost:8002/api/sandboxes/test-001 | jq -r .sandbox_url)
# Test from gateway container
docker exec deer-flow-gateway curl -s $SANDBOX_URL/v1/sandbox
```
## Troubleshooting
### Issue: "Kubeconfig not found"
**Cause**: The kubeconfig file doesn't exist at the mounted path.
**Solution**:
- Ensure `~/.kube/config` exists on your host machine
- Run `kubectl config view` to verify
- Check the volume mount in docker-compose-dev.yaml
### Issue: "Connection refused" to K8s API
**Cause**: The provisioner can't reach the K8s API server.
**Solution**:
1. Check your kubeconfig server address:
```bash
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}'
```
2. If it's `localhost` or `127.0.0.1`, set `K8S_API_SERVER`:
```yaml
environment:
- K8S_API_SERVER=https://host.docker.internal:PORT
```
### Issue: "Unprocessable Entity" when creating Pod
**Cause**: HostPath volumes contain invalid paths (e.g., relative paths with `..`).
**Solution**:
- Use absolute paths for `SKILLS_HOST_PATH` and `THREADS_HOST_PATH`
- Verify the paths exist on your host machine:
```bash
ls -la /path/to/skills
ls -la /path/to/backend/.deer-flow/threads
```
### Issue: Pod stuck in "ContainerCreating"
**Cause**: Usually pulling the sandbox image from the registry.
**Solution**:
- Pre-pull the image: `make docker-init`
- Check Pod events: `kubectl describe pod sandbox-XXX -n deer-flow`
- Check node: `kubectl get nodes`
### Issue: Cannot access sandbox URL from backend
**Cause**: NodePort not reachable or `NODE_HOST` misconfigured.
**Solution**:
- Verify the Service exists: `kubectl get svc -n deer-flow`
- Test from host: `curl http://localhost:NODE_PORT/v1/sandbox`
- Ensure `extra_hosts` is set in docker-compose (Linux)
- Check `NODE_HOST` env var matches how backend reaches host
## Security Considerations
1. **HostPath Volumes**: The provisioner mounts host directories into sandbox Pods. Ensure these paths contain only trusted data.
2. **Resource Limits**: Each sandbox Pod has CPU, memory, and storage limits to prevent resource exhaustion.
3. **Network Isolation**: Sandbox Pods run in the `deer-flow` namespace and are exposed on the node's network through NodePort Services, so anything that can reach the host on the allocated port can reach the sandbox. Consider NetworkPolicies for stricter isolation.
4. **kubeconfig Access**: The provisioner has full access to your Kubernetes cluster via the mounted kubeconfig. Run it only in trusted environments.
5. **Image Trust**: The sandbox image should come from a trusted registry. Review and audit the image contents.
## Future Enhancements
- [ ] Support for custom resource requests/limits per sandbox
- [ ] PersistentVolume support for larger data requirements
- [ ] Automatic cleanup of stale sandboxes (timeout-based)
- [ ] Metrics and monitoring (Prometheus integration)
- [ ] Multi-cluster support (route to different K8s clusters)
- [ ] Pod affinity/anti-affinity rules for better placement
- [ ] NetworkPolicy templates for sandbox isolation

486
docker/provisioner/app.py Normal file
View File

@@ -0,0 +1,486 @@
"""DeerFlow Sandbox Provisioner Service.
Dynamically creates and manages per-sandbox Pods in Kubernetes.
Each ``sandbox_id`` gets its own Pod + NodePort Service. The backend
accesses sandboxes directly via ``{NODE_HOST}:{NodePort}``.
The provisioner connects to the host machine's Kubernetes cluster via a
mounted kubeconfig (``~/.kube/config``). Sandbox Pods run on the host
K8s and are accessed by the backend via ``{NODE_HOST}:{NodePort}``.
Endpoints:
POST /api/sandboxes — Create a sandbox Pod + Service
DELETE /api/sandboxes/{sandbox_id} — Destroy a sandbox Pod + Service
GET /api/sandboxes/{sandbox_id} — Get sandbox status & URL
GET /api/sandboxes — List all sandboxes
GET /health — Provisioner health check
Architecture (docker-compose-dev):
┌────────────┐ HTTP ┌─────────────┐ K8s API ┌──────────────┐
│ remote │ ─────▸ │ provisioner │ ────────▸ │ host K8s │
│ _backend │ │ :8002 │ │ API server │
└────────────┘ └─────────────┘ └──────┬───────┘
│ creates
┌─────────────┐ ┌──────▼───────┐
│ backend │ ────────▸ │ sandbox │
│ │ direct │ Pod(s) │
└─────────────┘ NodePort └──────────────┘
"""
from __future__ import annotations

import asyncio
import logging
import os
import time
from contextlib import asynccontextmanager

import urllib3
from fastapi import FastAPI, HTTPException
from kubernetes import client as k8s_client
from kubernetes import config as k8s_config
from kubernetes.client.rest import ApiException
from pydantic import BaseModel
# Suppress only the InsecureRequestWarning from urllib3.
# TLS verification is turned off in _init_k8s_client() when K8S_API_SERVER
# is set, which would otherwise emit this warning on every API request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Module-level logger; the root logger is configured at INFO with a
# timestamped "[LEVEL] name: message" format.
logger = logging.getLogger(__name__)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
# ── Configuration (all tuneable via environment variables) ───────────────
# Namespace in which every sandbox Pod and Service is created.
K8S_NAMESPACE = os.environ.get("K8S_NAMESPACE", "deer-flow")
# Container image run by each sandbox Pod.
SANDBOX_IMAGE = os.environ.get(
"SANDBOX_IMAGE",
"enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest",
)
# Node-side directory mounted read-only into each sandbox at /mnt/skills
# (used as a hostPath volume, so it is resolved on the K8s node).
SKILLS_HOST_PATH = os.environ.get("SKILLS_HOST_PATH", "/skills")
# Node-side base directory; each sandbox mounts
# {THREADS_HOST_PATH}/{thread_id}/user-data read-write at /mnt/user-data.
THREADS_HOST_PATH = os.environ.get("THREADS_HOST_PATH", "/.deer-flow/threads")
# Path to the kubeconfig *inside* the provisioner container.
# Typically the host's ~/.kube/config is mounted here.
KUBECONFIG_PATH = os.environ.get("KUBECONFIG_PATH", "/root/.kube/config")
# The hostname / IP that the *backend container* uses to reach NodePort
# services on the host Kubernetes node. On Docker Desktop for macOS this
# is ``host.docker.internal``; on Linux it may be the host's LAN IP.
NODE_HOST = os.environ.get("NODE_HOST", "host.docker.internal")
# ── K8s client setup ────────────────────────────────────────────────────
# Shared CoreV1Api client; stays None until the FastAPI lifespan hook
# assigns it at application startup.
core_v1: k8s_client.CoreV1Api | None = None
def _init_k8s_client() -> k8s_client.CoreV1Api:
    """Build the ``CoreV1Api`` client used for all cluster operations.

    Configuration is loaded from the kubeconfig at ``KUBECONFIG_PATH``.  If
    that fails for any reason, in-cluster configuration is attempted
    instead (the case where the provisioner itself runs inside K8s); an
    error from that second attempt propagates to the caller.

    If the ``K8S_API_SERVER`` environment variable is set, the API server
    address from the loaded configuration is replaced by its value and TLS
    verification is disabled for the returned client.

    Returns:
        A ``CoreV1Api`` bound to the resolved configuration.
    """
    try:
        k8s_config.load_kube_config(config_file=KUBECONFIG_PATH)
    except Exception:
        logger.warning("Could not load kubeconfig from file, trying in-cluster config")
        k8s_config.load_incluster_config()
    else:
        logger.info(f"Loaded kubeconfig from {KUBECONFIG_PATH}")

    # From inside Docker, a kubeconfig that points at ``localhost`` or
    # ``127.0.0.1`` does not reach the host's API server, so the address
    # can be overridden through the environment.
    server_override = os.environ.get("K8S_API_SERVER")
    if not server_override:
        return k8s_client.CoreV1Api()

    overridden = k8s_client.Configuration.get_default_copy()
    overridden.host = server_override
    # Self-signed certs are common for local clusters
    overridden.verify_ssl = False
    return k8s_client.CoreV1Api(k8s_client.ApiClient(overridden))
def _wait_for_kubeconfig(timeout: int = 30) -> None:
    """Block until the kubeconfig file is available.

    ``KUBECONFIG_PATH`` is polled roughly every two seconds.  The path is
    always checked at least once, even when ``timeout`` is zero or
    negative, and the final sleep is shortened so the wait never runs past
    the deadline.

    Args:
        timeout: Maximum number of seconds to wait.

    Raises:
        RuntimeError: If no regular file exists at ``KUBECONFIG_PATH`` once
            the timeout has elapsed.
    """
    poll_interval = 2.0
    # A monotonic clock keeps the deadline correct across wall-clock jumps
    # (NTP corrections, VM suspend/resume).
    deadline = time.monotonic() + timeout
    while True:
        # isfile() rather than exists(): when the bind-mount source is
        # missing on the host, Docker creates a *directory* at the mount
        # point, which is not a usable kubeconfig.
        if os.path.isfile(KUBECONFIG_PATH):
            logger.info(f"Found kubeconfig at {KUBECONFIG_PATH}")
            return
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        logger.info(f"Waiting for kubeconfig at {KUBECONFIG_PATH}")
        time.sleep(min(poll_interval, remaining))
    raise RuntimeError(f"Kubeconfig not found at {KUBECONFIG_PATH} after {timeout}s")
def _ensure_namespace() -> None:
    """Make sure the ``K8S_NAMESPACE`` namespace exists in the cluster.

    The namespace is looked up first.  A 404 from that lookup triggers
    creation with the deer-flow ``app.kubernetes.io`` labels; any other
    API error is re-raised.
    """
    try:
        core_v1.read_namespace(K8S_NAMESPACE)
    except ApiException as exc:
        if exc.status != 404:
            raise
    else:
        logger.info(f"Namespace '{K8S_NAMESPACE}' already exists")
        return

    # Lookup returned 404: the namespace has to be created.
    ns_labels = {
        "app.kubernetes.io/name": "deer-flow",
        "app.kubernetes.io/component": "sandbox",
    }
    ns_meta = k8s_client.V1ObjectMeta(name=K8S_NAMESPACE, labels=ns_labels)
    core_v1.create_namespace(k8s_client.V1Namespace(metadata=ns_meta))
    logger.info(f"Created namespace '{K8S_NAMESPACE}'")
# ── FastAPI lifespan ─────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """FastAPI lifespan hook that prepares the Kubernetes client at startup.

    Startup waits for the kubeconfig file, builds the shared ``core_v1``
    client, and ensures the sandbox namespace exists before the
    application starts serving.  Nothing is torn down on shutdown.

    Args:
        _app: The FastAPI application (unused).
    """
    global core_v1

    # Order matters: the client needs the kubeconfig, and the namespace
    # check needs the client.
    _wait_for_kubeconfig()
    core_v1 = _init_k8s_client()
    _ensure_namespace()
    logger.info("Provisioner is ready (using host Kubernetes)")

    yield


app = FastAPI(title="DeerFlow Sandbox Provisioner", lifespan=lifespan)
# ── Request / Response models ───────────────────────────────────────────
class CreateSandboxRequest(BaseModel):
    """Request body for ``POST /api/sandboxes``."""

    # Identifier used to derive the sandbox's Pod and Service names.
    sandbox_id: str
    # Thread whose ``user-data`` directory is mounted into the sandbox.
    thread_id: str
class SandboxResponse(BaseModel):
    """Description of one sandbox as returned by the provisioner API."""

    # Identifier the sandbox was created with.
    sandbox_id: str
    # Direct access URL, e.g. http://host.docker.internal:{NodePort}
    sandbox_url: str
    # Pod phase reported by Kubernetes, or "NotFound" if the Pod lookup fails.
    status: str
# ── K8s resource helpers ─────────────────────────────────────────────────
def _pod_name(sandbox_id: str) -> str:
return f"sandbox-{sandbox_id}"
def _svc_name(sandbox_id: str) -> str:
return f"sandbox-{sandbox_id}-svc"
def _sandbox_url(node_port: int) -> str:
    """Return the ``http://`` URL for a sandbox on ``NODE_HOST`` at *node_port*."""
    host = NODE_HOST
    return f"http://{host}:{node_port}"
def _build_pod(sandbox_id: str, thread_id: str) -> k8s_client.V1Pod:
    """Assemble the Pod manifest for one sandbox.

    The Pod runs a single ``sandbox`` container from ``SANDBOX_IMAGE`` on
    port 8080, with HTTP readiness and liveness probes against
    ``/v1/sandbox`` and two hostPath mounts:

    * ``SKILLS_HOST_PATH`` at ``/mnt/skills`` (read-only; must already
      exist on the node)
    * ``{THREADS_HOST_PATH}/{thread_id}/user-data`` at ``/mnt/user-data``
      (read-write; created on the node if missing)

    Args:
        sandbox_id: Identifier used for the Pod name and ``sandbox-id`` label.
        thread_id: Thread whose ``user-data`` directory is mounted.  The
            value is interpolated into a host path verbatim, so it must be
            a single, safe path component.

    Returns:
        The ``V1Pod`` object, ready to be submitted to the API server.
    """

    def _http_probe(initial_delay: int, period: int) -> k8s_client.V1Probe:
        # Readiness and liveness probes differ only in their timing.
        return k8s_client.V1Probe(
            http_get=k8s_client.V1HTTPGetAction(
                path="/v1/sandbox",
                port=8080,
            ),
            initial_delay_seconds=initial_delay,
            period_seconds=period,
            timeout_seconds=3,
            failure_threshold=3,
        )

    pod_metadata = k8s_client.V1ObjectMeta(
        name=_pod_name(sandbox_id),
        namespace=K8S_NAMESPACE,
        labels={
            "app": "deer-flow-sandbox",
            "sandbox-id": sandbox_id,
            "app.kubernetes.io/name": "deer-flow",
            "app.kubernetes.io/component": "sandbox",
        },
    )

    resource_limits = k8s_client.V1ResourceRequirements(
        requests={
            "cpu": "100m",
            "memory": "256Mi",
            "ephemeral-storage": "500Mi",
        },
        limits={
            "cpu": "1000m",
            "memory": "1Gi",
            "ephemeral-storage": "500Mi",
        },
    )

    mounts = [
        k8s_client.V1VolumeMount(
            name="skills",
            mount_path="/mnt/skills",
            read_only=True,
        ),
        k8s_client.V1VolumeMount(
            name="user-data",
            mount_path="/mnt/user-data",
            read_only=False,
        ),
    ]

    sandbox_container = k8s_client.V1Container(
        name="sandbox",
        image=SANDBOX_IMAGE,
        image_pull_policy="IfNotPresent",
        ports=[
            k8s_client.V1ContainerPort(
                name="http",
                container_port=8080,
                protocol="TCP",
            )
        ],
        readiness_probe=_http_probe(initial_delay=5, period=5),
        liveness_probe=_http_probe(initial_delay=10, period=10),
        resources=resource_limits,
        volume_mounts=mounts,
        # NOTE(review): privilege escalation is allowed inside the sandbox
        # container; confirm the image actually needs it.
        security_context=k8s_client.V1SecurityContext(
            privileged=False,
            allow_privilege_escalation=True,
        ),
    )

    host_volumes = [
        k8s_client.V1Volume(
            name="skills",
            host_path=k8s_client.V1HostPathVolumeSource(
                path=SKILLS_HOST_PATH,
                type="Directory",
            ),
        ),
        k8s_client.V1Volume(
            name="user-data",
            host_path=k8s_client.V1HostPathVolumeSource(
                path=f"{THREADS_HOST_PATH}/{thread_id}/user-data",
                type="DirectoryOrCreate",
            ),
        ),
    ]

    return k8s_client.V1Pod(
        metadata=pod_metadata,
        spec=k8s_client.V1PodSpec(
            containers=[sandbox_container],
            volumes=host_volumes,
            restart_policy="Always",
        ),
    )
def _build_service(sandbox_id: str) -> k8s_client.V1Service:
    """Assemble the NodePort Service manifest that exposes one sandbox Pod.

    The Service forwards port 8080 to the Pod selected by its
    ``sandbox-id`` label.  No ``nodePort`` is specified, so Kubernetes
    allocates one from its NodePort range.

    Args:
        sandbox_id: Identifier used for the Service name, label and selector.

    Returns:
        The ``V1Service`` object, ready to be submitted to the API server.
    """
    svc_metadata = k8s_client.V1ObjectMeta(
        name=_svc_name(sandbox_id),
        namespace=K8S_NAMESPACE,
        labels={
            "app": "deer-flow-sandbox",
            "sandbox-id": sandbox_id,
            "app.kubernetes.io/name": "deer-flow",
            "app.kubernetes.io/component": "sandbox",
        },
    )
    # nodePort omitted → K8s auto-allocates from the range
    http_port = k8s_client.V1ServicePort(
        name="http",
        port=8080,
        target_port=8080,
        protocol="TCP",
    )
    svc_spec = k8s_client.V1ServiceSpec(
        type="NodePort",
        ports=[http_port],
        selector={"sandbox-id": sandbox_id},
    )
    return k8s_client.V1Service(metadata=svc_metadata, spec=svc_spec)
def _get_node_port(sandbox_id: str) -> int | None:
    """Read the K8s-allocated NodePort from the sandbox's Service.

    Args:
        sandbox_id: Identifier of the sandbox whose Service is looked up.

    Returns:
        The ``node_port`` of the Service port named ``http``, or ``None``
        if the Service cannot be read or has no such port.  A missing
        Service (404) is an expected outcome; any other API error is
        logged as a warning so cluster problems are not mistaken for
        "not found", but it still yields ``None``.
    """
    try:
        svc = core_v1.read_namespaced_service(_svc_name(sandbox_id), K8S_NAMESPACE)
    except ApiException as exc:
        if exc.status != 404:
            logger.warning(
                f"Failed to read Service {_svc_name(sandbox_id)}: "
                f"{exc.status} {exc.reason}"
            )
        return None

    for port in svc.spec.ports or []:
        if port.name == "http":
            return port.node_port
    return None
def _get_pod_phase(sandbox_id: str) -> str:
    """Return the phase of the sandbox's Pod.

    Args:
        sandbox_id: Identifier of the sandbox whose Pod is looked up.

    Returns:
        The Pod phase (Pending / Running / Succeeded / Failed / Unknown),
        ``"Unknown"`` when the Pod reports no phase or the API call fails
        for a reason other than 404, and ``"NotFound"`` only when the Pod
        does not exist.
    """
    try:
        pod = core_v1.read_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
    except ApiException as exc:
        if exc.status == 404:
            return "NotFound"
        # The Pod may well exist; its state simply could not be read.
        logger.warning(
            f"Failed to read Pod {_pod_name(sandbox_id)}: {exc.status} {exc.reason}"
        )
        return "Unknown"
    return pod.status.phase or "Unknown"
# ── API endpoints ────────────────────────────────────────────────────────
@app.get("/health")
async def health():
    """Health endpoint: always reports ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return payload
@app.post("/api/sandboxes", response_model=SandboxResponse)
async def create_sandbox(req: CreateSandboxRequest):
    """Create a sandbox Pod + NodePort Service for *sandbox_id*.

    If the sandbox already exists, returns the existing information
    (idempotent).

    Args:
        req: Request body carrying ``sandbox_id`` and ``thread_id``.

    Returns:
        A ``SandboxResponse`` with the sandbox URL and current Pod phase.

    Raises:
        HTTPException: 400 if ``thread_id`` is not a single safe path
            component; 500 if Pod or Service creation fails, or if no
            NodePort is allocated within about ten seconds.
    """
    sandbox_id = req.sandbox_id
    thread_id = req.thread_id
    logger.info(
        f"Received request to create sandbox '{sandbox_id}' for thread '{thread_id}'"
    )

    # thread_id is interpolated into a hostPath volume that is mounted
    # read-write into the sandbox.  Reject anything that is not a single
    # path component, so that "..", "/" and similar cannot point the mount
    # at an arbitrary directory on the node.
    if thread_id in ("", ".", "..") or any(ch in thread_id for ch in "/\\\0"):
        raise HTTPException(
            status_code=400,
            detail="Invalid thread_id: must be a single path component",
        )

    # ── Fast path: sandbox already exists ────────────────────────────
    existing_port = _get_node_port(sandbox_id)
    if existing_port:
        return SandboxResponse(
            sandbox_id=sandbox_id,
            sandbox_url=_sandbox_url(existing_port),
            status=_get_pod_phase(sandbox_id),
        )

    # ── Create Pod ───────────────────────────────────────────────────
    try:
        core_v1.create_namespaced_pod(K8S_NAMESPACE, _build_pod(sandbox_id, thread_id))
        logger.info(f"Created Pod {_pod_name(sandbox_id)}")
    except ApiException as exc:
        if exc.status != 409:  # 409 = AlreadyExists
            raise HTTPException(
                status_code=500, detail=f"Pod creation failed: {exc.reason}"
            ) from exc

    # ── Create Service ───────────────────────────────────────────────
    try:
        core_v1.create_namespaced_service(K8S_NAMESPACE, _build_service(sandbox_id))
        logger.info(f"Created Service {_svc_name(sandbox_id)}")
    except ApiException as exc:
        if exc.status != 409:
            # Roll back the Pod on failure (best effort)
            try:
                core_v1.delete_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
            except ApiException:
                pass
            raise HTTPException(
                status_code=500, detail=f"Service creation failed: {exc.reason}"
            ) from exc

    # ── Read the auto-allocated NodePort ─────────────────────────────
    node_port: int | None = None
    for _ in range(20):
        node_port = _get_node_port(sandbox_id)
        if node_port:
            break
        # Yield to the event loop while waiting; a blocking sleep here
        # would stall every other request for up to ten seconds.
        await asyncio.sleep(0.5)

    if not node_port:
        raise HTTPException(
            status_code=500, detail="NodePort was not allocated in time"
        )

    return SandboxResponse(
        sandbox_id=sandbox_id,
        sandbox_url=_sandbox_url(node_port),
        status=_get_pod_phase(sandbox_id),
    )
@app.delete("/api/sandboxes/{sandbox_id}")
async def destroy_sandbox(sandbox_id: str):
    """Delete the Service and then the Pod that belong to *sandbox_id*.

    A resource that is already gone (404) counts as deleted.  Both
    deletions are always attempted; other failures are collected and
    reported together.

    Raises:
        HTTPException: 500 listing every resource that could not be deleted.
    """
    failures: list[str] = []

    # Service first, then the Pod behind it.
    try:
        core_v1.delete_namespaced_service(_svc_name(sandbox_id), K8S_NAMESPACE)
        logger.info(f"Deleted Service {_svc_name(sandbox_id)}")
    except ApiException as exc:
        already_gone = exc.status == 404
        if not already_gone:
            failures.append(f"service: {exc.reason}")

    try:
        core_v1.delete_namespaced_pod(_pod_name(sandbox_id), K8S_NAMESPACE)
        logger.info(f"Deleted Pod {_pod_name(sandbox_id)}")
    except ApiException as exc:
        already_gone = exc.status == 404
        if not already_gone:
            failures.append(f"pod: {exc.reason}")

    if not failures:
        return {"ok": True, "sandbox_id": sandbox_id}

    raise HTTPException(
        status_code=500, detail=f"Partial cleanup: {', '.join(failures)}"
    )
@app.get("/api/sandboxes/{sandbox_id}", response_model=SandboxResponse)
async def get_sandbox(sandbox_id: str):
    """Look up one sandbox and report its URL and Pod phase.

    Raises:
        HTTPException: 404 if no NodePort can be read for the sandbox's
            Service.
    """
    port = _get_node_port(sandbox_id)
    if port:
        return SandboxResponse(
            sandbox_id=sandbox_id,
            sandbox_url=_sandbox_url(port),
            status=_get_pod_phase(sandbox_id),
        )
    raise HTTPException(status_code=404, detail=f"Sandbox '{sandbox_id}' not found")
@app.get("/api/sandboxes")
async def list_sandboxes():
    """Enumerate the sandboxes that have a Service in the namespace.

    Services labelled ``app=deer-flow-sandbox`` are listed.  One is
    skipped if it has no ``sandbox-id`` label, or if its first port named
    ``http`` has no NodePort.  The Pod phase is fetched separately for
    each remaining sandbox.

    Returns:
        ``{"sandboxes": [...], "count": N}``.

    Raises:
        HTTPException: 500 if the Service list request fails.
    """
    try:
        svc_list = core_v1.list_namespaced_service(
            K8S_NAMESPACE,
            label_selector="app=deer-flow-sandbox",
        )
    except ApiException as exc:
        raise HTTPException(
            status_code=500, detail=f"Failed to list services: {exc.reason}"
        )

    found: list[SandboxResponse] = []
    for service in svc_list.items:
        service_labels = service.metadata.labels or {}
        sid = service_labels.get("sandbox-id")
        if not sid:
            continue

        # NodePort of the first port named "http", if any.
        http_node_port = next(
            (p.node_port for p in service.spec.ports or [] if p.name == "http"),
            None,
        )
        if not http_node_port:
            continue

        found.append(
            SandboxResponse(
                sandbox_id=sid,
                sandbox_url=_sandbox_url(http_node_port),
                status=_get_pod_phase(sid),
            )
        )

    return {"sandboxes": found, "count": len(found)}