Refactor base paths with centralized path management (#901)

* Initial plan

* refactor: centralize path management and improve memory storage configuration

* fix: update memory storage path in config.example.yaml for clarity

* Initial plan

* Address PR #901 review comments: security fixes and documentation improvements

Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>
This commit is contained in:
JeffJiang
2026-02-25 21:30:33 +08:00
committed by GitHub
parent adfe5c4b44
commit d24a66ffd3
14 changed files with 235 additions and 91 deletions

View File

@@ -1,14 +1,10 @@
"""Shared path resolution for thread virtual paths (e.g. mnt/user-data/outputs/...)."""
import os
from pathlib import Path
from fastapi import HTTPException
from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
# Virtual path prefix used in sandbox environments (without leading slash for URL path matching)
VIRTUAL_PATH_PREFIX = "mnt/user-data"
from src.config.paths import get_paths
def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
@@ -16,8 +12,8 @@ def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
Args:
thread_id: The thread ID.
virtual_path: The virtual path (e.g., mnt/user-data/outputs/file.txt).
Leading slashes are stripped.
virtual_path: The virtual path as seen inside the sandbox
(e.g., /mnt/user-data/outputs/file.txt).
Returns:
The resolved filesystem path.
@@ -25,20 +21,8 @@ def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
Raises:
HTTPException: If the path is invalid or outside allowed directories.
"""
virtual_path = virtual_path.lstrip("/")
if not virtual_path.startswith(VIRTUAL_PATH_PREFIX):
raise HTTPException(status_code=400, detail=f"Path must start with /{VIRTUAL_PATH_PREFIX}")
relative_path = virtual_path[len(VIRTUAL_PATH_PREFIX) :].lstrip("/")
base_dir = Path(os.getcwd()) / THREAD_DATA_BASE_DIR / thread_id / "user-data"
actual_path = base_dir / relative_path
try:
actual_path = actual_path.resolve()
base_resolved = base_dir.resolve()
if not str(actual_path).startswith(str(base_resolved)):
raise HTTPException(status_code=403, detail="Access denied: path traversal detected")
except (ValueError, RuntimeError):
raise HTTPException(status_code=400, detail="Invalid path")
return actual_path
return get_paths().resolve_virtual_path(thread_id, virtual_path)
except ValueError as e:
status = 403 if "traversal" in str(e) else 400
raise HTTPException(status_code=status, detail=str(e))

View File

@@ -1,13 +1,12 @@
"""Upload router for handling file uploads."""
import logging
import os
from pathlib import Path
from fastapi import APIRouter, File, HTTPException, UploadFile
from pydantic import BaseModel
from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
from src.config.paths import VIRTUAL_PATH_PREFIX, get_paths
from src.sandbox.sandbox_provider import get_sandbox_provider
logger = logging.getLogger(__name__)
@@ -43,7 +42,7 @@ def get_uploads_dir(thread_id: str) -> Path:
Returns:
Path to the uploads directory.
"""
base_dir = Path(os.getcwd()) / THREAD_DATA_BASE_DIR / thread_id / "user-data" / "uploads"
base_dir = get_paths().sandbox_uploads_dir(thread_id)
base_dir.mkdir(parents=True, exist_ok=True)
return base_dir
@@ -106,34 +105,40 @@ async def upload_files(
continue
try:
# Normalize filename to prevent path traversal
safe_filename = Path(file.filename).name
if not safe_filename:
logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
continue
# Save the original file
file_path = uploads_dir / file.filename
file_path = uploads_dir / safe_filename
content = await file.read()
# Build relative path from backend root
relative_path = f".deer-flow/threads/{thread_id}/user-data/uploads/{file.filename}"
virtual_path = f"/mnt/user-data/uploads/{file.filename}"
relative_path = str(get_paths().sandbox_uploads_dir(thread_id) / safe_filename)
virtual_path = f"{VIRTUAL_PATH_PREFIX}/uploads/{safe_filename}"
sandbox.update_file(virtual_path, content)
file_info = {
"filename": file.filename,
"filename": safe_filename,
"size": str(len(content)),
"path": relative_path, # Actual filesystem path (relative to backend/)
"virtual_path": virtual_path, # Path for Agent in sandbox
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{file.filename}", # HTTP URL
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{safe_filename}", # HTTP URL
}
logger.info(f"Saved file: {file.filename} ({len(content)} bytes) to {relative_path}")
logger.info(f"Saved file: {safe_filename} ({len(content)} bytes) to {relative_path}")
# Check if file should be converted to markdown
file_ext = file_path.suffix.lower()
if file_ext in CONVERTIBLE_EXTENSIONS:
md_path = await convert_file_to_markdown(file_path)
if md_path:
md_relative_path = f".deer-flow/threads/{thread_id}/user-data/uploads/{md_path.name}"
md_relative_path = str(get_paths().sandbox_uploads_dir(thread_id) / md_path.name)
file_info["markdown_file"] = md_path.name
file_info["markdown_path"] = md_relative_path
file_info["markdown_virtual_path"] = f"/mnt/user-data/uploads/{md_path.name}"
file_info["markdown_virtual_path"] = f"{VIRTUAL_PATH_PREFIX}/uploads/{md_path.name}"
file_info["markdown_artifact_url"] = f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{md_path.name}"
uploaded_files.append(file_info)
@@ -168,13 +173,13 @@ async def list_uploaded_files(thread_id: str) -> dict:
for file_path in sorted(uploads_dir.iterdir()):
if file_path.is_file():
stat = file_path.stat()
relative_path = f".deer-flow/threads/{thread_id}/user-data/uploads/{file_path.name}"
relative_path = str(get_paths().sandbox_uploads_dir(thread_id) / file_path.name)
files.append(
{
"filename": file_path.name,
"size": stat.st_size,
"path": relative_path, # Actual filesystem path (relative to backend/)
"virtual_path": f"/mnt/user-data/uploads/{file_path.name}", # Path for Agent in sandbox
"path": relative_path, # Actual filesystem path
"virtual_path": f"{VIRTUAL_PATH_PREFIX}/uploads/{file_path.name}", # Path for Agent in sandbox
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{file_path.name}", # HTTP URL
"extension": file_path.suffix,
"modified": stat.st_mtime,