mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-24 22:54:46 +08:00
Refactor base paths with centralized path management (#901)
* Initial plan

* refactor: centralize path management and improve memory storage configuration

* fix: update memory storage path in config.example.yaml for clarity

* Initial plan

* Address PR #901 review comments: security fixes and documentation improvements

Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>
This commit is contained in:
@@ -1,14 +1,10 @@
|
||||
"""Shared path resolution for thread virtual paths (e.g. mnt/user-data/outputs/...)."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
|
||||
|
||||
# Virtual path prefix used in sandbox environments (without leading slash for URL path matching)
|
||||
VIRTUAL_PATH_PREFIX = "mnt/user-data"
|
||||
from src.config.paths import get_paths
|
||||
|
||||
|
||||
def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
|
||||
@@ -16,8 +12,8 @@ def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
|
||||
|
||||
Args:
|
||||
thread_id: The thread ID.
|
||||
virtual_path: The virtual path (e.g., mnt/user-data/outputs/file.txt).
|
||||
Leading slashes are stripped.
|
||||
virtual_path: The virtual path as seen inside the sandbox
|
||||
(e.g., /mnt/user-data/outputs/file.txt).
|
||||
|
||||
Returns:
|
||||
The resolved filesystem path.
|
||||
@@ -25,20 +21,8 @@ def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
|
||||
Raises:
|
||||
HTTPException: If the path is invalid or outside allowed directories.
|
||||
"""
|
||||
virtual_path = virtual_path.lstrip("/")
|
||||
if not virtual_path.startswith(VIRTUAL_PATH_PREFIX):
|
||||
raise HTTPException(status_code=400, detail=f"Path must start with /{VIRTUAL_PATH_PREFIX}")
|
||||
relative_path = virtual_path[len(VIRTUAL_PATH_PREFIX) :].lstrip("/")
|
||||
|
||||
base_dir = Path(os.getcwd()) / THREAD_DATA_BASE_DIR / thread_id / "user-data"
|
||||
actual_path = base_dir / relative_path
|
||||
|
||||
try:
|
||||
actual_path = actual_path.resolve()
|
||||
base_resolved = base_dir.resolve()
|
||||
if not str(actual_path).startswith(str(base_resolved)):
|
||||
raise HTTPException(status_code=403, detail="Access denied: path traversal detected")
|
||||
except (ValueError, RuntimeError):
|
||||
raise HTTPException(status_code=400, detail="Invalid path")
|
||||
|
||||
return actual_path
|
||||
return get_paths().resolve_virtual_path(thread_id, virtual_path)
|
||||
except ValueError as e:
|
||||
status = 403 if "traversal" in str(e) else 400
|
||||
raise HTTPException(status_code=status, detail=str(e))
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
"""Upload router for handling file uploads."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, File, HTTPException, UploadFile
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
|
||||
from src.config.paths import VIRTUAL_PATH_PREFIX, get_paths
|
||||
from src.sandbox.sandbox_provider import get_sandbox_provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -43,7 +42,7 @@ def get_uploads_dir(thread_id: str) -> Path:
|
||||
Returns:
|
||||
Path to the uploads directory.
|
||||
"""
|
||||
base_dir = Path(os.getcwd()) / THREAD_DATA_BASE_DIR / thread_id / "user-data" / "uploads"
|
||||
base_dir = get_paths().sandbox_uploads_dir(thread_id)
|
||||
base_dir.mkdir(parents=True, exist_ok=True)
|
||||
return base_dir
|
||||
|
||||
@@ -106,34 +105,40 @@ async def upload_files(
|
||||
continue
|
||||
|
||||
try:
|
||||
# Normalize filename to prevent path traversal
|
||||
safe_filename = Path(file.filename).name
|
||||
if not safe_filename:
|
||||
logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
|
||||
continue
|
||||
|
||||
# Save the original file
|
||||
file_path = uploads_dir / file.filename
|
||||
file_path = uploads_dir / safe_filename
|
||||
content = await file.read()
|
||||
|
||||
# Build relative path from backend root
|
||||
relative_path = f".deer-flow/threads/{thread_id}/user-data/uploads/{file.filename}"
|
||||
virtual_path = f"/mnt/user-data/uploads/{file.filename}"
|
||||
relative_path = str(get_paths().sandbox_uploads_dir(thread_id) / safe_filename)
|
||||
virtual_path = f"{VIRTUAL_PATH_PREFIX}/uploads/{safe_filename}"
|
||||
sandbox.update_file(virtual_path, content)
|
||||
|
||||
file_info = {
|
||||
"filename": file.filename,
|
||||
"filename": safe_filename,
|
||||
"size": str(len(content)),
|
||||
"path": relative_path, # Actual filesystem path (relative to backend/)
|
||||
"virtual_path": virtual_path, # Path for Agent in sandbox
|
||||
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{file.filename}", # HTTP URL
|
||||
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{safe_filename}", # HTTP URL
|
||||
}
|
||||
|
||||
logger.info(f"Saved file: {file.filename} ({len(content)} bytes) to {relative_path}")
|
||||
logger.info(f"Saved file: {safe_filename} ({len(content)} bytes) to {relative_path}")
|
||||
|
||||
# Check if file should be converted to markdown
|
||||
file_ext = file_path.suffix.lower()
|
||||
if file_ext in CONVERTIBLE_EXTENSIONS:
|
||||
md_path = await convert_file_to_markdown(file_path)
|
||||
if md_path:
|
||||
md_relative_path = f".deer-flow/threads/{thread_id}/user-data/uploads/{md_path.name}"
|
||||
md_relative_path = str(get_paths().sandbox_uploads_dir(thread_id) / md_path.name)
|
||||
file_info["markdown_file"] = md_path.name
|
||||
file_info["markdown_path"] = md_relative_path
|
||||
file_info["markdown_virtual_path"] = f"/mnt/user-data/uploads/{md_path.name}"
|
||||
file_info["markdown_virtual_path"] = f"{VIRTUAL_PATH_PREFIX}/uploads/{md_path.name}"
|
||||
file_info["markdown_artifact_url"] = f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{md_path.name}"
|
||||
|
||||
uploaded_files.append(file_info)
|
||||
@@ -168,13 +173,13 @@ async def list_uploaded_files(thread_id: str) -> dict:
|
||||
for file_path in sorted(uploads_dir.iterdir()):
|
||||
if file_path.is_file():
|
||||
stat = file_path.stat()
|
||||
relative_path = f".deer-flow/threads/{thread_id}/user-data/uploads/{file_path.name}"
|
||||
relative_path = str(get_paths().sandbox_uploads_dir(thread_id) / file_path.name)
|
||||
files.append(
|
||||
{
|
||||
"filename": file_path.name,
|
||||
"size": stat.st_size,
|
||||
"path": relative_path, # Actual filesystem path (relative to backend/)
|
||||
"virtual_path": f"/mnt/user-data/uploads/{file_path.name}", # Path for Agent in sandbox
|
||||
"path": relative_path, # Actual filesystem path
|
||||
"virtual_path": f"{VIRTUAL_PATH_PREFIX}/uploads/{file_path.name}", # Path for Agent in sandbox
|
||||
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{file_path.name}", # HTTP URL
|
||||
"extension": file_path.suffix,
|
||||
"modified": stat.st_mtime,
|
||||
|
||||
Reference in New Issue
Block a user