Refactor base paths with centralized path management (#901)

* Initial plan

* refactor: centralize path management and improve memory storage configuration

* fix: update memory storage path in config.example.yaml for clarity

* Initial plan

* Address PR #901 review comments: security fixes and documentation improvements

Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>
Author: JeffJiang
Date: 2026-02-25 21:30:33 +08:00
Committed by GitHub
parent adfe5c4b44
commit d24a66ffd3
14 changed files with 235 additions and 91 deletions

View File

@@ -1,7 +1,6 @@
"""Memory updater for reading, writing, and updating memory data."""
import json
import os
import uuid
from datetime import datetime
from pathlib import Path
@@ -12,14 +11,18 @@ from src.agents.memory.prompt import (
format_conversation_for_update,
)
from src.config.memory_config import get_memory_config
from src.config.paths import get_paths
from src.models import create_chat_model
def _get_memory_file_path() -> Path:
"""Get the path to the memory file."""
config = get_memory_config()
# Resolve relative to current working directory (backend/)
return Path(os.getcwd()) / config.storage_path
if config.storage_path:
p = Path(config.storage_path)
# Absolute path: use as-is; relative path: resolve against base_dir
return p if p.is_absolute() else get_paths().base_dir / p
return get_paths().memory_file
def _create_empty_memory() -> dict[str, Any]:

View File

@@ -1,5 +1,3 @@
import os
from pathlib import Path
from typing import NotRequired, override
from langchain.agents import AgentState
@@ -7,7 +5,7 @@ from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime
from src.agents.thread_state import ThreadDataState
from src.sandbox.consts import THREAD_DATA_BASE_DIR
from src.config.paths import Paths, get_paths
class ThreadDataMiddlewareState(AgentState):
@@ -20,9 +18,9 @@ class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
"""Create thread data directories for each thread execution.
Creates the following directory structure:
- backend/.deer-flow/threads/{thread_id}/user-data/workspace
- backend/.deer-flow/threads/{thread_id}/user-data/uploads
- backend/.deer-flow/threads/{thread_id}/user-data/outputs
- {base_dir}/threads/{thread_id}/user-data/workspace
- {base_dir}/threads/{thread_id}/user-data/uploads
- {base_dir}/threads/{thread_id}/user-data/outputs
Lifecycle Management:
- With lazy_init=True (default): Only compute paths, directories created on-demand
@@ -35,13 +33,13 @@ class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
"""Initialize the middleware.
Args:
base_dir: Base directory for thread data. Defaults to the current working directory.
base_dir: Base directory for thread data. Defaults to Paths resolution.
lazy_init: If True, defer directory creation until needed.
If False, create directories eagerly in before_agent().
Default is True for optimal performance.
"""
super().__init__()
self._base_dir = base_dir or os.getcwd()
self._paths = Paths(base_dir) if base_dir else get_paths()
self._lazy_init = lazy_init
def _get_thread_paths(self, thread_id: str) -> dict[str, str]:
@@ -53,11 +51,10 @@ class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
Returns:
Dictionary with workspace_path, uploads_path, and outputs_path.
"""
thread_dir = Path(self._base_dir) / THREAD_DATA_BASE_DIR / thread_id / "user-data"
return {
"workspace_path": str(thread_dir / "workspace"),
"uploads_path": str(thread_dir / "uploads"),
"outputs_path": str(thread_dir / "outputs"),
"workspace_path": str(self._paths.sandbox_work_dir(thread_id)),
"uploads_path": str(self._paths.sandbox_uploads_dir(thread_id)),
"outputs_path": str(self._paths.sandbox_outputs_dir(thread_id)),
}
def _create_thread_directories(self, thread_id: str) -> dict[str, str]:
@@ -69,10 +66,8 @@ class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
Returns:
Dictionary with the created directory paths.
"""
paths = self._get_thread_paths(thread_id)
for path in paths.values():
os.makedirs(path, exist_ok=True)
return paths
self._paths.ensure_thread_dirs(thread_id)
return self._get_thread_paths(thread_id)
@override
def before_agent(self, state: ThreadDataMiddlewareState, runtime: Runtime) -> dict | None:

View File

@@ -1,6 +1,5 @@
"""Middleware to inject uploaded files information into agent context."""
import os
import re
from pathlib import Path
from typing import NotRequired, override
@@ -10,7 +9,7 @@ from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import HumanMessage
from langgraph.runtime import Runtime
from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
from src.config.paths import Paths, get_paths
class UploadsMiddlewareState(AgentState):
@@ -32,10 +31,10 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
"""Initialize the middleware.
Args:
base_dir: Base directory for thread data. Defaults to the current working directory.
base_dir: Base directory for thread data. Defaults to Paths resolution.
"""
super().__init__()
self._base_dir = base_dir or os.getcwd()
self._paths = Paths(base_dir) if base_dir else get_paths()
def _get_uploads_dir(self, thread_id: str) -> Path:
"""Get the uploads directory for a thread.
@@ -46,7 +45,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
Returns:
Path to the uploads directory.
"""
return Path(self._base_dir) / THREAD_DATA_BASE_DIR / thread_id / "user-data" / "uploads"
return self._paths.sandbox_uploads_dir(thread_id)
def _list_newly_uploaded_files(self, thread_id: str, last_message_files: set[str]) -> list[dict]:
"""List only newly uploaded files that weren't in the last message.