feat(memory): Introduce configurable memory storage abstraction (#1353)

* feat(内存存储): 添加可配置的内存存储提供者支持。实现内存存储的抽象基类 MemoryStorage 和文件存储实现 FileMemoryStorage；重构内存数据加载和保存逻辑到存储提供者中；添加 storage_class 配置项以支持自定义存储提供者。 * refactor(memory): 重构内存存储模块并更新相关测试。将内存存储逻辑从 updater 模块移动到独立的 storage 模块；使用存储接口模式替代直接文件操作；更新所有相关测试以使用新的存储接口。 * Update backend/packages/harness/deerflow/agents/memory/storage.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update backend/packages/harness/deerflow/agents/memory/storage.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix(内存存储): 添加线程安全锁并增加测试用例。添加线程锁确保内存存储单例初始化的线程安全；增加对无效代理名称的验证测试；补充单例线程安全性和异常处理的测试用例。 * Update backend/tests/test_memory_storage.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix(agents): 使用统一模式验证代理名称。修改代理名称验证逻辑以使用仓库中定义的 AGENT_NAME_PATTERN 模式，确保代码库一致性并防止路径遍历等安全问题。同时更新测试用例以覆盖更多无效名称情况。 --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-05-03 02:30:44 +08:00 · 2026-03-27 07:41:06 +08:00
parent e1853df06a
commit 1c542ab7f1
7 changed files with 442 additions and 177 deletions
--- a/backend/packages/harness/deerflow/agents/memory/updater.py
+++ b/backend/packages/harness/deerflow/agents/memory/updater.py
@@ -5,115 +5,25 @@ import logging
 import re
 import uuid
 from datetime import datetime
-from pathlib import Path
 from typing import Any

 from deerflow.agents.memory.prompt import (
    MEMORY_UPDATE_PROMPT,
    format_conversation_for_update,
 )
+from deerflow.agents.memory.storage import get_memory_storage
 from deerflow.config.memory_config import get_memory_config
-from deerflow.config.paths import get_paths
 from deerflow.models import create_chat_model

 logger = logging.getLogger(__name__)

-
-def _get_memory_file_path(agent_name: str | None = None) -> Path:
-    """Get the path to the memory file.
-
-    Args:
-        agent_name: If provided, returns the per-agent memory file path.
-                    If None, returns the global memory file path.
-
-    Returns:
-        Path to the memory file.
-    """
-    if agent_name is not None:
-        return get_paths().agent_memory_file(agent_name)
-
-    config = get_memory_config()
-    if config.storage_path:
-        p = Path(config.storage_path)
-        # Absolute path: use as-is; relative path: resolve against base_dir
-        return p if p.is_absolute() else get_paths().base_dir / p
-    return get_paths().memory_file
-
-
-def _create_empty_memory() -> dict[str, Any]:
-    """Create an empty memory structure."""
-    return {
-        "version": "1.0",
-        "lastUpdated": datetime.utcnow().isoformat() + "Z",
-        "user": {
-            "workContext": {"summary": "", "updatedAt": ""},
-            "personalContext": {"summary": "", "updatedAt": ""},
-            "topOfMind": {"summary": "", "updatedAt": ""},
-        },
-        "history": {
-            "recentMonths": {"summary": "", "updatedAt": ""},
-            "earlierContext": {"summary": "", "updatedAt": ""},
-            "longTermBackground": {"summary": "", "updatedAt": ""},
-        },
-        "facts": [],
-    }
-
-
-# Per-agent memory cache: keyed by agent_name (None = global)
-# Value: (memory_data, file_mtime)
-_memory_cache: dict[str | None, tuple[dict[str, Any], float | None]] = {}
-
-
 def get_memory_data(agent_name: str | None = None) -> dict[str, Any]:
-    """Get the current memory data (cached with file modification time check).
-
-    The cache is automatically invalidated if the memory file has been modified
-    since the last load, ensuring fresh data is always returned.
-
-    Args:
-        agent_name: If provided, loads per-agent memory. If None, loads global memory.
-
-    Returns:
-        The memory data dictionary.
-    """
-    file_path = _get_memory_file_path(agent_name)
-
-    # Get current file modification time
-    try:
-        current_mtime = file_path.stat().st_mtime if file_path.exists() else None
-    except OSError:
-        current_mtime = None
-
-    cached = _memory_cache.get(agent_name)
-
-    # Invalidate cache if file has been modified or doesn't exist
-    if cached is None or cached[1] != current_mtime:
-        memory_data = _load_memory_from_file(agent_name)
-        _memory_cache[agent_name] = (memory_data, current_mtime)
-        return memory_data
-
-    return cached[0]
-
+    """Get the current memory data via storage provider."""
+    return get_memory_storage().load(agent_name)

 def reload_memory_data(agent_name: str | None = None) -> dict[str, Any]:
-    """Reload memory data from file, forcing cache invalidation.
-
-    Args:
-        agent_name: If provided, reloads per-agent memory. If None, reloads global memory.
-
-    Returns:
-        The reloaded memory data dictionary.
-    """
-    file_path = _get_memory_file_path(agent_name)
-    memory_data = _load_memory_from_file(agent_name)
-
-    try:
-        mtime = file_path.stat().st_mtime if file_path.exists() else None
-    except OSError:
-        mtime = None
-
-    _memory_cache[agent_name] = (memory_data, mtime)
-    return memory_data
+    """Reload memory data via storage provider."""
+    return get_memory_storage().reload(agent_name)


 def _extract_text(content: Any) -> str:
@@ -153,29 +63,6 @@ def _extract_text(content: Any) -> str:
    return str(content)


-def _load_memory_from_file(agent_name: str | None = None) -> dict[str, Any]:
-    """Load memory data from file.
-
-    Args:
-        agent_name: If provided, loads per-agent memory file. If None, loads global.
-
-    Returns:
-        The memory data dictionary.
-    """
-    file_path = _get_memory_file_path(agent_name)
-
-    if not file_path.exists():
-        return _create_empty_memory()
-
-    try:
-        with open(file_path, encoding="utf-8") as f:
-            data = json.load(f)
-        return data
-    except (json.JSONDecodeError, OSError) as e:
-        logger.warning("Failed to load memory file: %s", e)
-        return _create_empty_memory()
-
-
 # Matches sentences that describe a file-upload *event* rather than general
 # file-related work.  Deliberately narrow to avoid removing legitimate facts
 # such as "User works with CSV files" or "prefers PDF export".
@@ -222,48 +109,6 @@ def _fact_content_key(content: Any) -> str | None:
    return stripped


-def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
-    """Save memory data to file and update cache.
-
-    Args:
-        memory_data: The memory data to save.
-        agent_name: If provided, saves to per-agent memory file. If None, saves to global.
-
-    Returns:
-        True if successful, False otherwise.
-    """
-    file_path = _get_memory_file_path(agent_name)
-
-    try:
-        # Ensure directory exists
-        file_path.parent.mkdir(parents=True, exist_ok=True)
-
-        # Update lastUpdated timestamp
-        memory_data["lastUpdated"] = datetime.utcnow().isoformat() + "Z"
-
-        # Write atomically using temp file
-        temp_path = file_path.with_suffix(".tmp")
-        with open(temp_path, "w", encoding="utf-8") as f:
-            json.dump(memory_data, f, indent=2, ensure_ascii=False)
-
-        # Rename temp file to actual file (atomic on most systems)
-        temp_path.replace(file_path)
-
-        # Update cache and file modification time
-        try:
-            mtime = file_path.stat().st_mtime
-        except OSError:
-            mtime = None
-
-        _memory_cache[agent_name] = (memory_data, mtime)
-
-        logger.info("Memory saved to %s", file_path)
-        return True
-    except OSError as e:
-        logger.error("Failed to save memory file: %s", e)
-        return False
-
-
 class MemoryUpdater:
    """Updates memory using LLM based on conversation context."""

@@ -338,7 +183,7 @@ class MemoryUpdater:
            updated_memory = _strip_upload_mentions_from_memory(updated_memory)

            # Save
-            return _save_memory_to_file(updated_memory, agent_name)
+            return get_memory_storage().save(updated_memory, agent_name)

        except json.JSONDecodeError as e:
            logger.warning("Failed to parse LLM response for memory update: %s", e)