feat: add global memory mechanism for personalized conversations

Implement a memory system that stores user context and conversation history in memory.json, uses an LLM to summarize conversations, and injects relevant context into system prompts for personalized responses.

Key components:
- MemoryConfig for configuration management
- MemoryUpdateQueue with debounce for batch processing
- MemoryUpdater for LLM-based memory extraction
- MemoryMiddleware to queue conversations after agent execution
- Memory injection into lead agent system prompt

Note: Add a memory section to config.yaml to enable (see config.example.yaml)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-19 04:14:46 +08:00 · 2026-02-03 13:31:05 +08:00
parent 86255511e1
commit 0ea666e0cf
10 changed files with 929 additions and 3 deletions
--- a/backend/src/agents/lead_agent/agent.py
+++ b/backend/src/agents/lead_agent/agent.py
@@ -4,6 +4,7 @@ from langchain_core.runnables import RunnableConfig

 from src.agents.lead_agent.prompt import apply_prompt_template
 from src.agents.middlewares.clarification_middleware import ClarificationMiddleware
+from src.agents.middlewares.memory_middleware import MemoryMiddleware
 from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
 from src.agents.middlewares.title_middleware import TitleMiddleware
 from src.agents.middlewares.uploads_middleware import UploadsMiddleware
@@ -175,6 +176,8 @@ Being proactive with task management demonstrates thoroughness and ensures all r
 # UploadsMiddleware should be after ThreadDataMiddleware to access thread_id
 # SummarizationMiddleware should be early to reduce context before other processing
 # TodoListMiddleware should be before ClarificationMiddleware to allow todo management
+# TitleMiddleware generates title after first exchange
+# MemoryMiddleware queues conversation for memory update (after TitleMiddleware)
 # ViewImageMiddleware should be before ClarificationMiddleware to inject image details before LLM
 # ClarificationMiddleware should be last to intercept clarification requests after model calls
 def _build_middlewares(config: RunnableConfig):
@@ -202,6 +205,9 @@ def _build_middlewares(config: RunnableConfig):
    # Add TitleMiddleware
    middlewares.append(TitleMiddleware())

+    # Add MemoryMiddleware (after TitleMiddleware)
+    middlewares.append(MemoryMiddleware())
+
    # Add ViewImageMiddleware only if the current model supports vision
    model_name = config.get("configurable", {}).get("model_name") or config.get("configurable", {}).get("model")
    from src.config import get_app_config
--- a/backend/src/agents/lead_agent/prompt.py
+++ b/backend/src/agents/lead_agent/prompt.py
@@ -7,6 +7,8 @@ SYSTEM_PROMPT_TEMPLATE = """
 You are DeerFlow 2.0, an open-source super agent.
 </role>

+{memory_context}
+
 <thinking_style>
 - Think concisely and strategically about the user's request BEFORE taking action
 - Break down the task: What is clear? What is ambiguous? What is missing?
@@ -164,6 +166,37 @@ The key AI trends for 2026 include enhanced reasoning capabilities, multimodal i
 """


+def _get_memory_context() -> str:
+    """Build the memory section that is injected into the system prompt.
+
+    Returns:
+        The formatted memory wrapped in <memory> tags, or "" when memory/injection is disabled, the content is blank, or loading fails.
+    """
+    try:  # imports live inside the try so an import failure also falls through to ""
+        from src.agents.memory import format_memory_for_injection, get_memory_data
+        from src.config.memory_config import get_memory_config
+
+        config = get_memory_config()
+        if not config.enabled or not config.injection_enabled:  # both flags must be on to inject
+            return ""
+
+        memory_data = get_memory_data()
+        memory_content = format_memory_for_injection(
+            memory_data, max_tokens=config.max_injection_tokens
+        )
+
+        if not memory_content.strip():  # whitespace-only content counts as no memory
+            return ""
+
+        return f"""<memory>
+{memory_content}
+</memory>
+"""
+    except Exception as e:  # best-effort: any failure is printed and the prompt gets no memory section
+        print(f"Failed to load memory context: {e}")
+        return ""
+
+
 def apply_prompt_template() -> str:
    # Load only enabled skills
    skills = load_skills(enabled_only=True)
@@ -192,7 +225,14 @@ def apply_prompt_template() -> str:
    else:
        skills_list = "<!-- No skills available -->"

-    # Format the prompt with dynamic skills
-    prompt = SYSTEM_PROMPT_TEMPLATE.format(skills_list=skills_list, skills_base_path=container_base_path)
+    # Get memory context
+    memory_context = _get_memory_context()
+
+    # Format the prompt with dynamic skills and memory
+    prompt = SYSTEM_PROMPT_TEMPLATE.format(
+        skills_list=skills_list,
+        skills_base_path=container_base_path,
+        memory_context=memory_context,
+    )

    return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"