feat: add memory API and optimize memory middleware

- Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-19 04:14:46 +08:00 · 2026-02-03 13:41:04 +08:00
parent 18d85ab6e5
commit 7b7a7abaf2
4 changed files with 267 additions and 8 deletions
--- a/backend/src/agents/middlewares/memory_middleware.py
+++ b/backend/src/agents/middlewares/memory_middleware.py
@@ -1,6 +1,6 @@
 """Middleware for memory mechanism."""
-from typing import override
+from typing import Any, override
 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
@@ -16,13 +16,48 @@ class MemoryMiddlewareState(AgentState):
    pass
 def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
    """Filter messages to keep only user inputs and final assistant responses.
    This filters out:
    - Tool messages (intermediate tool call results)
    - AI messages with tool_calls (intermediate steps, not final responses)
    Only keeps:
    - Human messages (user input)
    - AI messages without tool_calls (final assistant responses)
    Args:
        messages: List of all conversation messages.
    Returns:
        Filtered list containing only user inputs and final assistant responses.
    """
    filtered = []
    for msg in messages:
        msg_type = getattr(msg, "type", None)
        if msg_type == "human":
            # Always keep user messages
            filtered.append(msg)
        elif msg_type == "ai":
            # Only keep AI messages that are final responses (no tool_calls)
            tool_calls = getattr(msg, "tool_calls", None)
            if not tool_calls:
                filtered.append(msg)
        # Skip tool messages and AI messages with tool_calls
    return filtered
 class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
    """Middleware that queues conversation for memory update after agent execution.
    This middleware:
    1. After each agent execution, queues the conversation for memory update
-    2. The queue uses debouncing to batch multiple updates together
+    2. Only includes user inputs and final assistant responses (ignores tool calls)
-    3. Memory is updated asynchronously via LLM summarization
+    3. The queue uses debouncing to batch multiple updates together
    4. Memory is updated asynchronously via LLM summarization
    """
    state_schema = MemoryMiddlewareState
@@ -54,16 +89,19 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
            print("MemoryMiddleware: No messages in state, skipping memory update")
            return None
        # Filter to only keep user inputs and final assistant responses
        filtered_messages = _filter_messages_for_memory(messages)
        # Only queue if there's meaningful conversation
        # At minimum need one user message and one assistant response
-        user_messages = [m for m in messages if getattr(m, "type", None) == "human"]
+        user_messages = [m for m in filtered_messages if getattr(m, "type", None) == "human"]
-        assistant_messages = [m for m in messages if getattr(m, "type", None) == "ai"]
+        assistant_messages = [m for m in filtered_messages if getattr(m, "type", None) == "ai"]
        if not user_messages or not assistant_messages:
            return None
-        # Queue the conversation for memory update
+        # Queue the filtered conversation for memory update
        queue = get_memory_queue()
-        queue.add(thread_id=thread_id, messages=list(messages))
+        queue.add(thread_id=thread_id, messages=filtered_messages)
        return None
--- a/backend/src/gateway/app.py
+++ b/backend/src/gateway/app.py
@@ -5,7 +5,7 @@ from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from src.gateway.config import get_gateway_config
-from src.gateway.routers import artifacts, mcp, models, skills, uploads
+from src.gateway.routers import artifacts, mcp, memory, models, skills, uploads
 # Configure logging
 logging.basicConfig(
@@ -50,6 +50,7 @@ API Gateway for DeerFlow - A LangGraph-based AI agent backend with sandbox execu
 - **Models Management**: Query and retrieve available AI models
 - **MCP Configuration**: Manage Model Context Protocol (MCP) server configurations
 - **Memory Management**: Access and manage global memory data for personalized conversations
 - **Skills Management**: Query and manage skills and their enabled status
 - **Artifacts**: Access thread artifacts and generated files
 - **Health Monitoring**: System health check endpoints
@@ -73,6 +74,10 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an
                "name": "mcp",
                "description": "Manage Model Context Protocol (MCP) server configurations",
            },
            {
                "name": "memory",
                "description": "Access and manage global memory data for personalized conversations",
            },
            {
                "name": "skills",
                "description": "Manage skills and their configurations",
@@ -101,6 +106,9 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an
    # MCP API is mounted at /api/mcp
    app.include_router(mcp.router)
    # Memory API is mounted at /api/memory
    app.include_router(memory.router)
    # Skills API is mounted at /api/skills
    app.include_router(skills.router)
--- a/backend/src/gateway/routers/memory.py
+++ b/backend/src/gateway/routers/memory.py
@@ -0,0 +1,201 @@
 """Memory API router for retrieving and managing global memory data."""
 from fastapi import APIRouter
 from pydantic import BaseModel, Field
 from src.agents.memory.updater import get_memory_data, reload_memory_data
 from src.config.memory_config import get_memory_config
 router = APIRouter(prefix="/api", tags=["memory"])
 class ContextSection(BaseModel):
    """Model for context sections (user and history)."""
    summary: str = Field(default="", description="Summary content")
    updatedAt: str = Field(default="", description="Last update timestamp")
 class UserContext(BaseModel):
    """Model for user context."""
    workContext: ContextSection = Field(default_factory=ContextSection)
    personalContext: ContextSection = Field(default_factory=ContextSection)
    topOfMind: ContextSection = Field(default_factory=ContextSection)
 class HistoryContext(BaseModel):
    """Model for history context."""
    recentMonths: ContextSection = Field(default_factory=ContextSection)
    earlierContext: ContextSection = Field(default_factory=ContextSection)
    longTermBackground: ContextSection = Field(default_factory=ContextSection)
 class Fact(BaseModel):
    """Model for a memory fact."""
    id: str = Field(..., description="Unique identifier for the fact")
    content: str = Field(..., description="Fact content")
    category: str = Field(default="context", description="Fact category")
    confidence: float = Field(default=0.5, description="Confidence score (0-1)")
    createdAt: str = Field(default="", description="Creation timestamp")
    source: str = Field(default="unknown", description="Source thread ID")
 class MemoryResponse(BaseModel):
    """Response model for memory data."""
    version: str = Field(default="1.0", description="Memory schema version")
    lastUpdated: str = Field(default="", description="Last update timestamp")
    user: UserContext = Field(default_factory=UserContext)
    history: HistoryContext = Field(default_factory=HistoryContext)
    facts: list[Fact] = Field(default_factory=list)
 class MemoryConfigResponse(BaseModel):
    """Response model for memory configuration."""
    enabled: bool = Field(..., description="Whether memory is enabled")
    storage_path: str = Field(..., description="Path to memory storage file")
    debounce_seconds: int = Field(..., description="Debounce time for memory updates")
    max_facts: int = Field(..., description="Maximum number of facts to store")
    fact_confidence_threshold: float = Field(..., description="Minimum confidence threshold for facts")
    injection_enabled: bool = Field(..., description="Whether memory injection is enabled")
    max_injection_tokens: int = Field(..., description="Maximum tokens for memory injection")
 class MemoryStatusResponse(BaseModel):
    """Response model for memory status."""
    config: MemoryConfigResponse
    data: MemoryResponse
@router.get(
    "/memory",
    response_model=MemoryResponse,
    summary="Get Memory Data",
    description="Retrieve the current global memory data including user context, history, and facts.",
 )
 async def get_memory() -> MemoryResponse:
    """Get the current global memory data.
    Returns:
        The current memory data with user context, history, and facts.
    Example Response:
        ```json
        {
            "version": "1.0",
            "lastUpdated": "2024-01-15T10:30:00Z",
            "user": {
                "workContext": {"summary": "Working on DeerFlow project", "updatedAt": "..."},
                "personalContext": {"summary": "Prefers concise responses", "updatedAt": "..."},
                "topOfMind": {"summary": "Building memory API", "updatedAt": "..."}
            },
            "history": {
                "recentMonths": {"summary": "Recent development activities", "updatedAt": "..."},
                "earlierContext": {"summary": "", "updatedAt": ""},
                "longTermBackground": {"summary": "", "updatedAt": ""}
            },
            "facts": [
                {
                    "id": "fact_abc123",
                    "content": "User prefers TypeScript over JavaScript",
                    "category": "preference",
                    "confidence": 0.9,
                    "createdAt": "2024-01-15T10:30:00Z",
                    "source": "thread_xyz"
                }
            ]
        }
        ```
    """
    memory_data = get_memory_data()
    return MemoryResponse(**memory_data)
@router.post(
    "/memory/reload",
    response_model=MemoryResponse,
    summary="Reload Memory Data",
    description="Reload memory data from the storage file, refreshing the in-memory cache.",
 )
 async def reload_memory() -> MemoryResponse:
    """Reload memory data from file.
    This forces a reload of the memory data from the storage file,
    useful when the file has been modified externally.
    Returns:
        The reloaded memory data.
    """
    memory_data = reload_memory_data()
    return MemoryResponse(**memory_data)
@router.get(
    "/memory/config",
    response_model=MemoryConfigResponse,
    summary="Get Memory Configuration",
    description="Retrieve the current memory system configuration.",
 )
 async def get_memory_config_endpoint() -> MemoryConfigResponse:
    """Get the memory system configuration.
    Returns:
        The current memory configuration settings.
    Example Response:
        ```json
        {
            "enabled": true,
            "storage_path": ".deer-flow/memory.json",
            "debounce_seconds": 30,
            "max_facts": 100,
            "fact_confidence_threshold": 0.7,
            "injection_enabled": true,
            "max_injection_tokens": 2000
        }
        ```
    """
    config = get_memory_config()
    return MemoryConfigResponse(
        enabled=config.enabled,
        storage_path=config.storage_path,
        debounce_seconds=config.debounce_seconds,
        max_facts=config.max_facts,
        fact_confidence_threshold=config.fact_confidence_threshold,
        injection_enabled=config.injection_enabled,
        max_injection_tokens=config.max_injection_tokens,
    )
@router.get(
    "/memory/status",
    response_model=MemoryStatusResponse,
    summary="Get Memory Status",
    description="Retrieve both memory configuration and current data in a single request.",
 )
 async def get_memory_status() -> MemoryStatusResponse:
    """Get the memory system status including configuration and data.
    Returns:
        Combined memory configuration and current data.
    """
    config = get_memory_config()
    memory_data = get_memory_data()
    return MemoryStatusResponse(
        config=MemoryConfigResponse(
            enabled=config.enabled,
            storage_path=config.storage_path,
            debounce_seconds=config.debounce_seconds,
            max_facts=config.max_facts,
            fact_confidence_threshold=config.fact_confidence_threshold,
            injection_enabled=config.injection_enabled,
            max_injection_tokens=config.max_injection_tokens,
        ),
        data=MemoryResponse(**memory_data),
    )
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -278,3 +278,15 @@ summarization:
 # - Custom MCP server implementations
 #
 # For more information, see: https://modelcontextprotocol.io
 # Global memory mechanism
 # Stores user context and conversation history for personalized responses
 memory:
  enabled: true
  storage_path: .deer-flow/memory.json  # Path relative to backend directory
  debounce_seconds: 30  # Wait time before processing queued updates
  model_name: null  # Use default model
  max_facts: 100  # Maximum number of facts to store
  fact_confidence_threshold: 0.7  # Minimum confidence for storing facts
  injection_enabled: true  # Whether to inject memory into system prompt
  max_injection_tokens: 2000  # Maximum tokens for memory injection