feat: add memory API and optimize memory middleware

- Add memory API endpoints for retrieving memory data:
  - GET /api/memory - get current memory data
  - POST /api/memory/reload - reload from file
  - GET /api/memory/config - get memory configuration
  - GET /api/memory/status - get config and data together
- Optimize MemoryMiddleware to only use user inputs and final assistant
  responses, filtering out intermediate tool calls
- Add memory configuration example to config.example.yaml

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-18 12:04:45 +08:00 · 2026-02-03 13:41:04 +08:00
parent 0ea666e0cf
commit 3b30913e10
4 changed files with 267 additions and 8 deletions
--- a/backend/src/agents/middlewares/memory_middleware.py
+++ b/backend/src/agents/middlewares/memory_middleware.py
@@ -1,6 +1,6 @@
 """Middleware for memory mechanism."""

-from typing import override
+from typing import Any, override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
@@ -16,13 +16,48 @@ class MemoryMiddlewareState(AgentState):
    pass


+def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
+    """Filter messages to keep only user inputs and final assistant responses.
+
+    Dropped:
+    - AI messages with a non-empty ``tool_calls`` (intermediate steps)
+    - Everything whose ``type`` is not "human" or "ai" (e.g. tool results)
+
+    Kept, in their original order:
+    - Human messages (user input)
+    - AI messages whose ``tool_calls`` is missing or empty (final responses)
+
+    Args:
+        messages: All conversation messages; objects without ``type`` are dropped.
+
+    Returns:
+        A new list of the kept messages; the input list is not modified.
+    """
+    filtered = []
+    for msg in messages:
+        msg_type = getattr(msg, "type", None)
+
+        if msg_type == "human":
+            # User input is always kept
+            filtered.append(msg)
+        elif msg_type == "ai":
+            # Missing, None, or empty tool_calls marks a final response
+            tool_calls = getattr(msg, "tool_calls", None)
+            if not tool_calls:
+                filtered.append(msg)
+        # Anything else (tool results, unknown or missing type) is dropped
+
+    return filtered
+
+
 class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
    """Middleware that queues conversation for memory update after agent execution.

    This middleware:
    1. After each agent execution, queues the conversation for memory update
-    2. The queue uses debouncing to batch multiple updates together
-    3. Memory is updated asynchronously via LLM summarization
+    2. Only includes user inputs and final assistant responses (ignores tool calls)
+    3. The queue uses debouncing to batch multiple updates together
+    4. Memory is updated asynchronously via LLM summarization
    """

    state_schema = MemoryMiddlewareState
@@ -54,16 +89,19 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
            print("MemoryMiddleware: No messages in state, skipping memory update")
            return None

+        # Filter to only keep user inputs and final assistant responses
+        filtered_messages = _filter_messages_for_memory(messages)
+
        # Only queue if there's meaningful conversation
        # At minimum need one user message and one assistant response
-        user_messages = [m for m in messages if getattr(m, "type", None) == "human"]
-        assistant_messages = [m for m in messages if getattr(m, "type", None) == "ai"]
+        user_messages = [m for m in filtered_messages if getattr(m, "type", None) == "human"]
+        assistant_messages = [m for m in filtered_messages if getattr(m, "type", None) == "ai"]

        if not user_messages or not assistant_messages:
            return None

-        # Queue the conversation for memory update
+        # Queue the filtered conversation for memory update
        queue = get_memory_queue()
-        queue.add(thread_id=thread_id, messages=list(messages))
+        queue.add(thread_id=thread_id, messages=filtered_messages)

        return None