feat: add memory API and optimize memory middleware

- Add memory API endpoints for retrieving memory data:
  - GET /api/memory - get current memory data
  - POST /api/memory/reload - reload from file
  - GET /api/memory/config - get memory configuration
  - GET /api/memory/status - get config and data together
- Optimize MemoryMiddleware to only use user inputs and final
  assistant responses, filtering out intermediate tool calls
- Add memory configuration example to config.example.yaml

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
hetaoBackend
2026-02-03 13:41:04 +08:00
parent 0ea666e0cf
commit 3b30913e10
4 changed files with 267 additions and 8 deletions

View File

@@ -1,6 +1,6 @@
"""Middleware for memory mechanism."""
from typing import override
from typing import Any, override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
@@ -16,13 +16,48 @@ class MemoryMiddlewareState(AgentState):
pass
def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
    """Reduce a conversation to the parts that matter for memory updates.

    A message is retained when its ``type`` attribute is:

    - ``"human"`` -- user input, always retained.
    - ``"ai"`` with a missing or empty ``tool_calls`` attribute -- treated as
      a final assistant response.

    Everything else is dropped: AI messages carrying tool calls (intermediate
    steps), tool-result messages, and any object whose ``type`` is neither
    ``"human"`` nor ``"ai"`` (including objects without a ``type`` attribute).

    Args:
        messages: All conversation messages, in order.

    Returns:
        A new list holding the retained message objects in their original
        order. The input list is not modified.
    """

    def _is_memory_relevant(message: Any) -> bool:
        """Return True when *message* should be forwarded to memory."""
        kind = getattr(message, "type", None)
        if kind == "human":
            return True
        if kind == "ai":
            # A non-empty tool_calls marks an intermediate step, not a reply.
            return not getattr(message, "tool_calls", None)
        return False

    return [message for message in messages if _is_memory_relevant(message)]
class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
"""Middleware that queues conversation for memory update after agent execution.
This middleware:
1. After each agent execution, queues the conversation for memory update
2. The queue uses debouncing to batch multiple updates together
3. Memory is updated asynchronously via LLM summarization
2. Only includes user inputs and final assistant responses (ignores tool calls)
3. The queue uses debouncing to batch multiple updates together
4. Memory is updated asynchronously via LLM summarization
"""
state_schema = MemoryMiddlewareState
@@ -54,16 +89,19 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
print("MemoryMiddleware: No messages in state, skipping memory update")
return None
# Filter to only keep user inputs and final assistant responses
filtered_messages = _filter_messages_for_memory(messages)
# Only queue if there's meaningful conversation
# At minimum need one user message and one assistant response
user_messages = [m for m in messages if getattr(m, "type", None) == "human"]
assistant_messages = [m for m in messages if getattr(m, "type", None) == "ai"]
user_messages = [m for m in filtered_messages if getattr(m, "type", None) == "human"]
assistant_messages = [m for m in filtered_messages if getattr(m, "type", None) == "ai"]
if not user_messages or not assistant_messages:
return None
# Queue the conversation for memory update
# Queue the filtered conversation for memory update
queue = get_memory_queue()
queue.add(thread_id=thread_id, messages=list(messages))
queue.add(thread_id=thread_id, messages=filtered_messages)
return None