# backend/src/agents/middlewares/memory_middleware.py

"""Middleware for memory mechanism."""

from typing import Any, override

from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langgraph.runtime import Runtime

from src.agents.memory.queue import get_memory_queue
from src.config.memory_config import get_memory_config


class MemoryMiddlewareState(AgentState):
    """State schema used by the memory middleware.

    Declares no fields beyond those of `AgentState`; kept compatible with
    the `ThreadState` schema.
    """


def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
    """Reduce a conversation to the turns worth remembering.

    A message is retained when it is either:
    - a human message (``type == "human"``), or
    - an AI message (``type == "ai"``) whose ``tool_calls`` attribute is
      missing or falsy, i.e. a final answer rather than an intermediate
      tool-invoking step.

    Everything else (tool results, AI messages that carry tool calls, and
    objects with any other or no ``type``) is dropped. The relative order of
    the retained messages is preserved.

    Args:
        messages: All messages of the conversation, in order.

    Returns:
        A new list holding only the retained messages.
    """

    def _is_memorable(message: Any) -> bool:
        kind = getattr(message, "type", None)
        if kind == "human":
            return True
        # An AI turn counts only when it did not request any tool calls.
        return kind == "ai" and not getattr(message, "tool_calls", None)

    return [message for message in messages if _is_memorable(message)]


class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
    """Middleware that queues conversation for memory update after agent execution.

    This middleware:
    1. After each agent execution, queues the conversation for memory update
    2. Only includes user inputs and final assistant responses (ignores tool calls)
    3. Hands the messages to the memory queue, which is responsible for
       debouncing/batching and the asynchronous LLM-based memory update
    """

    state_schema = MemoryMiddlewareState

    @override
    def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime) -> dict | None:
        """Queue conversation for memory update after agent completes.

        The update is skipped (nothing is queued) when memory is disabled in
        the configuration, when no ``thread_id`` is available in the runtime
        context, when the state holds no messages, or when the filtered
        conversation lacks either a user message or a final assistant reply.

        Args:
            state: The current agent state; its ``messages`` entry is read.
            runtime: The runtime context; ``thread_id`` is read from
                ``runtime.context`` when a context is present.

        Returns:
            None (no state changes needed from this middleware).
        """
        if not get_memory_config().enabled:
            return None

        # `runtime.context` is None when the agent was invoked without a
        # context; treat that as "no thread_id" instead of raising
        # AttributeError on `.get`.
        context = runtime.context or {}
        thread_id = context.get("thread_id")
        if not thread_id:
            print("MemoryMiddleware: No thread_id in context, skipping memory update")
            return None

        messages = state.get("messages", [])
        if not messages:
            print("MemoryMiddleware: No messages in state, skipping memory update")
            return None

        # Keep only user inputs and final assistant responses.
        filtered_messages = _filter_messages_for_memory(messages)

        # A meaningful exchange needs at least one user message and one
        # assistant response; otherwise there is nothing worth remembering.
        has_user_message = any(getattr(m, "type", None) == "human" for m in filtered_messages)
        has_assistant_message = any(getattr(m, "type", None) == "ai" for m in filtered_messages)
        if not (has_user_message and has_assistant_message):
            return None

        get_memory_queue().add(thread_id=thread_id, messages=filtered_messages)

        return None
feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00			`"""Middleware for memory mechanism."""`

feat: add memory API and optimize memory middleware - Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:41:04 +08:00			`from typing import Any, override`
feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00
			`from langchain.agents import AgentState`
			`from langchain.agents.middleware import AgentMiddleware`
			`from langgraph.runtime import Runtime`

			`from src.agents.memory.queue import get_memory_queue`
			`from src.config.memory_config import get_memory_config`


			`class MemoryMiddlewareState(AgentState):`
			"""Compatible with the `ThreadState` schema."""

			`pass`


feat: add memory API and optimize memory middleware - Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:41:04 +08:00			`def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:`
			`"""Filter messages to keep only user inputs and final assistant responses.`

			`This filters out:`
			`- Tool messages (intermediate tool call results)`
			`- AI messages with tool_calls (intermediate steps, not final responses)`

			`Only keeps:`
			`- Human messages (user input)`
			`- AI messages without tool_calls (final assistant responses)`

			`Args:`
			`messages: List of all conversation messages.`

			`Returns:`
			`Filtered list containing only user inputs and final assistant responses.`
			`"""`
			`filtered = []`
			`for msg in messages:`
			`msg_type = getattr(msg, "type", None)`

			`if msg_type == "human":`
			`# Always keep user messages`
			`filtered.append(msg)`
			`elif msg_type == "ai":`
			`# Only keep AI messages that are final responses (no tool_calls)`
			`tool_calls = getattr(msg, "tool_calls", None)`
			`if not tool_calls:`
			`filtered.append(msg)`
			`# Skip tool messages and AI messages with tool_calls`

			`return filtered`


feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00			`class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):`
			`"""Middleware that queues conversation for memory update after agent execution.`

			`This middleware:`
			`1. After each agent execution, queues the conversation for memory update`
feat: add memory API and optimize memory middleware - Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:41:04 +08:00			`2. Only includes user inputs and final assistant responses (ignores tool calls)`
			`3. The queue uses debouncing to batch multiple updates together`
			`4. Memory is updated asynchronously via LLM summarization`
feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00			`"""`

			`state_schema = MemoryMiddlewareState`

			`@override`
			`def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime) -> dict \| None:`
			`"""Queue conversation for memory update after agent completes.`

			`Args:`
			`state: The current agent state.`
			`runtime: The runtime context.`

			`Returns:`
			`None (no state changes needed from this middleware).`
			`"""`
			`config = get_memory_config()`
			`if not config.enabled:`
			`return None`

			`# Get thread ID from runtime context`
			`thread_id = runtime.context.get("thread_id")`
			`if not thread_id:`
			`print("MemoryMiddleware: No thread_id in context, skipping memory update")`
			`return None`

			`# Get messages from state`
			`messages = state.get("messages", [])`
			`if not messages:`
			`print("MemoryMiddleware: No messages in state, skipping memory update")`
			`return None`

feat: add memory API and optimize memory middleware - Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:41:04 +08:00			`# Filter to only keep user inputs and final assistant responses`
			`filtered_messages = _filter_messages_for_memory(messages)`

feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00			`# Only queue if there's meaningful conversation`
			`# At minimum need one user message and one assistant response`
feat: add memory API and optimize memory middleware - Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:41:04 +08:00			`user_messages = [m for m in filtered_messages if getattr(m, "type", None) == "human"]`
			`assistant_messages = [m for m in filtered_messages if getattr(m, "type", None) == "ai"]`
feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00
			`if not user_messages or not assistant_messages:`
			`return None`

feat: add memory API and optimize memory middleware - Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:41:04 +08:00			`# Queue the filtered conversation for memory update`
feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00			`queue = get_memory_queue()`
feat: add memory API and optimize memory middleware - Add memory API endpoints for retrieving memory data: - GET /api/memory - get current memory data - POST /api/memory/reload - reload from file - GET /api/memory/config - get memory configuration - GET /api/memory/status - get config and data together - Optimize MemoryMiddleware to only use user inputs and final assistant responses, filtering out intermediate tool calls - Add memory configuration example to config.example.yaml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:41:04 +08:00			`queue.add(thread_id=thread_id, messages=filtered_messages)`
feat: add global memory mechanism for personalized conversations Implement a memory system that stores user context and conversation history in memory.json, uses LLM to summarize conversations, and injects relevant context into system prompts for personalized responses. Key components: - MemoryConfig for configuration management - MemoryUpdateQueue with debounce for batch processing - MemoryUpdater for LLM-based memory extraction - MemoryMiddleware to queue conversations after agent execution - Memory injection into lead agent system prompt Note: Add memory section to config.yaml to enable (see config.example.yaml) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-03 13:31:05 +08:00
			`return None`