From f5bd691172ecd07dfe70af30fca5a123492a679c Mon Sep 17 00:00:00 2001
From: JeffJiang <for-eleven@hotmail.com>
Date: Tue, 10 Mar 2026 11:24:53 +0800
Subject: [PATCH] feat(middleware): introduce TodoMiddleware for context-loss
 detection in todo management (#1041)

* feat(middleware): introduce TodoMiddleware for context-loss detection in todo management

* Address PR #1041 review suggestions: todo reminder dedup, thread switching, artifact deselect, debug log (#8)

* Initial plan

* Handle all suggestions from PR #1041 review

Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>

* fix(chat-box): prevent automatic deselection of artifacts when switching threads
fix(hooks): reset thread state on new thread creation

---------

Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: foreleven <4785594+foreleven@users.noreply.github.com>
---
 backend/src/agents/lead_agent/agent.py        |   9 +-
 .../src/agents/middlewares/todo_middleware.py | 100 ++++++++++++++++++
 backend/src/gateway/routers/suggestions.py    |   2 +-
 backend/src/models/factory.py                 |   4 +-
 backend/src/sandbox/tools.py                  |   1 -
 .../components/workspace/chats/chat-box.tsx   |  14 +--
 frontend/src/core/messages/utils.ts           |   4 +
 frontend/src/core/threads/hooks.ts            |  10 +-
 8 files changed, 125 insertions(+), 19 deletions(-)
 create mode 100644 backend/src/agents/middlewares/todo_middleware.py

diff --git a/backend/src/agents/lead_agent/agent.py b/backend/src/agents/lead_agent/agent.py
index e285026..29ca905 100644
--- a/backend/src/agents/lead_agent/agent.py
+++ b/backend/src/agents/lead_agent/agent.py
@@ -1,7 +1,7 @@
 import logging
 
 from langchain.agents import create_agent
-from langchain.agents.middleware import SummarizationMiddleware, TodoListMiddleware
+from langchain.agents.middleware import SummarizationMiddleware
 from langchain_core.runnables import RunnableConfig
 
 from src.agents.lead_agent.prompt import apply_prompt_template
@@ -11,6 +11,7 @@ from src.agents.middlewares.memory_middleware import MemoryMiddleware
 from src.agents.middlewares.subagent_limit_middleware import SubagentLimitMiddleware
 from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
 from src.agents.middlewares.title_middleware import TitleMiddleware
+from src.agents.middlewares.todo_middleware import TodoMiddleware
 from src.agents.middlewares.uploads_middleware import UploadsMiddleware
 from src.agents.middlewares.view_image_middleware import ViewImageMiddleware
 from src.agents.thread_state import ThreadState
@@ -80,14 +81,14 @@ def _create_summarization_middleware() -> SummarizationMiddleware | None:
     return SummarizationMiddleware(**kwargs)
 
 
-def _create_todo_list_middleware(is_plan_mode: bool) -> TodoListMiddleware | None:
+def _create_todo_list_middleware(is_plan_mode: bool) -> TodoMiddleware | None:
     """Create and configure the TodoList middleware.
 
     Args:
         is_plan_mode: Whether to enable plan mode with TodoList middleware.
 
     Returns:
-        TodoListMiddleware instance if plan mode is enabled, None otherwise.
+        TodoMiddleware instance if plan mode is enabled, None otherwise.
     """
     if not is_plan_mode:
         return None
@@ -192,7 +193,7 @@ Being proactive with task management demonstrates thoroughness and ensures all r
 **Remember**: If you only need a few tool calls to complete a task and it's clear what to do, it's better to just do the task directly and NOT use this tool at all.
 """
 
-    return TodoListMiddleware(system_prompt=system_prompt, tool_description=tool_description)
+    return TodoMiddleware(system_prompt=system_prompt, tool_description=tool_description)
 
 
 # ThreadDataMiddleware must be before SandboxMiddleware to ensure thread_id is available
diff --git a/backend/src/agents/middlewares/todo_middleware.py b/backend/src/agents/middlewares/todo_middleware.py
new file mode 100644
index 0000000..c35a3e1
--- /dev/null
+++ b/backend/src/agents/middlewares/todo_middleware.py
@@ -0,0 +1,100 @@
+"""Middleware that extends TodoListMiddleware with context-loss detection.
+
+When the message history is truncated (e.g., by SummarizationMiddleware), the
+original `write_todos` tool call and its ToolMessage can be scrolled out of the
+active context window. This middleware detects that situation and injects a
+reminder message so the model still knows about the outstanding todo list.
+"""
+
+from __future__ import annotations
+
+from typing import Any, override
+
+from langchain.agents.middleware import TodoListMiddleware
+from langchain.agents.middleware.todo import PlanningState, Todo
+from langchain_core.messages import AIMessage, HumanMessage
+from langgraph.runtime import Runtime
+
+
+def _todos_in_messages(messages: list[Any]) -> bool:
+    """Return True if any AIMessage in *messages* contains a write_todos tool call."""
+    for msg in messages:
+        if isinstance(msg, AIMessage) and msg.tool_calls:
+            for tc in msg.tool_calls:
+                if tc.get("name") == "write_todos":
+                    return True
+    return False
+
+
+def _reminder_in_messages(messages: list[Any]) -> bool:
+    """Return True if a todo_reminder HumanMessage is already present in *messages*."""
+    for msg in messages:
+        if isinstance(msg, HumanMessage) and getattr(msg, "name", None) == "todo_reminder":
+            return True
+    return False
+
+
+def _format_todos(todos: list[Todo]) -> str:
+    """Format a list of Todo items into a human-readable string."""
+    lines: list[str] = []
+    for todo in todos:
+        status = todo.get("status", "pending")
+        content = todo.get("content", "")
+        lines.append(f"- [{status}] {content}")
+    return "\n".join(lines)
+
+
+class TodoMiddleware(TodoListMiddleware):
+    """Extends TodoListMiddleware with `write_todos` context-loss detection.
+
+    When the original `write_todos` tool call has been truncated from the message
+    history (e.g., after summarization), the model loses awareness of the current
+    todo list. This middleware detects that gap in `before_model` / `abefore_model`
+    and injects a reminder message so the model can continue tracking progress.
+    """
+
+    @override
+    def before_model(
+        self,
+        state: PlanningState,
+        runtime: Runtime,  # noqa: ARG002
+    ) -> dict[str, Any] | None:
+        """Inject a todo-list reminder when write_todos has left the context window."""
+        todos: list[Todo] = state.get("todos") or []  # type: ignore[assignment]
+        if not todos:
+            return None
+
+        messages = state.get("messages") or []
+        if _todos_in_messages(messages):
+            # write_todos is still visible in context — nothing to do.
+            return None
+
+        if _reminder_in_messages(messages):
+            # A reminder was already injected and hasn't been truncated yet.
+            return None
+
+        # The todo list exists in state but the original write_todos call is gone.
+        # Inject a reminder as a HumanMessage so the model stays aware.
+        formatted = _format_todos(todos)
+        reminder = HumanMessage(
+            name="todo_reminder",
+            content=(
+                "<system_reminder>\n"
+                "Your todo list from earlier is no longer visible in the current context window, "
+                "but it is still active. Here is the current state:\n\n"
+                f"{formatted}\n\n"
+                "Continue tracking and updating this todo list as you work. "
+                "Call `write_todos` whenever the status of any item changes.\n"
+                "</system_reminder>"
+            ),
+        )
+        return {"messages": [reminder]}
+
+    @override
+    async def abefore_model(
+        self,
+        state: PlanningState,
+        runtime: Runtime,
+    ) -> dict[str, Any] | None:
+        """Async version of before_model."""
+        return self.before_model(state, runtime)
diff --git a/backend/src/gateway/routers/suggestions.py b/backend/src/gateway/routers/suggestions.py
index 031f3bc..9a4e3a6 100644
--- a/backend/src/gateway/routers/suggestions.py
+++ b/backend/src/gateway/routers/suggestions.py
@@ -99,7 +99,7 @@ async def generate_suggestions(thread_id: str, request: SuggestionsRequest) -> S
         "- Output MUST be a JSON array of strings only.\n\n"
         "Conversation:\n"
         f"{conversation}\n"
-    ).format(n=n, conversation=conversation)
+    )
 
     try:
         model = create_chat_model(name=request.model_name, thinking_enabled=False)
diff --git a/backend/src/models/factory.py b/backend/src/models/factory.py
index 769afa2..80da587 100644
--- a/backend/src/models/factory.py
+++ b/backend/src/models/factory.py
@@ -58,8 +58,8 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, *
         elif effective_wte.get("thinking", {}).get("type"):
             # Native langchain_anthropic: thinking is a direct constructor parameter
             kwargs.update({"thinking": {"type": "disabled"}})
-    if not model_config.supports_reasoning_effort:
-        kwargs.update({"reasoning_effort": None})
+    if not model_config.supports_reasoning_effort and "reasoning_effort" in kwargs:
+        del kwargs["reasoning_effort"]
 
     model_instance = model_class(**kwargs, **model_settings_from_config)
 
diff --git a/backend/src/sandbox/tools.py b/backend/src/sandbox/tools.py
index a6150c5..66102c1 100644
--- a/backend/src/sandbox/tools.py
+++ b/backend/src/sandbox/tools.py
@@ -178,7 +178,6 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
         raise SandboxRuntimeError("Thread ID not available in runtime context")
 
     provider = get_sandbox_provider()
-    print(f"Lazy acquiring sandbox for thread {thread_id}")
     sandbox_id = provider.acquire(thread_id)
 
     # Update runtime state - this persists across tool calls
diff --git a/frontend/src/components/workspace/chats/chat-box.tsx b/frontend/src/components/workspace/chats/chat-box.tsx
index f77a3c1..d41f756 100644
--- a/frontend/src/components/workspace/chats/chat-box.tsx
+++ b/frontend/src/components/workspace/chats/chat-box.tsx
@@ -50,13 +50,13 @@ const ChatBox: React.FC<{ children: React.ReactNode; threadId: string }> = ({
     // Update artifacts from the current thread
     setArtifacts(thread.values.artifacts);
 
-    // Deselect if the currently selected artifact no longer exists
-    if (
-      selectedArtifact &&
-      !thread.values.artifacts?.includes(selectedArtifact)
-    ) {
-      deselect();
-    }
+    // Do NOT automatically deselect the artifact when switching threads, because artifact auto-discovery does not work yet.
+    // if (
+    //   selectedArtifact &&
+    //   !thread.values.artifacts?.includes(selectedArtifact)
+    // ) {
+    //   deselect();
+    // }
 
     if (
       env.NEXT_PUBLIC_STATIC_WEBSITE_ONLY === "true" &&
diff --git a/frontend/src/core/messages/utils.ts b/frontend/src/core/messages/utils.ts
index 3498ac8..3e341cc 100644
--- a/frontend/src/core/messages/utils.ts
+++ b/frontend/src/core/messages/utils.ts
@@ -52,6 +52,10 @@ export function groupMessages<T>(
   }
 
   for (const message of messages) {
+    if (message.name === "todo_reminder") {
+      continue;
+    }
+
     if (message.type === "human") {
       groups.push({ id: message.id, type: "human", messages: [message] });
       continue;
diff --git a/frontend/src/core/threads/hooks.ts b/frontend/src/core/threads/hooks.ts
index 3cab73e..42885ad 100644
--- a/frontend/src/core/threads/hooks.ts
+++ b/frontend/src/core/threads/hooks.ts
@@ -60,11 +60,12 @@ export function useThreadStream({
 
   useEffect(() => {
     const normalizedThreadId = threadId ?? null;
-    if (threadIdRef.current !== normalizedThreadId) {
-      threadIdRef.current = normalizedThreadId;
-      startedRef.current = false; // Reset for new thread
+    if (!normalizedThreadId) {
+      // Only reset when threadId becomes null/undefined, i.e. when a new thread is being created
+      startedRef.current = false;
       setOnStreamThreadId(normalizedThreadId);
     }
+    threadIdRef.current = normalizedThreadId;
   }, [threadId]);
 
   const _handleOnStart = useCallback((id: string) => {
@@ -77,7 +78,6 @@ export function useThreadStream({
   const handleStreamStart = useCallback(
     (_threadId: string) => {
       threadIdRef.current = _threadId;
-      setOnStreamThreadId(_threadId);
       _handleOnStart(_threadId);
     },
     [_handleOnStart],
@@ -85,6 +85,7 @@ export function useThreadStream({
 
   const queryClient = useQueryClient();
   const updateSubtask = useUpdateSubtask();
+
   const thread = useStream<AgentThreadState>({
     client: getAPIClient(isMock),
     assistantId: "lead_agent",
@@ -93,6 +94,7 @@ export function useThreadStream({
     fetchStateHistory: { limit: 1 },
     onCreated(meta) {
       handleStreamStart(meta.thread_id);
+      setOnStreamThreadId(meta.thread_id);
     },
     onLangChainEvent(event) {
       if (event.event === "on_tool_end") {