From 9d0a0ea0221f84dc46b2bd879f6a595603a41b74 Mon Sep 17 00:00:00 2001
From: hetaoBackend <hetao7@pku.edu.cn>
Date: Wed, 28 Jan 2026 14:03:43 +0800
Subject: [PATCH] fix: preserve reasoning_content in multi-turn conversations

When using thinking-enabled models (like Kimi K2.5, DeepSeek), the API
expects reasoning_content on all assistant messages. The original
ChatDeepSeek stores reasoning_content in additional_kwargs but doesn't
include it when making subsequent API calls, causing "reasoning_content
is missing" errors.

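This adds PatchedChatDeepSeek, which overrides _get_request_payload to
copy reasoning_content from each AIMessage's additional_kwargs back into
the corresponding assistant message of the request payload.

Also included in this change:
* UploadsMiddleware now injects the uploaded-file list as a HumanMessage
  at the start of the message history (previously a SystemMessage placed
  after the system prompt) and returns early when no files are uploaded.
* config.example.yaml gains a commented-out Kimi K2.5 example that uses
  PatchedChatDeepSeek, and debug.py switches model_name to kimi-k2.5.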

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/debug.py                              |  2 +-
 .../agents/middlewares/uploads_middleware.py  | 14 ++--
 backend/src/models/patched_deepseek.py        | 71 +++++++++++++++++++
 config.example.yaml                           | 13 ++++
 4 files changed, 93 insertions(+), 7 deletions(-)
 create mode 100644 backend/src/models/patched_deepseek.py

diff --git a/backend/debug.py b/backend/debug.py
index b92c0cb..d3212d1 100644
--- a/backend/debug.py
+++ b/backend/debug.py
@@ -41,7 +41,7 @@ async def main():
             "thinking_enabled": True,
             "is_plan_mode": True,
             # Uncomment to use a specific model
-            "model_name": "deepseek-v3.2",
+            "model_name": "kimi-k2.5",
         }
     }
 
diff --git a/backend/src/agents/middlewares/uploads_middleware.py b/backend/src/agents/middlewares/uploads_middleware.py
index 01105ef..823ea92 100644
--- a/backend/src/agents/middlewares/uploads_middleware.py
+++ b/backend/src/agents/middlewares/uploads_middleware.py
@@ -6,7 +6,7 @@ from typing import NotRequired, override
 
 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
-from langchain_core.messages import SystemMessage
+from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime
 
 from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
@@ -124,17 +124,19 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
         # List uploaded files
         files = self._list_uploaded_files(thread_id)
 
+        if not files:
+            return None
+
         # Create system message with file list
         files_message = self._create_files_message(files)
-        system_message = SystemMessage(content=files_message)
+        files_human_message = HumanMessage(content=files_message)
 
         # Inject the message into the message history
-        # This will be added after the system prompt but before user messages
+        # This will be added before user messages
         messages = list(state.get("messages", []))
 
-        # Insert after the first system message (the main prompt)
-        insert_index = 1 if messages and hasattr(messages[0], "type") and messages[0].type == "system" else 0
-        messages.insert(insert_index, system_message)
+        insert_index = 0
+        messages.insert(insert_index, files_human_message)
 
         return {
             "uploaded_files": files,
diff --git a/backend/src/models/patched_deepseek.py b/backend/src/models/patched_deepseek.py
new file mode 100644
index 0000000..d2eed36
--- /dev/null
+++ b/backend/src/models/patched_deepseek.py
@@ -0,0 +1,71 @@
+"""Patched ChatDeepSeek that preserves reasoning_content in multi-turn conversations.
+
+This module provides a patched version of ChatDeepSeek that properly handles
+reasoning_content when sending messages back to the API. The original implementation
+stores reasoning_content in additional_kwargs but doesn't include it when making
+subsequent API calls, which causes errors with APIs that require reasoning_content
+on all assistant messages when thinking mode is enabled.
+"""
+
+from typing import Any
+
+from langchain_core.language_models import LanguageModelInput
+from langchain_core.messages import AIMessage
+from langchain_deepseek import ChatDeepSeek
+
+
+class PatchedChatDeepSeek(ChatDeepSeek):
+    """ChatDeepSeek with proper reasoning_content preservation.
+
+    When using thinking/reasoning enabled models, the API expects reasoning_content
+    to be present on ALL assistant messages in multi-turn conversations. This patched
+    version ensures reasoning_content from additional_kwargs is included in the
+    request payload.
+    """
+
+    def _get_request_payload(
+        self,
+        input_: LanguageModelInput,
+        *,
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> dict:
+        """Get request payload with reasoning_content preserved.
+
+        Overrides the parent method to inject reasoning_content from
+        additional_kwargs into assistant messages in the payload.
+        """
+        # Get the original messages before conversion
+        original_messages = self._convert_input(input_).to_messages()
+
+        # Call parent to get the base payload
+        payload = super()._get_request_payload(input_, stop=stop, **kwargs)
+
+        # Match payload messages with original messages to restore reasoning_content
+        payload_messages = payload.get("messages", [])
+
+        # The payload messages and original messages should be in the same order
+        # Iterate through both and match by position
+        if len(payload_messages) == len(original_messages):
+            for payload_msg, orig_msg in zip(payload_messages, original_messages):
+                if (
+                    payload_msg.get("role") == "assistant"
+                    and isinstance(orig_msg, AIMessage)
+                ):
+                    reasoning_content = orig_msg.additional_kwargs.get("reasoning_content")
+                    if reasoning_content is not None:
+                        payload_msg["reasoning_content"] = reasoning_content
+        else:
+            # Fallback: match by counting assistant messages
+            ai_messages = [m for m in original_messages if isinstance(m, AIMessage)]
+            assistant_payloads = [
+                (i, m) for i, m in enumerate(payload_messages)
+                if m.get("role") == "assistant"
+            ]
+
+            for (idx, payload_msg), ai_msg in zip(assistant_payloads, ai_messages):
+                reasoning_content = ai_msg.additional_kwargs.get("reasoning_content")
+                if reasoning_content is not None:
+                    payload_messages[idx]["reasoning_content"] = reasoning_content
+
+        return payload
diff --git a/config.example.yaml b/config.example.yaml
index b41dbc1..3891777 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -56,6 +56,19 @@ models:
   #       thinking:
   #         type: enabled
 
+  # Example: Kimi K2.5 model
+  # - name: kimi-k2.5
+  #   display_name: Kimi K2.5
+  #   use: src.models.patched_deepseek:PatchedChatDeepSeek
+  #   model: kimi-k2.5
+  #   api_base: https://api.moonshot.cn/v1
+  #   api_key: $MOONSHOT_API_KEY
+  #   max_tokens: 32768
+  #   supports_thinking: true
+  #   extra_body:
+  #     thinking:
+  #       type: enabled
+
 # ============================================================================
 # Tool Groups Configuration
 # ============================================================================