fix: preserve reasoning_content in multi-turn conversations

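When using thinking-enabled models (such as Kimi K2.5 or DeepSeek), the API expects reasoning_content on every assistant message. The original ChatDeepSeek stores reasoning_content in additional_kwargs but does not include it when making subsequent API calls, which causes "reasoning_content is missing" errors. This commit adds PatchedChatDeepSeek, which overrides _get_request_payload to restore reasoning_content from additional_kwargs into the request payload. It also changes UploadsMiddleware to inject the uploaded-file list as a HumanMessage (and to skip injection when no files are uploaded), adds a commented Kimi K2.5 example to config.example.yaml, and points the debug script at kimi-k2.5.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>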
2026-04-02 22:02:13 +08:00 · 2026-01-28 14:03:43 +08:00
parent d84a34b7cd
commit fa9fba3f8e
4 changed files with 93 additions and 7 deletions
--- a/backend/debug.py
+++ b/backend/debug.py
@@ -41,7 +41,7 @@ async def main():
            "thinking_enabled": True,
            "is_plan_mode": True,
            # Uncomment to use a specific model
-            "model_name": "deepseek-v3.2",
+            "model_name": "kimi-k2.5",
        }
    }

--- a/backend/src/agents/middlewares/uploads_middleware.py
+++ b/backend/src/agents/middlewares/uploads_middleware.py
@@ -6,7 +6,7 @@ from typing import NotRequired, override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
-from langchain_core.messages import SystemMessage
+from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime

 from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
@@ -124,17 +124,19 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
        # List uploaded files
        files = self._list_uploaded_files(thread_id)

+        if not files:
+            return None
+
        # Create system message with file list
        files_message = self._create_files_message(files)
-        system_message = SystemMessage(content=files_message)
+        files_human_message = HumanMessage(content=files_message)

        # Inject the message into the message history
-        # This will be added after the system prompt but before user messages
+        # Prepend it at index 0 so it precedes every existing message
        messages = list(state.get("messages", []))

-        # Insert after the first system message (the main prompt)
-        insert_index = 1 if messages and hasattr(messages[0], "type") and messages[0].type == "system" else 0
-        messages.insert(insert_index, system_message)
+        insert_index = 0
+        messages.insert(insert_index, files_human_message)

        return {
            "uploaded_files": files,
--- a/backend/src/models/patched_deepseek.py
+++ b/backend/src/models/patched_deepseek.py
@@ -0,0 +1,71 @@
+"""Patched ChatDeepSeek that preserves reasoning_content in multi-turn conversations.
+
+This module provides a patched version of ChatDeepSeek that properly handles
+reasoning_content when sending messages back to the API. The original implementation
+stores reasoning_content in additional_kwargs but doesn't include it when making
+subsequent API calls, which causes errors with APIs that require reasoning_content
+on all assistant messages when thinking mode is enabled.
+"""
+
+from typing import Any
+
+from langchain_core.language_models import LanguageModelInput
+from langchain_core.messages import AIMessage
+from langchain_deepseek import ChatDeepSeek
+
+
+class PatchedChatDeepSeek(ChatDeepSeek):
+    """ChatDeepSeek with proper reasoning_content preservation.
+
+    When using thinking/reasoning enabled models, the API expects reasoning_content
+    to be present on ALL assistant messages in multi-turn conversations. This patched
+    version ensures reasoning_content from additional_kwargs is included in the
+    request payload.
+    """
+
+    def _get_request_payload(
+        self,
+        input_: LanguageModelInput,
+        *,
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> dict:
+        """Get request payload with reasoning_content preserved.
+
+        Overrides the parent method to inject reasoning_content from
+        additional_kwargs into assistant messages in the payload.
+        """
+        # Get the original messages before conversion
+        original_messages = self._convert_input(input_).to_messages()
+
+        # Call parent to get the base payload
+        payload = super()._get_request_payload(input_, stop=stop, **kwargs)
+
+        # Match payload messages with original messages to restore reasoning_content
+        payload_messages = payload.get("messages", [])
+
+        # The payload messages and original messages should be in the same order
+        # Iterate through both and match by position
+        if len(payload_messages) == len(original_messages):
+            for payload_msg, orig_msg in zip(payload_messages, original_messages):
+                if (
+                    payload_msg.get("role") == "assistant"
+                    and isinstance(orig_msg, AIMessage)
+                ):
+                    reasoning_content = orig_msg.additional_kwargs.get("reasoning_content")
+                    if reasoning_content is not None:
+                        payload_msg["reasoning_content"] = reasoning_content
+        else:
+            # Fallback: match by counting assistant messages
+            ai_messages = [m for m in original_messages if isinstance(m, AIMessage)]
+            assistant_payloads = [
+                (i, m) for i, m in enumerate(payload_messages)
+                if m.get("role") == "assistant"
+            ]
+
+            for (idx, payload_msg), ai_msg in zip(assistant_payloads, ai_messages):
+                reasoning_content = ai_msg.additional_kwargs.get("reasoning_content")
+                if reasoning_content is not None:
+                    payload_messages[idx]["reasoning_content"] = reasoning_content
+
+        return payload
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -56,6 +56,19 @@ models:
  #       thinking:
  #         type: enabled

+  # Example: Kimi K2.5 model
+  # - name: kimi-k2.5
+  #   display_name: Kimi K2.5
+  #   use: src.models.patched_deepseek:PatchedChatDeepSeek
+  #   model: kimi-k2.5
+  #   api_base: https://api.moonshot.cn/v1
+  #   api_key: $MOONSHOT_API_KEY
+  #   max_tokens: 32768
+  #   supports_thinking: true
+  #   extra_body:
+  #     thinking:
+  #       type: enabled
+
 # ============================================================================
 # Tool Groups Configuration
 # ============================================================================