From 9d0a0ea0221f84dc46b2bd879f6a595603a41b74 Mon Sep 17 00:00:00 2001 From: hetaoBackend Date: Wed, 28 Jan 2026 14:03:43 +0800 Subject: [PATCH] fix: preserve reasoning_content in multi-turn conversations When using thinking-enabled models (like Kimi K2.5, DeepSeek), the API expects reasoning_content on all assistant messages. The original ChatDeepSeek stores reasoning_content in additional_kwargs but doesn't include it when making subsequent API calls, causing "reasoning_content is missing" errors. This adds PatchedChatDeepSeek which overrides _get_request_payload to restore reasoning_content from additional_kwargs into the payload. The same change also injects the uploaded-files notice as a HumanMessage at the start of the history instead of a SystemMessage (returning early when no files are uploaded), points backend/debug.py at kimi-k2.5, and adds a commented Kimi K2.5 example to config.example.yaml. Co-Authored-By: Claude Opus 4.5 --- backend/debug.py | 2 +- .../agents/middlewares/uploads_middleware.py | 14 ++-- backend/src/models/patched_deepseek.py | 71 +++++++++++++++++++ config.example.yaml | 13 ++++ 4 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 backend/src/models/patched_deepseek.py diff --git a/backend/debug.py b/backend/debug.py index b92c0cb..d3212d1 100644 --- a/backend/debug.py +++ b/backend/debug.py @@ -41,7 +41,7 @@ async def main(): "thinking_enabled": True, "is_plan_mode": True, # Uncomment to use a specific model - "model_name": "deepseek-v3.2", + "model_name": "kimi-k2.5", } } diff --git a/backend/src/agents/middlewares/uploads_middleware.py b/backend/src/agents/middlewares/uploads_middleware.py index 01105ef..823ea92 100644 --- a/backend/src/agents/middlewares/uploads_middleware.py +++ b/backend/src/agents/middlewares/uploads_middleware.py @@ -6,7 +6,7 @@ from typing import NotRequired, override from langchain.agents import AgentState from langchain.agents.middleware import AgentMiddleware -from langchain_core.messages import SystemMessage +from langchain_core.messages import HumanMessage from langgraph.runtime import Runtime from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR @@ -124,17 +124,19 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]): # 
List uploaded files files = self._list_uploaded_files(thread_id) + if not files: + return None + # Create system message with file list files_message = self._create_files_message(files) - system_message = SystemMessage(content=files_message) + files_human_message = HumanMessage(content=files_message) # Inject the message into the message history - # This will be added after the system prompt but before user messages + # This will be added before user messages messages = list(state.get("messages", [])) - # Insert after the first system message (the main prompt) - insert_index = 1 if messages and hasattr(messages[0], "type") and messages[0].type == "system" else 0 - messages.insert(insert_index, system_message) + insert_index = 0 + messages.insert(insert_index, files_human_message) return { "uploaded_files": files, diff --git a/backend/src/models/patched_deepseek.py b/backend/src/models/patched_deepseek.py new file mode 100644 index 0000000..d2eed36 --- /dev/null +++ b/backend/src/models/patched_deepseek.py @@ -0,0 +1,71 @@ +"""Patched ChatDeepSeek that preserves reasoning_content in multi-turn conversations. + +This module provides a patched version of ChatDeepSeek that properly handles +reasoning_content when sending messages back to the API. The original implementation +stores reasoning_content in additional_kwargs but doesn't include it when making +subsequent API calls, which causes errors with APIs that require reasoning_content +on all assistant messages when thinking mode is enabled. +""" + +from typing import Any + +from langchain_core.language_models import LanguageModelInput +from langchain_core.messages import AIMessage +from langchain_deepseek import ChatDeepSeek + + +class PatchedChatDeepSeek(ChatDeepSeek): + """ChatDeepSeek with proper reasoning_content preservation. + + When using thinking/reasoning enabled models, the API expects reasoning_content + to be present on ALL assistant messages in multi-turn conversations. 
This patched + version ensures reasoning_content from additional_kwargs is included in the + request payload. + """ + + def _get_request_payload( + self, + input_: LanguageModelInput, + *, + stop: list[str] | None = None, + **kwargs: Any, + ) -> dict: + """Get request payload with reasoning_content preserved. + + Overrides the parent method to inject reasoning_content from + additional_kwargs into assistant messages in the payload. + """ + # Get the original messages before conversion + original_messages = self._convert_input(input_).to_messages() + + # Call parent to get the base payload + payload = super()._get_request_payload(input_, stop=stop, **kwargs) + + # Match payload messages with original messages to restore reasoning_content + payload_messages = payload.get("messages", []) + + # The payload messages and original messages should be in the same order + # Iterate through both and match by position + if len(payload_messages) == len(original_messages): + for payload_msg, orig_msg in zip(payload_messages, original_messages): + if ( + payload_msg.get("role") == "assistant" + and isinstance(orig_msg, AIMessage) + ): + reasoning_content = orig_msg.additional_kwargs.get("reasoning_content") + if reasoning_content is not None: + payload_msg["reasoning_content"] = reasoning_content + else: + # Fallback: match by counting assistant messages + ai_messages = [m for m in original_messages if isinstance(m, AIMessage)] + assistant_payloads = [ + (i, m) for i, m in enumerate(payload_messages) + if m.get("role") == "assistant" + ] + + for (idx, payload_msg), ai_msg in zip(assistant_payloads, ai_messages): + reasoning_content = ai_msg.additional_kwargs.get("reasoning_content") + if reasoning_content is not None: + payload_messages[idx]["reasoning_content"] = reasoning_content + + return payload diff --git a/config.example.yaml b/config.example.yaml index b41dbc1..3891777 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -56,6 +56,19 @@ models: # thinking: # 
type: enabled + # Example: Kimi K2.5 model (PatchedChatDeepSeek re-sends reasoning_content on assistant turns, which thinking mode requires) + # - name: kimi-k2.5 + # display_name: Kimi K2.5 + # use: src.models.patched_deepseek:PatchedChatDeepSeek + # model: kimi-k2.5 + # api_base: https://api.moonshot.cn/v1 + # api_key: $MOONSHOT_API_KEY + # max_tokens: 32768 + # supports_thinking: true + # extra_body: + # thinking: + # type: enabled + # ============================================================================ # Tool Groups Configuration # ============================================================================