fix: preserve reasoning_content in multi-turn conversations

When using thinking-enabled models (like Kimi K2.5, DeepSeek), the API
expects reasoning_content on all assistant messages. The original
ChatDeepSeek stores reasoning_content in additional_kwargs but doesn't
include it when making subsequent API calls, causing "reasoning_content
is missing" errors.

This adds PatchedChatDeepSeek which overrides _get_request_payload to
restore reasoning_content from additional_kwargs into the payload.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
hetaoBackend
2026-01-28 14:03:43 +08:00
parent d84a34b7cd
commit fa9fba3f8e
4 changed files with 93 additions and 7 deletions

View File

@@ -41,7 +41,7 @@ async def main():
"thinking_enabled": True,
"is_plan_mode": True,
# Uncomment to use a specific model
"model_name": "deepseek-v3.2",
"model_name": "kimi-k2.5",
}
}

View File

@@ -6,7 +6,7 @@ from typing import NotRequired, override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import SystemMessage
from langchain_core.messages import HumanMessage
from langgraph.runtime import Runtime
from src.agents.middlewares.thread_data_middleware import THREAD_DATA_BASE_DIR
@@ -124,17 +124,19 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
# List uploaded files
files = self._list_uploaded_files(thread_id)
if not files:
return None
# Create system message with file list
files_message = self._create_files_message(files)
system_message = SystemMessage(content=files_message)
files_human_message = HumanMessage(content=files_message)
# Inject the message into the message history
# This will be added after the system prompt but before user messages
# This will be added before user messages
messages = list(state.get("messages", []))
# Insert after the first system message (the main prompt)
insert_index = 1 if messages and hasattr(messages[0], "type") and messages[0].type == "system" else 0
messages.insert(insert_index, system_message)
insert_index = 0
messages.insert(insert_index, files_human_message)
return {
"uploaded_files": files,

View File

@@ -0,0 +1,71 @@
"""Patched ChatDeepSeek that preserves reasoning_content in multi-turn conversations.
This module provides a patched version of ChatDeepSeek that properly handles
reasoning_content when sending messages back to the API. The original implementation
stores reasoning_content in additional_kwargs but doesn't include it when making
subsequent API calls, which causes errors with APIs that require reasoning_content
on all assistant messages when thinking mode is enabled.
"""
from typing import Any
from langchain_core.language_models import LanguageModelInput
from langchain_core.messages import AIMessage
from langchain_deepseek import ChatDeepSeek
class PatchedChatDeepSeek(ChatDeepSeek):
    """ChatDeepSeek variant that sends ``reasoning_content`` back to the API.

    Thinking/reasoning-enabled models expect ``reasoning_content`` on the
    assistant messages of a multi-turn conversation. The value is kept in
    ``AIMessage.additional_kwargs``; this subclass copies it from there into
    the matching assistant entries of the outgoing request payload.
    """

    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> dict:
        """Build the parent payload, then re-attach ``reasoning_content``.

        Args:
            input_: The model input; converted to messages to find the
                source ``AIMessage`` objects.
            stop: Stop sequences, forwarded unchanged to the parent.
            **kwargs: Extra arguments, forwarded unchanged to the parent.

        Returns:
            The payload dict produced by the parent implementation, with
            ``reasoning_content`` set on each assistant entry whose source
            ``AIMessage`` carries a non-``None`` value in
            ``additional_kwargs``.
        """
        # Source messages are resolved first, then the parent builds the payload.
        source_msgs = self._convert_input(input_).to_messages()
        payload = super()._get_request_payload(input_, stop=stop, **kwargs)
        outgoing = payload.get("messages", [])

        if len(outgoing) == len(source_msgs):
            # Same length: pair entries by position and keep only the pairs
            # where both sides agree that the message is an assistant turn.
            pairs = (
                (entry, source)
                for entry, source in zip(outgoing, source_msgs)
                if entry.get("role") == "assistant"
                and isinstance(source, AIMessage)
            )
        else:
            # Lengths differ: pair the n-th AIMessage with the n-th assistant
            # entry of the payload instead of relying on absolute position.
            ai_sources = [
                source for source in source_msgs if isinstance(source, AIMessage)
            ]
            assistant_entries = [
                entry for entry in outgoing if entry.get("role") == "assistant"
            ]
            pairs = zip(assistant_entries, ai_sources)

        for entry, source in pairs:
            reasoning = source.additional_kwargs.get("reasoning_content")
            if reasoning is None:
                # Nothing stored for this turn; leave the payload entry as is.
                continue
            entry["reasoning_content"] = reasoning

        return payload

View File

@@ -56,6 +56,19 @@ models:
# thinking:
# type: enabled
# Example: Kimi K2.5 model
# - name: kimi-k2.5
# display_name: Kimi K2.5
# use: src.models.patched_deepseek:PatchedChatDeepSeek
# model: kimi-k2.5
# api_base: https://api.moonshot.cn/v1
# api_key: $MOONSHOT_API_KEY
# max_tokens: 32768
# supports_thinking: true
# extra_body:
# thinking:
# type: enabled
# ============================================================================
# Tool Groups Configuration
# ============================================================================