diff --git a/backend/docs/CONFIGURATION.md b/backend/docs/CONFIGURATION.md index e4fee52..19359a3 100644 --- a/backend/docs/CONFIGURATION.md +++ b/backend/docs/CONFIGURATION.md @@ -136,6 +136,36 @@ models: type: enabled ``` +**Gemini with thinking via OpenAI-compatible gateway**: + +When routing Gemini through an OpenAI-compatible proxy (Vertex AI OpenAI compat endpoint, AI Studio, or third-party gateways) with thinking enabled, the API attaches a `thought_signature` to each tool-call object returned in the response. Every subsequent request that replays those assistant messages **must** echo those signatures back on the tool-call entries or the API returns: + +``` +HTTP 400 INVALID_ARGUMENT: function call `<name>` in the N. content block is +missing a `thought_signature`. +``` + +Standard `langchain_openai:ChatOpenAI` silently drops `thought_signature` when serialising messages. Use `deerflow.models.patched_openai:PatchedChatOpenAI` instead — it re-injects the tool-call signatures (sourced from `AIMessage.additional_kwargs["tool_calls"]`) into every outgoing payload: + +```yaml +models: + - name: gemini-2.5-pro-thinking + display_name: Gemini 2.5 Pro (Thinking) + use: deerflow.models.patched_openai:PatchedChatOpenAI + model: google/gemini-2.5-pro-preview # model name as expected by your gateway + api_key: $GEMINI_API_KEY + base_url: https://<your-gateway-host>/v1 + max_tokens: 16384 + supports_thinking: true + supports_vision: true + when_thinking_enabled: + extra_body: + thinking: + type: enabled +``` + +For Gemini accessed **without** thinking (e.g. via OpenRouter where thinking is not activated), the plain `langchain_openai:ChatOpenAI` with `supports_thinking: false` is sufficient and no patch is needed. 
+ ### Tool Groups Organize tools into logical groups: diff --git a/backend/packages/harness/deerflow/models/patched_openai.py b/backend/packages/harness/deerflow/models/patched_openai.py new file mode 100644 index 0000000..f2fe242 --- /dev/null +++ b/backend/packages/harness/deerflow/models/patched_openai.py @@ -0,0 +1,134 @@ +"""Patched ChatOpenAI that preserves thought_signature for Gemini thinking models. + +When using Gemini with thinking enabled via an OpenAI-compatible gateway (e.g. +Vertex AI, Google AI Studio, or any proxy), the API requires that the +``thought_signature`` field on tool-call objects is echoed back verbatim in +every subsequent request. + +The OpenAI-compatible gateway stores the raw tool-call dicts (including +``thought_signature``) in ``additional_kwargs["tool_calls"]``, but standard +``langchain_openai.ChatOpenAI`` only serialises the standard fields (``id``, +``type``, ``function``) into the outgoing payload, silently dropping the +signature. That causes an HTTP 400 ``INVALID_ARGUMENT`` error: + + Unable to submit request because function call `` in the N. content + block is missing a `thought_signature`. + +This module fixes the problem by overriding ``_get_request_payload`` to +re-inject tool-call signatures back into the outgoing payload for any assistant +message that originally carried them. +""" + +from __future__ import annotations + +from typing import Any + +from langchain_core.language_models import LanguageModelInput +from langchain_core.messages import AIMessage +from langchain_openai import ChatOpenAI + + +class PatchedChatOpenAI(ChatOpenAI): + """ChatOpenAI with ``thought_signature`` preservation for Gemini thinking via OpenAI gateway. + + When using Gemini with thinking enabled via an OpenAI-compatible gateway, + the API expects ``thought_signature`` to be present on tool-call objects in + multi-turn conversations. 
This patched version restores those signatures + from ``AIMessage.additional_kwargs["tool_calls"]`` into the serialised + request payload before it is sent to the API. + + Usage in ``config.yaml``:: + + - name: gemini-2.5-pro-thinking + display_name: Gemini 2.5 Pro (Thinking) + use: deerflow.models.patched_openai:PatchedChatOpenAI + model: google/gemini-2.5-pro-preview + api_key: $GEMINI_API_KEY + base_url: https:///v1 + max_tokens: 16384 + supports_thinking: true + supports_vision: true + when_thinking_enabled: + extra_body: + thinking: + type: enabled + """ + + def _get_request_payload( + self, + input_: LanguageModelInput, + *, + stop: list[str] | None = None, + **kwargs: Any, + ) -> dict: + """Get request payload with ``thought_signature`` preserved on tool-call objects. + + Overrides the parent method to re-inject ``thought_signature`` fields + on tool-call objects that were stored in + ``additional_kwargs["tool_calls"]`` by LangChain but dropped during + serialisation. + """ + # Capture the original LangChain messages *before* conversion so we can + # access fields that the serialiser might drop. + original_messages = self._convert_input(input_).to_messages() + + # Obtain the base payload from the parent implementation. + payload = super()._get_request_payload(input_, stop=stop, **kwargs) + + payload_messages = payload.get("messages", []) + + if len(payload_messages) == len(original_messages): + for payload_msg, orig_msg in zip(payload_messages, original_messages): + if payload_msg.get("role") == "assistant" and isinstance(orig_msg, AIMessage): + _restore_tool_call_signatures(payload_msg, orig_msg) + else: + # Fallback: match assistant-role entries positionally against AIMessages. 
+ ai_messages = [m for m in original_messages if isinstance(m, AIMessage)] + assistant_payloads = [ + (i, m) for i, m in enumerate(payload_messages) if m.get("role") == "assistant" + ] + for (_, payload_msg), ai_msg in zip(assistant_payloads, ai_messages): + _restore_tool_call_signatures(payload_msg, ai_msg) + + return payload + + +def _restore_tool_call_signatures(payload_msg: dict, orig_msg: AIMessage) -> None: + """Re-inject ``thought_signature`` onto tool-call objects in *payload_msg*. + + When the Gemini OpenAI-compatible gateway returns a response with function + calls, each tool-call object may carry a ``thought_signature``. LangChain + stores the raw tool-call dicts in ``additional_kwargs["tool_calls"]`` but + only serialises the standard fields (``id``, ``type``, ``function``) into + the outgoing payload, silently dropping the signature. + + This function matches raw tool-call entries (by ``id``, falling back to + positional order) and copies the signature back onto the serialised + payload entries. + """ + raw_tool_calls: list[dict] = orig_msg.additional_kwargs.get("tool_calls") or [] + payload_tool_calls: list[dict] = payload_msg.get("tool_calls") or [] + + if not raw_tool_calls or not payload_tool_calls: + return + + # Build an id → raw_tc lookup for efficient matching. + raw_by_id: dict[str, dict] = {} + for raw_tc in raw_tool_calls: + tc_id = raw_tc.get("id") + if tc_id: + raw_by_id[tc_id] = raw_tc + + for idx, payload_tc in enumerate(payload_tool_calls): + # Try matching by id first, then fall back to positional. + raw_tc = raw_by_id.get(payload_tc.get("id", "")) + if raw_tc is None and idx < len(raw_tool_calls): + raw_tc = raw_tool_calls[idx] + + if raw_tc is None: + continue + + # The gateway may use either snake_case or camelCase. 
+ sig = raw_tc.get("thought_signature") or raw_tc.get("thoughtSignature") + if sig: + payload_tc["thought_signature"] = sig diff --git a/backend/tests/test_patched_openai.py b/backend/tests/test_patched_openai.py new file mode 100644 index 0000000..0659c4a --- /dev/null +++ b/backend/tests/test_patched_openai.py @@ -0,0 +1,176 @@ +"""Tests for deerflow.models.patched_openai.PatchedChatOpenAI. + +These tests verify that _restore_tool_call_signatures correctly re-injects +``thought_signature`` onto tool-call objects stored in +``additional_kwargs["tool_calls"]``, covering id-based matching, positional +fallback, camelCase keys, and several edge-cases. +""" + +from __future__ import annotations + +from langchain_core.messages import AIMessage + +from deerflow.models.patched_openai import _restore_tool_call_signatures + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +RAW_TC_SIGNED = { + "id": "call_1", + "type": "function", + "function": {"name": "web_fetch", "arguments": '{"url":"http://example.com"}'}, + "thought_signature": "SIG_A==", +} + +RAW_TC_UNSIGNED = { + "id": "call_2", + "type": "function", + "function": {"name": "bash", "arguments": '{"cmd":"ls"}'}, +} + +PAYLOAD_TC_1 = { + "type": "function", + "id": "call_1", + "function": {"name": "web_fetch", "arguments": '{"url":"http://example.com"}'}, +} + +PAYLOAD_TC_2 = { + "type": "function", + "id": "call_2", + "function": {"name": "bash", "arguments": '{"cmd":"ls"}'}, +} + + +def _ai_msg_with_raw_tool_calls(raw_tool_calls: list[dict]) -> AIMessage: + return AIMessage(content="", additional_kwargs={"tool_calls": raw_tool_calls}) + + +# --------------------------------------------------------------------------- +# Core: signed tool-call restoration +# --------------------------------------------------------------------------- + + +def test_tool_call_signature_restored_by_id(): + 
"""thought_signature is copied to the payload tool-call matched by id.""" + payload_msg = {"role": "assistant", "content": None, "tool_calls": [PAYLOAD_TC_1.copy()]} + orig = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED]) + + _restore_tool_call_signatures(payload_msg, orig) + + assert payload_msg["tool_calls"][0]["thought_signature"] == "SIG_A==" + + +def test_tool_call_signature_for_parallel_calls(): + """For parallel function calls, only the first has a signature (per Gemini spec).""" + payload_msg = { + "role": "assistant", + "content": None, + "tool_calls": [PAYLOAD_TC_1.copy(), PAYLOAD_TC_2.copy()], + } + orig = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED, RAW_TC_UNSIGNED]) + + _restore_tool_call_signatures(payload_msg, orig) + + assert payload_msg["tool_calls"][0]["thought_signature"] == "SIG_A==" + assert "thought_signature" not in payload_msg["tool_calls"][1] + + +def test_tool_call_signature_camel_case(): + """thoughtSignature (camelCase) from some gateways is also handled.""" + raw_camel = { + "id": "call_1", + "type": "function", + "function": {"name": "web_fetch", "arguments": "{}"}, + "thoughtSignature": "SIG_CAMEL==", + } + payload_msg = {"role": "assistant", "content": None, "tool_calls": [PAYLOAD_TC_1.copy()]} + orig = _ai_msg_with_raw_tool_calls([raw_camel]) + + _restore_tool_call_signatures(payload_msg, orig) + + assert payload_msg["tool_calls"][0]["thought_signature"] == "SIG_CAMEL==" + + +def test_tool_call_signature_positional_fallback(): + """When ids don't match, falls back to positional matching.""" + raw_no_id = { + "type": "function", + "function": {"name": "web_fetch", "arguments": "{}"}, + "thought_signature": "SIG_POS==", + } + payload_tc = { + "type": "function", + "id": "call_99", + "function": {"name": "web_fetch", "arguments": "{}"}, + } + payload_msg = {"role": "assistant", "content": None, "tool_calls": [payload_tc]} + orig = _ai_msg_with_raw_tool_calls([raw_no_id]) + + _restore_tool_call_signatures(payload_msg, orig) + + assert 
payload_tc["thought_signature"] == "SIG_POS==" + + +# --------------------------------------------------------------------------- +# Edge cases: no-op scenarios for tool-call signatures +# --------------------------------------------------------------------------- + + +def test_tool_call_no_raw_tool_calls_is_noop(): + """No change when additional_kwargs has no tool_calls.""" + payload_msg = {"role": "assistant", "content": None, "tool_calls": [PAYLOAD_TC_1.copy()]} + orig = AIMessage(content="", additional_kwargs={}) + + _restore_tool_call_signatures(payload_msg, orig) + + assert "thought_signature" not in payload_msg["tool_calls"][0] + + +def test_tool_call_no_payload_tool_calls_is_noop(): + """No change when payload has no tool_calls.""" + payload_msg = {"role": "assistant", "content": "just text"} + orig = _ai_msg_with_raw_tool_calls([RAW_TC_SIGNED]) + + _restore_tool_call_signatures(payload_msg, orig) + + assert "tool_calls" not in payload_msg + + +def test_tool_call_unsigned_raw_entries_is_noop(): + """No signature added when raw tool-calls have no thought_signature.""" + payload_msg = {"role": "assistant", "content": None, "tool_calls": [PAYLOAD_TC_2.copy()]} + orig = _ai_msg_with_raw_tool_calls([RAW_TC_UNSIGNED]) + + _restore_tool_call_signatures(payload_msg, orig) + + assert "thought_signature" not in payload_msg["tool_calls"][0] + + +def test_tool_call_multiple_sequential_signatures(): + """Sequential tool calls each carry their own signature.""" + raw_tc_a = { + "id": "call_a", + "type": "function", + "function": {"name": "check_flight", "arguments": "{}"}, + "thought_signature": "SIG_STEP1==", + } + raw_tc_b = { + "id": "call_b", + "type": "function", + "function": {"name": "book_taxi", "arguments": "{}"}, + "thought_signature": "SIG_STEP2==", + } + payload_tc_a = {"type": "function", "id": "call_a", "function": {"name": "check_flight", "arguments": "{}"}} + payload_tc_b = {"type": "function", "id": "call_b", "function": {"name": "book_taxi", 
"arguments": "{}"}} + payload_msg = {"role": "assistant", "content": None, "tool_calls": [payload_tc_a, payload_tc_b]} + orig = _ai_msg_with_raw_tool_calls([raw_tc_a, raw_tc_b]) + + _restore_tool_call_signatures(payload_msg, orig) + + assert payload_tc_a["thought_signature"] == "SIG_STEP1==" + assert payload_tc_b["thought_signature"] == "SIG_STEP2==" + + +# Integration behavior for PatchedChatOpenAI is validated indirectly via +# _restore_tool_call_signatures unit coverage above. diff --git a/config.example.yaml b/config.example.yaml index 2f28e95..5b75602 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -81,7 +81,7 @@ models: # thinking: # type: enabled - # Example: Google Gemini model + # Example: Google Gemini model (native SDK, no thinking support) # - name: gemini-2.5-pro # display_name: Gemini 2.5 Pro # use: langchain_google_genai:ChatGoogleGenerativeAI @@ -90,6 +90,25 @@ models: # max_tokens: 8192 # supports_vision: true + # Example: Gemini model via OpenAI-compatible gateway (with thinking support) + # Use PatchedChatOpenAI so that tool-call thought_signature values on tool_calls + # are preserved across multi-turn tool-call conversations — required by the + # Gemini API when thinking is enabled. See: + # https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thought-signatures + # - name: gemini-2.5-pro-thinking + # display_name: Gemini 2.5 Pro (Thinking) + # use: deerflow.models.patched_openai:PatchedChatOpenAI + # model: google/gemini-2.5-pro-preview # model name as expected by your gateway + # api_key: $GEMINI_API_KEY + # base_url: https:///v1 + # max_tokens: 16384 + # supports_thinking: true + # supports_vision: true + # when_thinking_enabled: + # extra_body: + # thinking: + # type: enabled + # Example: DeepSeek model (with thinking support) # - name: deepseek-v3 # display_name: DeepSeek V3 (Thinking)