From ebe6f418f38180b81b681fb0d008149ff5722923 Mon Sep 17 00:00:00 2001 From: shaw Date: Mon, 9 Mar 2026 11:42:35 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20gpt->claude=E6=A0=BC=E5=BC=8F=E8=BD=AC?= =?UTF-8?q?=E6=8D=A2=E5=AF=B9=E9=BD=90effort=E6=98=A0=E5=B0=84=E5=92=8Cfas?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pkg/apicompat/anthropic_responses_test.go | 99 ++++++++++++++++++- .../pkg/apicompat/anthropic_to_responses.go | 39 +++++--- backend/internal/pkg/apicompat/types.go | 29 +++--- backend/internal/service/gateway_beta_test.go | 26 +++++ backend/internal/service/gateway_service.go | 13 +++ .../service/openai_gateway_messages.go | 24 +++++ 6 files changed, 202 insertions(+), 28 deletions(-) diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go index c4677aba..1c1d39bb 100644 --- a/backend/internal/pkg/apicompat/anthropic_responses_test.go +++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go @@ -631,7 +631,8 @@ func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) { resp, err := AnthropicToResponses(req) require.NoError(t, err) require.NotNil(t, resp.Reasoning) - assert.Equal(t, "high", resp.Reasoning.Effort) + // thinking.type is ignored for effort; default xhigh applies. + assert.Equal(t, "xhigh", resp.Reasoning.Effort) assert.Equal(t, "auto", resp.Reasoning.Summary) assert.Contains(t, resp.Include, "reasoning.encrypted_content") assert.NotContains(t, resp.Include, "reasoning.summary") @@ -648,7 +649,8 @@ func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) { resp, err := AnthropicToResponses(req) require.NoError(t, err) require.NotNil(t, resp.Reasoning) - assert.Equal(t, "medium", resp.Reasoning.Effort) + // thinking.type is ignored for effort; default xhigh applies. + assert.Equal(t, "xhigh", resp.Reasoning.Effort) assert.Equal(t, "auto", resp.Reasoning.Summary) assert.NotContains(t, resp.Include, "reasoning.summary") } @@ -663,8 +665,9 @@ func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) { resp, err := AnthropicToResponses(req) require.NoError(t, err) - assert.Nil(t, resp.Reasoning) - assert.NotContains(t, resp.Include, "reasoning.summary") + // Default effort applies (high → xhigh) even when thinking is disabled. + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "xhigh", resp.Reasoning.Effort) } func TestAnthropicToResponses_NoThinking(t *testing.T) { @@ -676,7 +679,93 @@ func TestAnthropicToResponses_NoThinking(t *testing.T) { resp, err := AnthropicToResponses(req) require.NoError(t, err) - assert.Nil(t, resp.Reasoning) + // Default effort applies (high → xhigh) when no thinking/output_config is set. + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "xhigh", resp.Reasoning.Effort) +} + +// --------------------------------------------------------------------------- +// output_config.effort override tests +// --------------------------------------------------------------------------- + +func TestAnthropicToResponses_OutputConfigOverridesDefault(t *testing.T) { + // Default is xhigh, but output_config.effort="low" overrides. low→low after mapping. + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + Thinking: &AnthropicThinking{Type: "enabled", BudgetTokens: 10000}, + OutputConfig: &AnthropicOutputConfig{Effort: "low"}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "low", resp.Reasoning.Effort) + assert.Equal(t, "auto", resp.Reasoning.Summary) +} + +func TestAnthropicToResponses_OutputConfigWithoutThinking(t *testing.T) { + // No thinking field, but output_config.effort="medium" → creates reasoning. + // medium→high after mapping. + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + OutputConfig: &AnthropicOutputConfig{Effort: "medium"}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "high", resp.Reasoning.Effort) + assert.Equal(t, "auto", resp.Reasoning.Summary) +} + +func TestAnthropicToResponses_OutputConfigHigh(t *testing.T) { + // output_config.effort="high" → mapped to "xhigh". + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + OutputConfig: &AnthropicOutputConfig{Effort: "high"}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "xhigh", resp.Reasoning.Effort) + assert.Equal(t, "auto", resp.Reasoning.Summary) +} + +func TestAnthropicToResponses_NoOutputConfig(t *testing.T) { + // No output_config → default xhigh regardless of thinking.type. + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + Thinking: &AnthropicThinking{Type: "enabled", BudgetTokens: 10000}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "xhigh", resp.Reasoning.Effort) +} + +func TestAnthropicToResponses_OutputConfigWithoutEffort(t *testing.T) { + // output_config present but effort empty (e.g. only format set) → default xhigh. + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + OutputConfig: &AnthropicOutputConfig{}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "xhigh", resp.Reasoning.Effort) } // --------------------------------------------------------------------------- diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go index 09a6f227..592bec39 100644 --- a/backend/internal/pkg/apicompat/anthropic_to_responses.go +++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go @@ -45,18 +45,16 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) { out.Tools = convertAnthropicToolsToResponses(req.Tools) } - // Convert thinking → reasoning. - // generate_summary="auto" causes the upstream to emit reasoning_summary_text - // streaming events; the include array only needs reasoning.encrypted_content - // (already set above) for content continuity. - if req.Thinking != nil { - switch req.Thinking.Type { - case "enabled": - out.Reasoning = &ResponsesReasoning{Effort: "high", Summary: "auto"} - case "adaptive": - out.Reasoning = &ResponsesReasoning{Effort: "medium", Summary: "auto"} - } - // "disabled" or unknown → omit reasoning + // Determine reasoning effort: only output_config.effort controls the + // level; thinking.type is ignored. Default is xhigh when unset. + // Anthropic levels map to OpenAI: low→low, medium→high, high→xhigh. + effort := "high" // default → maps to xhigh + if req.OutputConfig != nil && req.OutputConfig.Effort != "" { + effort = req.OutputConfig.Effort + } + out.Reasoning = &ResponsesReasoning{ + Effort: mapAnthropicEffortToResponses(effort), + Summary: "auto", } // Convert tool_choice @@ -380,6 +378,23 @@ func extractAnthropicTextFromBlocks(blocks []AnthropicContentBlock) string { return strings.Join(parts, "\n\n") } +// mapAnthropicEffortToResponses converts Anthropic reasoning effort levels to +// OpenAI Responses API effort levels. +// +// low → low +// medium → high +// high → xhigh +func mapAnthropicEffortToResponses(effort string) string { + switch effort { + case "medium": + return "high" + case "high": + return "xhigh" + default: + return effort // "low" and any unknown values pass through unchanged + } +} + // convertAnthropicToolsToResponses maps Anthropic tool definitions to // Responses API tools. Server-side tools like web_search are mapped to their // OpenAI equivalents; regular tools become function tools. diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go index bb9432ac..aa58b58f 100644 --- a/backend/internal/pkg/apicompat/types.go +++ b/backend/internal/pkg/apicompat/types.go @@ -12,17 +12,23 @@ import "encoding/json" // AnthropicRequest is the request body for POST /v1/messages. type AnthropicRequest struct { - Model string `json:"model"` - MaxTokens int `json:"max_tokens"` - System json.RawMessage `json:"system,omitempty"` // string or []AnthropicContentBlock - Messages []AnthropicMessage `json:"messages"` - Tools []AnthropicTool `json:"tools,omitempty"` - Stream bool `json:"stream,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - StopSeqs []string `json:"stop_sequences,omitempty"` - Thinking *AnthropicThinking `json:"thinking,omitempty"` - ToolChoice json.RawMessage `json:"tool_choice,omitempty"` + Model string `json:"model"` + MaxTokens int `json:"max_tokens"` + System json.RawMessage `json:"system,omitempty"` // string or []AnthropicContentBlock + Messages []AnthropicMessage `json:"messages"` + Tools []AnthropicTool `json:"tools,omitempty"` + Stream bool `json:"stream,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + StopSeqs []string `json:"stop_sequences,omitempty"` + Thinking *AnthropicThinking `json:"thinking,omitempty"` + ToolChoice json.RawMessage `json:"tool_choice,omitempty"` + OutputConfig *AnthropicOutputConfig `json:"output_config,omitempty"` +} + +// AnthropicOutputConfig controls output generation parameters. +type AnthropicOutputConfig struct { + Effort string `json:"effort,omitempty"` // "low" | "medium" | "high" } // AnthropicThinking configures extended thinking in the Anthropic API. @@ -156,6 +162,7 @@ type ResponsesRequest struct { Store *bool `json:"store,omitempty"` Reasoning *ResponsesReasoning `json:"reasoning,omitempty"` ToolChoice json.RawMessage `json:"tool_choice,omitempty"` + ServiceTier string `json:"service_tier,omitempty"` } // ResponsesReasoning configures reasoning effort in the Responses API. diff --git a/backend/internal/service/gateway_beta_test.go b/backend/internal/service/gateway_beta_test.go index 0a66a53d..a389ecf1 100644 --- a/backend/internal/service/gateway_beta_test.go +++ b/backend/internal/service/gateway_beta_test.go @@ -148,6 +148,32 @@ func TestBuildBetaTokenSet(t *testing.T) { require.Empty(t, empty) } +func TestContainsBetaToken(t *testing.T) { + tests := []struct { + name string + header string + token string + want bool + }{ + {"present in middle", "oauth-2025-04-20,fast-mode-2026-02-01,interleaved-thinking-2025-05-14", "fast-mode-2026-02-01", true}, + {"present at start", "fast-mode-2026-02-01,oauth-2025-04-20", "fast-mode-2026-02-01", true}, + {"present at end", "oauth-2025-04-20,fast-mode-2026-02-01", "fast-mode-2026-02-01", true}, + {"only token", "fast-mode-2026-02-01", "fast-mode-2026-02-01", true}, + {"not present", "oauth-2025-04-20,interleaved-thinking-2025-05-14", "fast-mode-2026-02-01", false}, + {"with spaces", "oauth-2025-04-20, fast-mode-2026-02-01 , interleaved-thinking-2025-05-14", "fast-mode-2026-02-01", true}, + {"empty header", "", "fast-mode-2026-02-01", false}, + {"empty token", "fast-mode-2026-02-01", "", false}, + {"partial match", "fast-mode-2026-02-01-extra", "fast-mode-2026-02-01", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := containsBetaToken(tt.header, tt.token) + require.Equal(t, tt.want, got) + }) + } +} + func TestStripBetaTokensWithSet_EmptyDropSet(t *testing.T) { header := "oauth-2025-04-20,interleaved-thinking-2025-05-14" got := stripBetaTokensWithSet(header, map[string]struct{}{}) diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 10437d21..29cd0a27 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -5341,6 +5341,19 @@ func droppedBetaSet(extra ...string) map[string]struct{} { return m } +// containsBetaToken checks if a comma-separated header value contains the given token. +func containsBetaToken(header, token string) bool { + if header == "" || token == "" { + return false + } + for _, p := range strings.Split(header, ",") { + if strings.TrimSpace(p) == token { + return true + } + } + return false +} + func buildBetaTokenSet(tokens []string) map[string]struct{} { m := make(map[string]struct{}, len(tokens)) for _, t := range tokens { diff --git a/backend/internal/service/openai_gateway_messages.go b/backend/internal/service/openai_gateway_messages.go index fe97b734..58ff0680 100644 --- a/backend/internal/service/openai_gateway_messages.go +++ b/backend/internal/service/openai_gateway_messages.go @@ -12,6 +12,7 @@ import ( "time" "github.com/Wei-Shaw/sub2api/internal/pkg/apicompat" + "github.com/Wei-Shaw/sub2api/internal/pkg/claude" "github.com/Wei-Shaw/sub2api/internal/pkg/logger" "github.com/Wei-Shaw/sub2api/internal/util/responseheaders" "github.com/gin-gonic/gin" @@ -46,6 +47,11 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic( return nil, fmt.Errorf("convert anthropic to responses: %w", err) } + // 2b. Handle BetaFastMode → service_tier: "priority" + if containsBetaToken(c.GetHeader("anthropic-beta"), claude.BetaFastMode) { + responsesReq.ServiceTier = "priority" + } + // 3. Model mapping mappedModel := account.GetMappedModel(originalModel) // 分组级降级:账号未映射时使用分组默认映射模型 @@ -94,6 +100,12 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic( return nil, fmt.Errorf("build upstream request: %w", err) } + // Override session_id with a deterministic UUID derived from the sticky + // session key (buildUpstreamRequest may have set it to the raw value). + if promptCacheKey != "" { + upstreamReq.Header.Set("session_id", generateSessionUUID(promptCacheKey)) + } + // 7. Send request proxyURL := "" if account.Proxy != nil { @@ -160,6 +172,18 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic( result, handleErr = s.handleAnthropicNonStreamingResponse(resp, c, originalModel, mappedModel, startTime) } + // Propagate ServiceTier and ReasoningEffort to result for billing + if handleErr == nil && result != nil { + if responsesReq.ServiceTier != "" { + st := responsesReq.ServiceTier + result.ServiceTier = &st + } + if responsesReq.Reasoning != nil && responsesReq.Reasoning.Effort != "" { + re := responsesReq.Reasoning.Effort + result.ReasoningEffort = &re + } + } + // Extract and save Codex usage snapshot from response headers (for OAuth accounts) if handleErr == nil && account.Type == AccountTypeOAuth { if snapshot := ParseCodexRateLimitHeaders(resp.Header); snapshot != nil {