diff --git a/backend/internal/handler/dto/types.go b/backend/internal/handler/dto/types.go index 20e83973..c52e357e 100644 --- a/backend/internal/handler/dto/types.go +++ b/backend/internal/handler/dto/types.go @@ -334,8 +334,8 @@ type UsageLog struct { Model string `json:"model"` // ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex". ServiceTier *string `json:"service_tier,omitempty"` - // ReasoningEffort is the request's reasoning effort level (OpenAI Responses API). - // nil means not provided / not applicable. + // ReasoningEffort is the request's reasoning effort level. + // OpenAI: "low"/"medium"/"high"/"xhigh"; Claude: "low"/"medium"/"high"/"max". ReasoningEffort *string `json:"reasoning_effort,omitempty"` // InboundEndpoint is the client-facing API endpoint path, e.g. /v1/chat/completions. InboundEndpoint *string `json:"inbound_endpoint,omitempty"` diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index a0eb42f6..09652ada 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -443,6 +443,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) { clientIP := ip.GetClientIP(c) requestPayloadHash := service.HashUsageRequestPayload(body) + if result.ReasoningEffort == nil { + result.ReasoningEffort = service.NormalizeClaudeOutputEffort(parsedReq.OutputEffort) + } + // 使用量记录通过有界 worker 池提交,避免请求热路径创建无界 goroutine。 h.submitUsageRecordTask(func(ctx context.Context) { if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{ @@ -754,6 +758,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) { clientIP := ip.GetClientIP(c) requestPayloadHash := service.HashUsageRequestPayload(body) + if result.ReasoningEffort == nil { + result.ReasoningEffort = service.NormalizeClaudeOutputEffort(parsedReq.OutputEffort) + } + // 使用量记录通过有界 worker 池提交,避免请求热路径创建无界 goroutine。 h.submitUsageRecordTask(func(ctx context.Context) { if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{ diff --git a/backend/internal/service/gateway_record_usage_test.go b/backend/internal/service/gateway_record_usage_test.go index 4e7e545a..4c1f0317 100644 --- a/backend/internal/service/gateway_record_usage_test.go +++ b/backend/internal/service/gateway_record_usage_test.go @@ -369,3 +369,54 @@ func TestGatewayServiceRecordUsage_BillingErrorSkipsUsageLogWrite(t *testing.T) require.Equal(t, 1, billingRepo.calls) require.Equal(t, 0, usageRepo.calls) } + +func TestGatewayServiceRecordUsage_ReasoningEffortPersisted(t *testing.T) { + usageRepo := &openAIRecordUsageBestEffortLogRepoStub{} + svc := newGatewayRecordUsageServiceForTest(usageRepo, &openAIRecordUsageUserRepoStub{}, &openAIRecordUsageSubRepoStub{}) + + effort := "max" + err := svc.RecordUsage(context.Background(), &RecordUsageInput{ + Result: &ForwardResult{ + RequestID: "effort_test", + Usage: ClaudeUsage{ + InputTokens: 10, + OutputTokens: 5, + }, + Model: "claude-opus-4-6", + Duration: time.Second, + ReasoningEffort: &effort, + }, + APIKey: &APIKey{ID: 1}, + User: &User{ID: 1}, + Account: &Account{ID: 1}, + }) + + require.NoError(t, err) + require.NotNil(t, usageRepo.lastLog) + require.NotNil(t, usageRepo.lastLog.ReasoningEffort) + require.Equal(t, "max", *usageRepo.lastLog.ReasoningEffort) +} + +func TestGatewayServiceRecordUsage_ReasoningEffortNil(t *testing.T) { + usageRepo := &openAIRecordUsageBestEffortLogRepoStub{} + svc := newGatewayRecordUsageServiceForTest(usageRepo, &openAIRecordUsageUserRepoStub{}, &openAIRecordUsageSubRepoStub{}) + + err := svc.RecordUsage(context.Background(), &RecordUsageInput{ + Result: &ForwardResult{ + RequestID: "no_effort_test", + Usage: ClaudeUsage{ + InputTokens: 10, + OutputTokens: 5, + }, + Model: "claude-sonnet-4", + Duration: time.Second, + }, + APIKey: &APIKey{ID: 1}, + User: &User{ID: 1}, + Account: &Account{ID: 1}, + }) + + require.NoError(t, err) + require.NotNil(t, usageRepo.lastLog) + require.Nil(t, usageRepo.lastLog.ReasoningEffort) +} diff --git a/backend/internal/service/gateway_request.go b/backend/internal/service/gateway_request.go index f7bc57ac..3816aea9 100644 --- a/backend/internal/service/gateway_request.go +++ b/backend/internal/service/gateway_request.go @@ -60,6 +60,7 @@ type ParsedRequest struct { Messages []any // messages 数组 HasSystem bool // 是否包含 system 字段(包含 null 也视为显式传入) ThinkingEnabled bool // 是否开启 thinking(部分平台会影响最终模型名) + OutputEffort string // output_config.effort(Claude API 的推理强度控制) MaxTokens int // max_tokens 值(用于探测请求拦截) SessionContext *SessionContext // 可选:请求上下文区分因子(nil 时行为不变) @@ -116,6 +117,9 @@ func ParseGatewayRequest(body []byte, protocol string) (*ParsedRequest, error) { parsed.ThinkingEnabled = true } + // output_config.effort: Claude API 的推理强度控制参数 + parsed.OutputEffort = strings.TrimSpace(gjson.Get(jsonStr, "output_config.effort").String()) + // max_tokens: 仅接受整数值 maxTokensResult := gjson.Get(jsonStr, "max_tokens") if maxTokensResult.Exists() && maxTokensResult.Type == gjson.Number { @@ -747,6 +751,21 @@ func filterThinkingBlocksInternal(body []byte, _ bool) []byte { return newBody } +// NormalizeClaudeOutputEffort normalizes Claude's output_config.effort value. +// Returns nil for empty or unrecognized values. +func NormalizeClaudeOutputEffort(raw string) *string { + value := strings.ToLower(strings.TrimSpace(raw)) + if value == "" { + return nil + } + switch value { + case "low", "medium", "high", "max": + return &value + default: + return nil + } +} + // ========================= // Thinking Budget Rectifier // ========================= diff --git a/backend/internal/service/gateway_request_test.go b/backend/internal/service/gateway_request_test.go index 42b61e3f..f60ed9fb 100644 --- a/backend/internal/service/gateway_request_test.go +++ b/backend/internal/service/gateway_request_test.go @@ -972,6 +972,76 @@ func BenchmarkParseGatewayRequest_Old_Large(b *testing.B) { } } +func TestParseGatewayRequest_OutputEffort(t *testing.T) { + tests := []struct { + name string + body string + wantEffort string + }{ + { + name: "output_config.effort present", + body: `{"model":"claude-opus-4-6","output_config":{"effort":"medium"},"messages":[]}`, + wantEffort: "medium", + }, + { + name: "output_config.effort max", + body: `{"model":"claude-opus-4-6","output_config":{"effort":"max"},"messages":[]}`, + wantEffort: "max", + }, + { + name: "output_config without effort", + body: `{"model":"claude-opus-4-6","output_config":{},"messages":[]}`, + wantEffort: "", + }, + { + name: "no output_config", + body: `{"model":"claude-opus-4-6","messages":[]}`, + wantEffort: "", + }, + { + name: "effort with whitespace trimmed", + body: `{"model":"claude-opus-4-6","output_config":{"effort":" high "},"messages":[]}`, + wantEffort: "high", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + parsed, err := ParseGatewayRequest([]byte(tt.body), "") + require.NoError(t, err) + require.Equal(t, tt.wantEffort, parsed.OutputEffort) + }) + } +} + +func TestNormalizeClaudeOutputEffort(t *testing.T) { + tests := []struct { + input string + want *string + }{ + {"low", strPtr("low")}, + {"medium", strPtr("medium")}, + {"high", strPtr("high")}, + {"max", strPtr("max")}, + {"LOW", strPtr("low")}, + {"Max", strPtr("max")}, + {" medium ", strPtr("medium")}, + {"", nil}, + {"unknown", nil}, + {"xhigh", nil}, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := NormalizeClaudeOutputEffort(tt.input) + if tt.want == nil { + require.Nil(t, got) + } else { + require.NotNil(t, got) + require.Equal(t, *tt.want, *got) + } + }) + } +} + func BenchmarkParseGatewayRequest_New_Large(b *testing.B) { data := buildLargeJSON() b.SetBytes(int64(len(data))) diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 55b11ec2..70f64121 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -492,6 +492,7 @@ type ForwardResult struct { Duration time.Duration FirstTokenMs *int // 首字时间(流式请求) ClientDisconnect bool // 客户端是否在流式传输过程中断开 + ReasoningEffort *string // 图片生成计费字段(图片生成模型使用) ImageCount int // 生成的图片数量 @@ -7523,6 +7524,7 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu AccountID: account.ID, RequestID: requestID, Model: result.Model, + ReasoningEffort: result.ReasoningEffort, InputTokens: result.Usage.InputTokens, OutputTokens: result.Usage.OutputTokens, CacheCreationTokens: result.Usage.CacheCreationInputTokens, @@ -7699,6 +7701,7 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input * AccountID: account.ID, RequestID: requestID, Model: result.Model, + ReasoningEffort: result.ReasoningEffort, InputTokens: result.Usage.InputTokens, OutputTokens: result.Usage.OutputTokens, CacheCreationTokens: result.Usage.CacheCreationInputTokens, diff --git a/backend/internal/service/usage_log.go b/backend/internal/service/usage_log.go index ef313a01..7f1bef7f 100644 --- a/backend/internal/service/usage_log.go +++ b/backend/internal/service/usage_log.go @@ -100,8 +100,9 @@ type UsageLog struct { Model string // ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex". ServiceTier *string - // ReasoningEffort is the request's reasoning effort level (OpenAI Responses API), - // e.g. "low" / "medium" / "high" / "xhigh". Nil means not provided / not applicable. + // ReasoningEffort is the request's reasoning effort level. + // OpenAI: "low" / "medium" / "high" / "xhigh"; Claude: "low" / "medium" / "high" / "max". + // Nil means not provided / not applicable. ReasoningEffort *string // InboundEndpoint is the client-facing API endpoint path, e.g. /v1/chat/completions. InboundEndpoint *string