fix: extract and log Claude output_config.effort in usage records

Claude's output_config.effort parameter (low/medium/high/max) was not
being extracted from requests or logged in the reasoning_effort column
of usage logs. Only the OpenAI path populated this field.

Changes:
- Extract output_config.effort in ParseGatewayRequest
- Add ReasoningEffort field to ForwardResult
- Populate reasoning_effort in both RecordUsage and RecordUsageWithLongContext
- Guard against overwriting service-set effort values in handler
- Update stale comments that described reasoning_effort as OpenAI-only
- Add unit tests for extraction, normalization, and persistence
This commit is contained in:
YanzheL
2026-03-15 12:55:37 +08:00
parent 6da5fa01b9
commit 1bff2292a6
7 changed files with 156 additions and 4 deletions

View File

@@ -334,8 +334,8 @@ type UsageLog struct {
Model string `json:"model"`
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
ServiceTier *string `json:"service_tier,omitempty"`
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API).
// nil means not provided / not applicable.
// ReasoningEffort is the request's reasoning effort level.
// OpenAI: "low"/"medium"/"high"/"xhigh"; Claude: "low"/"medium"/"high"/"max".
ReasoningEffort *string `json:"reasoning_effort,omitempty"`
GroupID *int64 `json:"group_id"`

View File

@@ -436,6 +436,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
clientIP := ip.GetClientIP(c)
requestPayloadHash := service.HashUsageRequestPayload(body)
if result.ReasoningEffort == nil {
result.ReasoningEffort = service.NormalizeClaudeOutputEffort(parsedReq.OutputEffort)
}
// 使用量记录通过有界 worker 池提交,避免请求热路径创建无界 goroutine。
h.submitUsageRecordTask(func(ctx context.Context) {
if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
@@ -740,6 +744,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
clientIP := ip.GetClientIP(c)
requestPayloadHash := service.HashUsageRequestPayload(body)
if result.ReasoningEffort == nil {
result.ReasoningEffort = service.NormalizeClaudeOutputEffort(parsedReq.OutputEffort)
}
// 使用量记录通过有界 worker 池提交,避免请求热路径创建无界 goroutine。
h.submitUsageRecordTask(func(ctx context.Context) {
if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{

View File

@@ -369,3 +369,54 @@ func TestGatewayServiceRecordUsage_BillingErrorSkipsUsageLogWrite(t *testing.T)
require.Equal(t, 1, billingRepo.calls)
require.Equal(t, 0, usageRepo.calls)
}
func TestGatewayServiceRecordUsage_ReasoningEffortPersisted(t *testing.T) {
usageRepo := &openAIRecordUsageBestEffortLogRepoStub{}
svc := newGatewayRecordUsageServiceForTest(usageRepo, &openAIRecordUsageUserRepoStub{}, &openAIRecordUsageSubRepoStub{})
effort := "max"
err := svc.RecordUsage(context.Background(), &RecordUsageInput{
Result: &ForwardResult{
RequestID: "effort_test",
Usage: ClaudeUsage{
InputTokens: 10,
OutputTokens: 5,
},
Model: "claude-opus-4-6",
Duration: time.Second,
ReasoningEffort: &effort,
},
APIKey: &APIKey{ID: 1},
User: &User{ID: 1},
Account: &Account{ID: 1},
})
require.NoError(t, err)
require.NotNil(t, usageRepo.lastLog)
require.NotNil(t, usageRepo.lastLog.ReasoningEffort)
require.Equal(t, "max", *usageRepo.lastLog.ReasoningEffort)
}
func TestGatewayServiceRecordUsage_ReasoningEffortNil(t *testing.T) {
usageRepo := &openAIRecordUsageBestEffortLogRepoStub{}
svc := newGatewayRecordUsageServiceForTest(usageRepo, &openAIRecordUsageUserRepoStub{}, &openAIRecordUsageSubRepoStub{})
err := svc.RecordUsage(context.Background(), &RecordUsageInput{
Result: &ForwardResult{
RequestID: "no_effort_test",
Usage: ClaudeUsage{
InputTokens: 10,
OutputTokens: 5,
},
Model: "claude-sonnet-4",
Duration: time.Second,
},
APIKey: &APIKey{ID: 1},
User: &User{ID: 1},
Account: &Account{ID: 1},
})
require.NoError(t, err)
require.NotNil(t, usageRepo.lastLog)
require.Nil(t, usageRepo.lastLog.ReasoningEffort)
}

View File

@@ -60,6 +60,7 @@ type ParsedRequest struct {
Messages []any // messages 数组
HasSystem bool // 是否包含 system 字段(包含 null 也视为显式传入)
ThinkingEnabled bool // 是否开启 thinking部分平台会影响最终模型名
OutputEffort string // output_config.effortClaude API 的推理强度控制)
MaxTokens int // max_tokens 值(用于探测请求拦截)
SessionContext *SessionContext // 可选请求上下文区分因子nil 时行为不变)
@@ -116,6 +117,9 @@ func ParseGatewayRequest(body []byte, protocol string) (*ParsedRequest, error) {
parsed.ThinkingEnabled = true
}
// output_config.effort: Claude API 的推理强度控制参数
parsed.OutputEffort = strings.TrimSpace(gjson.Get(jsonStr, "output_config.effort").String())
// max_tokens: 仅接受整数值
maxTokensResult := gjson.Get(jsonStr, "max_tokens")
if maxTokensResult.Exists() && maxTokensResult.Type == gjson.Number {
@@ -747,6 +751,21 @@ func filterThinkingBlocksInternal(body []byte, _ bool) []byte {
return newBody
}
// NormalizeClaudeOutputEffort normalizes Claude's output_config.effort value.
// Returns nil for empty or unrecognized values.
func NormalizeClaudeOutputEffort(raw string) *string {
value := strings.ToLower(strings.TrimSpace(raw))
if value == "" {
return nil
}
switch value {
case "low", "medium", "high", "max":
return &value
default:
return nil
}
}
// =========================
// Thinking Budget Rectifier
// =========================

View File

@@ -972,6 +972,76 @@ func BenchmarkParseGatewayRequest_Old_Large(b *testing.B) {
}
}
func TestParseGatewayRequest_OutputEffort(t *testing.T) {
tests := []struct {
name string
body string
wantEffort string
}{
{
name: "output_config.effort present",
body: `{"model":"claude-opus-4-6","output_config":{"effort":"medium"},"messages":[]}`,
wantEffort: "medium",
},
{
name: "output_config.effort max",
body: `{"model":"claude-opus-4-6","output_config":{"effort":"max"},"messages":[]}`,
wantEffort: "max",
},
{
name: "output_config without effort",
body: `{"model":"claude-opus-4-6","output_config":{},"messages":[]}`,
wantEffort: "",
},
{
name: "no output_config",
body: `{"model":"claude-opus-4-6","messages":[]}`,
wantEffort: "",
},
{
name: "effort with whitespace trimmed",
body: `{"model":"claude-opus-4-6","output_config":{"effort":" high "},"messages":[]}`,
wantEffort: "high",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
parsed, err := ParseGatewayRequest([]byte(tt.body), "")
require.NoError(t, err)
require.Equal(t, tt.wantEffort, parsed.OutputEffort)
})
}
}
func TestNormalizeClaudeOutputEffort(t *testing.T) {
tests := []struct {
input string
want *string
}{
{"low", strPtr("low")},
{"medium", strPtr("medium")},
{"high", strPtr("high")},
{"max", strPtr("max")},
{"LOW", strPtr("low")},
{"Max", strPtr("max")},
{" medium ", strPtr("medium")},
{"", nil},
{"unknown", nil},
{"xhigh", nil},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
got := NormalizeClaudeOutputEffort(tt.input)
if tt.want == nil {
require.Nil(t, got)
} else {
require.NotNil(t, got)
require.Equal(t, *tt.want, *got)
}
})
}
}
func BenchmarkParseGatewayRequest_New_Large(b *testing.B) {
data := buildLargeJSON()
b.SetBytes(int64(len(data)))

View File

@@ -492,6 +492,7 @@ type ForwardResult struct {
Duration time.Duration
FirstTokenMs *int // 首字时间(流式请求)
ClientDisconnect bool // 客户端是否在流式传输过程中断开
ReasoningEffort *string
// 图片生成计费字段(图片生成模型使用)
ImageCount int // 生成的图片数量
@@ -7523,6 +7524,7 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu
AccountID: account.ID,
RequestID: requestID,
Model: result.Model,
ReasoningEffort: result.ReasoningEffort,
InputTokens: result.Usage.InputTokens,
OutputTokens: result.Usage.OutputTokens,
CacheCreationTokens: result.Usage.CacheCreationInputTokens,
@@ -7699,6 +7701,7 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input *
AccountID: account.ID,
RequestID: requestID,
Model: result.Model,
ReasoningEffort: result.ReasoningEffort,
InputTokens: result.Usage.InputTokens,
OutputTokens: result.Usage.OutputTokens,
CacheCreationTokens: result.Usage.CacheCreationInputTokens,

View File

@@ -100,8 +100,9 @@ type UsageLog struct {
Model string
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
ServiceTier *string
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API),
// e.g. "low" / "medium" / "high" / "xhigh". Nil means not provided / not applicable.
// ReasoningEffort is the request's reasoning effort level.
// OpenAI: "low" / "medium" / "high" / "xhigh"; Claude: "low" / "medium" / "high" / "max".
// Nil means not provided / not applicable.
ReasoningEffort *string
GroupID *int64