fix(openai-gateway): route APIKey accounts to /v1/chat/completions when upstream lacks Responses API

OpenAI APIKey accounts with base_url pointing to third-party OpenAI-compatible upstreams (DeepSeek, Kimi, GLM, Qwen, etc.) were failing because the gateway unconditionally converted Chat Completions requests to Responses format and forwarded to {base_url}/v1/responses, which only exists on OpenAI's official endpoint. Detection-based routing: - Probe upstream capability on account create/update via a minimal POST to /v1/responses; HTTP 404/405 means 'unsupported', any other response means 'supported'. - Persist result as accounts.extra.openai_responses_supported (bool). - ForwardAsChatCompletions branches at function entry: APIKey accounts with explicit support=false go through new forwardAsRawChatCompletions which passthrough-forwards CC body to /v1/chat/completions without protocol conversion. Default behavior for accounts without the marker preserves the legacy 'always Responses' path — existing OpenAI APIKey accounts that were working before this change continue to work without modification (the 'reality is evidence' principle: an account that has been running implies upstream capability). Probe is fired async after Create / Update / BatchCreate; failures only log, never block the admin flow. BulkUpdate omitted (low signal of base_url changes; can be added if needed). Implementation: - New pkg internal/pkg/openai_compat: marker key + ShouldUseResponsesAPI - New service file openai_apikey_responses_probe.go: probe + persist - New service file openai_gateway_chat_completions_raw.go: CC pass-through - Account test endpoint short-circuits with explicit message for probed-unsupported accounts (full CC test path is a TODO) Zero schema changes, zero migrations, zero frontend changes, zero wire modifications — all wired through existing AccountTestService injection. Closes: DeepSeek-OpenAI account (id=128) production failure
2026-05-05 05:30:44 +08:00 · 2026-04-30 19:25:45 +08:00
parent 48912014a1
commit 4e4cc80971
7 changed files with 708 additions and 3 deletions
--- a/backend/internal/handler/admin/account_handler.go
+++ b/backend/internal/handler/admin/account_handler.go
@@ -637,9 +637,39 @@ func (h *AccountHandler) Update(c *gin.Context) {
 		return
 	}
 	// OpenAI APIKey: credentials 修改后重新探测上游能力（base_url/api_key 可能变更）。
 	// 异步执行，探测失败不影响账号更新响应。
 	if len(req.Credentials) > 0 {
 		h.scheduleOpenAIResponsesProbe(account)
 	}
 	response.Success(c, h.buildAccountResponseWithRuntime(c.Request.Context(), account))
 }
 // scheduleOpenAIResponsesProbe 异步触发 OpenAI APIKey 账号的 Responses API 能力探测。
 //
 // 仅对 platform=openai && type=apikey 账号生效；其他账号无操作。
 // 探测本身在 goroutine 中执行（会发一次 HTTP 请求到上游），不会阻塞
 // 当前请求。探测错误仅记录日志，不向上下文传播：探测失败时标记保持缺失，
 // 网关会按"现状即证据"默认走 Responses。
 func (h *AccountHandler) scheduleOpenAIResponsesProbe(account *service.Account) {
 	if account == nil || account.Platform != service.PlatformOpenAI || account.Type != service.AccountTypeAPIKey {
 		return
 	}
 	if h.accountTestService == nil {
 		return
 	}
 	accountID := account.ID
 	go func() {
 		defer func() {
 			if r := recover(); r != nil {
 				slog.Error("openai_responses_probe_panic", "account_id", accountID, "recover", r)
 			}
 		}()
 		h.accountTestService.ProbeOpenAIAPIKeyResponsesSupport(context.Background(), accountID)
 	}()
 }
 // Delete handles deleting an account
 // DELETE /api/v1/admin/accounts/:id
 func (h *AccountHandler) Delete(c *gin.Context) {
@@ -1231,6 +1261,8 @@ func (h *AccountHandler) BatchCreate(c *gin.Context) {
 					openaiPrivacyAccounts = append(openaiPrivacyAccounts, account)
 				}
 			}
 			// OpenAI APIKey 账号异步探测 /v1/responses 能力。
 			h.scheduleOpenAIResponsesProbe(account)
 			success++
 			results = append(results, gin.H{
 				"name":    item.Name,
--- a/backend/internal/pkg/openai_compat/upstream_capability.go
+++ b/backend/internal/pkg/openai_compat/upstream_capability.go
@@ -0,0 +1,75 @@
 // Package openai_compat 提供 OpenAI 协议族在不同上游间的能力差异判定工具。
 //
 // 背景：sub2api 的 OpenAI APIKey 账号通过 base_url 接入多种第三方 OpenAI 兼容上游
 // （DeepSeek、Kimi、GLM、Qwen 等）。这些上游普遍只支持 /v1/chat/completions，
 // 不存在 /v1/responses 端点。但网关历史代码无差别走 CC→Responses 转换并打到
 // /v1/responses，导致兼容上游 404。
 //
 // 本包提供基于"账号探测标记"的能力判定，配合
 // internal/service/openai_apikey_responses_probe.go 在创建/修改账号时一次性
 // 探测并落标。
 //
 // 设计取舍：
 //   - 不维护静态 host 白名单——避免新增厂商时必须改代码（讨论沉淀于
 //     pensieve/short-term/knowledge/upstream-capability-detection-design-tradeoffs）
 //   - 标记缺失时默认 true（即"走 Responses"），保持与重构前老代码完全一致的存量
 //     账号行为（"现状即证据"原则；详见
 //     pensieve/short-term/maxims/preserve-existing-runtime-behavior-when-replacing-logic-in-stateful-systems）
 package openai_compat
 // AccountResponsesSupport 描述账号上游对 OpenAI Responses API 的支持状态。
 //
 // 仅用于 platform=openai + type=apikey 的账号；其他账号类型不应调用本包判定。
 type AccountResponsesSupport int
 const (
 	// ResponsesSupportUnknown 表示账号尚未完成能力探测（extra 字段缺失）。
 	// 上游路由层应按"现状即证据"原则默认走 Responses，保持与重构前一致。
 	ResponsesSupportUnknown AccountResponsesSupport = iota
 	// ResponsesSupportYes 探测确认上游支持 /v1/responses。
 	ResponsesSupportYes
 	// ResponsesSupportNo 探测确认上游不支持 /v1/responses，应走
 	// /v1/chat/completions 直转路径。
 	ResponsesSupportNo
 )
 // ExtraKeyResponsesSupported 是 accounts.extra JSON 中存储探测结果的键名。
 // 值类型为 bool：true=支持、false=不支持、键缺失=未探测。
 const ExtraKeyResponsesSupported = "openai_responses_supported"
 // ResolveResponsesSupport 从账号的 extra map 中读取探测标记。
 //
 // 标记缺失或类型不匹配时返回 ResponsesSupportUnknown——调用方应按
 // "未探测=保留旧行为=走 Responses" 处理（参见 ShouldUseResponsesAPI）。
 func ResolveResponsesSupport(extra map[string]any) AccountResponsesSupport {
 	if extra == nil {
 		return ResponsesSupportUnknown
 	}
 	v, ok := extra[ExtraKeyResponsesSupported]
 	if !ok {
 		return ResponsesSupportUnknown
 	}
 	supported, ok := v.(bool)
 	if !ok {
 		return ResponsesSupportUnknown
 	}
 	if supported {
 		return ResponsesSupportYes
 	}
 	return ResponsesSupportNo
 }
 // ShouldUseResponsesAPI 判断 OpenAI APIKey 账号的入站 /v1/chat/completions 请求
 // 是否应走"CC→Responses 转换 + 上游 /v1/responses"路径。
 //
 // 返回 true 的两种情况：
 //  1. 账号已探测确认支持 Responses
 //  2. 账号未探测（标记缺失）——按"现状即证据"原则保留旧行为
 //
 // 仅当账号已探测且确认不支持时返回 false，此时调用方应走 CC 直转路径
 // （详见 internal/service/openai_gateway_chat_completions_raw.go）。
 func ShouldUseResponsesAPI(extra map[string]any) bool {
 	return ResolveResponsesSupport(extra) != ResponsesSupportNo
 }
--- a/backend/internal/pkg/openai_compat/upstream_capability_test.go
+++ b/backend/internal/pkg/openai_compat/upstream_capability_test.go
@@ -0,0 +1,55 @@
 package openai_compat
 import "testing"
 func TestResolveResponsesSupport(t *testing.T) {
 	tests := []struct {
 		name  string
 		extra map[string]any
 		want  AccountResponsesSupport
 	}{
 		{"nil extra", nil, ResponsesSupportUnknown},
 		{"empty extra", map[string]any{}, ResponsesSupportUnknown},
 		{"key missing", map[string]any{"other": "value"}, ResponsesSupportUnknown},
 		{"value true", map[string]any{ExtraKeyResponsesSupported: true}, ResponsesSupportYes},
 		{"value false", map[string]any{ExtraKeyResponsesSupported: false}, ResponsesSupportNo},
 		{"value wrong type string", map[string]any{ExtraKeyResponsesSupported: "true"}, ResponsesSupportUnknown},
 		{"value wrong type number", map[string]any{ExtraKeyResponsesSupported: 1}, ResponsesSupportUnknown},
 		{"value nil", map[string]any{ExtraKeyResponsesSupported: nil}, ResponsesSupportUnknown},
 	}
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
 			got := ResolveResponsesSupport(tc.extra)
 			if got != tc.want {
 				t.Errorf("ResolveResponsesSupport(%v) = %v, want %v", tc.extra, got, tc.want)
 			}
 		})
 	}
 }
 func TestShouldUseResponsesAPI(t *testing.T) {
 	tests := []struct {
 		name  string
 		extra map[string]any
 		want  bool
 	}{
 		// 关键不变量：未探测必须返回 true（保留旧行为）
 		{"unknown defaults to true (preserve old behavior)", nil, true},
 		{"unknown empty defaults to true", map[string]any{}, true},
 		{"unknown wrong type defaults to true", map[string]any{ExtraKeyResponsesSupported: "yes"}, true},
 		// 已探测：标记决定
 		{"explicitly supported", map[string]any{ExtraKeyResponsesSupported: true}, true},
 		{"explicitly unsupported", map[string]any{ExtraKeyResponsesSupported: false}, false},
 	}
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
 			got := ShouldUseResponsesAPI(tc.extra)
 			if got != tc.want {
 				t.Errorf("ShouldUseResponsesAPI(%v) = %v, want %v", tc.extra, got, tc.want)
 			}
 		})
 	}
 }
--- a/backend/internal/service/account_test_service.go
+++ b/backend/internal/service/account_test_service.go
@@ -21,6 +21,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/geminicli"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
 	"github.com/Wei-Shaw/sub2api/internal/util/urlvalidator"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
@@ -554,6 +555,15 @@ func (s *AccountTestService) testOpenAIAccountConnection(c *gin.Context, account
 		if err != nil {
 			return s.sendErrorAndEnd(c, fmt.Sprintf("Invalid base URL: %s", err.Error()))
 		}
 		// 账号已被探测为不支持 Responses（如 DeepSeek/Kimi 等）时，丢出明确提示。
 		// 账号本身可用（网关会走 CC 直转），仅测试入口需要补齐 CC SSE 处理逻辑。
 		// TODO：实现 CC 格式的账号测试路径（需专门的 CC SSE handler）。
 		if !openai_compat.ShouldUseResponsesAPI(account.Extra) {
 			return s.sendErrorAndEnd(c,
 				"账号已被探测为不支持 OpenAI Responses API（如 DeepSeek/Kimi 等三方兼容上游），"+
 					"账号本身可正常使用，但当前测试接口仅支持 Responses API 路径。请直接通过实际 API 调用验证。",
 			)
 		}
 		apiURL = strings.TrimSuffix(normalizedBaseURL, "/") + "/responses"
 	} else {
 		return s.sendErrorAndEnd(c, fmt.Sprintf("Unsupported account type: %s", account.Type))
--- a/backend/internal/service/openai_apikey_responses_probe.go
+++ b/backend/internal/service/openai_apikey_responses_probe.go
@@ -0,0 +1,149 @@
 package service
 import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"io"
 	"net/http"
 	"strings"
 	"time"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
 )
 // openaiResponsesProbeTimeout 是探测请求的超时时长。
 // 探测必须快速失败——超时不应阻塞账号创建/更新流程。
 const openaiResponsesProbeTimeout = 8 * time.Second
 // openaiResponsesProbePayload 是探测使用的最小 Responses 请求体。
 // 仅作能力探测，不期望响应内容质量；Stream=false 减少 SSE 解析开销。
 //
 // 注意：探测的目标是区分"端点存在"与"端点不存在"——只要上游返回非 404 的
 // 4xx/5xx（如 400 invalid_request_error / 401 unauthorized / 422 等），
 // 都视为"端点存在 → 支持 Responses"。仅 404 / 405 视为"端点不存在"。
 func openaiResponsesProbePayload(modelID string) []byte {
 	if strings.TrimSpace(modelID) == "" {
 		modelID = openai.DefaultTestModel
 	}
 	body, _ := json.Marshal(map[string]any{
 		"model": modelID,
 		"input": []map[string]any{
 			{
 				"role": "user",
 				"content": []map[string]any{
 					{"type": "input_text", "text": "hi"},
 				},
 			},
 		},
 		"instructions": openai.DefaultInstructions,
 		"stream":       false,
 	})
 	return body
 }
 // ProbeOpenAIAPIKeyResponsesSupport 探测 OpenAI APIKey 账号上游是否支持
 // /v1/responses 端点，并将结果持久化到 accounts.extra.openai_responses_supported。
 //
 // 调用时机：账号创建/更新后，且仅当 platform=openai && type=apikey 时。
 //
 // 探测策略（参见包文档 internal/pkg/openai_compat）：
 //   - 上游 404 / 405 → 不支持，写 false
 //   - 上游 2xx / 其他 4xx（401/422/400 等）/ 5xx → 支持，写 true
 //   - 网络层失败（连接错误、超时）→ 不写标记，保持 unknown
 //     （后续请求仍按"现状即证据"默认走 Responses）
 //
 // 该方法是幂等的：重复调用会以最新探测结果覆盖标记。
 //
 // 关于失败处理：探测本身的失败不应阻塞账号创建——账号能创建/更新成功就够了，
 // 探测结果只影响后续路由优化。所有错误都仅记录日志，不向调用方传播。
 func (s *AccountTestService) ProbeOpenAIAPIKeyResponsesSupport(ctx context.Context, accountID int64) {
 	account, err := s.accountRepo.GetByID(ctx, accountID)
 	if err != nil {
 		logger.LegacyPrintf("service.openai_probe", "probe_load_account_failed: account_id=%d err=%v", accountID, err)
 		return
 	}
 	if account.Platform != PlatformOpenAI || account.Type != AccountTypeAPIKey {
 		// 仅 OpenAI APIKey 账号需要探测；其他账号类型无能力差异。
 		return
 	}
 	apiKey := account.GetOpenAIApiKey()
 	if apiKey == "" {
 		logger.LegacyPrintf("service.openai_probe", "probe_skip_no_apikey: account_id=%d", accountID)
 		return
 	}
 	baseURL := account.GetOpenAIBaseURL()
 	if baseURL == "" {
 		baseURL = "https://api.openai.com"
 	}
 	normalizedBaseURL, err := s.validateUpstreamBaseURL(baseURL)
 	if err != nil {
 		logger.LegacyPrintf("service.openai_probe", "probe_invalid_baseurl: account_id=%d base_url=%q err=%v", accountID, baseURL, err)
 		return
 	}
 	probeURL := strings.TrimSuffix(normalizedBaseURL, "/") + "/responses"
 	probeCtx, cancel := context.WithTimeout(ctx, openaiResponsesProbeTimeout)
 	defer cancel()
 	req, err := http.NewRequestWithContext(probeCtx, http.MethodPost, probeURL, bytes.NewReader(openaiResponsesProbePayload("")))
 	if err != nil {
 		logger.LegacyPrintf("service.openai_probe", "probe_build_request_failed: account_id=%d err=%v", accountID, err)
 		return
 	}
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("Authorization", "Bearer "+apiKey)
 	req.Header.Set("Accept", "application/json")
 	proxyURL := ""
 	if account.ProxyID != nil && account.Proxy != nil {
 		proxyURL = account.Proxy.URL()
 	}
 	resp, err := s.httpUpstream.DoWithTLS(req, proxyURL, account.ID, account.Concurrency, s.tlsFPProfileService.ResolveTLSProfile(account))
 	if err != nil {
 		// 网络层失败：不写标记，保持 unknown，下次重试或由网关 fallback 处理
 		logger.LegacyPrintf("service.openai_probe", "probe_request_failed: account_id=%d url=%s err=%v", accountID, probeURL, err)
 		return
 	}
 	defer func() {
 		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1<<20))
 		_ = resp.Body.Close()
 	}()
 	supported := isResponsesEndpointSupportedByStatus(resp.StatusCode)
 	if err := s.accountRepo.UpdateExtra(ctx, accountID, map[string]any{
 		openai_compat.ExtraKeyResponsesSupported: supported,
 	}); err != nil {
 		logger.LegacyPrintf("service.openai_probe", "probe_persist_failed: account_id=%d supported=%v err=%v", accountID, supported, err)
 		return
 	}
 	logger.LegacyPrintf("service.openai_probe",
 		"probe_done: account_id=%d base_url=%s status=%d supported=%v",
 		accountID, normalizedBaseURL, resp.StatusCode, supported,
 	)
 }
 // isResponsesEndpointSupportedByStatus 根据探测响应的 HTTP 状态码判定上游
 // 是否暴露 /v1/responses 端点。
 //
 // 关键观察：第三方 OpenAI 兼容上游（DeepSeek/Kimi 等）对未知端点统一返回 404
 // 或 405；而 OpenAI 官方/有 Responses 实现的上游会因为请求体最简（缺字段）
 // 返回 400/422 等业务错误，但端点本身存在。
 //
 // 因此：仅 404 和 405 视为"端点不存在"，其他 status 视为"端点存在"。
 //
 // 5xx 也视为"端点存在"——上游偶发故障不应误判为不支持。
 func isResponsesEndpointSupportedByStatus(status int) bool {
 	switch status {
 	case http.StatusNotFound, http.StatusMethodNotAllowed:
 		return false
 	}
 	return true
 }
--- a/backend/internal/service/openai_gateway_chat_completions.go
+++ b/backend/internal/service/openai_gateway_chat_completions.go
@@ -14,6 +14,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
 	"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
 	"github.com/gin-gonic/gin"
 	"github.com/tidwall/gjson"
@@ -39,9 +40,18 @@ var cursorResponsesUnsupportedFields = []string{
 // ForwardAsChatCompletions accepts a Chat Completions request body, converts it
 // to OpenAI Responses API format, forwards to the OpenAI upstream, and converts
-// the response back to Chat Completions format. All account types (OAuth and API
+// the response back to Chat Completions format.
-// Key) go through the Responses API conversion path since the upstream only
+//
-// exposes the /v1/responses endpoint.
+// 历史背景：该函数原本对所有 OpenAI 账号无差别走 CC→Responses 转换 + /v1/responses
 // 端点——这在 OAuth（ChatGPT 内部 API 仅支持 Responses）和官方 APIKey 账号上是
 // 正确的，但 sub2api 接入 DeepSeek/Kimi/GLM 等第三方 OpenAI 兼容上游后假设破裂：
 // 这些上游普遍只支持 /v1/chat/completions，无 /v1/responses 端点。
 //
 // 当前路由策略（基于账号探测标记，详见 openai_compat.ShouldUseResponsesAPI）：
 //   - APIKey 账号 + 探测确认不支持 Responses → 走 forwardAsRawChatCompletions
 //     直转上游 /v1/chat/completions，不做协议转换
 //   - 其他所有情况（OAuth、APIKey 探测确认支持、未探测）→ 走原有 CC→Responses
 //     转换路径（保留旧行为，存量未探测账号零兼容破坏）
 func (s *OpenAIGatewayService) ForwardAsChatCompletions(
 	ctx context.Context,
 	c *gin.Context,
@@ -50,6 +60,12 @@ func (s *OpenAIGatewayService) ForwardAsChatCompletions(
 	promptCacheKey string,
 	defaultMappedModel string,
 ) (*OpenAIForwardResult, error) {
 	// 入口分流：APIKey 账号 + 已探测且确认上游不支持 Responses，走 CC 直转。
 	// 标记缺失（未探测）按"现状即证据"原则继续走下方原 Responses 转换路径。
 	if account.Type == AccountTypeAPIKey && !openai_compat.ShouldUseResponsesAPI(account.Extra) {
 		return s.forwardAsRawChatCompletions(ctx, c, account, body, defaultMappedModel)
 	}
 	startTime := time.Now()
 	// 1. Parse Chat Completions request
--- a/backend/internal/service/openai_gateway_chat_completions_raw.go
+++ b/backend/internal/service/openai_gateway_chat_completions_raw.go
@@ -0,0 +1,368 @@
 package service
 import (
 	"bufio"
 	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"net/http"
 	"strings"
 	"time"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
 	"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
 	"github.com/gin-gonic/gin"
 	"github.com/tidwall/gjson"
 	"go.uber.org/zap"
 )
 // forwardAsRawChatCompletions 直转客户端的 Chat Completions 请求到上游
 // `{base_url}/v1/chat/completions`，**不**做 CC↔Responses 协议转换。
 //
 // 适用场景：account.platform=openai && account.type=apikey && 上游已被探测确认
 // 不支持 /v1/responses 端点（如 DeepSeek/Kimi/GLM/Qwen 等第三方 OpenAI 兼容上游）。
 //
 // 与 ForwardAsChatCompletions 的关键差异：
 //
 //   - 不调用 apicompat.ChatCompletionsToResponses，body 仅做模型 ID 改写
 //   - 上游 URL 拼到 /v1/chat/completions 而非 /v1/responses
 //   - 流式响应 SSE 直接透传给客户端（上游 chunk 已是 CC 格式）
 //   - 非流式响应 JSON 直接透传，仅按需提取 usage
 //   - 不应用 codex OAuth transform（APIKey 路径无 OAuth）
 //   - 不注入 prompt_cache_key（OAuth 专属机制）
 //
 // 调用入口：openai_gateway_chat_completions.go::ForwardAsChatCompletions
 // 在函数顶部按 openai_compat.ShouldUseResponsesAPI 分流。
 func (s *OpenAIGatewayService) forwardAsRawChatCompletions(
 	ctx context.Context,
 	c *gin.Context,
 	account *Account,
 	body []byte,
 	defaultMappedModel string,
 ) (*OpenAIForwardResult, error) {
 	startTime := time.Now()
 	// 1. Parse minimal fields needed for routing/billing
 	originalModel := gjson.GetBytes(body, "model").String()
 	if originalModel == "" {
 		writeChatCompletionsError(c, http.StatusBadRequest, "invalid_request_error", "model is required")
 		return nil, fmt.Errorf("missing model in request")
 	}
 	clientStream := gjson.GetBytes(body, "stream").Bool()
 	includeUsage := gjson.GetBytes(body, "stream_options.include_usage").Bool()
 	// 2. Resolve model mapping (same as ForwardAsChatCompletions)
 	billingModel := resolveOpenAIForwardModel(account, originalModel, defaultMappedModel)
 	upstreamModel := normalizeOpenAIModelForUpstream(account, billingModel)
 	// 3. Rewrite model in body (no protocol conversion)
 	upstreamBody := body
 	if upstreamModel != originalModel {
 		upstreamBody = ReplaceModelInBody(body, upstreamModel)
 	}
 	// 4. Apply OpenAI fast policy on the CC body
 	updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, upstreamModel, upstreamBody)
 	if policyErr != nil {
 		var blocked *OpenAIFastBlockedError
 		if errors.As(policyErr, &blocked) {
 			writeChatCompletionsError(c, http.StatusForbidden, "permission_error", blocked.Message)
 		}
 		return nil, policyErr
 	}
 	upstreamBody = updatedBody
 	logger.L().Debug("openai chat_completions raw: forwarding without protocol conversion",
 		zap.Int64("account_id", account.ID),
 		zap.String("original_model", originalModel),
 		zap.String("billing_model", billingModel),
 		zap.String("upstream_model", upstreamModel),
 		zap.Bool("stream", clientStream),
 	)
 	// 5. Build upstream request
 	apiKey := account.GetOpenAIApiKey()
 	if apiKey == "" {
 		return nil, fmt.Errorf("account %d missing api_key", account.ID)
 	}
 	baseURL := account.GetOpenAIBaseURL()
 	if baseURL == "" {
 		baseURL = "https://api.openai.com"
 	}
 	validatedURL, err := s.validateUpstreamBaseURL(baseURL)
 	if err != nil {
 		return nil, fmt.Errorf("invalid base_url: %w", err)
 	}
 	targetURL := buildOpenAIChatCompletionsURL(validatedURL)
 	upstreamReq, err := http.NewRequestWithContext(ctx, http.MethodPost, targetURL, bytes.NewReader(upstreamBody))
 	if err != nil {
 		return nil, fmt.Errorf("build upstream request: %w", err)
 	}
 	upstreamReq.Header.Set("Content-Type", "application/json")
 	upstreamReq.Header.Set("Authorization", "Bearer "+apiKey)
 	if clientStream {
 		upstreamReq.Header.Set("Accept", "text/event-stream")
 	} else {
 		upstreamReq.Header.Set("Accept", "application/json")
 	}
 	// Whitelist passthrough headers (subset of openaiAllowedHeaders relevant to CC).
 	for key, values := range c.Request.Header {
 		lowerKey := strings.ToLower(key)
 		if openaiAllowedHeaders[lowerKey] {
 			for _, v := range values {
 				upstreamReq.Header.Add(key, v)
 			}
 		}
 	}
 	customUA := account.GetOpenAIUserAgent()
 	if customUA != "" {
 		upstreamReq.Header.Set("user-agent", customUA)
 	}
 	// 6. Send request
 	proxyURL := ""
 	if account.Proxy != nil {
 		proxyURL = account.Proxy.URL()
 	}
 	resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
 	if err != nil {
 		safeErr := sanitizeUpstreamErrorMessage(err.Error())
 		setOpsUpstreamError(c, 0, safeErr, "")
 		appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
 			Platform:           account.Platform,
 			AccountID:          account.ID,
 			AccountName:        account.Name,
 			UpstreamStatusCode: 0,
 			Kind:               "request_error",
 			Message:            safeErr,
 		})
 		writeChatCompletionsError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed")
 		return nil, fmt.Errorf("upstream request failed: %s", safeErr)
 	}
 	defer func() { _ = resp.Body.Close() }()
 	// 7. Handle error response with failover
 	if resp.StatusCode >= 400 {
 		respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 		_ = resp.Body.Close()
 		resp.Body = io.NopCloser(bytes.NewReader(respBody))
 		upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody))
 		upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
 		if s.shouldFailoverOpenAIUpstreamResponse(resp.StatusCode, upstreamMsg, respBody) {
 			upstreamDetail := ""
 			if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
 				maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
 				if maxBytes <= 0 {
 					maxBytes = 2048
 				}
 				upstreamDetail = truncateString(string(respBody), maxBytes)
 			}
 			appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
 				Platform:           account.Platform,
 				AccountID:          account.ID,
 				AccountName:        account.Name,
 				UpstreamStatusCode: resp.StatusCode,
 				UpstreamRequestID:  resp.Header.Get("x-request-id"),
 				Kind:               "failover",
 				Message:            upstreamMsg,
 				Detail:             upstreamDetail,
 			})
 			if s.rateLimitService != nil {
 				s.rateLimitService.HandleUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
 			}
 			return nil, &UpstreamFailoverError{
 				StatusCode:             resp.StatusCode,
 				ResponseBody:           respBody,
 				RetryableOnSameAccount: account.IsPoolMode() && (isPoolModeRetryableStatus(resp.StatusCode) || isOpenAITransientProcessingError(resp.StatusCode, upstreamMsg, respBody)),
 			}
 		}
 		return s.handleChatCompletionsErrorResponse(resp, c, account)
 	}
 	// 8. Forward response
 	if clientStream {
 		return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, includeUsage, startTime)
 	}
 	return s.bufferRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime)
 }
 // streamRawChatCompletions 透传上游 CC SSE 流到客户端，并提取 usage（包括
 // 末尾 [DONE] 之前的 chunk 中的 usage 字段，按 OpenAI CC 协议）。
 func (s *OpenAIGatewayService) streamRawChatCompletions(
 	c *gin.Context,
 	resp *http.Response,
 	originalModel string,
 	billingModel string,
 	upstreamModel string,
 	includeUsage bool,
 	startTime time.Time,
 ) (*OpenAIForwardResult, error) {
 	requestID := resp.Header.Get("x-request-id")
 	if s.responseHeaderFilter != nil {
 		responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
 	}
 	c.Writer.Header().Set("Content-Type", "text/event-stream")
 	c.Writer.Header().Set("Cache-Control", "no-cache")
 	c.Writer.Header().Set("Connection", "keep-alive")
 	c.Writer.Header().Set("X-Accel-Buffering", "no")
 	c.Writer.WriteHeader(http.StatusOK)
 	scanner := bufio.NewScanner(resp.Body)
 	maxLineSize := defaultMaxLineSize
 	if s.cfg != nil && s.cfg.Gateway.MaxLineSize > 0 {
 		maxLineSize = s.cfg.Gateway.MaxLineSize
 	}
 	scanner.Buffer(make([]byte, 0, 64*1024), maxLineSize)
 	var usage OpenAIUsage
 	var firstTokenMs *int
 	for scanner.Scan() {
 		line := scanner.Text()
 		// Direct passthrough: write each line + blank line separator
 		if _, werr := c.Writer.WriteString(line + "\n"); werr != nil {
 			logger.L().Debug("openai chat_completions raw: client write failed",
 				zap.Error(werr),
 				zap.String("request_id", requestID),
 			)
 			break
 		}
 		if line == "" {
 			c.Writer.Flush()
 			continue
 		}
 		c.Writer.Flush()
 		// Track first token timing on first non-empty data line
 		if firstTokenMs == nil && strings.HasPrefix(line, "data: ") && line != "data: [DONE]" {
 			elapsed := int(time.Since(startTime).Milliseconds())
 			firstTokenMs = &elapsed
 		}
 		// Extract usage from any chunk that carries it (CC streams typically put
 		// usage in the final chunk before [DONE], but may also appear elsewhere).
 		if strings.HasPrefix(line, "data: ") && line != "data: [DONE]" {
 			payload := line[6:]
 			if u := extractCCStreamUsage(payload); u != nil {
 				usage = *u
 			}
 		}
 	}
 	if err := scanner.Err(); err != nil {
 		if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) {
 			logger.L().Warn("openai chat_completions raw: stream read error",
 				zap.Error(err),
 				zap.String("request_id", requestID),
 			)
 		}
 	}
 	_ = includeUsage // CC 协议下 usage 是否包含由客户端的 stream_options 决定，上游会自行处理；我们仅做提取。
 	return &OpenAIForwardResult{
 		RequestID:     requestID,
 		Usage:         usage,
 		Model:         originalModel,
 		BillingModel:  billingModel,
 		UpstreamModel: upstreamModel,
 		Stream:        true,
 		Duration:      time.Since(startTime),
 		FirstTokenMs:  firstTokenMs,
 	}, nil
 }
 // bufferRawChatCompletions 透传上游 CC 非流式 JSON 响应。
 func (s *OpenAIGatewayService) bufferRawChatCompletions(
 	c *gin.Context,
 	resp *http.Response,
 	originalModel string,
 	billingModel string,
 	upstreamModel string,
 	startTime time.Time,
 ) (*OpenAIForwardResult, error) {
 	requestID := resp.Header.Get("x-request-id")
 	respBody, err := io.ReadAll(io.LimitReader(resp.Body, 32<<20))
 	if err != nil {
 		writeChatCompletionsError(c, http.StatusBadGateway, "api_error", "Failed to read upstream response")
 		return nil, fmt.Errorf("read upstream body: %w", err)
 	}
 	var ccResp apicompat.ChatCompletionsResponse
 	var usage OpenAIUsage
 	if err := json.Unmarshal(respBody, &ccResp); err == nil && ccResp.Usage != nil {
 		usage = OpenAIUsage{
 			InputTokens:  ccResp.Usage.PromptTokens,
 			OutputTokens: ccResp.Usage.CompletionTokens,
 		}
 		if ccResp.Usage.PromptTokensDetails != nil {
 			usage.CacheReadInputTokens = ccResp.Usage.PromptTokensDetails.CachedTokens
 		}
 	}
 	if s.responseHeaderFilter != nil {
 		responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
 	}
 	if ct := resp.Header.Get("Content-Type"); ct != "" {
 		c.Writer.Header().Set("Content-Type", ct)
 	} else {
 		c.Writer.Header().Set("Content-Type", "application/json")
 	}
 	c.Writer.WriteHeader(http.StatusOK)
 	_, _ = c.Writer.Write(respBody)
 	return &OpenAIForwardResult{
 		RequestID:     requestID,
 		Usage:         usage,
 		Model:         originalModel,
 		BillingModel:  billingModel,
 		UpstreamModel: upstreamModel,
 		Stream:        false,
 		Duration:      time.Since(startTime),
 	}, nil
 }
 // extractCCStreamUsage 从单个 CC 流式 chunk 的 payload 中提取 usage 字段。
 // CC 协议中 usage 仅出现在末尾 chunk（且仅当客户端请求 stream_options.include_usage
 // 时），但上游可能在多个 chunk 中重复——总是用最新值。
 func extractCCStreamUsage(payload string) *OpenAIUsage {
 	usageResult := gjson.Get(payload, "usage")
 	if !usageResult.Exists() || !usageResult.IsObject() {
 		return nil
 	}
 	u := OpenAIUsage{
 		InputTokens:  int(gjson.Get(payload, "usage.prompt_tokens").Int()),
 		OutputTokens: int(gjson.Get(payload, "usage.completion_tokens").Int()),
 	}
 	if cached := gjson.Get(payload, "usage.prompt_tokens_details.cached_tokens"); cached.Exists() {
 		u.CacheReadInputTokens = int(cached.Int())
 	}
 	return &u
 }
 // buildOpenAIChatCompletionsURL 拼接上游 Chat Completions 端点 URL。
 //
 //   - base 已是 /chat/completions：原样返回
 //   - base 以 /v1 结尾：追加 /chat/completions
 //   - 其他情况：追加 /v1/chat/completions
 //
 // 与 buildOpenAIResponsesURL 是姐妹函数。
 func buildOpenAIChatCompletionsURL(base string) string {
 	normalized := strings.TrimRight(strings.TrimSpace(base), "/")
 	if strings.HasSuffix(normalized, "/chat/completions") {
 		return normalized
 	}
 	if strings.HasSuffix(normalized, "/v1") {
 		return normalized + "/chat/completions"
 	}
 	return normalized + "/v1/chat/completions"
 }