mirror of
https://gitee.com/wanwujie/sub2api
synced 2026-05-04 21:20:51 +08:00
fix(openai-gateway): route APIKey accounts to /v1/chat/completions when upstream lacks Responses API
OpenAI APIKey accounts with base_url pointing to third-party OpenAI-compatible
upstreams (DeepSeek, Kimi, GLM, Qwen, etc.) were failing because the gateway
unconditionally converted Chat Completions requests to Responses format and
forwarded to {base_url}/v1/responses, which only exists on OpenAI's official
endpoint.
Detection-based routing:
- Probe upstream capability on account create/update via a minimal POST to
/v1/responses; HTTP 404/405 means 'unsupported', any other response means
'supported'.
- Persist result as accounts.extra.openai_responses_supported (bool).
- ForwardAsChatCompletions branches at function entry: APIKey accounts with
explicit support=false go through new forwardAsRawChatCompletions which
passthrough-forwards CC body to /v1/chat/completions without protocol
conversion.
Default behavior for accounts without the marker preserves the legacy
'always Responses' path — existing OpenAI APIKey accounts that were working
before this change continue to work without modification (the 'reality is
evidence' principle: an account that has been running implies upstream
capability).
Probe is fired async after Create / Update / BatchCreate; failures only log,
never block the admin flow. BulkUpdate omitted (low signal of base_url
changes; can be added if needed).
Implementation:
- New pkg internal/pkg/openai_compat: marker key + ShouldUseResponsesAPI
- New service file openai_apikey_responses_probe.go: probe + persist
- New service file openai_gateway_chat_completions_raw.go: CC pass-through
- Account test endpoint short-circuits with explicit message for
probed-unsupported accounts (full CC test path is a TODO)
Zero schema changes, zero migrations, zero frontend changes, zero wire
modifications — all wired through existing AccountTestService injection.
Closes: DeepSeek-OpenAI account (id=128) production failure
This commit is contained in:
@@ -637,9 +637,39 @@ func (h *AccountHandler) Update(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// OpenAI APIKey: credentials 修改后重新探测上游能力(base_url/api_key 可能变更)。
|
||||
// 异步执行,探测失败不影响账号更新响应。
|
||||
if len(req.Credentials) > 0 {
|
||||
h.scheduleOpenAIResponsesProbe(account)
|
||||
}
|
||||
|
||||
response.Success(c, h.buildAccountResponseWithRuntime(c.Request.Context(), account))
|
||||
}
|
||||
|
||||
// scheduleOpenAIResponsesProbe 异步触发 OpenAI APIKey 账号的 Responses API 能力探测。
|
||||
//
|
||||
// 仅对 platform=openai && type=apikey 账号生效;其他账号无操作。
|
||||
// 探测本身在 goroutine 中执行(会发一次 HTTP 请求到上游),不会阻塞
|
||||
// 当前请求。探测错误仅记录日志,不向上下文传播:探测失败时标记保持缺失,
|
||||
// 网关会按"现状即证据"默认走 Responses。
|
||||
func (h *AccountHandler) scheduleOpenAIResponsesProbe(account *service.Account) {
|
||||
if account == nil || account.Platform != service.PlatformOpenAI || account.Type != service.AccountTypeAPIKey {
|
||||
return
|
||||
}
|
||||
if h.accountTestService == nil {
|
||||
return
|
||||
}
|
||||
accountID := account.ID
|
||||
go func() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
slog.Error("openai_responses_probe_panic", "account_id", accountID, "recover", r)
|
||||
}
|
||||
}()
|
||||
h.accountTestService.ProbeOpenAIAPIKeyResponsesSupport(context.Background(), accountID)
|
||||
}()
|
||||
}
|
||||
|
||||
// Delete handles deleting an account
|
||||
// DELETE /api/v1/admin/accounts/:id
|
||||
func (h *AccountHandler) Delete(c *gin.Context) {
|
||||
@@ -1231,6 +1261,8 @@ func (h *AccountHandler) BatchCreate(c *gin.Context) {
|
||||
openaiPrivacyAccounts = append(openaiPrivacyAccounts, account)
|
||||
}
|
||||
}
|
||||
// OpenAI APIKey 账号异步探测 /v1/responses 能力。
|
||||
h.scheduleOpenAIResponsesProbe(account)
|
||||
success++
|
||||
results = append(results, gin.H{
|
||||
"name": item.Name,
|
||||
|
||||
75
backend/internal/pkg/openai_compat/upstream_capability.go
Normal file
75
backend/internal/pkg/openai_compat/upstream_capability.go
Normal file
@@ -0,0 +1,75 @@
|
||||
// Package openai_compat 提供 OpenAI 协议族在不同上游间的能力差异判定工具。
|
||||
//
|
||||
// 背景:sub2api 的 OpenAI APIKey 账号通过 base_url 接入多种第三方 OpenAI 兼容上游
|
||||
// (DeepSeek、Kimi、GLM、Qwen 等)。这些上游普遍只支持 /v1/chat/completions,
|
||||
// 不存在 /v1/responses 端点。但网关历史代码无差别走 CC→Responses 转换并打到
|
||||
// /v1/responses,导致兼容上游 404。
|
||||
//
|
||||
// 本包提供基于"账号探测标记"的能力判定,配合
|
||||
// internal/service/openai_apikey_responses_probe.go 在创建/修改账号时一次性
|
||||
// 探测并落标。
|
||||
//
|
||||
// 设计取舍:
|
||||
// - 不维护静态 host 白名单——避免新增厂商时必须改代码(讨论沉淀于
|
||||
// pensieve/short-term/knowledge/upstream-capability-detection-design-tradeoffs)
|
||||
// - 标记缺失时默认 true(即"走 Responses"),保持与重构前老代码完全一致的存量
|
||||
// 账号行为("现状即证据"原则;详见
|
||||
// pensieve/short-term/maxims/preserve-existing-runtime-behavior-when-replacing-logic-in-stateful-systems)
|
||||
package openai_compat
|
||||
|
||||
// AccountResponsesSupport describes whether an account's upstream supports the
// OpenAI Responses API.
//
// Only meaningful for platform=openai + type=apikey accounts; other account
// types should never consult this package.
type AccountResponsesSupport int

const (
	// ResponsesSupportUnknown means the capability probe has not produced a
	// result yet (the extra key is absent). Routing should default to the
	// legacy "always Responses" path, matching pre-refactor behavior.
	ResponsesSupportUnknown AccountResponsesSupport = iota

	// ResponsesSupportYes means the probe confirmed the upstream exposes
	// /v1/responses.
	ResponsesSupportYes

	// ResponsesSupportNo means the probe confirmed /v1/responses is absent and
	// traffic should take the raw /v1/chat/completions passthrough path.
	ResponsesSupportNo
)

// ExtraKeyResponsesSupported is the accounts.extra JSON key that stores the
// probe result. Value type is bool: true=supported, false=unsupported,
// key absent=not probed.
const ExtraKeyResponsesSupported = "openai_responses_supported"

// ResolveResponsesSupport reads the probe marker from an account's extra map.
//
// A missing key or a non-bool value yields ResponsesSupportUnknown — callers
// should treat "not probed" as "keep the old behavior, use Responses" (see
// ShouldUseResponsesAPI).
func ResolveResponsesSupport(extra map[string]any) AccountResponsesSupport {
	// Indexing a nil map safely yields the zero value, and a failed bool
	// assertion covers "key absent", "value nil", and "wrong type" at once.
	supported, ok := extra[ExtraKeyResponsesSupported].(bool)
	if !ok {
		return ResponsesSupportUnknown
	}
	if supported {
		return ResponsesSupportYes
	}
	return ResponsesSupportNo
}

// ShouldUseResponsesAPI reports whether an inbound /v1/chat/completions
// request for an OpenAI APIKey account should take the "convert CC→Responses
// and hit upstream /v1/responses" path.
//
// It returns true in two cases:
//  1. the probe confirmed Responses support
//  2. the account was never probed (marker absent) — old behavior is kept
//     per the "reality is evidence" principle
//
// Only an account probed and confirmed as unsupported returns false, in which
// case the caller takes the raw CC passthrough path
// (see internal/service/openai_gateway_chat_completions_raw.go).
func ShouldUseResponsesAPI(extra map[string]any) bool {
	switch ResolveResponsesSupport(extra) {
	case ResponsesSupportNo:
		return false
	default:
		return true
	}
}
|
||||
@@ -0,0 +1,55 @@
|
||||
package openai_compat
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestResolveResponsesSupport(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
extra map[string]any
|
||||
want AccountResponsesSupport
|
||||
}{
|
||||
{"nil extra", nil, ResponsesSupportUnknown},
|
||||
{"empty extra", map[string]any{}, ResponsesSupportUnknown},
|
||||
{"key missing", map[string]any{"other": "value"}, ResponsesSupportUnknown},
|
||||
{"value true", map[string]any{ExtraKeyResponsesSupported: true}, ResponsesSupportYes},
|
||||
{"value false", map[string]any{ExtraKeyResponsesSupported: false}, ResponsesSupportNo},
|
||||
{"value wrong type string", map[string]any{ExtraKeyResponsesSupported: "true"}, ResponsesSupportUnknown},
|
||||
{"value wrong type number", map[string]any{ExtraKeyResponsesSupported: 1}, ResponsesSupportUnknown},
|
||||
{"value nil", map[string]any{ExtraKeyResponsesSupported: nil}, ResponsesSupportUnknown},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := ResolveResponsesSupport(tc.extra)
|
||||
if got != tc.want {
|
||||
t.Errorf("ResolveResponsesSupport(%v) = %v, want %v", tc.extra, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldUseResponsesAPI(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
extra map[string]any
|
||||
want bool
|
||||
}{
|
||||
// 关键不变量:未探测必须返回 true(保留旧行为)
|
||||
{"unknown defaults to true (preserve old behavior)", nil, true},
|
||||
{"unknown empty defaults to true", map[string]any{}, true},
|
||||
{"unknown wrong type defaults to true", map[string]any{ExtraKeyResponsesSupported: "yes"}, true},
|
||||
|
||||
// 已探测:标记决定
|
||||
{"explicitly supported", map[string]any{ExtraKeyResponsesSupported: true}, true},
|
||||
{"explicitly unsupported", map[string]any{ExtraKeyResponsesSupported: false}, false},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := ShouldUseResponsesAPI(tc.extra)
|
||||
if got != tc.want {
|
||||
t.Errorf("ShouldUseResponsesAPI(%v) = %v, want %v", tc.extra, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/geminicli"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
|
||||
"github.com/Wei-Shaw/sub2api/internal/util/urlvalidator"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/google/uuid"
|
||||
@@ -554,6 +555,15 @@ func (s *AccountTestService) testOpenAIAccountConnection(c *gin.Context, account
|
||||
if err != nil {
|
||||
return s.sendErrorAndEnd(c, fmt.Sprintf("Invalid base URL: %s", err.Error()))
|
||||
}
|
||||
// 账号已被探测为不支持 Responses(如 DeepSeek/Kimi 等)时,丢出明确提示。
|
||||
// 账号本身可用(网关会走 CC 直转),仅测试入口需要补齐 CC SSE 处理逻辑。
|
||||
// TODO:实现 CC 格式的账号测试路径(需专门的 CC SSE handler)。
|
||||
if !openai_compat.ShouldUseResponsesAPI(account.Extra) {
|
||||
return s.sendErrorAndEnd(c,
|
||||
"账号已被探测为不支持 OpenAI Responses API(如 DeepSeek/Kimi 等三方兼容上游),"+
|
||||
"账号本身可正常使用,但当前测试接口仅支持 Responses API 路径。请直接通过实际 API 调用验证。",
|
||||
)
|
||||
}
|
||||
apiURL = strings.TrimSuffix(normalizedBaseURL, "/") + "/responses"
|
||||
} else {
|
||||
return s.sendErrorAndEnd(c, fmt.Sprintf("Unsupported account type: %s", account.Type))
|
||||
|
||||
149
backend/internal/service/openai_apikey_responses_probe.go
Normal file
149
backend/internal/service/openai_apikey_responses_probe.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
|
||||
)
|
||||
|
||||
// openaiResponsesProbeTimeout 是探测请求的超时时长。
|
||||
// 探测必须快速失败——超时不应阻塞账号创建/更新流程。
|
||||
const openaiResponsesProbeTimeout = 8 * time.Second
|
||||
|
||||
// openaiResponsesProbePayload 是探测使用的最小 Responses 请求体。
|
||||
// 仅作能力探测,不期望响应内容质量;Stream=false 减少 SSE 解析开销。
|
||||
//
|
||||
// 注意:探测的目标是区分"端点存在"与"端点不存在"——只要上游返回非 404 的
|
||||
// 4xx/5xx(如 400 invalid_request_error / 401 unauthorized / 422 等),
|
||||
// 都视为"端点存在 → 支持 Responses"。仅 404 / 405 视为"端点不存在"。
|
||||
func openaiResponsesProbePayload(modelID string) []byte {
|
||||
if strings.TrimSpace(modelID) == "" {
|
||||
modelID = openai.DefaultTestModel
|
||||
}
|
||||
body, _ := json.Marshal(map[string]any{
|
||||
"model": modelID,
|
||||
"input": []map[string]any{
|
||||
{
|
||||
"role": "user",
|
||||
"content": []map[string]any{
|
||||
{"type": "input_text", "text": "hi"},
|
||||
},
|
||||
},
|
||||
},
|
||||
"instructions": openai.DefaultInstructions,
|
||||
"stream": false,
|
||||
})
|
||||
return body
|
||||
}
|
||||
|
||||
// ProbeOpenAIAPIKeyResponsesSupport probes whether an OpenAI APIKey account's
// upstream exposes the /v1/responses endpoint and persists the result to
// accounts.extra.openai_responses_supported.
//
// Called after account create/update, and only effective for
// platform=openai && type=apikey accounts.
//
// Probe policy (see the package doc in internal/pkg/openai_compat):
//   - upstream 404 / 405 → unsupported, write false
//   - upstream 2xx / other 4xx (401/422/400 etc.) / 5xx → supported, write true
//   - network-level failure (connect error, timeout) → no marker written,
//     stays unknown (later requests still default to Responses per the
//     "reality is evidence" principle)
//
// The method is idempotent: repeated calls overwrite the marker with the
// latest probe result.
//
// On failure handling: the probe must never block account creation — a
// successfully created/updated account is sufficient; the probe result only
// optimizes later routing. All errors are logged only, never propagated.
func (s *AccountTestService) ProbeOpenAIAPIKeyResponsesSupport(ctx context.Context, accountID int64) {
	account, err := s.accountRepo.GetByID(ctx, accountID)
	if err != nil {
		logger.LegacyPrintf("service.openai_probe", "probe_load_account_failed: account_id=%d err=%v", accountID, err)
		return
	}
	if account.Platform != PlatformOpenAI || account.Type != AccountTypeAPIKey {
		// Only OpenAI APIKey accounts need probing; other account types have
		// no Responses/CC capability divergence.
		return
	}

	apiKey := account.GetOpenAIApiKey()
	if apiKey == "" {
		logger.LegacyPrintf("service.openai_probe", "probe_skip_no_apikey: account_id=%d", accountID)
		return
	}
	baseURL := account.GetOpenAIBaseURL()
	if baseURL == "" {
		baseURL = "https://api.openai.com"
	}
	normalizedBaseURL, err := s.validateUpstreamBaseURL(baseURL)
	if err != nil {
		logger.LegacyPrintf("service.openai_probe", "probe_invalid_baseurl: account_id=%d base_url=%q err=%v", accountID, baseURL, err)
		return
	}

	probeURL := strings.TrimSuffix(normalizedBaseURL, "/") + "/responses"

	// Bounded probe: openaiResponsesProbeTimeout caps the whole request so a
	// hung upstream cannot hold the goroutine.
	probeCtx, cancel := context.WithTimeout(ctx, openaiResponsesProbeTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(probeCtx, http.MethodPost, probeURL, bytes.NewReader(openaiResponsesProbePayload("")))
	if err != nil {
		logger.LegacyPrintf("service.openai_probe", "probe_build_request_failed: account_id=%d err=%v", accountID, err)
		return
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+apiKey)
	req.Header.Set("Accept", "application/json")

	proxyURL := ""
	if account.ProxyID != nil && account.Proxy != nil {
		proxyURL = account.Proxy.URL()
	}

	resp, err := s.httpUpstream.DoWithTLS(req, proxyURL, account.ID, account.Concurrency, s.tlsFPProfileService.ResolveTLSProfile(account))
	if err != nil {
		// Network-level failure: leave the marker unset (unknown); the next
		// probe or the gateway's default path handles it.
		logger.LegacyPrintf("service.openai_probe", "probe_request_failed: account_id=%d url=%s err=%v", accountID, probeURL, err)
		return
	}
	defer func() {
		// Drain (capped at 1 MiB) so the transport can reuse the connection,
		// then close.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1<<20))
		_ = resp.Body.Close()
	}()

	supported := isResponsesEndpointSupportedByStatus(resp.StatusCode)

	if err := s.accountRepo.UpdateExtra(ctx, accountID, map[string]any{
		openai_compat.ExtraKeyResponsesSupported: supported,
	}); err != nil {
		logger.LegacyPrintf("service.openai_probe", "probe_persist_failed: account_id=%d supported=%v err=%v", accountID, supported, err)
		return
	}

	logger.LegacyPrintf("service.openai_probe",
		"probe_done: account_id=%d base_url=%s status=%d supported=%v",
		accountID, normalizedBaseURL, resp.StatusCode, supported,
	)
}
|
||||
|
||||
// isResponsesEndpointSupportedByStatus decides, from the probe response's HTTP
// status code, whether the upstream exposes a /v1/responses endpoint.
//
// Key observation: third-party OpenAI-compatible upstreams (DeepSeek/Kimi etc.)
// uniformly answer unknown endpoints with 404 or 405, whereas OpenAI proper —
// or any upstream that does implement Responses — rejects the deliberately
// minimal probe body with a business error such as 400/422, proving the
// endpoint itself exists.
//
// Therefore only 404 and 405 mean "endpoint missing"; every other status means
// "endpoint exists". 5xx also counts as "exists" — a transient upstream outage
// must not be misread as lack of support.
func isResponsesEndpointSupportedByStatus(status int) bool {
	return status != http.StatusNotFound && status != http.StatusMethodNotAllowed
}
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
|
||||
"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -39,9 +40,18 @@ var cursorResponsesUnsupportedFields = []string{
|
||||
|
||||
// ForwardAsChatCompletions accepts a Chat Completions request body, converts it
|
||||
// to OpenAI Responses API format, forwards to the OpenAI upstream, and converts
|
||||
// the response back to Chat Completions format. All account types (OAuth and API
|
||||
// Key) go through the Responses API conversion path since the upstream only
|
||||
// exposes the /v1/responses endpoint.
|
||||
// the response back to Chat Completions format.
|
||||
//
|
||||
// 历史背景:该函数原本对所有 OpenAI 账号无差别走 CC→Responses 转换 + /v1/responses
|
||||
// 端点——这在 OAuth(ChatGPT 内部 API 仅支持 Responses)和官方 APIKey 账号上是
|
||||
// 正确的,但 sub2api 接入 DeepSeek/Kimi/GLM 等第三方 OpenAI 兼容上游后假设破裂:
|
||||
// 这些上游普遍只支持 /v1/chat/completions,无 /v1/responses 端点。
|
||||
//
|
||||
// 当前路由策略(基于账号探测标记,详见 openai_compat.ShouldUseResponsesAPI):
|
||||
// - APIKey 账号 + 探测确认不支持 Responses → 走 forwardAsRawChatCompletions
|
||||
// 直转上游 /v1/chat/completions,不做协议转换
|
||||
// - 其他所有情况(OAuth、APIKey 探测确认支持、未探测)→ 走原有 CC→Responses
|
||||
// 转换路径(保留旧行为,存量未探测账号零兼容破坏)
|
||||
func (s *OpenAIGatewayService) ForwardAsChatCompletions(
|
||||
ctx context.Context,
|
||||
c *gin.Context,
|
||||
@@ -50,6 +60,12 @@ func (s *OpenAIGatewayService) ForwardAsChatCompletions(
|
||||
promptCacheKey string,
|
||||
defaultMappedModel string,
|
||||
) (*OpenAIForwardResult, error) {
|
||||
// 入口分流:APIKey 账号 + 已探测且确认上游不支持 Responses,走 CC 直转。
|
||||
// 标记缺失(未探测)按"现状即证据"原则继续走下方原 Responses 转换路径。
|
||||
if account.Type == AccountTypeAPIKey && !openai_compat.ShouldUseResponsesAPI(account.Extra) {
|
||||
return s.forwardAsRawChatCompletions(ctx, c, account, body, defaultMappedModel)
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
// 1. Parse Chat Completions request
|
||||
|
||||
368
backend/internal/service/openai_gateway_chat_completions_raw.go
Normal file
368
backend/internal/service/openai_gateway_chat_completions_raw.go
Normal file
@@ -0,0 +1,368 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
|
||||
"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/tidwall/gjson"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// forwardAsRawChatCompletions forwards the client's Chat Completions request
// directly to the upstream `{base_url}/v1/chat/completions`, with **no**
// CC↔Responses protocol conversion.
//
// Applies when account.platform=openai && account.type=apikey && the upstream
// was probed and confirmed not to support the /v1/responses endpoint
// (DeepSeek/Kimi/GLM/Qwen and other third-party OpenAI-compatible upstreams).
//
// Key differences from ForwardAsChatCompletions:
//
//   - apicompat.ChatCompletionsToResponses is not called; the body is only
//     rewritten for the model ID
//   - the upstream URL targets /v1/chat/completions instead of /v1/responses
//   - streaming SSE is passed straight through to the client (upstream chunks
//     are already CC-formatted)
//   - non-streaming JSON is passed through verbatim; only usage is extracted
//   - no codex OAuth transform (the APIKey path has no OAuth)
//   - no prompt_cache_key injection (an OAuth-only mechanism)
//
// Entry point: openai_gateway_chat_completions.go::ForwardAsChatCompletions
// dispatches here at its top via openai_compat.ShouldUseResponsesAPI.
func (s *OpenAIGatewayService) forwardAsRawChatCompletions(
	ctx context.Context,
	c *gin.Context,
	account *Account,
	body []byte,
	defaultMappedModel string,
) (*OpenAIForwardResult, error) {
	startTime := time.Now()

	// 1. Parse minimal fields needed for routing/billing.
	originalModel := gjson.GetBytes(body, "model").String()
	if originalModel == "" {
		writeChatCompletionsError(c, http.StatusBadRequest, "invalid_request_error", "model is required")
		return nil, fmt.Errorf("missing model in request")
	}
	clientStream := gjson.GetBytes(body, "stream").Bool()
	includeUsage := gjson.GetBytes(body, "stream_options.include_usage").Bool()

	// 2. Resolve model mapping (same as ForwardAsChatCompletions).
	billingModel := resolveOpenAIForwardModel(account, originalModel, defaultMappedModel)
	upstreamModel := normalizeOpenAIModelForUpstream(account, billingModel)

	// 3. Rewrite model in body (no protocol conversion).
	upstreamBody := body
	if upstreamModel != originalModel {
		upstreamBody = ReplaceModelInBody(body, upstreamModel)
	}

	// 4. Apply OpenAI fast policy on the CC body. A policy block surfaces to
	// the client as 403; all other policy errors propagate without a write.
	updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, upstreamModel, upstreamBody)
	if policyErr != nil {
		var blocked *OpenAIFastBlockedError
		if errors.As(policyErr, &blocked) {
			writeChatCompletionsError(c, http.StatusForbidden, "permission_error", blocked.Message)
		}
		return nil, policyErr
	}
	upstreamBody = updatedBody

	logger.L().Debug("openai chat_completions raw: forwarding without protocol conversion",
		zap.Int64("account_id", account.ID),
		zap.String("original_model", originalModel),
		zap.String("billing_model", billingModel),
		zap.String("upstream_model", upstreamModel),
		zap.Bool("stream", clientStream),
	)

	// 5. Build upstream request.
	apiKey := account.GetOpenAIApiKey()
	if apiKey == "" {
		return nil, fmt.Errorf("account %d missing api_key", account.ID)
	}
	baseURL := account.GetOpenAIBaseURL()
	if baseURL == "" {
		baseURL = "https://api.openai.com"
	}
	validatedURL, err := s.validateUpstreamBaseURL(baseURL)
	if err != nil {
		return nil, fmt.Errorf("invalid base_url: %w", err)
	}
	targetURL := buildOpenAIChatCompletionsURL(validatedURL)

	upstreamReq, err := http.NewRequestWithContext(ctx, http.MethodPost, targetURL, bytes.NewReader(upstreamBody))
	if err != nil {
		return nil, fmt.Errorf("build upstream request: %w", err)
	}
	upstreamReq.Header.Set("Content-Type", "application/json")
	upstreamReq.Header.Set("Authorization", "Bearer "+apiKey)
	if clientStream {
		upstreamReq.Header.Set("Accept", "text/event-stream")
	} else {
		upstreamReq.Header.Set("Accept", "application/json")
	}

	// Whitelist passthrough headers (subset of openaiAllowedHeaders relevant to CC).
	for key, values := range c.Request.Header {
		lowerKey := strings.ToLower(key)
		if openaiAllowedHeaders[lowerKey] {
			for _, v := range values {
				upstreamReq.Header.Add(key, v)
			}
		}
	}
	customUA := account.GetOpenAIUserAgent()
	if customUA != "" {
		upstreamReq.Header.Set("user-agent", customUA)
	}

	// 6. Send request.
	// NOTE(review): the probe path guards with account.ProxyID != nil &&
	// account.Proxy != nil before using the proxy, while this path checks only
	// account.Proxy != nil — confirm both resolve identically for accounts
	// whose proxy record is loaded without a ProxyID.
	proxyURL := ""
	if account.Proxy != nil {
		proxyURL = account.Proxy.URL()
	}
	resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
	if err != nil {
		safeErr := sanitizeUpstreamErrorMessage(err.Error())
		setOpsUpstreamError(c, 0, safeErr, "")
		appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
			Platform:           account.Platform,
			AccountID:          account.ID,
			AccountName:        account.Name,
			UpstreamStatusCode: 0,
			Kind:               "request_error",
			Message:            safeErr,
		})
		writeChatCompletionsError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed")
		return nil, fmt.Errorf("upstream request failed: %s", safeErr)
	}
	defer func() { _ = resp.Body.Close() }()

	// 7. Handle error response with failover. The body is buffered (capped at
	// 2 MiB) and re-wrapped so downstream handlers can re-read it.
	if resp.StatusCode >= 400 {
		respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
		_ = resp.Body.Close()
		resp.Body = io.NopCloser(bytes.NewReader(respBody))

		upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody))
		upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
		if s.shouldFailoverOpenAIUpstreamResponse(resp.StatusCode, upstreamMsg, respBody) {
			upstreamDetail := ""
			if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
				maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
				if maxBytes <= 0 {
					maxBytes = 2048
				}
				upstreamDetail = truncateString(string(respBody), maxBytes)
			}
			appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
				Platform:           account.Platform,
				AccountID:          account.ID,
				AccountName:        account.Name,
				UpstreamStatusCode: resp.StatusCode,
				UpstreamRequestID:  resp.Header.Get("x-request-id"),
				Kind:               "failover",
				Message:            upstreamMsg,
				Detail:             upstreamDetail,
			})
			if s.rateLimitService != nil {
				s.rateLimitService.HandleUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
			}
			return nil, &UpstreamFailoverError{
				StatusCode:             resp.StatusCode,
				ResponseBody:           respBody,
				RetryableOnSameAccount: account.IsPoolMode() && (isPoolModeRetryableStatus(resp.StatusCode) || isOpenAITransientProcessingError(resp.StatusCode, upstreamMsg, respBody)),
			}
		}
		return s.handleChatCompletionsErrorResponse(resp, c, account)
	}

	// 8. Forward response (streaming vs buffered).
	if clientStream {
		return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, includeUsage, startTime)
	}
	return s.bufferRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime)
}
|
||||
|
||||
// streamRawChatCompletions passes the upstream CC SSE stream through to the
// client unchanged while extracting usage (per the OpenAI CC protocol, usage
// arrives in a chunk just before the trailing [DONE]).
func (s *OpenAIGatewayService) streamRawChatCompletions(
	c *gin.Context,
	resp *http.Response,
	originalModel string,
	billingModel string,
	upstreamModel string,
	includeUsage bool,
	startTime time.Time,
) (*OpenAIForwardResult, error) {
	requestID := resp.Header.Get("x-request-id")

	if s.responseHeaderFilter != nil {
		responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
	}
	// Standard SSE headers; X-Accel-Buffering disables proxy buffering so
	// chunks reach the client immediately.
	c.Writer.Header().Set("Content-Type", "text/event-stream")
	c.Writer.Header().Set("Cache-Control", "no-cache")
	c.Writer.Header().Set("Connection", "keep-alive")
	c.Writer.Header().Set("X-Accel-Buffering", "no")
	c.Writer.WriteHeader(http.StatusOK)

	scanner := bufio.NewScanner(resp.Body)
	maxLineSize := defaultMaxLineSize
	if s.cfg != nil && s.cfg.Gateway.MaxLineSize > 0 {
		maxLineSize = s.cfg.Gateway.MaxLineSize
	}
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineSize)

	var usage OpenAIUsage
	var firstTokenMs *int

	for scanner.Scan() {
		line := scanner.Text()
		// Direct passthrough: write each line + newline. A failed client write
		// ends forwarding but still returns the usage collected so far.
		if _, werr := c.Writer.WriteString(line + "\n"); werr != nil {
			logger.L().Debug("openai chat_completions raw: client write failed",
				zap.Error(werr),
				zap.String("request_id", requestID),
			)
			break
		}
		if line == "" {
			c.Writer.Flush()
			continue
		}
		c.Writer.Flush()

		// Track first-token latency on the first non-empty data line.
		if firstTokenMs == nil && strings.HasPrefix(line, "data: ") && line != "data: [DONE]" {
			elapsed := int(time.Since(startTime).Milliseconds())
			firstTokenMs = &elapsed
		}

		// Extract usage from any chunk that carries it (CC streams typically
		// put usage in the final chunk before [DONE], but it may also appear
		// elsewhere — the latest value wins).
		if strings.HasPrefix(line, "data: ") && line != "data: [DONE]" {
			payload := line[6:]
			if u := extractCCStreamUsage(payload); u != nil {
				usage = *u
			}
		}
	}

	if err := scanner.Err(); err != nil {
		// Client cancellation/deadline is expected churn; only log genuine
		// read errors.
		if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) {
			logger.L().Warn("openai chat_completions raw: stream read error",
				zap.Error(err),
				zap.String("request_id", requestID),
			)
		}
	}

	_ = includeUsage // Under the CC protocol the client's stream_options drives usage inclusion and the upstream handles it; we only extract.

	return &OpenAIForwardResult{
		RequestID:     requestID,
		Usage:         usage,
		Model:         originalModel,
		BillingModel:  billingModel,
		UpstreamModel: upstreamModel,
		Stream:        true,
		Duration:      time.Since(startTime),
		FirstTokenMs:  firstTokenMs,
	}, nil
}
|
||||
|
||||
// bufferRawChatCompletions passes the upstream's non-streaming CC JSON
// response through to the client verbatim and extracts usage for billing.
func (s *OpenAIGatewayService) bufferRawChatCompletions(
	c *gin.Context,
	resp *http.Response,
	originalModel string,
	billingModel string,
	upstreamModel string,
	startTime time.Time,
) (*OpenAIForwardResult, error) {
	requestID := resp.Header.Get("x-request-id")

	// Buffer the whole body (capped at 32 MiB) so it can be both parsed for
	// usage and replayed to the client.
	respBody, err := io.ReadAll(io.LimitReader(resp.Body, 32<<20))
	if err != nil {
		writeChatCompletionsError(c, http.StatusBadGateway, "api_error", "Failed to read upstream response")
		return nil, fmt.Errorf("read upstream body: %w", err)
	}

	// Best-effort usage extraction: a decode failure or a missing usage field
	// simply leaves the zero OpenAIUsage.
	var ccResp apicompat.ChatCompletionsResponse
	var usage OpenAIUsage
	if err := json.Unmarshal(respBody, &ccResp); err == nil && ccResp.Usage != nil {
		usage = OpenAIUsage{
			InputTokens:  ccResp.Usage.PromptTokens,
			OutputTokens: ccResp.Usage.CompletionTokens,
		}
		if ccResp.Usage.PromptTokensDetails != nil {
			usage.CacheReadInputTokens = ccResp.Usage.PromptTokensDetails.CachedTokens
		}
	}

	if s.responseHeaderFilter != nil {
		responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
	}
	if ct := resp.Header.Get("Content-Type"); ct != "" {
		c.Writer.Header().Set("Content-Type", ct)
	} else {
		c.Writer.Header().Set("Content-Type", "application/json")
	}
	// NOTE(review): the upstream status is always rewritten to 200 here. This
	// path is only reached for statuses < 400, but a non-200 2xx would be
	// masked — confirm this matches the Responses-path behavior.
	c.Writer.WriteHeader(http.StatusOK)
	_, _ = c.Writer.Write(respBody)

	return &OpenAIForwardResult{
		RequestID:     requestID,
		Usage:         usage,
		Model:         originalModel,
		BillingModel:  billingModel,
		UpstreamModel: upstreamModel,
		Stream:        false,
		Duration:      time.Since(startTime),
	}, nil
}
|
||||
|
||||
// extractCCStreamUsage 从单个 CC 流式 chunk 的 payload 中提取 usage 字段。
|
||||
// CC 协议中 usage 仅出现在末尾 chunk(且仅当客户端请求 stream_options.include_usage
|
||||
// 时),但上游可能在多个 chunk 中重复——总是用最新值。
|
||||
func extractCCStreamUsage(payload string) *OpenAIUsage {
|
||||
usageResult := gjson.Get(payload, "usage")
|
||||
if !usageResult.Exists() || !usageResult.IsObject() {
|
||||
return nil
|
||||
}
|
||||
u := OpenAIUsage{
|
||||
InputTokens: int(gjson.Get(payload, "usage.prompt_tokens").Int()),
|
||||
OutputTokens: int(gjson.Get(payload, "usage.completion_tokens").Int()),
|
||||
}
|
||||
if cached := gjson.Get(payload, "usage.prompt_tokens_details.cached_tokens"); cached.Exists() {
|
||||
u.CacheReadInputTokens = int(cached.Int())
|
||||
}
|
||||
return &u
|
||||
}
|
||||
|
||||
// buildOpenAIChatCompletionsURL assembles the upstream Chat Completions
// endpoint URL from a base URL.
//
//   - base already ends in /chat/completions: returned unchanged
//   - base ends in /v1: /chat/completions is appended
//   - anything else: /v1/chat/completions is appended
//
// Sibling of buildOpenAIResponsesURL.
func buildOpenAIChatCompletionsURL(base string) string {
	trimmed := strings.TrimRight(strings.TrimSpace(base), "/")
	switch {
	case strings.HasSuffix(trimmed, "/chat/completions"):
		return trimmed
	case strings.HasSuffix(trimmed, "/v1"):
		return trimmed + "/chat/completions"
	default:
		return trimmed + "/v1/chat/completions"
	}
}
|
||||
Reference in New Issue
Block a user