fix(openai-gateway): route APIKey accounts to /v1/chat/completions when upstream lacks Responses API

OpenAI APIKey accounts with base_url pointing to third-party OpenAI-compatible
upstreams (DeepSeek, Kimi, GLM, Qwen, etc.) were failing because the gateway
unconditionally converted Chat Completions requests to Responses format and
forwarded to {base_url}/v1/responses, which only exists on OpenAI's official
endpoint.

Detection-based routing:
- Probe upstream capability on account create/update via a minimal POST to
  /v1/responses; HTTP 404/405 means 'unsupported', any other response means
  'supported'.
- Persist result as accounts.extra.openai_responses_supported (bool).
- ForwardAsChatCompletions branches at function entry: APIKey accounts with
  explicit support=false go through new forwardAsRawChatCompletions which
  passthrough-forwards CC body to /v1/chat/completions without protocol
  conversion.

Default behavior for accounts without the marker preserves the legacy
'always Responses' path — existing OpenAI APIKey accounts that were working
before this change continue to work without modification (the 'reality is
evidence' principle: an account that has been running implies upstream
capability).

Probe is fired async after Create / Update / BatchCreate; failures only log,
never block the admin flow. BulkUpdate omitted (low signal of base_url
changes; can be added if needed).

Implementation:
- New pkg internal/pkg/openai_compat: marker key + ShouldUseResponsesAPI
- New service file openai_apikey_responses_probe.go: probe + persist
- New service file openai_gateway_chat_completions_raw.go: CC pass-through
- Account test endpoint short-circuits with explicit message for
  probed-unsupported accounts (full CC test path is a TODO)

Zero schema changes, zero migrations, zero frontend changes, zero wire
modifications — all wired through existing AccountTestService injection.

Closes: DeepSeek-OpenAI account (id=128) production failure
This commit is contained in:
alfadb-bot
2026-04-30 19:25:45 +08:00
parent 48912014a1
commit 4e4cc80971
7 changed files with 708 additions and 3 deletions

View File

@@ -637,9 +637,39 @@ func (h *AccountHandler) Update(c *gin.Context) {
return return
} }
// OpenAI APIKey: credentials 修改后重新探测上游能力base_url/api_key 可能变更)。
// 异步执行,探测失败不影响账号更新响应。
if len(req.Credentials) > 0 {
h.scheduleOpenAIResponsesProbe(account)
}
response.Success(c, h.buildAccountResponseWithRuntime(c.Request.Context(), account)) response.Success(c, h.buildAccountResponseWithRuntime(c.Request.Context(), account))
} }
// scheduleOpenAIResponsesProbe 异步触发 OpenAI APIKey 账号的 Responses API 能力探测。
//
// 仅对 platform=openai && type=apikey 账号生效;其他账号无操作。
// 探测本身在 goroutine 中执行(会发一次 HTTP 请求到上游),不会阻塞
// 当前请求。探测错误仅记录日志,不向上下文传播:探测失败时标记保持缺失,
// 网关会按"现状即证据"默认走 Responses。
func (h *AccountHandler) scheduleOpenAIResponsesProbe(account *service.Account) {
if account == nil || account.Platform != service.PlatformOpenAI || account.Type != service.AccountTypeAPIKey {
return
}
if h.accountTestService == nil {
return
}
accountID := account.ID
go func() {
defer func() {
if r := recover(); r != nil {
slog.Error("openai_responses_probe_panic", "account_id", accountID, "recover", r)
}
}()
h.accountTestService.ProbeOpenAIAPIKeyResponsesSupport(context.Background(), accountID)
}()
}
// Delete handles deleting an account // Delete handles deleting an account
// DELETE /api/v1/admin/accounts/:id // DELETE /api/v1/admin/accounts/:id
func (h *AccountHandler) Delete(c *gin.Context) { func (h *AccountHandler) Delete(c *gin.Context) {
@@ -1231,6 +1261,8 @@ func (h *AccountHandler) BatchCreate(c *gin.Context) {
openaiPrivacyAccounts = append(openaiPrivacyAccounts, account) openaiPrivacyAccounts = append(openaiPrivacyAccounts, account)
} }
} }
// OpenAI APIKey 账号异步探测 /v1/responses 能力。
h.scheduleOpenAIResponsesProbe(account)
success++ success++
results = append(results, gin.H{ results = append(results, gin.H{
"name": item.Name, "name": item.Name,

View File

@@ -0,0 +1,75 @@
// Package openai_compat 提供 OpenAI 协议族在不同上游间的能力差异判定工具。
//
// 背景sub2api 的 OpenAI APIKey 账号通过 base_url 接入多种第三方 OpenAI 兼容上游
// DeepSeek、Kimi、GLM、Qwen 等)。这些上游普遍只支持 /v1/chat/completions
// 不存在 /v1/responses 端点。但网关历史代码无差别走 CC→Responses 转换并打到
// /v1/responses导致兼容上游 404。
//
// 本包提供基于"账号探测标记"的能力判定,配合
// internal/service/openai_apikey_responses_probe.go 在创建/修改账号时一次性
// 探测并落标。
//
// 设计取舍:
// - 不维护静态 host 白名单——避免新增厂商时必须改代码(讨论沉淀于
// pensieve/short-term/knowledge/upstream-capability-detection-design-tradeoffs
// - 标记缺失时默认 true即"走 Responses"),保持与重构前老代码完全一致的存量
// 账号行为("现状即证据"原则;详见
// pensieve/short-term/maxims/preserve-existing-runtime-behavior-when-replacing-logic-in-stateful-systems
package openai_compat
// AccountResponsesSupport 描述账号上游对 OpenAI Responses API 的支持状态。
//
// 仅用于 platform=openai + type=apikey 的账号;其他账号类型不应调用本包判定。
type AccountResponsesSupport int
const (
// ResponsesSupportUnknown 表示账号尚未完成能力探测extra 字段缺失)。
// 上游路由层应按"现状即证据"原则默认走 Responses保持与重构前一致。
ResponsesSupportUnknown AccountResponsesSupport = iota
// ResponsesSupportYes 探测确认上游支持 /v1/responses。
ResponsesSupportYes
// ResponsesSupportNo 探测确认上游不支持 /v1/responses应走
// /v1/chat/completions 直转路径。
ResponsesSupportNo
)
// ExtraKeyResponsesSupported 是 accounts.extra JSON 中存储探测结果的键名。
// 值类型为 booltrue=支持、false=不支持、键缺失=未探测。
const ExtraKeyResponsesSupported = "openai_responses_supported"
// ResolveResponsesSupport 从账号的 extra map 中读取探测标记。
//
// 标记缺失或类型不匹配时返回 ResponsesSupportUnknown——调用方应按
// "未探测=保留旧行为=走 Responses" 处理(参见 ShouldUseResponsesAPI
func ResolveResponsesSupport(extra map[string]any) AccountResponsesSupport {
if extra == nil {
return ResponsesSupportUnknown
}
v, ok := extra[ExtraKeyResponsesSupported]
if !ok {
return ResponsesSupportUnknown
}
supported, ok := v.(bool)
if !ok {
return ResponsesSupportUnknown
}
if supported {
return ResponsesSupportYes
}
return ResponsesSupportNo
}
// ShouldUseResponsesAPI 判断 OpenAI APIKey 账号的入站 /v1/chat/completions 请求
// 是否应走"CC→Responses 转换 + 上游 /v1/responses"路径。
//
// 返回 true 的两种情况:
// 1. 账号已探测确认支持 Responses
// 2. 账号未探测(标记缺失)——按"现状即证据"原则保留旧行为
//
// 仅当账号已探测且确认不支持时返回 false此时调用方应走 CC 直转路径
// (详见 internal/service/openai_gateway_chat_completions_raw.go
func ShouldUseResponsesAPI(extra map[string]any) bool {
return ResolveResponsesSupport(extra) != ResponsesSupportNo
}

View File

@@ -0,0 +1,55 @@
package openai_compat
import "testing"
func TestResolveResponsesSupport(t *testing.T) {
tests := []struct {
name string
extra map[string]any
want AccountResponsesSupport
}{
{"nil extra", nil, ResponsesSupportUnknown},
{"empty extra", map[string]any{}, ResponsesSupportUnknown},
{"key missing", map[string]any{"other": "value"}, ResponsesSupportUnknown},
{"value true", map[string]any{ExtraKeyResponsesSupported: true}, ResponsesSupportYes},
{"value false", map[string]any{ExtraKeyResponsesSupported: false}, ResponsesSupportNo},
{"value wrong type string", map[string]any{ExtraKeyResponsesSupported: "true"}, ResponsesSupportUnknown},
{"value wrong type number", map[string]any{ExtraKeyResponsesSupported: 1}, ResponsesSupportUnknown},
{"value nil", map[string]any{ExtraKeyResponsesSupported: nil}, ResponsesSupportUnknown},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
got := ResolveResponsesSupport(tc.extra)
if got != tc.want {
t.Errorf("ResolveResponsesSupport(%v) = %v, want %v", tc.extra, got, tc.want)
}
})
}
}
func TestShouldUseResponsesAPI(t *testing.T) {
tests := []struct {
name string
extra map[string]any
want bool
}{
// 关键不变量:未探测必须返回 true保留旧行为
{"unknown defaults to true (preserve old behavior)", nil, true},
{"unknown empty defaults to true", map[string]any{}, true},
{"unknown wrong type defaults to true", map[string]any{ExtraKeyResponsesSupported: "yes"}, true},
// 已探测:标记决定
{"explicitly supported", map[string]any{ExtraKeyResponsesSupported: true}, true},
{"explicitly unsupported", map[string]any{ExtraKeyResponsesSupported: false}, false},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
got := ShouldUseResponsesAPI(tc.extra)
if got != tc.want {
t.Errorf("ShouldUseResponsesAPI(%v) = %v, want %v", tc.extra, got, tc.want)
}
})
}
}

View File

@@ -21,6 +21,7 @@ import (
"github.com/Wei-Shaw/sub2api/internal/pkg/claude" "github.com/Wei-Shaw/sub2api/internal/pkg/claude"
"github.com/Wei-Shaw/sub2api/internal/pkg/geminicli" "github.com/Wei-Shaw/sub2api/internal/pkg/geminicli"
"github.com/Wei-Shaw/sub2api/internal/pkg/openai" "github.com/Wei-Shaw/sub2api/internal/pkg/openai"
"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
"github.com/Wei-Shaw/sub2api/internal/util/urlvalidator" "github.com/Wei-Shaw/sub2api/internal/util/urlvalidator"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/google/uuid" "github.com/google/uuid"
@@ -554,6 +555,15 @@ func (s *AccountTestService) testOpenAIAccountConnection(c *gin.Context, account
if err != nil { if err != nil {
return s.sendErrorAndEnd(c, fmt.Sprintf("Invalid base URL: %s", err.Error())) return s.sendErrorAndEnd(c, fmt.Sprintf("Invalid base URL: %s", err.Error()))
} }
// 账号已被探测为不支持 Responses如 DeepSeek/Kimi 等)时,丢出明确提示。
// 账号本身可用(网关会走 CC 直转),仅测试入口需要补齐 CC SSE 处理逻辑。
// TODO实现 CC 格式的账号测试路径(需专门的 CC SSE handler
if !openai_compat.ShouldUseResponsesAPI(account.Extra) {
return s.sendErrorAndEnd(c,
"账号已被探测为不支持 OpenAI Responses API如 DeepSeek/Kimi 等三方兼容上游),"+
"账号本身可正常使用,但当前测试接口仅支持 Responses API 路径。请直接通过实际 API 调用验证。",
)
}
apiURL = strings.TrimSuffix(normalizedBaseURL, "/") + "/responses" apiURL = strings.TrimSuffix(normalizedBaseURL, "/") + "/responses"
} else { } else {
return s.sendErrorAndEnd(c, fmt.Sprintf("Unsupported account type: %s", account.Type)) return s.sendErrorAndEnd(c, fmt.Sprintf("Unsupported account type: %s", account.Type))

View File

@@ -0,0 +1,149 @@
package service
import (
"bytes"
"context"
"encoding/json"
"io"
"net/http"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
)
// openaiResponsesProbeTimeout 是探测请求的超时时长。
// 探测必须快速失败——超时不应阻塞账号创建/更新流程。
const openaiResponsesProbeTimeout = 8 * time.Second
// openaiResponsesProbePayload 是探测使用的最小 Responses 请求体。
// 仅作能力探测不期望响应内容质量Stream=false 减少 SSE 解析开销。
//
// 注意:探测的目标是区分"端点存在"与"端点不存在"——只要上游返回非 404 的
// 4xx/5xx如 400 invalid_request_error / 401 unauthorized / 422 等),
// 都视为"端点存在 → 支持 Responses"。仅 404 / 405 视为"端点不存在"。
func openaiResponsesProbePayload(modelID string) []byte {
if strings.TrimSpace(modelID) == "" {
modelID = openai.DefaultTestModel
}
body, _ := json.Marshal(map[string]any{
"model": modelID,
"input": []map[string]any{
{
"role": "user",
"content": []map[string]any{
{"type": "input_text", "text": "hi"},
},
},
},
"instructions": openai.DefaultInstructions,
"stream": false,
})
return body
}
// ProbeOpenAIAPIKeyResponsesSupport 探测 OpenAI APIKey 账号上游是否支持
// /v1/responses 端点,并将结果持久化到 accounts.extra.openai_responses_supported。
//
// 调用时机:账号创建/更新后,且仅当 platform=openai && type=apikey 时。
//
// 探测策略(参见包文档 internal/pkg/openai_compat
// - 上游 404 / 405 → 不支持,写 false
// - 上游 2xx / 其他 4xx401/422/400 等)/ 5xx → 支持,写 true
// - 网络层失败(连接错误、超时)→ 不写标记,保持 unknown
// (后续请求仍按"现状即证据"默认走 Responses
//
// 该方法是幂等的:重复调用会以最新探测结果覆盖标记。
//
// 关于失败处理:探测本身的失败不应阻塞账号创建——账号能创建/更新成功就够了,
// 探测结果只影响后续路由优化。所有错误都仅记录日志,不向调用方传播。
func (s *AccountTestService) ProbeOpenAIAPIKeyResponsesSupport(ctx context.Context, accountID int64) {
account, err := s.accountRepo.GetByID(ctx, accountID)
if err != nil {
logger.LegacyPrintf("service.openai_probe", "probe_load_account_failed: account_id=%d err=%v", accountID, err)
return
}
if account.Platform != PlatformOpenAI || account.Type != AccountTypeAPIKey {
// 仅 OpenAI APIKey 账号需要探测;其他账号类型无能力差异。
return
}
apiKey := account.GetOpenAIApiKey()
if apiKey == "" {
logger.LegacyPrintf("service.openai_probe", "probe_skip_no_apikey: account_id=%d", accountID)
return
}
baseURL := account.GetOpenAIBaseURL()
if baseURL == "" {
baseURL = "https://api.openai.com"
}
normalizedBaseURL, err := s.validateUpstreamBaseURL(baseURL)
if err != nil {
logger.LegacyPrintf("service.openai_probe", "probe_invalid_baseurl: account_id=%d base_url=%q err=%v", accountID, baseURL, err)
return
}
probeURL := strings.TrimSuffix(normalizedBaseURL, "/") + "/responses"
probeCtx, cancel := context.WithTimeout(ctx, openaiResponsesProbeTimeout)
defer cancel()
req, err := http.NewRequestWithContext(probeCtx, http.MethodPost, probeURL, bytes.NewReader(openaiResponsesProbePayload("")))
if err != nil {
logger.LegacyPrintf("service.openai_probe", "probe_build_request_failed: account_id=%d err=%v", accountID, err)
return
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+apiKey)
req.Header.Set("Accept", "application/json")
proxyURL := ""
if account.ProxyID != nil && account.Proxy != nil {
proxyURL = account.Proxy.URL()
}
resp, err := s.httpUpstream.DoWithTLS(req, proxyURL, account.ID, account.Concurrency, s.tlsFPProfileService.ResolveTLSProfile(account))
if err != nil {
// 网络层失败:不写标记,保持 unknown下次重试或由网关 fallback 处理
logger.LegacyPrintf("service.openai_probe", "probe_request_failed: account_id=%d url=%s err=%v", accountID, probeURL, err)
return
}
defer func() {
_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 1<<20))
_ = resp.Body.Close()
}()
supported := isResponsesEndpointSupportedByStatus(resp.StatusCode)
if err := s.accountRepo.UpdateExtra(ctx, accountID, map[string]any{
openai_compat.ExtraKeyResponsesSupported: supported,
}); err != nil {
logger.LegacyPrintf("service.openai_probe", "probe_persist_failed: account_id=%d supported=%v err=%v", accountID, supported, err)
return
}
logger.LegacyPrintf("service.openai_probe",
"probe_done: account_id=%d base_url=%s status=%d supported=%v",
accountID, normalizedBaseURL, resp.StatusCode, supported,
)
}
// isResponsesEndpointSupportedByStatus 根据探测响应的 HTTP 状态码判定上游
// 是否暴露 /v1/responses 端点。
//
// 关键观察:第三方 OpenAI 兼容上游DeepSeek/Kimi 等)对未知端点统一返回 404
// 或 405而 OpenAI 官方/有 Responses 实现的上游会因为请求体最简(缺字段)
// 返回 400/422 等业务错误,但端点本身存在。
//
// 因此:仅 404 和 405 视为"端点不存在",其他 status 视为"端点存在"。
//
// 5xx 也视为"端点存在"——上游偶发故障不应误判为不支持。
func isResponsesEndpointSupportedByStatus(status int) bool {
switch status {
case http.StatusNotFound, http.StatusMethodNotAllowed:
return false
}
return true
}

View File

@@ -14,6 +14,7 @@ import (
"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat" "github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger" "github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/Wei-Shaw/sub2api/internal/pkg/openai_compat"
"github.com/Wei-Shaw/sub2api/internal/util/responseheaders" "github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
@@ -39,9 +40,18 @@ var cursorResponsesUnsupportedFields = []string{
// ForwardAsChatCompletions accepts a Chat Completions request body, converts it // ForwardAsChatCompletions accepts a Chat Completions request body, converts it
// to OpenAI Responses API format, forwards to the OpenAI upstream, and converts // to OpenAI Responses API format, forwards to the OpenAI upstream, and converts
// the response back to Chat Completions format. All account types (OAuth and API // the response back to Chat Completions format.
// Key) go through the Responses API conversion path since the upstream only //
// exposes the /v1/responses endpoint. // 历史背景:该函数原本对所有 OpenAI 账号无差别走 CC→Responses 转换 + /v1/responses
// 端点——这在 OAuthChatGPT 内部 API 仅支持 Responses和官方 APIKey 账号上是
// 正确的,但 sub2api 接入 DeepSeek/Kimi/GLM 等第三方 OpenAI 兼容上游后假设破裂:
// 这些上游普遍只支持 /v1/chat/completions无 /v1/responses 端点。
//
// 当前路由策略(基于账号探测标记,详见 openai_compat.ShouldUseResponsesAPI
// - APIKey 账号 + 探测确认不支持 Responses → 走 forwardAsRawChatCompletions
// 直转上游 /v1/chat/completions不做协议转换
// - 其他所有情况OAuth、APIKey 探测确认支持、未探测)→ 走原有 CC→Responses
// 转换路径(保留旧行为,存量未探测账号零兼容破坏)
func (s *OpenAIGatewayService) ForwardAsChatCompletions( func (s *OpenAIGatewayService) ForwardAsChatCompletions(
ctx context.Context, ctx context.Context,
c *gin.Context, c *gin.Context,
@@ -50,6 +60,12 @@ func (s *OpenAIGatewayService) ForwardAsChatCompletions(
promptCacheKey string, promptCacheKey string,
defaultMappedModel string, defaultMappedModel string,
) (*OpenAIForwardResult, error) { ) (*OpenAIForwardResult, error) {
// 入口分流APIKey 账号 + 已探测且确认上游不支持 Responses走 CC 直转。
// 标记缺失(未探测)按"现状即证据"原则继续走下方原 Responses 转换路径。
if account.Type == AccountTypeAPIKey && !openai_compat.ShouldUseResponsesAPI(account.Extra) {
return s.forwardAsRawChatCompletions(ctx, c, account, body, defaultMappedModel)
}
startTime := time.Now() startTime := time.Now()
// 1. Parse Chat Completions request // 1. Parse Chat Completions request

View File

@@ -0,0 +1,368 @@
package service
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
"github.com/gin-gonic/gin"
"github.com/tidwall/gjson"
"go.uber.org/zap"
)
// forwardAsRawChatCompletions 直转客户端的 Chat Completions 请求到上游
// `{base_url}/v1/chat/completions`**不**做 CC↔Responses 协议转换。
//
// 适用场景account.platform=openai && account.type=apikey && 上游已被探测确认
// 不支持 /v1/responses 端点(如 DeepSeek/Kimi/GLM/Qwen 等第三方 OpenAI 兼容上游)。
//
// 与 ForwardAsChatCompletions 的关键差异:
//
// - 不调用 apicompat.ChatCompletionsToResponsesbody 仅做模型 ID 改写
// - 上游 URL 拼到 /v1/chat/completions 而非 /v1/responses
// - 流式响应 SSE 直接透传给客户端(上游 chunk 已是 CC 格式)
// - 非流式响应 JSON 直接透传,仅按需提取 usage
// - 不应用 codex OAuth transformAPIKey 路径无 OAuth
// - 不注入 prompt_cache_keyOAuth 专属机制)
//
// 调用入口openai_gateway_chat_completions.go::ForwardAsChatCompletions
// 在函数顶部按 openai_compat.ShouldUseResponsesAPI 分流。
func (s *OpenAIGatewayService) forwardAsRawChatCompletions(
ctx context.Context,
c *gin.Context,
account *Account,
body []byte,
defaultMappedModel string,
) (*OpenAIForwardResult, error) {
startTime := time.Now()
// 1. Parse minimal fields needed for routing/billing
originalModel := gjson.GetBytes(body, "model").String()
if originalModel == "" {
writeChatCompletionsError(c, http.StatusBadRequest, "invalid_request_error", "model is required")
return nil, fmt.Errorf("missing model in request")
}
clientStream := gjson.GetBytes(body, "stream").Bool()
includeUsage := gjson.GetBytes(body, "stream_options.include_usage").Bool()
// 2. Resolve model mapping (same as ForwardAsChatCompletions)
billingModel := resolveOpenAIForwardModel(account, originalModel, defaultMappedModel)
upstreamModel := normalizeOpenAIModelForUpstream(account, billingModel)
// 3. Rewrite model in body (no protocol conversion)
upstreamBody := body
if upstreamModel != originalModel {
upstreamBody = ReplaceModelInBody(body, upstreamModel)
}
// 4. Apply OpenAI fast policy on the CC body
updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, upstreamModel, upstreamBody)
if policyErr != nil {
var blocked *OpenAIFastBlockedError
if errors.As(policyErr, &blocked) {
writeChatCompletionsError(c, http.StatusForbidden, "permission_error", blocked.Message)
}
return nil, policyErr
}
upstreamBody = updatedBody
logger.L().Debug("openai chat_completions raw: forwarding without protocol conversion",
zap.Int64("account_id", account.ID),
zap.String("original_model", originalModel),
zap.String("billing_model", billingModel),
zap.String("upstream_model", upstreamModel),
zap.Bool("stream", clientStream),
)
// 5. Build upstream request
apiKey := account.GetOpenAIApiKey()
if apiKey == "" {
return nil, fmt.Errorf("account %d missing api_key", account.ID)
}
baseURL := account.GetOpenAIBaseURL()
if baseURL == "" {
baseURL = "https://api.openai.com"
}
validatedURL, err := s.validateUpstreamBaseURL(baseURL)
if err != nil {
return nil, fmt.Errorf("invalid base_url: %w", err)
}
targetURL := buildOpenAIChatCompletionsURL(validatedURL)
upstreamReq, err := http.NewRequestWithContext(ctx, http.MethodPost, targetURL, bytes.NewReader(upstreamBody))
if err != nil {
return nil, fmt.Errorf("build upstream request: %w", err)
}
upstreamReq.Header.Set("Content-Type", "application/json")
upstreamReq.Header.Set("Authorization", "Bearer "+apiKey)
if clientStream {
upstreamReq.Header.Set("Accept", "text/event-stream")
} else {
upstreamReq.Header.Set("Accept", "application/json")
}
// Whitelist passthrough headers (subset of openaiAllowedHeaders relevant to CC).
for key, values := range c.Request.Header {
lowerKey := strings.ToLower(key)
if openaiAllowedHeaders[lowerKey] {
for _, v := range values {
upstreamReq.Header.Add(key, v)
}
}
}
customUA := account.GetOpenAIUserAgent()
if customUA != "" {
upstreamReq.Header.Set("user-agent", customUA)
}
// 6. Send request
proxyURL := ""
if account.Proxy != nil {
proxyURL = account.Proxy.URL()
}
resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
if err != nil {
safeErr := sanitizeUpstreamErrorMessage(err.Error())
setOpsUpstreamError(c, 0, safeErr, "")
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: 0,
Kind: "request_error",
Message: safeErr,
})
writeChatCompletionsError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed")
return nil, fmt.Errorf("upstream request failed: %s", safeErr)
}
defer func() { _ = resp.Body.Close() }()
// 7. Handle error response with failover
if resp.StatusCode >= 400 {
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
_ = resp.Body.Close()
resp.Body = io.NopCloser(bytes.NewReader(respBody))
upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody))
upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
if s.shouldFailoverOpenAIUpstreamResponse(resp.StatusCode, upstreamMsg, respBody) {
upstreamDetail := ""
if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString(string(respBody), maxBytes)
}
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: resp.Header.Get("x-request-id"),
Kind: "failover",
Message: upstreamMsg,
Detail: upstreamDetail,
})
if s.rateLimitService != nil {
s.rateLimitService.HandleUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
}
return nil, &UpstreamFailoverError{
StatusCode: resp.StatusCode,
ResponseBody: respBody,
RetryableOnSameAccount: account.IsPoolMode() && (isPoolModeRetryableStatus(resp.StatusCode) || isOpenAITransientProcessingError(resp.StatusCode, upstreamMsg, respBody)),
}
}
return s.handleChatCompletionsErrorResponse(resp, c, account)
}
// 8. Forward response
if clientStream {
return s.streamRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, includeUsage, startTime)
}
return s.bufferRawChatCompletions(c, resp, originalModel, billingModel, upstreamModel, startTime)
}
// streamRawChatCompletions 透传上游 CC SSE 流到客户端,并提取 usage包括
// 末尾 [DONE] 之前的 chunk 中的 usage 字段,按 OpenAI CC 协议)。
func (s *OpenAIGatewayService) streamRawChatCompletions(
c *gin.Context,
resp *http.Response,
originalModel string,
billingModel string,
upstreamModel string,
includeUsage bool,
startTime time.Time,
) (*OpenAIForwardResult, error) {
requestID := resp.Header.Get("x-request-id")
if s.responseHeaderFilter != nil {
responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
}
c.Writer.Header().Set("Content-Type", "text/event-stream")
c.Writer.Header().Set("Cache-Control", "no-cache")
c.Writer.Header().Set("Connection", "keep-alive")
c.Writer.Header().Set("X-Accel-Buffering", "no")
c.Writer.WriteHeader(http.StatusOK)
scanner := bufio.NewScanner(resp.Body)
maxLineSize := defaultMaxLineSize
if s.cfg != nil && s.cfg.Gateway.MaxLineSize > 0 {
maxLineSize = s.cfg.Gateway.MaxLineSize
}
scanner.Buffer(make([]byte, 0, 64*1024), maxLineSize)
var usage OpenAIUsage
var firstTokenMs *int
for scanner.Scan() {
line := scanner.Text()
// Direct passthrough: write each line + blank line separator
if _, werr := c.Writer.WriteString(line + "\n"); werr != nil {
logger.L().Debug("openai chat_completions raw: client write failed",
zap.Error(werr),
zap.String("request_id", requestID),
)
break
}
if line == "" {
c.Writer.Flush()
continue
}
c.Writer.Flush()
// Track first token timing on first non-empty data line
if firstTokenMs == nil && strings.HasPrefix(line, "data: ") && line != "data: [DONE]" {
elapsed := int(time.Since(startTime).Milliseconds())
firstTokenMs = &elapsed
}
// Extract usage from any chunk that carries it (CC streams typically put
// usage in the final chunk before [DONE], but may also appear elsewhere).
if strings.HasPrefix(line, "data: ") && line != "data: [DONE]" {
payload := line[6:]
if u := extractCCStreamUsage(payload); u != nil {
usage = *u
}
}
}
if err := scanner.Err(); err != nil {
if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) {
logger.L().Warn("openai chat_completions raw: stream read error",
zap.Error(err),
zap.String("request_id", requestID),
)
}
}
_ = includeUsage // CC 协议下 usage 是否包含由客户端的 stream_options 决定,上游会自行处理;我们仅做提取。
return &OpenAIForwardResult{
RequestID: requestID,
Usage: usage,
Model: originalModel,
BillingModel: billingModel,
UpstreamModel: upstreamModel,
Stream: true,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
}, nil
}
// bufferRawChatCompletions 透传上游 CC 非流式 JSON 响应。
func (s *OpenAIGatewayService) bufferRawChatCompletions(
c *gin.Context,
resp *http.Response,
originalModel string,
billingModel string,
upstreamModel string,
startTime time.Time,
) (*OpenAIForwardResult, error) {
requestID := resp.Header.Get("x-request-id")
respBody, err := io.ReadAll(io.LimitReader(resp.Body, 32<<20))
if err != nil {
writeChatCompletionsError(c, http.StatusBadGateway, "api_error", "Failed to read upstream response")
return nil, fmt.Errorf("read upstream body: %w", err)
}
var ccResp apicompat.ChatCompletionsResponse
var usage OpenAIUsage
if err := json.Unmarshal(respBody, &ccResp); err == nil && ccResp.Usage != nil {
usage = OpenAIUsage{
InputTokens: ccResp.Usage.PromptTokens,
OutputTokens: ccResp.Usage.CompletionTokens,
}
if ccResp.Usage.PromptTokensDetails != nil {
usage.CacheReadInputTokens = ccResp.Usage.PromptTokensDetails.CachedTokens
}
}
if s.responseHeaderFilter != nil {
responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
}
if ct := resp.Header.Get("Content-Type"); ct != "" {
c.Writer.Header().Set("Content-Type", ct)
} else {
c.Writer.Header().Set("Content-Type", "application/json")
}
c.Writer.WriteHeader(http.StatusOK)
_, _ = c.Writer.Write(respBody)
return &OpenAIForwardResult{
RequestID: requestID,
Usage: usage,
Model: originalModel,
BillingModel: billingModel,
UpstreamModel: upstreamModel,
Stream: false,
Duration: time.Since(startTime),
}, nil
}
// extractCCStreamUsage 从单个 CC 流式 chunk 的 payload 中提取 usage 字段。
// CC 协议中 usage 仅出现在末尾 chunk且仅当客户端请求 stream_options.include_usage
// 时),但上游可能在多个 chunk 中重复——总是用最新值。
func extractCCStreamUsage(payload string) *OpenAIUsage {
usageResult := gjson.Get(payload, "usage")
if !usageResult.Exists() || !usageResult.IsObject() {
return nil
}
u := OpenAIUsage{
InputTokens: int(gjson.Get(payload, "usage.prompt_tokens").Int()),
OutputTokens: int(gjson.Get(payload, "usage.completion_tokens").Int()),
}
if cached := gjson.Get(payload, "usage.prompt_tokens_details.cached_tokens"); cached.Exists() {
u.CacheReadInputTokens = int(cached.Int())
}
return &u
}
// buildOpenAIChatCompletionsURL 拼接上游 Chat Completions 端点 URL。
//
// - base 已是 /chat/completions原样返回
// - base 以 /v1 结尾:追加 /chat/completions
// - 其他情况:追加 /v1/chat/completions
//
// 与 buildOpenAIResponsesURL 是姐妹函数。
func buildOpenAIChatCompletionsURL(base string) string {
normalized := strings.TrimRight(strings.TrimSpace(base), "/")
if strings.HasSuffix(normalized, "/chat/completions") {
return normalized
}
if strings.HasSuffix(normalized, "/v1") {
return normalized + "/chat/completions"
}
return normalized + "/v1/chat/completions"
}