2025-12-22 22:58:31 +08:00
package service
import (
"bufio"
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
2026-04-02 20:28:04 +08:00
"log/slog"
2026-02-28 15:01:20 +08:00
"math/rand"
2025-12-22 22:58:31 +08:00
"net/http"
2026-01-01 04:01:51 +08:00
"sort"
2025-12-23 16:26:07 +08:00
"strconv"
2025-12-22 22:58:31 +08:00
"strings"
2026-02-28 15:01:20 +08:00
"sync"
2026-01-04 20:19:07 +08:00
"sync/atomic"
2025-12-22 22:58:31 +08:00
"time"
2025-12-24 21:07:21 +08:00
"github.com/Wei-Shaw/sub2api/internal/config"
2026-04-07 19:30:45 +08:00
"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
2026-02-12 19:01:09 +08:00
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
2026-01-10 03:12:56 +08:00
"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
2026-01-02 17:40:57 +08:00
"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
"github.com/Wei-Shaw/sub2api/internal/util/urlvalidator"
2026-03-16 10:28:11 +08:00
"github.com/cespare/xxhash/v2"
2025-12-22 22:58:31 +08:00
"github.com/gin-gonic/gin"
2026-03-06 18:50:28 +08:00
"github.com/google/uuid"
2026-02-07 17:09:55 +08:00
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
2026-02-12 19:01:09 +08:00
"go.uber.org/zap"
2025-12-22 22:58:31 +08:00
)
const (
// ChatGPT internal API for OAuth accounts
chatgptCodexURL = "https://chatgpt.com/backend-api/codex/responses"
// OpenAI Platform API for API Key accounts (fallback)
openaiPlatformAPIURL = "https://api.openai.com/v1/responses"
openaiStickySessionTTL = time . Hour // 粘性会话TTL
2026-04-25 05:26:33 +00:00
codexCLIUserAgent = "codex_cli_rs/0.125.0"
2026-02-13 19:27:07 +08:00
// codex_cli_only 拒绝时单个请求头日志长度上限(字符)
codexCLIOnlyHeaderValueMaxBytes = 256
2025-12-22 22:58:31 +08:00
2026-02-12 09:41:37 +08:00
// OpenAIParsedRequestBodyKey 缓存 handler 侧已解析的请求体,避免重复解析。
OpenAIParsedRequestBodyKey = "openai_parsed_request_body"
2026-02-28 15:01:20 +08:00
// OpenAI WS Mode 失败后的重连次数上限(不含首次尝试)。
// 与 Codex 客户端保持一致:失败后最多重连 5 次。
openAIWSReconnectRetryLimit = 5
// OpenAI WS Mode 重连退避默认值(可由配置覆盖)。
openAIWSRetryBackoffInitialDefault = 120 * time . Millisecond
openAIWSRetryBackoffMaxDefault = 2 * time . Second
openAIWSRetryJitterRatioDefault = 0.2
2026-03-06 18:50:28 +08:00
openAICompactSessionSeedKey = "openai_compact_session_seed"
2026-04-25 05:26:33 +00:00
codexCLIVersion = "0.125.0"
2026-03-11 13:53:19 +08:00
// Codex 限额快照仅用于后台展示/诊断,不需要每个成功请求都立即落库。
openAICodexSnapshotPersistMinInterval = 30 * time . Second
2026-02-12 09:41:37 +08:00
)
2025-12-26 03:49:55 -08:00
2026-02-12 20:12:15 +08:00
// OpenAI allowed headers whitelist (for non-passthrough).
2025-12-22 22:58:31 +08:00
var openaiAllowedHeaders = map [ string ] bool {
2026-02-28 15:01:20 +08:00
"accept-language" : true ,
"content-type" : true ,
"conversation_id" : true ,
"user-agent" : true ,
"originator" : true ,
"session_id" : true ,
"x-codex-turn-state" : true ,
"x-codex-turn-metadata" : true ,
2025-12-22 22:58:31 +08:00
}
2026-02-12 20:12:15 +08:00
// OpenAI passthrough allowed headers whitelist.
// 透传模式下仅放行这些低风险请求头,避免将非标准/环境噪声头传给上游触发风控。
var openaiPassthroughAllowedHeaders = map [ string ] bool {
2026-02-28 15:01:20 +08:00
"accept" : true ,
"accept-language" : true ,
"content-type" : true ,
"conversation_id" : true ,
"openai-beta" : true ,
"user-agent" : true ,
"originator" : true ,
"session_id" : true ,
"x-codex-turn-state" : true ,
"x-codex-turn-metadata" : true ,
2026-02-12 20:12:15 +08:00
}
2026-02-13 19:27:07 +08:00
// codex_cli_only 拒绝时记录的请求头白名单(仅用于诊断日志,不参与上游透传)
var codexCLIOnlyDebugHeaderWhitelist = [ ] string {
"User-Agent" ,
"Content-Type" ,
"Accept" ,
"Accept-Language" ,
"OpenAI-Beta" ,
"Originator" ,
"Session_ID" ,
"Conversation_ID" ,
"X-Request-ID" ,
"X-Client-Request-ID" ,
"X-Forwarded-For" ,
"X-Real-IP" ,
}
2025-12-23 16:26:07 +08:00
// OpenAICodexUsageSnapshot represents Codex API usage limits from response headers
type OpenAICodexUsageSnapshot struct {
PrimaryUsedPercent * float64 ` json:"primary_used_percent,omitempty" `
PrimaryResetAfterSeconds * int ` json:"primary_reset_after_seconds,omitempty" `
PrimaryWindowMinutes * int ` json:"primary_window_minutes,omitempty" `
SecondaryUsedPercent * float64 ` json:"secondary_used_percent,omitempty" `
SecondaryResetAfterSeconds * int ` json:"secondary_reset_after_seconds,omitempty" `
SecondaryWindowMinutes * int ` json:"secondary_window_minutes,omitempty" `
PrimaryOverSecondaryPercent * float64 ` json:"primary_over_secondary_percent,omitempty" `
UpdatedAt string ` json:"updated_at,omitempty" `
}
2026-01-25 13:32:08 +08:00
// NormalizedCodexLimits contains normalized 5h/7d rate limit data
type NormalizedCodexLimits struct {
Used5hPercent * float64
Reset5hSeconds * int
Window5hMinutes * int
Used7dPercent * float64
Reset7dSeconds * int
Window7dMinutes * int
}
// Normalize converts primary/secondary fields to canonical 5h/7d fields.
// Strategy: Compare window_minutes to determine which is 5h vs 7d.
// Returns nil if snapshot is nil or has no useful data.
func ( s * OpenAICodexUsageSnapshot ) Normalize ( ) * NormalizedCodexLimits {
if s == nil {
return nil
}
result := & NormalizedCodexLimits { }
primaryMins := 0
secondaryMins := 0
hasPrimaryWindow := false
hasSecondaryWindow := false
if s . PrimaryWindowMinutes != nil {
primaryMins = * s . PrimaryWindowMinutes
hasPrimaryWindow = true
}
if s . SecondaryWindowMinutes != nil {
secondaryMins = * s . SecondaryWindowMinutes
hasSecondaryWindow = true
}
// Determine mapping based on window_minutes
use5hFromPrimary := false
use7dFromPrimary := false
if hasPrimaryWindow && hasSecondaryWindow {
// Both known: smaller window is 5h, larger is 7d
if primaryMins < secondaryMins {
use5hFromPrimary = true
} else {
use7dFromPrimary = true
}
} else if hasPrimaryWindow {
// Only primary known: classify by threshold (<=360 min = 6h -> 5h window)
if primaryMins <= 360 {
use5hFromPrimary = true
} else {
use7dFromPrimary = true
}
} else if hasSecondaryWindow {
// Only secondary known: classify by threshold
if secondaryMins <= 360 {
// 5h from secondary, so primary (if any data) is 7d
use7dFromPrimary = true
} else {
// 7d from secondary, so primary (if any data) is 5h
use5hFromPrimary = true
}
} else {
// No window_minutes: fall back to legacy assumption (primary=7d, secondary=5h)
use7dFromPrimary = true
}
// Assign values
if use5hFromPrimary {
result . Used5hPercent = s . PrimaryUsedPercent
result . Reset5hSeconds = s . PrimaryResetAfterSeconds
result . Window5hMinutes = s . PrimaryWindowMinutes
result . Used7dPercent = s . SecondaryUsedPercent
result . Reset7dSeconds = s . SecondaryResetAfterSeconds
result . Window7dMinutes = s . SecondaryWindowMinutes
} else if use7dFromPrimary {
result . Used7dPercent = s . PrimaryUsedPercent
result . Reset7dSeconds = s . PrimaryResetAfterSeconds
result . Window7dMinutes = s . PrimaryWindowMinutes
result . Used5hPercent = s . SecondaryUsedPercent
result . Reset5hSeconds = s . SecondaryResetAfterSeconds
result . Window5hMinutes = s . SecondaryWindowMinutes
}
return result
}
2025-12-22 22:58:31 +08:00
// OpenAIUsage represents OpenAI API response usage
type OpenAIUsage struct {
InputTokens int ` json:"input_tokens" `
OutputTokens int ` json:"output_tokens" `
CacheCreationInputTokens int ` json:"cache_creation_input_tokens,omitempty" `
CacheReadInputTokens int ` json:"cache_read_input_tokens,omitempty" `
2026-04-01 15:08:57 +08:00
ImageOutputTokens int ` json:"image_output_tokens,omitempty" `
2025-12-22 22:58:31 +08:00
}
// OpenAIForwardResult represents the result of forwarding
type OpenAIForwardResult struct {
2026-02-03 14:36:29 +08:00
RequestID string
Usage OpenAIUsage
2026-03-06 22:39:33 +08:00
Model string // 原始模型(用于响应和日志显示)
// BillingModel is the model used for cost calculation.
// When non-empty, CalculateCost uses this instead of Model.
// This is set by the Anthropic Messages conversion path where
// the mapped upstream model differs from the client-facing model.
BillingModel string
2026-03-17 19:25:35 +08:00
// UpstreamModel is the actual model sent to the upstream provider after mapping.
// Empty when no mapping was applied (requested model was used as-is).
UpstreamModel string
2026-03-08 23:22:28 +08:00
// ServiceTier records the OpenAI Responses API service tier, e.g. "priority" / "flex".
// Nil means the request did not specify a recognized tier.
ServiceTier * string
2026-02-03 14:36:29 +08:00
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
// Stored for usage records display; nil means not provided / not applicable.
ReasoningEffort * string
Stream bool
2026-02-28 15:01:20 +08:00
OpenAIWSMode bool
2026-03-06 20:46:10 +08:00
ResponseHeaders http . Header
2026-02-03 14:36:29 +08:00
Duration time . Duration
FirstTokenMs * int
2026-04-22 12:30:08 +08:00
ImageCount int
ImageSize string
2025-12-22 22:58:31 +08:00
}
2026-02-28 15:01:20 +08:00
type OpenAIWSRetryMetricsSnapshot struct {
RetryAttemptsTotal int64 ` json:"retry_attempts_total" `
RetryBackoffMsTotal int64 ` json:"retry_backoff_ms_total" `
RetryExhaustedTotal int64 ` json:"retry_exhausted_total" `
NonRetryableFastFallbackTotal int64 ` json:"non_retryable_fast_fallback_total" `
}
type OpenAICompatibilityFallbackMetricsSnapshot struct {
SessionHashLegacyReadFallbackTotal int64 ` json:"session_hash_legacy_read_fallback_total" `
SessionHashLegacyReadFallbackHit int64 ` json:"session_hash_legacy_read_fallback_hit" `
SessionHashLegacyDualWriteTotal int64 ` json:"session_hash_legacy_dual_write_total" `
SessionHashLegacyReadHitRate float64 ` json:"session_hash_legacy_read_hit_rate" `
MetadataLegacyFallbackIsMaxTokensOneHaikuTotal int64 ` json:"metadata_legacy_fallback_is_max_tokens_one_haiku_total" `
MetadataLegacyFallbackThinkingEnabledTotal int64 ` json:"metadata_legacy_fallback_thinking_enabled_total" `
MetadataLegacyFallbackPrefetchedStickyAccount int64 ` json:"metadata_legacy_fallback_prefetched_sticky_account_total" `
MetadataLegacyFallbackPrefetchedStickyGroup int64 ` json:"metadata_legacy_fallback_prefetched_sticky_group_total" `
MetadataLegacyFallbackSingleAccountRetryTotal int64 ` json:"metadata_legacy_fallback_single_account_retry_total" `
MetadataLegacyFallbackAccountSwitchCountTotal int64 ` json:"metadata_legacy_fallback_account_switch_count_total" `
MetadataLegacyFallbackTotal int64 ` json:"metadata_legacy_fallback_total" `
}
type openAIWSRetryMetrics struct {
retryAttempts atomic . Int64
retryBackoffMs atomic . Int64
retryExhausted atomic . Int64
nonRetryableFastFallback atomic . Int64
}
2026-03-11 13:53:19 +08:00
type accountWriteThrottle struct {
minInterval time . Duration
mu sync . Mutex
lastByID map [ int64 ] time . Time
}
func newAccountWriteThrottle ( minInterval time . Duration ) * accountWriteThrottle {
return & accountWriteThrottle {
minInterval : minInterval ,
lastByID : make ( map [ int64 ] time . Time ) ,
}
}
func ( t * accountWriteThrottle ) Allow ( id int64 , now time . Time ) bool {
if t == nil || id <= 0 || t . minInterval <= 0 {
return true
}
t . mu . Lock ( )
defer t . mu . Unlock ( )
if last , ok := t . lastByID [ id ] ; ok && now . Sub ( last ) < t . minInterval {
return false
}
t . lastByID [ id ] = now
if len ( t . lastByID ) > 4096 {
cutoff := now . Add ( - 4 * t . minInterval )
for accountID , writtenAt := range t . lastByID {
if writtenAt . Before ( cutoff ) {
delete ( t . lastByID , accountID )
}
}
}
return true
}
var defaultOpenAICodexSnapshotPersistThrottle = newAccountWriteThrottle ( openAICodexSnapshotPersistMinInterval )
2026-04-25 14:40:03 +08:00
// ErrNoAvailableCompactAccounts indicates the request needs /responses/compact
// support but no compatible account is available.
var ErrNoAvailableCompactAccounts = errors . New ( "no available OpenAI accounts support /responses/compact" )
2025-12-22 22:58:31 +08:00
// OpenAIGatewayService handles OpenAI API gateway operations
type OpenAIGatewayService struct {
2026-03-06 14:54:52 +08:00
accountRepo AccountRepository
usageLogRepo UsageLogRepository
2026-03-12 16:53:18 +08:00
usageBillingRepo UsageBillingRepository
2026-03-06 14:54:52 +08:00
userRepo UserRepository
userSubRepo UserSubscriptionRepository
cache GatewayCache
cfg * config . Config
codexDetector CodexClientRestrictionDetector
schedulerSnapshot * SchedulerSnapshotService
concurrencyService * ConcurrencyService
billingService * BillingService
rateLimitService * RateLimitService
billingCacheService * BillingCacheService
userGroupRateResolver * userGroupRateResolver
httpUpstream HTTPUpstream
deferredService * DeferredService
openAITokenProvider * OpenAITokenProvider
toolCorrector * CodexToolCorrector
openaiWSResolver OpenAIWSProtocolResolver
2026-03-30 22:58:28 +08:00
resolver * ModelPricingResolver
2026-03-31 00:23:45 +08:00
channelService * ChannelService
2026-04-12 02:48:57 +08:00
balanceNotifyService * BalanceNotifyService
2026-04-28 00:34:23 +08:00
settingService * SettingService
2026-02-28 15:01:20 +08:00
2026-03-05 11:50:58 +08:00
openaiWSPoolOnce sync . Once
openaiWSStateStoreOnce sync . Once
openaiSchedulerOnce sync . Once
openaiWSPassthroughDialerOnce sync . Once
openaiWSPool * openAIWSConnPool
openaiWSStateStore OpenAIWSStateStore
openaiScheduler OpenAIAccountScheduler
openaiWSPassthroughDialer openAIWSClientDialer
openaiAccountStats * openAIAccountRuntimeStats
2026-02-28 15:01:20 +08:00
openaiWSFallbackUntil sync . Map // key: int64(accountID), value: time.Time
openaiWSRetryMetrics openAIWSRetryMetrics
responseHeaderFilter * responseheaders . CompiledHeaderFilter
2026-03-11 13:53:19 +08:00
codexSnapshotThrottle * accountWriteThrottle
2025-12-22 22:58:31 +08:00
}
// NewOpenAIGatewayService creates a new OpenAIGatewayService
func NewOpenAIGatewayService (
2025-12-25 17:15:01 +08:00
accountRepo AccountRepository ,
usageLogRepo UsageLogRepository ,
2026-03-12 16:53:18 +08:00
usageBillingRepo UsageBillingRepository ,
2025-12-25 17:15:01 +08:00
userRepo UserRepository ,
userSubRepo UserSubscriptionRepository ,
2026-03-06 14:54:52 +08:00
userGroupRateRepo UserGroupRateRepository ,
2025-12-25 17:15:01 +08:00
cache GatewayCache ,
2025-12-22 22:58:31 +08:00
cfg * config . Config ,
2026-01-12 14:19:06 +08:00
schedulerSnapshot * SchedulerSnapshotService ,
2026-01-01 04:01:51 +08:00
concurrencyService * ConcurrencyService ,
2025-12-22 22:58:31 +08:00
billingService * BillingService ,
rateLimitService * RateLimitService ,
billingCacheService * BillingCacheService ,
2025-12-25 17:15:01 +08:00
httpUpstream HTTPUpstream ,
2025-12-28 08:07:15 +08:00
deferredService * DeferredService ,
2026-01-15 18:27:06 +08:00
openAITokenProvider * OpenAITokenProvider ,
2026-03-30 22:58:28 +08:00
resolver * ModelPricingResolver ,
2026-03-31 00:23:45 +08:00
channelService * ChannelService ,
2026-04-12 02:48:57 +08:00
balanceNotifyService * BalanceNotifyService ,
2026-04-28 00:34:23 +08:00
settingService * SettingService ,
2025-12-22 22:58:31 +08:00
) * OpenAIGatewayService {
2026-02-28 15:01:20 +08:00
svc := & OpenAIGatewayService {
2026-03-06 14:54:52 +08:00
accountRepo : accountRepo ,
usageLogRepo : usageLogRepo ,
2026-03-12 16:53:18 +08:00
usageBillingRepo : usageBillingRepo ,
2026-03-06 14:54:52 +08:00
userRepo : userRepo ,
userSubRepo : userSubRepo ,
cache : cache ,
cfg : cfg ,
codexDetector : NewOpenAICodexClientRestrictionDetector ( cfg ) ,
schedulerSnapshot : schedulerSnapshot ,
concurrencyService : concurrencyService ,
billingService : billingService ,
rateLimitService : rateLimitService ,
billingCacheService : billingCacheService ,
userGroupRateResolver : newUserGroupRateResolver (
userGroupRateRepo ,
nil ,
resolveUserGroupRateCacheTTL ( cfg ) ,
nil ,
"service.openai_gateway" ,
) ,
2026-03-11 13:53:19 +08:00
httpUpstream : httpUpstream ,
deferredService : deferredService ,
openAITokenProvider : openAITokenProvider ,
toolCorrector : NewCodexToolCorrector ( ) ,
openaiWSResolver : NewOpenAIWSProtocolResolver ( cfg ) ,
2026-03-30 22:58:28 +08:00
resolver : resolver ,
2026-03-31 00:23:45 +08:00
channelService : channelService ,
2026-04-12 02:48:57 +08:00
balanceNotifyService : balanceNotifyService ,
2026-04-28 00:34:23 +08:00
settingService : settingService ,
2026-03-11 13:53:19 +08:00
responseHeaderFilter : compileResponseHeaderFilter ( cfg ) ,
codexSnapshotThrottle : newAccountWriteThrottle ( openAICodexSnapshotPersistMinInterval ) ,
2026-02-28 15:01:20 +08:00
}
svc . logOpenAIWSModeBootstrap ( )
return svc
}
2026-03-31 00:23:45 +08:00
// ResolveChannelMapping 解析渠道级模型映射(代理到 ChannelService)
func ( s * OpenAIGatewayService ) ResolveChannelMapping ( ctx context . Context , groupID int64 , model string ) ChannelMappingResult {
if s . channelService == nil {
return ChannelMappingResult { MappedModel : model }
}
return s . channelService . ResolveChannelMapping ( ctx , groupID , model )
}
// IsModelRestricted 检查模型是否被渠道限制(代理到 ChannelService)
func ( s * OpenAIGatewayService ) IsModelRestricted ( ctx context . Context , groupID int64 , model string ) bool {
if s . channelService == nil {
return false
}
return s . channelService . IsModelRestricted ( ctx , groupID , model )
}
2026-04-02 13:36:58 +08:00
// ResolveChannelMappingAndRestrict 解析渠道映射。
// 模型限制检查已移至调度阶段, restricted 始终返回 false。
2026-03-31 02:11:24 +08:00
func ( s * OpenAIGatewayService ) ResolveChannelMappingAndRestrict ( ctx context . Context , groupID * int64 , model string ) ( ChannelMappingResult , bool ) {
2026-03-31 15:26:20 +08:00
if s . channelService == nil {
return ChannelMappingResult { MappedModel : model } , false
}
return s . channelService . ResolveChannelMappingAndRestrict ( ctx , groupID , model )
2026-03-31 02:11:24 +08:00
}
2026-04-02 20:28:04 +08:00
func ( s * OpenAIGatewayService ) checkChannelPricingRestriction ( ctx context . Context , groupID * int64 , requestedModel string ) bool {
if groupID == nil || s . channelService == nil || requestedModel == "" {
return false
}
mapping := s . channelService . ResolveChannelMapping ( ctx , * groupID , requestedModel )
billingModel := billingModelForRestriction ( mapping . BillingModelSource , requestedModel , mapping . MappedModel )
if billingModel == "" {
return false
}
return s . channelService . IsModelRestricted ( ctx , * groupID , billingModel )
}
2026-04-25 14:40:03 +08:00
func ( s * OpenAIGatewayService ) isUpstreamModelRestrictedByChannel ( ctx context . Context , groupID int64 , account * Account , requestedModel string , requireCompact bool ) bool {
2026-04-02 20:28:04 +08:00
if s . channelService == nil {
return false
}
2026-04-25 14:40:03 +08:00
upstreamModel := resolveOpenAIAccountUpstreamModelForRequest ( account , requestedModel , requireCompact )
2026-04-02 20:28:04 +08:00
if upstreamModel == "" {
return false
}
return s . channelService . IsModelRestricted ( ctx , groupID , upstreamModel )
}
func ( s * OpenAIGatewayService ) needsUpstreamChannelRestrictionCheck ( ctx context . Context , groupID * int64 ) bool {
if groupID == nil || s . channelService == nil {
return false
}
ch , err := s . channelService . GetChannelForGroup ( ctx , * groupID )
if err != nil {
slog . Warn ( "failed to check openai channel upstream restriction" , "group_id" , * groupID , "error" , err )
return false
}
if ch == nil || ! ch . RestrictModels {
return false
}
return ch . BillingModelSource == BillingModelSourceUpstream
}
2026-03-31 02:11:24 +08:00
// ReplaceModelInBody 替换请求体中的 JSON model 字段(通用 gjson/sjson 实现)。
func ( s * OpenAIGatewayService ) ReplaceModelInBody ( body [ ] byte , newModel string ) [ ] byte {
2026-03-31 15:26:20 +08:00
return ReplaceModelInBody ( body , newModel )
2026-03-31 02:11:24 +08:00
}
2026-03-11 13:53:19 +08:00
func ( s * OpenAIGatewayService ) getCodexSnapshotThrottle ( ) * accountWriteThrottle {
if s != nil && s . codexSnapshotThrottle != nil {
return s . codexSnapshotThrottle
}
return defaultOpenAICodexSnapshotPersistThrottle
}
2026-03-06 00:37:37 +08:00
func ( s * OpenAIGatewayService ) billingDeps ( ) * billingDeps {
return & billingDeps {
2026-04-12 02:48:57 +08:00
accountRepo : s . accountRepo ,
userRepo : s . userRepo ,
userSubRepo : s . userSubRepo ,
billingCacheService : s . billingCacheService ,
deferredService : s . deferredService ,
balanceNotifyService : s . balanceNotifyService ,
2026-03-06 00:37:37 +08:00
}
}
2026-02-28 15:01:20 +08:00
// CloseOpenAIWSPool 关闭 OpenAI WebSocket 连接池的后台 worker 和空闲连接。
// 应在应用优雅关闭时调用。
func ( s * OpenAIGatewayService ) CloseOpenAIWSPool ( ) {
if s != nil && s . openaiWSPool != nil {
s . openaiWSPool . Close ( )
}
}
func ( s * OpenAIGatewayService ) logOpenAIWSModeBootstrap ( ) {
if s == nil || s . cfg == nil {
return
2025-12-22 22:58:31 +08:00
}
2026-02-28 15:01:20 +08:00
wsCfg := s . cfg . Gateway . OpenAIWS
logOpenAIWSModeInfo (
"bootstrap enabled=%v oauth_enabled=%v apikey_enabled=%v force_http=%v responses_websockets_v2=%v responses_websockets=%v payload_log_sample_rate=%.3f event_flush_batch_size=%d event_flush_interval_ms=%d prewarm_cooldown_ms=%d retry_backoff_initial_ms=%d retry_backoff_max_ms=%d retry_jitter_ratio=%.3f retry_total_budget_ms=%d ws_read_limit_bytes=%d" ,
wsCfg . Enabled ,
wsCfg . OAuthEnabled ,
wsCfg . APIKeyEnabled ,
wsCfg . ForceHTTP ,
wsCfg . ResponsesWebsocketsV2 ,
wsCfg . ResponsesWebsockets ,
wsCfg . PayloadLogSampleRate ,
wsCfg . EventFlushBatchSize ,
wsCfg . EventFlushIntervalMS ,
wsCfg . PrewarmCooldownMS ,
wsCfg . RetryBackoffInitialMS ,
wsCfg . RetryBackoffMaxMS ,
wsCfg . RetryJitterRatio ,
wsCfg . RetryTotalBudgetMS ,
openAIWSMessageReadLimitBytes ,
)
2025-12-22 22:58:31 +08:00
}
2026-02-12 22:32:59 +08:00
func ( s * OpenAIGatewayService ) getCodexClientRestrictionDetector ( ) CodexClientRestrictionDetector {
if s != nil && s . codexDetector != nil {
return s . codexDetector
}
var cfg * config . Config
if s != nil {
cfg = s . cfg
}
return NewOpenAICodexClientRestrictionDetector ( cfg )
}
2026-02-28 15:01:20 +08:00
func ( s * OpenAIGatewayService ) getOpenAIWSProtocolResolver ( ) OpenAIWSProtocolResolver {
if s != nil && s . openaiWSResolver != nil {
return s . openaiWSResolver
}
var cfg * config . Config
if s != nil {
cfg = s . cfg
}
return NewOpenAIWSProtocolResolver ( cfg )
}
func classifyOpenAIWSReconnectReason ( err error ) ( string , bool ) {
if err == nil {
return "" , false
}
var fallbackErr * openAIWSFallbackError
if ! errors . As ( err , & fallbackErr ) || fallbackErr == nil {
return "" , false
}
reason := strings . TrimSpace ( fallbackErr . Reason )
if reason == "" {
return "" , false
}
baseReason := strings . TrimPrefix ( reason , "prewarm_" )
switch baseReason {
case "policy_violation" ,
"message_too_big" ,
"upgrade_required" ,
"ws_unsupported" ,
"auth_failed" ,
2026-03-14 11:42:42 +08:00
"invalid_encrypted_content" ,
2026-02-28 15:01:20 +08:00
"previous_response_not_found" :
return reason , false
}
switch baseReason {
case "read_event" ,
"write_request" ,
"write" ,
"acquire_timeout" ,
"acquire_conn" ,
"conn_queue_full" ,
"dial_failed" ,
"upstream_5xx" ,
"event_error" ,
"error_event" ,
"upstream_error_event" ,
"ws_connection_limit_reached" ,
"missing_final_response" :
return reason , true
default :
return reason , false
}
}
func resolveOpenAIWSFallbackErrorResponse ( err error ) ( statusCode int , errType string , clientMessage string , upstreamMessage string , ok bool ) {
if err == nil {
return 0 , "" , "" , "" , false
}
var fallbackErr * openAIWSFallbackError
if ! errors . As ( err , & fallbackErr ) || fallbackErr == nil {
return 0 , "" , "" , "" , false
}
reason := strings . TrimSpace ( fallbackErr . Reason )
reason = strings . TrimPrefix ( reason , "prewarm_" )
if reason == "" {
return 0 , "" , "" , "" , false
}
var dialErr * openAIWSDialError
if fallbackErr . Err != nil && errors . As ( fallbackErr . Err , & dialErr ) && dialErr != nil {
if dialErr . StatusCode > 0 {
statusCode = dialErr . StatusCode
}
if dialErr . Err != nil {
upstreamMessage = sanitizeUpstreamErrorMessage ( strings . TrimSpace ( dialErr . Err . Error ( ) ) )
}
}
switch reason {
2026-03-14 11:42:42 +08:00
case "invalid_encrypted_content" :
if statusCode == 0 {
statusCode = http . StatusBadRequest
}
errType = "invalid_request_error"
if upstreamMessage == "" {
upstreamMessage = "encrypted content could not be verified"
}
2026-02-28 15:01:20 +08:00
case "previous_response_not_found" :
if statusCode == 0 {
statusCode = http . StatusBadRequest
}
errType = "invalid_request_error"
if upstreamMessage == "" {
upstreamMessage = "previous response not found"
}
case "upgrade_required" :
if statusCode == 0 {
statusCode = http . StatusUpgradeRequired
}
case "ws_unsupported" :
if statusCode == 0 {
statusCode = http . StatusBadRequest
}
case "auth_failed" :
if statusCode == 0 {
statusCode = http . StatusUnauthorized
}
case "upstream_rate_limited" :
if statusCode == 0 {
statusCode = http . StatusTooManyRequests
}
default :
if statusCode == 0 {
return 0 , "" , "" , "" , false
}
}
if upstreamMessage == "" && fallbackErr . Err != nil {
upstreamMessage = sanitizeUpstreamErrorMessage ( strings . TrimSpace ( fallbackErr . Err . Error ( ) ) )
}
if upstreamMessage == "" {
switch reason {
case "upgrade_required" :
upstreamMessage = "upstream websocket upgrade required"
case "ws_unsupported" :
upstreamMessage = "upstream websocket not supported"
case "auth_failed" :
upstreamMessage = "upstream authentication failed"
case "upstream_rate_limited" :
upstreamMessage = "upstream rate limit exceeded, please retry later"
default :
upstreamMessage = "Upstream request failed"
}
}
if errType == "" {
if statusCode == http . StatusTooManyRequests {
errType = "rate_limit_error"
} else {
errType = "upstream_error"
}
}
clientMessage = upstreamMessage
return statusCode , errType , clientMessage , upstreamMessage , true
}
func ( s * OpenAIGatewayService ) writeOpenAIWSFallbackErrorResponse ( c * gin . Context , account * Account , wsErr error ) bool {
if c == nil || c . Writer == nil || c . Writer . Written ( ) {
return false
}
statusCode , errType , clientMessage , upstreamMessage , ok := resolveOpenAIWSFallbackErrorResponse ( wsErr )
if ! ok {
return false
}
if strings . TrimSpace ( clientMessage ) == "" {
clientMessage = "Upstream request failed"
}
if strings . TrimSpace ( upstreamMessage ) == "" {
upstreamMessage = clientMessage
}
setOpsUpstreamError ( c , statusCode , upstreamMessage , "" )
if account != nil {
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : statusCode ,
Kind : "ws_error" ,
Message : upstreamMessage ,
} )
}
c . JSON ( statusCode , gin . H {
"error" : gin . H {
"type" : errType ,
"message" : clientMessage ,
} ,
} )
return true
}
func ( s * OpenAIGatewayService ) openAIWSRetryBackoff ( attempt int ) time . Duration {
if attempt <= 0 {
return 0
}
initial := openAIWSRetryBackoffInitialDefault
maxBackoff := openAIWSRetryBackoffMaxDefault
jitterRatio := openAIWSRetryJitterRatioDefault
if s != nil && s . cfg != nil {
wsCfg := s . cfg . Gateway . OpenAIWS
if wsCfg . RetryBackoffInitialMS > 0 {
initial = time . Duration ( wsCfg . RetryBackoffInitialMS ) * time . Millisecond
}
if wsCfg . RetryBackoffMaxMS > 0 {
maxBackoff = time . Duration ( wsCfg . RetryBackoffMaxMS ) * time . Millisecond
}
if wsCfg . RetryJitterRatio >= 0 {
jitterRatio = wsCfg . RetryJitterRatio
}
}
if initial <= 0 {
return 0
}
if maxBackoff <= 0 {
maxBackoff = initial
}
if maxBackoff < initial {
maxBackoff = initial
}
if jitterRatio < 0 {
jitterRatio = 0
}
if jitterRatio > 1 {
jitterRatio = 1
}
shift := attempt - 1
if shift < 0 {
shift = 0
}
backoff := initial
if shift > 0 {
backoff = initial * time . Duration ( 1 << shift )
}
if backoff > maxBackoff {
backoff = maxBackoff
}
if jitterRatio <= 0 {
return backoff
}
jitter := time . Duration ( float64 ( backoff ) * jitterRatio )
if jitter <= 0 {
return backoff
}
delta := time . Duration ( rand . Int63n ( int64 ( jitter ) * 2 + 1 ) ) - jitter
withJitter := backoff + delta
if withJitter < 0 {
return 0
}
return withJitter
}
func ( s * OpenAIGatewayService ) openAIWSRetryTotalBudget ( ) time . Duration {
if s != nil && s . cfg != nil {
ms := s . cfg . Gateway . OpenAIWS . RetryTotalBudgetMS
if ms <= 0 {
return 0
}
return time . Duration ( ms ) * time . Millisecond
}
return 0
}
func ( s * OpenAIGatewayService ) recordOpenAIWSRetryAttempt ( backoff time . Duration ) {
if s == nil {
return
}
s . openaiWSRetryMetrics . retryAttempts . Add ( 1 )
if backoff > 0 {
s . openaiWSRetryMetrics . retryBackoffMs . Add ( backoff . Milliseconds ( ) )
}
}
func ( s * OpenAIGatewayService ) recordOpenAIWSRetryExhausted ( ) {
if s == nil {
return
}
s . openaiWSRetryMetrics . retryExhausted . Add ( 1 )
}
func ( s * OpenAIGatewayService ) recordOpenAIWSNonRetryableFastFallback ( ) {
if s == nil {
return
}
s . openaiWSRetryMetrics . nonRetryableFastFallback . Add ( 1 )
}
func ( s * OpenAIGatewayService ) SnapshotOpenAIWSRetryMetrics ( ) OpenAIWSRetryMetricsSnapshot {
if s == nil {
return OpenAIWSRetryMetricsSnapshot { }
}
return OpenAIWSRetryMetricsSnapshot {
RetryAttemptsTotal : s . openaiWSRetryMetrics . retryAttempts . Load ( ) ,
RetryBackoffMsTotal : s . openaiWSRetryMetrics . retryBackoffMs . Load ( ) ,
RetryExhaustedTotal : s . openaiWSRetryMetrics . retryExhausted . Load ( ) ,
NonRetryableFastFallbackTotal : s . openaiWSRetryMetrics . nonRetryableFastFallback . Load ( ) ,
}
}
func SnapshotOpenAICompatibilityFallbackMetrics ( ) OpenAICompatibilityFallbackMetricsSnapshot {
legacyReadFallbackTotal , legacyReadFallbackHit , legacyDualWriteTotal := openAIStickyCompatStats ( )
isMaxTokensOneHaiku , thinkingEnabled , prefetchedStickyAccount , prefetchedStickyGroup , singleAccountRetry , accountSwitchCount := RequestMetadataFallbackStats ( )
readHitRate := float64 ( 0 )
if legacyReadFallbackTotal > 0 {
readHitRate = float64 ( legacyReadFallbackHit ) / float64 ( legacyReadFallbackTotal )
}
metadataFallbackTotal := isMaxTokensOneHaiku + thinkingEnabled + prefetchedStickyAccount + prefetchedStickyGroup + singleAccountRetry + accountSwitchCount
return OpenAICompatibilityFallbackMetricsSnapshot {
SessionHashLegacyReadFallbackTotal : legacyReadFallbackTotal ,
SessionHashLegacyReadFallbackHit : legacyReadFallbackHit ,
SessionHashLegacyDualWriteTotal : legacyDualWriteTotal ,
SessionHashLegacyReadHitRate : readHitRate ,
MetadataLegacyFallbackIsMaxTokensOneHaikuTotal : isMaxTokensOneHaiku ,
MetadataLegacyFallbackThinkingEnabledTotal : thinkingEnabled ,
MetadataLegacyFallbackPrefetchedStickyAccount : prefetchedStickyAccount ,
MetadataLegacyFallbackPrefetchedStickyGroup : prefetchedStickyGroup ,
MetadataLegacyFallbackSingleAccountRetryTotal : singleAccountRetry ,
MetadataLegacyFallbackAccountSwitchCountTotal : accountSwitchCount ,
MetadataLegacyFallbackTotal : metadataFallbackTotal ,
}
}
2026-02-12 22:32:59 +08:00
func ( s * OpenAIGatewayService ) detectCodexClientRestriction ( c * gin . Context , account * Account ) CodexClientRestrictionDetectionResult {
return s . getCodexClientRestrictionDetector ( ) . Detect ( c , account )
}
func getAPIKeyIDFromContext ( c * gin . Context ) int64 {
if c == nil {
return 0
}
v , exists := c . Get ( "api_key" )
if ! exists {
return 0
}
apiKey , ok := v . ( * APIKey )
if ! ok || apiKey == nil {
return 0
}
return apiKey . ID
}
2026-03-16 10:28:11 +08:00
// isolateOpenAISessionID 将 apiKeyID 混入 session 标识符,
// 确保不同 API Key 的用户即使使用相同的原始 session_id/conversation_id,
// 到达上游的标识符也不同,防止跨用户会话碰撞。
func isolateOpenAISessionID ( apiKeyID int64 , raw string ) string {
raw = strings . TrimSpace ( raw )
if raw == "" {
return ""
}
h := xxhash . New ( )
_ , _ = fmt . Fprintf ( h , "k%d:" , apiKeyID )
_ , _ = h . WriteString ( raw )
return fmt . Sprintf ( "%016x" , h . Sum64 ( ) )
}
2026-02-13 19:27:07 +08:00
func logCodexCLIOnlyDetection ( ctx context . Context , c * gin . Context , account * Account , apiKeyID int64 , result CodexClientRestrictionDetectionResult , body [ ] byte ) {
2026-02-12 22:32:59 +08:00
if ! result . Enabled {
return
}
if ctx == nil {
ctx = context . Background ( )
}
accountID := int64 ( 0 )
if account != nil {
accountID = account . ID
}
fields := [ ] zap . Field {
zap . String ( "component" , "service.openai_gateway" ) ,
zap . Int64 ( "account_id" , accountID ) ,
zap . Bool ( "codex_cli_only_enabled" , result . Enabled ) ,
zap . Bool ( "codex_official_client_match" , result . Matched ) ,
zap . String ( "reject_reason" , result . Reason ) ,
}
if apiKeyID > 0 {
fields = append ( fields , zap . Int64 ( "api_key_id" , apiKeyID ) )
}
2026-02-13 19:27:07 +08:00
if ! result . Matched {
fields = appendCodexCLIOnlyRejectedRequestFields ( fields , c , body )
}
2026-02-12 22:32:59 +08:00
log := logger . FromContext ( ctx ) . With ( fields ... )
if result . Matched {
return
}
log . Warn ( "OpenAI codex_cli_only 拒绝非官方客户端请求" )
}
2026-02-13 19:27:07 +08:00
func appendCodexCLIOnlyRejectedRequestFields ( fields [ ] zap . Field , c * gin . Context , body [ ] byte ) [ ] zap . Field {
if c == nil || c . Request == nil {
return fields
}
req := c . Request
requestModel , requestStream , promptCacheKey := extractOpenAIRequestMetaFromBody ( body )
fields = append ( fields ,
zap . String ( "request_method" , strings . TrimSpace ( req . Method ) ) ,
zap . String ( "request_path" , strings . TrimSpace ( req . URL . Path ) ) ,
zap . String ( "request_query" , strings . TrimSpace ( req . URL . RawQuery ) ) ,
zap . String ( "request_host" , strings . TrimSpace ( req . Host ) ) ,
zap . String ( "request_client_ip" , strings . TrimSpace ( c . ClientIP ( ) ) ) ,
zap . String ( "request_remote_addr" , strings . TrimSpace ( req . RemoteAddr ) ) ,
2026-02-14 09:59:19 +08:00
zap . String ( "request_user_agent" , strings . TrimSpace ( req . Header . Get ( "User-Agent" ) ) ) ,
2026-02-13 19:27:07 +08:00
zap . String ( "request_content_type" , strings . TrimSpace ( req . Header . Get ( "Content-Type" ) ) ) ,
zap . Int64 ( "request_content_length" , req . ContentLength ) ,
zap . Bool ( "request_stream" , requestStream ) ,
)
if requestModel != "" {
fields = append ( fields , zap . String ( "request_model" , requestModel ) )
}
if promptCacheKey != "" {
fields = append ( fields , zap . String ( "request_prompt_cache_key_sha256" , hashSensitiveValueForLog ( promptCacheKey ) ) )
}
if headers := snapshotCodexCLIOnlyHeaders ( req . Header ) ; len ( headers ) > 0 {
fields = append ( fields , zap . Any ( "request_headers" , headers ) )
}
fields = append ( fields , zap . Int ( "request_body_size" , len ( body ) ) )
return fields
}
func snapshotCodexCLIOnlyHeaders ( header http . Header ) map [ string ] string {
if len ( header ) == 0 {
return nil
}
result := make ( map [ string ] string , len ( codexCLIOnlyDebugHeaderWhitelist ) )
for _ , key := range codexCLIOnlyDebugHeaderWhitelist {
value := strings . TrimSpace ( header . Get ( key ) )
if value == "" {
continue
}
result [ strings . ToLower ( key ) ] = truncateString ( value , codexCLIOnlyHeaderValueMaxBytes )
}
return result
}
func hashSensitiveValueForLog ( raw string ) string {
value := strings . TrimSpace ( raw )
if value == "" {
return ""
}
sum := sha256 . Sum256 ( [ ] byte ( value ) )
return hex . EncodeToString ( sum [ : 8 ] )
}
func logOpenAIInstructionsRequiredDebug (
ctx context . Context ,
c * gin . Context ,
account * Account ,
upstreamStatusCode int ,
upstreamMsg string ,
requestBody [ ] byte ,
upstreamBody [ ] byte ,
) {
msg := strings . TrimSpace ( upstreamMsg )
if ! isOpenAIInstructionsRequiredError ( upstreamStatusCode , msg , upstreamBody ) {
return
}
if ctx == nil {
ctx = context . Background ( )
}
accountID := int64 ( 0 )
accountName := ""
if account != nil {
accountID = account . ID
accountName = strings . TrimSpace ( account . Name )
}
userAgent := ""
2026-03-07 14:12:38 +08:00
originator := ""
2026-02-13 19:27:07 +08:00
if c != nil {
userAgent = strings . TrimSpace ( c . GetHeader ( "User-Agent" ) )
2026-03-07 14:12:38 +08:00
originator = strings . TrimSpace ( c . GetHeader ( "originator" ) )
2026-02-13 19:27:07 +08:00
}
fields := [ ] zap . Field {
zap . String ( "component" , "service.openai_gateway" ) ,
zap . Int64 ( "account_id" , accountID ) ,
zap . String ( "account_name" , accountName ) ,
zap . Int ( "upstream_status_code" , upstreamStatusCode ) ,
zap . String ( "upstream_error_message" , msg ) ,
zap . String ( "request_user_agent" , userAgent ) ,
2026-03-07 14:12:38 +08:00
zap . Bool ( "codex_official_client_match" , openai . IsCodexOfficialClientByHeaders ( userAgent , originator ) ) ,
2026-02-13 19:27:07 +08:00
}
fields = appendCodexCLIOnlyRejectedRequestFields ( fields , c , requestBody )
logger . FromContext ( ctx ) . With ( fields ... ) . Warn ( "OpenAI 上游返回 Instructions are required, 已记录请求详情用于排查" )
}
func isOpenAIInstructionsRequiredError ( upstreamStatusCode int , upstreamMsg string , upstreamBody [ ] byte ) bool {
if upstreamStatusCode != http . StatusBadRequest {
return false
}
hasInstructionRequired := func ( text string ) bool {
lower := strings . ToLower ( strings . TrimSpace ( text ) )
if lower == "" {
return false
}
if strings . Contains ( lower , "instructions are required" ) {
return true
}
if strings . Contains ( lower , "required parameter: 'instructions'" ) {
return true
}
if strings . Contains ( lower , "required parameter: instructions" ) {
return true
}
if strings . Contains ( lower , "missing required parameter" ) && strings . Contains ( lower , "instructions" ) {
return true
}
return strings . Contains ( lower , "instruction" ) && strings . Contains ( lower , "required" )
}
if hasInstructionRequired ( upstreamMsg ) {
return true
}
if len ( upstreamBody ) == 0 {
return false
}
errMsg := gjson . GetBytes ( upstreamBody , "error.message" ) . String ( )
errMsgLower := strings . ToLower ( strings . TrimSpace ( errMsg ) )
errCode := strings . ToLower ( strings . TrimSpace ( gjson . GetBytes ( upstreamBody , "error.code" ) . String ( ) ) )
errParam := strings . ToLower ( strings . TrimSpace ( gjson . GetBytes ( upstreamBody , "error.param" ) . String ( ) ) )
errType := strings . ToLower ( strings . TrimSpace ( gjson . GetBytes ( upstreamBody , "error.type" ) . String ( ) ) )
if errParam == "instructions" {
return true
}
if hasInstructionRequired ( errMsg ) {
return true
}
if strings . Contains ( errCode , "missing_required_parameter" ) && strings . Contains ( errMsgLower , "instructions" ) {
return true
}
if strings . Contains ( errType , "invalid_request" ) && strings . Contains ( errMsgLower , "instructions" ) && strings . Contains ( errMsgLower , "required" ) {
return true
}
return false
}
2026-03-10 03:00:58 +08:00
func isOpenAITransientProcessingError ( upstreamStatusCode int , upstreamMsg string , upstreamBody [ ] byte ) bool {
if upstreamStatusCode != http . StatusBadRequest {
return false
}
match := func ( text string ) bool {
lower := strings . ToLower ( strings . TrimSpace ( text ) )
if lower == "" {
return false
}
if strings . Contains ( lower , "an error occurred while processing your request" ) {
return true
}
return strings . Contains ( lower , "you can retry your request" ) &&
strings . Contains ( lower , "help.openai.com" ) &&
strings . Contains ( lower , "request id" )
}
if match ( upstreamMsg ) {
return true
}
if len ( upstreamBody ) == 0 {
return false
}
if match ( gjson . GetBytes ( upstreamBody , "error.message" ) . String ( ) ) {
return true
}
return match ( string ( upstreamBody ) )
}
2026-03-06 14:29:22 +08:00
// ExtractSessionID extracts the raw session ID from headers or body without hashing.
// Used by ForwardAsAnthropic to pass as prompt_cache_key for upstream cache.
func ( s * OpenAIGatewayService ) ExtractSessionID ( c * gin . Context , body [ ] byte ) string {
if c == nil {
return ""
}
sessionID := strings . TrimSpace ( c . GetHeader ( "session_id" ) )
if sessionID == "" {
sessionID = strings . TrimSpace ( c . GetHeader ( "conversation_id" ) )
}
if sessionID == "" && len ( body ) > 0 {
sessionID = strings . TrimSpace ( gjson . GetBytes ( body , "prompt_cache_key" ) . String ( ) )
}
return sessionID
}
2026-04-26 17:05:19 +00:00
func explicitOpenAISessionID ( c * gin . Context , body [ ] byte ) string {
if c == nil {
return ""
}
sessionID := strings . TrimSpace ( c . GetHeader ( "session_id" ) )
if sessionID == "" {
sessionID = strings . TrimSpace ( c . GetHeader ( "conversation_id" ) )
}
if sessionID == "" && len ( body ) > 0 {
sessionID = strings . TrimSpace ( gjson . GetBytes ( body , "prompt_cache_key" ) . String ( ) )
}
return sessionID
}
// GenerateExplicitSessionHash generates a sticky-session hash only from explicit
// client session signals. It intentionally skips content-derived fallback and is
// used by stateless endpoints such as /v1/images.
func ( s * OpenAIGatewayService ) GenerateExplicitSessionHash ( c * gin . Context , body [ ] byte ) string {
sessionID := explicitOpenAISessionID ( c , body )
if sessionID == "" {
return ""
}
currentHash , legacyHash := deriveOpenAISessionHashes ( sessionID )
attachOpenAILegacySessionHashToGin ( c , legacyHash )
return currentHash
}
2026-01-17 02:31:16 +08:00
// GenerateSessionHash generates a sticky-session hash for OpenAI requests.
//
// Priority:
// 1. Header: session_id
// 2. Header: conversation_id
// 3. Body: prompt_cache_key (opencode)
2026-04-02 00:11:06 +08:00
// 4. Body: content-based fallback (model + system + tools + first user message)
2026-02-10 08:59:30 +08:00
func ( s * OpenAIGatewayService ) GenerateSessionHash ( c * gin . Context , body [ ] byte ) string {
2026-01-17 02:31:16 +08:00
if c == nil {
return ""
}
2026-04-26 17:05:19 +00:00
sessionID := explicitOpenAISessionID ( c , body )
2026-04-02 00:11:06 +08:00
if sessionID == "" && len ( body ) > 0 {
sessionID = deriveOpenAIContentSessionSeed ( body )
}
2025-12-22 22:58:31 +08:00
if sessionID == "" {
return ""
}
2026-01-17 02:31:16 +08:00
2026-02-28 15:01:20 +08:00
currentHash , legacyHash := deriveOpenAISessionHashes ( sessionID )
attachOpenAILegacySessionHashToGin ( c , legacyHash )
return currentHash
}
// GenerateSessionHashWithFallback 先按常规信号生成会话哈希;
// 当未携带 session_id/conversation_id/prompt_cache_key 时,使用 fallbackSeed 生成稳定哈希。
// 该方法用于 WS ingress, 避免会话信号缺失时发生跨账号漂移。
func ( s * OpenAIGatewayService ) GenerateSessionHashWithFallback ( c * gin . Context , body [ ] byte , fallbackSeed string ) string {
sessionHash := s . GenerateSessionHash ( c , body )
if sessionHash != "" {
return sessionHash
}
seed := strings . TrimSpace ( fallbackSeed )
if seed == "" {
return ""
}
currentHash , legacyHash := deriveOpenAISessionHashes ( seed )
attachOpenAILegacySessionHashToGin ( c , legacyHash )
return currentHash
2025-12-22 22:58:31 +08:00
}
2026-03-07 14:12:38 +08:00
func resolveOpenAIUpstreamOriginator ( c * gin . Context , isOfficialClient bool ) string {
if c != nil {
if originator := strings . TrimSpace ( c . GetHeader ( "originator" ) ) ; originator != "" {
return originator
}
}
if isOfficialClient {
return "codex_cli_rs"
}
return "opencode"
}
2026-01-01 04:01:51 +08:00
// BindStickySession sets session -> account binding with standard TTL.
2026-01-08 23:07:00 +08:00
func ( s * OpenAIGatewayService ) BindStickySession ( ctx context . Context , groupID * int64 , sessionHash string , accountID int64 ) error {
2026-01-01 04:01:51 +08:00
if sessionHash == "" || accountID <= 0 {
return nil
}
2026-02-28 15:01:20 +08:00
ttl := openaiStickySessionTTL
if s != nil && s . cfg != nil && s . cfg . Gateway . OpenAIWS . StickySessionTTLSeconds > 0 {
ttl = time . Duration ( s . cfg . Gateway . OpenAIWS . StickySessionTTLSeconds ) * time . Second
}
return s . setStickySessionAccountID ( ctx , groupID , sessionHash , accountID , ttl )
2026-01-01 04:01:51 +08:00
}
2025-12-22 22:58:31 +08:00
// SelectAccount selects an OpenAI account with sticky session support
2025-12-26 15:40:24 +08:00
func ( s * OpenAIGatewayService ) SelectAccount ( ctx context . Context , groupID * int64 , sessionHash string ) ( * Account , error ) {
2025-12-22 22:58:31 +08:00
return s . SelectAccountForModel ( ctx , groupID , sessionHash , "" )
}
// SelectAccountForModel selects an account supporting the requested model
2025-12-26 15:40:24 +08:00
func ( s * OpenAIGatewayService ) SelectAccountForModel ( ctx context . Context , groupID * int64 , sessionHash string , requestedModel string ) ( * Account , error ) {
2025-12-27 11:44:00 +08:00
return s . SelectAccountForModelWithExclusions ( ctx , groupID , sessionHash , requestedModel , nil )
}
// SelectAccountForModelWithExclusions selects an account supporting the requested model while excluding specified accounts.
2026-01-20 11:19:32 +08:00
// SelectAccountForModelWithExclusions 选择支持指定模型的账号,同时排除指定的账号。
2025-12-27 11:44:00 +08:00
func ( s * OpenAIGatewayService ) SelectAccountForModelWithExclusions ( ctx context . Context , groupID * int64 , sessionHash string , requestedModel string , excludedIDs map [ int64 ] struct { } ) ( * Account , error ) {
2026-04-25 14:40:03 +08:00
return s . selectAccountForModelWithExclusions ( ctx , groupID , sessionHash , requestedModel , excludedIDs , false , 0 )
}
// noAvailableOpenAISelectionError builds the standard "no account available" error
// while preserving the compact-specific error when applicable.
func noAvailableOpenAISelectionError ( requestedModel string , compactBlocked bool ) error {
if compactBlocked {
return ErrNoAvailableCompactAccounts
}
if requestedModel != "" {
return fmt . Errorf ( "no available OpenAI accounts supporting model: %s" , requestedModel )
}
return errors . New ( "no available OpenAI accounts" )
}
// openAICompactSupportTier classifies an OpenAI account by compact capability.
// 0 = explicitly unsupported, 1 = unknown / not yet probed, 2 = explicitly supported.
func openAICompactSupportTier ( account * Account ) int {
if account == nil || ! account . IsOpenAI ( ) {
return 0
}
supported , known := account . OpenAICompactSupportKnown ( )
if ! known {
return 1
}
if supported {
return 2
}
return 0
}
// isOpenAIAccountEligibleForRequest centralises the schedulable / OpenAI / model /
// compact-support checks used during account selection.
func isOpenAIAccountEligibleForRequest ( account * Account , requestedModel string , requireCompact bool ) bool {
if account == nil || ! account . IsSchedulable ( ) || ! account . IsOpenAI ( ) {
return false
}
if requestedModel != "" && ! account . IsModelSupported ( requestedModel ) {
return false
}
if requireCompact && openAICompactSupportTier ( account ) == 0 {
return false
}
return true
2026-02-28 15:01:20 +08:00
}
2026-01-20 11:19:32 +08:00
2026-04-25 14:40:03 +08:00
// prioritizeOpenAICompactAccounts re-orders a slice so that accounts with known
// compact support are tried first, followed by unknown, then explicitly unsupported.
// The relative order within each tier is preserved.
func prioritizeOpenAICompactAccounts ( accounts [ ] * Account ) [ ] * Account {
if len ( accounts ) == 0 {
return nil
}
supported := make ( [ ] * Account , 0 , len ( accounts ) )
unknown := make ( [ ] * Account , 0 , len ( accounts ) )
unsupported := make ( [ ] * Account , 0 , len ( accounts ) )
for _ , account := range accounts {
switch openAICompactSupportTier ( account ) {
case 2 :
supported = append ( supported , account )
case 1 :
unknown = append ( unknown , account )
default :
unsupported = append ( unsupported , account )
}
}
out := make ( [ ] * Account , 0 , len ( accounts ) )
out = append ( out , supported ... )
out = append ( out , unknown ... )
out = append ( out , unsupported ... )
return out
}
// resolveOpenAIAccountUpstreamModelForRequest resolves the upstream model that
// would be sent for a given request, honouring compact-only mappings when the
// caller is on the /responses/compact path.
func resolveOpenAIAccountUpstreamModelForRequest ( account * Account , requestedModel string , requireCompact bool ) string {
upstreamModel := resolveOpenAIForwardModel ( account , requestedModel , "" )
if upstreamModel == "" {
return ""
}
if requireCompact {
return resolveOpenAICompactForwardModel ( account , upstreamModel )
}
return upstreamModel
}
func ( s * OpenAIGatewayService ) selectAccountForModelWithExclusions ( ctx context . Context , groupID * int64 , sessionHash string , requestedModel string , excludedIDs map [ int64 ] struct { } , requireCompact bool , stickyAccountID int64 ) ( * Account , error ) {
2026-04-02 20:28:04 +08:00
if s . checkChannelPricingRestriction ( ctx , groupID , requestedModel ) {
2026-04-03 13:54:18 +08:00
slog . Warn ( "channel pricing restriction blocked request" ,
"group_id" , derefGroupID ( groupID ) ,
"model" , requestedModel )
2026-04-02 20:28:04 +08:00
return nil , fmt . Errorf ( "%w supporting model: %s (channel pricing restriction)" , ErrNoAvailableAccounts , requestedModel )
}
2026-01-20 11:19:32 +08:00
// 1. 尝试粘性会话命中
// Try sticky session hit
2026-04-25 14:40:03 +08:00
if account := s . tryStickySessionHit ( ctx , groupID , sessionHash , requestedModel , excludedIDs , requireCompact , stickyAccountID ) ; account != nil {
2026-01-20 11:19:32 +08:00
return account , nil
2025-12-22 22:58:31 +08:00
}
2026-01-20 11:19:32 +08:00
// 2. 获取可调度的 OpenAI 账号
// Get schedulable OpenAI accounts
2026-01-12 14:19:06 +08:00
accounts , err := s . listSchedulableAccounts ( ctx , groupID )
2025-12-22 22:58:31 +08:00
if err != nil {
return nil , fmt . Errorf ( "query accounts failed: %w" , err )
}
2026-01-20 11:19:32 +08:00
// 3. 按优先级 + LRU 选择最佳账号
// Select by priority + LRU
2026-04-25 14:40:03 +08:00
selected , compactBlocked := s . selectBestAccount ( ctx , groupID , accounts , requestedModel , excludedIDs , requireCompact )
2026-01-20 11:19:32 +08:00
if selected == nil {
2026-04-25 14:40:03 +08:00
return nil , noAvailableOpenAISelectionError ( requestedModel , compactBlocked )
2026-01-20 11:19:32 +08:00
}
// 4. 设置粘性会话绑定
// Set sticky session binding
if sessionHash != "" {
2026-02-28 15:01:20 +08:00
_ = s . setStickySessionAccountID ( ctx , groupID , sessionHash , selected . ID , openaiStickySessionTTL )
2026-01-20 11:19:32 +08:00
}
2026-04-08 10:10:15 -07:00
return s . hydrateSelectedAccount ( ctx , selected )
2026-01-20 11:19:32 +08:00
}
// tryStickySessionHit 尝试从粘性会话获取账号。
// 如果命中且账号可用则返回账号;如果账号不可用则清理会话并返回 nil。
//
// tryStickySessionHit attempts to get account from sticky session.
// Returns account if hit and usable; clears session and returns nil if account is unavailable.
2026-04-25 14:40:03 +08:00
func ( s * OpenAIGatewayService ) tryStickySessionHit ( ctx context . Context , groupID * int64 , sessionHash , requestedModel string , excludedIDs map [ int64 ] struct { } , requireCompact bool , stickyAccountID int64 ) * Account {
2026-01-20 11:19:32 +08:00
if sessionHash == "" {
return nil
}
2026-02-28 15:01:20 +08:00
accountID := stickyAccountID
if accountID <= 0 {
var err error
accountID , err = s . getStickySessionAccountID ( ctx , groupID , sessionHash )
if err != nil || accountID <= 0 {
return nil
}
2026-01-20 11:19:32 +08:00
}
if _ , excluded := excludedIDs [ accountID ] ; excluded {
return nil
}
account , err := s . getSchedulableAccount ( ctx , accountID )
if err != nil {
return nil
}
// 检查账号是否需要清理粘性会话
// Check if sticky session should be cleared
2026-02-07 12:31:10 +08:00
if shouldClearStickySession ( account , requestedModel ) {
2026-02-28 15:01:20 +08:00
_ = s . deleteStickySessionAccountID ( ctx , groupID , sessionHash )
2026-01-20 11:19:32 +08:00
return nil
}
// 验证账号是否可用于当前请求
// Verify account is usable for current request
2026-04-25 14:40:03 +08:00
if ! isOpenAIAccountEligibleForRequest ( account , requestedModel , false ) {
2026-01-20 11:19:32 +08:00
return nil
}
2026-04-25 14:40:03 +08:00
account = s . recheckSelectedOpenAIAccountFromDB ( ctx , account , requestedModel , requireCompact )
2026-03-23 03:50:03 +08:00
if account == nil {
_ = s . deleteStickySessionAccountID ( ctx , groupID , sessionHash )
return nil
}
2026-04-02 20:28:04 +08:00
if groupID != nil && s . needsUpstreamChannelRestrictionCheck ( ctx , groupID ) &&
2026-04-25 14:40:03 +08:00
s . isUpstreamModelRestrictedByChannel ( ctx , * groupID , account , requestedModel , requireCompact ) {
2026-04-02 20:28:04 +08:00
_ = s . deleteStickySessionAccountID ( ctx , groupID , sessionHash )
return nil
}
2026-01-20 11:19:32 +08:00
// 刷新会话 TTL 并返回账号
// Refresh session TTL and return account
2026-02-28 15:01:20 +08:00
_ = s . refreshStickySessionTTL ( ctx , groupID , sessionHash , openaiStickySessionTTL )
2026-01-20 11:19:32 +08:00
return account
}
// selectBestAccount 从候选账号中选择最佳账号(优先级 + LRU) 。
// 返回 nil 表示无可用账号。
//
// selectBestAccount selects the best account from candidates (priority + LRU).
2026-04-25 14:40:03 +08:00
// Returns nil if no available account. The second return reports whether at
// least one candidate was filtered out solely because it lacks compact support
// (only meaningful when requireCompact=true).
func ( s * OpenAIGatewayService ) selectBestAccount ( ctx context . Context , groupID * int64 , accounts [ ] Account , requestedModel string , excludedIDs map [ int64 ] struct { } , requireCompact bool ) ( * Account , bool ) {
2025-12-26 15:40:24 +08:00
var selected * Account
2026-04-25 14:40:03 +08:00
selectedCompactTier := - 1
compactBlocked := false
2026-04-02 20:28:04 +08:00
needsUpstreamCheck := s . needsUpstreamChannelRestrictionCheck ( ctx , groupID )
2026-01-20 11:19:32 +08:00
2025-12-22 22:58:31 +08:00
for i := range accounts {
acc := & accounts [ i ]
2026-01-20 11:19:32 +08:00
// 跳过被排除的账号
// Skip excluded accounts
2025-12-27 11:44:00 +08:00
if _ , excluded := excludedIDs [ acc . ID ] ; excluded {
continue
}
2026-01-20 11:19:32 +08:00
2026-04-25 14:40:03 +08:00
fresh := s . resolveFreshSchedulableOpenAIAccount ( ctx , acc , requestedModel , false )
2026-03-07 20:59:17 +08:00
if fresh == nil {
2025-12-22 22:58:31 +08:00
continue
}
2026-04-25 14:40:03 +08:00
fresh = s . recheckSelectedOpenAIAccountFromDB ( ctx , fresh , requestedModel , false )
2026-03-23 03:50:03 +08:00
if fresh == nil {
continue
}
2026-04-25 14:40:03 +08:00
if needsUpstreamCheck && s . isUpstreamModelRestrictedByChannel ( ctx , * groupID , fresh , requestedModel , requireCompact ) {
2026-04-02 20:28:04 +08:00
continue
}
2026-04-25 14:40:03 +08:00
compactTier := 0
if requireCompact {
compactTier = openAICompactSupportTier ( fresh )
if compactTier == 0 {
compactBlocked = true
continue
}
}
2026-01-20 11:19:32 +08:00
// 选择优先级最高且最久未使用的账号
// Select highest priority and least recently used
2025-12-22 22:58:31 +08:00
if selected == nil {
2026-03-07 20:59:17 +08:00
selected = fresh
2026-04-25 14:40:03 +08:00
selectedCompactTier = compactTier
continue
}
// compact 模式下高 tier 优先;同 tier 内才比较 priority/LRU。
if requireCompact && compactTier != selectedCompactTier {
if compactTier > selectedCompactTier {
selected = fresh
selectedCompactTier = compactTier
}
2025-12-22 22:58:31 +08:00
continue
}
2026-01-20 11:19:32 +08:00
2026-03-07 20:59:17 +08:00
if s . isBetterAccount ( fresh , selected ) {
selected = fresh
2026-04-25 14:40:03 +08:00
selectedCompactTier = compactTier
2025-12-22 22:58:31 +08:00
}
}
2026-04-25 14:40:03 +08:00
return selected , compactBlocked
2026-01-20 11:19:32 +08:00
}
2025-12-22 22:58:31 +08:00
2026-01-20 11:19:32 +08:00
// isBetterAccount 判断 candidate 是否比 current 更优。
// 规则:优先级更高(数值更小)优先;同优先级时,未使用过的优先,其次是最久未使用的。
//
// isBetterAccount checks if candidate is better than current.
// Rules: higher priority (lower value) wins; same priority: never used > least recently used.
func ( s * OpenAIGatewayService ) isBetterAccount ( candidate , current * Account ) bool {
// 优先级更高(数值更小)
// Higher priority (lower value)
if candidate . Priority < current . Priority {
return true
}
if candidate . Priority > current . Priority {
return false
2025-12-22 22:58:31 +08:00
}
2026-01-20 11:19:32 +08:00
// 同优先级,比较最后使用时间
// Same priority, compare last used time
switch {
case candidate . LastUsedAt == nil && current . LastUsedAt != nil :
// candidate 从未使用,优先
return true
case candidate . LastUsedAt != nil && current . LastUsedAt == nil :
// current 从未使用,保持
return false
case candidate . LastUsedAt == nil && current . LastUsedAt == nil :
// 都未使用,保持
return false
default :
// 都使用过,选择最久未使用的
return candidate . LastUsedAt . Before ( * current . LastUsedAt )
}
2025-12-22 22:58:31 +08:00
}
2026-01-01 04:01:51 +08:00
// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
func ( s * OpenAIGatewayService ) SelectAccountWithLoadAwareness ( ctx context . Context , groupID * int64 , sessionHash string , requestedModel string , excludedIDs map [ int64 ] struct { } ) ( * AccountSelectionResult , error ) {
2026-04-25 14:40:03 +08:00
return s . selectAccountWithLoadAwareness ( ctx , groupID , sessionHash , requestedModel , excludedIDs , false )
}
func ( s * OpenAIGatewayService ) selectAccountWithLoadAwareness ( ctx context . Context , groupID * int64 , sessionHash string , requestedModel string , excludedIDs map [ int64 ] struct { } , requireCompact bool ) ( * AccountSelectionResult , error ) {
2026-04-02 20:28:04 +08:00
if s . checkChannelPricingRestriction ( ctx , groupID , requestedModel ) {
2026-04-03 13:54:18 +08:00
slog . Warn ( "channel pricing restriction blocked request" ,
"group_id" , derefGroupID ( groupID ) ,
"model" , requestedModel )
2026-04-02 20:28:04 +08:00
return nil , fmt . Errorf ( "%w supporting model: %s (channel pricing restriction)" , ErrNoAvailableAccounts , requestedModel )
}
2026-01-01 04:01:51 +08:00
cfg := s . schedulingConfig ( )
2026-04-02 20:28:04 +08:00
needsUpstreamCheck := s . needsUpstreamChannelRestrictionCheck ( ctx , groupID )
2026-01-01 04:01:51 +08:00
var stickyAccountID int64
if sessionHash != "" && s . cache != nil {
2026-02-28 15:01:20 +08:00
if accountID , err := s . getStickySessionAccountID ( ctx , groupID , sessionHash ) ; err == nil {
2026-01-01 04:01:51 +08:00
stickyAccountID = accountID
}
}
if s . concurrencyService == nil || ! cfg . LoadBatchEnabled {
2026-04-25 14:40:03 +08:00
account , err := s . selectAccountForModelWithExclusions ( ctx , groupID , sessionHash , requestedModel , excludedIDs , requireCompact , stickyAccountID )
2026-01-01 04:01:51 +08:00
if err != nil {
return nil , err
}
result , err := s . tryAcquireAccountSlot ( ctx , account . ID , account . Concurrency )
if err == nil && result . Acquired {
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , account , true , result . ReleaseFunc , nil )
2026-01-01 04:01:51 +08:00
}
if stickyAccountID > 0 && stickyAccountID == account . ID && s . concurrencyService != nil {
waitingCount , _ := s . concurrencyService . GetAccountWaitingCount ( ctx , account . ID )
if waitingCount < cfg . StickySessionMaxWaiting {
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , account , false , nil , & AccountWaitPlan {
AccountID : account . ID ,
MaxConcurrency : account . Concurrency ,
Timeout : cfg . StickySessionWaitTimeout ,
MaxWaiting : cfg . StickySessionMaxWaiting ,
} )
2026-01-01 04:01:51 +08:00
}
}
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , account , false , nil , & AccountWaitPlan {
AccountID : account . ID ,
MaxConcurrency : account . Concurrency ,
Timeout : cfg . FallbackWaitTimeout ,
MaxWaiting : cfg . FallbackMaxWaiting ,
} )
2026-01-01 04:01:51 +08:00
}
accounts , err := s . listSchedulableAccounts ( ctx , groupID )
if err != nil {
return nil , err
}
if len ( accounts ) == 0 {
2026-03-15 17:25:35 +08:00
return nil , ErrNoAvailableAccounts
2026-01-01 04:01:51 +08:00
}
isExcluded := func ( accountID int64 ) bool {
if excludedIDs == nil {
return false
}
_ , excluded := excludedIDs [ accountID ]
return excluded
}
// ============ Layer 1: Sticky session ============
if sessionHash != "" {
2026-02-28 15:01:20 +08:00
accountID := stickyAccountID
if accountID > 0 && ! isExcluded ( accountID ) {
2026-01-12 14:19:06 +08:00
account , err := s . getSchedulableAccount ( ctx , accountID )
2026-01-20 11:19:32 +08:00
if err == nil {
2026-02-07 12:31:10 +08:00
clearSticky := shouldClearStickySession ( account , requestedModel )
2026-01-20 11:19:32 +08:00
if clearSticky {
2026-02-28 15:01:20 +08:00
_ = s . deleteStickySessionAccountID ( ctx , groupID , sessionHash )
2026-01-01 04:01:51 +08:00
}
2026-04-25 14:40:03 +08:00
if ! clearSticky && isOpenAIAccountEligibleForRequest ( account , requestedModel , false ) {
account = s . recheckSelectedOpenAIAccountFromDB ( ctx , account , requestedModel , requireCompact )
2026-03-23 03:50:03 +08:00
if account == nil {
_ = s . deleteStickySessionAccountID ( ctx , groupID , sessionHash )
2026-04-25 14:40:03 +08:00
} else if needsUpstreamCheck && s . isUpstreamModelRestrictedByChannel ( ctx , * groupID , account , requestedModel , requireCompact ) {
2026-04-02 20:28:04 +08:00
_ = s . deleteStickySessionAccountID ( ctx , groupID , sessionHash )
2026-03-23 03:50:03 +08:00
} else {
result , err := s . tryAcquireAccountSlot ( ctx , accountID , account . Concurrency )
if err == nil && result . Acquired {
_ = s . refreshStickySessionTTL ( ctx , groupID , sessionHash , openaiStickySessionTTL )
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , account , true , result . ReleaseFunc , nil )
2026-03-23 03:50:03 +08:00
}
2026-01-01 04:01:51 +08:00
2026-03-23 03:50:03 +08:00
waitingCount , _ := s . concurrencyService . GetAccountWaitingCount ( ctx , accountID )
if waitingCount < cfg . StickySessionMaxWaiting {
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , account , false , nil , & AccountWaitPlan {
AccountID : accountID ,
MaxConcurrency : account . Concurrency ,
Timeout : cfg . StickySessionWaitTimeout ,
MaxWaiting : cfg . StickySessionMaxWaiting ,
} )
2026-03-23 03:50:03 +08:00
}
2026-01-20 11:19:32 +08:00
}
2026-01-01 04:01:51 +08:00
}
}
}
}
// ============ Layer 2: Load-aware selection ============
2026-04-25 14:40:03 +08:00
baseCandidateCount := 0
2026-01-01 04:01:51 +08:00
candidates := make ( [ ] * Account , 0 , len ( accounts ) )
for i := range accounts {
acc := & accounts [ i ]
if isExcluded ( acc . ID ) {
continue
}
2026-01-13 22:49:26 -08:00
// Scheduler snapshots can be temporarily stale (bucket rebuild is throttled);
// re-check schedulability here so recently rate-limited/overloaded accounts
// are not selected again before the bucket is rebuilt.
if ! acc . IsSchedulable ( ) {
continue
}
2026-01-01 04:01:51 +08:00
if requestedModel != "" && ! acc . IsModelSupported ( requestedModel ) {
continue
}
2026-04-25 14:40:03 +08:00
if needsUpstreamCheck && s . isUpstreamModelRestrictedByChannel ( ctx , * groupID , acc , requestedModel , requireCompact ) {
2026-04-02 20:28:04 +08:00
continue
}
2026-04-25 14:40:03 +08:00
baseCandidateCount ++
2026-01-01 04:01:51 +08:00
candidates = append ( candidates , acc )
}
if len ( candidates ) == 0 {
2026-03-15 17:25:35 +08:00
return nil , ErrNoAvailableAccounts
2026-01-01 04:01:51 +08:00
}
accountLoads := make ( [ ] AccountWithConcurrency , 0 , len ( candidates ) )
for _ , acc := range candidates {
accountLoads = append ( accountLoads , AccountWithConcurrency {
ID : acc . ID ,
2026-03-06 05:07:10 +08:00
MaxConcurrency : acc . EffectiveLoadFactor ( ) ,
2026-01-01 04:01:51 +08:00
} )
}
loadMap , err := s . concurrencyService . GetAccountsLoadBatch ( ctx , accountLoads )
if err != nil {
ordered := append ( [ ] * Account ( nil ) , candidates ... )
sortAccountsByPriorityAndLastUsed ( ordered , false )
2026-04-25 14:40:03 +08:00
if requireCompact {
ordered = prioritizeOpenAICompactAccounts ( ordered )
}
2026-01-01 04:01:51 +08:00
for _ , acc := range ordered {
2026-04-25 14:40:03 +08:00
fresh := s . resolveFreshSchedulableOpenAIAccount ( ctx , acc , requestedModel , false )
2026-03-07 20:59:17 +08:00
if fresh == nil {
continue
}
2026-04-25 14:40:03 +08:00
fresh = s . recheckSelectedOpenAIAccountFromDB ( ctx , fresh , requestedModel , requireCompact )
if fresh == nil {
continue
}
if needsUpstreamCheck && s . isUpstreamModelRestrictedByChannel ( ctx , * groupID , fresh , requestedModel , requireCompact ) {
2026-04-02 20:28:04 +08:00
continue
}
2026-03-07 20:59:17 +08:00
result , err := s . tryAcquireAccountSlot ( ctx , fresh . ID , fresh . Concurrency )
2026-01-01 04:01:51 +08:00
if err == nil && result . Acquired {
if sessionHash != "" {
2026-03-07 20:59:17 +08:00
_ = s . setStickySessionAccountID ( ctx , groupID , sessionHash , fresh . ID , openaiStickySessionTTL )
2026-01-01 04:01:51 +08:00
}
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , fresh , true , result . ReleaseFunc , nil )
2026-01-01 04:01:51 +08:00
}
}
} else {
var available [ ] accountWithLoad
for _ , acc := range candidates {
loadInfo := loadMap [ acc . ID ]
if loadInfo == nil {
loadInfo = & AccountLoadInfo { AccountID : acc . ID }
}
if loadInfo . LoadRate < 100 {
available = append ( available , accountWithLoad {
account : acc ,
loadInfo : loadInfo ,
} )
}
}
if len ( available ) > 0 {
sort . SliceStable ( available , func ( i , j int ) bool {
a , b := available [ i ] , available [ j ]
if a . account . Priority != b . account . Priority {
return a . account . Priority < b . account . Priority
}
if a . loadInfo . LoadRate != b . loadInfo . LoadRate {
return a . loadInfo . LoadRate < b . loadInfo . LoadRate
}
switch {
case a . account . LastUsedAt == nil && b . account . LastUsedAt != nil :
return true
case a . account . LastUsedAt != nil && b . account . LastUsedAt == nil :
return false
case a . account . LastUsedAt == nil && b . account . LastUsedAt == nil :
return false
default :
return a . account . LastUsedAt . Before ( * b . account . LastUsedAt )
}
} )
2026-02-09 07:33:17 +08:00
shuffleWithinSortGroups ( available )
2026-01-01 04:01:51 +08:00
2026-04-25 14:40:03 +08:00
selectionOrder := make ( [ ] accountWithLoad , 0 , len ( available ) )
if requireCompact {
appendTier := func ( out [ ] accountWithLoad , tier int ) [ ] accountWithLoad {
for _ , item := range available {
if openAICompactSupportTier ( item . account ) == tier {
out = append ( out , item )
}
}
return out
}
selectionOrder = appendTier ( selectionOrder , 2 )
selectionOrder = appendTier ( selectionOrder , 1 )
// tier 0 候选作为兜底追加: DB recheck 时若发现 cache tier 0 实际
// 已升级为 1/2( 探测刚跑完, cache 尚未刷新),仍可正常命中。
selectionOrder = appendTier ( selectionOrder , 0 )
} else {
selectionOrder = append ( selectionOrder , available ... )
}
for _ , item := range selectionOrder {
fresh := s . resolveFreshSchedulableOpenAIAccount ( ctx , item . account , requestedModel , false )
if fresh == nil {
continue
}
fresh = s . recheckSelectedOpenAIAccountFromDB ( ctx , fresh , requestedModel , requireCompact )
2026-03-07 20:59:17 +08:00
if fresh == nil {
continue
}
2026-04-25 14:40:03 +08:00
if needsUpstreamCheck && s . isUpstreamModelRestrictedByChannel ( ctx , * groupID , fresh , requestedModel , requireCompact ) {
2026-04-02 20:28:04 +08:00
continue
}
2026-03-07 20:59:17 +08:00
result , err := s . tryAcquireAccountSlot ( ctx , fresh . ID , fresh . Concurrency )
2026-01-01 04:01:51 +08:00
if err == nil && result . Acquired {
if sessionHash != "" {
2026-03-07 20:59:17 +08:00
_ = s . setStickySessionAccountID ( ctx , groupID , sessionHash , fresh . ID , openaiStickySessionTTL )
2026-01-01 04:01:51 +08:00
}
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , fresh , true , result . ReleaseFunc , nil )
2026-01-01 04:01:51 +08:00
}
}
}
}
// ============ Layer 3: Fallback wait ============
sortAccountsByPriorityAndLastUsed ( candidates , false )
2026-04-25 14:40:03 +08:00
if requireCompact {
candidates = prioritizeOpenAICompactAccounts ( candidates )
}
2026-01-01 04:01:51 +08:00
for _ , acc := range candidates {
2026-04-25 14:40:03 +08:00
fresh := s . resolveFreshSchedulableOpenAIAccount ( ctx , acc , requestedModel , false )
2026-03-07 20:59:17 +08:00
if fresh == nil {
continue
}
2026-04-25 14:40:03 +08:00
fresh = s . recheckSelectedOpenAIAccountFromDB ( ctx , fresh , requestedModel , requireCompact )
if fresh == nil {
continue
}
if needsUpstreamCheck && s . isUpstreamModelRestrictedByChannel ( ctx , * groupID , fresh , requestedModel , requireCompact ) {
2026-04-02 20:28:04 +08:00
continue
}
2026-04-08 10:10:15 -07:00
return s . newSelectionResult ( ctx , fresh , false , nil , & AccountWaitPlan {
AccountID : fresh . ID ,
MaxConcurrency : fresh . Concurrency ,
Timeout : cfg . FallbackWaitTimeout ,
MaxWaiting : cfg . FallbackMaxWaiting ,
} )
2026-01-01 04:01:51 +08:00
}
2026-04-25 14:40:03 +08:00
if requireCompact && baseCandidateCount > 0 {
return nil , ErrNoAvailableCompactAccounts
}
2026-03-15 17:25:35 +08:00
return nil , ErrNoAvailableAccounts
2026-01-01 04:01:51 +08:00
}
func ( s * OpenAIGatewayService ) listSchedulableAccounts ( ctx context . Context , groupID * int64 ) ( [ ] Account , error ) {
2026-01-12 14:19:06 +08:00
if s . schedulerSnapshot != nil {
accounts , _ , err := s . schedulerSnapshot . ListSchedulableAccounts ( ctx , groupID , PlatformOpenAI , false )
return accounts , err
}
2026-01-01 04:01:51 +08:00
var accounts [ ] Account
var err error
if s . cfg != nil && s . cfg . RunMode == config . RunModeSimple {
accounts , err = s . accountRepo . ListSchedulableByPlatform ( ctx , PlatformOpenAI )
} else if groupID != nil {
accounts , err = s . accountRepo . ListSchedulableByGroupIDAndPlatform ( ctx , * groupID , PlatformOpenAI )
} else {
2026-03-03 13:10:26 +08:00
accounts , err = s . accountRepo . ListSchedulableUngroupedByPlatform ( ctx , PlatformOpenAI )
2026-01-01 04:01:51 +08:00
}
if err != nil {
return nil , fmt . Errorf ( "query accounts failed: %w" , err )
}
return accounts , nil
}
func ( s * OpenAIGatewayService ) tryAcquireAccountSlot ( ctx context . Context , accountID int64 , maxConcurrency int ) ( * AcquireResult , error ) {
if s . concurrencyService == nil {
return & AcquireResult { Acquired : true , ReleaseFunc : func ( ) { } } , nil
}
return s . concurrencyService . AcquireAccountSlot ( ctx , accountID , maxConcurrency )
}
2026-04-25 14:40:03 +08:00
func ( s * OpenAIGatewayService ) resolveFreshSchedulableOpenAIAccount ( ctx context . Context , account * Account , requestedModel string , requireCompact bool ) * Account {
2026-03-07 20:59:17 +08:00
if account == nil {
return nil
}
fresh := account
if s . schedulerSnapshot != nil {
current , err := s . getSchedulableAccount ( ctx , account . ID )
if err != nil || current == nil {
return nil
}
fresh = current
}
2026-04-25 14:40:03 +08:00
if ! isOpenAIAccountEligibleForRequest ( fresh , requestedModel , requireCompact ) {
2026-03-07 20:59:17 +08:00
return nil
}
return fresh
}
2026-04-25 14:40:03 +08:00
func ( s * OpenAIGatewayService ) recheckSelectedOpenAIAccountFromDB ( ctx context . Context , account * Account , requestedModel string , requireCompact bool ) * Account {
2026-03-23 03:50:03 +08:00
if account == nil {
return nil
}
if s . schedulerSnapshot == nil || s . accountRepo == nil {
2026-04-25 14:40:03 +08:00
if ! isOpenAIAccountEligibleForRequest ( account , requestedModel , requireCompact ) {
return nil
}
2026-03-23 03:50:03 +08:00
return account
}
latest , err := s . accountRepo . GetByID ( ctx , account . ID )
if err != nil || latest == nil {
return nil
}
2026-04-25 14:40:03 +08:00
if ! isOpenAIAccountEligibleForRequest ( latest , requestedModel , requireCompact ) {
2026-03-23 03:50:03 +08:00
return nil
}
return latest
}
2026-01-12 14:19:06 +08:00
func ( s * OpenAIGatewayService ) getSchedulableAccount ( ctx context . Context , accountID int64 ) ( * Account , error ) {
2026-03-08 00:14:15 +08:00
var (
account * Account
err error
)
2026-01-12 14:19:06 +08:00
if s . schedulerSnapshot != nil {
2026-03-08 00:14:15 +08:00
account , err = s . schedulerSnapshot . GetAccount ( ctx , accountID )
} else {
account , err = s . accountRepo . GetByID ( ctx , accountID )
}
if err != nil || account == nil {
return account , err
2026-01-12 14:19:06 +08:00
}
2026-03-08 00:14:15 +08:00
return account , nil
2026-01-12 14:19:06 +08:00
}
2026-04-08 10:10:15 -07:00
func ( s * OpenAIGatewayService ) hydrateSelectedAccount ( ctx context . Context , account * Account ) ( * Account , error ) {
if account == nil || s . schedulerSnapshot == nil {
return account , nil
}
hydrated , err := s . schedulerSnapshot . GetAccount ( ctx , account . ID )
if err != nil {
return nil , err
}
if hydrated == nil {
return nil , fmt . Errorf ( "selected openai account %d not found during hydration" , account . ID )
}
return hydrated , nil
}
func ( s * OpenAIGatewayService ) newSelectionResult ( ctx context . Context , account * Account , acquired bool , release func ( ) , waitPlan * AccountWaitPlan ) ( * AccountSelectionResult , error ) {
hydrated , err := s . hydrateSelectedAccount ( ctx , account )
if err != nil {
return nil , err
}
return & AccountSelectionResult {
Account : hydrated ,
Acquired : acquired ,
ReleaseFunc : release ,
WaitPlan : waitPlan ,
} , nil
}
2026-01-01 04:01:51 +08:00
func ( s * OpenAIGatewayService ) schedulingConfig ( ) config . GatewaySchedulingConfig {
if s . cfg != nil {
return s . cfg . Gateway . Scheduling
}
return config . GatewaySchedulingConfig {
StickySessionMaxWaiting : 3 ,
StickySessionWaitTimeout : 45 * time . Second ,
FallbackWaitTimeout : 30 * time . Second ,
FallbackMaxWaiting : 100 ,
LoadBatchEnabled : true ,
SlotCleanupInterval : 30 * time . Second ,
}
}
2025-12-22 22:58:31 +08:00
// GetAccessToken gets the access token for an OpenAI account
2025-12-26 15:40:24 +08:00
func ( s * OpenAIGatewayService ) GetAccessToken ( ctx context . Context , account * Account ) ( string , string , error ) {
2025-12-23 10:25:32 +08:00
switch account . Type {
2025-12-26 15:40:24 +08:00
case AccountTypeOAuth :
2026-01-15 18:27:06 +08:00
// 使用 TokenProvider 获取缓存的 token
if s . openAITokenProvider != nil {
accessToken , err := s . openAITokenProvider . GetAccessToken ( ctx , account )
if err != nil {
return "" , "" , err
}
return accessToken , "oauth" , nil
}
// 降级: TokenProvider 未配置时直接从账号读取
2025-12-22 22:58:31 +08:00
accessToken := account . GetOpenAIAccessToken ( )
if accessToken == "" {
return "" , "" , errors . New ( "access_token not found in credentials" )
}
return accessToken , "oauth" , nil
2026-01-04 19:27:53 +08:00
case AccountTypeAPIKey :
2025-12-22 22:58:31 +08:00
apiKey := account . GetOpenAIApiKey ( )
if apiKey == "" {
return "" , "" , errors . New ( "api_key not found in credentials" )
}
return apiKey , "apikey" , nil
2025-12-23 10:25:32 +08:00
default :
return "" , "" , fmt . Errorf ( "unsupported account type: %s" , account . Type )
2025-12-22 22:58:31 +08:00
}
}
2025-12-27 11:44:00 +08:00
func ( s * OpenAIGatewayService ) shouldFailoverUpstreamError ( statusCode int ) bool {
switch statusCode {
2025-12-31 11:46:53 +08:00
case 401 , 402 , 403 , 429 , 529 :
2025-12-27 11:44:00 +08:00
return true
default :
return statusCode >= 500
}
}
2026-03-10 03:00:58 +08:00
func ( s * OpenAIGatewayService ) shouldFailoverOpenAIUpstreamResponse ( statusCode int , upstreamMsg string , upstreamBody [ ] byte ) bool {
if s . shouldFailoverUpstreamError ( statusCode ) {
return true
}
return isOpenAITransientProcessingError ( statusCode , upstreamMsg , upstreamBody )
}
2025-12-27 11:44:00 +08:00
func ( s * OpenAIGatewayService ) handleFailoverSideEffects ( ctx context . Context , resp * http . Response , account * Account ) {
2026-01-11 15:30:27 +08:00
body , _ := io . ReadAll ( io . LimitReader ( resp . Body , 2 << 20 ) )
2025-12-27 11:44:00 +08:00
s . rateLimitService . HandleUpstreamError ( ctx , account , resp . StatusCode , resp . Header , body )
}
2025-12-22 22:58:31 +08:00
// Forward forwards request to OpenAI API
2025-12-26 15:40:24 +08:00
func ( s * OpenAIGatewayService ) Forward ( ctx context . Context , c * gin . Context , account * Account , body [ ] byte ) ( * OpenAIForwardResult , error ) {
2025-12-22 22:58:31 +08:00
startTime := time . Now ( )
2026-02-12 22:32:59 +08:00
restrictionResult := s . detectCodexClientRestriction ( c , account )
apiKeyID := getAPIKeyIDFromContext ( c )
2026-02-13 19:27:07 +08:00
logCodexCLIOnlyDetection ( ctx , c , account , apiKeyID , restrictionResult , body )
2026-02-12 22:32:59 +08:00
if restrictionResult . Enabled && ! restrictionResult . Matched {
c . JSON ( http . StatusForbidden , gin . H {
"error" : gin . H {
"type" : "forbidden_error" ,
"message" : "This account only allows Codex official clients" ,
} ,
} )
return nil , errors . New ( "codex_cli_only restriction: only codex official clients are allowed" )
}
2026-02-11 00:59:39 +08:00
originalBody := body
2026-02-12 09:41:37 +08:00
reqModel , reqStream , promptCacheKey := extractOpenAIRequestMetaFromBody ( body )
2025-12-22 22:58:31 +08:00
originalModel := reqModel
2026-03-07 14:12:38 +08:00
isCodexCLI := openai . IsCodexOfficialClientByHeaders ( c . GetHeader ( "User-Agent" ) , c . GetHeader ( "originator" ) ) || ( s . cfg != nil && s . cfg . Gateway . ForceCodexCLI )
2026-02-28 15:01:20 +08:00
wsDecision := s . getOpenAIWSProtocolResolver ( ) . Resolve ( account )
clientTransport := GetOpenAIClientTransport ( c )
// 仅允许 WS 入站请求走 WS 上游,避免出现 HTTP -> WS 协议混用。
wsDecision = resolveOpenAIWSDecisionByClientTransport ( wsDecision , clientTransport )
if c != nil {
c . Set ( "openai_ws_transport_decision" , string ( wsDecision . Transport ) )
c . Set ( "openai_ws_transport_reason" , wsDecision . Reason )
}
if wsDecision . Transport == OpenAIUpstreamTransportResponsesWebsocketV2 {
logOpenAIWSModeDebug (
"selected account_id=%d account_type=%s transport=%s reason=%s model=%s stream=%v" ,
account . ID ,
account . Type ,
normalizeOpenAIWSLogValue ( string ( wsDecision . Transport ) ) ,
normalizeOpenAIWSLogValue ( wsDecision . Reason ) ,
reqModel ,
reqStream ,
)
}
// 当前仅支持 WSv2; WSv1 命中时直接返回错误,避免出现“配置可开但行为不确定”。
if wsDecision . Transport == OpenAIUpstreamTransportResponsesWebsocket {
if c != nil {
c . JSON ( http . StatusBadRequest , gin . H {
"error" : gin . H {
"type" : "invalid_request_error" ,
"message" : "OpenAI WSv1 is temporarily unsupported. Please enable responses_websockets_v2." ,
} ,
} )
}
return nil , errors . New ( "openai ws v1 is temporarily unsupported; use ws v2" )
}
2026-02-12 10:56:07 +08:00
passthroughEnabled := account . IsOpenAIPassthroughEnabled ( )
2026-02-11 00:59:39 +08:00
if passthroughEnabled {
2026-02-12 09:41:37 +08:00
// 透传分支只需要轻量提取字段,避免热路径全量 Unmarshal。
reasoningEffort := extractOpenAIReasoningEffortFromBody ( body , reqModel )
2026-02-12 10:56:07 +08:00
return s . forwardOpenAIPassthrough ( ctx , c , account , originalBody , reqModel , reasoningEffort , reqStream , startTime )
2026-02-11 00:59:39 +08:00
}
2026-02-12 09:41:37 +08:00
reqBody , err := getOpenAIRequestBodyMap ( c , body )
if err != nil {
return nil , err
}
if v , ok := reqBody [ "model" ] . ( string ) ; ok {
reqModel = v
originalModel = reqModel
}
if v , ok := reqBody [ "stream" ] . ( bool ) ; ok {
reqStream = v
}
if promptCacheKey == "" {
if v , ok := reqBody [ "prompt_cache_key" ] . ( string ) ; ok {
promptCacheKey = strings . TrimSpace ( v )
}
}
// Track if body needs re-serialization
bodyModified := false
2026-02-28 15:01:20 +08:00
// 单字段补丁快速路径:只要整个变更集最终可归约为同一路径的 set/delete, 就避免全量 Marshal。
patchDisabled := false
patchHasOp := false
patchDelete := false
patchPath := ""
var patchValue any
markPatchSet := func ( path string , value any ) {
if strings . TrimSpace ( path ) == "" {
patchDisabled = true
return
}
if patchDisabled {
return
}
if ! patchHasOp {
patchHasOp = true
patchDelete = false
patchPath = path
patchValue = value
return
}
if patchDelete || patchPath != path {
patchDisabled = true
return
}
patchValue = value
}
markPatchDelete := func ( path string ) {
if strings . TrimSpace ( path ) == "" {
patchDisabled = true
return
}
if patchDisabled {
return
}
if ! patchHasOp {
patchHasOp = true
patchDelete = true
patchPath = path
return
}
if ! patchDelete || patchPath != path {
patchDisabled = true
}
}
disablePatch := func ( ) {
patchDisabled = true
}
2026-02-12 09:41:37 +08:00
2026-03-07 13:39:47 +08:00
// 非透传模式下, instructions 为空时注入默认指令。
if isInstructionsEmpty ( reqBody ) {
reqBody [ "instructions" ] = "You are a helpful coding assistant."
bodyModified = true
markPatchSet ( "instructions" , "You are a helpful coding assistant." )
2026-02-12 10:56:07 +08:00
}
2026-04-23 15:13:57 +00:00
if isCodexCLI && ensureOpenAIResponsesImageGenerationTool ( reqBody ) {
bodyModified = true
disablePatch ( )
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Injected /responses image_generation tool for Codex client" )
}
2026-04-23 09:53:57 +08:00
if normalizeOpenAIResponsesImageGenerationTools ( reqBody ) {
bodyModified = true
disablePatch ( )
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Normalized /responses image_generation tool payload" )
}
2026-04-23 15:13:57 +00:00
if isCodexCLI && applyCodexImageGenerationBridgeInstructions ( reqBody ) {
bodyModified = true
disablePatch ( )
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Added Codex image_generation bridge instructions" )
}
2026-04-23 09:53:57 +08:00
2026-01-13 17:01:21 +08:00
// 对所有请求执行模型映射(包含 Codex CLI) 。
2026-03-24 19:20:15 +08:00
billingModel := account . GetMappedModel ( reqModel )
if billingModel != reqModel {
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Model mapping applied: %s -> %s (account: %s, isCodexCLI: %v)" , reqModel , billingModel , account . Name , isCodexCLI )
reqBody [ "model" ] = billingModel
2026-01-12 13:23:05 -08:00
bodyModified = true
2026-03-24 19:20:15 +08:00
markPatchSet ( "model" , billingModel )
2025-12-22 22:58:31 +08:00
}
2026-03-24 19:20:15 +08:00
upstreamModel := billingModel
2026-04-23 15:13:57 +00:00
if normalizeOpenAIResponsesImageOnlyModel ( reqBody ) {
bodyModified = true
disablePatch ( )
if model , ok := reqBody [ "model" ] . ( string ) ; ok {
upstreamModel = strings . TrimSpace ( model )
}
logger . LegacyPrintf (
"service.openai_gateway" ,
"[OpenAI] Normalized /responses image-only model request inbound_model=%s image_model=%s upstream_model=%s" ,
reqModel ,
billingModel ,
upstreamModel ,
)
}
2026-04-23 09:53:57 +08:00
if err := validateOpenAIResponsesImageModel ( reqBody , upstreamModel ) ; err != nil {
setOpsUpstreamError ( c , http . StatusBadRequest , err . Error ( ) , "" )
c . JSON ( http . StatusBadRequest , gin . H {
"error" : gin . H {
"type" : "invalid_request_error" ,
"message" : err . Error ( ) ,
"param" : "model" ,
} ,
} )
return nil , err
}
if hasOpenAIImageGenerationTool ( reqBody ) {
logger . LegacyPrintf (
"service.openai_gateway" ,
"[OpenAI] /responses image_generation request inbound_model=%s mapped_model=%s account_type=%s" ,
reqModel ,
upstreamModel ,
account . Type ,
)
}
2026-04-24 07:42:31 +00:00
if err := validateCodexSparkInput ( reqBody , upstreamModel ) ; err != nil {
setOpsUpstreamError ( c , http . StatusBadRequest , err . Error ( ) , "" )
c . JSON ( http . StatusBadRequest , gin . H {
"error" : gin . H {
"type" : "invalid_request_error" ,
"message" : err . Error ( ) ,
"param" : "input" ,
} ,
} )
return nil , err
}
2025-12-22 22:58:31 +08:00
2026-04-25 14:40:03 +08:00
// Compact-only model 映射:仅在 /responses/compact 路径生效,且优先级高于
// OAuth 模型规范化(避免 OAuth 规范化覆盖 compact-only 自定义模型)。
isCompactRequest := isOpenAIResponsesCompactPath ( c )
compactMapped := false
if isCompactRequest {
compactMappedModel := resolveOpenAICompactForwardModel ( account , billingModel )
if compactMappedModel != "" && compactMappedModel != billingModel {
compactMapped = true
upstreamModel = compactMappedModel
reqBody [ "model" ] = compactMappedModel
bodyModified = true
markPatchSet ( "model" , compactMappedModel )
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Compact model mapping applied: %s -> %s (account: %s, isCodexCLI: %v)" , billingModel , compactMappedModel , account . Name , isCodexCLI )
}
}
2026-04-07 11:27:57 +03:00
// OpenAI OAuth 账号走 ChatGPT internal Codex endpoint, 需要将模型名规范化为
// 上游可识别的 Codex/GPT 系列。API Key 账号则应保留原始/映射后的模型名,
// 以兼容自定义 base_url 的 OpenAI-compatible 上游。
2026-01-12 20:18:53 -08:00
if model , ok := reqBody [ "model" ] . ( string ) ; ok {
2026-04-25 14:40:03 +08:00
if ! compactMapped {
upstreamModel = normalizeOpenAIModelForUpstream ( account , model )
if upstreamModel != "" && upstreamModel != model {
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Upstream model resolved: %s -> %s (account: %s, type: %s, isCodexCLI: %v)" ,
model , upstreamModel , account . Name , account . Type , isCodexCLI )
reqBody [ "model" ] = upstreamModel
bodyModified = true
markPatchSet ( "model" , upstreamModel )
}
2026-01-12 20:18:53 -08:00
}
2026-03-11 21:12:07 +08:00
// 移除 gpt-5.2-codex 以下的版本 verbosity 参数
// 确保高版本模型向低版本模型映射不报错
2026-03-24 19:20:15 +08:00
if ! SupportsVerbosity ( upstreamModel ) {
2026-03-11 21:12:07 +08:00
if text , ok := reqBody [ "text" ] . ( map [ string ] any ) ; ok {
2026-03-11 22:56:20 +08:00
delete ( text , "verbosity" )
2026-03-11 21:12:07 +08:00
}
}
2026-01-12 20:18:53 -08:00
}
2026-01-13 17:01:21 +08:00
// 规范化 reasoning.effort 参数( minimal -> none) , 与上游允许值对齐。
2026-01-12 20:18:53 -08:00
if reasoning , ok := reqBody [ "reasoning" ] . ( map [ string ] any ) ; ok {
if effort , ok := reasoning [ "effort" ] . ( string ) ; ok && effort == "minimal" {
reasoning [ "effort" ] = "none"
bodyModified = true
2026-02-28 15:01:20 +08:00
markPatchSet ( "reasoning.effort" , "none" )
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Normalized reasoning.effort: minimal -> none (account: %s)" , account . Name )
2026-01-12 20:18:53 -08:00
}
}
2026-02-03 21:22:33 +08:00
if account . Type == AccountTypeOAuth {
2026-04-25 14:40:03 +08:00
codexResult := applyCodexOAuthTransform ( reqBody , isCodexCLI , isCompactRequest )
2026-01-09 18:35:58 +08:00
if codexResult . Modified {
2026-01-09 00:34:49 +08:00
bodyModified = true
2026-02-28 15:01:20 +08:00
disablePatch ( )
2026-01-09 00:34:49 +08:00
}
2026-01-09 18:35:58 +08:00
if codexResult . NormalizedModel != "" {
2026-03-24 19:20:15 +08:00
upstreamModel = codexResult . NormalizedModel
2026-01-09 18:35:58 +08:00
}
if codexResult . PromptCacheKey != "" {
promptCacheKey = codexResult . PromptCacheKey
}
2025-12-22 22:58:31 +08:00
}
2026-01-12 11:08:28 -08:00
// Handle max_output_tokens based on platform and account type
if ! isCodexCLI {
if maxOutputTokens , hasMaxOutputTokens := reqBody [ "max_output_tokens" ] ; hasMaxOutputTokens {
switch account . Platform {
case PlatformOpenAI :
// For OpenAI API Key, remove max_output_tokens (not supported)
// For OpenAI OAuth (Responses API), keep it (supported)
if account . Type == AccountTypeAPIKey {
delete ( reqBody , "max_output_tokens" )
bodyModified = true
2026-02-28 15:01:20 +08:00
markPatchDelete ( "max_output_tokens" )
2026-01-12 11:08:28 -08:00
}
case PlatformAnthropic :
// For Anthropic (Claude), convert to max_tokens
delete ( reqBody , "max_output_tokens" )
2026-02-28 15:01:20 +08:00
markPatchDelete ( "max_output_tokens" )
2026-01-12 11:08:28 -08:00
if _ , hasMaxTokens := reqBody [ "max_tokens" ] ; ! hasMaxTokens {
reqBody [ "max_tokens" ] = maxOutputTokens
2026-02-28 15:01:20 +08:00
disablePatch ( )
2026-01-12 11:08:28 -08:00
}
bodyModified = true
case PlatformGemini :
// For Gemini, remove (will be handled by Gemini-specific transform)
delete ( reqBody , "max_output_tokens" )
bodyModified = true
2026-02-28 15:01:20 +08:00
markPatchDelete ( "max_output_tokens" )
2026-01-12 11:08:28 -08:00
default :
// For unknown platforms, remove to be safe
delete ( reqBody , "max_output_tokens" )
bodyModified = true
2026-02-28 15:01:20 +08:00
markPatchDelete ( "max_output_tokens" )
2026-01-12 11:08:28 -08:00
}
}
// Also handle max_completion_tokens (similar logic)
if _ , hasMaxCompletionTokens := reqBody [ "max_completion_tokens" ] ; hasMaxCompletionTokens {
if account . Type == AccountTypeAPIKey || account . Platform != PlatformOpenAI {
delete ( reqBody , "max_completion_tokens" )
bodyModified = true
2026-02-28 15:01:20 +08:00
markPatchDelete ( "max_completion_tokens" )
2026-01-12 11:08:28 -08:00
}
}
2026-01-17 16:06:44 +08:00
2026-02-04 15:56:01 +08:00
// Remove unsupported fields (not supported by upstream OpenAI API)
2026-02-28 15:01:20 +08:00
unsupportedFields := [ ] string { "prompt_cache_retention" , "safety_identifier" }
for _ , unsupportedField := range unsupportedFields {
2026-02-04 15:56:01 +08:00
if _ , has := reqBody [ unsupportedField ] ; has {
delete ( reqBody , unsupportedField )
bodyModified = true
2026-02-28 15:01:20 +08:00
markPatchDelete ( unsupportedField )
2026-02-04 15:56:01 +08:00
}
2026-01-17 16:06:44 +08:00
}
2026-01-12 11:08:28 -08:00
}
2026-02-28 15:01:20 +08:00
// 仅在 WSv2 模式保留 previous_response_id, 其他模式( HTTP/WSv1) 统一过滤。
// 注意:该规则同样适用于 Codex CLI 请求,避免 WSv1 向上游透传不支持字段。
if wsDecision . Transport != OpenAIUpstreamTransportResponsesWebsocketV2 {
if _ , has := reqBody [ "previous_response_id" ] ; has {
delete ( reqBody , "previous_response_id" )
bodyModified = true
markPatchDelete ( "previous_response_id" )
}
}
2026-04-01 00:46:38 +08:00
if sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap ( reqBody ) {
bodyModified = true
disablePatch ( )
}
2026-04-28 00:34:23 +08:00
// Apply OpenAI fast policy (参照 Claude BetaPolicy 的 fast-mode 过滤):
// 针对 body 的 service_tier 字段("priority" 即 fast, "flex"),按策略
// 执行 filter( 删除字段) 或 block( 拒绝请求) 。对 gpt-5.5 等模型屏蔽
// fast 时在此生效。
//
// 注意:
// 1. 此处统一使用 upstreamModel( 已经过 GetMappedModel +
// normalizeOpenAIModelForUpstream + Codex OAuth normalize) , 与
// chat-completions / messages 入口保持一致,避免不同入口因为模型
// 维度不同而出现 whitelist 命中差异。
// 2. action=pass 时也要把 raw "fast" 归一化为 "priority" 写回 body,
// 否则 native /responses 入口透传 "fast" 给上游会被拒。chat-
// completions 入口由 normalizeResponsesBodyServiceTier 完成同一
// 行为,这里手工实现等效逻辑。
if rawTier , ok := reqBody [ "service_tier" ] . ( string ) ; ok {
if normTier := normalizedOpenAIServiceTierValue ( rawTier ) ; normTier != "" {
action , errMsg := s . evaluateOpenAIFastPolicy ( ctx , account , upstreamModel , normTier )
switch action {
case BetaPolicyActionBlock :
msg := errMsg
if msg == "" {
msg = fmt . Sprintf ( "openai service_tier=%s is not allowed for model %s" , normTier , upstreamModel )
}
blocked := & OpenAIFastBlockedError { Message : msg }
writeOpenAIFastPolicyBlockedResponse ( c , blocked )
return nil , blocked
case BetaPolicyActionFilter :
delete ( reqBody , "service_tier" )
bodyModified = true
disablePatch ( )
default :
// pass: 若客户端传的是别名 "fast",归一化为 "priority"
// 后写回 body, 确保上游收到的是其能识别的规范值。
if normTier != rawTier {
reqBody [ "service_tier" ] = normTier
bodyModified = true
markPatchSet ( "service_tier" , normTier )
}
}
}
}
2025-12-22 22:58:31 +08:00
// Re-serialize body only if modified
if bodyModified {
2026-02-28 15:01:20 +08:00
serializedByPatch := false
if ! patchDisabled && patchHasOp {
var patchErr error
if patchDelete {
body , patchErr = sjson . DeleteBytes ( body , patchPath )
} else {
body , patchErr = sjson . SetBytes ( body , patchPath , patchValue )
}
if patchErr == nil {
serializedByPatch = true
}
}
if ! serializedByPatch {
var marshalErr error
body , marshalErr = json . Marshal ( reqBody )
if marshalErr != nil {
return nil , fmt . Errorf ( "serialize request body: %w" , marshalErr )
}
2025-12-22 22:58:31 +08:00
}
}
// Get access token
token , _ , err := s . GetAccessToken ( ctx , account )
if err != nil {
return nil , err
}
2026-02-28 15:01:20 +08:00
// Capture upstream request body for ops retry of this attempt.
setOpsUpstreamRequestBody ( c , body )
// 命中 WS 时仅走 WebSocket Mode; 不再自动回退 HTTP。
if wsDecision . Transport == OpenAIUpstreamTransportResponsesWebsocketV2 {
wsReqBody := reqBody
if len ( reqBody ) > 0 {
wsReqBody = make ( map [ string ] any , len ( reqBody ) )
for k , v := range reqBody {
wsReqBody [ k ] = v
}
}
_ , hasPreviousResponseID := wsReqBody [ "previous_response_id" ]
logOpenAIWSModeDebug (
"forward_start account_id=%d account_type=%s model=%s stream=%v has_previous_response_id=%v" ,
account . ID ,
account . Type ,
2026-03-24 19:20:15 +08:00
upstreamModel ,
2026-02-28 15:01:20 +08:00
reqStream ,
hasPreviousResponseID ,
)
maxAttempts := openAIWSReconnectRetryLimit + 1
wsAttempts := 0
var wsResult * OpenAIForwardResult
var wsErr error
wsLastFailureReason := ""
wsPrevResponseRecoveryTried := false
2026-03-14 11:42:42 +08:00
wsInvalidEncryptedContentRecoveryTried := false
2026-02-28 15:01:20 +08:00
recoverPrevResponseNotFound := func ( attempt int ) bool {
if wsPrevResponseRecoveryTried {
return false
}
previousResponseID := openAIWSPayloadString ( wsReqBody , "previous_response_id" )
if previousResponseID == "" {
logOpenAIWSModeInfo (
"reconnect_prev_response_recovery_skip account_id=%d attempt=%d reason=missing_previous_response_id previous_response_id_present=false" ,
account . ID ,
attempt ,
)
return false
}
if HasFunctionCallOutput ( wsReqBody ) {
logOpenAIWSModeInfo (
"reconnect_prev_response_recovery_skip account_id=%d attempt=%d reason=has_function_call_output previous_response_id_present=true" ,
account . ID ,
attempt ,
)
return false
}
delete ( wsReqBody , "previous_response_id" )
wsPrevResponseRecoveryTried = true
logOpenAIWSModeInfo (
"reconnect_prev_response_recovery account_id=%d attempt=%d action=drop_previous_response_id retry=1 previous_response_id=%s previous_response_id_kind=%s" ,
account . ID ,
attempt ,
truncateOpenAIWSLogValue ( previousResponseID , openAIWSIDValueMaxLen ) ,
normalizeOpenAIWSLogValue ( ClassifyOpenAIPreviousResponseIDKind ( previousResponseID ) ) ,
)
return true
}
2026-03-14 11:42:42 +08:00
recoverInvalidEncryptedContent := func ( attempt int ) bool {
if wsInvalidEncryptedContentRecoveryTried {
return false
}
removedReasoningItems := trimOpenAIEncryptedReasoningItems ( wsReqBody )
if ! removedReasoningItems {
logOpenAIWSModeInfo (
"reconnect_invalid_encrypted_content_recovery_skip account_id=%d attempt=%d reason=missing_encrypted_reasoning_items" ,
account . ID ,
attempt ,
)
return false
}
previousResponseID := openAIWSPayloadString ( wsReqBody , "previous_response_id" )
hasFunctionCallOutput := HasFunctionCallOutput ( wsReqBody )
if previousResponseID != "" && ! hasFunctionCallOutput {
delete ( wsReqBody , "previous_response_id" )
}
wsInvalidEncryptedContentRecoveryTried = true
logOpenAIWSModeInfo (
"reconnect_invalid_encrypted_content_recovery account_id=%d attempt=%d action=drop_encrypted_reasoning_items retry=1 previous_response_id_present=%v previous_response_id=%s previous_response_id_kind=%s has_function_call_output=%v dropped_previous_response_id=%v" ,
account . ID ,
attempt ,
previousResponseID != "" ,
truncateOpenAIWSLogValue ( previousResponseID , openAIWSIDValueMaxLen ) ,
normalizeOpenAIWSLogValue ( ClassifyOpenAIPreviousResponseIDKind ( previousResponseID ) ) ,
hasFunctionCallOutput ,
previousResponseID != "" && ! hasFunctionCallOutput ,
)
return true
}
2026-02-28 15:01:20 +08:00
retryBudget := s . openAIWSRetryTotalBudget ( )
retryStartedAt := time . Now ( )
wsRetryLoop :
for attempt := 1 ; attempt <= maxAttempts ; attempt ++ {
wsAttempts = attempt
wsResult , wsErr = s . forwardOpenAIWSV2 (
ctx ,
c ,
account ,
wsReqBody ,
token ,
wsDecision ,
isCodexCLI ,
reqStream ,
originalModel ,
2026-03-24 19:20:15 +08:00
upstreamModel ,
2026-02-28 15:01:20 +08:00
startTime ,
attempt ,
wsLastFailureReason ,
)
if wsErr == nil {
break
}
if c != nil && c . Writer != nil && c . Writer . Written ( ) {
break
}
reason , retryable := classifyOpenAIWSReconnectReason ( wsErr )
if reason != "" {
wsLastFailureReason = reason
}
// previous_response_not_found 说明续链锚点不可用:
// 对非 function_call_output 场景,允许一次“去掉 previous_response_id 后重放”。
if reason == "previous_response_not_found" && recoverPrevResponseNotFound ( attempt ) {
continue
}
2026-03-14 11:42:42 +08:00
if reason == "invalid_encrypted_content" && recoverInvalidEncryptedContent ( attempt ) {
continue
}
2026-02-28 15:01:20 +08:00
if retryable && attempt < maxAttempts {
backoff := s . openAIWSRetryBackoff ( attempt )
if retryBudget > 0 && time . Since ( retryStartedAt ) + backoff > retryBudget {
s . recordOpenAIWSRetryExhausted ( )
logOpenAIWSModeInfo (
"reconnect_budget_exhausted account_id=%d attempts=%d max_retries=%d reason=%s elapsed_ms=%d budget_ms=%d" ,
account . ID ,
attempt ,
openAIWSReconnectRetryLimit ,
normalizeOpenAIWSLogValue ( reason ) ,
time . Since ( retryStartedAt ) . Milliseconds ( ) ,
retryBudget . Milliseconds ( ) ,
)
break
}
s . recordOpenAIWSRetryAttempt ( backoff )
logOpenAIWSModeInfo (
"reconnect_retry account_id=%d retry=%d max_retries=%d reason=%s backoff_ms=%d" ,
account . ID ,
attempt ,
openAIWSReconnectRetryLimit ,
normalizeOpenAIWSLogValue ( reason ) ,
backoff . Milliseconds ( ) ,
)
if backoff > 0 {
timer := time . NewTimer ( backoff )
select {
case <- ctx . Done ( ) :
if ! timer . Stop ( ) {
<- timer . C
}
wsErr = wrapOpenAIWSFallback ( "retry_backoff_canceled" , ctx . Err ( ) )
break wsRetryLoop
case <- timer . C :
}
}
continue
}
if retryable {
s . recordOpenAIWSRetryExhausted ( )
logOpenAIWSModeInfo (
"reconnect_exhausted account_id=%d attempts=%d max_retries=%d reason=%s" ,
account . ID ,
attempt ,
openAIWSReconnectRetryLimit ,
normalizeOpenAIWSLogValue ( reason ) ,
)
} else if reason != "" {
s . recordOpenAIWSNonRetryableFastFallback ( )
logOpenAIWSModeInfo (
"reconnect_stop account_id=%d attempt=%d reason=%s" ,
account . ID ,
attempt ,
normalizeOpenAIWSLogValue ( reason ) ,
)
}
break
}
if wsErr == nil {
firstTokenMs := int64 ( 0 )
hasFirstTokenMs := wsResult != nil && wsResult . FirstTokenMs != nil
if hasFirstTokenMs {
firstTokenMs = int64 ( * wsResult . FirstTokenMs )
}
requestID := ""
if wsResult != nil {
requestID = strings . TrimSpace ( wsResult . RequestID )
}
logOpenAIWSModeDebug (
"forward_succeeded account_id=%d request_id=%s stream=%v has_first_token_ms=%v first_token_ms=%d ws_attempts=%d" ,
account . ID ,
requestID ,
reqStream ,
hasFirstTokenMs ,
firstTokenMs ,
wsAttempts ,
)
2026-03-24 19:20:15 +08:00
wsResult . UpstreamModel = upstreamModel
2026-02-28 15:01:20 +08:00
return wsResult , nil
}
s . writeOpenAIWSFallbackErrorResponse ( c , account , wsErr )
return nil , wsErr
}
2026-03-14 11:42:42 +08:00
httpInvalidEncryptedContentRetryTried := false
for {
// Build upstream request
2026-03-14 13:04:24 +08:00
upstreamCtx , releaseUpstreamCtx := detachStreamUpstreamContext ( ctx , reqStream )
upstreamReq , err := s . buildUpstreamRequest ( upstreamCtx , c , account , body , token , reqStream , promptCacheKey , isCodexCLI )
releaseUpstreamCtx ( )
2026-03-14 11:42:42 +08:00
if err != nil {
return nil , err
}
2025-12-22 22:58:31 +08:00
2026-03-14 11:42:42 +08:00
// Get proxy URL
proxyURL := ""
if account . ProxyID != nil && account . Proxy != nil {
proxyURL = account . Proxy . URL ( )
}
2025-12-22 22:58:31 +08:00
2026-03-14 11:42:42 +08:00
// Send request
upstreamStart := time . Now ( )
resp , err := s . httpUpstream . Do ( upstreamReq , proxyURL , account . ID , account . Concurrency )
SetOpsLatencyMs ( c , OpsUpstreamLatencyMsKey , time . Since ( upstreamStart ) . Milliseconds ( ) )
if err != nil {
// Ensure the client receives an error response (handlers assume Forward writes on non-failover errors).
safeErr := sanitizeUpstreamErrorMessage ( err . Error ( ) )
setOpsUpstreamError ( c , 0 , safeErr , "" )
2026-01-11 15:30:27 +08:00
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
2026-01-15 15:14:44 +08:00
AccountName : account . Name ,
2026-03-14 11:42:42 +08:00
UpstreamStatusCode : 0 ,
Kind : "request_error" ,
Message : safeErr ,
} )
c . JSON ( http . StatusBadGateway , gin . H {
"error" : gin . H {
"type" : "upstream_error" ,
"message" : "Upstream request failed" ,
} ,
2026-01-11 15:30:27 +08:00
} )
2026-03-14 11:42:42 +08:00
return nil , fmt . Errorf ( "upstream request failed: %s" , safeErr )
}
// Handle error response
if resp . StatusCode >= 400 {
respBody , _ := io . ReadAll ( io . LimitReader ( resp . Body , 2 << 20 ) )
_ = resp . Body . Close ( )
resp . Body = io . NopCloser ( bytes . NewReader ( respBody ) )
upstreamMsg := strings . TrimSpace ( extractUpstreamErrorMessage ( respBody ) )
upstreamMsg = sanitizeUpstreamErrorMessage ( upstreamMsg )
upstreamCode := extractUpstreamErrorCode ( respBody )
if ! httpInvalidEncryptedContentRetryTried && resp . StatusCode == http . StatusBadRequest && upstreamCode == "invalid_encrypted_content" {
if trimOpenAIEncryptedReasoningItems ( reqBody ) {
body , err = json . Marshal ( reqBody )
if err != nil {
return nil , fmt . Errorf ( "serialize invalid_encrypted_content retry body: %w" , err )
}
setOpsUpstreamRequestBody ( c , body )
httpInvalidEncryptedContentRetryTried = true
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Retrying non-WSv2 request once after invalid_encrypted_content (account: %s)" , account . Name )
continue
}
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI] Skip non-WSv2 invalid_encrypted_content retry because encrypted reasoning items are missing (account: %s)" , account . Name )
}
if s . shouldFailoverOpenAIUpstreamResponse ( resp . StatusCode , upstreamMsg , respBody ) {
upstreamDetail := ""
if s . cfg != nil && s . cfg . Gateway . LogUpstreamErrorBody {
maxBytes := s . cfg . Gateway . LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString ( string ( respBody ) , maxBytes )
}
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : resp . StatusCode ,
UpstreamRequestID : resp . Header . Get ( "x-request-id" ) ,
Kind : "failover" ,
Message : upstreamMsg ,
Detail : upstreamDetail ,
} )
2026-01-11 15:30:27 +08:00
2026-03-14 11:42:42 +08:00
s . handleFailoverSideEffects ( ctx , resp , account )
return nil , & UpstreamFailoverError {
StatusCode : resp . StatusCode ,
ResponseBody : respBody ,
RetryableOnSameAccount : account . IsPoolMode ( ) && ( isPoolModeRetryableStatus ( resp . StatusCode ) || isOpenAITransientProcessingError ( resp . StatusCode , upstreamMsg , respBody ) ) ,
}
2026-03-08 13:57:23 +08:00
}
2026-03-14 11:42:42 +08:00
return s . handleErrorResponse ( ctx , resp , c , account , body )
2025-12-27 11:44:00 +08:00
}
2026-03-14 11:42:42 +08:00
defer func ( ) { _ = resp . Body . Close ( ) } ( )
2025-12-22 22:58:31 +08:00
2026-03-14 11:42:42 +08:00
// Handle normal response
var usage * OpenAIUsage
var firstTokenMs * int
if reqStream {
2026-03-24 19:20:15 +08:00
streamResult , err := s . handleStreamingResponse ( ctx , resp , c , account , startTime , originalModel , upstreamModel )
2026-03-14 11:42:42 +08:00
if err != nil {
return nil , err
}
usage = streamResult . usage
firstTokenMs = streamResult . firstTokenMs
} else {
2026-03-24 19:20:15 +08:00
usage , err = s . handleNonStreamingResponse ( ctx , resp , c , account , originalModel , upstreamModel )
2026-03-14 11:42:42 +08:00
if err != nil {
return nil , err
}
2025-12-22 22:58:31 +08:00
}
2026-03-14 11:42:42 +08:00
// Extract and save Codex usage snapshot from response headers (for OAuth accounts)
if account . Type == AccountTypeOAuth {
if snapshot := ParseCodexRateLimitHeaders ( resp . Header ) ; snapshot != nil {
s . updateCodexUsageSnapshot ( ctx , account . ID , snapshot )
}
2025-12-23 16:26:07 +08:00
}
2026-03-14 11:42:42 +08:00
if usage == nil {
usage = & OpenAIUsage { }
}
2026-02-07 16:15:30 +08:00
2026-03-14 11:42:42 +08:00
reasoningEffort := extractOpenAIReasoningEffort ( reqBody , originalModel )
serviceTier := extractOpenAIServiceTier ( reqBody )
2026-02-03 14:36:29 +08:00
2026-03-14 11:42:42 +08:00
return & OpenAIForwardResult {
RequestID : resp . Header . Get ( "x-request-id" ) ,
Usage : * usage ,
Model : originalModel ,
2026-03-24 19:20:15 +08:00
UpstreamModel : upstreamModel ,
2026-03-14 11:42:42 +08:00
ServiceTier : serviceTier ,
ReasoningEffort : reasoningEffort ,
Stream : reqStream ,
OpenAIWSMode : false ,
Duration : time . Since ( startTime ) ,
FirstTokenMs : firstTokenMs ,
} , nil
}
2025-12-22 22:58:31 +08:00
}
2026-02-12 10:56:07 +08:00
func ( s * OpenAIGatewayService ) forwardOpenAIPassthrough (
2026-02-11 00:59:39 +08:00
ctx context . Context ,
c * gin . Context ,
account * Account ,
body [ ] byte ,
reqModel string ,
reasoningEffort * string ,
reqStream bool ,
startTime time . Time ,
) ( * OpenAIForwardResult , error ) {
2026-04-25 14:40:03 +08:00
upstreamPassthroughModel := ""
if isOpenAIResponsesCompactPath ( c ) {
compactMappedModel := resolveOpenAICompactForwardModel ( account , reqModel )
if compactMappedModel != "" && compactMappedModel != reqModel {
nextBody , setErr := sjson . SetBytes ( body , "model" , compactMappedModel )
if setErr != nil {
return nil , fmt . Errorf ( "set compact passthrough model: %w" , setErr )
}
body = nextBody
upstreamPassthroughModel = compactMappedModel
}
}
2026-02-12 21:02:52 +08:00
if account != nil && account . Type == AccountTypeOAuth {
2026-02-14 10:49:01 +08:00
if rejectReason := detectOpenAIPassthroughInstructionsRejectReason ( reqModel , body ) ; rejectReason != "" {
rejectMsg := "OpenAI codex passthrough requires a non-empty instructions field"
setOpsUpstreamError ( c , http . StatusForbidden , rejectMsg , "" )
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : http . StatusForbidden ,
Passthrough : true ,
Kind : "request_error" ,
Message : rejectMsg ,
Detail : rejectReason ,
} )
logOpenAIPassthroughInstructionsRejected ( ctx , c , account , reqModel , rejectReason , body )
c . JSON ( http . StatusForbidden , gin . H {
"error" : gin . H {
"type" : "forbidden_error" ,
"message" : rejectMsg ,
} ,
} )
return nil , fmt . Errorf ( "openai passthrough rejected before upstream: %s" , rejectReason )
}
2026-03-06 18:50:28 +08:00
normalizedBody , normalized , err := normalizeOpenAIPassthroughOAuthBody ( body , isOpenAIResponsesCompactPath ( c ) )
2026-02-12 21:02:52 +08:00
if err != nil {
return nil , err
}
if normalized {
body = normalizedBody
}
2026-03-06 18:50:28 +08:00
reqStream = gjson . GetBytes ( body , "stream" ) . Bool ( )
2026-02-12 21:02:52 +08:00
}
2026-04-01 00:46:38 +08:00
sanitizedBody , sanitized , err := sanitizeEmptyBase64InputImagesInOpenAIBody ( body )
if err != nil {
return nil , err
}
if sanitized {
body = sanitizedBody
}
2026-04-28 00:34:23 +08:00
// Apply OpenAI fast policy to the passthrough body (filter/block by service_tier).
// 统一使用 upstream 视角的 model: 透传路径下 body 已经过 compact 映射 +
// OAuth normalize, body 中的 model 字段即上游真正会看到的 slug。
// 这样可以与 chat-completions / messages / native /responses 入口的
// upstreamModel 保持一致,避免 whitelist 命中差异。当 body 中没有
// model 字段时退回 reqModel。
policyModel := strings . TrimSpace ( gjson . GetBytes ( body , "model" ) . String ( ) )
if policyModel == "" {
policyModel = reqModel
}
updatedBody , policyErr := s . applyOpenAIFastPolicyToBody ( ctx , account , policyModel , body )
if policyErr != nil {
var blocked * OpenAIFastBlockedError
if errors . As ( policyErr , & blocked ) {
writeOpenAIFastPolicyBlockedResponse ( c , blocked )
}
return nil , policyErr
}
body = updatedBody
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" ,
2026-02-12 11:41:06 +08:00
"[OpenAI 自动透传] 命中自动透传分支: account=%d name=%s type=%s model=%s stream=%v" ,
account . ID ,
account . Name ,
account . Type ,
reqModel ,
reqStream ,
)
2026-02-12 14:16:18 +08:00
if reqStream && c != nil && c . Request != nil {
if timeoutHeaders := collectOpenAIPassthroughTimeoutHeaders ( c . Request . Header ) ; len ( timeoutHeaders ) > 0 {
2026-02-12 19:01:09 +08:00
streamWarnLogger := logger . FromContext ( ctx ) . With (
zap . String ( "component" , "service.openai_gateway" ) ,
zap . Int64 ( "account_id" , account . ID ) ,
zap . Strings ( "timeout_headers" , timeoutHeaders ) ,
)
2026-02-12 14:16:18 +08:00
if s . isOpenAIPassthroughTimeoutHeadersAllowed ( ) {
2026-02-12 19:01:09 +08:00
streamWarnLogger . Warn ( "OpenAI passthrough 透传请求包含超时相关请求头,且当前配置为放行,可能导致上游提前断流" )
2026-02-12 14:16:18 +08:00
} else {
2026-02-12 19:01:09 +08:00
streamWarnLogger . Warn ( "OpenAI passthrough 检测到超时相关请求头,将按配置过滤以降低断流风险" )
2026-02-12 14:16:18 +08:00
}
}
}
2026-02-11 00:59:39 +08:00
// Get access token
token , _ , err := s . GetAccessToken ( ctx , account )
if err != nil {
return nil , err
}
2026-03-12 16:53:18 +08:00
upstreamCtx , releaseUpstreamCtx := detachStreamUpstreamContext ( ctx , reqStream )
upstreamReq , err := s . buildUpstreamRequestOpenAIPassthrough ( upstreamCtx , c , account , body , token )
releaseUpstreamCtx ( )
2026-02-11 00:59:39 +08:00
if err != nil {
return nil , err
}
proxyURL := ""
if account . ProxyID != nil && account . Proxy != nil {
proxyURL = account . Proxy . URL ( )
}
2026-02-12 09:41:37 +08:00
setOpsUpstreamRequestBody ( c , body )
2026-02-11 00:59:39 +08:00
if c != nil {
c . Set ( "openai_passthrough" , true )
}
2026-02-12 09:41:37 +08:00
upstreamStart := time . Now ( )
2026-02-11 00:59:39 +08:00
resp , err := s . httpUpstream . Do ( upstreamReq , proxyURL , account . ID , account . Concurrency )
2026-02-12 09:41:37 +08:00
SetOpsLatencyMs ( c , OpsUpstreamLatencyMsKey , time . Since ( upstreamStart ) . Milliseconds ( ) )
2026-02-11 00:59:39 +08:00
if err != nil {
safeErr := sanitizeUpstreamErrorMessage ( err . Error ( ) )
setOpsUpstreamError ( c , 0 , safeErr , "" )
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : 0 ,
Passthrough : true ,
Kind : "request_error" ,
Message : safeErr ,
} )
c . JSON ( http . StatusBadGateway , gin . H {
"error" : gin . H {
"type" : "upstream_error" ,
"message" : "Upstream request failed" ,
} ,
} )
return nil , fmt . Errorf ( "upstream request failed: %s" , safeErr )
}
defer func ( ) { _ = resp . Body . Close ( ) } ( )
if resp . StatusCode >= 400 {
2026-03-30 22:29:26 +08:00
// 透传模式默认保持原样代理;但 429/529 属于网关必须兜底的
// 上游容量类错误,应先触发多账号 failover 以维持基础 SLA。
if shouldFailoverOpenAIPassthroughResponse ( resp . StatusCode ) {
return nil , s . handleFailoverErrorResponsePassthrough ( ctx , resp , c , account , body )
}
2026-02-13 19:27:07 +08:00
return nil , s . handleErrorResponsePassthrough ( ctx , resp , c , account , body )
2026-02-11 00:59:39 +08:00
}
var usage * OpenAIUsage
var firstTokenMs * int
if reqStream {
2026-04-25 14:40:03 +08:00
result , err := s . handleStreamingResponsePassthrough ( ctx , resp , c , account , startTime , reqModel , upstreamPassthroughModel )
2026-02-11 00:59:39 +08:00
if err != nil {
return nil , err
}
usage = result . usage
firstTokenMs = result . firstTokenMs
} else {
2026-04-25 14:40:03 +08:00
usage , err = s . handleNonStreamingResponsePassthrough ( ctx , resp , c , reqModel , upstreamPassthroughModel )
2026-02-11 00:59:39 +08:00
if err != nil {
return nil , err
}
}
if snapshot := ParseCodexRateLimitHeaders ( resp . Header ) ; snapshot != nil {
s . updateCodexUsageSnapshot ( ctx , account . ID , snapshot )
}
if usage == nil {
usage = & OpenAIUsage { }
}
return & OpenAIForwardResult {
RequestID : resp . Header . Get ( "x-request-id" ) ,
Usage : * usage ,
Model : reqModel ,
2026-04-25 14:40:03 +08:00
UpstreamModel : upstreamPassthroughModel ,
2026-03-08 23:22:28 +08:00
ServiceTier : extractOpenAIServiceTierFromBody ( body ) ,
2026-02-11 00:59:39 +08:00
ReasoningEffort : reasoningEffort ,
Stream : reqStream ,
2026-02-28 15:01:20 +08:00
OpenAIWSMode : false ,
2026-02-11 00:59:39 +08:00
Duration : time . Since ( startTime ) ,
FirstTokenMs : firstTokenMs ,
} , nil
}
2026-02-14 10:49:01 +08:00
func logOpenAIPassthroughInstructionsRejected (
ctx context . Context ,
c * gin . Context ,
account * Account ,
reqModel string ,
rejectReason string ,
body [ ] byte ,
) {
if ctx == nil {
ctx = context . Background ( )
}
accountID := int64 ( 0 )
accountName := ""
accountType := ""
if account != nil {
accountID = account . ID
accountName = strings . TrimSpace ( account . Name )
accountType = strings . TrimSpace ( string ( account . Type ) )
}
fields := [ ] zap . Field {
zap . String ( "component" , "service.openai_gateway" ) ,
zap . Int64 ( "account_id" , accountID ) ,
zap . String ( "account_name" , accountName ) ,
zap . String ( "account_type" , accountType ) ,
zap . String ( "request_model" , strings . TrimSpace ( reqModel ) ) ,
zap . String ( "reject_reason" , strings . TrimSpace ( rejectReason ) ) ,
}
fields = appendCodexCLIOnlyRejectedRequestFields ( fields , c , body )
logger . FromContext ( ctx ) . With ( fields ... ) . Warn ( "OpenAI passthrough 本地拦截: Codex 请求缺少有效 instructions" )
}
2026-02-12 10:56:07 +08:00
func ( s * OpenAIGatewayService ) buildUpstreamRequestOpenAIPassthrough (
2026-02-11 00:59:39 +08:00
ctx context . Context ,
c * gin . Context ,
account * Account ,
body [ ] byte ,
token string ,
) ( * http . Request , error ) {
2026-02-12 10:56:07 +08:00
targetURL := openaiPlatformAPIURL
switch account . Type {
case AccountTypeOAuth :
targetURL = chatgptCodexURL
case AccountTypeAPIKey :
baseURL := account . GetOpenAIBaseURL ( )
if baseURL != "" {
validatedURL , err := s . validateUpstreamBaseURL ( baseURL )
if err != nil {
return nil , err
}
targetURL = buildOpenAIResponsesURL ( validatedURL )
}
}
2026-03-06 18:50:28 +08:00
targetURL = appendOpenAIResponsesRequestPathSuffix ( targetURL , openAIResponsesRequestPathSuffix ( c ) )
2026-02-12 10:56:07 +08:00
req , err := http . NewRequestWithContext ( ctx , http . MethodPost , targetURL , bytes . NewReader ( body ) )
2026-02-11 00:59:39 +08:00
if err != nil {
return nil , err
}
2026-02-12 20:12:15 +08:00
// 透传客户端请求头(安全白名单)。
2026-02-12 14:16:18 +08:00
allowTimeoutHeaders := s . isOpenAIPassthroughTimeoutHeadersAllowed ( )
2026-02-11 00:59:39 +08:00
if c != nil && c . Request != nil {
for key , values := range c . Request . Header {
2026-02-12 20:12:15 +08:00
lower := strings . ToLower ( strings . TrimSpace ( key ) )
if ! isOpenAIPassthroughAllowedRequestHeader ( lower , allowTimeoutHeaders ) {
2026-02-12 14:16:18 +08:00
continue
}
2026-02-11 00:59:39 +08:00
for _ , v := range values {
req . Header . Add ( key , v )
}
}
}
// 覆盖入站鉴权残留,并注入上游认证
req . Header . Del ( "authorization" )
req . Header . Del ( "x-api-key" )
req . Header . Del ( "x-goog-api-key" )
req . Header . Set ( "authorization" , "Bearer " + token )
2026-02-12 10:56:07 +08:00
// OAuth 透传到 ChatGPT internal API 时补齐必要头。
if account . Type == AccountTypeOAuth {
2026-02-12 20:12:15 +08:00
promptCacheKey := strings . TrimSpace ( gjson . GetBytes ( body , "prompt_cache_key" ) . String ( ) )
2026-02-12 10:56:07 +08:00
req . Host = "chatgpt.com"
if chatgptAccountID := account . GetChatGPTAccountID ( ) ; chatgptAccountID != "" {
req . Header . Set ( "chatgpt-account-id" , chatgptAccountID )
}
2026-03-16 10:28:11 +08:00
apiKeyID := getAPIKeyIDFromContext ( c )
// 先保存客户端原始值,再做 compact 补充,避免后续统一隔离时读到已处理的值。
clientSessionID := strings . TrimSpace ( req . Header . Get ( "session_id" ) )
clientConversationID := strings . TrimSpace ( req . Header . Get ( "conversation_id" ) )
2026-03-06 18:50:28 +08:00
if isOpenAIResponsesCompactPath ( c ) {
req . Header . Set ( "accept" , "application/json" )
if req . Header . Get ( "version" ) == "" {
req . Header . Set ( "version" , codexCLIVersion )
}
2026-03-16 10:28:11 +08:00
if clientSessionID == "" {
clientSessionID = resolveOpenAICompactSessionID ( c )
2026-03-06 18:50:28 +08:00
}
} else if req . Header . Get ( "accept" ) == "" {
2026-02-12 20:12:15 +08:00
req . Header . Set ( "accept" , "text/event-stream" )
}
2026-02-12 10:56:07 +08:00
if req . Header . Get ( "OpenAI-Beta" ) == "" {
req . Header . Set ( "OpenAI-Beta" , "responses=experimental" )
}
if req . Header . Get ( "originator" ) == "" {
req . Header . Set ( "originator" , "codex_cli_rs" )
}
2026-03-16 10:28:11 +08:00
// 用隔离后的 session 标识符覆盖客户端透传值,防止跨用户会话碰撞。
if clientSessionID == "" {
clientSessionID = promptCacheKey
}
if clientConversationID == "" {
clientConversationID = promptCacheKey
}
if clientSessionID != "" {
req . Header . Set ( "session_id" , isolateOpenAISessionID ( apiKeyID , clientSessionID ) )
}
if clientConversationID != "" {
req . Header . Set ( "conversation_id" , isolateOpenAISessionID ( apiKeyID , clientConversationID ) )
2026-02-12 20:12:15 +08:00
}
}
// 透传模式也支持账户自定义 User-Agent 与 ForceCodexCLI 兜底。
customUA := account . GetOpenAIUserAgent ( )
if customUA != "" {
req . Header . Set ( "user-agent" , customUA )
}
if s . cfg != nil && s . cfg . Gateway . ForceCodexCLI {
req . Header . Set ( "user-agent" , codexCLIUserAgent )
}
// OAuth 安全透传:对非 Codex UA 统一兜底,降低被上游风控拦截概率。
if account . Type == AccountTypeOAuth && ! openai . IsCodexCLIRequest ( req . Header . Get ( "user-agent" ) ) {
req . Header . Set ( "user-agent" , codexCLIUserAgent )
2026-02-11 00:59:39 +08:00
}
if req . Header . Get ( "content-type" ) == "" {
req . Header . Set ( "content-type" , "application/json" )
}
return req , nil
}
2026-03-30 22:29:26 +08:00
func shouldFailoverOpenAIPassthroughResponse ( statusCode int ) bool {
switch statusCode {
case http . StatusTooManyRequests , 529 :
return true
default :
return false
}
}
func ( s * OpenAIGatewayService ) handleFailoverErrorResponsePassthrough (
ctx context . Context ,
resp * http . Response ,
c * gin . Context ,
account * Account ,
requestBody [ ] byte ,
) error {
body , _ := io . ReadAll ( io . LimitReader ( resp . Body , 2 << 20 ) )
upstreamMsg := strings . TrimSpace ( extractUpstreamErrorMessage ( body ) )
upstreamMsg = sanitizeUpstreamErrorMessage ( upstreamMsg )
upstreamDetail := ""
if s . cfg != nil && s . cfg . Gateway . LogUpstreamErrorBody {
maxBytes := s . cfg . Gateway . LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString ( string ( body ) , maxBytes )
}
setOpsUpstreamError ( c , resp . StatusCode , upstreamMsg , upstreamDetail )
logOpenAIInstructionsRequiredDebug ( ctx , c , account , resp . StatusCode , upstreamMsg , requestBody , body )
if s . rateLimitService != nil {
_ = s . rateLimitService . HandleUpstreamError ( ctx , account , resp . StatusCode , resp . Header , body )
}
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : resp . StatusCode ,
UpstreamRequestID : resp . Header . Get ( "x-request-id" ) ,
Passthrough : true ,
Kind : "failover" ,
Message : upstreamMsg ,
Detail : upstreamDetail ,
UpstreamResponseBody : upstreamDetail ,
} )
return & UpstreamFailoverError {
StatusCode : resp . StatusCode ,
ResponseBody : body ,
ResponseHeaders : resp . Header . Clone ( ) ,
}
}
2026-02-13 19:27:07 +08:00
func ( s * OpenAIGatewayService ) handleErrorResponsePassthrough (
ctx context . Context ,
resp * http . Response ,
c * gin . Context ,
account * Account ,
requestBody [ ] byte ,
) error {
2026-02-11 00:59:39 +08:00
body , _ := io . ReadAll ( io . LimitReader ( resp . Body , 2 << 20 ) )
upstreamMsg := strings . TrimSpace ( extractUpstreamErrorMessage ( body ) )
upstreamMsg = sanitizeUpstreamErrorMessage ( upstreamMsg )
upstreamDetail := ""
if s . cfg != nil && s . cfg . Gateway . LogUpstreamErrorBody {
maxBytes := s . cfg . Gateway . LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString ( string ( body ) , maxBytes )
}
setOpsUpstreamError ( c , resp . StatusCode , upstreamMsg , upstreamDetail )
2026-02-13 19:27:07 +08:00
logOpenAIInstructionsRequiredDebug ( ctx , c , account , resp . StatusCode , upstreamMsg , requestBody , body )
2026-03-24 01:24:30 +08:00
if s . rateLimitService != nil {
// Passthrough mode preserves the raw upstream error response, but runtime
// account state still needs to be updated so sticky routing can stop
// reusing a freshly rate-limited account.
_ = s . rateLimitService . HandleUpstreamError ( ctx , account , resp . StatusCode , resp . Header , body )
}
2026-02-11 00:59:39 +08:00
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : resp . StatusCode ,
UpstreamRequestID : resp . Header . Get ( "x-request-id" ) ,
Passthrough : true ,
Kind : "http_error" ,
Message : upstreamMsg ,
Detail : upstreamDetail ,
UpstreamResponseBody : upstreamDetail ,
} )
2026-02-28 15:01:20 +08:00
writeOpenAIPassthroughResponseHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
2026-02-11 00:59:39 +08:00
contentType := resp . Header . Get ( "Content-Type" )
if contentType == "" {
contentType = "application/json"
}
c . Data ( resp . StatusCode , contentType , body )
if upstreamMsg == "" {
return fmt . Errorf ( "upstream error: %d" , resp . StatusCode )
}
return fmt . Errorf ( "upstream error: %d message=%s" , resp . StatusCode , upstreamMsg )
}
2026-02-12 20:12:15 +08:00
func isOpenAIPassthroughAllowedRequestHeader ( lowerKey string , allowTimeoutHeaders bool ) bool {
if lowerKey == "" {
2026-02-11 00:59:39 +08:00
return false
}
2026-02-12 20:12:15 +08:00
if isOpenAIPassthroughTimeoutHeader ( lowerKey ) {
return allowTimeoutHeaders
}
return openaiPassthroughAllowedHeaders [ lowerKey ]
2026-02-11 00:59:39 +08:00
}
2026-02-12 14:16:18 +08:00
func isOpenAIPassthroughTimeoutHeader ( lowerKey string ) bool {
switch lowerKey {
case "x-stainless-timeout" , "x-stainless-read-timeout" , "x-stainless-connect-timeout" , "x-request-timeout" , "request-timeout" , "grpc-timeout" :
return true
default :
return false
}
}
func ( s * OpenAIGatewayService ) isOpenAIPassthroughTimeoutHeadersAllowed ( ) bool {
return s != nil && s . cfg != nil && s . cfg . Gateway . OpenAIPassthroughAllowTimeoutHeaders
}
func collectOpenAIPassthroughTimeoutHeaders ( h http . Header ) [ ] string {
if h == nil {
return nil
}
var matched [ ] string
for key , values := range h {
lowerKey := strings . ToLower ( strings . TrimSpace ( key ) )
if isOpenAIPassthroughTimeoutHeader ( lowerKey ) {
entry := lowerKey
if len ( values ) > 0 {
entry = fmt . Sprintf ( "%s=%s" , lowerKey , strings . Join ( values , "|" ) )
}
matched = append ( matched , entry )
}
}
sort . Strings ( matched )
return matched
}
2026-02-11 00:59:39 +08:00
type openaiStreamingResultPassthrough struct {
usage * OpenAIUsage
firstTokenMs * int
}
2026-04-25 15:09:40 +08:00
func openAIStreamClientOutputStarted ( c * gin . Context , localStarted bool ) bool {
if localStarted {
return true
}
return c != nil && c . Writer != nil && c . Writer . Written ( )
}
func openAIStreamEventIsPreamble ( eventType string ) bool {
switch strings . TrimSpace ( eventType ) {
case "response.created" , "response.in_progress" :
return true
default :
return false
}
}
func openAIStreamDataStartsClientOutput ( data , eventType string ) bool {
trimmed := strings . TrimSpace ( data )
if trimmed == "" {
return false
}
if strings . TrimSpace ( eventType ) == "response.failed" {
return false
}
return ! openAIStreamEventIsPreamble ( eventType )
}
func openAIStreamFailedEventShouldFailover ( payload [ ] byte , message string ) bool {
code := strings . ToLower ( strings . TrimSpace ( gjson . GetBytes ( payload , "response.error.code" ) . String ( ) ) )
if code == "" {
code = strings . ToLower ( strings . TrimSpace ( gjson . GetBytes ( payload , "error.code" ) . String ( ) ) )
}
errType := strings . ToLower ( strings . TrimSpace ( gjson . GetBytes ( payload , "response.error.type" ) . String ( ) ) )
if errType == "" {
errType = strings . ToLower ( strings . TrimSpace ( gjson . GetBytes ( payload , "error.type" ) . String ( ) ) )
}
combined := strings . ToLower ( strings . TrimSpace ( message + " " + code + " " + errType ) )
if combined == "" {
return true
}
nonRetryableMarkers := [ ] string {
"invalid_request" ,
"content_policy" ,
"policy" ,
"safety" ,
"high-risk cyber" ,
"not allowed" ,
"violat" ,
}
for _ , marker := range nonRetryableMarkers {
if strings . Contains ( combined , marker ) {
return false
}
}
return true
}
func ( s * OpenAIGatewayService ) newOpenAIStreamFailoverError (
c * gin . Context ,
account * Account ,
passthrough bool ,
upstreamRequestID string ,
payload [ ] byte ,
message string ,
) * UpstreamFailoverError {
message = sanitizeUpstreamErrorMessage ( strings . TrimSpace ( message ) )
if message == "" {
message = "OpenAI stream disconnected before completion"
}
detail := ""
if len ( payload ) > 0 && s != nil && s . cfg != nil && s . cfg . Gateway . LogUpstreamErrorBody {
maxBytes := s . cfg . Gateway . LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
detail = truncateString ( string ( payload ) , maxBytes )
}
if c != nil {
setOpsUpstreamError ( c , http . StatusBadGateway , message , detail )
event := OpsUpstreamErrorEvent {
Platform : PlatformOpenAI ,
UpstreamStatusCode : http . StatusBadGateway ,
UpstreamRequestID : strings . TrimSpace ( upstreamRequestID ) ,
Passthrough : passthrough ,
Kind : "failover" ,
Message : message ,
Detail : detail ,
}
if account != nil {
event . Platform = account . Platform
event . AccountID = account . ID
event . AccountName = account . Name
}
appendOpsUpstreamError ( c , event )
}
body , _ := json . Marshal ( gin . H {
"error" : gin . H {
"type" : "upstream_error" ,
"message" : message ,
} ,
} )
return & UpstreamFailoverError {
StatusCode : http . StatusBadGateway ,
ResponseBody : body ,
}
}
2026-02-11 00:59:39 +08:00
func ( s * OpenAIGatewayService ) handleStreamingResponsePassthrough (
ctx context . Context ,
resp * http . Response ,
c * gin . Context ,
account * Account ,
startTime time . Time ,
2026-04-25 14:40:03 +08:00
originalModel string ,
mappedModel string ,
2026-02-11 00:59:39 +08:00
) ( * openaiStreamingResultPassthrough , error ) {
2026-02-28 15:01:20 +08:00
writeOpenAIPassthroughResponseHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
2026-02-11 00:59:39 +08:00
// SSE headers
c . Header ( "Content-Type" , "text/event-stream" )
c . Header ( "Cache-Control" , "no-cache" )
c . Header ( "Connection" , "keep-alive" )
c . Header ( "X-Accel-Buffering" , "no" )
if v := resp . Header . Get ( "x-request-id" ) ; v != "" {
c . Header ( "x-request-id" , v )
}
w := c . Writer
flusher , ok := w . ( http . Flusher )
if ! ok {
return nil , errors . New ( "streaming not supported" )
}
usage := & OpenAIUsage { }
var firstTokenMs * int
2026-02-11 22:17:38 +08:00
clientDisconnected := false
2026-02-12 14:16:18 +08:00
sawDone := false
2026-03-12 16:53:18 +08:00
sawTerminalEvent := false
2026-04-25 15:09:40 +08:00
sawFailedEvent := false
failedMessage := ""
clientOutputStarted := false
2026-02-12 14:16:18 +08:00
upstreamRequestID := strings . TrimSpace ( resp . Header . Get ( "x-request-id" ) )
2026-04-25 15:09:40 +08:00
pendingLines := make ( [ ] string , 0 , 8 )
writePendingLines := func ( ) bool {
for _ , pending := range pendingLines {
if _ , err := fmt . Fprintln ( w , pending ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI passthrough] Client disconnected during streaming, continue draining upstream for usage: account=%d" , account . ID )
return false
}
}
pendingLines = pendingLines [ : 0 ]
return true
}
2026-02-11 00:59:39 +08:00
scanner := bufio . NewScanner ( resp . Body )
maxLineSize := defaultMaxLineSize
if s . cfg != nil && s . cfg . Gateway . MaxLineSize > 0 {
maxLineSize = s . cfg . Gateway . MaxLineSize
}
scanBuf := getSSEScannerBuf64K ( )
scanner . Buffer ( scanBuf [ : 0 ] , maxLineSize )
defer putSSEScannerBuf64K ( scanBuf )
2026-04-25 14:40:03 +08:00
needModelReplace := strings . TrimSpace ( originalModel ) != "" && strings . TrimSpace ( mappedModel ) != "" && strings . TrimSpace ( originalModel ) != strings . TrimSpace ( mappedModel )
2026-02-11 00:59:39 +08:00
for scanner . Scan ( ) {
line := scanner . Text ( )
2026-04-25 15:09:40 +08:00
lineStartsClientOutput := false
forceFlushFailedEvent := false
2026-02-12 09:41:37 +08:00
if data , ok := extractOpenAISSEDataLine ( line ) ; ok {
2026-02-28 15:01:20 +08:00
dataBytes := [ ] byte ( data )
2026-02-12 14:16:18 +08:00
trimmedData := strings . TrimSpace ( data )
2026-04-25 14:40:03 +08:00
if needModelReplace && strings . Contains ( data , mappedModel ) {
line = s . replaceModelInSSELine ( line , mappedModel , originalModel )
if replacedData , replaced := extractOpenAISSEDataLine ( line ) ; replaced {
dataBytes = [ ] byte ( replacedData )
trimmedData = strings . TrimSpace ( replacedData )
}
}
2026-04-25 15:09:40 +08:00
eventType := strings . TrimSpace ( gjson . Get ( trimmedData , "type" ) . String ( ) )
if eventType == "response.failed" {
failedMessage = extractOpenAISSEErrorMessage ( dataBytes )
if ! openAIStreamClientOutputStarted ( c , clientOutputStarted ) && openAIStreamFailedEventShouldFailover ( dataBytes , failedMessage ) {
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } ,
s . newOpenAIStreamFailoverError ( c , account , true , upstreamRequestID , dataBytes , failedMessage )
}
forceFlushFailedEvent = true
sawFailedEvent = true
}
2026-02-12 14:16:18 +08:00
if trimmedData == "[DONE]" {
sawDone = true
}
2026-03-12 16:53:18 +08:00
if openAIStreamEventIsTerminal ( trimmedData ) {
sawTerminalEvent = true
}
2026-04-25 15:09:40 +08:00
lineStartsClientOutput = forceFlushFailedEvent || openAIStreamDataStartsClientOutput ( trimmedData , eventType )
if firstTokenMs == nil && lineStartsClientOutput && trimmedData != "[DONE]" {
2026-02-11 00:59:39 +08:00
ms := int ( time . Since ( startTime ) . Milliseconds ( ) )
firstTokenMs = & ms
}
2026-02-28 15:01:20 +08:00
s . parseSSEUsageBytes ( dataBytes , usage )
2026-02-11 00:59:39 +08:00
}
2026-02-11 22:17:38 +08:00
if ! clientDisconnected {
2026-04-25 15:09:40 +08:00
if ! clientOutputStarted && ! lineStartsClientOutput {
pendingLines = append ( pendingLines , line )
continue
}
if ! clientOutputStarted && len ( pendingLines ) > 0 {
if ! writePendingLines ( ) {
continue
}
}
2026-02-11 22:17:38 +08:00
if _ , err := fmt . Fprintln ( w , line ) ; err != nil {
clientDisconnected = true
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI passthrough] Client disconnected during streaming, continue draining upstream for usage: account=%d" , account . ID )
2026-02-11 22:17:38 +08:00
} else {
2026-04-25 15:09:40 +08:00
clientOutputStarted = true
2026-02-11 22:17:38 +08:00
flusher . Flush ( )
}
2026-02-11 00:59:39 +08:00
}
}
if err := scanner . Err ( ) ; err != nil {
2026-04-25 15:09:40 +08:00
if sawTerminalEvent && ! sawFailedEvent {
2026-02-11 22:17:38 +08:00
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , nil
}
2026-04-25 15:09:40 +08:00
if sawFailedEvent {
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , fmt . Errorf ( "upstream response failed: %s" , failedMessage )
2026-03-12 16:53:18 +08:00
}
2026-02-11 00:59:39 +08:00
if errors . Is ( err , context . Canceled ) || errors . Is ( err , context . DeadlineExceeded ) {
2026-03-12 16:53:18 +08:00
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , fmt . Errorf ( "stream usage incomplete: %w" , err )
2026-02-11 00:59:39 +08:00
}
if errors . Is ( err , bufio . ErrTooLong ) {
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "[OpenAI passthrough] SSE line too long: account=%d max_size=%d error=%v" , account . ID , maxLineSize , err )
2026-02-11 00:59:39 +08:00
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , err
}
2026-04-25 15:09:40 +08:00
if ! openAIStreamClientOutputStarted ( c , clientOutputStarted ) {
msg := "OpenAI stream disconnected before completion"
if errText := strings . TrimSpace ( err . Error ( ) ) ; errText != "" {
msg += ": " + errText
}
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } ,
s . newOpenAIStreamFailoverError ( c , account , true , upstreamRequestID , nil , msg )
}
if clientDisconnected {
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , fmt . Errorf ( "stream usage incomplete after disconnect: %w" , err )
}
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" ,
2026-02-13 19:27:07 +08:00
"[OpenAI passthrough] 流读取异常中断: account=%d request_id=%s err=%v" ,
2026-02-12 14:16:18 +08:00
account . ID ,
upstreamRequestID ,
err ,
)
2026-02-11 00:59:39 +08:00
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , fmt . Errorf ( "stream read error: %w" , err )
}
2026-04-25 15:09:40 +08:00
if sawFailedEvent {
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , fmt . Errorf ( "upstream response failed: %s" , failedMessage )
}
2026-03-12 16:53:18 +08:00
if ! clientDisconnected && ! sawDone && ! sawTerminalEvent && ctx . Err ( ) == nil {
2026-02-12 19:01:09 +08:00
logger . FromContext ( ctx ) . With (
zap . String ( "component" , "service.openai_gateway" ) ,
zap . Int64 ( "account_id" , account . ID ) ,
zap . String ( "upstream_request_id" , upstreamRequestID ) ,
2026-02-13 19:27:07 +08:00
) . Info ( "OpenAI passthrough 上游流在未收到 [DONE] 时结束,疑似断流" )
2026-04-25 15:09:40 +08:00
if ! openAIStreamClientOutputStarted ( c , clientOutputStarted ) {
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } ,
s . newOpenAIStreamFailoverError ( c , account , true , upstreamRequestID , nil , "OpenAI stream ended before a terminal event" )
}
2026-03-12 16:53:18 +08:00
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , errors . New ( "stream usage incomplete: missing terminal event" )
2026-02-12 14:16:18 +08:00
}
2026-02-11 00:59:39 +08:00
return & openaiStreamingResultPassthrough { usage : usage , firstTokenMs : firstTokenMs } , nil
}
func ( s * OpenAIGatewayService ) handleNonStreamingResponsePassthrough (
ctx context . Context ,
resp * http . Response ,
c * gin . Context ,
2026-04-25 14:40:03 +08:00
originalModel string ,
mappedModel string ,
2026-02-11 00:59:39 +08:00
) ( * OpenAIUsage , error ) {
2026-04-16 01:53:22 +08:00
body , err := ReadUpstreamResponseBody ( resp . Body , s . cfg , c , openAITooLargeError )
2026-02-11 00:59:39 +08:00
if err != nil {
return nil , err
}
2026-04-07 22:49:14 +08:00
// Detect SSE responses from upstream and convert to JSON.
// Some upstreams (e.g. other sub2api instances) may return SSE even when
// stream=false was requested. Without this conversion the client would
// receive raw SSE text or a terminal event with empty output.
if isEventStreamResponse ( resp . Header ) {
2026-04-25 14:40:03 +08:00
return s . handlePassthroughSSEToJSON ( resp , c , body , originalModel , mappedModel )
2026-04-07 22:49:14 +08:00
}
2026-02-11 00:59:39 +08:00
usage := & OpenAIUsage { }
usageParsed := false
if len ( body ) > 0 {
2026-02-28 15:01:20 +08:00
if parsedUsage , ok := extractOpenAIUsageFromJSONBytes ( body ) ; ok {
* usage = parsedUsage
2026-02-11 00:59:39 +08:00
usageParsed = true
}
}
if ! usageParsed {
// 兜底:尝试从 SSE 文本中解析 usage
usage = s . parseSSEUsageFromBody ( string ( body ) )
}
2026-02-28 15:01:20 +08:00
writeOpenAIPassthroughResponseHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
2026-02-11 00:59:39 +08:00
contentType := resp . Header . Get ( "Content-Type" )
if contentType == "" {
contentType = "application/json"
}
2026-04-25 14:40:03 +08:00
if originalModel != "" && mappedModel != "" && originalModel != mappedModel {
body = s . replaceModelInResponseBody ( body , mappedModel , originalModel )
}
2026-02-11 00:59:39 +08:00
c . Data ( resp . StatusCode , contentType , body )
return usage , nil
}
2026-04-07 22:49:14 +08:00
// handlePassthroughSSEToJSON converts an SSE response body into a JSON
2026-04-25 14:40:03 +08:00
// response for the passthrough path. It mirrors handleSSEToJSON while
// preserving passthrough payloads, except compact-only model remapping may
// rewrite model fields back to the original requested model.
func ( s * OpenAIGatewayService ) handlePassthroughSSEToJSON ( resp * http . Response , c * gin . Context , body [ ] byte , originalModel string , mappedModel string ) ( * OpenAIUsage , error ) {
2026-04-07 22:49:14 +08:00
bodyText := string ( body )
finalResponse , ok := extractCodexFinalResponse ( bodyText )
usage := & OpenAIUsage { }
if ok {
if parsedUsage , parsed := extractOpenAIUsageFromJSONBytes ( finalResponse ) ; parsed {
* usage = parsedUsage
}
// When the terminal event has an empty output array, reconstruct
// output from accumulated delta events so the client gets full content.
if len ( gjson . GetBytes ( finalResponse , "output" ) . Array ( ) ) == 0 {
if outputJSON , reconstructed := reconstructResponseOutputFromSSE ( bodyText ) ; reconstructed {
if patched , err := sjson . SetRawBytes ( finalResponse , "output" , outputJSON ) ; err == nil {
finalResponse = patched
}
}
}
body = finalResponse
2026-04-25 14:40:03 +08:00
if originalModel != "" && mappedModel != "" && originalModel != mappedModel {
body = s . replaceModelInResponseBody ( body , mappedModel , originalModel )
}
2026-04-07 22:49:14 +08:00
// Correct tool calls in final response
body = s . correctToolCallsInResponseBody ( body )
} else {
terminalType , terminalPayload , terminalOK := extractOpenAISSETerminalEvent ( bodyText )
if terminalOK && terminalType == "response.failed" {
msg := extractOpenAISSEErrorMessage ( terminalPayload )
if msg == "" {
msg = "Upstream compact response failed"
}
return nil , s . writeOpenAINonStreamingProtocolError ( resp , c , msg )
}
usage = s . parseSSEUsageFromBody ( bodyText )
2026-04-25 14:40:03 +08:00
if originalModel != "" && mappedModel != "" && originalModel != mappedModel {
bodyText = s . replaceModelInSSEBody ( bodyText , mappedModel , originalModel )
}
body = [ ] byte ( bodyText )
2026-04-07 22:49:14 +08:00
}
writeOpenAIPassthroughResponseHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
contentType := "application/json; charset=utf-8"
if ! ok {
contentType = resp . Header . Get ( "Content-Type" )
if contentType == "" {
contentType = "text/event-stream"
}
}
c . Data ( resp . StatusCode , contentType , body )
return usage , nil
}
2026-02-28 15:01:20 +08:00
func writeOpenAIPassthroughResponseHeaders ( dst http . Header , src http . Header , filter * responseheaders . CompiledHeaderFilter ) {
2026-02-11 00:59:39 +08:00
if dst == nil || src == nil {
return
}
2026-02-28 15:01:20 +08:00
if filter != nil {
responseheaders . WriteFilteredHeaders ( dst , src , filter )
2026-02-11 00:59:39 +08:00
} else {
// 兜底:尽量保留最基础的 content-type
if v := strings . TrimSpace ( src . Get ( "Content-Type" ) ) ; v != "" {
dst . Set ( "Content-Type" , v )
}
}
// 透传模式强制放行 x-codex-* 响应头(若上游返回)。
// 注意:真实 http.Response.Header 的 key 一般会被 canonicalize; 但为了兼容测试/自建响应,
// 这里用 EqualFold 做一次大小写不敏感的查找。
getCaseInsensitiveValues := func ( h http . Header , want string ) [ ] string {
if h == nil {
return nil
}
for k , vals := range h {
if strings . EqualFold ( k , want ) {
return vals
}
}
return nil
}
for _ , rawKey := range [ ] string {
"x-codex-primary-used-percent" ,
"x-codex-primary-reset-after-seconds" ,
"x-codex-primary-window-minutes" ,
"x-codex-secondary-used-percent" ,
"x-codex-secondary-reset-after-seconds" ,
"x-codex-secondary-window-minutes" ,
"x-codex-primary-over-secondary-limit-percent" ,
} {
vals := getCaseInsensitiveValues ( src , rawKey )
if len ( vals ) == 0 {
continue
}
key := http . CanonicalHeaderKey ( rawKey )
dst . Del ( key )
for _ , v := range vals {
dst . Add ( key , v )
}
}
}
2026-01-10 20:53:16 +08:00
func ( s * OpenAIGatewayService ) buildUpstreamRequest ( ctx context . Context , c * gin . Context , account * Account , body [ ] byte , token string , isStream bool , promptCacheKey string , isCodexCLI bool ) ( * http . Request , error ) {
2025-12-22 22:58:31 +08:00
// Determine target URL based on account type
var targetURL string
2025-12-23 10:25:32 +08:00
switch account . Type {
2025-12-26 15:40:24 +08:00
case AccountTypeOAuth :
2025-12-22 22:58:31 +08:00
// OAuth accounts use ChatGPT internal API
targetURL = chatgptCodexURL
2026-01-04 19:27:53 +08:00
case AccountTypeAPIKey :
2025-12-22 22:58:31 +08:00
// API Key accounts use Platform API or custom base URL
baseURL := account . GetOpenAIBaseURL ( )
2026-01-02 17:40:57 +08:00
if baseURL == "" {
2025-12-22 22:58:31 +08:00
targetURL = openaiPlatformAPIURL
2026-01-02 17:40:57 +08:00
} else {
validatedURL , err := s . validateUpstreamBaseURL ( baseURL )
if err != nil {
return nil , err
}
2026-02-12 10:56:07 +08:00
targetURL = buildOpenAIResponsesURL ( validatedURL )
2025-12-22 22:58:31 +08:00
}
2025-12-23 10:25:32 +08:00
default :
2025-12-22 22:58:31 +08:00
targetURL = openaiPlatformAPIURL
}
2026-03-06 18:50:28 +08:00
targetURL = appendOpenAIResponsesRequestPathSuffix ( targetURL , openAIResponsesRequestPathSuffix ( c ) )
2025-12-22 22:58:31 +08:00
req , err := http . NewRequestWithContext ( ctx , "POST" , targetURL , bytes . NewReader ( body ) )
if err != nil {
return nil , err
}
// Set authentication header
req . Header . Set ( "authorization" , "Bearer " + token )
// Set headers specific to OAuth accounts (ChatGPT internal API)
2025-12-26 15:40:24 +08:00
if account . Type == AccountTypeOAuth {
2025-12-22 22:58:31 +08:00
// Required: set Host for ChatGPT API (must use req.Host, not Header.Set)
req . Host = "chatgpt.com"
// Required: set chatgpt-account-id header
chatgptAccountID := account . GetChatGPTAccountID ( )
if chatgptAccountID != "" {
req . Header . Set ( "chatgpt-account-id" , chatgptAccountID )
}
}
// Whitelist passthrough headers
for key , values := range c . Request . Header {
lowerKey := strings . ToLower ( key )
if openaiAllowedHeaders [ lowerKey ] {
for _ , v := range values {
req . Header . Add ( key , v )
}
}
}
2026-01-09 18:35:58 +08:00
if account . Type == AccountTypeOAuth {
2026-03-16 10:28:11 +08:00
// 清除客户端透传的 session 头,后续用隔离后的值重新设置,防止跨用户会话碰撞。
req . Header . Del ( "conversation_id" )
req . Header . Del ( "session_id" )
2026-01-09 18:35:58 +08:00
req . Header . Set ( "OpenAI-Beta" , "responses=experimental" )
2026-03-07 14:12:38 +08:00
req . Header . Set ( "originator" , resolveOpenAIUpstreamOriginator ( c , isCodexCLI ) )
2026-03-16 10:28:11 +08:00
apiKeyID := getAPIKeyIDFromContext ( c )
2026-03-06 18:50:28 +08:00
if isOpenAIResponsesCompactPath ( c ) {
req . Header . Set ( "accept" , "application/json" )
if req . Header . Get ( "version" ) == "" {
req . Header . Set ( "version" , codexCLIVersion )
}
2026-03-16 10:28:11 +08:00
compactSession := resolveOpenAICompactSessionID ( c )
req . Header . Set ( "session_id" , isolateOpenAISessionID ( apiKeyID , compactSession ) )
2026-03-06 18:50:28 +08:00
} else {
req . Header . Set ( "accept" , "text/event-stream" )
}
2026-01-09 18:35:58 +08:00
if promptCacheKey != "" {
2026-03-16 10:28:11 +08:00
isolated := isolateOpenAISessionID ( apiKeyID , promptCacheKey )
req . Header . Set ( "conversation_id" , isolated )
req . Header . Set ( "session_id" , isolated )
2026-01-09 18:35:58 +08:00
}
}
2025-12-22 22:58:31 +08:00
// Apply custom User-Agent if configured
customUA := account . GetOpenAIUserAgent ( )
if customUA != "" {
req . Header . Set ( "user-agent" , customUA )
}
2026-02-07 09:21:15 +08:00
// 若开启 ForceCodexCLI, 则强制将上游 User-Agent 伪装为 Codex CLI。
// 用于网关未透传/改写 User-Agent 时,仍能命中 Codex 侧识别逻辑。
if s . cfg != nil && s . cfg . Gateway . ForceCodexCLI {
2026-02-12 20:12:15 +08:00
req . Header . Set ( "user-agent" , codexCLIUserAgent )
2026-02-07 09:21:15 +08:00
}
2025-12-22 22:58:31 +08:00
// Ensure required headers exist
if req . Header . Get ( "content-type" ) == "" {
req . Header . Set ( "content-type" , "application/json" )
}
return req , nil
}
2026-02-13 19:27:07 +08:00
func ( s * OpenAIGatewayService ) handleErrorResponse (
ctx context . Context ,
resp * http . Response ,
c * gin . Context ,
account * Account ,
requestBody [ ] byte ,
) ( * OpenAIForwardResult , error ) {
2026-01-11 11:49:34 +08:00
body , _ := io . ReadAll ( io . LimitReader ( resp . Body , 2 << 20 ) )
upstreamMsg := strings . TrimSpace ( extractUpstreamErrorMessage ( body ) )
upstreamMsg = sanitizeUpstreamErrorMessage ( upstreamMsg )
upstreamDetail := ""
if s . cfg != nil && s . cfg . Gateway . LogUpstreamErrorBody {
maxBytes := s . cfg . Gateway . LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString ( string ( body ) , maxBytes )
}
setOpsUpstreamError ( c , resp . StatusCode , upstreamMsg , upstreamDetail )
2026-02-13 19:27:07 +08:00
logOpenAIInstructionsRequiredDebug ( ctx , c , account , resp . StatusCode , upstreamMsg , requestBody , body )
2026-01-11 11:49:34 +08:00
if s . cfg != nil && s . cfg . Gateway . LogUpstreamErrorBody {
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" ,
2026-01-11 11:49:34 +08:00
"OpenAI upstream error %d (account=%d platform=%s type=%s): %s" ,
resp . StatusCode ,
account . ID ,
account . Platform ,
account . Type ,
truncateForLog ( body , s . cfg . Gateway . LogUpstreamErrorBodyMaxBytes ) ,
)
}
2025-12-22 22:58:31 +08:00
2026-02-07 14:24:55 +08:00
if status , errType , errMsg , matched := applyErrorPassthroughRule (
c ,
PlatformOpenAI ,
resp . StatusCode ,
body ,
http . StatusBadGateway ,
"upstream_error" ,
"Upstream request failed" ,
) ; matched {
c . JSON ( status , gin . H {
"error" : gin . H {
"type" : errType ,
"message" : errMsg ,
} ,
} )
if upstreamMsg == "" {
upstreamMsg = errMsg
}
if upstreamMsg == "" {
return nil , fmt . Errorf ( "upstream error: %d (passthrough rule matched)" , resp . StatusCode )
}
return nil , fmt . Errorf ( "upstream error: %d (passthrough rule matched) message=%s" , resp . StatusCode , upstreamMsg )
}
2025-12-22 22:58:31 +08:00
// Check custom error codes
if ! account . ShouldHandleErrorCode ( resp . StatusCode ) {
2026-01-11 15:30:27 +08:00
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
2026-01-15 15:14:44 +08:00
AccountName : account . Name ,
2026-01-11 15:30:27 +08:00
UpstreamStatusCode : resp . StatusCode ,
UpstreamRequestID : resp . Header . Get ( "x-request-id" ) ,
Kind : "http_error" ,
Message : upstreamMsg ,
Detail : upstreamDetail ,
} )
2025-12-22 22:58:31 +08:00
c . JSON ( http . StatusInternalServerError , gin . H {
"error" : gin . H {
"type" : "upstream_error" ,
"message" : "Upstream gateway error" ,
} ,
} )
2026-01-11 11:49:34 +08:00
if upstreamMsg == "" {
return nil , fmt . Errorf ( "upstream error: %d (not in custom error codes)" , resp . StatusCode )
}
return nil , fmt . Errorf ( "upstream error: %d (not in custom error codes) message=%s" , resp . StatusCode , upstreamMsg )
2025-12-22 22:58:31 +08:00
}
// Handle upstream error (mark account status)
2026-01-03 06:37:08 -08:00
shouldDisable := false
if s . rateLimitService != nil {
shouldDisable = s . rateLimitService . HandleUpstreamError ( ctx , account , resp . StatusCode , resp . Header , body )
}
2026-01-11 15:30:27 +08:00
kind := "http_error"
if shouldDisable {
kind = "failover"
}
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
2026-01-15 15:14:44 +08:00
AccountName : account . Name ,
2026-01-11 15:30:27 +08:00
UpstreamStatusCode : resp . StatusCode ,
UpstreamRequestID : resp . Header . Get ( "x-request-id" ) ,
Kind : kind ,
Message : upstreamMsg ,
Detail : upstreamDetail ,
} )
2026-01-03 06:37:08 -08:00
if shouldDisable {
2026-03-08 13:57:23 +08:00
return nil , & UpstreamFailoverError {
StatusCode : resp . StatusCode ,
ResponseBody : body ,
RetryableOnSameAccount : account . IsPoolMode ( ) && isPoolModeRetryableStatus ( resp . StatusCode ) ,
}
2026-01-03 06:37:08 -08:00
}
2025-12-22 22:58:31 +08:00
// Return appropriate error response
var errType , errMsg string
var statusCode int
switch resp . StatusCode {
case 401 :
statusCode = http . StatusBadGateway
errType = "upstream_error"
errMsg = "Upstream authentication failed, please contact administrator"
2025-12-31 11:46:53 +08:00
case 402 :
statusCode = http . StatusBadGateway
errType = "upstream_error"
errMsg = "Upstream payment required: insufficient balance or billing issue"
2025-12-22 22:58:31 +08:00
case 403 :
statusCode = http . StatusBadGateway
errType = "upstream_error"
errMsg = "Upstream access forbidden, please contact administrator"
case 429 :
statusCode = http . StatusTooManyRequests
errType = "rate_limit_error"
errMsg = "Upstream rate limit exceeded, please retry later"
default :
statusCode = http . StatusBadGateway
errType = "upstream_error"
errMsg = "Upstream request failed"
}
c . JSON ( statusCode , gin . H {
"error" : gin . H {
"type" : errType ,
"message" : errMsg ,
} ,
} )
2026-01-11 11:49:34 +08:00
if upstreamMsg == "" {
return nil , fmt . Errorf ( "upstream error: %d" , resp . StatusCode )
}
return nil , fmt . Errorf ( "upstream error: %d message=%s" , resp . StatusCode , upstreamMsg )
2025-12-22 22:58:31 +08:00
}
2026-03-11 22:10:22 +08:00
// compatErrorWriter is the signature for format-specific error writers used by
// the compat paths (Chat Completions and Anthropic Messages).
type compatErrorWriter func ( c * gin . Context , statusCode int , errType , message string )
// handleCompatErrorResponse is the shared non-failover error handler for the
// Chat Completions and Anthropic Messages compat paths. It mirrors the logic of
// handleErrorResponse (passthrough rules, ShouldHandleErrorCode, rate-limit
// tracking, secondary failover) but delegates the final error write to the
// format-specific writer function.
func ( s * OpenAIGatewayService ) handleCompatErrorResponse (
resp * http . Response ,
c * gin . Context ,
account * Account ,
writeError compatErrorWriter ,
) ( * OpenAIForwardResult , error ) {
body , _ := io . ReadAll ( io . LimitReader ( resp . Body , 2 << 20 ) )
upstreamMsg := strings . TrimSpace ( extractUpstreamErrorMessage ( body ) )
if upstreamMsg == "" {
upstreamMsg = fmt . Sprintf ( "Upstream error: %d" , resp . StatusCode )
}
upstreamMsg = sanitizeUpstreamErrorMessage ( upstreamMsg )
upstreamDetail := ""
if s . cfg != nil && s . cfg . Gateway . LogUpstreamErrorBody {
maxBytes := s . cfg . Gateway . LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString ( string ( body ) , maxBytes )
}
setOpsUpstreamError ( c , resp . StatusCode , upstreamMsg , upstreamDetail )
// Apply error passthrough rules
if status , errType , errMsg , matched := applyErrorPassthroughRule (
c , account . Platform , resp . StatusCode , body ,
http . StatusBadGateway , "api_error" , "Upstream request failed" ,
) ; matched {
writeError ( c , status , errType , errMsg )
if upstreamMsg == "" {
upstreamMsg = errMsg
}
if upstreamMsg == "" {
return nil , fmt . Errorf ( "upstream error: %d (passthrough rule matched)" , resp . StatusCode )
}
return nil , fmt . Errorf ( "upstream error: %d (passthrough rule matched) message=%s" , resp . StatusCode , upstreamMsg )
}
// Check custom error codes — if the account does not handle this status,
// return a generic error without exposing upstream details.
if ! account . ShouldHandleErrorCode ( resp . StatusCode ) {
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : resp . StatusCode ,
UpstreamRequestID : resp . Header . Get ( "x-request-id" ) ,
Kind : "http_error" ,
Message : upstreamMsg ,
Detail : upstreamDetail ,
} )
writeError ( c , http . StatusInternalServerError , "api_error" , "Upstream gateway error" )
if upstreamMsg == "" {
return nil , fmt . Errorf ( "upstream error: %d (not in custom error codes)" , resp . StatusCode )
}
return nil , fmt . Errorf ( "upstream error: %d (not in custom error codes) message=%s" , resp . StatusCode , upstreamMsg )
}
// Track rate limits and decide whether to trigger secondary failover.
shouldDisable := false
if s . rateLimitService != nil {
shouldDisable = s . rateLimitService . HandleUpstreamError (
c . Request . Context ( ) , account , resp . StatusCode , resp . Header , body ,
)
}
kind := "http_error"
if shouldDisable {
kind = "failover"
}
appendOpsUpstreamError ( c , OpsUpstreamErrorEvent {
Platform : account . Platform ,
AccountID : account . ID ,
AccountName : account . Name ,
UpstreamStatusCode : resp . StatusCode ,
UpstreamRequestID : resp . Header . Get ( "x-request-id" ) ,
Kind : kind ,
Message : upstreamMsg ,
Detail : upstreamDetail ,
} )
if shouldDisable {
return nil , & UpstreamFailoverError {
StatusCode : resp . StatusCode ,
ResponseBody : body ,
RetryableOnSameAccount : account . IsPoolMode ( ) && isPoolModeRetryableStatus ( resp . StatusCode ) ,
}
}
// Map status code to error type and write response
errType := "api_error"
switch {
case resp . StatusCode == 400 :
errType = "invalid_request_error"
case resp . StatusCode == 404 :
errType = "not_found_error"
case resp . StatusCode == 429 :
errType = "rate_limit_error"
case resp . StatusCode >= 500 :
errType = "api_error"
}
writeError ( c , resp . StatusCode , errType , upstreamMsg )
return nil , fmt . Errorf ( "upstream error: %d %s" , resp . StatusCode , upstreamMsg )
}
2025-12-22 22:58:31 +08:00
// openaiStreamingResult streaming response result
type openaiStreamingResult struct {
usage * OpenAIUsage
firstTokenMs * int
}
2025-12-26 15:40:24 +08:00
func ( s * OpenAIGatewayService ) handleStreamingResponse ( ctx context . Context , resp * http . Response , c * gin . Context , account * Account , startTime time . Time , originalModel , mappedModel string ) ( * openaiStreamingResult , error ) {
2026-02-28 15:01:20 +08:00
if s . responseHeaderFilter != nil {
responseheaders . WriteFilteredHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
2026-01-05 13:54:43 +08:00
}
2025-12-22 22:58:31 +08:00
// Set SSE response headers
c . Header ( "Content-Type" , "text/event-stream" )
c . Header ( "Cache-Control" , "no-cache" )
c . Header ( "Connection" , "keep-alive" )
c . Header ( "X-Accel-Buffering" , "no" )
// Pass through other headers
if v := resp . Header . Get ( "x-request-id" ) ; v != "" {
c . Header ( "x-request-id" , v )
}
w := c . Writer
flusher , ok := w . ( http . Flusher )
if ! ok {
return nil , errors . New ( "streaming not supported" )
}
2026-02-28 15:01:20 +08:00
bufferedWriter := bufio . NewWriterSize ( w , 4 * 1024 )
flushBuffered := func ( ) error {
if err := bufferedWriter . Flush ( ) ; err != nil {
return err
}
flusher . Flush ( )
return nil
}
2025-12-22 22:58:31 +08:00
usage := & OpenAIUsage { }
var firstTokenMs * int
scanner := bufio . NewScanner ( resp . Body )
2026-01-04 19:49:59 +08:00
maxLineSize := defaultMaxLineSize
if s . cfg != nil && s . cfg . Gateway . MaxLineSize > 0 {
maxLineSize = s . cfg . Gateway . MaxLineSize
}
2026-02-06 22:55:12 +08:00
scanBuf := getSSEScannerBuf64K ( )
scanner . Buffer ( scanBuf [ : 0 ] , maxLineSize )
2026-01-04 19:49:59 +08:00
streamInterval := time . Duration ( 0 )
if s . cfg != nil && s . cfg . Gateway . StreamDataIntervalTimeout > 0 {
streamInterval = time . Duration ( s . cfg . Gateway . StreamDataIntervalTimeout ) * time . Second
}
2026-01-04 20:19:07 +08:00
// 仅监控上游数据间隔超时,不被下游写入阻塞影响
var intervalTicker * time . Ticker
2026-01-04 19:49:59 +08:00
if streamInterval > 0 {
2026-01-04 20:19:07 +08:00
intervalTicker = time . NewTicker ( streamInterval )
defer intervalTicker . Stop ( )
2026-01-04 19:49:59 +08:00
}
var intervalCh <- chan time . Time
2026-01-04 20:19:07 +08:00
if intervalTicker != nil {
intervalCh = intervalTicker . C
2026-01-04 19:49:59 +08:00
}
keepaliveInterval := time . Duration ( 0 )
if s . cfg != nil && s . cfg . Gateway . StreamKeepaliveInterval > 0 {
keepaliveInterval = time . Duration ( s . cfg . Gateway . StreamKeepaliveInterval ) * time . Second
}
// 下游 keepalive 仅用于防止代理空闲断开
var keepaliveTicker * time . Ticker
if keepaliveInterval > 0 {
keepaliveTicker = time . NewTicker ( keepaliveInterval )
defer keepaliveTicker . Stop ( )
}
var keepaliveCh <- chan time . Time
if keepaliveTicker != nil {
keepaliveCh = keepaliveTicker . C
}
2026-04-25 12:11:27 +00:00
// Track downstream writes separately from upstream reads: pre-output failover
// can buffer response.created / response.in_progress, so keepalive must be
// based on downstream idle time.
lastDownstreamWriteAt := time . Now ( )
2026-01-04 19:49:59 +08:00
2026-01-19 13:53:39 +08:00
// 仅发送一次错误事件,避免多次写入导致协议混乱。
// 注意: OpenAI `/v1/responses` streaming 事件必须符合 OpenAI Responses schema;
// 否则下游 SDK( 例如 OpenCode) 会因为类型校验失败而报错。
2026-01-04 19:49:59 +08:00
errorEventSent := false
2026-01-15 21:51:14 +08:00
clientDisconnected := false // 客户端断开后继续 drain 上游以收集 usage
2026-03-12 16:53:18 +08:00
sawTerminalEvent := false
2026-04-25 15:09:40 +08:00
sawFailedEvent := false
failedMessage := ""
clientOutputStarted := false
upstreamRequestID := strings . TrimSpace ( resp . Header . Get ( "x-request-id" ) )
var streamFailoverErr error
2026-01-04 19:49:59 +08:00
sendErrorEvent := func ( reason string ) {
2026-01-15 21:51:14 +08:00
if errorEventSent || clientDisconnected {
2026-01-04 19:49:59 +08:00
return
}
errorEventSent = true
2026-02-28 15:01:20 +08:00
payload := ` { "type":"error","sequence_number":0,"error": { "type":"upstream_error","message": ` + strconv . Quote ( reason ) + ` ,"code": ` + strconv . Quote ( reason ) + ` }} `
if err := flushBuffered ( ) ; err != nil {
clientDisconnected = true
return
}
if _ , err := bufferedWriter . WriteString ( "data: " + payload + "\n\n" ) ; err != nil {
clientDisconnected = true
return
2026-01-19 13:53:39 +08:00
}
2026-02-28 15:01:20 +08:00
if err := flushBuffered ( ) ; err != nil {
clientDisconnected = true
2026-04-25 15:09:40 +08:00
return
2026-01-19 13:53:39 +08:00
}
2026-04-25 15:09:40 +08:00
clientOutputStarted = true
2026-04-25 12:11:27 +00:00
lastDownstreamWriteAt = time . Now ( )
2026-01-04 19:49:59 +08:00
}
2025-12-22 22:58:31 +08:00
needModelReplace := originalModel != mappedModel
2026-02-28 15:01:20 +08:00
resultWithUsage := func ( ) * openaiStreamingResult {
return & openaiStreamingResult { usage : usage , firstTokenMs : firstTokenMs }
}
finalizeStream := func ( ) ( * openaiStreamingResult , error ) {
2026-04-25 15:09:40 +08:00
if ! sawTerminalEvent {
if ! openAIStreamClientOutputStarted ( c , clientOutputStarted ) {
return resultWithUsage ( ) , s . newOpenAIStreamFailoverError (
c ,
account ,
false ,
upstreamRequestID ,
nil ,
"OpenAI stream ended before a terminal event" ,
)
}
return resultWithUsage ( ) , fmt . Errorf ( "stream usage incomplete: missing terminal event" )
}
if sawFailedEvent {
return resultWithUsage ( ) , fmt . Errorf ( "upstream response failed: %s" , failedMessage )
}
2026-02-28 15:01:20 +08:00
if ! clientDisconnected {
2026-04-25 15:09:40 +08:00
hadBufferedData := bufferedWriter . Buffered ( ) > 0
2026-02-28 15:01:20 +08:00
if err := flushBuffered ( ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during final flush, returning collected usage" )
2026-04-25 15:09:40 +08:00
} else if hadBufferedData {
clientOutputStarted = true
2026-04-25 12:11:27 +00:00
lastDownstreamWriteAt = time . Now ( )
2026-01-04 19:49:59 +08:00
}
2026-02-28 15:01:20 +08:00
}
return resultWithUsage ( ) , nil
}
handleScanErr := func ( scanErr error ) ( * openaiStreamingResult , error , bool ) {
if scanErr == nil {
return nil , nil , false
}
2026-04-25 15:09:40 +08:00
if sawTerminalEvent && ! sawFailedEvent {
2026-03-12 16:53:18 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "Upstream scan ended after terminal event: %v" , scanErr )
return resultWithUsage ( ) , nil , true
}
2026-04-25 15:09:40 +08:00
if sawFailedEvent {
return resultWithUsage ( ) , fmt . Errorf ( "upstream response failed: %s" , failedMessage ) , true
}
2026-02-28 15:01:20 +08:00
// 客户端断开/取消请求时,上游读取往往会返回 context canceled。
// /v1/responses 的 SSE 事件必须符合 OpenAI 协议;这里不注入自定义 error event, 避免下游 SDK 解析失败。
if errors . Is ( scanErr , context . Canceled ) || errors . Is ( scanErr , context . DeadlineExceeded ) {
2026-03-12 16:53:18 +08:00
return resultWithUsage ( ) , fmt . Errorf ( "stream usage incomplete: %w" , scanErr ) , true
2026-02-28 15:01:20 +08:00
}
if errors . Is ( scanErr , bufio . ErrTooLong ) {
logger . LegacyPrintf ( "service.openai_gateway" , "SSE line too long: account=%d max_size=%d error=%v" , account . ID , maxLineSize , scanErr )
sendErrorEvent ( "response_too_large" )
return resultWithUsage ( ) , scanErr , true
}
2026-04-25 15:09:40 +08:00
if ! openAIStreamClientOutputStarted ( c , clientOutputStarted ) {
msg := "OpenAI stream disconnected before completion"
if errText := strings . TrimSpace ( scanErr . Error ( ) ) ; errText != "" {
msg += ": " + errText
}
return resultWithUsage ( ) , s . newOpenAIStreamFailoverError ( c , account , false , upstreamRequestID , nil , msg ) , true
}
// 客户端已断开时,上游出错仅影响体验,不影响计费;返回已收集 usage
if clientDisconnected {
return resultWithUsage ( ) , fmt . Errorf ( "stream usage incomplete after disconnect: %w" , scanErr ) , true
}
2026-02-28 15:01:20 +08:00
sendErrorEvent ( "stream_read_error" )
return resultWithUsage ( ) , fmt . Errorf ( "stream read error: %w" , scanErr ) , true
}
processSSELine := func ( line string , queueDrained bool ) {
2026-04-25 15:09:40 +08:00
if streamFailoverErr != nil {
return
}
2026-02-28 15:01:20 +08:00
// Extract data from SSE line (supports both "data: " and "data:" formats)
if data , ok := extractOpenAISSEDataLine ( line ) ; ok {
2026-01-04 19:49:59 +08:00
2026-02-28 15:01:20 +08:00
// Replace model in response if needed.
// Fast path: most events do not contain model field values.
if needModelReplace && mappedModel != "" && strings . Contains ( data , mappedModel ) {
line = s . replaceModelInSSELine ( line , mappedModel , originalModel )
}
2026-01-04 19:49:59 +08:00
2026-02-28 15:01:20 +08:00
dataBytes := [ ] byte ( data )
2026-03-12 16:53:18 +08:00
if openAIStreamEventIsTerminal ( data ) {
sawTerminalEvent = true
}
2026-04-25 15:09:40 +08:00
eventType := strings . TrimSpace ( gjson . GetBytes ( dataBytes , "type" ) . String ( ) )
forceFlushFailedEvent := false
if eventType == "response.failed" {
failedMessage = extractOpenAISSEErrorMessage ( dataBytes )
if ! openAIStreamClientOutputStarted ( c , clientOutputStarted ) && openAIStreamFailedEventShouldFailover ( dataBytes , failedMessage ) {
sawFailedEvent = true
streamFailoverErr = s . newOpenAIStreamFailoverError ( c , account , false , upstreamRequestID , dataBytes , failedMessage )
return
}
forceFlushFailedEvent = true
sawFailedEvent = true
}
2026-01-15 23:52:50 +08:00
2026-02-28 15:01:20 +08:00
// Correct Codex tool calls if needed (apply_patch -> edit, etc.)
if correctedData , corrected := s . toolCorrector . CorrectToolCallsInSSEBytes ( dataBytes ) ; corrected {
dataBytes = correctedData
data = string ( correctedData )
line = "data: " + data
2026-04-25 15:09:40 +08:00
eventType = strings . TrimSpace ( gjson . GetBytes ( dataBytes , "type" ) . String ( ) )
2026-02-28 15:01:20 +08:00
}
2026-04-25 15:09:40 +08:00
startsClientOutput := forceFlushFailedEvent || openAIStreamDataStartsClientOutput ( data , eventType )
2026-01-04 19:49:59 +08:00
2026-02-28 15:01:20 +08:00
// 写入客户端(客户端断开后继续 drain 上游)
if ! clientDisconnected {
2026-04-25 15:09:40 +08:00
shouldFlush := queueDrained && ( clientOutputStarted || startsClientOutput )
if firstTokenMs == nil && startsClientOutput {
2026-02-28 15:01:20 +08:00
// 保证首个 token 事件尽快出站,避免影响 TTFT。
shouldFlush = true
2026-01-04 19:49:59 +08:00
}
2026-02-28 15:01:20 +08:00
if _ , err := bufferedWriter . WriteString ( line ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during streaming, continuing to drain upstream for billing" )
} else if _ , err := bufferedWriter . WriteString ( "\n" ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during streaming, continuing to drain upstream for billing" )
} else if shouldFlush {
if err := flushBuffered ( ) ; err != nil {
2026-01-15 21:51:14 +08:00
clientDisconnected = true
2026-02-28 15:01:20 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during streaming flush, continuing to drain upstream for billing" )
2026-04-25 15:09:40 +08:00
} else {
clientOutputStarted = true
2026-04-25 12:11:27 +00:00
lastDownstreamWriteAt = time . Now ( )
2026-01-15 21:51:14 +08:00
}
2026-01-04 19:49:59 +08:00
}
2025-12-26 03:49:55 -08:00
}
2025-12-22 22:58:31 +08:00
2026-02-28 15:01:20 +08:00
// Record first token time
2026-04-25 15:09:40 +08:00
if firstTokenMs == nil && startsClientOutput {
2026-02-28 15:01:20 +08:00
ms := int ( time . Since ( startTime ) . Milliseconds ( ) )
firstTokenMs = & ms
}
s . parseSSEUsageBytes ( dataBytes , usage )
return
}
// Forward non-data lines as-is
if ! clientDisconnected {
if _ , err := bufferedWriter . WriteString ( line ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during streaming, continuing to drain upstream for billing" )
} else if _ , err := bufferedWriter . WriteString ( "\n" ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during streaming, continuing to drain upstream for billing" )
2026-04-25 15:09:40 +08:00
} else if queueDrained && clientOutputStarted {
2026-02-28 15:01:20 +08:00
if err := flushBuffered ( ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during streaming flush, continuing to drain upstream for billing" )
2026-04-25 15:09:40 +08:00
} else {
clientOutputStarted = true
2026-04-25 12:11:27 +00:00
lastDownstreamWriteAt = time . Now ( )
2026-02-28 15:01:20 +08:00
}
}
}
}
// 无超时/无 keepalive 的常见路径走同步扫描,减少 goroutine 与 channel 开销。
if streamInterval <= 0 && keepaliveInterval <= 0 {
defer putSSEScannerBuf64K ( scanBuf )
for scanner . Scan ( ) {
processSSELine ( scanner . Text ( ) , true )
2026-04-25 15:09:40 +08:00
if streamFailoverErr != nil {
return resultWithUsage ( ) , streamFailoverErr
}
2026-02-28 15:01:20 +08:00
}
if result , err , done := handleScanErr ( scanner . Err ( ) ) ; done {
return result , err
}
return finalizeStream ( )
}
type scanEvent struct {
line string
err error
}
// 独立 goroutine 读取上游,避免读取阻塞影响 keepalive/超时处理
events := make ( chan scanEvent , 16 )
done := make ( chan struct { } )
sendEvent := func ( ev scanEvent ) bool {
select {
case events <- ev :
return true
case <- done :
return false
}
}
var lastReadAt int64
atomic . StoreInt64 ( & lastReadAt , time . Now ( ) . UnixNano ( ) )
go func ( scanBuf * sseScannerBuf64K ) {
defer putSSEScannerBuf64K ( scanBuf )
defer close ( events )
for scanner . Scan ( ) {
atomic . StoreInt64 ( & lastReadAt , time . Now ( ) . UnixNano ( ) )
if ! sendEvent ( scanEvent { line : scanner . Text ( ) } ) {
return
}
}
if err := scanner . Err ( ) ; err != nil {
_ = sendEvent ( scanEvent { err : err } )
}
} ( scanBuf )
defer close ( done )
for {
select {
case ev , ok := <- events :
if ! ok {
return finalizeStream ( )
}
if result , err , done := handleScanErr ( ev . err ) ; done {
return result , err
}
processSSELine ( ev . line , len ( events ) == 0 )
2026-04-25 15:09:40 +08:00
if streamFailoverErr != nil {
return resultWithUsage ( ) , streamFailoverErr
}
2026-02-28 15:01:20 +08:00
2026-01-04 19:49:59 +08:00
case <- intervalCh :
2026-01-04 20:19:07 +08:00
lastRead := time . Unix ( 0 , atomic . LoadInt64 ( & lastReadAt ) )
if time . Since ( lastRead ) < streamInterval {
continue
}
2026-01-15 21:51:14 +08:00
if clientDisconnected {
2026-03-12 16:53:18 +08:00
return resultWithUsage ( ) , fmt . Errorf ( "stream usage incomplete after timeout" )
2026-01-15 21:51:14 +08:00
}
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "Stream data interval timeout: account=%d model=%s interval=%s" , account . ID , originalModel , streamInterval )
2026-01-11 21:54:52 -08:00
// 处理流超时,可能标记账户为临时不可调度或错误状态
if s . rateLimitService != nil {
s . rateLimitService . HandleStreamTimeout ( ctx , account , originalModel )
}
2026-01-04 19:49:59 +08:00
sendErrorEvent ( "stream_timeout" )
2026-02-28 15:01:20 +08:00
return resultWithUsage ( ) , fmt . Errorf ( "stream data interval timeout" )
2026-01-04 19:49:59 +08:00
case <- keepaliveCh :
2026-01-15 21:51:14 +08:00
if clientDisconnected {
continue
}
2026-04-25 12:11:27 +00:00
if time . Since ( lastDownstreamWriteAt ) < keepaliveInterval {
2026-01-04 19:49:59 +08:00
continue
2025-12-22 22:58:31 +08:00
}
2026-02-28 15:01:20 +08:00
if _ , err := bufferedWriter . WriteString ( ":\n\n" ) ; err != nil {
2026-01-15 21:51:14 +08:00
clientDisconnected = true
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during streaming, continuing to drain upstream for billing" )
2026-01-15 21:51:14 +08:00
continue
2025-12-26 03:49:55 -08:00
}
2026-02-28 15:01:20 +08:00
if err := flushBuffered ( ) ; err != nil {
clientDisconnected = true
logger . LegacyPrintf ( "service.openai_gateway" , "Client disconnected during keepalive flush, continuing to drain upstream for billing" )
2026-04-25 12:11:27 +00:00
} else {
lastDownstreamWriteAt = time . Now ( )
2026-02-28 15:01:20 +08:00
}
2025-12-22 22:58:31 +08:00
}
}
}
2026-02-12 09:41:37 +08:00
// extractOpenAISSEDataLine 低开销提取 SSE `data:` 行内容。
// 兼容 `data: xxx` 与 `data:xxx` 两种格式。
func extractOpenAISSEDataLine ( line string ) ( string , bool ) {
if ! strings . HasPrefix ( line , "data:" ) {
return "" , false
}
start := len ( "data:" )
for start < len ( line ) {
if line [ start ] != ' ' && line [ start ] != ' ' {
break
}
start ++
}
return line [ start : ] , true
}
2025-12-22 22:58:31 +08:00
func ( s * OpenAIGatewayService ) replaceModelInSSELine ( line , fromModel , toModel string ) string {
2026-02-12 09:41:37 +08:00
data , ok := extractOpenAISSEDataLine ( line )
if ! ok {
2025-12-26 03:49:55 -08:00
return line
}
2025-12-22 22:58:31 +08:00
if data == "" || data == "[DONE]" {
return line
}
2026-02-07 17:09:55 +08:00
// 使用 gjson 精确检查 model 字段,避免全量 JSON 反序列化
if m := gjson . Get ( data , "model" ) ; m . Exists ( ) && m . Str == fromModel {
newData , err := sjson . Set ( data , "model" , toModel )
2025-12-22 22:58:31 +08:00
if err != nil {
return line
}
2026-02-07 17:09:55 +08:00
return "data: " + newData
2025-12-22 22:58:31 +08:00
}
2026-02-07 17:09:55 +08:00
// 检查嵌套的 response.model 字段
if m := gjson . Get ( data , "response.model" ) ; m . Exists ( ) && m . Str == fromModel {
newData , err := sjson . Set ( data , "response.model" , toModel )
if err != nil {
return line
2025-12-22 22:58:31 +08:00
}
2026-02-07 17:09:55 +08:00
return "data: " + newData
2025-12-22 22:58:31 +08:00
}
return line
}
2026-01-15 23:52:50 +08:00
// correctToolCallsInResponseBody 修正响应体中的工具调用
func ( s * OpenAIGatewayService ) correctToolCallsInResponseBody ( body [ ] byte ) [ ] byte {
if len ( body ) == 0 {
return body
}
2026-02-28 15:01:20 +08:00
corrected , changed := s . toolCorrector . CorrectToolCallsInSSEBytes ( body )
2026-01-15 23:52:50 +08:00
if changed {
2026-02-28 15:01:20 +08:00
return corrected
2026-01-15 23:52:50 +08:00
}
return body
}
2025-12-22 22:58:31 +08:00
func ( s * OpenAIGatewayService ) parseSSEUsage ( data string , usage * OpenAIUsage ) {
2026-02-28 15:01:20 +08:00
s . parseSSEUsageBytes ( [ ] byte ( data ) , usage )
}
func ( s * OpenAIGatewayService ) parseSSEUsageBytes ( data [ ] byte , usage * OpenAIUsage ) {
if usage == nil || len ( data ) == 0 || bytes . Equal ( data , [ ] byte ( "[DONE]" ) ) {
2026-02-12 09:41:37 +08:00
return
2025-12-22 22:58:31 +08:00
}
2026-03-12 16:53:18 +08:00
// 选择性解析:仅在数据中包含终止事件标识时才进入字段提取。
if len ( data ) < 72 {
2026-02-12 09:41:37 +08:00
return
}
2026-03-12 16:53:18 +08:00
eventType := gjson . GetBytes ( data , "type" ) . String ( )
2026-04-25 16:56:50 +08:00
if eventType != "response.completed" && eventType != "response.done" &&
eventType != "response.incomplete" && eventType != "response.cancelled" && eventType != "response.canceled" {
2026-02-12 09:41:37 +08:00
return
2025-12-22 22:58:31 +08:00
}
2026-02-12 09:41:37 +08:00
2026-02-28 15:01:20 +08:00
usage . InputTokens = int ( gjson . GetBytes ( data , "response.usage.input_tokens" ) . Int ( ) )
usage . OutputTokens = int ( gjson . GetBytes ( data , "response.usage.output_tokens" ) . Int ( ) )
usage . CacheReadInputTokens = int ( gjson . GetBytes ( data , "response.usage.input_tokens_details.cached_tokens" ) . Int ( ) )
2026-04-22 12:30:08 +08:00
usage . ImageOutputTokens = int ( gjson . GetBytes ( data , "response.usage.output_tokens_details.image_tokens" ) . Int ( ) )
2026-02-28 15:01:20 +08:00
}
func extractOpenAIUsageFromJSONBytes ( body [ ] byte ) ( OpenAIUsage , bool ) {
if len ( body ) == 0 || ! gjson . ValidBytes ( body ) {
return OpenAIUsage { } , false
}
values := gjson . GetManyBytes (
body ,
"usage.input_tokens" ,
"usage.output_tokens" ,
"usage.input_tokens_details.cached_tokens" ,
2026-04-22 12:30:08 +08:00
"usage.output_tokens_details.image_tokens" ,
2026-02-28 15:01:20 +08:00
)
return OpenAIUsage {
InputTokens : int ( values [ 0 ] . Int ( ) ) ,
OutputTokens : int ( values [ 1 ] . Int ( ) ) ,
CacheReadInputTokens : int ( values [ 2 ] . Int ( ) ) ,
2026-04-22 12:30:08 +08:00
ImageOutputTokens : int ( values [ 3 ] . Int ( ) ) ,
2026-02-28 15:01:20 +08:00
} , true
2025-12-22 22:58:31 +08:00
}
2025-12-26 15:40:24 +08:00
func ( s * OpenAIGatewayService ) handleNonStreamingResponse ( ctx context . Context , resp * http . Response , c * gin . Context , account * Account , originalModel , mappedModel string ) ( * OpenAIUsage , error ) {
2026-04-16 01:53:22 +08:00
body , err := ReadUpstreamResponseBody ( resp . Body , s . cfg , c , openAITooLargeError )
2025-12-22 22:58:31 +08:00
if err != nil {
return nil , err
}
2026-04-07 22:49:14 +08:00
// Detect SSE responses for ALL account types via Content-Type header.
// Some OpenAI-compatible upstreams (including other sub2api instances)
// may return SSE even when stream=false was requested.
if isEventStreamResponse ( resp . Header ) {
return s . handleSSEToJSON ( resp , c , body , originalModel , mappedModel )
}
// For OAuth accounts, also fall back to a body-content heuristic because
// the upstream may omit the Content-Type header while still sending SSE.
// This heuristic is NOT applied to API-key accounts to avoid false
// positives on JSON responses that coincidentally contain "data:" or
// "event:" in their text content.
2026-01-09 18:35:58 +08:00
if account . Type == AccountTypeOAuth {
bodyLooksLikeSSE := bytes . Contains ( body , [ ] byte ( "data:" ) ) || bytes . Contains ( body , [ ] byte ( "event:" ) )
2026-04-07 22:49:14 +08:00
if bodyLooksLikeSSE {
return s . handleSSEToJSON ( resp , c , body , originalModel , mappedModel )
2026-01-09 18:35:58 +08:00
}
}
2026-02-28 15:01:20 +08:00
usageValue , usageOK := extractOpenAIUsageFromJSONBytes ( body )
if ! usageOK {
return nil , fmt . Errorf ( "parse response: invalid json response" )
2025-12-22 22:58:31 +08:00
}
2026-02-28 15:01:20 +08:00
usage := & usageValue
2025-12-22 22:58:31 +08:00
// Replace model in response if needed
if originalModel != mappedModel {
body = s . replaceModelInResponseBody ( body , mappedModel , originalModel )
}
2026-02-28 15:01:20 +08:00
responseheaders . WriteFilteredHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
2025-12-22 22:58:31 +08:00
2026-01-05 13:54:43 +08:00
contentType := "application/json"
if s . cfg != nil && ! s . cfg . Security . ResponseHeaders . Enabled {
if upstreamType := resp . Header . Get ( "Content-Type" ) ; upstreamType != "" {
contentType = upstreamType
}
}
c . Data ( resp . StatusCode , contentType , body )
2025-12-22 22:58:31 +08:00
return usage , nil
}
2026-01-09 18:35:58 +08:00
func isEventStreamResponse ( header http . Header ) bool {
contentType := strings . ToLower ( header . Get ( "Content-Type" ) )
return strings . Contains ( contentType , "text/event-stream" )
}
2026-04-07 22:49:14 +08:00
func ( s * OpenAIGatewayService ) handleSSEToJSON ( resp * http . Response , c * gin . Context , body [ ] byte , originalModel , mappedModel string ) ( * OpenAIUsage , error ) {
2026-01-09 18:35:58 +08:00
bodyText := string ( body )
finalResponse , ok := extractCodexFinalResponse ( bodyText )
usage := & OpenAIUsage { }
if ok {
2026-02-28 15:01:20 +08:00
if parsedUsage , parsed := extractOpenAIUsageFromJSONBytes ( finalResponse ) ; parsed {
* usage = parsedUsage
2026-01-09 18:35:58 +08:00
}
2026-04-07 19:30:45 +08:00
// When the terminal event has an empty output array, reconstruct
// output from accumulated delta events so the client gets full content.
// gjson Array() returns empty slice for null, missing, or empty arrays.
if len ( gjson . GetBytes ( finalResponse , "output" ) . Array ( ) ) == 0 {
if outputJSON , reconstructed := reconstructResponseOutputFromSSE ( bodyText ) ; reconstructed {
if patched , err := sjson . SetRawBytes ( finalResponse , "output" , outputJSON ) ; err == nil {
finalResponse = patched
}
}
}
2026-01-09 18:35:58 +08:00
body = finalResponse
if originalModel != mappedModel {
body = s . replaceModelInResponseBody ( body , mappedModel , originalModel )
}
2026-01-15 23:52:50 +08:00
// Correct tool calls in final response
body = s . correctToolCallsInResponseBody ( body )
2026-01-09 18:35:58 +08:00
} else {
2026-03-06 14:54:52 +08:00
terminalType , terminalPayload , terminalOK := extractOpenAISSETerminalEvent ( bodyText )
if terminalOK && terminalType == "response.failed" {
msg := extractOpenAISSEErrorMessage ( terminalPayload )
if msg == "" {
msg = "Upstream compact response failed"
}
return nil , s . writeOpenAINonStreamingProtocolError ( resp , c , msg )
}
2026-01-09 18:35:58 +08:00
usage = s . parseSSEUsageFromBody ( bodyText )
if originalModel != mappedModel {
bodyText = s . replaceModelInSSEBody ( bodyText , mappedModel , originalModel )
}
body = [ ] byte ( bodyText )
}
2026-02-28 15:01:20 +08:00
responseheaders . WriteFilteredHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
2026-01-09 18:35:58 +08:00
contentType := "application/json; charset=utf-8"
if ! ok {
contentType = resp . Header . Get ( "Content-Type" )
if contentType == "" {
contentType = "text/event-stream"
}
}
c . Data ( resp . StatusCode , contentType , body )
return usage , nil
}
2026-03-06 14:54:52 +08:00
func extractOpenAISSETerminalEvent ( body string ) ( string , [ ] byte , bool ) {
lines := strings . Split ( body , "\n" )
for _ , line := range lines {
data , ok := extractOpenAISSEDataLine ( line )
if ! ok || data == "" || data == "[DONE]" {
continue
}
eventType := strings . TrimSpace ( gjson . Get ( data , "type" ) . String ( ) )
switch eventType {
2026-04-25 16:56:50 +08:00
case "response.completed" , "response.done" , "response.failed" , "response.incomplete" , "response.cancelled" , "response.canceled" :
2026-03-06 14:54:52 +08:00
return eventType , [ ] byte ( data ) , true
}
}
return "" , nil , false
}
func extractOpenAISSEErrorMessage ( payload [ ] byte ) string {
if len ( payload ) == 0 {
return ""
}
for _ , path := range [ ] string { "response.error.message" , "error.message" , "message" } {
if msg := strings . TrimSpace ( gjson . GetBytes ( payload , path ) . String ( ) ) ; msg != "" {
return sanitizeUpstreamErrorMessage ( msg )
}
}
return sanitizeUpstreamErrorMessage ( strings . TrimSpace ( extractUpstreamErrorMessage ( payload ) ) )
}
func ( s * OpenAIGatewayService ) writeOpenAINonStreamingProtocolError ( resp * http . Response , c * gin . Context , message string ) error {
message = sanitizeUpstreamErrorMessage ( strings . TrimSpace ( message ) )
if message == "" {
message = "Upstream returned an invalid non-streaming response"
}
setOpsUpstreamError ( c , http . StatusBadGateway , message , "" )
responseheaders . WriteFilteredHeaders ( c . Writer . Header ( ) , resp . Header , s . responseHeaderFilter )
c . Writer . Header ( ) . Set ( "Content-Type" , "application/json; charset=utf-8" )
c . JSON ( http . StatusBadGateway , gin . H {
"error" : gin . H {
"type" : "upstream_error" ,
"message" : message ,
} ,
} )
return fmt . Errorf ( "non-streaming openai protocol error: %s" , message )
}
2026-01-09 18:35:58 +08:00
func extractCodexFinalResponse ( body string ) ( [ ] byte , bool ) {
lines := strings . Split ( body , "\n" )
for _ , line := range lines {
2026-02-12 09:41:37 +08:00
data , ok := extractOpenAISSEDataLine ( line )
if ! ok {
2026-01-09 18:35:58 +08:00
continue
}
if data == "" || data == "[DONE]" {
continue
}
2026-02-28 15:01:20 +08:00
eventType := gjson . Get ( data , "type" ) . String ( )
if eventType == "response.done" || eventType == "response.completed" {
if response := gjson . Get ( data , "response" ) ; response . Exists ( ) && response . Type == gjson . JSON && response . Raw != "" {
return [ ] byte ( response . Raw ) , true
2026-01-09 18:35:58 +08:00
}
}
}
return nil , false
}
2026-04-07 19:30:45 +08:00
// reconstructResponseOutputFromSSE scans raw SSE body text for delta events and
// returns a JSON-encoded output array reconstructed from accumulated deltas.
// Returns (nil, false) if no content was found in deltas.
func reconstructResponseOutputFromSSE ( bodyText string ) ( [ ] byte , bool ) {
acc := apicompat . NewBufferedResponseAccumulator ( )
2026-04-23 15:13:57 +00:00
imageOutputs := make ( [ ] json . RawMessage , 0 , 1 )
seenImages := make ( map [ string ] struct { } )
2026-04-07 19:30:45 +08:00
lines := strings . Split ( bodyText , "\n" )
for _ , line := range lines {
data , ok := extractOpenAISSEDataLine ( line )
if ! ok || data == "" || data == "[DONE]" {
continue
}
2026-04-23 15:13:57 +00:00
if imageOutput , ok := extractImageGenerationOutputFromSSEData ( [ ] byte ( data ) , seenImages ) ; ok {
imageOutputs = append ( imageOutputs , imageOutput )
}
2026-04-07 19:30:45 +08:00
var event apicompat . ResponsesStreamEvent
if err := json . Unmarshal ( [ ] byte ( data ) , & event ) ; err != nil {
continue
}
acc . ProcessEvent ( & event )
}
2026-04-23 15:13:57 +00:00
if ! acc . HasContent ( ) && len ( imageOutputs ) == 0 {
2026-04-07 19:30:45 +08:00
return nil , false
}
2026-04-23 15:13:57 +00:00
var output [ ] json . RawMessage
if acc . HasContent ( ) {
outputJSON , err := json . Marshal ( acc . BuildOutput ( ) )
2026-04-24 08:58:51 +08:00
if err == nil {
_ = json . Unmarshal ( outputJSON , & output )
2026-04-23 15:13:57 +00:00
}
}
output = append ( output , imageOutputs ... )
2026-04-24 08:58:51 +08:00
if len ( output ) == 0 {
return nil , false
}
2026-04-23 15:13:57 +00:00
2026-04-07 19:30:45 +08:00
outputJSON , err := json . Marshal ( output )
if err != nil {
return nil , false
}
return outputJSON , true
}
2026-04-23 15:13:57 +00:00
func extractImageGenerationOutputFromSSEData ( data [ ] byte , seen map [ string ] struct { } ) ( json . RawMessage , bool ) {
if len ( data ) == 0 || ! gjson . ValidBytes ( data ) {
return nil , false
}
if gjson . GetBytes ( data , "type" ) . String ( ) != "response.output_item.done" {
return nil , false
}
item := gjson . GetBytes ( data , "item" )
if ! item . Exists ( ) || ! item . IsObject ( ) || item . Get ( "type" ) . String ( ) != "image_generation_call" {
return nil , false
}
if strings . TrimSpace ( item . Get ( "result" ) . String ( ) ) == "" {
return nil , false
}
key := strings . TrimSpace ( item . Get ( "id" ) . String ( ) )
if key == "" {
key = strings . TrimSpace ( item . Get ( "output_format" ) . String ( ) ) + "|" + strings . TrimSpace ( item . Get ( "result" ) . String ( ) )
}
if key != "" && seen != nil {
if _ , exists := seen [ key ] ; exists {
return nil , false
}
seen [ key ] = struct { } { }
}
return json . RawMessage ( item . Raw ) , true
}
2026-01-09 18:35:58 +08:00
func ( s * OpenAIGatewayService ) parseSSEUsageFromBody ( body string ) * OpenAIUsage {
usage := & OpenAIUsage { }
lines := strings . Split ( body , "\n" )
for _ , line := range lines {
2026-02-12 09:41:37 +08:00
data , ok := extractOpenAISSEDataLine ( line )
if ! ok {
2026-01-09 18:35:58 +08:00
continue
}
if data == "" || data == "[DONE]" {
continue
}
2026-02-28 15:01:20 +08:00
s . parseSSEUsageBytes ( [ ] byte ( data ) , usage )
2026-01-09 18:35:58 +08:00
}
return usage
}
func ( s * OpenAIGatewayService ) replaceModelInSSEBody ( body , fromModel , toModel string ) string {
lines := strings . Split ( body , "\n" )
for i , line := range lines {
2026-02-12 09:41:37 +08:00
if _ , ok := extractOpenAISSEDataLine ( line ) ; ! ok {
2026-01-09 18:35:58 +08:00
continue
}
lines [ i ] = s . replaceModelInSSELine ( line , fromModel , toModel )
}
return strings . Join ( lines , "\n" )
}
2026-01-02 17:40:57 +08:00
func ( s * OpenAIGatewayService ) validateUpstreamBaseURL ( raw string ) ( string , error ) {
2026-01-05 13:54:43 +08:00
if s . cfg != nil && ! s . cfg . Security . URLAllowlist . Enabled {
2026-01-05 14:41:08 +08:00
normalized , err := urlvalidator . ValidateURLFormat ( raw , s . cfg . Security . URLAllowlist . AllowInsecureHTTP )
if err != nil {
return "" , fmt . Errorf ( "invalid base_url: %w" , err )
}
return normalized , nil
2026-01-05 13:54:43 +08:00
}
2026-01-02 17:40:57 +08:00
normalized , err := urlvalidator . ValidateHTTPSURL ( raw , urlvalidator . ValidationOptions {
AllowedHosts : s . cfg . Security . URLAllowlist . UpstreamHosts ,
RequireAllowlist : true ,
AllowPrivate : s . cfg . Security . URLAllowlist . AllowPrivateHosts ,
} )
if err != nil {
return "" , fmt . Errorf ( "invalid base_url: %w" , err )
}
return normalized , nil
}
2026-02-12 10:56:07 +08:00
// buildOpenAIResponsesURL 组装 OpenAI Responses 端点。
// - base 以 /v1 结尾:追加 /responses
// - base 已是 /responses: 原样返回
// - 其他情况:追加 /v1/responses
func buildOpenAIResponsesURL ( base string ) string {
normalized := strings . TrimRight ( strings . TrimSpace ( base ) , "/" )
if strings . HasSuffix ( normalized , "/responses" ) {
return normalized
}
if strings . HasSuffix ( normalized , "/v1" ) {
return normalized + "/responses"
}
return normalized + "/v1/responses"
}
2026-03-14 11:42:42 +08:00
func trimOpenAIEncryptedReasoningItems ( reqBody map [ string ] any ) bool {
if len ( reqBody ) == 0 {
return false
}
inputValue , has := reqBody [ "input" ]
if ! has {
return false
}
switch input := inputValue . ( type ) {
case [ ] any :
filtered := input [ : 0 ]
changed := false
for _ , item := range input {
nextItem , itemChanged , keep := sanitizeEncryptedReasoningInputItem ( item )
if itemChanged {
changed = true
}
if ! keep {
continue
}
filtered = append ( filtered , nextItem )
}
if ! changed {
return false
}
if len ( filtered ) == 0 {
delete ( reqBody , "input" )
return true
}
reqBody [ "input" ] = filtered
return true
case [ ] map [ string ] any :
filtered := input [ : 0 ]
changed := false
for _ , item := range input {
nextItem , itemChanged , keep := sanitizeEncryptedReasoningInputItem ( item )
if itemChanged {
changed = true
}
if ! keep {
continue
}
nextMap , ok := nextItem . ( map [ string ] any )
if ! ok {
filtered = append ( filtered , item )
continue
}
filtered = append ( filtered , nextMap )
}
if ! changed {
return false
}
if len ( filtered ) == 0 {
delete ( reqBody , "input" )
return true
}
reqBody [ "input" ] = filtered
return true
case map [ string ] any :
nextItem , changed , keep := sanitizeEncryptedReasoningInputItem ( input )
if ! changed {
return false
}
if ! keep {
delete ( reqBody , "input" )
return true
}
nextMap , ok := nextItem . ( map [ string ] any )
if ! ok {
return false
}
reqBody [ "input" ] = nextMap
return true
default :
return false
}
}
func sanitizeEncryptedReasoningInputItem ( item any ) ( next any , changed bool , keep bool ) {
inputItem , ok := item . ( map [ string ] any )
if ! ok {
return item , false , true
}
itemType , _ := inputItem [ "type" ] . ( string )
if strings . TrimSpace ( itemType ) != "reasoning" {
return item , false , true
}
_ , hasEncryptedContent := inputItem [ "encrypted_content" ]
if ! hasEncryptedContent {
return item , false , true
}
delete ( inputItem , "encrypted_content" )
if len ( inputItem ) == 1 {
return nil , true , false
}
return inputItem , true , true
}
2026-03-06 18:50:28 +08:00
func IsOpenAIResponsesCompactPathForTest ( c * gin . Context ) bool {
return isOpenAIResponsesCompactPath ( c )
}
func OpenAICompactSessionSeedKeyForTest ( ) string {
return openAICompactSessionSeedKey
}
func NormalizeOpenAICompactRequestBodyForTest ( body [ ] byte ) ( [ ] byte , bool , error ) {
return normalizeOpenAICompactRequestBody ( body )
}
func isOpenAIResponsesCompactPath ( c * gin . Context ) bool {
suffix := strings . TrimSpace ( openAIResponsesRequestPathSuffix ( c ) )
return suffix == "/compact" || strings . HasPrefix ( suffix , "/compact/" )
}
func normalizeOpenAICompactRequestBody ( body [ ] byte ) ( [ ] byte , bool , error ) {
if len ( body ) == 0 {
return body , false , nil
}
normalized := [ ] byte ( ` { } ` )
for _ , field := range [ ] string { "model" , "input" , "instructions" , "previous_response_id" } {
value := gjson . GetBytes ( body , field )
if ! value . Exists ( ) {
continue
}
next , err := sjson . SetRawBytes ( normalized , field , [ ] byte ( value . Raw ) )
if err != nil {
return body , false , fmt . Errorf ( "normalize compact body %s: %w" , field , err )
}
normalized = next
}
if bytes . Equal ( bytes . TrimSpace ( body ) , bytes . TrimSpace ( normalized ) ) {
return body , false , nil
}
return normalized , true , nil
}
func resolveOpenAICompactSessionID ( c * gin . Context ) string {
if c != nil {
if sessionID := strings . TrimSpace ( c . GetHeader ( "session_id" ) ) ; sessionID != "" {
return sessionID
}
if conversationID := strings . TrimSpace ( c . GetHeader ( "conversation_id" ) ) ; conversationID != "" {
return conversationID
}
if seed , ok := c . Get ( openAICompactSessionSeedKey ) ; ok {
if seedStr , ok := seed . ( string ) ; ok && strings . TrimSpace ( seedStr ) != "" {
return strings . TrimSpace ( seedStr )
}
}
}
return uuid . NewString ( )
}
func openAIResponsesRequestPathSuffix ( c * gin . Context ) string {
if c == nil || c . Request == nil || c . Request . URL == nil {
return ""
}
normalizedPath := strings . TrimRight ( strings . TrimSpace ( c . Request . URL . Path ) , "/" )
if normalizedPath == "" {
return ""
}
idx := strings . LastIndex ( normalizedPath , "/responses" )
if idx < 0 {
return ""
}
suffix := normalizedPath [ idx + len ( "/responses" ) : ]
if suffix == "" || suffix == "/" {
return ""
}
if ! strings . HasPrefix ( suffix , "/" ) {
return ""
}
return suffix
}
func appendOpenAIResponsesRequestPathSuffix ( baseURL , suffix string ) string {
trimmedBase := strings . TrimRight ( strings . TrimSpace ( baseURL ) , "/" )
trimmedSuffix := strings . TrimSpace ( suffix )
if trimmedBase == "" || trimmedSuffix == "" {
return trimmedBase
}
return trimmedBase + trimmedSuffix
}
2025-12-22 22:58:31 +08:00
func ( s * OpenAIGatewayService ) replaceModelInResponseBody ( body [ ] byte , fromModel , toModel string ) [ ] byte {
2026-02-07 17:09:55 +08:00
// 使用 gjson/sjson 精确替换 model 字段,避免全量 JSON 反序列化
if m := gjson . GetBytes ( body , "model" ) ; m . Exists ( ) && m . Str == fromModel {
newBody , err := sjson . SetBytes ( body , "model" , toModel )
if err != nil {
return body
}
return newBody
2025-12-22 22:58:31 +08:00
}
2026-02-07 17:09:55 +08:00
return body
2025-12-22 22:58:31 +08:00
}
// OpenAIRecordUsageInput input for recording usage
type OpenAIRecordUsageInput struct {
2026-03-12 16:53:18 +08:00
Result * OpenAIForwardResult
APIKey * APIKey
User * User
Account * Account
Subscription * UserSubscription
2026-03-15 11:26:42 +08:00
InboundEndpoint string
UpstreamEndpoint string
2026-03-12 16:53:18 +08:00
UserAgent string // 请求的 User-Agent
IPAddress string // 请求的客户端 IP 地址
RequestPayloadHash string
APIKeyService APIKeyQuotaUpdater
2026-04-01 01:51:19 +08:00
ChannelUsageFields
2025-12-22 22:58:31 +08:00
}
// RecordUsage records usage and deducts balance
func ( s * OpenAIGatewayService ) RecordUsage ( ctx context . Context , input * OpenAIRecordUsageInput ) error {
result := input . Result
2026-04-23 12:58:13 +08:00
if s . rateLimitService != nil && input != nil && input . Account != nil && input . Account . Platform == PlatformOpenAI {
s . rateLimitService . ResetOpenAI403Counter ( ctx , input . Account . ID )
}
2026-03-09 15:08:37 +08:00
// 跳过所有 token 均为零的用量记录——上游未返回 usage 时不应写入数据库
if result . Usage . InputTokens == 0 && result . Usage . OutputTokens == 0 &&
2026-04-22 12:30:08 +08:00
result . Usage . CacheCreationInputTokens == 0 && result . Usage . CacheReadInputTokens == 0 &&
result . Usage . ImageOutputTokens == 0 && result . ImageCount == 0 {
2026-03-09 15:08:37 +08:00
return nil
}
2026-01-04 19:27:53 +08:00
apiKey := input . APIKey
2025-12-22 22:58:31 +08:00
user := input . User
account := input . Account
subscription := input . Subscription
2025-12-23 10:01:58 +08:00
// 计算实际的新输入token( 减去缓存读取的token)
// 因为 input_tokens 包含了 cache_read_tokens, 而缓存读取的token不应按输入价格计费
actualInputTokens := result . Usage . InputTokens - result . Usage . CacheReadInputTokens
if actualInputTokens < 0 {
actualInputTokens = 0
}
2025-12-22 22:58:31 +08:00
// Calculate cost
tokens := UsageTokens {
2025-12-23 10:01:58 +08:00
InputTokens : actualInputTokens ,
2025-12-22 22:58:31 +08:00
OutputTokens : result . Usage . OutputTokens ,
CacheCreationTokens : result . Usage . CacheCreationInputTokens ,
CacheReadTokens : result . Usage . CacheReadInputTokens ,
2026-04-01 15:08:57 +08:00
ImageOutputTokens : result . Usage . ImageOutputTokens ,
2025-12-22 22:58:31 +08:00
}
// Get rate multiplier
2026-04-01 23:13:58 +08:00
multiplier := 1.0
if s . cfg != nil {
multiplier = s . cfg . Default . RateMultiplier
}
2025-12-22 22:58:31 +08:00
if apiKey . GroupID != nil && apiKey . Group != nil {
2026-03-06 14:54:52 +08:00
resolver := s . userGroupRateResolver
if resolver == nil {
resolver = newUserGroupRateResolver ( nil , nil , resolveUserGroupRateCacheTTL ( s . cfg ) , nil , "service.openai_gateway" )
}
multiplier = resolver . Resolve ( ctx , user . ID , * apiKey . GroupID , apiKey . Group . RateMultiplier )
2025-12-22 22:58:31 +08:00
}
2026-03-30 22:58:28 +08:00
var cost * CostBreakdown
var err error
2026-03-21 01:23:54 +08:00
billingModel := forwardResultBillingModel ( result . Model , result . UpstreamModel )
2026-03-31 00:23:45 +08:00
if result . BillingModel != "" {
billingModel = strings . TrimSpace ( result . BillingModel )
}
2026-04-05 17:22:59 +08:00
if input . BillingModelSource == BillingModelSourceChannelMapped && input . ChannelMappedModel != "" && input . ChannelMappedModel != input . OriginalModel {
2026-04-01 15:08:57 +08:00
billingModel = input . ChannelMappedModel
}
2026-04-01 23:13:58 +08:00
if input . BillingModelSource == BillingModelSourceRequested && input . OriginalModel != "" {
2026-03-31 00:23:45 +08:00
billingModel = input . OriginalModel
}
2026-03-08 23:22:28 +08:00
serviceTier := ""
if result . ServiceTier != nil {
serviceTier = strings . TrimSpace ( * result . ServiceTier )
}
2026-04-22 12:30:08 +08:00
cost , err = s . calculateOpenAIRecordUsageCost ( ctx , result , apiKey , billingModel , multiplier , tokens , serviceTier )
2025-12-22 22:58:31 +08:00
if err != nil {
cost = & CostBreakdown { ActualCost : 0 }
}
// Determine billing type
isSubscriptionBilling := subscription != nil && apiKey . Group != nil && apiKey . Group . IsSubscriptionType ( )
2025-12-26 15:40:24 +08:00
billingType := BillingTypeBalance
2025-12-22 22:58:31 +08:00
if isSubscriptionBilling {
2025-12-26 15:40:24 +08:00
billingType = BillingTypeSubscription
2025-12-22 22:58:31 +08:00
}
// Create usage log
durationMs := int ( result . Duration . Milliseconds ( ) )
2026-01-15 15:14:44 +08:00
accountRateMultiplier := account . BillingRateMultiplier ( )
2026-03-12 16:53:18 +08:00
requestID := resolveUsageBillingRequestID ( ctx , result . RequestID )
2026-03-31 15:26:20 +08:00
// 确定 RequestedModel( 渠道映射前的原始模型)
requestedModel := result . Model
if input . OriginalModel != "" {
requestedModel = input . OriginalModel
}
2025-12-26 15:40:24 +08:00
usageLog := & UsageLog {
2026-04-01 16:39:38 +08:00
UserID : user . ID ,
APIKeyID : apiKey . ID ,
AccountID : account . ID ,
RequestID : requestID ,
Model : result . Model ,
RequestedModel : requestedModel ,
UpstreamModel : optionalNonEqualStringPtr ( result . UpstreamModel , result . Model ) ,
ServiceTier : result . ServiceTier ,
ReasoningEffort : result . ReasoningEffort ,
InboundEndpoint : optionalTrimmedStringPtr ( input . InboundEndpoint ) ,
UpstreamEndpoint : optionalTrimmedStringPtr ( input . UpstreamEndpoint ) ,
InputTokens : actualInputTokens ,
OutputTokens : result . Usage . OutputTokens ,
CacheCreationTokens : result . Usage . CacheCreationInputTokens ,
CacheReadTokens : result . Usage . CacheReadInputTokens ,
ImageOutputTokens : result . Usage . ImageOutputTokens ,
2026-04-22 12:30:08 +08:00
ImageCount : result . ImageCount ,
ImageSize : optionalTrimmedStringPtr ( result . ImageSize ) ,
2025-12-22 22:58:31 +08:00
}
2026-04-01 16:30:47 +08:00
if cost != nil {
usageLog . InputCost = cost . InputCost
usageLog . OutputCost = cost . OutputCost
usageLog . ImageOutputCost = cost . ImageOutputCost
usageLog . CacheCreationCost = cost . CacheCreationCost
usageLog . CacheReadCost = cost . CacheReadCost
usageLog . TotalCost = cost . TotalCost
usageLog . ActualCost = cost . ActualCost
}
usageLog . RateMultiplier = multiplier
usageLog . AccountRateMultiplier = & accountRateMultiplier
usageLog . BillingType = billingType
usageLog . Stream = result . Stream
usageLog . OpenAIWSMode = result . OpenAIWSMode
usageLog . DurationMs = & durationMs
usageLog . FirstTokenMs = result . FirstTokenMs
usageLog . CreatedAt = time . Now ( )
2026-03-31 00:23:45 +08:00
// 设置渠道信息
usageLog . ChannelID = optionalInt64Ptr ( input . ChannelID )
usageLog . ModelMappingChain = optionalTrimmedStringPtr ( input . ModelMappingChain )
2026-03-30 22:58:28 +08:00
// 设置计费模式
if cost != nil && cost . BillingMode != "" {
billingMode := cost . BillingMode
usageLog . BillingMode = & billingMode
2026-04-22 12:30:08 +08:00
} else if result . ImageCount > 0 {
billingMode := string ( BillingModeImage )
usageLog . BillingMode = & billingMode
2026-03-30 22:58:28 +08:00
} else {
2026-04-02 02:22:15 +08:00
billingMode := string ( BillingModeToken )
2026-03-30 22:13:16 +08:00
usageLog . BillingMode = & billingMode
}
2026-01-06 16:23:56 +08:00
// 添加 UserAgent
if input . UserAgent != "" {
usageLog . UserAgent = & input . UserAgent
}
2026-01-09 21:59:32 +08:00
// 添加 IPAddress
if input . IPAddress != "" {
usageLog . IPAddress = & input . IPAddress
}
2025-12-22 22:58:31 +08:00
if apiKey . GroupID != nil {
usageLog . GroupID = apiKey . GroupID
}
if subscription != nil {
usageLog . SubscriptionID = & subscription . ID
}
2026-04-13 02:28:31 +08:00
// 计算账号统计定价费用(使用最终上游模型匹配自定义规则)
2026-04-11 23:39:49 +08:00
if apiKey . GroupID != nil {
2026-04-14 19:29:37 +08:00
applyAccountStatsCost ( ctx , usageLog , s . channelService , s . billingService ,
account . ID , * apiKey . GroupID , result . UpstreamModel , result . Model ,
tokens , cost . TotalCost ,
2026-04-11 23:39:49 +08:00
)
}
2025-12-29 03:17:25 +08:00
if s . cfg != nil && s . cfg . RunMode == config . RunModeSimple {
2026-03-12 16:53:18 +08:00
writeUsageLogBestEffort ( ctx , s . usageLogRepo , usageLog , "service.openai_gateway" )
2026-02-12 19:01:09 +08:00
logger . LegacyPrintf ( "service.openai_gateway" , "[SIMPLE MODE] Usage recorded (not billed): user=%d, tokens=%d" , usageLog . UserID , usageLog . TotalTokens ( ) )
2025-12-29 03:17:25 +08:00
s . deferredService . ScheduleLastUsedUpdate ( account . ID )
return nil
}
2026-03-12 16:53:18 +08:00
billingErr := func ( ) error {
_ , err := applyUsageBilling ( ctx , requestID , usageLog , & postUsageBillingParams {
2026-03-06 00:37:37 +08:00
Cost : cost ,
User : user ,
APIKey : apiKey ,
Account : account ,
Subscription : subscription ,
2026-03-12 16:53:18 +08:00
RequestPayloadHash : resolveUsageBillingPayloadFingerprint ( ctx , input . RequestPayloadHash ) ,
2026-03-06 00:37:37 +08:00
IsSubscriptionBill : isSubscriptionBilling ,
AccountRateMultiplier : accountRateMultiplier ,
APIKeyService : input . APIKeyService ,
2026-03-12 16:53:18 +08:00
} , s . billingDeps ( ) , s . usageBillingRepo )
return err
} ( )
if billingErr != nil {
return billingErr
2026-03-05 20:54:37 +08:00
}
2026-03-12 16:53:18 +08:00
writeUsageLogBestEffort ( ctx , s . usageLogRepo , usageLog , "service.openai_gateway" )
2026-03-05 20:54:37 +08:00
2025-12-22 22:58:31 +08:00
return nil
}
2025-12-23 16:26:07 +08:00
2026-04-22 12:30:08 +08:00
func ( s * OpenAIGatewayService ) calculateOpenAIRecordUsageCost (
ctx context . Context ,
result * OpenAIForwardResult ,
apiKey * APIKey ,
billingModel string ,
multiplier float64 ,
tokens UsageTokens ,
serviceTier string ,
) ( * CostBreakdown , error ) {
if result != nil && result . ImageCount > 0 {
return s . calculateOpenAIImageCost ( ctx , billingModel , apiKey , result , multiplier ) , nil
}
if s . resolver != nil && apiKey . Group != nil {
gid := apiKey . Group . ID
return s . billingService . CalculateCostUnified ( CostInput {
Ctx : ctx ,
Model : billingModel ,
GroupID : & gid ,
Tokens : tokens ,
RequestCount : 1 ,
RateMultiplier : multiplier ,
ServiceTier : serviceTier ,
Resolver : s . resolver ,
} )
}
return s . billingService . CalculateCostWithServiceTier ( billingModel , tokens , multiplier , serviceTier )
}
func ( s * OpenAIGatewayService ) calculateOpenAIImageCost (
ctx context . Context ,
billingModel string ,
apiKey * APIKey ,
result * OpenAIForwardResult ,
multiplier float64 ,
) * CostBreakdown {
2026-04-23 15:09:47 +08:00
if resolved := s . resolveOpenAIChannelPricing ( ctx , billingModel , apiKey ) ; resolved != nil &&
( resolved . Mode == BillingModePerRequest || resolved . Mode == BillingModeImage ) {
2026-04-22 12:30:08 +08:00
gid := apiKey . Group . ID
cost , err := s . billingService . CalculateCostUnified ( CostInput {
Ctx : ctx ,
Model : billingModel ,
GroupID : & gid ,
RequestCount : 1 ,
SizeTier : result . ImageSize ,
RateMultiplier : multiplier ,
Resolver : s . resolver ,
Resolved : resolved ,
} )
if err == nil {
return cost
}
logger . LegacyPrintf ( "service.openai_gateway" , "Calculate image channel cost failed: %v" , err )
}
var groupConfig * ImagePriceConfig
if apiKey != nil && apiKey . Group != nil {
groupConfig = & ImagePriceConfig {
Price1K : apiKey . Group . ImagePrice1K ,
Price2K : apiKey . Group . ImagePrice2K ,
Price4K : apiKey . Group . ImagePrice4K ,
}
}
return s . billingService . CalculateImageCost ( billingModel , result . ImageSize , result . ImageCount , groupConfig , multiplier )
}
func ( s * OpenAIGatewayService ) resolveOpenAIChannelPricing ( ctx context . Context , billingModel string , apiKey * APIKey ) * ResolvedPricing {
if s . resolver == nil || apiKey == nil || apiKey . Group == nil {
return nil
}
gid := apiKey . Group . ID
resolved := s . resolver . Resolve ( ctx , PricingInput { Model : billingModel , GroupID : & gid } )
if resolved . Source == PricingSourceChannel {
return resolved
}
return nil
}
2026-01-25 13:32:08 +08:00
// ParseCodexRateLimitHeaders extracts Codex usage limits from response headers.
// Exported for use in ratelimit_service when handling OpenAI 429 responses.
func ParseCodexRateLimitHeaders ( headers http . Header ) * OpenAICodexUsageSnapshot {
2025-12-23 16:26:07 +08:00
snapshot := & OpenAICodexUsageSnapshot { }
hasData := false
// Helper to parse float64 from header
parseFloat := func ( key string ) * float64 {
if v := headers . Get ( key ) ; v != "" {
if f , err := strconv . ParseFloat ( v , 64 ) ; err == nil {
return & f
}
}
return nil
}
// Helper to parse int from header
parseInt := func ( key string ) * int {
if v := headers . Get ( key ) ; v != "" {
if i , err := strconv . Atoi ( v ) ; err == nil {
return & i
}
}
return nil
}
// Primary (weekly) limits
if v := parseFloat ( "x-codex-primary-used-percent" ) ; v != nil {
snapshot . PrimaryUsedPercent = v
hasData = true
}
if v := parseInt ( "x-codex-primary-reset-after-seconds" ) ; v != nil {
snapshot . PrimaryResetAfterSeconds = v
hasData = true
}
if v := parseInt ( "x-codex-primary-window-minutes" ) ; v != nil {
snapshot . PrimaryWindowMinutes = v
hasData = true
}
// Secondary (5h) limits
if v := parseFloat ( "x-codex-secondary-used-percent" ) ; v != nil {
snapshot . SecondaryUsedPercent = v
hasData = true
}
if v := parseInt ( "x-codex-secondary-reset-after-seconds" ) ; v != nil {
snapshot . SecondaryResetAfterSeconds = v
hasData = true
}
if v := parseInt ( "x-codex-secondary-window-minutes" ) ; v != nil {
snapshot . SecondaryWindowMinutes = v
hasData = true
}
// Overflow ratio
if v := parseFloat ( "x-codex-primary-over-secondary-limit-percent" ) ; v != nil {
snapshot . PrimaryOverSecondaryPercent = v
hasData = true
}
if ! hasData {
return nil
}
snapshot . UpdatedAt = time . Now ( ) . Format ( time . RFC3339 )
return snapshot
}
2026-02-22 21:04:52 +08:00
func codexSnapshotBaseTime ( snapshot * OpenAICodexUsageSnapshot , fallback time . Time ) time . Time {
2025-12-23 16:26:07 +08:00
if snapshot == nil {
2026-02-22 21:04:52 +08:00
return fallback
2025-12-23 16:26:07 +08:00
}
2026-02-22 21:04:52 +08:00
if snapshot . UpdatedAt == "" {
return fallback
}
base , err := time . Parse ( time . RFC3339 , snapshot . UpdatedAt )
if err != nil {
return fallback
}
return base
}
func codexResetAtRFC3339 ( base time . Time , resetAfterSeconds * int ) * string {
if resetAfterSeconds == nil {
return nil
}
sec := * resetAfterSeconds
if sec < 0 {
sec = 0
}
resetAt := base . Add ( time . Duration ( sec ) * time . Second ) . Format ( time . RFC3339 )
return & resetAt
}
func buildCodexUsageExtraUpdates ( snapshot * OpenAICodexUsageSnapshot , fallbackNow time . Time ) map [ string ] any {
if snapshot == nil {
return nil
2026-02-11 00:59:39 +08:00
}
2025-12-23 16:26:07 +08:00
2026-02-22 21:04:52 +08:00
baseTime := codexSnapshotBaseTime ( snapshot , fallbackNow )
2025-12-23 16:26:07 +08:00
updates := make ( map [ string ] any )
2026-01-25 13:32:08 +08:00
2026-02-22 21:04:52 +08:00
// 保存原始 primary/secondary 字段,便于排查问题
2025-12-23 16:26:07 +08:00
if snapshot . PrimaryUsedPercent != nil {
updates [ "codex_primary_used_percent" ] = * snapshot . PrimaryUsedPercent
}
if snapshot . PrimaryResetAfterSeconds != nil {
updates [ "codex_primary_reset_after_seconds" ] = * snapshot . PrimaryResetAfterSeconds
}
if snapshot . PrimaryWindowMinutes != nil {
updates [ "codex_primary_window_minutes" ] = * snapshot . PrimaryWindowMinutes
}
if snapshot . SecondaryUsedPercent != nil {
updates [ "codex_secondary_used_percent" ] = * snapshot . SecondaryUsedPercent
}
if snapshot . SecondaryResetAfterSeconds != nil {
updates [ "codex_secondary_reset_after_seconds" ] = * snapshot . SecondaryResetAfterSeconds
}
if snapshot . SecondaryWindowMinutes != nil {
updates [ "codex_secondary_window_minutes" ] = * snapshot . SecondaryWindowMinutes
}
if snapshot . PrimaryOverSecondaryPercent != nil {
updates [ "codex_primary_over_secondary_percent" ] = * snapshot . PrimaryOverSecondaryPercent
}
2026-02-22 21:04:52 +08:00
updates [ "codex_usage_updated_at" ] = baseTime . Format ( time . RFC3339 )
2025-12-23 16:26:07 +08:00
2026-02-22 21:04:52 +08:00
// 归一化到 5h/7d 规范字段
2026-01-25 13:32:08 +08:00
if normalized := snapshot . Normalize ( ) ; normalized != nil {
if normalized . Used5hPercent != nil {
updates [ "codex_5h_used_percent" ] = * normalized . Used5hPercent
2025-12-25 17:00:02 +08:00
}
2026-01-25 13:32:08 +08:00
if normalized . Reset5hSeconds != nil {
updates [ "codex_5h_reset_after_seconds" ] = * normalized . Reset5hSeconds
2025-12-25 17:00:02 +08:00
}
2026-01-25 13:32:08 +08:00
if normalized . Window5hMinutes != nil {
updates [ "codex_5h_window_minutes" ] = * normalized . Window5hMinutes
2025-12-25 17:00:02 +08:00
}
2026-01-25 13:32:08 +08:00
if normalized . Used7dPercent != nil {
updates [ "codex_7d_used_percent" ] = * normalized . Used7dPercent
2025-12-25 17:00:02 +08:00
}
2026-01-25 13:32:08 +08:00
if normalized . Reset7dSeconds != nil {
updates [ "codex_7d_reset_after_seconds" ] = * normalized . Reset7dSeconds
2025-12-25 17:00:02 +08:00
}
2026-01-25 13:32:08 +08:00
if normalized . Window7dMinutes != nil {
updates [ "codex_7d_window_minutes" ] = * normalized . Window7dMinutes
2025-12-25 17:00:02 +08:00
}
2026-02-22 21:04:52 +08:00
if reset5hAt := codexResetAtRFC3339 ( baseTime , normalized . Reset5hSeconds ) ; reset5hAt != nil {
updates [ "codex_5h_reset_at" ] = * reset5hAt
}
if reset7dAt := codexResetAtRFC3339 ( baseTime , normalized . Reset7dSeconds ) ; reset7dAt != nil {
updates [ "codex_7d_reset_at" ] = * reset7dAt
}
}
return updates
}
// updateCodexUsageSnapshot saves the Codex usage snapshot to account's Extra field
func ( s * OpenAIGatewayService ) updateCodexUsageSnapshot ( ctx context . Context , accountID int64 , snapshot * OpenAICodexUsageSnapshot ) {
if snapshot == nil {
return
}
if s == nil || s . accountRepo == nil {
return
}
2026-03-07 23:59:39 +08:00
now := time . Now ( )
updates := buildCodexUsageExtraUpdates ( snapshot , now )
2026-04-15 15:29:52 +08:00
if len ( updates ) == 0 {
2026-02-22 21:04:52 +08:00
return
2025-12-25 17:00:02 +08:00
}
2026-04-15 15:29:52 +08:00
if ! s . getCodexSnapshotThrottle ( ) . Allow ( accountID , now ) {
2026-03-11 15:47:39 +08:00
return
}
2025-12-25 17:00:02 +08:00
2025-12-23 16:26:07 +08:00
go func ( ) {
updateCtx , cancel := context . WithTimeout ( context . Background ( ) , 5 * time . Second )
defer cancel ( )
2026-04-15 15:29:52 +08:00
_ = s . accountRepo . UpdateExtra ( updateCtx , accountID , updates )
2025-12-23 16:26:07 +08:00
} ( )
}
2026-02-03 14:36:29 +08:00
2026-03-06 20:46:10 +08:00
func ( s * OpenAIGatewayService ) UpdateCodexUsageSnapshotFromHeaders ( ctx context . Context , accountID int64 , headers http . Header ) {
if accountID <= 0 || headers == nil {
return
}
if snapshot := ParseCodexRateLimitHeaders ( headers ) ; snapshot != nil {
s . updateCodexUsageSnapshot ( ctx , accountID , snapshot )
}
}
2026-02-03 14:36:29 +08:00
func getOpenAIReasoningEffortFromReqBody ( reqBody map [ string ] any ) ( value string , present bool ) {
if reqBody == nil {
return "" , false
}
// Primary: reasoning.effort
if reasoning , ok := reqBody [ "reasoning" ] . ( map [ string ] any ) ; ok {
if effort , ok := reasoning [ "effort" ] . ( string ) ; ok {
return normalizeOpenAIReasoningEffort ( effort ) , true
}
}
// Fallback: some clients may use a flat field.
if effort , ok := reqBody [ "reasoning_effort" ] . ( string ) ; ok {
return normalizeOpenAIReasoningEffort ( effort ) , true
}
return "" , false
}
func deriveOpenAIReasoningEffortFromModel ( model string ) string {
if strings . TrimSpace ( model ) == "" {
return ""
}
modelID := strings . TrimSpace ( model )
if strings . Contains ( modelID , "/" ) {
parts := strings . Split ( modelID , "/" )
modelID = parts [ len ( parts ) - 1 ]
}
parts := strings . FieldsFunc ( strings . ToLower ( modelID ) , func ( r rune ) bool {
switch r {
case '-' , '_' , ' ' :
return true
default :
return false
}
} )
if len ( parts ) == 0 {
return ""
}
return normalizeOpenAIReasoningEffort ( parts [ len ( parts ) - 1 ] )
}
2026-02-12 09:41:37 +08:00
func extractOpenAIRequestMetaFromBody ( body [ ] byte ) ( model string , stream bool , promptCacheKey string ) {
if len ( body ) == 0 {
return "" , false , ""
}
model = strings . TrimSpace ( gjson . GetBytes ( body , "model" ) . String ( ) )
stream = gjson . GetBytes ( body , "stream" ) . Bool ( )
promptCacheKey = strings . TrimSpace ( gjson . GetBytes ( body , "prompt_cache_key" ) . String ( ) )
return model , stream , promptCacheKey
}
2026-02-12 21:02:52 +08:00
// normalizeOpenAIPassthroughOAuthBody 将透传 OAuth 请求体收敛为旧链路关键行为:
2026-03-06 18:50:28 +08:00
// 1) store=false 2) 非 compact 保持 stream=true; compact 强制 stream=false
func normalizeOpenAIPassthroughOAuthBody ( body [ ] byte , compact bool ) ( [ ] byte , bool , error ) {
2026-02-12 21:02:52 +08:00
if len ( body ) == 0 {
return body , false , nil
}
normalized := body
changed := false
2026-03-06 18:50:28 +08:00
if compact {
if store := gjson . GetBytes ( normalized , "store" ) ; store . Exists ( ) {
next , err := sjson . DeleteBytes ( normalized , "store" )
if err != nil {
return body , false , fmt . Errorf ( "normalize passthrough body delete store: %w" , err )
}
normalized = next
changed = true
2026-02-12 21:02:52 +08:00
}
2026-03-06 18:50:28 +08:00
if stream := gjson . GetBytes ( normalized , "stream" ) ; stream . Exists ( ) {
next , err := sjson . DeleteBytes ( normalized , "stream" )
if err != nil {
return body , false , fmt . Errorf ( "normalize passthrough body delete stream: %w" , err )
}
normalized = next
changed = true
}
} else {
if store := gjson . GetBytes ( normalized , "store" ) ; ! store . Exists ( ) || store . Type != gjson . False {
next , err := sjson . SetBytes ( normalized , "store" , false )
if err != nil {
return body , false , fmt . Errorf ( "normalize passthrough body store=false: %w" , err )
}
normalized = next
changed = true
}
if stream := gjson . GetBytes ( normalized , "stream" ) ; ! stream . Exists ( ) || stream . Type != gjson . True {
next , err := sjson . SetBytes ( normalized , "stream" , true )
if err != nil {
return body , false , fmt . Errorf ( "normalize passthrough body stream=true: %w" , err )
}
normalized = next
changed = true
2026-02-12 21:02:52 +08:00
}
}
return normalized , changed , nil
}
2026-02-14 11:23:10 +08:00
func detectOpenAIPassthroughInstructionsRejectReason ( reqModel string , body [ ] byte ) string {
model := strings . ToLower ( strings . TrimSpace ( reqModel ) )
if ! strings . Contains ( model , "codex" ) {
return ""
}
instructions := gjson . GetBytes ( body , "instructions" )
if ! instructions . Exists ( ) {
return "instructions_missing"
}
if instructions . Type != gjson . String {
return "instructions_not_string"
}
if strings . TrimSpace ( instructions . String ( ) ) == "" {
return "instructions_empty"
}
return ""
}
2026-02-12 09:41:37 +08:00
func extractOpenAIReasoningEffortFromBody ( body [ ] byte , requestedModel string ) * string {
reasoningEffort := strings . TrimSpace ( gjson . GetBytes ( body , "reasoning.effort" ) . String ( ) )
if reasoningEffort == "" {
reasoningEffort = strings . TrimSpace ( gjson . GetBytes ( body , "reasoning_effort" ) . String ( ) )
}
if reasoningEffort != "" {
normalized := normalizeOpenAIReasoningEffort ( reasoningEffort )
if normalized == "" {
return nil
}
return & normalized
}
value := deriveOpenAIReasoningEffortFromModel ( requestedModel )
if value == "" {
return nil
}
return & value
}
2026-03-08 23:22:28 +08:00
func extractOpenAIServiceTier ( reqBody map [ string ] any ) * string {
if reqBody == nil {
return nil
}
raw , ok := reqBody [ "service_tier" ] . ( string )
if ! ok {
return nil
}
return normalizeOpenAIServiceTier ( raw )
}
func extractOpenAIServiceTierFromBody ( body [ ] byte ) * string {
if len ( body ) == 0 {
return nil
}
return normalizeOpenAIServiceTier ( gjson . GetBytes ( body , "service_tier" ) . String ( ) )
}
func normalizeOpenAIServiceTier ( raw string ) * string {
value := strings . ToLower ( strings . TrimSpace ( raw ) )
if value == "" {
return nil
}
if value == "fast" {
value = "priority"
}
2026-04-28 00:34:23 +08:00
// 放过 OpenAI 官方文档定义的所有合法 tier 值: priority/flex/auto/default/scale。
// 对 Codex 客户端零影响( Codex 只发 priority 或 flex, 见 codex-rs/core/src/client.rs) ,
// 但能让直连 OpenAI SDK 的用户透传 auto/default/scale 以便抓包/调试。
// 真未知值仍返回 nil, 由 normalizeResponsesBodyServiceTier 从 body 中删除。
2026-03-08 23:22:28 +08:00
switch value {
2026-04-28 00:34:23 +08:00
case "priority" , "flex" , "auto" , "default" , "scale" :
2026-03-08 23:22:28 +08:00
return & value
default :
return nil
}
}
2026-04-28 00:34:23 +08:00
// OpenAIFastBlockedError indicates a request was rejected by the OpenAI fast
// policy (action=block). Mirrors BetaBlockedError on the Claude side.
type OpenAIFastBlockedError struct {
Message string
}
func ( e * OpenAIFastBlockedError ) Error ( ) string { return e . Message }
// evaluateOpenAIFastPolicy returns the action and error message that should be
// applied for a request with the given account/model/service_tier. When the
// policy service is unavailable or no rule matches, it returns
// (BetaPolicyActionPass, "") so callers can short-circuit safely.
//
// Matching rules:
// - Scope filters by account type (all / oauth / apikey / bedrock)
// - ServiceTier must be empty (= any), "all", or equal the normalized tier
// - ModelWhitelist narrows the rule to specific models; FallbackAction
// handles the non-matching case (default: pass)
//
// 与 Claude BetaPolicy 的差异(保留首条匹配 short-circuit) :
// - BetaPolicy 处理的是 anthropic-beta header 中的 token 集合,不同
// 规则可能针对不同 token, filter 需要累加成 set; block 则 first-match。
// - OpenAI fast policy 操作的是单个字段 service_tier: filter 即删字段,
// 没有可累加的对象。一次请求只携带一个 service_tier, 规则的 tier
// 维度天然互斥;同一 (scope, tier) 下若多条规则的 model whitelist
// 发生重叠, admin 可通过规则顺序明确意图。因此采用 first-match 而
// 非 BetaPolicy 那样的"block 覆盖 filter 覆盖 pass"语义。
func ( s * OpenAIGatewayService ) evaluateOpenAIFastPolicy ( ctx context . Context , account * Account , model , serviceTier string ) ( action , errMsg string ) {
if s == nil || s . settingService == nil {
return BetaPolicyActionPass , ""
}
tier := strings . ToLower ( strings . TrimSpace ( serviceTier ) )
if tier == "" {
return BetaPolicyActionPass , ""
}
settings := openAIFastPolicySettingsFromContext ( ctx )
if settings == nil {
fetched , err := s . settingService . GetOpenAIFastPolicySettings ( ctx )
if err != nil || fetched == nil {
return BetaPolicyActionPass , ""
}
settings = fetched
}
return evaluateOpenAIFastPolicyWithSettings ( settings , account , model , tier )
}
// evaluateOpenAIFastPolicyWithSettings is the pure-function core extracted so
// long-lived sessions (e.g. WS) can prefetch settings once and avoid hitting
// the settingService on every frame. See WSSession entry and
// openAIFastPolicySettingsFromContext for the caching glue.
func evaluateOpenAIFastPolicyWithSettings ( settings * OpenAIFastPolicySettings , account * Account , model , tier string ) ( action , errMsg string ) {
if settings == nil {
return BetaPolicyActionPass , ""
}
isOAuth := account != nil && account . IsOAuth ( )
isBedrock := account != nil && account . IsBedrock ( )
for _ , rule := range settings . Rules {
if ! betaPolicyScopeMatches ( rule . Scope , isOAuth , isBedrock ) {
continue
}
ruleTier := strings . ToLower ( strings . TrimSpace ( rule . ServiceTier ) )
if ruleTier != "" && ruleTier != OpenAIFastTierAny && ruleTier != tier {
continue
}
eff := BetaPolicyRule {
Action : rule . Action ,
ErrorMessage : rule . ErrorMessage ,
ModelWhitelist : rule . ModelWhitelist ,
FallbackAction : rule . FallbackAction ,
FallbackErrorMessage : rule . FallbackErrorMessage ,
}
return resolveRuleAction ( eff , model )
}
return BetaPolicyActionPass , ""
}
// openAIFastPolicyCtxKey 是 context 中预取的 OpenAIFastPolicySettings 缓存
// 键,仅用于 WebSocket 长会话内多帧复用同一份策略快照,避免每帧 DB 命中。
//
// Trade-off: 策略变更不会影响当前 WS session( 只影响新 session) 。这是
// 有意为之 —— 对长会话来说,"策略一致性"比"立刻生效"更重要,且 Claude
// BetaPolicy 的 gin.Context 缓存也是同样取舍。需要 hot-reload 时管理员
// 可以通过踢断 session 强制刷新。
type openAIFastPolicyCtxKeyType struct { }
var openAIFastPolicyCtxKey = openAIFastPolicyCtxKeyType { }
// withOpenAIFastPolicyContext 将一份 settings 快照绑定到 context, 供该 ctx
// 衍生 goroutine 中的 evaluateOpenAIFastPolicy 复用。
func withOpenAIFastPolicyContext ( ctx context . Context , settings * OpenAIFastPolicySettings ) context . Context {
if ctx == nil || settings == nil {
return ctx
}
return context . WithValue ( ctx , openAIFastPolicyCtxKey , settings )
}
func openAIFastPolicySettingsFromContext ( ctx context . Context ) * OpenAIFastPolicySettings {
if ctx == nil {
return nil
}
if v , ok := ctx . Value ( openAIFastPolicyCtxKey ) . ( * OpenAIFastPolicySettings ) ; ok {
return v
}
return nil
}
// applyOpenAIFastPolicyToBody applies the OpenAI fast policy to a raw request
// body. When action=filter it removes the service_tier field; when
// action=block it returns (body, *OpenAIFastBlockedError). On pass it
// normalizes the service_tier value (e.g. client alias "fast" → "priority"),
// rewriting the body so the upstream receives a slug it recognizes.
//
// Rationale for normalize-on-pass: chat-completions / messages 入口在调用本
// 函数之前已经通过 normalizeResponsesBodyServiceTier 把 service_tier 归一化
// 到了上游可识别值; passthrough( OpenAI 自动透传) / native /responses 等
// 入口没有这一前置步骤, pass 路径下若不在此处归一化,"fast" 就会被原样
// 透传到 OpenAI 上游导致 400/拒绝。把归一化收敛到本函数,所有入口行为一致。
func ( s * OpenAIGatewayService ) applyOpenAIFastPolicyToBody ( ctx context . Context , account * Account , model string , body [ ] byte ) ( [ ] byte , error ) {
if len ( body ) == 0 {
return body , nil
}
rawTier := gjson . GetBytes ( body , "service_tier" ) . String ( )
if rawTier == "" {
return body , nil
}
normTier := normalizedOpenAIServiceTierValue ( rawTier )
if normTier == "" {
return body , nil
}
action , errMsg := s . evaluateOpenAIFastPolicy ( ctx , account , model , normTier )
switch action {
case BetaPolicyActionBlock :
msg := errMsg
if msg == "" {
msg = fmt . Sprintf ( "openai service_tier=%s is not allowed for model %s" , normTier , model )
}
return body , & OpenAIFastBlockedError { Message : msg }
case BetaPolicyActionFilter :
trimmed , err := sjson . DeleteBytes ( body , "service_tier" )
if err != nil {
return body , fmt . Errorf ( "strip service_tier from body: %w" , err )
}
return trimmed , nil
default :
// pass: 把别名( 如 "fast")写回为规范值("priority")。
if normTier == rawTier {
return body , nil
}
updated , err := sjson . SetBytes ( body , "service_tier" , normTier )
if err != nil {
return body , fmt . Errorf ( "normalize service_tier on pass: %w" , err )
}
return updated , nil
}
}
// writeOpenAIFastPolicyBlockedResponse writes a 403 JSON response for a
// request blocked by the OpenAI fast policy.
func writeOpenAIFastPolicyBlockedResponse ( c * gin . Context , err * OpenAIFastBlockedError ) {
if c == nil || err == nil {
return
}
c . JSON ( http . StatusForbidden , gin . H {
"error" : gin . H {
"type" : "permission_error" ,
"message" : err . Message ,
} ,
} )
}
// applyOpenAIFastPolicyToWSResponseCreate evaluates the OpenAI fast policy
// against a single client→upstream WebSocket frame whose top-level
// "type"=="response.create". It mirrors the HTTP-side
// applyOpenAIFastPolicyToBody contract but operates on a Realtime/Responses
// WS payload:
//
// - pass: returns frame unchanged (newBytes == frame, blocked == nil)
// - filter: returns a copy with top-level service_tier removed
// - block: returns (frame, *OpenAIFastBlockedError)
//
// Only frames whose "type" field strictly equals "response.create" are
// inspected/mutated. Any other frame type — including the empty string —
// passes through untouched. The OpenAI Realtime client-event spec requires
// "type" to be set, so an empty type is treated as a malformed frame we do
// not police; the upstream is the source of truth for rejecting it.
//
// service_tier lives at the top level of response.create — same as the
// Responses HTTP body shape (see openai_gateway_chat_completions.go:304 +
// extractOpenAIServiceTierFromBody at line 5593, and the test fixture at
// openai_ws_forwarder_ingress_session_test.go:402). We therefore only need
// to inspect / strip the top-level field; there is no nested form in the
// schema today.
//
// The caller is responsible for choosing the upstream model passed in —
// this helper does not re-derive it.
func ( s * OpenAIGatewayService ) applyOpenAIFastPolicyToWSResponseCreate (
ctx context . Context ,
account * Account ,
model string ,
frame [ ] byte ,
) ( [ ] byte , * OpenAIFastBlockedError , error ) {
if len ( frame ) == 0 {
return frame , nil , nil
}
if ! gjson . ValidBytes ( frame ) {
return frame , nil , nil
}
frameType := strings . TrimSpace ( gjson . GetBytes ( frame , "type" ) . String ( ) )
// Strict match: only response.create is policy-checked. Empty / other
// types pass through untouched so we never accidentally strip fields
// from response.cancel, conversation.item.create, or any future
// client-event the spec adds. The Realtime spec requires "type" on
// every client event, so an empty type is malformed input — let the
// upstream reject it rather than guessing at our layer.
if frameType != "response.create" {
return frame , nil , nil
}
rawTier := gjson . GetBytes ( frame , "service_tier" ) . String ( )
if rawTier == "" {
return frame , nil , nil
}
normTier := normalizedOpenAIServiceTierValue ( rawTier )
if normTier == "" {
return frame , nil , nil
}
action , errMsg := s . evaluateOpenAIFastPolicy ( ctx , account , model , normTier )
switch action {
case BetaPolicyActionBlock :
msg := errMsg
if msg == "" {
msg = fmt . Sprintf ( "openai service_tier=%s is not allowed for model %s" , normTier , model )
}
return frame , & OpenAIFastBlockedError { Message : msg } , nil
case BetaPolicyActionFilter :
trimmed , err := sjson . DeleteBytes ( frame , "service_tier" )
if err != nil {
return frame , nil , fmt . Errorf ( "strip service_tier from ws frame: %w" , err )
}
return trimmed , nil , nil
default :
return frame , nil , nil
}
}
// newOpenAIFastPolicyWSEventID returns a Realtime-style event_id for a
// server-emitted error event. Matches the loose "evt_<rand>" convention used
// by upstream Realtime servers; the exact value is not load-bearing and is
// only required for client-side log correlation. We reuse the existing
// google/uuid dependency rather than pulling a new one.
func newOpenAIFastPolicyWSEventID ( ) string {
id , err := uuid . NewRandom ( )
if err != nil {
// Extremely unlikely; fall back to a fixed prefix so the field is
// still non-empty and the schema stays self-consistent.
return "evt_openai_fast_policy"
}
// Strip dashes so it visually matches "evt_<hex>" rather than UUID v4
// canonical form, mirroring what real Realtime traces look like.
return "evt_" + strings . ReplaceAll ( id . String ( ) , "-" , "" )
}
// buildOpenAIFastPolicyBlockedWSEvent renders an OpenAI Realtime/Responses
// style "error" event payload for a request blocked by the OpenAI fast
// policy. The shape mirrors Realtime error events as observed in upstream
// traces and per the spec's server "error" event:
//
// {
// "event_id": "evt_<random>",
// "type": "error",
// "error": {
// "type": "invalid_request_error",
// "code": "policy_violation",
// "message": "..."
// }
// }
//
// event_id lets clients correlate the rejection in their logs; "code" gives
// programmatic clients a stable identifier (HTTP-side equivalent is the
// 403 permission_error JSON body).
func buildOpenAIFastPolicyBlockedWSEvent ( err * OpenAIFastBlockedError ) [ ] byte {
if err == nil {
return nil
}
eventID := newOpenAIFastPolicyWSEventID ( )
payload , mErr := json . Marshal ( map [ string ] any {
"event_id" : eventID ,
"type" : "error" ,
"error" : map [ string ] any {
"type" : "invalid_request_error" ,
"code" : "policy_violation" ,
"message" : err . Message ,
} ,
} )
if mErr != nil {
// Fallback to a minimal hand-rolled payload; Marshal of the literal
// shape above should never fail in practice.
return [ ] byte ( ` { "event_id":" ` + eventID + ` ","type":"error","error": { "type":"invalid_request_error","code":"policy_violation","message":"openai fast policy blocked this request"}} ` )
}
return payload
}
2026-04-01 00:46:38 +08:00
func sanitizeEmptyBase64InputImagesInOpenAIBody ( body [ ] byte ) ( [ ] byte , bool , error ) {
if len ( body ) == 0 || ! bytes . Contains ( body , [ ] byte ( ` "image_url" ` ) ) || ! bytes . Contains ( body , [ ] byte ( ` base64, ` ) ) {
return body , false , nil
}
var reqBody map [ string ] any
if err := json . Unmarshal ( body , & reqBody ) ; err != nil {
return body , false , fmt . Errorf ( "sanitize request body: %w" , err )
}
if ! sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap ( reqBody ) {
return body , false , nil
}
normalized , err := json . Marshal ( reqBody )
if err != nil {
return body , false , fmt . Errorf ( "serialize sanitized request body: %w" , err )
}
return normalized , true , nil
}
func sanitizeEmptyBase64InputImagesInOpenAIRequestBodyMap ( reqBody map [ string ] any ) bool {
if reqBody == nil {
return false
}
input , ok := reqBody [ "input" ]
if ! ok {
return false
}
normalizedInput , changed := sanitizeEmptyBase64InputImagesInOpenAIInput ( input )
if ! changed {
return false
}
reqBody [ "input" ] = normalizedInput
return true
}
func sanitizeEmptyBase64InputImagesInOpenAIInput ( input any ) ( any , bool ) {
items , ok := input . ( [ ] any )
if ! ok {
return input , false
}
normalizedItems := make ( [ ] any , 0 , len ( items ) )
changed := false
for _ , item := range items {
itemMap , ok := item . ( map [ string ] any )
if ! ok {
normalizedItems = append ( normalizedItems , item )
continue
}
if shouldDropEmptyBase64InputImagePart ( itemMap ) {
changed = true
continue
}
content , ok := itemMap [ "content" ]
if ! ok {
normalizedItems = append ( normalizedItems , itemMap )
continue
}
parts , ok := content . ( [ ] any )
if ! ok {
normalizedItems = append ( normalizedItems , itemMap )
continue
}
normalizedParts := make ( [ ] any , 0 , len ( parts ) )
itemChanged := false
for _ , part := range parts {
if shouldDropEmptyBase64InputImagePart ( part ) {
changed = true
itemChanged = true
continue
}
normalizedParts = append ( normalizedParts , part )
}
if itemChanged {
if len ( normalizedParts ) == 0 {
continue
}
itemMap [ "content" ] = normalizedParts
}
normalizedItems = append ( normalizedItems , itemMap )
}
if ! changed {
return input , false
}
return normalizedItems , true
}
func shouldDropEmptyBase64InputImagePart ( part any ) bool {
partMap , ok := part . ( map [ string ] any )
if ! ok {
return false
}
typeValue , _ := partMap [ "type" ] . ( string )
if strings . TrimSpace ( typeValue ) != "input_image" {
return false
}
imageURL , _ := partMap [ "image_url" ] . ( string )
return isEmptyBase64DataURI ( imageURL )
}
func isEmptyBase64DataURI ( raw string ) bool {
if ! strings . HasPrefix ( raw , "data:" ) {
return false
}
rest := strings . TrimPrefix ( raw , "data:" )
semicolonIdx := strings . Index ( rest , ";" )
if semicolonIdx < 0 {
return false
}
rest = rest [ semicolonIdx + 1 : ]
if ! strings . HasPrefix ( rest , "base64," ) {
return false
}
return strings . TrimSpace ( strings . TrimPrefix ( rest , "base64," ) ) == ""
}
2026-02-12 09:41:37 +08:00
func getOpenAIRequestBodyMap ( c * gin . Context , body [ ] byte ) ( map [ string ] any , error ) {
if c != nil {
if cached , ok := c . Get ( OpenAIParsedRequestBodyKey ) ; ok {
if reqBody , ok := cached . ( map [ string ] any ) ; ok && reqBody != nil {
return reqBody , nil
}
}
}
var reqBody map [ string ] any
if err := json . Unmarshal ( body , & reqBody ) ; err != nil {
return nil , fmt . Errorf ( "parse request: %w" , err )
}
2026-02-28 15:01:20 +08:00
if c != nil {
c . Set ( OpenAIParsedRequestBodyKey , reqBody )
}
2026-02-12 09:41:37 +08:00
return reqBody , nil
}
2026-02-03 14:36:29 +08:00
func extractOpenAIReasoningEffort ( reqBody map [ string ] any , requestedModel string ) * string {
if value , present := getOpenAIReasoningEffortFromReqBody ( reqBody ) ; present {
if value == "" {
return nil
}
return & value
}
value := deriveOpenAIReasoningEffortFromModel ( requestedModel )
if value == "" {
return nil
}
return & value
}
func normalizeOpenAIReasoningEffort ( raw string ) string {
value := strings . ToLower ( strings . TrimSpace ( raw ) )
if value == "" {
return ""
}
// Normalize separators for "x-high"/"x_high" variants.
value = strings . NewReplacer ( "-" , "" , "_" , "" , " " , "" ) . Replace ( value )
switch value {
case "none" , "minimal" :
return ""
case "low" , "medium" , "high" :
return value
case "xhigh" , "extrahigh" :
return "xhigh"
default :
// Only store known effort levels for now to keep UI consistent.
return ""
}
}