// backend/internal/handler/gateway_handler.go

package handler

import (
	"context"
	"crypto/rand"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"github.com/Wei-Shaw/sub2api/internal/config"
	"github.com/Wei-Shaw/sub2api/internal/domain"
	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
	pkgerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
	pkghttputil "github.com/Wei-Shaw/sub2api/internal/pkg/httputil"
	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
	"github.com/Wei-Shaw/sub2api/internal/pkg/timezone"
	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
	"github.com/Wei-Shaw/sub2api/internal/service"

	"github.com/gin-gonic/gin"
	"go.uber.org/zap"
)

// gatewayCompatibilityMetricsLogInterval is a fixed interval of 1024.
// NOTE(review): presumably the compatibility fallback metrics are logged once
// every 1024 requests by maybeLogCompatibilityFallbackMetrics; that function is
// not visible in this part of the file, so confirm there.
const gatewayCompatibilityMetricsLogInterval = 1024

// gatewayCompatibilityMetricsLogCounter is a package-level atomic counter.
// NOTE(review): presumably incremented per request and compared against
// gatewayCompatibilityMetricsLogInterval to sample the metrics log; confirm.
var gatewayCompatibilityMetricsLogCounter atomic.Uint64

// GatewayHandler handles API gateway requests.
//
// It bundles the services the gateway endpoints call (account selection and
// forwarding, billing eligibility checks, usage statistics and usage recording),
// the concurrency and user-message-queue helpers, the configuration, and the
// account-switch limits. All fields are populated by NewGatewayHandler;
// userMsgQueueHelper may be nil when no user message queue service or config is
// supplied.
type GatewayHandler struct {
	gatewayService            *service.GatewayService
	geminiCompatService       *service.GeminiMessagesCompatService
	antigravityGatewayService *service.AntigravityGatewayService
	userService               *service.UserService
	billingCacheService       *service.BillingCacheService
	usageService              *service.UsageService
	apiKeyService             *service.APIKeyService
	usageRecordWorkerPool     *service.UsageRecordWorkerPool
	errorPassthroughService   *service.ErrorPassthroughService
	concurrencyHelper         *ConcurrencyHelper
	userMsgQueueHelper        *UserMsgQueueHelper
	maxAccountSwitches        int
	maxAccountSwitchesGemini  int
	cfg                       *config.Config
	settingService            *service.SettingService
}

// NewGatewayHandler builds a GatewayHandler from its service dependencies.
//
// The account-switch limits default to 10 (3 for the Gemini loop) and are
// replaced by cfg.Gateway.MaxAccountSwitches / MaxAccountSwitchesGemini when
// those are positive. The ping interval is cfg.Concurrency.PingInterval
// interpreted as seconds, or zero when cfg is nil. The user message queue
// helper is created only when both userMsgQueueService and cfg are non-nil.
func NewGatewayHandler(
	gatewayService *service.GatewayService,
	geminiCompatService *service.GeminiMessagesCompatService,
	antigravityGatewayService *service.AntigravityGatewayService,
	userService *service.UserService,
	concurrencyService *service.ConcurrencyService,
	billingCacheService *service.BillingCacheService,
	usageService *service.UsageService,
	apiKeyService *service.APIKeyService,
	usageRecordWorkerPool *service.UsageRecordWorkerPool,
	errorPassthroughService *service.ErrorPassthroughService,
	userMsgQueueService *service.UserMessageQueueService,
	cfg *config.Config,
	settingService *service.SettingService,
) *GatewayHandler {
	var pingInterval time.Duration
	switchLimit, geminiSwitchLimit := 10, 3
	if cfg != nil {
		pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second
		if n := cfg.Gateway.MaxAccountSwitches; n > 0 {
			switchLimit = n
		}
		if n := cfg.Gateway.MaxAccountSwitchesGemini; n > 0 {
			geminiSwitchLimit = n
		}
	}

	handler := &GatewayHandler{
		gatewayService:            gatewayService,
		geminiCompatService:       geminiCompatService,
		antigravityGatewayService: antigravityGatewayService,
		userService:               userService,
		billingCacheService:       billingCacheService,
		usageService:              usageService,
		apiKeyService:             apiKeyService,
		usageRecordWorkerPool:     usageRecordWorkerPool,
		errorPassthroughService:   errorPassthroughService,
		maxAccountSwitches:        switchLimit,
		maxAccountSwitchesGemini:  geminiSwitchLimit,
		cfg:                       cfg,
		settingService:            settingService,
	}

	// The user message serial queue helper is optional; leave it nil when the
	// queue service or the config is missing.
	if userMsgQueueService != nil && cfg != nil {
		handler.userMsgQueueHelper = NewUserMsgQueueHelper(userMsgQueueService, SSEPingFormatClaude, pingInterval)
	}
	handler.concurrencyHelper = NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval)

	return handler
}

// Messages handles the Claude API compatible messages endpoint.
// POST /v1/messages
//
// The handler proceeds in the following order:
//  1. Reads the API key and auth subject from the gin context, then reads and
//     parses the request body.
//  2. Tags the request context (haiku probe flag, Claude Code client flag,
//     thinking state) and validates that a model was supplied.
//  3. Enforces the per-user wait-queue limit and acquires the user concurrency slot.
//  4. Re-checks billing eligibility after the wait.
//  5. Computes the sticky-session key and looks up any account bound to it.
//  6. Selects an account, acquires its concurrency slot and forwards the request,
//     switching accounts on UpstreamFailoverError as directed by the failover state.
//     Gemini-platform requests use a dedicated loop; all other platforms use the
//     loop that also applies the user message queue and the fallback-group retry
//     on PromptTooLongError.
//  7. On success, submits a usage-record task.
//
// Errors are written with errorResponse, or with the streaming-aware helpers
// once streamStarted may have been set by a wait helper.
func (h *GatewayHandler) Messages(c *gin.Context) {
	// Fetch the API key and user from the context (set by the ApiKeyAuth middleware).
	apiKey, ok := middleware2.GetAPIKeyFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	subject, ok := middleware2.GetAuthSubjectFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
		return
	}
	reqLog := requestLogger(
		c,
		"handler.gateway.messages",
		zap.Int64("user_id", subject.UserID),
		zap.Int64("api_key_id", apiKey.ID),
		zap.Any("group_id", apiKey.GroupID),
	)
	defer h.maybeLogCompatibilityFallbackMetrics(reqLog)

	// Read the request body.
	body, err := pkghttputil.ReadRequestBodyWithPrealloc(c.Request)
	if err != nil {
		if maxErr, ok := extractMaxBytesError(err); ok {
			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
			return
		}
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
		return
	}

	if len(body) == 0 {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
		return
	}

	setOpsRequestContext(c, "", false, body)

	parsedReq, err := service.ParseGatewayRequest(body, domain.PlatformAnthropic)
	if err != nil {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
		return
	}
	reqModel := parsedReq.Model
	reqStream := parsedReq.Stream
	reqLog = reqLog.With(zap.String("model", reqModel), zap.Bool("stream", reqStream))

	// Mark the "max_tokens=1 + haiku" probe request in the context.
	// This must happen before SetClaudeCodeClientContext, because ClaudeCodeValidator reads this flag to decide whether to bypass validation.
	if isMaxTokensOneHaikuRequest(reqModel, parsedReq.MaxTokens, reqStream) {
		ctx := service.WithIsMaxTokensOneHaikuRequest(c.Request.Context(), true, h.metadataBridgeEnabled())
		c.Request = c.Request.WithContext(ctx)
	}

	// Detect whether the caller is a Claude Code client and record it in the context (reuses the parsed request to avoid a second deserialization).
	SetClaudeCodeClientContext(c, body, parsedReq)
	isClaudeCodeClient := service.IsClaudeCodeClient(c.Request.Context())

	// Version check: only for Claude Code clients; reject requests below the minimum version.
	if !h.checkClaudeCodeVersion(c) {
		return
	}

	// Record the thinking state in the request context; used for Antigravity final model key derivation / per-model rate limiting.
	c.Request = c.Request.WithContext(service.WithThinkingEnabled(c.Request.Context(), parsedReq.ThinkingEnabled, h.metadataBridgeEnabled()))

	setOpsRequestContext(c, reqModel, reqStream, body)

	// Validate that model is present.
	if reqModel == "" {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
		return
	}

	// Track if we've started streaming (for error handling)
	streamStarted := false

	// Bind the error passthrough service so the service layer can reuse its rules for non-failover errors.
	if h.errorPassthroughService != nil {
		service.BindErrorPassthroughService(c, h.errorPassthroughService)
	}

	// Fetch subscription info (may be nil) up front; it is used by the checks below.
	subscription, _ := middleware2.GetSubscriptionFromContext(c)

	// 0. Check whether the wait queue is already full.
	maxWait := service.CalculateMaxWait(subject.Concurrency)
	canWait, err := h.concurrencyHelper.IncrementWaitCount(c.Request.Context(), subject.UserID, maxWait)
	waitCounted := false
	if err != nil {
		reqLog.Warn("gateway.user_wait_counter_increment_failed", zap.Error(err))
		// On error, allow request to proceed
	} else if !canWait {
		reqLog.Info("gateway.user_wait_queue_full", zap.Int("max_wait", maxWait))
		h.errorResponse(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later")
		return
	}
	if err == nil && canWait {
		waitCounted = true
	}
	// Ensure we decrement if we exit before acquiring the user slot.
	defer func() {
		if waitCounted {
			h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
		}
	}()

	// 1. Acquire the user concurrency slot first.
	userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted)
	if err != nil {
		reqLog.Warn("gateway.user_slot_acquire_failed", zap.Error(err))
		h.handleConcurrencyError(c, err, "user", streamStarted)
		return
	}
	// User slot acquired: no longer waiting in the queue.
	if waitCounted {
		h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
		waitCounted = false
	}
	// Make sure the slot is released when the request ends or the context is canceled, so client disconnects do not leak it.
	userReleaseFunc = wrapReleaseOnDone(c.Request.Context(), userReleaseFunc)
	if userReleaseFunc != nil {
		defer userReleaseFunc()
	}

	// 2. [New] Re-check balance/subscription after the wait.
	if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
		reqLog.Info("gateway.billing_eligibility_check_failed", zap.Error(err))
		status, code, message := billingErrorDetails(err)
		h.handleStreamingAwareError(c, status, code, message, streamStarted)
		return
	}

	// Compute the sticky session hash.
	parsedReq.SessionContext = &service.SessionContext{
		ClientIP:  ip.GetClientIP(c),
		UserAgent: c.GetHeader("User-Agent"),
		APIKeyID:  apiKey.ID,
	}
	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)

	// Resolve the platform: prefer the forced platform (/antigravity routes; the middleware has set it on request.Context), otherwise use the group platform.
	platform := ""
	if forcePlatform, ok := middleware2.GetForcePlatformFromContext(c); ok {
		platform = forcePlatform
	} else if apiKey.Group != nil {
		platform = apiKey.Group.Platform
	}
	sessionKey := sessionHash
	if platform == service.PlatformGemini && sessionHash != "" {
		sessionKey = "gemini:" + sessionHash
	}

	// Look up the account ID bound to the sticky session.
	var sessionBoundAccountID int64
	if sessionKey != "" {
		sessionBoundAccountID, _ = h.gatewayService.GetCachedSessionAccountID(c.Request.Context(), apiKey.GroupID, sessionKey)
		if sessionBoundAccountID > 0 {
			prefetchedGroupID := int64(0)
			if apiKey.GroupID != nil {
				prefetchedGroupID = *apiKey.GroupID
			}
			ctx := service.WithPrefetchedStickySession(c.Request.Context(), sessionBoundAccountID, prefetchedGroupID, h.metadataBridgeEnabled())
			c.Request = c.Request.WithContext(ctx)
		}
	}
	// Whether a sticky session is really bound: there is a sessionKey and it is already bound to an account.
	hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0

	if platform == service.PlatformGemini {
		fs := NewFailoverState(h.maxAccountSwitchesGemini, hasBoundSession)

		// For single-account groups, set the SingleAccountRetry flag up front so the service layer does not set the model rate-limit mark on the first 503.
		// This avoids a 29s rate limit being set when a single-account group receives a 503 (MODEL_CAPACITY_EXHAUSTED), which would make subsequent requests fail fast repeatedly.
		if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
			ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
			c.Request = c.Request.WithContext(ctx)
		}

		for {
			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, "") // Gemini does not use session limits
			if err != nil {
				if len(fs.FailedAccountIDs) == 0 {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
					return
				}
				action := fs.HandleSelectionExhausted(c.Request.Context())
				switch action {
				case FailoverContinue:
					ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
					c.Request = c.Request.WithContext(ctx)
					continue
				case FailoverCanceled:
					return
				default: // FailoverExhausted
					if fs.LastFailoverErr != nil {
						h.handleFailoverExhausted(c, fs.LastFailoverErr, service.PlatformGemini, streamStarted)
					} else {
						h.handleFailoverExhaustedSimple(c, 502, streamStarted)
					}
					return
				}
			}
			account := selection.Account
			setOpsSelectedAccount(c, account.ID, account.Platform)

			// Check request interception (warmup requests, SUGGESTION MODE, etc.).
			if account.IsInterceptWarmupEnabled() {
				interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
				if interceptType != InterceptTypeNone {
					if selection.Acquired && selection.ReleaseFunc != nil {
						selection.ReleaseFunc()
					}
					if reqStream {
						sendMockInterceptStream(c, reqModel, interceptType)
					} else {
						sendMockInterceptResponse(c, reqModel, interceptType)
					}
					return
				}
			}

			// 3. Acquire the account concurrency slot.
			accountReleaseFunc := selection.ReleaseFunc
			if !selection.Acquired {
				if selection.WaitPlan == nil {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
					return
				}
				accountWaitCounted := false
				canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
				if err != nil {
					reqLog.Warn("gateway.account_wait_counter_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
				} else if !canWait {
					reqLog.Info("gateway.account_wait_queue_full",
						zap.Int64("account_id", account.ID),
						zap.Int("max_waiting", selection.WaitPlan.MaxWaiting),
					)
					h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
					return
				}
				if err == nil && canWait {
					accountWaitCounted = true
				}
				releaseWait := func() {
					if accountWaitCounted {
						h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
						accountWaitCounted = false
					}
				}

				accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
					c,
					account.ID,
					selection.WaitPlan.MaxConcurrency,
					selection.WaitPlan.Timeout,
					reqStream,
					&streamStarted,
				)
				if err != nil {
					reqLog.Warn("gateway.account_slot_acquire_failed", zap.Int64("account_id", account.ID), zap.Error(err))
					releaseWait()
					h.handleConcurrencyError(c, err, "account", streamStarted)
					return
				}
				// Slot acquired: no longer waiting in queue.
				releaseWait()
				if err := h.gatewayService.BindStickySession(c.Request.Context(), apiKey.GroupID, sessionKey, account.ID); err != nil {
					reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
				}
			}
			// The account slot / wait count must be reclaimed safely on timeout or disconnect.
			accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)

			// Forward the request, branching on the account platform.
			var result *service.ForwardResult
			requestCtx := c.Request.Context()
			if fs.SwitchCount > 0 {
				requestCtx = service.WithAccountSwitchCount(requestCtx, fs.SwitchCount, h.metadataBridgeEnabled())
			}
			if account.Platform == service.PlatformAntigravity {
				result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, reqModel, "generateContent", reqStream, body, hasBoundSession)
			} else {
				result, err = h.geminiCompatService.Forward(requestCtx, c, account, body)
			}
			if accountReleaseFunc != nil {
				accountReleaseFunc()
			}
			if err != nil {
				var failoverErr *service.UpstreamFailoverError
				if errors.As(err, &failoverErr) {
					action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
					switch action {
					case FailoverContinue:
						continue
					case FailoverExhausted:
						h.handleFailoverExhausted(c, fs.LastFailoverErr, service.PlatformGemini, streamStarted)
						return
					case FailoverCanceled:
						return
					}
				}
				wroteFallback := h.ensureForwardErrorResponse(c, streamStarted)
				reqLog.Error("gateway.forward_failed",
					zap.Int64("account_id", account.ID),
					zap.Bool("fallback_error_response_written", wroteFallback),
					zap.Error(err),
				)
				return
			}

			// Increment the RPM counter (after Forward succeeds).
			// Note: the TOCTOU race is a known and accepted design trade-off, the same soft-limit pattern as WindowCost.
			// Under high concurrency the RPM limit may be briefly exceeded, but this does not cause requests to fail.
			if account.IsAnthropicOAuthOrSetupToken() && account.GetBaseRPM() > 0 {
				if err := h.gatewayService.IncrementAccountRPM(c.Request.Context(), account.ID); err != nil {
					reqLog.Warn("gateway.rpm_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
				}
			}

			// Capture request info (for asynchronous recording; avoids accessing gin.Context from a goroutine).
			userAgent := c.GetHeader("User-Agent")
			clientIP := ip.GetClientIP(c)

			// Usage records are submitted through a bounded worker pool, so the request hot path does not create unbounded goroutines.
			h.submitUsageRecordTask(func(ctx context.Context) {
				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
					Result:            result,
					ParsedRequest:     parsedReq,
					APIKey:            apiKey,
					User:              apiKey.User,
					Account:           account,
					Subscription:      subscription,
					UserAgent:         userAgent,
					IPAddress:         clientIP,
					ForceCacheBilling: fs.ForceCacheBilling,
					APIKeyService:     h.apiKeyService,
				}); err != nil {
					logger.L().With(
						zap.String("component", "handler.gateway.messages"),
						zap.Int64("user_id", subject.UserID),
						zap.Int64("api_key_id", apiKey.ID),
						zap.Any("group_id", apiKey.GroupID),
						zap.String("model", reqModel),
						zap.Int64("account_id", account.ID),
					).Error("gateway.record_usage_failed", zap.Error(err))
				}
			})
			return
		}
	}

	currentAPIKey := apiKey
	currentSubscription := subscription
	var fallbackGroupID *int64
	if apiKey.Group != nil {
		fallbackGroupID = apiKey.Group.FallbackGroupIDOnInvalidRequest
	}
	fallbackUsed := false

	// For single-account groups, set the SingleAccountRetry flag up front so the service layer does not set the model rate-limit mark on the first 503.
	// This avoids a 29s rate limit being set when a single-account group receives a 503 (MODEL_CAPACITY_EXHAUSTED), which would make subsequent requests fail fast repeatedly.
	if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) {
		ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
		c.Request = c.Request.WithContext(ctx)
	}

	for {
		fs := NewFailoverState(h.maxAccountSwitches, hasBoundSession)
		retryWithFallback := false

		for {
			// Select an account that supports the model.
			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID)
			if err != nil {
				if len(fs.FailedAccountIDs) == 0 {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
					return
				}
				action := fs.HandleSelectionExhausted(c.Request.Context())
				switch action {
				case FailoverContinue:
					ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
					c.Request = c.Request.WithContext(ctx)
					continue
				case FailoverCanceled:
					return
				default: // FailoverExhausted
					if fs.LastFailoverErr != nil {
						h.handleFailoverExhausted(c, fs.LastFailoverErr, platform, streamStarted)
					} else {
						h.handleFailoverExhaustedSimple(c, 502, streamStarted)
					}
					return
				}
			}
			account := selection.Account
			setOpsSelectedAccount(c, account.ID, account.Platform)

			// Check request interception (warmup requests, SUGGESTION MODE, etc.).
			if account.IsInterceptWarmupEnabled() {
				interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
				if interceptType != InterceptTypeNone {
					if selection.Acquired && selection.ReleaseFunc != nil {
						selection.ReleaseFunc()
					}
					if reqStream {
						sendMockInterceptStream(c, reqModel, interceptType)
					} else {
						sendMockInterceptResponse(c, reqModel, interceptType)
					}
					return
				}
			}

			// 3. Acquire the account concurrency slot.
			accountReleaseFunc := selection.ReleaseFunc
			if !selection.Acquired {
				if selection.WaitPlan == nil {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
					return
				}
				accountWaitCounted := false
				canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
				if err != nil {
					reqLog.Warn("gateway.account_wait_counter_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
				} else if !canWait {
					reqLog.Info("gateway.account_wait_queue_full",
						zap.Int64("account_id", account.ID),
						zap.Int("max_waiting", selection.WaitPlan.MaxWaiting),
					)
					h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
					return
				}
				if err == nil && canWait {
					accountWaitCounted = true
				}
				releaseWait := func() {
					if accountWaitCounted {
						h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
						accountWaitCounted = false
					}
				}

				accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
					c,
					account.ID,
					selection.WaitPlan.MaxConcurrency,
					selection.WaitPlan.Timeout,
					reqStream,
					&streamStarted,
				)
				if err != nil {
					reqLog.Warn("gateway.account_slot_acquire_failed", zap.Int64("account_id", account.ID), zap.Error(err))
					releaseWait()
					h.handleConcurrencyError(c, err, "account", streamStarted)
					return
				}
				// Slot acquired: no longer waiting in queue.
				releaseWait()
				if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
					reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
				}
			}
			// The account slot / wait count must be reclaimed safely on timeout or disconnect.
			accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)

			// ===== User message serial queue START =====
			var queueRelease func()
			umqMode := h.getUserMsgQueueMode(account, parsedReq)

			switch umqMode {
			case config.UMQModeSerialize:
				// Serialize mode: acquire lock + RPM delay + release (current behavior unchanged).
				baseRPM := account.GetBaseRPM()
				release, qErr := h.userMsgQueueHelper.AcquireWithWait(
					c, account.ID, baseRPM, reqStream, &streamStarted,
					h.cfg.Gateway.UserMessageQueue.WaitTimeout(),
					reqLog,
				)
				if qErr != nil {
					// fail-open: log a warning, do not block the request.
					reqLog.Warn("gateway.umq_acquire_failed",
						zap.Int64("account_id", account.ID),
						zap.Error(qErr),
					)
				} else {
					queueRelease = release
				}

			case config.UMQModeThrottle:
				// Soft throttling: only apply the RPM-adaptive delay; does not block concurrency.
				baseRPM := account.GetBaseRPM()
				if tErr := h.userMsgQueueHelper.ThrottleWithPing(
					c, account.ID, baseRPM, reqStream, &streamStarted,
					h.cfg.Gateway.UserMessageQueue.WaitTimeout(),
					reqLog,
				); tErr != nil {
					reqLog.Warn("gateway.umq_throttle_failed",
						zap.Int64("account_id", account.ID),
						zap.Error(tErr),
					)
				}

			default:
				if umqMode != "" {
					reqLog.Warn("gateway.umq_unknown_mode",
						zap.String("mode", umqMode),
						zap.Int64("account_id", account.ID),
					)
				}
			}

			// Wrap with wrapReleaseOnDone so the lock is released automatically on context cancellation (only serialize mode has a queueRelease).
			queueRelease = wrapReleaseOnDone(c.Request.Context(), queueRelease)
			// Inject the callback into ParsedRequest: use the outer wrapper so the AfterFunc can be cleaned up early.
			parsedReq.OnUpstreamAccepted = queueRelease
			// ===== User message serial queue END =====

			// Forward the request, branching on the account platform.
			c.Set("parsed_request", parsedReq)
			var result *service.ForwardResult
			requestCtx := c.Request.Context()
			if fs.SwitchCount > 0 {
				requestCtx = service.WithAccountSwitchCount(requestCtx, fs.SwitchCount, h.metadataBridgeEnabled())
			}
			if account.Platform == service.PlatformAntigravity && account.Type != service.AccountTypeAPIKey {
				result, err = h.antigravityGatewayService.Forward(requestCtx, c, account, body, hasBoundSession)
			} else {
				result, err = h.gatewayService.Forward(requestCtx, c, account, parsedReq)
			}

			// Fallback release of the serial lock (normally it has already been released early via the callback).
			if queueRelease != nil {
				queueRelease()
			}
			// Clear the callback reference so a stale callback is not invoked by mistake during a failover retry.
			parsedReq.OnUpstreamAccepted = nil

			if accountReleaseFunc != nil {
				accountReleaseFunc()
			}
			if err != nil {
				// Beta policy block: return 400 immediately, no failover
				var betaBlockedErr *service.BetaBlockedError
				if errors.As(err, &betaBlockedErr) {
					h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", betaBlockedErr.Message)
					return
				}

				var promptTooLongErr *service.PromptTooLongError
				if errors.As(err, &promptTooLongErr) {
					reqLog.Warn("gateway.prompt_too_long_from_antigravity",
						zap.Any("current_group_id", currentAPIKey.GroupID),
						zap.Any("fallback_group_id", fallbackGroupID),
						zap.Bool("fallback_used", fallbackUsed),
					)
					if !fallbackUsed && fallbackGroupID != nil && *fallbackGroupID > 0 {
						fallbackGroup, err := h.gatewayService.ResolveGroupByID(c.Request.Context(), *fallbackGroupID)
						if err != nil {
							reqLog.Warn("gateway.resolve_fallback_group_failed", zap.Int64("fallback_group_id", *fallbackGroupID), zap.Error(err))
							_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
							return
						}
						if fallbackGroup.Platform != service.PlatformAnthropic ||
							fallbackGroup.SubscriptionType == service.SubscriptionTypeSubscription ||
							fallbackGroup.FallbackGroupIDOnInvalidRequest != nil {
							reqLog.Warn("gateway.fallback_group_invalid",
								zap.Int64("fallback_group_id", fallbackGroup.ID),
								zap.String("fallback_platform", fallbackGroup.Platform),
								zap.String("fallback_subscription_type", fallbackGroup.SubscriptionType),
							)
							_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
							return
						}
						fallbackAPIKey := cloneAPIKeyWithGroup(apiKey, fallbackGroup)
						if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), fallbackAPIKey.User, fallbackAPIKey, fallbackGroup, nil); err != nil {
							status, code, message := billingErrorDetails(err)
							h.handleStreamingAwareError(c, status, code, message, streamStarted)
							return
						}
						// The fallback retry is treated as a direct request to the fallback group: clear the forced platform so scheduling follows the group platform.
						ctx := context.WithValue(c.Request.Context(), ctxkey.ForcePlatform, "")
						c.Request = c.Request.WithContext(ctx)
						currentAPIKey = fallbackAPIKey
						currentSubscription = nil
						fallbackUsed = true
						retryWithFallback = true
						break
					}
					_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
					return
				}
				var failoverErr *service.UpstreamFailoverError
				if errors.As(err, &failoverErr) {
					action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
					switch action {
					case FailoverContinue:
						continue
					case FailoverExhausted:
						h.handleFailoverExhausted(c, fs.LastFailoverErr, account.Platform, streamStarted)
						return
					case FailoverCanceled:
						return
					}
				}
				wroteFallback := h.ensureForwardErrorResponse(c, streamStarted)
				reqLog.Error("gateway.forward_failed",
					zap.Int64("account_id", account.ID),
					zap.Bool("fallback_error_response_written", wroteFallback),
					zap.Error(err),
				)
				return
			}

			// Increment the RPM counter (after Forward succeeds).
			// Note: the TOCTOU race is a known and accepted design trade-off, the same soft-limit pattern as WindowCost.
			// Under high concurrency the RPM limit may be briefly exceeded, but this does not cause requests to fail.
			if account.IsAnthropicOAuthOrSetupToken() && account.GetBaseRPM() > 0 {
				if err := h.gatewayService.IncrementAccountRPM(c.Request.Context(), account.ID); err != nil {
					reqLog.Warn("gateway.rpm_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
				}
			}

			// Capture request info (for asynchronous recording; avoids accessing gin.Context from a goroutine).
			userAgent := c.GetHeader("User-Agent")
			clientIP := ip.GetClientIP(c)

			// Usage records are submitted through a bounded worker pool, so the request hot path does not create unbounded goroutines.
			h.submitUsageRecordTask(func(ctx context.Context) {
				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
					Result:            result,
					ParsedRequest:     parsedReq,
					APIKey:            currentAPIKey,
					User:              currentAPIKey.User,
					Account:           account,
					Subscription:      currentSubscription,
					UserAgent:         userAgent,
					IPAddress:         clientIP,
					ForceCacheBilling: fs.ForceCacheBilling,
					APIKeyService:     h.apiKeyService,
				}); err != nil {
					logger.L().With(
						zap.String("component", "handler.gateway.messages"),
						zap.Int64("user_id", subject.UserID),
						zap.Int64("api_key_id", currentAPIKey.ID),
						zap.Any("group_id", currentAPIKey.GroupID),
						zap.String("model", reqModel),
						zap.Int64("account_id", account.ID),
					).Error("gateway.record_usage_failed", zap.Error(err))
				}
			})
			return
		}
		if !retryWithFallback {
			return
		}
	}
}

// Models lists the models available to the caller.
// GET /v1/models
//
// The platform is taken from the API key's group and overridden by a non-blank
// forced platform from the request context. Sora returns its default model
// list directly. Otherwise the models reported by the gateway service for the
// group (queried with an empty platform argument) are returned; when that list
// is empty the response falls back to the OpenAI defaults for the "openai"
// platform and to the Claude defaults for everything else.
func (h *GatewayHandler) Models(c *gin.Context) {
	var (
		groupID  *int64
		platform string
	)
	if apiKey, _ := middleware2.GetAPIKeyFromContext(c); apiKey != nil && apiKey.Group != nil {
		groupID = &apiKey.Group.ID
		platform = apiKey.Group.Platform
	}
	if forced, ok := middleware2.GetForcePlatformFromContext(c); ok && strings.TrimSpace(forced) != "" {
		platform = forced
	}

	// Every branch answers with the same envelope; only the payload differs.
	respond := func(data any) {
		c.JSON(http.StatusOK, gin.H{
			"object": "list",
			"data":   data,
		})
	}

	if platform == service.PlatformSora {
		respond(service.DefaultSoraModels(h.cfg))
		return
	}

	// Models configured on the group's accounts; no platform filter is passed.
	configured := h.gatewayService.GetAvailableModels(c.Request.Context(), groupID, "")
	if len(configured) > 0 {
		listed := make([]claude.Model, len(configured))
		for i, id := range configured {
			listed[i] = claude.Model{
				ID:          id,
				Type:        "model",
				DisplayName: id,
				CreatedAt:   "2024-01-01T00:00:00Z",
			}
		}
		respond(listed)
		return
	}

	// Nothing configured: fall back to the built-in defaults.
	switch platform {
	case "openai":
		respond(openai.DefaultModels)
	default:
		respond(claude.DefaultModels)
	}
}

// AntigravityModels returns the list produced by antigravity.DefaultModels.
// GET /antigravity/models
func (h *GatewayHandler) AntigravityModels(c *gin.Context) {
	payload := gin.H{
		"object": "list",
		"data":   antigravity.DefaultModels(),
	}
	c.JSON(http.StatusOK, payload)
}

// cloneAPIKeyWithGroup returns a shallow copy of apiKey whose Group and GroupID
// point at group. The input key is not modified. When either argument is nil
// the original apiKey is returned unchanged.
func cloneAPIKeyWithGroup(apiKey *service.APIKey, group *service.Group) *service.APIKey {
	if apiKey == nil || group == nil {
		return apiKey
	}

	dup := new(service.APIKey)
	*dup = *apiKey

	// Give the copy its own GroupID storage rather than pointing into group.
	id := new(int64)
	*id = group.ID
	dup.GroupID = id
	dup.Group = group

	return dup
}

// Usage reports account balance and usage statistics for CC Switch integration.
// GET /v1/usage
//
// The response comes in one of two modes:
//   - quota_limited: the API key has a quota or rate limits configured, so
//     key-level limits/usage are returned.
//   - unrestricted: the key has no such limits, so subscription or wallet
//     balance info is returned.
//
// Usage and per-model statistics are best-effort: when they cannot be loaded
// the base response is still produced without them.
func (h *GatewayHandler) Usage(c *gin.Context) {
	apiKey, hasKey := middleware2.GetAPIKeyFromContext(c)
	if !hasKey {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}
	subject, hasSubject := middleware2.GetAuthSubjectFromContext(c)
	if !hasSubject {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	ctx := c.Request.Context()

	// Optional date range from the query string; it only affects the model stats query.
	startTime, endTime := h.parseUsageDateRange(c)

	// Usage summary filtered by this API key; nil when unavailable.
	usageData := h.buildUsageData(ctx, apiKey.ID)

	// Per-model statistics; left as a nil interface unless a non-empty result arrives.
	var modelStats any
	if h.usageService != nil {
		stats, err := h.usageService.GetAPIKeyModelStats(ctx, apiKey.ID, startTime, endTime)
		if err == nil && len(stats) > 0 {
			modelStats = stats
		}
	}

	// A total quota or any rate limit on the key selects quota_limited mode.
	switch {
	case apiKey.Quota > 0, apiKey.HasRateLimits():
		h.usageQuotaLimited(c, ctx, apiKey, usageData, modelStats)
	default:
		h.usageUnrestricted(c, ctx, apiKey, subject, usageData, modelStats)
	}
}

// parseUsageDateRange reads the optional start_date / end_date query
// parameters (layout 2006-01-02). A missing or unparsable value keeps its
// default: the range covers the 30 days ending at timezone.Now().
func (h *GatewayHandler) parseUsageDateRange(c *gin.Context) (time.Time, time.Time) {
	const dateLayout = "2006-01-02"

	endTime := timezone.Now()
	startTime := endTime.AddDate(0, 0, -30)

	if raw := c.Query("start_date"); raw != "" {
		parsed, err := timezone.ParseInLocation(dateLayout, raw)
		if err == nil {
			startTime = parsed
		}
	}

	if raw := c.Query("end_date"); raw != "" {
		parsed, err := timezone.ParseInLocation(dateLayout, raw)
		if err == nil {
			// Extend to the last second of the requested day.
			endTime = parsed.Add(24*time.Hour - time.Second)
		}
	}

	return startTime, endTime
}

// buildUsageData assembles the today/total usage summary for one API key.
// It returns nil when no usage service is configured, when the dashboard
// stats lookup fails, or when the lookup yields no stats.
func (h *GatewayHandler) buildUsageData(ctx context.Context, apiKeyID int64) gin.H {
	if h.usageService == nil {
		return nil
	}

	stats, err := h.usageService.GetAPIKeyDashboardStats(ctx, apiKeyID)
	if err != nil || stats == nil {
		return nil
	}

	today := gin.H{
		"requests":              stats.TodayRequests,
		"input_tokens":          stats.TodayInputTokens,
		"output_tokens":         stats.TodayOutputTokens,
		"cache_creation_tokens": stats.TodayCacheCreationTokens,
		"cache_read_tokens":     stats.TodayCacheReadTokens,
		"total_tokens":          stats.TodayTokens,
		"cost":                  stats.TodayCost,
		"actual_cost":           stats.TodayActualCost,
	}
	total := gin.H{
		"requests":              stats.TotalRequests,
		"input_tokens":          stats.TotalInputTokens,
		"output_tokens":         stats.TotalOutputTokens,
		"cache_creation_tokens": stats.TotalCacheCreationTokens,
		"cache_read_tokens":     stats.TotalCacheReadTokens,
		"total_tokens":          stats.TotalTokens,
		"cost":                  stats.TotalCost,
		"actual_cost":           stats.TotalActualCost,
	}

	return gin.H{
		"today":               today,
		"total":               total,
		"average_duration_ms": stats.AverageDurationMs,
		"rpm":                 stats.Rpm,
		"tpm":                 stats.Tpm,
	}
}

// usageQuotaLimited writes the /v1/usage response for the quota_limited mode.
//
// The response always carries mode, isValid and status. Optional sections are
// added only when applicable: quota (key has a total quota), rate_limits (key
// has rate limits and the rate-limit data lookup succeeds), expiry info (key
// has an expiration time), usage and model_stats (non-nil inputs).
func (h *GatewayHandler) usageQuotaLimited(c *gin.Context, ctx context.Context, apiKey *service.APIKey, usageData gin.H, modelStats any) {
	// Active, quota-exhausted and expired keys are all reported as valid.
	valid := apiKey.Status == service.StatusAPIKeyActive ||
		apiKey.Status == service.StatusAPIKeyQuotaExhausted ||
		apiKey.Status == service.StatusAPIKeyExpired

	resp := gin.H{
		"mode":    "quota_limited",
		"isValid": valid,
		"status":  apiKey.Status,
	}

	// Total quota section; remaining/unit are also mirrored at the top level.
	if apiKey.Quota > 0 {
		left := apiKey.GetQuotaRemaining()
		resp["quota"] = gin.H{
			"limit":     apiKey.Quota,
			"used":      apiKey.QuotaUsed,
			"remaining": left,
			"unit":      "USD",
		}
		resp["remaining"] = left
		resp["unit"] = "USD"
	}

	// Rate-limit windows, using the usage returned by apiKeyService.GetRateLimitData.
	// A failed or empty lookup silently omits the section.
	if apiKey.HasRateLimits() && h.apiKeyService != nil {
		if data, err := h.apiKeyService.GetRateLimitData(ctx, apiKey.ID); err == nil && data != nil {
			windows := make([]gin.H, 0, 3)

			if limit := apiKey.RateLimit5h; limit > 0 {
				used := data.EffectiveUsage5h()
				item := gin.H{
					"window":       "5h",
					"limit":        limit,
					"used":         used,
					"remaining":    max(0, limit-used),
					"window_start": data.Window5hStart,
				}
				// reset_at is only reported while the window is still running.
				if start := data.Window5hStart; start != nil && !service.IsWindowExpired(start, service.RateLimitWindow5h) {
					item["reset_at"] = start.Add(service.RateLimitWindow5h)
				}
				windows = append(windows, item)
			}

			if limit := apiKey.RateLimit1d; limit > 0 {
				used := data.EffectiveUsage1d()
				item := gin.H{
					"window":       "1d",
					"limit":        limit,
					"used":         used,
					"remaining":    max(0, limit-used),
					"window_start": data.Window1dStart,
				}
				if start := data.Window1dStart; start != nil && !service.IsWindowExpired(start, service.RateLimitWindow1d) {
					item["reset_at"] = start.Add(service.RateLimitWindow1d)
				}
				windows = append(windows, item)
			}

			if limit := apiKey.RateLimit7d; limit > 0 {
				used := data.EffectiveUsage7d()
				item := gin.H{
					"window":       "7d",
					"limit":        limit,
					"used":         used,
					"remaining":    max(0, limit-used),
					"window_start": data.Window7dStart,
				}
				if start := data.Window7dStart; start != nil && !service.IsWindowExpired(start, service.RateLimitWindow7d) {
					item["reset_at"] = start.Add(service.RateLimitWindow7d)
				}
				windows = append(windows, item)
			}

			if len(windows) > 0 {
				resp["rate_limits"] = windows
			}
		}
	}

	// Expiration info.
	if expiry := apiKey.ExpiresAt; expiry != nil {
		resp["expires_at"] = expiry
		resp["days_until_expiry"] = apiKey.GetDaysUntilExpiry()
	}

	if usageData != nil {
		resp["usage"] = usageData
	}
	if modelStats != nil {
		resp["model_stats"] = modelStats
	}

	c.JSON(http.StatusOK, resp)
}

// usageUnrestricted writes the /v1/usage response for the unrestricted mode
// (kept backward compatible).
//
// Keys whose group is a subscription group get a subscription-style response;
// all other keys get a wallet-balance response based on the user fetched via
// userService.GetByID. A failed user lookup yields a 500 error response.
func (h *GatewayHandler) usageUnrestricted(c *gin.Context, ctx context.Context, apiKey *service.APIKey, subject middleware2.AuthSubject, usageData gin.H, modelStats any) {
	// attachStats adds the optional usage/model_stats sections to a response.
	attachStats := func(resp gin.H) {
		if usageData != nil {
			resp["usage"] = usageData
		}
		if modelStats != nil {
			resp["model_stats"] = modelStats
		}
	}

	// Subscription mode.
	if group := apiKey.Group; group != nil && group.IsSubscriptionType() {
		resp := gin.H{
			"mode":     "unrestricted",
			"isValid":  true,
			"planName": group.Name,
			"unit":     "USD",
		}

		// The subscription may be absent from the context (the original note
		// says the /v1/usage path skips the middleware billing check).
		if subscription, found := middleware2.GetSubscriptionFromContext(c); found {
			resp["remaining"] = h.calculateSubscriptionRemaining(group, subscription)
			resp["subscription"] = gin.H{
				"daily_usage_usd":   subscription.DailyUsageUSD,
				"weekly_usage_usd":  subscription.WeeklyUsageUSD,
				"monthly_usage_usd": subscription.MonthlyUsageUSD,
				"daily_limit_usd":   group.DailyLimitUSD,
				"weekly_limit_usd":  group.WeeklyLimitUSD,
				"monthly_limit_usd": group.MonthlyLimitUSD,
				"expires_at":        subscription.ExpiresAt,
			}
		}

		attachStats(resp)
		c.JSON(http.StatusOK, resp)
		return
	}

	// Wallet-balance mode.
	user, err := h.userService.GetByID(ctx, subject.UserID)
	if err != nil {
		h.errorResponse(c, http.StatusInternalServerError, "api_error", "Failed to get user info")
		return
	}

	resp := gin.H{
		"mode":      "unrestricted",
		"isValid":   true,
		"planName":  "钱包余额",
		"remaining": user.Balance,
		"unit":      "USD",
		"balance":   user.Balance,
	}
	attachStats(resp)
	c.JSON(http.StatusOK, resp)
}

// calculateSubscriptionRemaining computes the remaining subscription allowance.
//
// Rules:
//  1. If any configured daily/weekly/monthly limit is fully used (remaining <= 0), return 0.
//  2. Otherwise return the smallest remaining amount among the configured periods.
//  3. If no period has a limit configured, return -1 to signal "unlimited".
func (h *GatewayHandler) calculateSubscriptionRemaining(group *service.Group, sub *service.UserSubscription) float64 {
	// lowest starts at the "no limit configured" sentinel and is replaced by
	// the first tracked value.
	lowest := -1.0
	tracked := false

	// track records one period's remaining amount; it reports false when the
	// period is exhausted.
	track := func(left float64) bool {
		if left <= 0 {
			return false
		}
		if !tracked || left < lowest {
			lowest = left
		}
		tracked = true
		return true
	}

	// Daily limit.
	if group.HasDailyLimit() && !track(*group.DailyLimitUSD-sub.DailyUsageUSD) {
		return 0
	}

	// Weekly limit.
	if group.HasWeeklyLimit() && !track(*group.WeeklyLimitUSD-sub.WeeklyUsageUSD) {
		return 0
	}

	// Monthly limit.
	if group.HasMonthlyLimit() && !track(*group.MonthlyLimitUSD-sub.MonthlyUsageUSD) {
		return 0
	}

	return lowest
}

// handleConcurrencyError reports a concurrency-limit failure as a 429
// rate_limit_error naming the exhausted slot type. The err argument is
// accepted but not included in the response.
func (h *GatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotType string, streamStarted bool) {
	message := fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType)
	h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", message, streamStarted)
}

// handleFailoverExhausted writes the final error response once account
// failover has been exhausted.
//
// If an error passthrough rule matches the upstream status/body, the rule
// decides the response code and message; otherwise the default mapping from
// mapUpstreamError is used.
func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
	upstreamStatus, upstreamBody := failoverErr.StatusCode, failoverErr.ResponseBody

	// Passthrough rules take precedence over the default mapping.
	if h.errorPassthroughService != nil && len(upstreamBody) > 0 {
		rule := h.errorPassthroughService.MatchRule(platform, upstreamStatus, upstreamBody)
		if rule != nil {
			// Response status: upstream code unless the rule overrides it.
			respCode := upstreamStatus
			if rule.ResponseCode != nil && !rule.PassthroughCode {
				respCode = *rule.ResponseCode
			}

			// Response message: upstream message unless the rule supplies a custom one.
			respMsg := service.ExtractUpstreamErrorMessage(upstreamBody)
			if rule.CustomMessage != nil && !rule.PassthroughBody {
				respMsg = *rule.CustomMessage
			}

			if rule.SkipMonitoring {
				c.Set(service.OpsSkipPassthroughKey, true)
			}

			h.handleStreamingAwareError(c, respCode, "upstream_error", respMsg, streamStarted)
			return
		}
	}

	// No rule matched: fall back to the default error mapping.
	mappedStatus, mappedType, mappedMsg := h.mapUpstreamError(upstreamStatus)
	h.handleStreamingAwareError(c, mappedStatus, mappedType, mappedMsg, streamStarted)
}

// handleFailoverExhaustedSimple is the reduced variant used when no upstream
// response body is available: it applies the default mapping from
// mapUpstreamError and writes the resulting error.
func (h *GatewayHandler) handleFailoverExhaustedSimple(c *gin.Context, statusCode int, streamStarted bool) {
	mappedStatus, mappedType, mappedMsg := h.mapUpstreamError(statusCode)
	h.handleStreamingAwareError(c, mappedStatus, mappedType, mappedMsg, streamStarted)
}

// mapUpstreamError translates an upstream HTTP status code into the
// (status, error type, message) triple returned to the client.
//
// 429 stays 429, 529 becomes 503, and every other code (including unknown
// ones) is reported as 502 with a message chosen by the upstream status.
func (h *GatewayHandler) mapUpstreamError(statusCode int) (int, string, string) {
	const upstreamErr = "upstream_error"

	switch statusCode {
	case http.StatusUnauthorized:
		return http.StatusBadGateway, upstreamErr, "Upstream authentication failed, please contact administrator"
	case http.StatusForbidden:
		return http.StatusBadGateway, upstreamErr, "Upstream access forbidden, please contact administrator"
	case http.StatusTooManyRequests:
		return http.StatusTooManyRequests, "rate_limit_error", "Upstream rate limit exceeded, please retry later"
	case 529: // no named constant exists in net/http for this status
		return http.StatusServiceUnavailable, "overloaded_error", "Upstream service overloaded, please retry later"
	case http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout:
		return http.StatusBadGateway, upstreamErr, "Upstream service temporarily unavailable"
	}
	return http.StatusBadGateway, upstreamErr, "Upstream request failed"
}

// handleStreamingAwareError reports an error in the form appropriate for the
// current response state.
//
// Before streaming has started it writes a regular JSON error response with
// the given status. After streaming has started the status line is already
// sent, so the error is emitted as a single SSE "data:" event and flushed; if
// the writer does not implement http.Flusher nothing is written.
//
// The SSE payload is produced with encoding/json rather than strconv.Quote:
// strconv.Quote emits Go string syntax (e.g. \x1b, \a, \v, or \xNN for invalid
// UTF-8), which is not valid JSON and would make the event unparseable when an
// upstream message contains such bytes. HTML escaping is disabled so ordinary
// messages are serialized byte-for-byte as before.
func (h *GatewayHandler) handleStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
	if !streamStarted {
		// Normal case: return a JSON response with the proper status code.
		h.errorResponse(c, status, errType, message)
		return
	}

	// Stream already started: send the error as an SSE event, then stop.
	flusher, canFlush := c.Writer.(http.Flusher)
	if !canFlush {
		return
	}

	type sseErrorDetail struct {
		Type    string `json:"type"`
		Message string `json:"message"`
	}
	payload := struct {
		Type  string         `json:"type"`
		Error sseErrorDetail `json:"error"`
	}{
		Type:  "error",
		Error: sseErrorDetail{Type: errType, Message: message},
	}

	var event strings.Builder
	event.WriteString("data: ")
	enc := json.NewEncoder(&event)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(payload); err != nil {
		_ = c.Error(err)
		return
	}
	// Encode already appended one newline; SSE events end with a blank line.
	event.WriteString("\n")

	if _, err := fmt.Fprint(c.Writer, event.String()); err != nil {
		_ = c.Error(err)
	}
	flusher.Flush()
}

// ensureForwardErrorResponse writes the unified "Upstream request failed"
// error when Forward returned an error but nothing has been written yet.
// It returns true if it wrote the response, and false when the context or
// writer is nil or a response was already written.
func (h *GatewayHandler) ensureForwardErrorResponse(c *gin.Context, streamStarted bool) bool {
	pending := c != nil && c.Writer != nil && !c.Writer.Written()
	if !pending {
		return false
	}
	h.handleStreamingAwareError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed", streamStarted)
	return true
}

// checkClaudeCodeVersion verifies that a Claude Code client meets the
// configured minimum version.
//
// The check only applies to requests identified as Claude Code clients and is
// skipped for the count_tokens sub-path and when no minimum version is set.
// It returns true when the request may proceed; otherwise it has already
// written a 400 error response and returns false.
func (h *GatewayHandler) checkClaudeCodeVersion(c *gin.Context) bool {
	ctx := c.Request.Context()

	// Not a Claude Code client, or the count_tokens sub-path: nothing to check.
	if !service.IsClaudeCodeClient(ctx) || strings.HasSuffix(c.Request.URL.Path, "/count_tokens") {
		return true
	}

	required := h.settingService.GetMinClaudeCodeVersion(ctx)
	if required == "" {
		// No minimum configured.
		return true
	}

	current := service.GetClaudeCodeVersion(ctx)
	switch {
	case current == "":
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error",
			"Unable to determine Claude Code version. Please update Claude Code: npm update -g @anthropic-ai/claude-code")
		return false
	case service.CompareVersions(current, required) < 0:
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error",
			fmt.Sprintf("Your Claude Code version (%s) is below the minimum required version (%s). Please update: npm update -g @anthropic-ai/claude-code",
				current, required))
		return false
	}

	return true
}

// errorResponse writes an error in the Claude API error format:
// {"type":"error","error":{"type":<errType>,"message":<message>}}.
func (h *GatewayHandler) errorResponse(c *gin.Context, status int, errType, message string) {
	detail := gin.H{
		"type":    errType,
		"message": message,
	}
	c.JSON(status, gin.H{
		"type":  "error",
		"error": detail,
	})
}

// CountTokens handles the token counting endpoint.
// POST /v1/messages/count_tokens
//
// It validates billing eligibility (subscription/balance) but does not take a
// concurrency slot and does not record usage. The flow is: read and parse the
// body, require a model, check billing eligibility, select an account for the
// model, then forward the request via gatewayService.ForwardCountTokens.
func (h *GatewayHandler) CountTokens(c *gin.Context) {
	// Fetch the API key and auth subject from the context
	// (per the original note, set by the ApiKeyAuth middleware).
	apiKey, ok := middleware2.GetAPIKeyFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	_, ok = middleware2.GetAuthSubjectFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
		return
	}
	reqLog := requestLogger(
		c,
		"handler.gateway.count_tokens",
		zap.Int64("api_key_id", apiKey.ID),
		zap.Any("group_id", apiKey.GroupID),
	)
	defer h.maybeLogCompatibilityFallbackMetrics(reqLog)

	// Read the request body.
	body, err := pkghttputil.ReadRequestBodyWithPrealloc(c.Request)
	if err != nil {
		// An over-limit body gets a dedicated 413 response.
		if maxErr, ok := extractMaxBytesError(err); ok {
			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
			return
		}
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
		return
	}

	if len(body) == 0 {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
		return
	}

	// Record the raw body for ops before the model is known.
	setOpsRequestContext(c, "", false, body)

	parsedReq, err := service.ParseGatewayRequest(body, domain.PlatformAnthropic)
	if err != nil {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
		return
	}
	// When count_tokens goes through the strict messages validation, reuse the
	// already-parsed request to avoid deserializing the body a second time.
	SetClaudeCodeClientContext(c, body, parsedReq)
	reqLog = reqLog.With(zap.String("model", parsedReq.Model), zap.Bool("stream", parsedReq.Stream))
	// Record the thinking state in the request context; per the original note it
	// is used for Antigravity final model key derivation / per-model rate limiting.
	c.Request = c.Request.WithContext(service.WithThinkingEnabled(c.Request.Context(), parsedReq.ThinkingEnabled, h.metadataBridgeEnabled()))

	// The model field is required.
	if parsedReq.Model == "" {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
		return
	}

	// Update the ops context now that model and stream flag are known.
	setOpsRequestContext(c, parsedReq.Model, parsedReq.Stream, body)

	// Fetch the subscription (may be nil).
	subscription, _ := middleware2.GetSubscriptionFromContext(c)

	// Check billing eligibility (subscription/balance).
	// NOTE: concurrency is not counted here, but subscription/balance must still be validated.
	if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
		status, code, message := billingErrorDetails(err)
		h.errorResponse(c, status, code, message)
		return
	}

	// Compute the sticky-session hash.
	parsedReq.SessionContext = &service.SessionContext{
		ClientIP:  ip.GetClientIP(c),
		UserAgent: c.GetHeader("User-Agent"),
		APIKeyID:  apiKey.ID,
	}
	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)

	// Select an account that supports the requested model.
	account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model)
	if err != nil {
		reqLog.Warn("gateway.count_tokens_select_account_failed", zap.Error(err))
		h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable")
		return
	}
	setOpsSelectedAccount(c, account.ID, account.Platform)

	// Forward the request (usage is not recorded).
	if err := h.gatewayService.ForwardCountTokens(c.Request.Context(), c, account, parsedReq); err != nil {
		reqLog.Error("gateway.count_tokens_forward_failed", zap.Int64("account_id", account.ID), zap.Error(err))
		// The error response has already been handled inside ForwardCountTokens.
		return
	}
}

// InterceptType identifies which kind of request interception applies.
// The zero value, InterceptTypeNone, means the request is not intercepted.
type InterceptType int

const (
	InterceptTypeNone              InterceptType = iota
	InterceptTypeWarmup                          // warmup request (mock reply "New Conversation")
	InterceptTypeSuggestionMode                  // SUGGESTION MODE (mock reply is an empty string)
	InterceptTypeMaxTokensOneHaiku               // max_tokens=1 + haiku probe request (mock reply "#")
)

// isHaikuModel reports whether the model name contains "haiku",
// compared case-insensitively.
func isHaikuModel(model string) bool {
	lowered := strings.ToLower(model)
	return strings.Contains(lowered, "haiku")
}

// isMaxTokensOneHaikuRequest reports whether the request is a
// max_tokens=1 + haiku probe. According to the original note, Claude Code
// sends this kind of request to verify API connectivity.
// All three conditions must hold: max_tokens == 1, the model name contains
// "haiku", and the request is not streaming.
func isMaxTokensOneHaikuRequest(model string, maxTokens int, isStream bool) bool {
	if maxTokens != 1 {
		return false
	}
	if !isHaikuModel(model) {
		return false
	}
	return !isStream
}

// detectInterceptType decides whether a request should be answered with a
// mock response and, if so, which kind.
//
// Parameters:
//   - body: raw request body bytes
//   - model: requested model name
//   - maxTokens: the max_tokens value
//   - isStream: whether the request is streaming
//   - isClaudeCodeClient: whether the request passed Claude Code client validation
//
// The body is only JSON-decoded when a cheap substring scan finds one of the
// trigger keywords. A body that fails to decode into the expected shape
// (content and system as arrays of objects) yields InterceptTypeNone.
func detectInterceptType(body []byte, model string, maxTokens int, isStream bool, isClaudeCodeClient bool) InterceptType {
	// The max_tokens=1 + haiku probe (non-streaming only) is checked first.
	if isClaudeCodeClient && isMaxTokensOneHaikuRequest(model, maxTokens, isStream) {
		return InterceptTypeMaxTokensOneHaiku
	}

	const (
		suggestionPrefix = "[SUGGESTION MODE:"
		titlePrompt      = "Please write a 5-10 word title for the following conversation:"
		topicPrompt      = "nalyze if this message indicates a new conversation topic. If it does, extract a 2-3 word title"
	)

	// Fast path: bail out when none of the keywords appear anywhere in the body.
	raw := string(body)
	maybeSuggestion := strings.Contains(raw, suggestionPrefix)
	maybeWarmup := strings.Contains(raw, "title") || strings.Contains(raw, "Warmup")
	if !(maybeSuggestion || maybeWarmup) {
		return InterceptTypeNone
	}

	// Decode once; only the fields inspected below are declared.
	type contentPart struct {
		Type string `json:"type"`
		Text string `json:"text"`
	}
	type message struct {
		Role    string        `json:"role"`
		Content []contentPart `json:"content"`
	}
	type systemPart struct {
		Text string `json:"text"`
	}
	var parsed struct {
		Messages []message    `json:"messages"`
		System   []systemPart `json:"system"`
	}
	if err := json.Unmarshal(body, &parsed); err != nil {
		return InterceptTypeNone
	}

	// SUGGESTION MODE: the last message must be a user message whose first
	// content part is text starting with the marker.
	if maybeSuggestion {
		if count := len(parsed.Messages); count > 0 {
			last := parsed.Messages[count-1]
			if last.Role == "user" && len(last.Content) > 0 {
				first := last.Content[0]
				if first.Type == "text" && strings.HasPrefix(first.Text, suggestionPrefix) {
					return InterceptTypeSuggestionMode
				}
			}
		}
	}

	if !maybeWarmup {
		return InterceptTypeNone
	}

	// Warmup: title prompt or a literal "Warmup" text part in any message.
	for _, msg := range parsed.Messages {
		for _, part := range msg.Content {
			if part.Type != "text" {
				continue
			}
			if part.Text == "Warmup" || strings.Contains(part.Text, titlePrompt) {
				return InterceptTypeWarmup
			}
		}
	}

	// Warmup: title-extraction prompt in the system blocks.
	for _, sys := range parsed.System {
		if strings.Contains(sys.Text, topicPrompt) {
			return InterceptTypeWarmup
		}
	}

	return InterceptTypeNone
}

// sendMockInterceptStream writes a mock SSE message stream for an intercepted
// request. SUGGESTION MODE yields a single empty text delta; every other
// intercept type yields the warmup reply "New Conversation" in two deltas.
// Events are flushed one at a time with a 20ms pause after each.
func sendMockInterceptStream(c *gin.Context, model string, interceptType InterceptType) {
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	c.Header("X-Accel-Buffering", "no")

	// Warmup is the default; SUGGESTION MODE overrides it.
	msgID, outputTokens, textDeltas := "msg_mock_warmup", 2, []string{"New", " Conversation"}
	if interceptType == InterceptTypeSuggestionMode {
		msgID, outputTokens, textDeltas = "msg_mock_suggestion", 1, []string{""} // empty content
	}

	// frame renders one SSE event without its terminating blank line.
	frame := func(name, data string) string {
		return "event: " + name + "\n" + "data: " + data
	}

	events := make([]string, 0, 5+len(textDeltas))

	// message_start and content_block_start use a fixed schema.
	events = append(events,
		frame("message_start", `{"type":"message_start","message":{"id":`+strconv.Quote(msgID)+`,"type":"message","role":"assistant","model":`+strconv.Quote(model)+`,"content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0}}}`),
		frame("content_block_start", `{"content_block":{"text":"","type":"text"},"index":0,"type":"content_block_start"}`),
	)

	// One content_block_delta per text fragment.
	for _, fragment := range textDeltas {
		events = append(events,
			frame("content_block_delta", `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":`+strconv.Quote(fragment)+`}}`))
	}

	// Closing events.
	events = append(events,
		frame("content_block_stop", `{"index":0,"type":"content_block_stop"}`),
		frame("message_delta", `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":10,"output_tokens":`+strconv.Itoa(outputTokens)+`}}`),
		frame("message_stop", `{"type":"message_stop"}`),
	)

	for _, ev := range events {
		_, _ = c.Writer.WriteString(ev + "\n\n")
		c.Writer.Flush()
		time.Sleep(20 * time.Millisecond)
	}
}

// generateRealisticMsgID builds a message ID of the form msg_bdrk_ followed by
// 24 characters drawn from [0-9A-Za-z], using crypto/rand as the source.
// If reading random bytes fails it falls back to msg_bdrk_<UnixNano>.
func generateRealisticMsgID() string {
	const (
		prefix    = "msg_bdrk_"
		alphabet  = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
		suffixLen = 24
	)

	suffix := make([]byte, suffixLen)
	if _, err := rand.Read(suffix); err != nil {
		return fmt.Sprintf("msg_bdrk_%d", time.Now().UnixNano())
	}

	// Map every random byte onto the alphabet in place.
	for i, v := range suffix {
		suffix[i] = alphabet[int(v)%len(alphabet)]
	}
	return prefix + string(suffix)
}

// sendMockInterceptResponse writes a non-streaming mock message response for
// an intercepted request. The reply text, message ID, stop reason and output
// token count depend on the intercept type; unrecognized types are treated as
// warmup.
func sendMockInterceptResponse(c *gin.Context, model string, interceptType InterceptType) {
	// Warmup values are the default and are overridden per intercept type.
	msgID, text, stopReason := "msg_mock_warmup", "New Conversation", "end_turn"
	outputTokens := 2

	switch interceptType {
	case InterceptTypeSuggestionMode:
		msgID, text, outputTokens = "msg_mock_suggestion", "", 1
	case InterceptTypeMaxTokensOneHaiku:
		// A max_tokens=1 probe reports max_tokens as its stop reason.
		msgID, text, outputTokens = generateRealisticMsgID(), "#", 1
		stopReason = "max_tokens"
	}

	usage := gin.H{
		"input_tokens":                10,
		"cache_creation_input_tokens": 0,
		"cache_read_input_tokens":     0,
		"cache_creation": gin.H{
			"ephemeral_5m_input_tokens": 0,
			"ephemeral_1h_input_tokens": 0,
		},
		"output_tokens": outputTokens,
		"total_tokens":  10 + outputTokens,
	}

	// Full response body in the Claude API message format.
	c.JSON(http.StatusOK, gin.H{
		"model":         model,
		"id":            msgID,
		"type":          "message",
		"role":          "assistant",
		"content":       []gin.H{{"type": "text", "text": text}},
		"stop_reason":   stopReason,
		"stop_sequence": nil,
		"usage":         usage,
	})
}

// billingErrorDetails maps a billing eligibility error to the HTTP status,
// error code and message sent to the client:
//   - billing service unavailable -> 503 billing_service_error
//   - any API key rate-limit window (5h/1d/7d) exceeded -> 429 rate_limit_exceeded
//   - anything else -> 403 billing_error
//
// Messages come from pkgerrors.Message; a generic fallback is used where the
// original code provides one.
func billingErrorDetails(err error) (status int, code, message string) {
	switch {
	case errors.Is(err, service.ErrBillingServiceUnavailable):
		detail := pkgerrors.Message(err)
		if detail == "" {
			detail = "Billing service temporarily unavailable. Please retry later."
		}
		return http.StatusServiceUnavailable, "billing_service_error", detail

	case errors.Is(err, service.ErrAPIKeyRateLimit5hExceeded),
		errors.Is(err, service.ErrAPIKeyRateLimit1dExceeded),
		errors.Is(err, service.ErrAPIKeyRateLimit7dExceeded):
		return http.StatusTooManyRequests, "rate_limit_exceeded", pkgerrors.Message(err)
	}

	detail := pkgerrors.Message(err)
	if detail == "" {
		// Log the raw error so a missing message can be diagnosed.
		logger.L().With(
			zap.String("component", "handler.gateway.billing"),
			zap.Error(err),
		).Warn("gateway.billing_error_missing_message")
		detail = "Billing error"
	}
	return http.StatusForbidden, "billing_error", detail
}

// metadataBridgeEnabled returns the Gateway.OpenAIWS.MetadataBridgeEnabled
// setting. It defaults to true when the handler or its config is nil.
func (h *GatewayHandler) metadataBridgeEnabled() bool {
	if h != nil && h.cfg != nil {
		return h.cfg.Gateway.OpenAIWS.MetadataBridgeEnabled
	}
	return true
}

// maybeLogCompatibilityFallbackMetrics logs a snapshot of the OpenAI
// compatibility fallback metrics once every
// gatewayCompatibilityMetricsLogInterval calls. Calls with a nil logger are
// ignored and do not advance the counter.
func (h *GatewayHandler) maybeLogCompatibilityFallbackMetrics(reqLog *zap.Logger) {
	if reqLog == nil {
		return
	}

	seq := gatewayCompatibilityMetricsLogCounter.Add(1)
	if seq%gatewayCompatibilityMetricsLogInterval != 0 {
		return
	}

	snapshot := service.SnapshotOpenAICompatibilityFallbackMetrics()
	fields := []zap.Field{
		zap.Int64("session_hash_legacy_read_fallback_total", snapshot.SessionHashLegacyReadFallbackTotal),
		zap.Int64("session_hash_legacy_read_fallback_hit", snapshot.SessionHashLegacyReadFallbackHit),
		zap.Int64("session_hash_legacy_dual_write_total", snapshot.SessionHashLegacyDualWriteTotal),
		zap.Float64("session_hash_legacy_read_hit_rate", snapshot.SessionHashLegacyReadHitRate),
		zap.Int64("metadata_legacy_fallback_total", snapshot.MetadataLegacyFallbackTotal),
	}
	reqLog.Info("gateway.compatibility_fallback_metrics", fields...)
}

// submitUsageRecordTask hands a usage-record task to the worker pool.
// A nil task is ignored. When no worker pool is injected, the task runs
// synchronously with a 10-second timeout context and any panic it raises is
// recovered and logged.
func (h *GatewayHandler) submitUsageRecordTask(task service.UsageRecordTask) {
	if task == nil {
		return
	}

	if pool := h.usageRecordWorkerPool; pool != nil {
		pool.Submit(task)
		return
	}

	// Fallback: run inline instead of spawning a goroutine, so the number of
	// concurrent tasks stays bounded by the callers.
	runCtx, release := context.WithTimeout(context.Background(), 10*time.Second)
	defer release()
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		logger.L().With(
			zap.String("component", "handler.gateway.messages"),
			zap.Any("panic", recovered),
		).Error("gateway.usage_record_task_panic_recovered")
	}()

	task(runCtx)
}

// getUserMsgQueueMode returns the user message queue (UMQ) mode for the
// current request. Per the original note the value is "serialize", "throttle"
// or "".
//
// An empty string is returned when no queue helper is configured, when the
// account is not an Anthropic OAuth/SetupToken account, or when the request is
// not a real user message. Otherwise the account-level mode wins, falling back
// to the global configuration.
func (h *GatewayHandler) getUserMsgQueueMode(account *service.Account, parsed *service.ParsedRequest) string {
	switch {
	case h.userMsgQueueHelper == nil:
		return ""
	case !account.IsAnthropicOAuthOrSetupToken():
		// Only Anthropic OAuth/SetupToken accounts are eligible.
		return ""
	case !service.IsRealUserMessage(parsed):
		return ""
	}

	if accountMode := account.GetUserMsgQueueMode(); accountMode != "" {
		return accountMode
	}
	return h.cfg.Gateway.UserMessageQueue.GetEffectiveMode()
}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								package handler
 								import (
 									"context"
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									"crypto/rand"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									"encoding/json"
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									"errors"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									"fmt"
 									"net/http"
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									"strconv"
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									"strings"
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									"sync/atomic"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									"time"
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/config"
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/domain"
-												feat(antigravity): 添加 models 端点支持

- /antigravity/models: 返回全部模型（Claude + Gemini）
- /antigravity/v1/models: 返回全部模型（Claude API 格式）
- /antigravity/v1beta/models: 仅返回 Gemini 模型（v1beta 格式）

统一管理 antigravity 模型定义，避免重复代码

											
										
										
											2026-01-02 10:21:05 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
-												fix(后端): 修复 lint 失败并清理无用代码

修正测试中的 APIKey 名称引用
移除不可达返回与未使用函数
统一 gofmt 格式并处理 Close 错误

											
										
										
											2026-01-04 22:10:32 +08:00
+									pkgerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									pkghttputil "github.com/Wei-Shaw/sub2api/internal/pkg/httputil"
-												fix(gateway): 修复 usage_logs 记录 IP 不正确的问题

在 nginx 反向代理场景下，使用 ip.GetClientIP() 替代 c.ClientIP()
以正确获取客户端真实 IP 地址

											
										
										
											2026-01-12 15:35:54 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/timezone"
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/service"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
 									"github.com/gin-gonic/gin"
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+									"go.uber.org/zap"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								)
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+								const gatewayCompatibilityMetricsLogInterval = 1024 // NOTE(review): presumably the cadence (in calls) at which compatibility metrics are logged; confirm in maybeLogCompatibilityFallbackMetrics.
 								var gatewayCompatibilityMetricsLogCounter atomic.Uint64 // Package-level atomic counter. NOTE(review): presumably compared against the interval above; confirm at the use site.
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								// GatewayHandler handles API gateway requests
 								type GatewayHandler struct {
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+									gatewayService            *service.GatewayService
 									geminiCompatService       *service.GeminiMessagesCompatService
 									antigravityGatewayService *service.AntigravityGatewayService
 									userService               *service.UserService
 									billingCacheService       *service.BillingCacheService
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+									usageService              *service.UsageService
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+									apiKeyService             *service.APIKeyService
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+									usageRecordWorkerPool     *service.UsageRecordWorkerPool
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+									errorPassthroughService   *service.ErrorPassthroughService
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+									concurrencyHelper         *ConcurrencyHelper
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
+									userMsgQueueHelper        *UserMsgQueueHelper
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+									maxAccountSwitches        int
 									maxAccountSwitchesGemini  int
-												feat(Sora): 直连生成并移除sora2api依赖

实现直连 Sora 客户端、媒体落地与清理策略
更新网关与前端配置以支持 Sora 平台
补齐单元测试与契约测试，新增 curl 测试脚本

测试: go test ./... -tags=unit

											
										
										
											2026-02-01 21:37:10 +08:00
+									cfg                       *config.Config
-												feat(gateway): 添加 Claude Code 客户端最低版本检查功能

- 通过 User-Agent 识别 Claude Code 客户端并提取版本号
- 在网关层验证客户端版本是否满足管理员配置的最低要求
- 在管理后台提供版本要求配置选项（英文/中文双语）
- 实现原子缓存 + singleflight 防止并发问题和 thundering herd
- 使用 context.WithoutCancel 隔离 DB 查询，避免客户端断连影响缓存
- 双 TTL 策略：60s 正常、5s 错误恢复，保证性能与可用性
- 仅检查 Claude Code 客户端，其他客户端不受影响
- 添加完整单元测试覆盖版本提取、比对、上下文操作

											
										
										
											2026-03-01 15:35:46 +08:00
+									settingService            *service.SettingService
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								}
 								// NewGatewayHandler creates a new GatewayHandler
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+								func NewGatewayHandler(
 									gatewayService *service.GatewayService,
 									geminiCompatService *service.GeminiMessagesCompatService,
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+									antigravityGatewayService *service.AntigravityGatewayService,
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+									userService *service.UserService,
 									concurrencyService *service.ConcurrencyService,
 									billingCacheService *service.BillingCacheService,
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+									usageService *service.UsageService,
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+									apiKeyService *service.APIKeyService,
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+									usageRecordWorkerPool *service.UsageRecordWorkerPool,
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+									errorPassthroughService *service.ErrorPassthroughService,
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
+									userMsgQueueService *service.UserMessageQueueService,
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									cfg *config.Config,
-												feat(gateway): 添加 Claude Code 客户端最低版本检查功能

- 通过 User-Agent 识别 Claude Code 客户端并提取版本号
- 在网关层验证客户端版本是否满足管理员配置的最低要求
- 在管理后台提供版本要求配置选项（英文/中文双语）
- 实现原子缓存 + singleflight 防止并发问题和 thundering herd
- 使用 context.WithoutCancel 隔离 DB 查询，避免客户端断连影响缓存
- 双 TTL 策略：60s 正常、5s 错误恢复，保证性能与可用性
- 仅检查 Claude Code 客户端，其他客户端不受影响
- 添加完整单元测试覆盖版本提取、比对、上下文操作

											
										
										
											2026-03-01 15:35:46 +08:00
+									settingService *service.SettingService,
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+								) *GatewayHandler {
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									pingInterval := time.Duration(0)
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+									maxAccountSwitches := 10
 									maxAccountSwitchesGemini := 3
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									if cfg != nil {
 										pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+										if cfg.Gateway.MaxAccountSwitches > 0 {
 											maxAccountSwitches = cfg.Gateway.MaxAccountSwitches
 										}
 										if cfg.Gateway.MaxAccountSwitchesGemini > 0 {
 											maxAccountSwitchesGemini = cfg.Gateway.MaxAccountSwitchesGemini
 										}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									}
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
 									// 初始化用户消息串行队列 helper
 									var umqHelper *UserMsgQueueHelper
 									if userMsgQueueService != nil && cfg != nil {
 										umqHelper = NewUserMsgQueueHelper(userMsgQueueService, SSEPingFormatClaude, pingInterval)
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									return &GatewayHandler{
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+										gatewayService:            gatewayService,
 										geminiCompatService:       geminiCompatService,
 										antigravityGatewayService: antigravityGatewayService,
 										userService:               userService,
 										billingCacheService:       billingCacheService,
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+										usageService:              usageService,
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+										apiKeyService:             apiKeyService,
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+										usageRecordWorkerPool:     usageRecordWorkerPool,
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+										errorPassthroughService:   errorPassthroughService,
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+										concurrencyHelper:         NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval),
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
+										userMsgQueueHelper:        umqHelper,
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+										maxAccountSwitches:        maxAccountSwitches,
 										maxAccountSwitchesGemini:  maxAccountSwitchesGemini,
-												feat(Sora): 直连生成并移除sora2api依赖

实现直连 Sora 客户端、媒体落地与清理策略
更新网关与前端配置以支持 Sora 平台
补齐单元测试与契约测试，新增 curl 测试脚本

测试: go test ./... -tags=unit

											
										
										
											2026-02-01 21:37:10 +08:00
+										cfg:                       cfg,
-												feat(gateway): 添加 Claude Code 客户端最低版本检查功能

- 通过 User-Agent 识别 Claude Code 客户端并提取版本号
- 在网关层验证客户端版本是否满足管理员配置的最低要求
- 在管理后台提供版本要求配置选项（英文/中文双语）
- 实现原子缓存 + singleflight 防止并发问题和 thundering herd
- 使用 context.WithoutCancel 隔离 DB 查询，避免客户端断连影响缓存
- 双 TTL 策略：60s 正常、5s 错误恢复，保证性能与可用性
- 仅检查 Claude Code 客户端，其他客户端不受影响
- 添加完整单元测试覆盖版本提取、比对、上下文操作

											
										
										
											2026-03-01 15:35:46 +08:00
+										settingService:            settingService,
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									}
 								}
 								// Messages handles Claude API compatible messages endpoint
 								// POST /v1/messages
 								func (h *GatewayHandler) Messages(c *gin.Context) {
 									// 从context获取apiKey和user（ApiKeyAuth中间件已设置）
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, ok := middleware2.GetAPIKeyFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									subject, ok := middleware2.GetAuthSubjectFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
 										return
 									}
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+									reqLog := requestLogger(
 										c,
 										"handler.gateway.messages",
 										zap.Int64("user_id", subject.UserID),
 										zap.Int64("api_key_id", apiKey.ID),
 										zap.Any("group_id", apiKey.GroupID),
 									)
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									defer h.maybeLogCompatibilityFallbackMetrics(reqLog)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
 									// 读取请求体
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									body, err := pkghttputil.ReadRequestBodyWithPrealloc(c.Request)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if err != nil {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										if maxErr, ok := extractMaxBytesError(err); ok {
 											h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
 											return
 										}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 										return
 									}
 									if len(body) == 0 {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, "", false, body)
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+									parsedReq, err := service.ParseGatewayRequest(body, domain.PlatformAnthropic)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									if err != nil {
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
 										return
 									}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									reqModel := parsedReq.Model
 									reqStream := parsedReq.Stream
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+									reqLog = reqLog.With(zap.String("model", reqModel), zap.Bool("stream", reqStream))
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									// 设置 max_tokens=1 + haiku 探测请求标识到 context 中
 									// 必须在 SetClaudeCodeClientContext 之前设置，因为 ClaudeCodeValidator 需要读取此标识进行绕过判断
 									if isMaxTokensOneHaikuRequest(reqModel, parsedReq.MaxTokens, reqStream) {
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+										ctx := service.WithIsMaxTokensOneHaikuRequest(c.Request.Context(), true, h.metadataBridgeEnabled())
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+										c.Request = c.Request.WithContext(ctx)
 									}
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									// 检查是否为 Claude Code 客户端，设置到 context 中（复用已解析请求，避免二次反序列化）。
 									SetClaudeCodeClientContext(c, body, parsedReq)
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									isClaudeCodeClient := service.IsClaudeCodeClient(c.Request.Context())
-												feat(gateway): 添加 Claude Code 客户端最低版本检查功能

- 通过 User-Agent 识别 Claude Code 客户端并提取版本号
- 在网关层验证客户端版本是否满足管理员配置的最低要求
- 在管理后台提供版本要求配置选项（英文/中文双语）
- 实现原子缓存 + singleflight 防止并发问题和 thundering herd
- 使用 context.WithoutCancel 隔离 DB 查询，避免客户端断连影响缓存
- 双 TTL 策略：60s 正常、5s 错误恢复，保证性能与可用性
- 仅检查 Claude Code 客户端，其他客户端不受影响
- 添加完整单元测试覆盖版本提取、比对、上下文操作

											
										
										
											2026-03-01 15:35:46 +08:00
+									// 版本检查：仅对 Claude Code 客户端，拒绝低于最低版本的请求
 									if !h.checkClaudeCodeVersion(c) {
 										return
 									}
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									// 在请求上下文中记录 thinking 状态，供 Antigravity 最终模型 key 推导/模型维度限流使用
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									c.Request = c.Request.WithContext(service.WithThinkingEnabled(c.Request.Context(), parsedReq.ThinkingEnabled, h.metadataBridgeEnabled()))
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, reqModel, reqStream, body)
-												feat(groups): add Claude Code client restriction and session isolation

- Add claude_code_only field to restrict groups to Claude Code clients only
- Add fallback_group_id for non-Claude Code requests to use alternate group
- Implement ClaudeCodeValidator for User-Agent detection
- Add group-level session binding isolation (groupID in Redis key)
- Prevent cross-group sticky session pollution
- Update frontend with Claude Code restriction controls

											
										
										
											2026-01-08 23:07:00 +08:00
-												fix(网关): 添加 model 参数必填验证

在以下端点添加 model 参数的必填验证，缺失时直接返回 400 错误：
- /v1/messages
- /v1/messages/count_tokens
- /openai/v1/responses

修复前：空 model 会进入账号选择流程，最终由上游 API 返回错误
修复后：入口处直接拒绝，避免浪费资源和不明确的错误信息

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2025-12-31 16:17:45 +08:00
+									// 验证 model 必填
 									if reqModel == "" {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
 										return
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									// Track if we've started streaming (for error handling)
 									streamStarted := false
-												fix: restore non-failover error passthrough from 7b156489

											
										
										
											2026-02-07 14:24:55 +08:00
+									// 绑定错误透传服务，允许 service 层在非 failover 错误场景复用规则。
 									if h.errorPassthroughService != nil {
 										service.BindErrorPassthroughService(c, h.errorPassthroughService)
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									// 获取订阅信息（可能为nil）- 提前获取用于后续检查
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									subscription, _ := middleware2.GetSubscriptionFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
 									// 0. 检查wait队列是否已满
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									maxWait := service.CalculateMaxWait(subject.Concurrency)
 									canWait, err := h.concurrencyHelper.IncrementWaitCount(c.Request.Context(), subject.UserID, maxWait)
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									waitCounted := false
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+										reqLog.Warn("gateway.user_wait_counter_increment_failed", zap.Error(err))
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										// On error, allow request to proceed
 									} else if !canWait {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+										reqLog.Info("gateway.user_wait_queue_full", zap.Int("max_wait", maxWait))
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										h.errorResponse(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									if err == nil && canWait {
 										waitCounted = true
 									}
 									// Ensure we decrement if we exit before acquiring the user slot.
 									defer func() {
 										if waitCounted {
 											h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
 										}
 									}()
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
 									// 1. 首先获取用户并发槽位
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+										reqLog.Warn("gateway.user_slot_acquire_failed", zap.Error(err))
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										h.handleConcurrencyError(c, err, "user", streamStarted)
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									// User slot acquired: no longer waiting in the queue.
 									if waitCounted {
 										h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
 										waitCounted = false
 									}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									// 在请求结束或 Context 取消时确保释放槽位，避免客户端断开造成泄漏
 									userReleaseFunc = wrapReleaseOnDone(c.Request.Context(), userReleaseFunc)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if userReleaseFunc != nil {
 										defer userReleaseFunc()
 									}
 									// 2. 【新增】Wait后二次检查余额/订阅
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+										reqLog.Info("gateway.billing_eligibility_check_failed", zap.Error(err))
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+										status, code, message := billingErrorDetails(err)
 										h.handleStreamingAwareError(c, status, code, message, streamStarted)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										return
 									}
 									// 计算粘性会话hash
-												fix: prevent sessionHash collision for different users with same messages

Mix SessionContext (ClientIP, UserAgent, APIKeyID) into
GenerateSessionHash 3rd-level fallback to differentiate requests
from different users sending identical content.

Also switch hashContent from SHA256-truncated to XXHash64 for
better performance, and optimize Trie Lua script to match from
longest prefix first.

											
										
										
											2026-02-09 06:46:32 +08:00
+									parsedReq.SessionContext = &service.SessionContext{
 										ClientIP:  ip.GetClientIP(c),
 										UserAgent: c.GetHeader("User-Agent"),
 										APIKeyID:  apiKey.ID,
 									}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												feat(antigravity): 添加专用路由，支持仅使用 antigravity 账户

添加 /antigravity/v1/* 和 /antigravity/v1beta/* 路由：
- 通过 ForcePlatform 中间件强制使用 antigravity 平台
- 跳过混合调度逻辑，仅调度 antigravity 账户
- 支持按分组优先查找，找不到时回退查询全部 antigravity 账户

修复 context key 类型不匹配问题：
- middleware 和 service 统一使用字符串常量 "ctx_force_platform"
- 解决 Go context.Value() 类型+值匹配导致的读取失败

其他改动：
- 嵌入式前端中间件白名单添加 /antigravity/ 路径
- e2e 测试 Gemini 端点 URL 添加 endpointPrefix 支持

											
										
										
											2025-12-29 16:52:55 +08:00
+									// 获取平台：优先使用强制平台（/antigravity 路由，中间件已设置 request.Context），否则使用分组平台
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+									platform := ""
-												feat(antigravity): 添加专用路由，支持仅使用 antigravity 账户

添加 /antigravity/v1/* 和 /antigravity/v1beta/* 路由：
- 通过 ForcePlatform 中间件强制使用 antigravity 平台
- 跳过混合调度逻辑，仅调度 antigravity 账户
- 支持按分组优先查找，找不到时回退查询全部 antigravity 账户

修复 context key 类型不匹配问题：
- middleware 和 service 统一使用字符串常量 "ctx_force_platform"
- 解决 Go context.Value() 类型+值匹配导致的读取失败

其他改动：
- 嵌入式前端中间件白名单添加 /antigravity/ 路径
- e2e 测试 Gemini 端点 URL 添加 endpointPrefix 支持

											
										
										
											2025-12-29 16:52:55 +08:00
+									if forcePlatform, ok := middleware2.GetForcePlatformFromContext(c); ok {
 										platform = forcePlatform
 									} else if apiKey.Group != nil {
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+										platform = apiKey.Group.Platform
 									}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									sessionKey := sessionHash
 									if platform == service.PlatformGemini && sessionHash != "" {
 										sessionKey = "gemini:" + sessionHash
 									}
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									// 查询粘性会话绑定的账号 ID
 									var sessionBoundAccountID int64
 									if sessionKey != "" {
 										sessionBoundAccountID, _ = h.gatewayService.GetCachedSessionAccountID(c.Request.Context(), apiKey.GroupID, sessionKey)
-												perf(gateway): 优化热点路径并补齐高覆盖测试

											
										
										
											2026-02-22 13:31:30 +08:00
+										if sessionBoundAccountID > 0 {
-												fix(gateway): 修复粘性会话预取分组错配并优化并发等待热路径

											
										
										
											2026-02-22 16:43:33 +08:00
+											prefetchedGroupID := int64(0)
 											if apiKey.GroupID != nil {
 												prefetchedGroupID = *apiKey.GroupID
 											}
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+											ctx := service.WithPrefetchedStickySession(c.Request.Context(), sessionBoundAccountID, prefetchedGroupID, h.metadataBridgeEnabled())
-												perf(gateway): 优化热点路径并补齐高覆盖测试

											
										
										
											2026-02-22 13:31:30 +08:00
+											c.Request = c.Request.WithContext(ctx)
 										}
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									}
 									// 判断是否真的绑定了粘性会话：有 sessionKey 且已经绑定到某个账号
 									hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0
-												fix(backend): 适配重构后的架构修复 Gemini OAuth 集成

## 主要修改

1. **移除 model 包引用**
   - 删除所有 `internal/model` 包的 import
   - 使用 service 包中的类型定义（Account, Platform常量等）

2. **修复类型转换**
   - JSONB → map[string]any
   - 添加 mergeJSONB 辅助函数
   - 添加 Account.IsGemini() 方法

3. **更新中间件调用**
   - GetUserFromContext → GetAuthSubjectFromContext
   - 适配新的并发控制签名（传递 ID 和 Concurrency 而不是完整对象）

4. **修复 handler 层**
   - 更新 gemini_v1beta_handler.go
   - 修正 billing 检查和 usage 记录

## 影响范围
- backend/internal/service/gemini_*.go
- backend/internal/service/account_test_service.go
- backend/internal/service/crs_sync_service.go
- backend/internal/handler/gemini_v1beta_handler.go
- backend/internal/handler/gateway_handler.go
- backend/internal/handler/admin/account_handler.go

											
										
										
											2025-12-26 22:07:55 +08:00
+									if platform == service.PlatformGemini {
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+										fs := NewFailoverState(h.maxAccountSwitchesGemini, hasBoundSession)
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
-												fix: 单账号分组首次 503 不设模型限流标记，避免后续请求雪崩

单账号 antigravity 分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时，
原逻辑会设置 ~29s 模型限流标记。由于只有一个账号无法切换，
后续所有新请求在预检查时命中限流 → 几毫秒内直接返回 503，
导致约 30 秒的雪崩窗口。

修复：在 Handler 入口处检查分组是否只有单个 antigravity 账号，
如果是则提前设置 SingleAccountRetry context 标记，让 Service 层
首次 503 就走原地重试逻辑（不设限流标记），避免污染后续请求。

											
										
										
											2026-02-09 17:25:36 +08:00
+										// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
 										// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
 										if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+											ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
-												fix: 单账号分组首次 503 不设模型限流标记，避免后续请求雪崩

单账号 antigravity 分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时，
原逻辑会设置 ~29s 模型限流标记。由于只有一个账号无法切换，
后续所有新请求在预检查时命中限流 → 几毫秒内直接返回 503，
导致约 30 秒的雪崩窗口。

修复：在 Handler 入口处检查分组是否只有单个 antigravity 账号，
如果是则提前设置 SingleAccountRetry context 标记，让 Service 层
首次 503 就走原地重试逻辑（不设限流标记），避免污染后续请求。

											
										
										
											2026-02-09 17:25:36 +08:00
+											c.Request = c.Request.WithContext(ctx)
 										}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+										for {
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+											selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, "") // Gemini 不使用会话限制
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											if err != nil {
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+												if len(fs.FailedAccountIDs) == 0 {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+													return
 												}
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+												action := fs.HandleSelectionExhausted(c.Request.Context())
 												switch action {
 												case FailoverContinue:
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+													ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													c.Request = c.Request.WithContext(ctx)
 													continue
 												case FailoverCanceled:
 													return
 												default: // FailoverExhausted
 													if fs.LastFailoverErr != nil {
 														h.handleFailoverExhausted(c, fs.LastFailoverErr, service.PlatformGemini, streamStarted)
 													} else {
 														h.handleFailoverExhaustedSimple(c, 502, streamStarted)
-												feat: 添加 Antigravity 单账号 503 退避重试机制

当分组内只有一个可用账号且上游返回 503 (MODEL_CAPACITY_EXHAUSTED) 时，
不再设置模型限流+切换账号（因为切换回来还是同一个账号），而是在 Service 层
原地等待+重试，避免双重等待问题。

主要变更：
- Handler 层：检测单账号 503 场景，清除排除列表并设置 SingleAccountRetry 标记
- Service 层：新增 handleSingleAccountRetryInPlace 原地重试逻辑
- Service 层：预检查跳过单账号模式下的限流检查
- 新增 ctxkey.SingleAccountRetry 上下文标记

											
										
										
											2026-02-09 14:26:01 +08:00
+													}
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													return
-												feat: 添加 Antigravity 单账号 503 退避重试机制

当分组内只有一个可用账号且上游返回 503 (MODEL_CAPACITY_EXHAUSTED) 时，
不再设置模型限流+切换账号（因为切换回来还是同一个账号），而是在 Service 层
原地等待+重试，避免双重等待问题。

主要变更：
- Handler 层：检测单账号 503 场景，清除排除列表并设置 SingleAccountRetry 标记
- Service 层：新增 handleSingleAccountRetryInPlace 原地重试逻辑
- Service 层：预检查跳过单账号模式下的限流检查
- 新增 ctxkey.SingleAccountRetry 上下文标记

											
										
										
											2026-02-09 14:26:01 +08:00
+												}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											account := selection.Account
-												feat(log): 落地统一日志底座与系统日志运维能力

											
										
										
											2026-02-12 16:27:29 +08:00
+											setOpsSelectedAccount(c, account.ID, account.Platform)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+											// 检查请求拦截（预热请求、SUGGESTION MODE等）
 											if account.IsInterceptWarmupEnabled() {
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+												interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+												if interceptType != InterceptTypeNone {
 													if selection.Acquired && selection.ReleaseFunc != nil {
 														selection.ReleaseFunc()
 													}
 													if reqStream {
 														sendMockInterceptStream(c, reqModel, interceptType)
 													} else {
 														sendMockInterceptResponse(c, reqModel, interceptType)
 													}
 													return
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												}
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											}
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											// 3. 获取账号并发槽位
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											accountReleaseFunc := selection.ReleaseFunc
 											if !selection.Acquired {
 												if selection.WaitPlan == nil {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
 													return
 												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												accountWaitCounted := false
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+												canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
 												if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Warn("gateway.account_wait_counter_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+												} else if !canWait {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Info("gateway.account_wait_queue_full",
 														zap.Int64("account_id", account.ID),
 														zap.Int("max_waiting", selection.WaitPlan.MaxWaiting),
 													)
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+													h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
 													return
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												}
 												if err == nil && canWait {
 													accountWaitCounted = true
 												}
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+												releaseWait := func() {
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+													if accountWaitCounted {
-												fix(lint): 修复 golangci-lint 报错

- 修复 gofmt 格式问题
- 修复 staticcheck SA4031 nil check 问题（只在成功时设置 release 函数）
- 删除未使用的 sortAccountsByPriority 函数

											
										
										
											2026-01-01 04:26:01 +08:00
+														h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+														accountWaitCounted = false
-												fix(lint): 修复 golangci-lint 报错

- 修复 gofmt 格式问题
- 修复 staticcheck SA4031 nil check 问题（只在成功时设置 release 函数）
- 删除未使用的 sortAccountsByPriority 函数

											
										
										
											2026-01-01 04:26:01 +08:00
+													}
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+												}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
 												accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
 													c,
 													account.ID,
 													selection.WaitPlan.MaxConcurrency,
 													selection.WaitPlan.Timeout,
 													reqStream,
 													&streamStarted,
 												)
 												if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Warn("gateway.account_slot_acquire_failed", zap.Int64("account_id", account.ID), zap.Error(err))
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+													releaseWait()
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+													h.handleConcurrencyError(c, err, "account", streamStarted)
 													return
 												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												// Slot acquired: no longer waiting in queue.
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+												releaseWait()
-												feat(groups): add Claude Code client restriction and session isolation

- Add claude_code_only field to restrict groups to Claude Code clients only
- Add fallback_group_id for non-Claude Code requests to use alternate group
- Implement ClaudeCodeValidator for User-Agent detection
- Add group-level session binding isolation (groupID in Redis key)
- Prevent cross-group sticky session pollution
- Update frontend with Claude Code restriction controls

											
										
										
											2026-01-08 23:07:00 +08:00
+												if err := h.gatewayService.BindStickySession(c.Request.Context(), apiKey.GroupID, sessionKey, account.ID); err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+												}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+											// 账号槽位/等待计数需要在超时或断开时安全回收
 											accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											// 转发请求 - 根据账号平台分流
 											var result *service.ForwardResult
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+											requestCtx := c.Request.Context()
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+											if fs.SwitchCount > 0 {
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+												requestCtx = service.WithAccountSwitchCount(requestCtx, fs.SwitchCount, h.metadataBridgeEnabled())
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+											}
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											if account.Platform == service.PlatformAntigravity {
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+												result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, reqModel, "generateContent", reqStream, body, hasBoundSession)
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											} else {
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+												result, err = h.geminiCompatService.Forward(requestCtx, c, account, body)
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											if accountReleaseFunc != nil {
 												accountReleaseFunc()
 											}
 											if err != nil {
 												var failoverErr *service.UpstreamFailoverError
 												if errors.As(err, &failoverErr) {
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
 													switch action {
 													case FailoverContinue:
-												feat: same-account retry before failover for transient errors

For retryable transient errors (Google 400 "invalid project resource name"
and empty stream responses), retry on the same account up to 2 times
(with 500ms delay) before switching to another account.

- Add RetryableOnSameAccount field to UpstreamFailoverError
- Add same-account retry loop in both Gemini and Claude/OpenAI handler paths
- Move temp-unschedule from service layer to handler layer (only after
  all same-account retries exhausted)
- Reduce temp-unschedule cooldown from 30 minutes to 1 minute

											
										
										
											2026-02-10 00:53:54 +08:00
+														continue
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													case FailoverExhausted:
 														h.handleFailoverExhausted(c, fs.LastFailoverErr, service.PlatformGemini, streamStarted)
 														return
 													case FailoverCanceled:
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+														return
 													}
 												}
-												feat(backend): 提交后端审计修复与配套测试改动

											
										
										
											2026-02-14 11:23:10 +08:00
+												wroteFallback := h.ensureForwardErrorResponse(c, streamStarted)
 												reqLog.Error("gateway.forward_failed",
 													zap.Int64("account_id", account.ID),
 													zap.Bool("fallback_error_response_written", wroteFallback),
 													zap.Error(err),
 												)
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												return
 											}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												fix: address deep code review issues for RPM limiting

- Move IncrementRPM after Forward success to prevent phantom RPM
  consumption during account switch retries
- Add base_rpm input sanitization (clamp to 0-10000) in Create/Update
- Add WindowCost scheduling checks to legacy path sticky sessions
  (4 check sites + 4 prefetch sites), fixing pre-existing gap
- Clean up rpm_strategy/rpm_sticky_buffer when disabling RPM in
  BulkEditModal (JSONB merge cannot delete keys, use empty values)
- Add json.Number test cases to TestGetBaseRPM/TestGetRPMStickyBuffer
- Document TOCTOU race as accepted soft-limit design trade-off

											
										
										
											2026-02-28 10:35:33 +08:00
+											// RPM 计数递增（Forward 成功后）
 											// 注意：TOCTOU 竞态是已知且可接受的设计权衡，与 WindowCost 一致的 soft-limit 模式。
 											// 在高并发下可能短暂超出 RPM 限制，但不会导致请求失败。
 											if account.IsAnthropicOAuthOrSetupToken() && account.GetBaseRPM() > 0 {
 												if err := h.gatewayService.IncrementAccountRPM(c.Request.Context(), account.ID); err != nil {
 													reqLog.Warn("gateway.rpm_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
 												}
 											}
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
+											// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
 											userAgent := c.GetHeader("User-Agent")
-												fix(gateway): 修复 usage_logs 记录 IP 不正确的问题

在 nginx 反向代理场景下，使用 ip.GetClientIP() 替代 c.ClientIP()
以正确获取客户端真实 IP 地址

											
										
										
											2026-01-12 15:35:54 +08:00
+											clientIP := ip.GetClientIP(c)
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+											// 使用量记录通过有界 worker 池提交，避免请求热路径创建无界 goroutine。
 											h.submitUsageRecordTask(func(ctx context.Context) {
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													Result:            result,
-												feat: add claude max usage simulation with group switch

											
										
										
											2026-02-27 01:54:54 +08:00
+													ParsedRequest:     parsedReq,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													APIKey:            apiKey,
 													User:              apiKey.User,
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+													Account:           account,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													Subscription:      subscription,
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+													UserAgent:         userAgent,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													IPAddress:         clientIP,
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													ForceCacheBilling: fs.ForceCacheBilling,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													APIKeyService:     h.apiKeyService,
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												}); err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													logger.L().With(
 														zap.String("component", "handler.gateway.messages"),
 														zap.Int64("user_id", subject.UserID),
 														zap.Int64("api_key_id", apiKey.ID),
 														zap.Any("group_id", apiKey.GroupID),
 														zap.String("model", reqModel),
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+														zap.Int64("account_id", account.ID),
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													).Error("gateway.record_usage_failed", zap.Error(err))
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												}
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+											})
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											return
 										}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+									currentAPIKey := apiKey
 									currentSubscription := subscription
 									var fallbackGroupID *int64
 									if apiKey.Group != nil {
 										fallbackGroupID = apiKey.Group.FallbackGroupIDOnInvalidRequest
 									}
 									fallbackUsed := false
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
-												fix: 单账号分组首次 503 不设模型限流标记，避免后续请求雪崩

单账号 antigravity 分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时，
原逻辑会设置 ~29s 模型限流标记。由于只有一个账号无法切换，
后续所有新请求在预检查时命中限流 → 几毫秒内直接返回 503，
导致约 30 秒的雪崩窗口。

修复：在 Handler 入口处检查分组是否只有单个 antigravity 账号，
如果是则提前设置 SingleAccountRetry context 标记，让 Service 层
首次 503 就走原地重试逻辑（不设限流标记），避免污染后续请求。

											
										
										
											2026-02-09 17:25:36 +08:00
+									// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
 									// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
 									if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) {
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+										ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
-												fix: 单账号分组首次 503 不设模型限流标记，避免后续请求雪崩

单账号 antigravity 分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时，
原逻辑会设置 ~29s 模型限流标记。由于只有一个账号无法切换，
后续所有新请求在预检查时命中限流 → 几毫秒内直接返回 503，
导致约 30 秒的雪崩窗口。

修复：在 Handler 入口处检查分组是否只有单个 antigravity 账号，
如果是则提前设置 SingleAccountRetry context 标记，让 Service 层
首次 503 就走原地重试逻辑（不设限流标记），避免污染后续请求。

											
										
										
											2026-02-09 17:25:36 +08:00
+										c.Request = c.Request.WithContext(ctx)
 									}
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									for {
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+										fs := NewFailoverState(h.maxAccountSwitches, hasBoundSession)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+										retryWithFallback := false
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+										for {
 											// 选择支持该模型的账号
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+											selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID)
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											if err != nil {
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+												if len(fs.FailedAccountIDs) == 0 {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													return
 												}
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+												action := fs.HandleSelectionExhausted(c.Request.Context())
 												switch action {
 												case FailoverContinue:
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+													ctx := service.WithSingleAccountRetry(c.Request.Context(), true, h.metadataBridgeEnabled())
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													c.Request = c.Request.WithContext(ctx)
 													continue
 												case FailoverCanceled:
 													return
 												default: // FailoverExhausted
 													if fs.LastFailoverErr != nil {
 														h.handleFailoverExhausted(c, fs.LastFailoverErr, platform, streamStarted)
 													} else {
 														h.handleFailoverExhaustedSimple(c, 502, streamStarted)
-												feat: 添加 Antigravity 单账号 503 退避重试机制

当分组内只有一个可用账号且上游返回 503 (MODEL_CAPACITY_EXHAUSTED) 时，
不再设置模型限流+切换账号（因为切换回来还是同一个账号），而是在 Service 层
原地等待+重试，避免双重等待问题。

主要变更：
- Handler 层：检测单账号 503 场景，清除排除列表并设置 SingleAccountRetry 标记
- Service 层：新增 handleSingleAccountRetryInPlace 原地重试逻辑
- Service 层：预检查跳过单账号模式下的限流检查
- 新增 ctxkey.SingleAccountRetry 上下文标记

											
										
										
											2026-02-09 14:26:01 +08:00
+													}
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													return
-												feat: 添加 Antigravity 单账号 503 退避重试机制

当分组内只有一个可用账号且上游返回 503 (MODEL_CAPACITY_EXHAUSTED) 时，
不再设置模型限流+切换账号（因为切换回来还是同一个账号），而是在 Service 层
原地等待+重试，避免双重等待问题。

主要变更：
- Handler 层：检测单账号 503 场景，清除排除列表并设置 SingleAccountRetry 标记
- Service 层：新增 handleSingleAccountRetryInPlace 原地重试逻辑
- Service 层：预检查跳过单账号模式下的限流检查
- 新增 ctxkey.SingleAccountRetry 上下文标记

											
										
										
											2026-02-09 14:26:01 +08:00
+												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											account := selection.Account
-												feat(log): 落地统一日志底座与系统日志运维能力

											
										
										
											2026-02-12 16:27:29 +08:00
+											setOpsSelectedAccount(c, account.ID, account.Platform)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+											// 检查请求拦截（预热请求、SUGGESTION MODE等）
 											if account.IsInterceptWarmupEnabled() {
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+												interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+												if interceptType != InterceptTypeNone {
 													if selection.Acquired && selection.ReleaseFunc != nil {
 														selection.ReleaseFunc()
 													}
 													if reqStream {
 														sendMockInterceptStream(c, reqModel, interceptType)
 													} else {
 														sendMockInterceptResponse(c, reqModel, interceptType)
 													}
 													return
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
 											// 3. 获取账号并发槽位
 											accountReleaseFunc := selection.ReleaseFunc
 											if !selection.Acquired {
 												if selection.WaitPlan == nil {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
 													return
 												}
 												accountWaitCounted := false
 												canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
 												if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Warn("gateway.account_wait_counter_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												} else if !canWait {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Info("gateway.account_wait_queue_full",
 														zap.Int64("account_id", account.ID),
 														zap.Int("max_waiting", selection.WaitPlan.MaxWaiting),
 													)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
 													return
 												}
 												if err == nil && canWait {
 													accountWaitCounted = true
 												}
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+												releaseWait := func() {
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													if accountWaitCounted {
 														h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+														accountWaitCounted = false
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													}
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+												}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
 												accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
 													c,
 													account.ID,
 													selection.WaitPlan.MaxConcurrency,
 													selection.WaitPlan.Timeout,
 													reqStream,
 													&streamStarted,
 												)
 												if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Warn("gateway.account_slot_acquire_failed", zap.Int64("account_id", account.ID), zap.Error(err))
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+													releaseWait()
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													h.handleConcurrencyError(c, err, "account", streamStarted)
 													return
 												}
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+												// Slot acquired: no longer waiting in queue.
 												releaseWait()
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
-												fix(lint): 修复 golangci-lint 报错

- 修复 gofmt 格式问题
- 修复 staticcheck SA4031 nil check 问题（只在成功时设置 release 函数）
- 删除未使用的 sortAccountsByPriority 函数

											
										
										
											2026-01-01 04:26:01 +08:00
+												}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											// 账号槽位/等待计数需要在超时或断开时安全回收
 											accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
+											// ===== 用户消息串行队列 START =====
 											var queueRelease func()
 											umqMode := h.getUserMsgQueueMode(account, parsedReq)
 											switch umqMode {
 											case config.UMQModeSerialize:
 												// 串行模式：获取锁 + RPM 延迟 + 释放（当前行为不变）
 												baseRPM := account.GetBaseRPM()
 												release, qErr := h.userMsgQueueHelper.AcquireWithWait(
 													c, account.ID, baseRPM, reqStream, &streamStarted,
 													h.cfg.Gateway.UserMessageQueue.WaitTimeout(),
 													reqLog,
 												)
 												if qErr != nil {
 													// fail-open: 记录 warn，不阻止请求
 													reqLog.Warn("gateway.umq_acquire_failed",
 														zap.Int64("account_id", account.ID),
 														zap.Error(qErr),
 													)
 												} else {
 													queueRelease = release
 												}
 											case config.UMQModeThrottle:
 												// 软性限速：仅施加 RPM 自适应延迟，不阻塞并发
 												baseRPM := account.GetBaseRPM()
 												if tErr := h.userMsgQueueHelper.ThrottleWithPing(
 													c, account.ID, baseRPM, reqStream, &streamStarted,
 													h.cfg.Gateway.UserMessageQueue.WaitTimeout(),
 													reqLog,
 												); tErr != nil {
 													reqLog.Warn("gateway.umq_throttle_failed",
 														zap.Int64("account_id", account.ID),
 														zap.Error(tErr),
 													)
 												}
 											default:
 												if umqMode != "" {
 													reqLog.Warn("gateway.umq_unknown_mode",
 														zap.String("mode", umqMode),
 														zap.Int64("account_id", account.ID),
 													)
 												}
 											}
 											// 用 wrapReleaseOnDone 确保 context 取消时自动释放（仅 serialize 模式有 queueRelease）
 											queueRelease = wrapReleaseOnDone(c.Request.Context(), queueRelease)
 											// 注入回调到 ParsedRequest：使用外层 wrapper 以便提前清理 AfterFunc
 											parsedReq.OnUpstreamAccepted = queueRelease
 											// ===== 用户消息串行队列 END =====
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											// 转发请求 - 根据账号平台分流
-												refactor: decouple claude max cache simulation from RecordUsage

Extract setupClaudeMaxStreamingHook and applyClaudeMaxNonStreamingRewrite
facade functions to helpers file. RecordUsage now uses detect-only (no
mutation), client response rewriting handled at Forward layer.

											
										
										
											2026-02-27 19:59:36 +08:00
+											c.Set("parsed_request", parsedReq)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											var result *service.ForwardResult
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+											requestCtx := c.Request.Context()
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+											if fs.SwitchCount > 0 {
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+												requestCtx = service.WithAccountSwitchCount(requestCtx, fs.SwitchCount, h.metadataBridgeEnabled())
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											}
-												refactor(upstream): replace upstream account type with apikey, auto-append /antigravity

Upstream accounts now use the standard APIKey type instead of a dedicated
upstream type. GetBaseURL() and new GetGeminiBaseURL() automatically append
/antigravity for Antigravity platform APIKey accounts, eliminating the need
for separate upstream forwarding methods.

- Remove ForwardUpstream, ForwardUpstreamGemini, testUpstreamConnection
- Remove upstream branch guards in Forward/ForwardGemini/TestConnection
- Add migration 052 to convert existing upstream accounts to apikey
- Update frontend CreateAccountModal to create apikey type
- Add unit tests for GetBaseURL and GetGeminiBaseURL

											
										
										
											2026-02-08 13:06:25 +08:00
+											if account.Platform == service.PlatformAntigravity && account.Type != service.AccountTypeAPIKey {
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+												result, err = h.antigravityGatewayService.Forward(requestCtx, c, account, body, hasBoundSession)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											} else {
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+												result, err = h.gatewayService.Forward(requestCtx, c, account, parsedReq)
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
 											// 兜底释放串行锁（正常情况已通过回调提前释放）
 											if queueRelease != nil {
 												queueRelease()
 											}
 											// 清理回调引用，防止 failover 重试时旧回调被错误调用
 											parsedReq.OnUpstreamAccepted = nil
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											if accountReleaseFunc != nil {
 												accountReleaseFunc()
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											if err != nil {
-												feat: Anthropic平台可配置 anthropic-beta 策略

											
										
										
											2026-03-10 11:14:17 +08:00
+												// Beta policy block: return 400 immediately, no failover
 												var betaBlockedErr *service.BetaBlockedError
 												if errors.As(err, &betaBlockedErr) {
 													h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", betaBlockedErr.Message)
 													return
 												}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												var promptTooLongErr *service.PromptTooLongError
 												if errors.As(err, &promptTooLongErr) {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													reqLog.Warn("gateway.prompt_too_long_from_antigravity",
 														zap.Any("current_group_id", currentAPIKey.GroupID),
 														zap.Any("fallback_group_id", fallbackGroupID),
 														zap.Bool("fallback_used", fallbackUsed),
 													)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													if !fallbackUsed && fallbackGroupID != nil && *fallbackGroupID > 0 {
 														fallbackGroup, err := h.gatewayService.ResolveGroupByID(c.Request.Context(), *fallbackGroupID)
 														if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+															reqLog.Warn("gateway.resolve_fallback_group_failed", zap.Int64("fallback_group_id", *fallbackGroupID), zap.Error(err))
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+															_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
 															return
 														}
 														if fallbackGroup.Platform != service.PlatformAnthropic ||
 															fallbackGroup.SubscriptionType == service.SubscriptionTypeSubscription ||
 															fallbackGroup.FallbackGroupIDOnInvalidRequest != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+															reqLog.Warn("gateway.fallback_group_invalid",
 																zap.Int64("fallback_group_id", fallbackGroup.ID),
 																zap.String("fallback_platform", fallbackGroup.Platform),
 																zap.String("fallback_subscription_type", fallbackGroup.SubscriptionType),
 															)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+															_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
 															return
 														}
 														fallbackAPIKey := cloneAPIKeyWithGroup(apiKey, fallbackGroup)
 														if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), fallbackAPIKey.User, fallbackAPIKey, fallbackGroup, nil); err != nil {
 															status, code, message := billingErrorDetails(err)
 															h.handleStreamingAwareError(c, status, code, message, streamStarted)
 															return
 														}
-												fix: address deep code review issues for RPM limiting

- Move IncrementRPM after Forward success to prevent phantom RPM
  consumption during account switch retries
- Add base_rpm input sanitization (clamp to 0-10000) in Create/Update
- Add WindowCost scheduling checks to legacy path sticky sessions
  (4 check sites + 4 prefetch sites), fixing pre-existing gap
- Clean up rpm_strategy/rpm_sticky_buffer when disabling RPM in
  BulkEditModal (JSONB merge cannot delete keys, use empty values)
- Add json.Number test cases to TestGetBaseRPM/TestGetRPMStickyBuffer
- Document TOCTOU race as accepted soft-limit design trade-off

											
										
										
											2026-02-28 10:35:33 +08:00
+														// 兜底重试按"直接请求兜底分组"处理：清除强制平台，允许按分组平台调度
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+														ctx := context.WithValue(c.Request.Context(), ctxkey.ForcePlatform, "")
 														c.Request = c.Request.WithContext(ctx)
 														currentAPIKey = fallbackAPIKey
 														currentSubscription = nil
 														fallbackUsed = true
 														retryWithFallback = true
 														break
 													}
 													_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+													return
 												}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												var failoverErr *service.UpstreamFailoverError
 												if errors.As(err, &failoverErr) {
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr)
 													switch action {
 													case FailoverContinue:
-												feat: same-account retry before failover for transient errors

For retryable transient errors (Google 400 "invalid project resource name"
and empty stream responses), retry on the same account up to 2 times
(with 500ms delay) before switching to another account.

- Add RetryableOnSameAccount field to UpstreamFailoverError
- Add same-account retry loop in both Gemini and Claude/OpenAI handler paths
- Move temp-unschedule from service layer to handler layer (only after
  all same-account retries exhausted)
- Reduce temp-unschedule cooldown from 30 minutes to 1 minute

											
										
										
											2026-02-10 00:53:54 +08:00
+														continue
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													case FailoverExhausted:
 														h.handleFailoverExhausted(c, fs.LastFailoverErr, account.Platform, streamStarted)
 														return
 													case FailoverCanceled:
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+														return
 													}
 												}
-												feat(backend): 提交后端审计修复与配套测试改动

											
										
										
											2026-02-14 11:23:10 +08:00
+												wroteFallback := h.ensureForwardErrorResponse(c, streamStarted)
 												reqLog.Error("gateway.forward_failed",
 													zap.Int64("account_id", account.ID),
 													zap.Bool("fallback_error_response_written", wroteFallback),
 													zap.Error(err),
 												)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												return
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
-												fix: address deep code review issues for RPM limiting

- Move IncrementRPM after Forward success to prevent phantom RPM
  consumption during account switch retries
- Add base_rpm input sanitization (clamp to 0-10000) in Create/Update
- Add WindowCost scheduling checks to legacy path sticky sessions
  (4 check sites + 4 prefetch sites), fixing pre-existing gap
- Clean up rpm_strategy/rpm_sticky_buffer when disabling RPM in
  BulkEditModal (JSONB merge cannot delete keys, use empty values)
- Add json.Number test cases to TestGetBaseRPM/TestGetRPMStickyBuffer
- Document TOCTOU race as accepted soft-limit design trade-off

											
										
										
											2026-02-28 10:35:33 +08:00
+											// RPM 计数递增（Forward 成功后）
 											// 注意：TOCTOU 竞态是已知且可接受的设计权衡，与 WindowCost 一致的 soft-limit 模式。
 											// 在高并发下可能短暂超出 RPM 限制，但不会导致请求失败。
 											if account.IsAnthropicOAuthOrSetupToken() && account.GetBaseRPM() > 0 {
 												if err := h.gatewayService.IncrementAccountRPM(c.Request.Context(), account.ID); err != nil {
 													reqLog.Warn("gateway.rpm_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
 												}
 											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
 											userAgent := c.GetHeader("User-Agent")
 											clientIP := ip.GetClientIP(c)
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+											// 使用量记录通过有界 worker 池提交，避免请求热路径创建无界 goroutine。
 											h.submitUsageRecordTask(func(ctx context.Context) {
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													Result:            result,
-												feat: add claude max usage simulation with group switch

											
										
										
											2026-02-27 01:54:54 +08:00
+													ParsedRequest:     parsedReq,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													APIKey:            currentAPIKey,
 													User:              currentAPIKey.User,
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+													Account:           account,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													Subscription:      currentSubscription,
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+													UserAgent:         userAgent,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													IPAddress:         clientIP,
-												refactor: extract failover error handling into FailoverState

- Extract duplicated failover logic from gateway_handler.go (3 places)
  and gemini_v1beta_handler.go into shared failover_loop.go
- Introduce FailoverState with HandleFailoverError and HandleSelectionExhausted
- Move helper functions (needForceCacheBilling, sleepWithContext) into failover_loop.go
- Add comprehensive unit tests (32+ test cases)
- Delete redundant gateway_handler_single_account_retry_test.go

											
										
										
											2026-02-24 18:08:04 +08:00
+													ForceCacheBilling: fs.ForceCacheBilling,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													APIKeyService:     h.apiKeyService,
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												}); err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													logger.L().With(
 														zap.String("component", "handler.gateway.messages"),
 														zap.Int64("user_id", subject.UserID),
 														zap.Int64("api_key_id", currentAPIKey.ID),
 														zap.Any("group_id", currentAPIKey.GroupID),
 														zap.String("model", reqModel),
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+														zap.Int64("account_id", account.ID),
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+													).Error("gateway.record_usage_failed", zap.Error(err))
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												}
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+											})
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											return
 										}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+										if !retryWithFallback {
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											return
 										}
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								}
 								// Models handles listing available models
 								// GET /v1/models
-												feat: 添加模型白名单选择器组件，同步 new-api 模型列表

- 新增 ModelWhitelistSelector.vue 支持模型白名单多选
- 新增 ModelIcon.vue 显示品牌图标（基于 @lobehub/icons）
- 新增 useModelWhitelist.ts 硬编码各平台模型列表
- 更新账号编辑表单支持模型白名单配置
- 支持 Claude/OpenAI/Gemini/智谱/百度/讯飞等主流平台

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-01 16:03:48 +08:00
+								// Returns models based on account configurations (model_mapping whitelist)
 								// Falls back to default models if no whitelist is configured
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								func (h *GatewayHandler) Models(c *gin.Context) {
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, _ := middleware2.GetAPIKeyFromContext(c)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
-												feat: 添加模型白名单选择器组件，同步 new-api 模型列表

- 新增 ModelWhitelistSelector.vue 支持模型白名单多选
- 新增 ModelIcon.vue 显示品牌图标（基于 @lobehub/icons）
- 新增 useModelWhitelist.ts 硬编码各平台模型列表
- 更新账号编辑表单支持模型白名单配置
- 支持 Claude/OpenAI/Gemini/智谱/百度/讯飞等主流平台

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-01 16:03:48 +08:00
+									var groupID *int64
 									var platform string
 									if apiKey != nil && apiKey.Group != nil {
 										groupID = &apiKey.Group.ID
 										platform = apiKey.Group.Platform
 									}
-												feat(Sora): 完成Sora网关接入与媒体能力

新增 Sora 网关路由、账号调度与同步服务\n补充媒体代理与签名 URL、模型列表动态拉取\n完善计费配置、前端支持与相关测试

											
										
										
											2026-01-31 20:22:22 +08:00
+									if forcedPlatform, ok := middleware2.GetForcePlatformFromContext(c); ok && strings.TrimSpace(forcedPlatform) != "" {
 										platform = forcedPlatform
 									}
 									if platform == service.PlatformSora {
 										c.JSON(http.StatusOK, gin.H{
 											"object": "list",
-												feat(Sora): 直连生成并移除sora2api依赖

实现直连 Sora 客户端、媒体落地与清理策略\n更新网关与前端配置以支持 Sora 平台\n补齐单元测试与契约测试，新增 curl 测试脚本\n\n测试: go test ./... -tags=unit

											
										
										
											2026-02-01 21:37:10 +08:00
+											"data":   service.DefaultSoraModels(h.cfg),
-												feat(Sora): 完成Sora网关接入与媒体能力

新增 Sora 网关路由、账号调度与同步服务\n补充媒体代理与签名 URL、模型列表动态拉取\n完善计费配置、前端支持与相关测试

											
										
										
											2026-01-31 20:22:22 +08:00
+										})
 										return
 									}
-												feat: 添加模型白名单选择器组件，同步 new-api 模型列表

- 新增 ModelWhitelistSelector.vue 支持模型白名单多选
- 新增 ModelIcon.vue 显示品牌图标（基于 @lobehub/icons）
- 新增 useModelWhitelist.ts 硬编码各平台模型列表
- 更新账号编辑表单支持模型白名单配置
- 支持 Claude/OpenAI/Gemini/智谱/百度/讯飞等主流平台

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-01 16:03:48 +08:00
 									// Get available models from account configurations (without platform filter)
 									availableModels := h.gatewayService.GetAvailableModels(c.Request.Context(), groupID, "")
 									if len(availableModels) > 0 {
 										// Build model list from whitelist
 										models := make([]claude.Model, 0, len(availableModels))
 										for _, modelID := range availableModels {
 											models = append(models, claude.Model{
 												ID:          modelID,
 												Type:        "model",
 												DisplayName: modelID,
 												CreatedAt:   "2024-01-01T00:00:00Z",
 											})
 										}
 										c.JSON(http.StatusOK, gin.H{
 											"object": "list",
 											"data":   models,
 										})
 										return
 									}
 									// Fallback to default models
 									if platform == "openai" {
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										c.JSON(http.StatusOK, gin.H{
 											"object": "list",
 											"data":   openai.DefaultModels,
 										})
 										return
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									c.JSON(http.StatusOK, gin.H{
 										"object": "list",
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										"data":   claude.DefaultModels,
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									})
 								}
-												feat(antigravity): 添加 models 端点支持

- /antigravity/models: 返回全部模型（Claude + Gemini）
- /antigravity/v1/models: 返回全部模型（Claude API 格式）
- /antigravity/v1beta/models: 仅返回 Gemini 模型（v1beta 格式）

统一管理 antigravity 模型定义，避免重复代码

											
										
										
											2026-01-02 10:21:05 +08:00
+								// AntigravityModels 返回 Antigravity 支持的全部模型
 								// GET /antigravity/models
 								func (h *GatewayHandler) AntigravityModels(c *gin.Context) {
 									c.JSON(http.StatusOK, gin.H{
 										"object": "list",
 										"data":   antigravity.DefaultModels(),
 									})
 								}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+								func cloneAPIKeyWithGroup(apiKey *service.APIKey, group *service.Group) *service.APIKey {
 									if apiKey == nil || group == nil {
 										return apiKey
 									}
 									cloned := *apiKey
 									groupID := group.ID
 									cloned.GroupID = &groupID
 									cloned.Group = group
 									return &cloned
 								}
-												chore: gofmt

											
										
										
											2026-02-03 16:55:13 +08:00
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+								// Usage handles getting account balance and usage statistics for CC Switch integration
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								// GET /v1/usage
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+								//
 								// Two modes:
 								//   - quota_limited: API Key has quota or rate limits configured. Returns key-level limits/usage.
 								//   - unrestricted:  No key-level limits. Returns subscription or wallet balance info.
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								func (h *GatewayHandler) Usage(c *gin.Context) {
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, ok := middleware2.GetAPIKeyFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									subject, ok := middleware2.GetAuthSubjectFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+									ctx := c.Request.Context()
 									// 解析可选的日期范围参数（用于 model_stats 查询）
 									startTime, endTime := h.parseUsageDateRange(c)
-												feat(gateway): filter /v1/usage stats by API Key instead of UserID

Previously the /v1/usage endpoint aggregated usage stats (today/total
tokens, cost, RPM/TPM) across all API Keys belonging to the user.
This made it impossible to distinguish usage from different API Keys
(e.g. balance vs subscription keys).

Now the usage stats are filtered by the current request's API Key ID,
so each key only sees its own usage data. The balance/remaining fields
are unaffected and still reflect the user-level wallet balance.

Changes:
- Add GetAPIKeyDashboardStats to repository interface and implementation
- Add getPerformanceStatsByAPIKey helper (also fixes TPM to include
  cache_creation_tokens and cache_read_tokens)
- Add GetAPIKeyDashboardStats to UsageService
- Update Usage handler to call GetAPIKeyDashboardStats(apiKey.ID)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-05 11:41:25 +08:00
+									// Best-effort: 获取用量统计（按当前 API Key 过滤），失败不影响基础响应
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+									usageData := h.buildUsageData(ctx, apiKey.ID)
 									// Best-effort: 获取模型统计
 									var modelStats any
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+									if h.usageService != nil {
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+										if stats, err := h.usageService.GetAPIKeyModelStats(ctx, apiKey.ID, startTime, endTime); err == nil && len(stats) > 0 {
 											modelStats = stats
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+										}
 									}
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+									// 判断模式: key 有总额度或速率限制 → quota_limited，否则 → unrestricted
 									isQuotaLimited := apiKey.Quota > 0 || apiKey.HasRateLimits()
 									if isQuotaLimited {
 										h.usageQuotaLimited(c, ctx, apiKey, usageData, modelStats)
 										return
 									}
 									h.usageUnrestricted(c, ctx, apiKey, subject, usageData, modelStats)
 								}
 								// parseUsageDateRange 解析 start_date / end_date query params，默认返回近 30 天范围
 								func (h *GatewayHandler) parseUsageDateRange(c *gin.Context) (time.Time, time.Time) {
 									now := timezone.Now()
 									endTime := now
 									startTime := now.AddDate(0, 0, -30)
 									if s := c.Query("start_date"); s != "" {
 										if t, err := timezone.ParseInLocation("2006-01-02", s); err == nil {
 											startTime = t
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										}
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+									}
 									if s := c.Query("end_date"); s != "" {
 										if t, err := timezone.ParseInLocation("2006-01-02", s); err == nil {
 											endTime = t.Add(24*time.Hour - time.Second) // end of day
 										}
 									}
 									return startTime, endTime
 								}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+								// buildUsageData 构建 today/total 用量摘要
 								func (h *GatewayHandler) buildUsageData(ctx context.Context, apiKeyID int64) gin.H {
 									if h.usageService == nil {
 										return nil
 									}
 									dashStats, err := h.usageService.GetAPIKeyDashboardStats(ctx, apiKeyID)
 									if err != nil || dashStats == nil {
 										return nil
 									}
 									return gin.H{
 										"today": gin.H{
 											"requests":              dashStats.TodayRequests,
 											"input_tokens":          dashStats.TodayInputTokens,
 											"output_tokens":         dashStats.TodayOutputTokens,
 											"cache_creation_tokens": dashStats.TodayCacheCreationTokens,
 											"cache_read_tokens":     dashStats.TodayCacheReadTokens,
 											"total_tokens":          dashStats.TodayTokens,
 											"cost":                  dashStats.TodayCost,
 											"actual_cost":           dashStats.TodayActualCost,
 										},
 										"total": gin.H{
 											"requests":              dashStats.TotalRequests,
 											"input_tokens":          dashStats.TotalInputTokens,
 											"output_tokens":         dashStats.TotalOutputTokens,
 											"cache_creation_tokens": dashStats.TotalCacheCreationTokens,
 											"cache_read_tokens":     dashStats.TotalCacheReadTokens,
 											"total_tokens":          dashStats.TotalTokens,
 											"cost":                  dashStats.TotalCost,
 											"actual_cost":           dashStats.TotalActualCost,
 										},
 										"average_duration_ms": dashStats.AverageDurationMs,
 										"rpm":                 dashStats.Rpm,
 										"tpm":                 dashStats.Tpm,
 									}
 								}
 								// usageQuotaLimited 处理 quota_limited 模式的响应
 								func (h *GatewayHandler) usageQuotaLimited(c *gin.Context, ctx context.Context, apiKey *service.APIKey, usageData gin.H, modelStats any) {
 									resp := gin.H{
 										"mode":    "quota_limited",
 										"isValid": apiKey.Status == service.StatusAPIKeyActive || apiKey.Status == service.StatusAPIKeyQuotaExhausted || apiKey.Status == service.StatusAPIKeyExpired,
 										"status":  apiKey.Status,
 									}
 									// 总额度信息
 									if apiKey.Quota > 0 {
 										remaining := apiKey.GetQuotaRemaining()
 										resp["quota"] = gin.H{
 											"limit":     apiKey.Quota,
 											"used":      apiKey.QuotaUsed,
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+											"remaining": remaining,
 											"unit":      "USD",
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+										}
 										resp["remaining"] = remaining
 										resp["unit"] = "USD"
 									}
 									// 速率限制信息（从 DB 获取实时用量）
 									if apiKey.HasRateLimits() && h.apiKeyService != nil {
 										rateLimitData, err := h.apiKeyService.GetRateLimitData(ctx, apiKey.ID)
 										if err == nil && rateLimitData != nil {
 											var rateLimits []gin.H
 											if apiKey.RateLimit5h > 0 {
-												fix: 修复keys速率限制未自动重置额度的bug

											
										
										
											2026-03-07 09:59:40 +08:00
+												used := rateLimitData.EffectiveUsage5h()
-												feat: apikey限额支持查询重置时间

											
										
										
											2026-03-09 10:22:24 +08:00
+												entry := gin.H{
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+													"window":       "5h",
 													"limit":        apiKey.RateLimit5h,
 													"used":         used,
 													"remaining":    max(0, apiKey.RateLimit5h-used),
 													"window_start": rateLimitData.Window5hStart,
-												feat: apikey限额支持查询重置时间

											
										
										
											2026-03-09 10:22:24 +08:00
+												}
 												if rateLimitData.Window5hStart != nil && !service.IsWindowExpired(rateLimitData.Window5hStart, service.RateLimitWindow5h) {
 													entry["reset_at"] = rateLimitData.Window5hStart.Add(service.RateLimitWindow5h)
 												}
 												rateLimits = append(rateLimits, entry)
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+											}
 											if apiKey.RateLimit1d > 0 {
-												fix: 修复keys速率限制未自动重置额度的bug

											
										
										
											2026-03-07 09:59:40 +08:00
+												used := rateLimitData.EffectiveUsage1d()
-												feat: apikey限额支持查询重置时间

											
										
										
											2026-03-09 10:22:24 +08:00
+												entry := gin.H{
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+													"window":       "1d",
 													"limit":        apiKey.RateLimit1d,
 													"used":         used,
 													"remaining":    max(0, apiKey.RateLimit1d-used),
 													"window_start": rateLimitData.Window1dStart,
-												feat: apikey限额支持查询重置时间

											
										
										
											2026-03-09 10:22:24 +08:00
+												}
 												if rateLimitData.Window1dStart != nil && !service.IsWindowExpired(rateLimitData.Window1dStart, service.RateLimitWindow1d) {
 													entry["reset_at"] = rateLimitData.Window1dStart.Add(service.RateLimitWindow1d)
 												}
 												rateLimits = append(rateLimits, entry)
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+											}
 											if apiKey.RateLimit7d > 0 {
-												fix: 修复keys速率限制未自动重置额度的bug

											
										
										
											2026-03-07 09:59:40 +08:00
+												used := rateLimitData.EffectiveUsage7d()
-												feat: apikey限额支持查询重置时间

											
										
										
											2026-03-09 10:22:24 +08:00
+												entry := gin.H{
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+													"window":       "7d",
 													"limit":        apiKey.RateLimit7d,
 													"used":         used,
 													"remaining":    max(0, apiKey.RateLimit7d-used),
 													"window_start": rateLimitData.Window7dStart,
-												feat: apikey限额支持查询重置时间

											
										
										
											2026-03-09 10:22:24 +08:00
+												}
 												if rateLimitData.Window7dStart != nil && !service.IsWindowExpired(rateLimitData.Window7dStart, service.RateLimitWindow7d) {
 													entry["reset_at"] = rateLimitData.Window7dStart.Add(service.RateLimitWindow7d)
 												}
 												rateLimits = append(rateLimits, entry)
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+											}
 											if len(rateLimits) > 0 {
 												resp["rate_limits"] = rateLimits
 											}
 										}
 									}
 									// 过期时间
 									if apiKey.ExpiresAt != nil {
 										resp["expires_at"] = apiKey.ExpiresAt
 										resp["days_until_expiry"] = apiKey.GetDaysUntilExpiry()
 									}
 									if usageData != nil {
 										resp["usage"] = usageData
 									}
 									if modelStats != nil {
 										resp["model_stats"] = modelStats
 									}
 									c.JSON(http.StatusOK, resp)
 								}
 								// usageUnrestricted 处理 unrestricted 模式的响应（向后兼容）
 								func (h *GatewayHandler) usageUnrestricted(c *gin.Context, ctx context.Context, apiKey *service.APIKey, subject middleware2.AuthSubject, usageData gin.H, modelStats any) {
 									// 订阅模式
 									if apiKey.Group != nil && apiKey.Group.IsSubscriptionType() {
 										resp := gin.H{
 											"mode":     "unrestricted",
 											"isValid":  true,
 											"planName": apiKey.Group.Name,
 											"unit":     "USD",
 										}
 										// 订阅信息可能不在 context 中（/v1/usage 路径跳过了中间件的计费检查）
 										subscription, ok := middleware2.GetSubscriptionFromContext(c)
 										if ok {
 											remaining := h.calculateSubscriptionRemaining(apiKey.Group, subscription)
 											resp["remaining"] = remaining
 											resp["subscription"] = gin.H{
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+												"daily_usage_usd":   subscription.DailyUsageUSD,
 												"weekly_usage_usd":  subscription.WeeklyUsageUSD,
 												"monthly_usage_usd": subscription.MonthlyUsageUSD,
 												"daily_limit_usd":   apiKey.Group.DailyLimitUSD,
 												"weekly_limit_usd":  apiKey.Group.WeeklyLimitUSD,
 												"monthly_limit_usd": apiKey.Group.MonthlyLimitUSD,
 												"expires_at":        subscription.ExpiresAt,
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+											}
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+										}
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+										if usageData != nil {
 											resp["usage"] = usageData
 										}
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+										if modelStats != nil {
 											resp["model_stats"] = modelStats
 										}
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+										c.JSON(http.StatusOK, resp)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										return
 									}
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+									// 余额模式
 									latestUser, err := h.userService.GetByID(ctx, subject.UserID)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if err != nil {
 										h.errorResponse(c, http.StatusInternalServerError, "api_error", "Failed to get user info")
 										return
 									}
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+									resp := gin.H{
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+										"mode":      "unrestricted",
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										"isValid":   true,
 										"planName":  "钱包余额",
 										"remaining": latestUser.Balance,
 										"unit":      "USD",
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+										"balance":   latestUser.Balance,
 									}
 									if usageData != nil {
 										resp["usage"] = usageData
 									}
-												feat: 重构 /v1/usage 端点，支持 quota_limited 和 unrestricted 双模式

- quota_limited 模式：返回 Key 级别的总额度、速率限制窗口用量和过期时间
- unrestricted 模式：返回订阅限额或钱包余额信息（向后兼容）
- 新增 model_stats 字段，支持 start_date/end_date 参数查询按模型用量统计
- 提取 buildUsageData/parseUsageDateRange 等辅助方法，减少主函数复杂度
- 新增 APIKeyService.GetRateLimitData 和 UsageService.GetAPIKeyModelStats

											
										
										
											2026-03-03 20:59:12 +08:00
+									if modelStats != nil {
 										resp["model_stats"] = modelStats
 									}
-												feat(gateway): 增强 /v1/usage 端点返回完整用量统计

为 CC Switch 集成增强 /v1/usage 网关端点，在保持原有 4 字段
(isValid, planName, remaining, unit) 向后兼容的基础上，新增：

- usage 对象：今日/累计的请求数、token 用量、费用，以及 RPM/TPM
- subscription 对象（订阅模式）：日/周/月用量和限额、过期时间
- balance 字段（余额模式）：当前钱包余额

用量数据获取采用 best-effort 策略，失败不影响基础响应。

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-02 18:30:06 +08:00
+									c.JSON(http.StatusOK, resp)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								}
 								// calculateSubscriptionRemaining 计算订阅剩余可用额度
 								// 逻辑：
 								// 1. 如果日/周/月任一限额达到100%，返回0
 								// 2. 否则返回所有已配置周期中剩余额度的最小值
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+								func (h *GatewayHandler) calculateSubscriptionRemaining(group *service.Group, sub *service.UserSubscription) float64 {
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									var remainingValues []float64
 									// 检查日限额
 									if group.HasDailyLimit() {
 										remaining := *group.DailyLimitUSD - sub.DailyUsageUSD
 										if remaining <= 0 {
 											return 0
 										}
 										remainingValues = append(remainingValues, remaining)
 									}
 									// 检查周限额
 									if group.HasWeeklyLimit() {
 										remaining := *group.WeeklyLimitUSD - sub.WeeklyUsageUSD
 										if remaining <= 0 {
 											return 0
 										}
 										remainingValues = append(remainingValues, remaining)
 									}
 									// 检查月限额
 									if group.HasMonthlyLimit() {
 										remaining := *group.MonthlyLimitUSD - sub.MonthlyUsageUSD
 										if remaining <= 0 {
 											return 0
 										}
 										remainingValues = append(remainingValues, remaining)
 									}
 									// 如果没有配置任何限额，返回-1表示无限制
 									if len(remainingValues) == 0 {
 										return -1
 									}
 									// 返回最小值
 									min := remainingValues[0]
 									for _, v := range remainingValues[1:] {
 										if v < min {
 											min = v
 										}
 									}
 									return min
 								}
 								// handleConcurrencyError reports that the concurrency limit for slotType was
 								// exceeded. The client receives a 429 rate_limit_error, delivered through
 								// handleStreamingAwareError so an already-started stream gets an SSE error event.
 								// err is not included in the client-facing message.
 								func (h *GatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotType string, streamStarted bool) {
 									msg := "Concurrency limit exceeded for " + slotType + ", please retry later"
 									h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", msg, streamStarted)
 								}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+								func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
 									statusCode := failoverErr.StatusCode
 									responseBody := failoverErr.ResponseBody
 									// 先检查透传规则
 									if h.errorPassthroughService != nil && len(responseBody) > 0 {
 										if rule := h.errorPassthroughService.MatchRule(platform, statusCode, responseBody); rule != nil {
 											// 确定响应状态码
 											respCode := statusCode
 											if !rule.PassthroughCode && rule.ResponseCode != nil {
 												respCode = *rule.ResponseCode
 											}
 											// 确定响应消息
 											msg := service.ExtractUpstreamErrorMessage(responseBody)
 											if !rule.PassthroughBody && rule.CustomMessage != nil {
 												msg = *rule.CustomMessage
 											}
-												fix: 修复错误透传规则 skip_monitoring 未生效的问题

- ops_error_logger: status < 400 分支增加 OpsSkipPassthroughKey 检查
- ops_upstream_context: 新增 checkSkipMonitoringForUpstreamEvent，中间重试/故障转移事件也能触发跳过标记
- gateway_handler/openai_gateway_handler/gemini_v1beta_handler: handleFailoverExhausted 匹配规则后设置 OpsSkipPassthroughKey
- antigravity_gateway_service: writeMappedClaudeError 增加 applyErrorPassthroughRule 调用

											
										
										
											2026-02-10 20:56:01 +08:00
+											if rule.SkipMonitoring {
 												c.Set(service.OpsSkipPassthroughKey, true)
 											}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+											h.handleStreamingAwareError(c, respCode, "upstream_error", msg, streamStarted)
 											return
 										}
 									}
 									// 使用默认的错误映射
 									status, errType, errMsg := h.mapUpstreamError(statusCode)
 									h.handleStreamingAwareError(c, status, errType, errMsg, streamStarted)
 								}
 								// handleFailoverExhaustedSimple 简化版本，用于没有响应体的情况
 								func (h *GatewayHandler) handleFailoverExhaustedSimple(c *gin.Context, statusCode int, streamStarted bool) {
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									status, errType, errMsg := h.mapUpstreamError(statusCode)
 									h.handleStreamingAwareError(c, status, errType, errMsg, streamStarted)
 								}
 								// mapUpstreamError converts an upstream HTTP status code into the triple
 								// (client status code, error type, error message) used for the gateway's default
 								// error response. Codes without a dedicated case map to a generic 502
 								// upstream_error.
 								func (h *GatewayHandler) mapUpstreamError(statusCode int) (int, string, string) {
 									const upstreamErrType = "upstream_error"
 									switch statusCode {
 									case http.StatusUnauthorized:
 										return http.StatusBadGateway, upstreamErrType, "Upstream authentication failed, please contact administrator"
 									case http.StatusForbidden:
 										return http.StatusBadGateway, upstreamErrType, "Upstream access forbidden, please contact administrator"
 									case http.StatusTooManyRequests:
 										return http.StatusTooManyRequests, "rate_limit_error", "Upstream rate limit exceeded, please retry later"
 									case 529: // net/http has no named constant for 529
 										return http.StatusServiceUnavailable, "overloaded_error", "Upstream service overloaded, please retry later"
 									case http.StatusInternalServerError,
 										http.StatusBadGateway,
 										http.StatusServiceUnavailable,
 										http.StatusGatewayTimeout:
 										return http.StatusBadGateway, upstreamErrType, "Upstream service temporarily unavailable"
 									}
 									return http.StatusBadGateway, upstreamErrType, "Upstream request failed"
 								}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								// handleStreamingAwareError handles errors that may occur after streaming has started
 								func (h *GatewayHandler) handleStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
 									if streamStarted {
 										// Stream already started, send error as SSE event then close
 										flusher, ok := c.Writer.(http.Flusher)
 										if ok {
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+											// SSE 错误事件固定 schema，使用 Quote 直拼可避免额外 Marshal 分配。
 											errorEvent := `data: {"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n"
-												ci(backend): 添加 github actions (#10)

## 变更内容

### CI/CD
- 添加 GitHub Actions 工作流（test + golangci-lint）
- 添加 golangci-lint 配置，启用 errcheck/govet/staticcheck/unused/depguard
- 通过 depguard 强制 service 层不能直接导入 repository

### 错误处理修复
- 修复 CSV 写入、SSE 流式输出、随机数生成等未处理的错误
- GenerateRedeemCode() 现在返回 error

### 资源泄露修复
- 统一使用 defer func() { _ = xxx.Close() }() 模式

### 代码清理
- 移除未使用的常量
- 简化 nil map 检查
- 统一代码格式
											
										
										
											2025-12-20 15:29:52 +08:00
+											if _, err := fmt.Fprint(c.Writer, errorEvent); err != nil {
 												_ = c.Error(err)
 											}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+											flusher.Flush()
 										}
 										return
 									}
 									// Normal case: return JSON response with proper status code
 									h.errorResponse(c, status, errType, message)
 								}
-												feat(backend): 提交后端审计修复与配套测试改动

											
										
										
											2026-02-14 11:23:10 +08:00
+								// ensureForwardErrorResponse 在 Forward 返回错误但尚未写响应时补写统一错误响应。
 								// ensureForwardErrorResponse writes a generic 502 upstream_error when nothing has
 								// been written to the response yet. It returns true if it wrote that fallback and
 								// false if c or its writer is nil, or a response was already written.
 								func (h *GatewayHandler) ensureForwardErrorResponse(c *gin.Context, streamStarted bool) bool {
 									// Evaluated left to right, so the nil checks guard the Written() call.
 									pending := c != nil && c.Writer != nil && !c.Writer.Written()
 									if pending {
 										h.handleStreamingAwareError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed", streamStarted)
 									}
 									return pending
 								}
-												feat(gateway): 添加 Claude Code 客户端最低版本检查功能

- 通过 User-Agent 识别 Claude Code 客户端并提取版本号
- 在网关层验证客户端版本是否满足管理员配置的最低要求
- 在管理后台提供版本要求配置选项（英文/中文双语）
- 实现原子缓存 + singleflight 防止并发问题和 thundering herd
- 使用 context.WithoutCancel 隔离 DB 查询，避免客户端断连影响缓存
- 双 TTL 策略：60s 正常、5s 错误恢复，保证性能与可用性
- 仅检查 Claude Code 客户端，其他客户端不受影响
- 添加完整单元测试覆盖版本提取、比对、上下文操作

											
										
										
											2026-03-01 15:35:46 +08:00
+								// checkClaudeCodeVersion 检查 Claude Code 客户端版本是否满足最低要求
 								// checkClaudeCodeVersion reports whether the request may proceed with respect to
 								// the configured minimum Claude Code version. The check applies only to requests
 								// identified as coming from a Claude Code client, and never to paths ending in
 								// /count_tokens. When it returns false a 400 invalid_request_error has already
 								// been written.
 								func (h *GatewayHandler) checkClaudeCodeVersion(c *gin.Context) bool {
 									reqCtx := c.Request.Context()
 									// Other clients and the count_tokens sub-path are exempt.
 									if !service.IsClaudeCodeClient(reqCtx) || strings.HasSuffix(c.Request.URL.Path, "/count_tokens") {
 										return true
 									}
 									required := h.settingService.GetMinClaudeCodeVersion(reqCtx)
 									if required == "" {
 										// No minimum configured: nothing to enforce.
 										return true
 									}
 									current := service.GetClaudeCodeVersion(reqCtx)
 									switch {
 									case current == "":
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error",
 											"Unable to determine Claude Code version. Please update Claude Code: npm update -g @anthropic-ai/claude-code")
 										return false
 									case service.CompareVersions(current, required) < 0:
 										detail := fmt.Sprintf("Your Claude Code version (%s) is below the minimum required version (%s). Please update: npm update -g @anthropic-ai/claude-code",
 											current, required)
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", detail)
 										return false
 									default:
 										return true
 									}
 								}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								// errorResponse 返回Claude API格式的错误响应
 								// errorResponse writes an error in the Claude API error format:
 								// {"type":"error","error":{"type":errType,"message":message}} with the given
 								// HTTP status.
 								func (h *GatewayHandler) errorResponse(c *gin.Context, status int, errType, message string) {
 									detail := gin.H{
 										"type":    errType,
 										"message": message,
 									}
 									payload := gin.H{
 										"type":  "error",
 										"error": detail,
 									}
 									c.JSON(status, payload)
 								}
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 								// CountTokens handles token counting endpoint
 								// POST /v1/messages/count_tokens
 								// 特点：校验订阅/余额，但不计算并发、不记录使用量
 								func (h *GatewayHandler) CountTokens(c *gin.Context) {
 									// 从context获取apiKey和user（ApiKeyAuth中间件已设置）
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, ok := middleware2.GetAPIKeyFromContext(c)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									_, ok = middleware2.GetAuthSubjectFromContext(c)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
 										return
 									}
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+									reqLog := requestLogger(
 										c,
 										"handler.gateway.count_tokens",
 										zap.Int64("api_key_id", apiKey.ID),
 										zap.Any("group_id", apiKey.GroupID),
 									)
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									defer h.maybeLogCompatibilityFallbackMetrics(reqLog)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 									// 读取请求体
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									body, err := pkghttputil.ReadRequestBodyWithPrealloc(c.Request)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									if err != nil {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										if maxErr, ok := extractMaxBytesError(err); ok {
 											h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
 											return
 										}
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 										return
 									}
 									if len(body) == 0 {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, "", false, body)
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+									parsedReq, err := service.ParseGatewayRequest(body, domain.PlatformAnthropic)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									if err != nil {
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
 										return
 									}
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									// count_tokens 走 messages 严格校验时，复用已解析请求，避免二次反序列化。
 									SetClaudeCodeClientContext(c, body, parsedReq)
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+									reqLog = reqLog.With(zap.String("model", parsedReq.Model), zap.Bool("stream", parsedReq.Stream))
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									// 在请求上下文中记录 thinking 状态，供 Antigravity 最终模型 key 推导/模型维度限流使用
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									c.Request = c.Request.WithContext(service.WithThinkingEnabled(c.Request.Context(), parsedReq.ThinkingEnabled, h.metadataBridgeEnabled()))
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
-												fix(网关): 添加 model 参数必填验证

在以下端点添加 model 参数的必填验证，缺失时直接返回 400 错误：
- /v1/messages
- /v1/messages/count_tokens
- /openai/v1/responses

修复前：空 model 会进入账号选择流程，最终由上游 API 返回错误
修复后：入口处直接拒绝，避免浪费资源和不明确的错误信息

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2025-12-31 16:17:45 +08:00
+									// 验证 model 必填
 									if parsedReq.Model == "" {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, parsedReq.Model, parsedReq.Stream, body)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									// 获取订阅信息（可能为nil）
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									subscription, _ := middleware2.GetSubscriptionFromContext(c)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 									// 校验 billing eligibility（订阅/余额）
 									// 【注意】不计算并发，但需要校验订阅/余额
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+										status, code, message := billingErrorDetails(err)
 										h.errorResponse(c, status, code, message)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										return
 									}
 									// 计算粘性会话 hash
-												fix: prevent sessionHash collision for different users with same messages

Mix SessionContext (ClientIP, UserAgent, APIKeyID) into
GenerateSessionHash 3rd-level fallback to differentiate requests
from different users sending identical content.

Also switch hashContent from SHA256-truncated to XXHash64 for
better performance, and optimize Trie Lua script to match from
longest prefix first.

											
										
										
											2026-02-09 06:46:32 +08:00
+									parsedReq.SessionContext = &service.SessionContext{
 										ClientIP:  ip.GetClientIP(c),
 										UserAgent: c.GetHeader("User-Agent"),
 										APIKeyID:  apiKey.ID,
 									}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 									// 选择支持该模型的账号
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									if err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+										reqLog.Warn("gateway.count_tokens_select_account_failed", zap.Error(err))
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+										h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable")
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										return
 									}
-												feat(log): 落地统一日志底座与系统日志运维能力

											
										
										
											2026-02-12 16:27:29 +08:00
+									setOpsSelectedAccount(c, account.ID, account.Platform)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 									// 转发请求（不记录使用量）
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									if err := h.gatewayService.ForwardCountTokens(c.Request.Context(), c, account, parsedReq); err != nil {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+										reqLog.Error("gateway.count_tokens_forward_failed", zap.Int64("account_id", account.ID), zap.Error(err))
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										// 错误响应已在 ForwardCountTokens 中处理
 										return
 									}
 								}
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+								// InterceptType 表示请求拦截类型
 								type InterceptType int
 								const (
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									InterceptTypeNone              InterceptType = iota
 									InterceptTypeWarmup                          // 预热请求（返回 "New Conversation"）
 									InterceptTypeSuggestionMode                  // SUGGESTION MODE（返回空字符串）
 									InterceptTypeMaxTokensOneHaiku               // max_tokens=1 + haiku 探测请求（返回 "#"）
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+								)
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+								// isHaikuModel 检查模型名称是否包含 "haiku"（大小写不敏感）
 								// isHaikuModel reports whether the model name contains "haiku",
 								// ignoring letter case.
 								func isHaikuModel(model string) bool {
 									// Normalize once so the substring match is case-insensitive.
 									lowered := strings.ToLower(model)
 									return strings.Contains(lowered, "haiku")
 								}
 								// isMaxTokensOneHaikuRequest reports whether the request is a
 								// max_tokens=1 probe against a haiku model.
 								// Claude Code sends this kind of request to verify API connectivity.
 								// All three conditions must hold: max_tokens == 1, the model name
 								// contains "haiku", and the request is not streaming.
 								func isMaxTokensOneHaikuRequest(model string, maxTokens int, isStream bool) bool {
 									// Reject on the cheap scalar checks before inspecting the model name.
 									if isStream || maxTokens != 1 {
 										return false
 									}
 									return isHaikuModel(model)
 								}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+								// detectInterceptType 检测请求是否需要拦截，返回拦截类型
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+								// 参数说明：
 								//   - body: 请求体字节
 								//   - model: 请求的模型名称
 								//   - maxTokens: max_tokens 值
 								//   - isStream: 是否为流式请求
 								//   - isClaudeCodeClient: 是否已通过 Claude Code 客户端校验
 								func detectInterceptType(body []byte, model string, maxTokens int, isStream bool, isClaudeCodeClient bool) InterceptType {
 									// 优先检查 max_tokens=1 + haiku 探测请求（仅非流式）
 									if isClaudeCodeClient && isMaxTokensOneHaikuRequest(model, maxTokens, isStream) {
 										return InterceptTypeMaxTokensOneHaiku
 									}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									// 快速检查：如果不包含任何关键字，直接返回
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									bodyStr := string(body)
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									hasSuggestionMode := strings.Contains(bodyStr, "[SUGGESTION MODE:")
 									hasWarmupKeyword := strings.Contains(bodyStr, "title") || strings.Contains(bodyStr, "Warmup")
 									if !hasSuggestionMode && !hasWarmupKeyword {
 										return InterceptTypeNone
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									// 解析请求（只解析一次）
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									var req struct {
 										Messages []struct {
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+											Role    string `json:"role"`
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+											Content []struct {
 												Type string `json:"type"`
 												Text string `json:"text"`
 											} `json:"content"`
 										} `json:"messages"`
 										System []struct {
 											Text string `json:"text"`
 										} `json:"system"`
 									}
 									if err := json.Unmarshal(body, &req); err != nil {
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+										return InterceptTypeNone
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									// 检查 SUGGESTION MODE（最后一条 user 消息）
 									if hasSuggestionMode && len(req.Messages) > 0 {
 										lastMsg := req.Messages[len(req.Messages)-1]
 										if lastMsg.Role == "user" && len(lastMsg.Content) > 0 &&
 											lastMsg.Content[0].Type == "text" &&
 											strings.HasPrefix(lastMsg.Content[0].Text, "[SUGGESTION MODE:") {
 											return InterceptTypeSuggestionMode
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										}
 									}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									// 检查 Warmup 请求
 									if hasWarmupKeyword {
 										// 检查 messages 中的标题提示模式
 										for _, msg := range req.Messages {
 											for _, content := range msg.Content {
 												if content.Type == "text" {
 													if strings.Contains(content.Text, "Please write a 5-10 word title for the following conversation:") ||
 														content.Text == "Warmup" {
 														return InterceptTypeWarmup
 													}
 												}
 											}
 										}
 										// 检查 system 中的标题提取模式
 										for _, sys := range req.System {
 											if strings.Contains(sys.Text, "nalyze if this message indicates a new conversation topic. If it does, extract a 2-3 word title") {
 												return InterceptTypeWarmup
 											}
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										}
 									}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									return InterceptTypeNone
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+								}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+								// sendMockInterceptStream 发送流式 mock 响应（用于请求拦截）
 								func sendMockInterceptStream(c *gin.Context, model string, interceptType InterceptType) {
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									c.Header("Content-Type", "text/event-stream")
 									c.Header("Cache-Control", "no-cache")
 									c.Header("Connection", "keep-alive")
 									c.Header("X-Accel-Buffering", "no")
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									// 根据拦截类型决定响应内容
 									var msgID string
 									var outputTokens int
 									var textDeltas []string
 									switch interceptType {
 									case InterceptTypeSuggestionMode:
 										msgID = "msg_mock_suggestion"
 										outputTokens = 1
 										textDeltas = []string{""} // 空内容
 									default: // InterceptTypeWarmup
 										msgID = "msg_mock_warmup"
 										outputTokens = 2
 										textDeltas = []string{"New", " Conversation"}
 									}
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									// Build message_start event with fixed schema.
 									messageStartJSON := `{"type":"message_start","message":{"id":` + strconv.Quote(msgID) + `,"type":"message","role":"assistant","model":` + strconv.Quote(model) + `,"content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":0}}}`
-												fix: 修复 SSE/JSON 转义和 nil 安全问题

基于 Codex 审查建议修复关键安全问题。

SSE/JSON 转义修复：
- handleStreamingAwareError: 使用 json.Marshal 替代字符串拼接
- sendMockWarmupStream: 使用 json.Marshal 生成 message_start 事件
- 防止错误消息中的特殊字符导致无效 JSON

Nil 安全检查：
- SelectAccountWithLoadAwareness: 粘性会话层添加 s.cache != nil 检查
- BindStickySession: 添加 s.cache == nil 检查
- 防止 cache 未初始化时的运行时 panic

影响：
- 提升 SSE 错误处理的健壮性
- 避免客户端 JSON 解析失败
- 增强代码防御性编程

											
										
										
											2026-01-01 19:47:26 -08:00
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									// Build events
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									events := []string{
-												fix: 修复 SSE/JSON 转义和 nil 安全问题

基于 Codex 审查建议修复关键安全问题。

SSE/JSON 转义修复：
- handleStreamingAwareError: 使用 json.Marshal 替代字符串拼接
- sendMockWarmupStream: 使用 json.Marshal 生成 message_start 事件
- 防止错误消息中的特殊字符导致无效 JSON

Nil 安全检查：
- SelectAccountWithLoadAwareness: 粘性会话层添加 s.cache != nil 检查
- BindStickySession: 添加 s.cache == nil 检查
- 防止 cache 未初始化时的运行时 panic

影响：
- 提升 SSE 错误处理的健壮性
- 避免客户端 JSON 解析失败
- 增强代码防御性编程

											
										
										
											2026-01-01 19:47:26 -08:00
+										`event: message_start` + "\n" + `data: ` + string(messageStartJSON),
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										`event: content_block_start` + "\n" + `data: {"content_block":{"text":"","type":"text"},"index":0,"type":"content_block_start"}`,
 									}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									// Add text deltas
 									for _, text := range textDeltas {
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+										deltaJSON := `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":` + strconv.Quote(text) + `}}`
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+										events = append(events, `event: content_block_delta`+"\n"+`data: `+string(deltaJSON))
 									}
 									// Add final events
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":10,"output_tokens":` + strconv.Itoa(outputTokens) + `}}`
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
 									events = append(events,
 										`event: content_block_stop`+"\n"+`data: {"index":0,"type":"content_block_stop"}`,
 										`event: message_delta`+"\n"+`data: `+string(messageDeltaJSON),
 										`event: message_stop`+"\n"+`data: {"type":"message_stop"}`,
 									)
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									for _, event := range events {
 										_, _ = c.Writer.WriteString(event + "\n\n")
 										c.Writer.Flush()
 										time.Sleep(20 * time.Millisecond)
 									}
 								}
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+								// generateRealisticMsgID 生成仿真的消息 ID（msg_bdrk_XXXXXXX 格式）
 								// The ID is "msg_bdrk_" followed by 24 random alphanumeric characters,
 								// a shape meant to look like a message ID from a real Claude API response.
 								// If the system random source fails, it falls back to "msg_bdrk_" plus
 								// the current Unix time in nanoseconds.
 								func generateRealisticMsgID() string {
 									const (
 										alphabet  = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 										suffixLen = 24
 									)
 									suffix := make([]byte, suffixLen)
 									if _, err := rand.Read(suffix); err != nil {
 										return fmt.Sprintf("msg_bdrk_%d", time.Now().UnixNano())
 									}
 									// Map every random byte onto the alphabet in place.
 									for i, raw := range suffix {
 										suffix[i] = alphabet[int(raw)%len(alphabet)]
 									}
 									return "msg_bdrk_" + string(suffix)
 								}
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+								// sendMockInterceptResponse 发送非流式 mock 响应（用于请求拦截）
 								func sendMockInterceptResponse(c *gin.Context, model string, interceptType InterceptType) {
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									var msgID, text, stopReason string
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									var outputTokens int
 									switch interceptType {
 									case InterceptTypeSuggestionMode:
 										msgID = "msg_mock_suggestion"
 										text = ""
 										outputTokens = 1
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+										stopReason = "end_turn"
 									case InterceptTypeMaxTokensOneHaiku:
 										msgID = generateRealisticMsgID()
 										text = "#"
 										outputTokens = 1
 										stopReason = "max_tokens" // max_tokens=1 探测请求的 stop_reason 应为 max_tokens
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									default: // InterceptTypeWarmup
 										msgID = "msg_mock_warmup"
 										text = "New Conversation"
 										outputTokens = 2
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+										stopReason = "end_turn"
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+									}
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									// 构建完整的响应格式（与 Claude API 响应格式一致）
 									response := gin.H{
 										"model":         model,
 										"id":            msgID,
 										"type":          "message",
 										"role":          "assistant",
 										"content":       []gin.H{{"type": "text", "text": text}},
 										"stop_reason":   stopReason,
 										"stop_sequence": nil,
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										"usage": gin.H{
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+											"input_tokens":                10,
 											"cache_creation_input_tokens": 0,
 											"cache_read_input_tokens":     0,
 											"cache_creation": gin.H{
 												"ephemeral_5m_input_tokens": 0,
 												"ephemeral_1h_input_tokens": 0,
 											},
-												feat(gateway): 增加 SUGGESTION MODE 请求拦截

扩展现有的预热请求拦截功能，新增对 SUGGESTION MODE 请求的拦截：
- 检测 messages 最后一条 user 消息是否以 [SUGGESTION MODE: 开头
- 拦截后返回空内容响应，节省 token 消耗
- 重构检测逻辑，合并为单一函数，只解析一次 JSON

											
										
										
											2026-01-23 16:57:25 +08:00
+											"output_tokens": outputTokens,
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+											"total_tokens":  10 + outputTokens,
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										},
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									}
 									c.JSON(http.StatusOK, response)
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+								}
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
 								func billingErrorDetails(err error) (status int, code, message string) {
 									if errors.Is(err, service.ErrBillingServiceUnavailable) {
-												merge: 合并 test 分支到 test-dev，解决冲突

解决的冲突文件：
- wire_gen.go: 合并 ConcurrencyService/CRSSyncService 参数和 userAttributeHandler
- gateway_handler.go: 合并 pkg/errors 和 antigravity 导入
- gateway_service.go: 合并 validateUpstreamBaseURL 和 GetAvailableModels
- config.example.yaml: 合并 billing/turnstile 配置和额外 gateway 选项

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-03 11:36:31 +08:00
+										msg := pkgerrors.Message(err)
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+										if msg == "" {
 											msg = "Billing service temporarily unavailable. Please retry later."
 										}
 										return http.StatusServiceUnavailable, "billing_service_error", msg
 									}
-												feat: apikey支持5h/1d/7d速率控制

											
										
										
											2026-03-03 15:01:10 +08:00
+									if errors.Is(err, service.ErrAPIKeyRateLimit5hExceeded) {
 										msg := pkgerrors.Message(err)
 										return http.StatusTooManyRequests, "rate_limit_exceeded", msg
 									}
 									if errors.Is(err, service.ErrAPIKeyRateLimit1dExceeded) {
 										msg := pkgerrors.Message(err)
 										return http.StatusTooManyRequests, "rate_limit_exceeded", msg
 									}
 									if errors.Is(err, service.ErrAPIKeyRateLimit7dExceeded) {
 										msg := pkgerrors.Message(err)
 										return http.StatusTooManyRequests, "rate_limit_exceeded", msg
 									}
-												merge: 合并 test 分支到 test-dev，解决冲突

解决的冲突文件：
- wire_gen.go: 合并 ConcurrencyService/CRSSyncService 参数和 userAttributeHandler
- gateway_handler.go: 合并 pkg/errors 和 antigravity 导入
- gateway_service.go: 合并 validateUpstreamBaseURL 和 GetAvailableModels
- config.example.yaml: 合并 billing/turnstile 配置和额外 gateway 选项

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-03 11:36:31 +08:00
+									msg := pkgerrors.Message(err)
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+									if msg == "" {
-												chore(logging): 完成后端日志审计与结构化迁移

- 将高密度服务与处理器日志迁移到新日志系统（LegacyPrintf/结构化日志）
- 增加 stdlog bridge 与兼容测试，保留旧日志捕获能力
- 将 OpenAI 断流告警改为结构化 Warn 并改造对应测试为 sink 捕获
- 补齐后端相关文件 logger 引用并通过全量 go test

											
										
										
											2026-02-12 19:01:09 +08:00
+										logger.L().With(
 											zap.String("component", "handler.gateway.billing"),
 											zap.Error(err),
 										).Warn("gateway.billing_error_missing_message")
-												fix(audit): 第二批审计修复 — P0 生产 Bug、安全加固、性能优化、缓存一致性、代码质量

基于 backend-code-audit 审计报告，修复剩余 P0/P1/P2 共 34 项问题：

P0 生产 Bug：
- 修复 time.Since(time.Now()) 计时逻辑错误 (P0-03)
- generateRandomID 改用 crypto/rand 替代固定索引 (P0-04)
- IncrementQuotaUsed 重写为 Ent 原子操作消除 TOCTOU 竞态 (P0-05)

安全加固：
- gateway/openai handler 错误响应替换为泛化消息，防止内部信息泄露 (P1-14)
- usage_log_repo dateFormat 参数改用白名单映射，防止 SQL 注入 (P1-16)
- 默认配置安全加固：sslmode=prefer、response_headers=true、mode=release (P1-18/19, P2-15)

性能优化：
- gateway handler 循环内 defer 替换为显式 releaseWait 闭包 (P1-02)
- group_repo/promo_code_repo Count 前 Clone 查询避免状态污染 (P1-03)
- usage_log_repo 四个查询添加 LIMIT 10000 防止 OOM (P1-07)
- GetBatchUsageStats 添加时间范围参数，默认最近 30 天 (P1-10)
- ip.go CIDR 预编译为包级变量 (P1-11)
- BatchUpdateCredentials 重构为先验证后更新 (P1-13)

缓存一致性：
- billing_cache 添加 jitteredTTL 防止缓存雪崩 (P2-10)
- DeductUserBalance/UpdateSubscriptionUsage 错误传播修复 (P2-12)
- UserService.UpdateBalance 成功后异步失效 billingCache (P2-13)

代码质量：
- search 截断改为按 rune 处理，支持多字节字符 (P2-01)
- TLS Handshake 改为 HandshakeContext 支持 context 取消 (P2-07)
- CORS 预检添加 Access-Control-Max-Age: 86400 (P2-16)

测试覆盖：
- 新增 user_service_test.go（UpdateBalance 缓存失效 6 个用例）
- 新增 batch_update_credentials_test.go（fail-fast + 类型验证 7 个用例）
- 新增 response_transformer_test.go、ip_test.go、usage_log_repo_unit_test.go、search_truncate_test.go
- 集成测试：IncrementQuotaUsed 并发测试、billing_cache 错误传播测试
- config_test.go 补充 server.mode/sslmode 默认值断言

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-07 19:46:42 +08:00
+										msg = "Billing error"
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+									}
 									return http.StatusForbidden, "billing_error", msg
 								}
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+								func (h *GatewayHandler) metadataBridgeEnabled() bool {
 									if h == nil || h.cfg == nil {
 										return true
 									}
 									return h.cfg.Gateway.OpenAIWS.MetadataBridgeEnabled
 								}
 								// maybeLogCompatibilityFallbackMetrics writes a snapshot of the OpenAI
 								// compatibility fallback counters to reqLog once every
 								// gatewayCompatibilityMetricsLogInterval calls. A call with a nil reqLog
 								// returns immediately and does not advance the package-level call counter.
 								func (h *GatewayHandler) maybeLogCompatibilityFallbackMetrics(reqLog *zap.Logger) {
 									if reqLog == nil {
 										return
 									}
 									// Add returns the updated counter value, so only every
 									// gatewayCompatibilityMetricsLogInterval-th call reaches the log statement.
 									calls := gatewayCompatibilityMetricsLogCounter.Add(1)
 									if calls%gatewayCompatibilityMetricsLogInterval == 0 {
 										snapshot := service.SnapshotOpenAICompatibilityFallbackMetrics()
 										fields := []zap.Field{
 											zap.Int64("session_hash_legacy_read_fallback_total", snapshot.SessionHashLegacyReadFallbackTotal),
 											zap.Int64("session_hash_legacy_read_fallback_hit", snapshot.SessionHashLegacyReadFallbackHit),
 											zap.Int64("session_hash_legacy_dual_write_total", snapshot.SessionHashLegacyDualWriteTotal),
 											zap.Float64("session_hash_legacy_read_hit_rate", snapshot.SessionHashLegacyReadHitRate),
 											zap.Int64("metadata_legacy_fallback_total", snapshot.MetadataLegacyFallbackTotal),
 										}
 										reqLog.Info("gateway.compatibility_fallback_metrics", fields...)
 									}
 								}
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
// submitUsageRecordTask runs a usage-record task, preferring the handler's
// worker pool. A nil task is ignored. When no worker pool is set, the task
// runs synchronously on the calling goroutine with a context that times out
// after 10 seconds, and a panic raised by the task is recovered and logged.
+								func (h *GatewayHandler) submitUsageRecordTask(task service.UsageRecordTask) {
 									if task == nil {
 										return
 									}
 									if h.usageRecordWorkerPool != nil {
 										h.usageRecordWorkerPool.Submit(task)
 										return
 									}
 									// Fallback path: when no worker pool has been injected, run the task
 									// synchronously rather than falling back to unbounded goroutines.
 									ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 									defer cancel()
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+									defer func() {
 										// Recover here so a panicking task is logged instead of
 										// propagating to the caller of this method.
 										if recovered := recover(); recovered != nil {
 											logger.L().With(
 												zap.String("component", "handler.gateway.messages"),
 												zap.Any("panic", recovered),
 											).Error("gateway.usage_record_task_panic_recovered")
 										}
 									}()
-												feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool，支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则，并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期，服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-22 12:56:57 +08:00
+									task(ctx)
 								}
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
 								// getUserMsgQueueMode returns the user message queue (UMQ) mode that applies
 								// to the current request: "serialize", "throttle", or "" when UMQ does not
 								// apply.
 								//
 								// The result is "" when no queue helper is configured, when account is nil
 								// or is not an Anthropic OAuth/SetupToken account, or when parsed is not a
 								// real user message. The account-level mode takes precedence; the global
 								// configuration is consulted only when the account does not set a mode, and
 								// a handler without configuration yields "".
 								func (h *GatewayHandler) getUserMsgQueueMode(account *service.Account, parsed *service.ParsedRequest) string {
 									if h.userMsgQueueHelper == nil || account == nil {
 										return ""
 									}
 									// Only Anthropic OAuth/SetupToken accounts are eligible.
 									if !account.IsAnthropicOAuthOrSetupToken() {
 										return ""
 									}
 									if !service.IsRealUserMessage(parsed) {
 										return ""
 									}
 									// The account-level mode wins over the global configuration.
 									if mode := account.GetUserMsgQueueMode(); mode != "" {
 										return mode
 									}
 									// Without configuration there is no global mode to fall back to;
 									// metadataBridgeEnabled tolerates a nil cfg in the same way.
 									if h.cfg == nil {
 										return ""
 									}
 									return h.cfg.Gateway.UserMessageQueue.GetEffectiveMode()
 								}