backend/internal/handler/gateway_handler.go

package handler

import (
	"context"
	"crypto/rand"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
	"time"

	"github.com/Wei-Shaw/sub2api/internal/config"
	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
	pkgerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
	"github.com/Wei-Shaw/sub2api/internal/service"

	"github.com/gin-gonic/gin"
)

// GatewayHandler handles API gateway requests.
//
// It bundles the services the gateway endpoints call (account selection and
// forwarding, billing eligibility checks, usage statistics, API key handling
// and error passthrough rules) together with the concurrency helper built in
// NewGatewayHandler and two failover limits: maxAccountSwitches is used by
// Messages for non-Gemini requests and maxAccountSwitchesGemini for Gemini
// requests.
type GatewayHandler struct {
	gatewayService            *service.GatewayService
	geminiCompatService       *service.GeminiMessagesCompatService
	antigravityGatewayService *service.AntigravityGatewayService
	userService               *service.UserService
	billingCacheService       *service.BillingCacheService
	usageService              *service.UsageService
	apiKeyService             *service.APIKeyService
	errorPassthroughService   *service.ErrorPassthroughService
	concurrencyHelper         *ConcurrencyHelper
	maxAccountSwitches        int
	maxAccountSwitchesGemini  int
}

// NewGatewayHandler wires a GatewayHandler from its service dependencies.
//
// The account-switch limits default to 10 (general) and 3 (Gemini); a positive
// value in cfg.Gateway overrides the matching default. The SSE ping interval
// is taken from cfg.Concurrency.PingInterval (seconds) and is zero when cfg is
// nil.
func NewGatewayHandler(
	gatewayService *service.GatewayService,
	geminiCompatService *service.GeminiMessagesCompatService,
	antigravityGatewayService *service.AntigravityGatewayService,
	userService *service.UserService,
	concurrencyService *service.ConcurrencyService,
	billingCacheService *service.BillingCacheService,
	usageService *service.UsageService,
	apiKeyService *service.APIKeyService,
	errorPassthroughService *service.ErrorPassthroughService,
	cfg *config.Config,
) *GatewayHandler {
	// Start from the built-in defaults; config overrides are applied below.
	handler := &GatewayHandler{
		gatewayService:            gatewayService,
		geminiCompatService:       geminiCompatService,
		antigravityGatewayService: antigravityGatewayService,
		userService:               userService,
		billingCacheService:       billingCacheService,
		usageService:              usageService,
		apiKeyService:             apiKeyService,
		errorPassthroughService:   errorPassthroughService,
		maxAccountSwitches:        10,
		maxAccountSwitchesGemini:  3,
	}

	var pingInterval time.Duration
	if cfg != nil {
		pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second
		if limit := cfg.Gateway.MaxAccountSwitches; limit > 0 {
			handler.maxAccountSwitches = limit
		}
		if limit := cfg.Gateway.MaxAccountSwitchesGemini; limit > 0 {
			handler.maxAccountSwitchesGemini = limit
		}
	}

	handler.concurrencyHelper = NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval)
	return handler
}

// Messages handles the Claude API compatible messages endpoint.
// POST /v1/messages
//
// Flow, as implemented below:
//  1. Read and parse the body, then record the probe / Claude Code / thinking
//     flags in the request context.
//  2. Reserve a place in the user wait queue, acquire the user concurrency
//     slot, and re-check billing eligibility after the wait.
//  3. Derive the sticky-session key and the target platform.
//  4. Select an account, acquire its slot and forward the request. On an
//     UpstreamFailoverError another account is tried, up to the configured
//     switch limit.
//  5. On success, record usage asynchronously and return.
//
// Gemini requests use their own loop. For all other platforms a
// PromptTooLongError may trigger a single retry against the group's
// configured fallback group.
func (h *GatewayHandler) Messages(c *gin.Context) {
	// Fetch the API key and user from the context (set by the ApiKeyAuth middleware).
	apiKey, ok := middleware2.GetAPIKeyFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	subject, ok := middleware2.GetAuthSubjectFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
		return
	}

	// Read the request body.
	body, err := io.ReadAll(c.Request.Body)
	if err != nil {
		if maxErr, ok := extractMaxBytesError(err); ok {
			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
			return
		}
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
		return
	}

	if len(body) == 0 {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
		return
	}

	// Record the raw body first; model/stream are filled in again once parsed.
	setOpsRequestContext(c, "", false, body)

	parsedReq, err := service.ParseGatewayRequest(body)
	if err != nil {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
		return
	}
	reqModel := parsedReq.Model
	reqStream := parsedReq.Stream

	// Flag max_tokens=1 + haiku probe requests in the context.
	// This must happen before SetClaudeCodeClientContext, because
	// ClaudeCodeValidator reads the flag to decide whether to bypass validation.
	if isMaxTokensOneHaikuRequest(reqModel, parsedReq.MaxTokens, reqStream) {
		ctx := context.WithValue(c.Request.Context(), ctxkey.IsMaxTokensOneHaikuRequest, true)
		c.Request = c.Request.WithContext(ctx)
	}

	// Detect whether the caller is a Claude Code client and record it in the context.
	SetClaudeCodeClientContext(c, body)
	isClaudeCodeClient := service.IsClaudeCodeClient(c.Request.Context())

	// Record the thinking state in the request context; it is used for Antigravity's
	// final model key derivation and per-model rate limiting.
	c.Request = c.Request.WithContext(context.WithValue(c.Request.Context(), ctxkey.ThinkingEnabled, parsedReq.ThinkingEnabled))

	setOpsRequestContext(c, reqModel, reqStream, body)

	// The model field is required.
	if reqModel == "" {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
		return
	}

	// Track if we've started streaming (for error handling)
	streamStarted := false

	// Bind the error passthrough service so the service layer can reuse the
	// rules for non-failover errors.
	if h.errorPassthroughService != nil {
		service.BindErrorPassthroughService(c, h.errorPassthroughService)
	}

	// Fetch the subscription (may be nil) up front; it is needed by later checks.
	subscription, _ := middleware2.GetSubscriptionFromContext(c)

	// 0. Check whether the wait queue is already full.
	maxWait := service.CalculateMaxWait(subject.Concurrency)
	canWait, err := h.concurrencyHelper.IncrementWaitCount(c.Request.Context(), subject.UserID, maxWait)
	waitCounted := false
	if err != nil {
		log.Printf("Increment wait count failed: %v", err)
		// On error, allow request to proceed
	} else if !canWait {
		h.errorResponse(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later")
		return
	}
	if err == nil && canWait {
		waitCounted = true
	}
	// Ensure we decrement if we exit before acquiring the user slot.
	defer func() {
		if waitCounted {
			h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
		}
	}()

	// 1. Acquire the user concurrency slot first.
	userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted)
	if err != nil {
		log.Printf("User concurrency acquire failed: %v", err)
		h.handleConcurrencyError(c, err, "user", streamStarted)
		return
	}
	// User slot acquired: no longer waiting in the queue.
	if waitCounted {
		h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
		waitCounted = false
	}
	// Make sure the slot is released when the request ends or the context is
	// cancelled, so a client disconnect cannot leak it.
	userReleaseFunc = wrapReleaseOnDone(c.Request.Context(), userReleaseFunc)
	if userReleaseFunc != nil {
		defer userReleaseFunc()
	}

	// 2. Re-check balance/subscription after the wait.
	if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
		log.Printf("Billing eligibility check failed after wait: %v", err)
		status, code, message := billingErrorDetails(err)
		h.handleStreamingAwareError(c, status, code, message, streamStarted)
		return
	}

	// Compute the sticky-session hash.
	parsedReq.SessionContext = &service.SessionContext{
		ClientIP:  ip.GetClientIP(c),
		UserAgent: c.GetHeader("User-Agent"),
		APIKeyID:  apiKey.ID,
	}
	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)

	// Resolve the platform: prefer the forced platform (/antigravity routes; the
	// middleware has set it on request.Context), otherwise use the group's platform.
	platform := ""
	if forcePlatform, ok := middleware2.GetForcePlatformFromContext(c); ok {
		platform = forcePlatform
	} else if apiKey.Group != nil {
		platform = apiKey.Group.Platform
	}
	// Gemini session keys get a "gemini:" prefix.
	sessionKey := sessionHash
	if platform == service.PlatformGemini && sessionHash != "" {
		sessionKey = "gemini:" + sessionHash
	}

	// Look up the account ID bound to the sticky session.
	var sessionBoundAccountID int64
	if sessionKey != "" {
		sessionBoundAccountID, _ = h.gatewayService.GetCachedSessionAccountID(c.Request.Context(), apiKey.GroupID, sessionKey)
	}
	// A sticky session is really bound only when there is a sessionKey and it
	// already maps to an account.
	hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0

	if platform == service.PlatformGemini {
		maxAccountSwitches := h.maxAccountSwitchesGemini
		switchCount := 0
		failedAccountIDs := make(map[int64]struct{})
		var lastFailoverErr *service.UpstreamFailoverError
		var forceCacheBilling bool // cache-billing flag for sticky-session switches

		// Each iteration tries one account; failover errors loop to the next one.
		for {
			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini does not use session limits
			if err != nil {
				// Nothing has been tried yet: there were no accounts to begin with.
				if len(failedAccountIDs) == 0 {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
					return
				}
				if lastFailoverErr != nil {
					h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted)
				} else {
					h.handleFailoverExhaustedSimple(c, 502, streamStarted)
				}
				return
			}
			account := selection.Account
			setOpsSelectedAccount(c, account.ID)

			// Check request interception (warmup requests, SUGGESTION MODE, etc.).
			if account.IsInterceptWarmupEnabled() {
				interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
				if interceptType != InterceptTypeNone {
					// Give back the slot taken during selection before answering with a mock.
					if selection.Acquired && selection.ReleaseFunc != nil {
						selection.ReleaseFunc()
					}
					if reqStream {
						sendMockInterceptStream(c, reqModel, interceptType)
					} else {
						sendMockInterceptResponse(c, reqModel, interceptType)
					}
					return
				}
			}

			// 3. Acquire the account concurrency slot.
			accountReleaseFunc := selection.ReleaseFunc
			if !selection.Acquired {
				if selection.WaitPlan == nil {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
					return
				}
				accountWaitCounted := false
				canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
				if err != nil {
					log.Printf("Increment account wait count failed: %v", err)
				} else if !canWait {
					log.Printf("Account wait queue full: account=%d", account.ID)
					h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
					return
				}
				if err == nil && canWait {
					accountWaitCounted = true
				}
				// Ensure the wait counter is decremented if we exit before acquiring the slot.
				// NOTE: this defer is registered inside the loop, so it runs when Messages
				// returns, not at the end of the iteration; accountWaitCounted is cleared
				// below once the slot is acquired.
				defer func() {
					if accountWaitCounted {
						h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
					}
				}()

				accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
					c,
					account.ID,
					selection.WaitPlan.MaxConcurrency,
					selection.WaitPlan.Timeout,
					reqStream,
					&streamStarted,
				)
				if err != nil {
					log.Printf("Account concurrency acquire failed: %v", err)
					h.handleConcurrencyError(c, err, "account", streamStarted)
					return
				}
				// Slot acquired: no longer waiting in queue.
				if accountWaitCounted {
					h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
					accountWaitCounted = false
				}
				if err := h.gatewayService.BindStickySession(c.Request.Context(), apiKey.GroupID, sessionKey, account.ID); err != nil {
					log.Printf("Bind sticky session failed: %v", err)
				}
			}
			// The account slot / wait count must be reclaimed safely on timeout or disconnect.
			accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)

			// Forward the request, dispatching on the account's platform.
			var result *service.ForwardResult
			requestCtx := c.Request.Context()
			if switchCount > 0 {
				requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
			}
			if account.Platform == service.PlatformAntigravity {
				result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, reqModel, "generateContent", reqStream, body, hasBoundSession)
			} else {
				result, err = h.geminiCompatService.Forward(requestCtx, c, account, body)
			}
			// Release the account slot as soon as forwarding has finished.
			if accountReleaseFunc != nil {
				accountReleaseFunc()
			}
			if err != nil {
				var failoverErr *service.UpstreamFailoverError
				if errors.As(err, &failoverErr) {
					// Exclude this account from the next selection.
					failedAccountIDs[account.ID] = struct{}{}
					lastFailoverErr = failoverErr
					if failoverErr.ForceCacheBilling {
						forceCacheBilling = true
					}
					if switchCount >= maxAccountSwitches {
						h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted)
						return
					}
					switchCount++
					log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
					continue
				}
				// The error response was already written inside Forward; only log here.
				log.Printf("Forward request failed: %v", err)
				return
			}

			// Capture request info now (for async recording; avoids touching
			// gin.Context inside the goroutine).
			userAgent := c.GetHeader("User-Agent")
			clientIP := ip.GetClientIP(c)

			// Record usage asynchronously (subscription was fetched at the top of the function).
			go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string, fcb bool) {
				// Detached from the request context, bounded by a 10 second timeout.
				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
				defer cancel()
				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
					Result:            result,
					APIKey:            apiKey,
					User:              apiKey.User,
					Account:           usedAccount,
					Subscription:      subscription,
					UserAgent:         ua,
					IPAddress:         clientIP,
					ForceCacheBilling: fcb,
					APIKeyService:     h.apiKeyService,
				}); err != nil {
					log.Printf("Record usage failed: %v", err)
				}
			}(result, account, userAgent, clientIP, forceCacheBilling)
			return
		}
	}

	// Non-Gemini path. currentAPIKey/currentSubscription are swapped for the
	// fallback group's values when a prompt-too-long fallback retry happens.
	currentAPIKey := apiKey
	currentSubscription := subscription
	var fallbackGroupID *int64
	if apiKey.Group != nil {
		fallbackGroupID = apiKey.Group.FallbackGroupIDOnInvalidRequest
	}
	fallbackUsed := false

	// Outer loop: runs once normally, and a second time after a fallback switch.
	for {
		maxAccountSwitches := h.maxAccountSwitches
		switchCount := 0
		failedAccountIDs := make(map[int64]struct{})
		var lastFailoverErr *service.UpstreamFailoverError
		retryWithFallback := false
		var forceCacheBilling bool // cache-billing flag for sticky-session switches

		// Inner loop: each iteration tries one account.
		for {
			// Select an account that supports the requested model.
			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, failedAccountIDs, parsedReq.MetadataUserID)
			if err != nil {
				// Nothing has been tried yet: there were no accounts to begin with.
				if len(failedAccountIDs) == 0 {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
					return
				}
				if lastFailoverErr != nil {
					h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted)
				} else {
					h.handleFailoverExhaustedSimple(c, 502, streamStarted)
				}
				return
			}
			account := selection.Account
			setOpsSelectedAccount(c, account.ID)

			// Check request interception (warmup requests, SUGGESTION MODE, etc.).
			if account.IsInterceptWarmupEnabled() {
				interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
				if interceptType != InterceptTypeNone {
					// Give back the slot taken during selection before answering with a mock.
					if selection.Acquired && selection.ReleaseFunc != nil {
						selection.ReleaseFunc()
					}
					if reqStream {
						sendMockInterceptStream(c, reqModel, interceptType)
					} else {
						sendMockInterceptResponse(c, reqModel, interceptType)
					}
					return
				}
			}

			// 3. Acquire the account concurrency slot.
			accountReleaseFunc := selection.ReleaseFunc
			if !selection.Acquired {
				if selection.WaitPlan == nil {
					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
					return
				}
				accountWaitCounted := false
				canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
				if err != nil {
					log.Printf("Increment account wait count failed: %v", err)
				} else if !canWait {
					log.Printf("Account wait queue full: account=%d", account.ID)
					h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
					return
				}
				if err == nil && canWait {
					accountWaitCounted = true
				}
				// Decrement the wait counter if we exit before acquiring the slot.
				// NOTE: registered inside the loop, so this runs when Messages returns;
				// accountWaitCounted is cleared below once the slot is acquired.
				defer func() {
					if accountWaitCounted {
						h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
					}
				}()

				accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
					c,
					account.ID,
					selection.WaitPlan.MaxConcurrency,
					selection.WaitPlan.Timeout,
					reqStream,
					&streamStarted,
				)
				if err != nil {
					log.Printf("Account concurrency acquire failed: %v", err)
					h.handleConcurrencyError(c, err, "account", streamStarted)
					return
				}
				// Slot acquired: no longer waiting in queue.
				if accountWaitCounted {
					h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
					accountWaitCounted = false
				}
				if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
					log.Printf("Bind sticky session failed: %v", err)
				}
			}
			// The account slot / wait count must be reclaimed safely on timeout or disconnect.
			accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)

			// Forward the request, dispatching on the account's platform.
			var result *service.ForwardResult
			requestCtx := c.Request.Context()
			if switchCount > 0 {
				requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
			}
			// Antigravity accounts of API-key type go through the generic gateway service.
			if account.Platform == service.PlatformAntigravity && account.Type != service.AccountTypeAPIKey {
				result, err = h.antigravityGatewayService.Forward(requestCtx, c, account, body, hasBoundSession)
			} else {
				result, err = h.gatewayService.Forward(requestCtx, c, account, parsedReq)
			}
			// Release the account slot as soon as forwarding has finished.
			if accountReleaseFunc != nil {
				accountReleaseFunc()
			}
			if err != nil {
				var promptTooLongErr *service.PromptTooLongError
				if errors.As(err, &promptTooLongErr) {
					log.Printf("Prompt too long from antigravity: group=%d fallback_group_id=%v fallback_used=%v", currentAPIKey.GroupID, fallbackGroupID, fallbackUsed)
					// Only one fallback attempt is made, and only when a fallback group is configured.
					if !fallbackUsed && fallbackGroupID != nil && *fallbackGroupID > 0 {
						fallbackGroup, err := h.gatewayService.ResolveGroupByID(c.Request.Context(), *fallbackGroupID)
						if err != nil {
							log.Printf("Resolve fallback group failed: %v", err)
							_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
							return
						}
						// The fallback group must be an Anthropic group, must not be
						// subscription-typed, and must not itself have a fallback group.
						if fallbackGroup.Platform != service.PlatformAnthropic ||
							fallbackGroup.SubscriptionType == service.SubscriptionTypeSubscription ||
							fallbackGroup.FallbackGroupIDOnInvalidRequest != nil {
							log.Printf("Fallback group invalid: group=%d platform=%s subscription=%s", fallbackGroup.ID, fallbackGroup.Platform, fallbackGroup.SubscriptionType)
							_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
							return
						}
						fallbackAPIKey := cloneAPIKeyWithGroup(apiKey, fallbackGroup)
						if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), fallbackAPIKey.User, fallbackAPIKey, fallbackGroup, nil); err != nil {
							status, code, message := billingErrorDetails(err)
							h.handleStreamingAwareError(c, status, code, message, streamStarted)
							return
						}
						// Treat the fallback retry as a direct request to the fallback group:
						// clear the forced platform so scheduling follows the group's platform.
						ctx := context.WithValue(c.Request.Context(), ctxkey.ForcePlatform, "")
						c.Request = c.Request.WithContext(ctx)
						currentAPIKey = fallbackAPIKey
						currentSubscription = nil
						fallbackUsed = true
						retryWithFallback = true
						// Leave the inner loop; the outer loop restarts with fresh failover state.
						break
					}
					_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
					return
				}
				var failoverErr *service.UpstreamFailoverError
				if errors.As(err, &failoverErr) {
					// Exclude this account from the next selection.
					failedAccountIDs[account.ID] = struct{}{}
					lastFailoverErr = failoverErr
					if failoverErr.ForceCacheBilling {
						forceCacheBilling = true
					}
					if switchCount >= maxAccountSwitches {
						h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted)
						return
					}
					switchCount++
					log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
					continue
				}
				// The error response was already written inside Forward; only log here.
				log.Printf("Account %d: Forward request failed: %v", account.ID, err)
				return
			}

			// Capture request info now (for async recording; avoids touching
			// gin.Context inside the goroutine).
			userAgent := c.GetHeader("User-Agent")
			clientIP := ip.GetClientIP(c)

			// Record usage asynchronously (subscription was fetched at the top of the function).
			go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string, fcb bool) {
				// Detached from the request context, bounded by a 10 second timeout.
				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
				defer cancel()
				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
					Result:            result,
					APIKey:            currentAPIKey,
					User:              currentAPIKey.User,
					Account:           usedAccount,
					Subscription:      currentSubscription,
					UserAgent:         ua,
					IPAddress:         clientIP,
					ForceCacheBilling: fcb,
					APIKeyService:     h.apiKeyService,
				}); err != nil {
					log.Printf("Record usage failed: %v", err)
				}
			}(result, account, userAgent, clientIP, forceCacheBilling)
			return
		}
		if !retryWithFallback {
			return
		}
	}
}

// Models lists the models available to the caller.
// GET /v1/models
//
// When the accounts of the caller's group expose a model whitelist
// (model_mapping), that whitelist is returned. Otherwise the handler falls
// back to the OpenAI defaults for "openai" groups and to the Claude defaults
// for everything else.
func (h *GatewayHandler) Models(c *gin.Context) {
	var (
		groupID  *int64
		platform string
	)
	if apiKey, _ := middleware2.GetAPIKeyFromContext(c); apiKey != nil && apiKey.Group != nil {
		groupID, platform = &apiKey.Group.ID, apiKey.Group.Platform
	}

	// respond writes the list envelope shared by every branch.
	respond := func(data any) {
		c.JSON(http.StatusOK, gin.H{
			"object": "list",
			"data":   data,
		})
	}

	// Query the account configurations without a platform filter.
	whitelist := h.gatewayService.GetAvailableModels(c.Request.Context(), groupID, "")
	if len(whitelist) > 0 {
		entries := make([]claude.Model, len(whitelist))
		for i, id := range whitelist {
			entries[i] = claude.Model{
				ID:          id,
				Type:        "model",
				DisplayName: id,
				CreatedAt:   "2024-01-01T00:00:00Z",
			}
		}
		respond(entries)
		return
	}

	// No whitelist configured: fall back to the platform defaults.
	if platform == "openai" {
		respond(openai.DefaultModels)
		return
	}
	respond(claude.DefaultModels)
}

// AntigravityModels returns the full list of models supported by Antigravity.
// GET /antigravity/models
func (h *GatewayHandler) AntigravityModels(c *gin.Context) {
	payload := gin.H{"object": "list"}
	payload["data"] = antigravity.DefaultModels()
	c.JSON(http.StatusOK, payload)
}

// cloneAPIKeyWithGroup returns a shallow copy of apiKey whose Group and
// GroupID point at group. The original key is left untouched. If either
// argument is nil, apiKey is returned as-is.
func cloneAPIKeyWithGroup(apiKey *service.APIKey, group *service.Group) *service.APIKey {
	if apiKey != nil && group != nil {
		copied := *apiKey
		// Take the address of a local copy so the clone does not alias group.ID.
		id := group.ID
		copied.Group, copied.GroupID = group, &id
		return &copied
	}
	return apiKey
}

// Usage reports the caller's remaining quota and usage statistics for the
// CC Switch integration.
// GET /v1/usage
//
// Subscription-type groups get the subscription limits and usage; all other
// callers get their wallet balance. Per-API-key dashboard statistics are
// attached under "usage" when they can be loaded.
func (h *GatewayHandler) Usage(c *gin.Context) {
	apiKey, hasKey := middleware2.GetAPIKeyFromContext(c)
	if !hasKey {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	subject, hasSubject := middleware2.GetAuthSubjectFromContext(c)
	if !hasSubject {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	// Best effort: statistics are filtered by the current API key, and a
	// failure here must not affect the base response.
	var usageData gin.H
	if h.usageService != nil {
		if stats, statsErr := h.usageService.GetAPIKeyDashboardStats(c.Request.Context(), apiKey.ID); statsErr == nil && stats != nil {
			today := gin.H{
				"requests":              stats.TodayRequests,
				"input_tokens":          stats.TodayInputTokens,
				"output_tokens":         stats.TodayOutputTokens,
				"cache_creation_tokens": stats.TodayCacheCreationTokens,
				"cache_read_tokens":     stats.TodayCacheReadTokens,
				"total_tokens":          stats.TodayTokens,
				"cost":                  stats.TodayCost,
				"actual_cost":           stats.TodayActualCost,
			}
			total := gin.H{
				"requests":              stats.TotalRequests,
				"input_tokens":          stats.TotalInputTokens,
				"output_tokens":         stats.TotalOutputTokens,
				"cache_creation_tokens": stats.TotalCacheCreationTokens,
				"cache_read_tokens":     stats.TotalCacheReadTokens,
				"total_tokens":          stats.TotalTokens,
				"cost":                  stats.TotalCost,
				"actual_cost":           stats.TotalActualCost,
			}
			usageData = gin.H{
				"today":               today,
				"total":               total,
				"average_duration_ms": stats.AverageDurationMs,
				"rpm":                 stats.Rpm,
				"tpm":                 stats.Tpm,
			}
		}
	}

	// reply attaches the optional statistics and writes the 200 response.
	reply := func(payload gin.H) {
		if usageData != nil {
			payload["usage"] = usageData
		}
		c.JSON(http.StatusOK, payload)
	}

	// Subscription mode: return subscription limits plus usage statistics.
	if group := apiKey.Group; group != nil && group.IsSubscriptionType() {
		subscription, found := middleware2.GetSubscriptionFromContext(c)
		if !found {
			h.errorResponse(c, http.StatusForbidden, "subscription_error", "No active subscription")
			return
		}

		limits := gin.H{
			"daily_usage_usd":   subscription.DailyUsageUSD,
			"weekly_usage_usd":  subscription.WeeklyUsageUSD,
			"monthly_usage_usd": subscription.MonthlyUsageUSD,
			"daily_limit_usd":   group.DailyLimitUSD,
			"weekly_limit_usd":  group.WeeklyLimitUSD,
			"monthly_limit_usd": group.MonthlyLimitUSD,
			"expires_at":        subscription.ExpiresAt,
		}
		reply(gin.H{
			"isValid":      true,
			"planName":     group.Name,
			"remaining":    h.calculateSubscriptionRemaining(group, subscription),
			"unit":         "USD",
			"subscription": limits,
		})
		return
	}

	// Balance mode: return the wallet balance plus usage statistics.
	latestUser, err := h.userService.GetByID(c.Request.Context(), subject.UserID)
	if err != nil {
		h.errorResponse(c, http.StatusInternalServerError, "api_error", "Failed to get user info")
		return
	}

	reply(gin.H{
		"isValid":   true,
		"planName":  "钱包余额",
		"remaining": latestUser.Balance,
		"unit":      "USD",
		"balance":   latestUser.Balance,
	})
}

// calculateSubscriptionRemaining computes the quota still available to a
// subscription.
//
// Rules:
//  1. If any configured daily/weekly/monthly limit is fully used, it returns 0.
//  2. Otherwise it returns the smallest remaining amount among the configured
//     periods.
//  3. If no limit is configured at all, it returns -1 (unlimited).
func (h *GatewayHandler) calculateSubscriptionRemaining(group *service.Group, sub *service.UserSubscription) float64 {
	// lowest holds the running minimum; it stays at -1 until a limit is seen.
	lowest, seen := -1.0, false

	// track folds one period's headroom into the running minimum and reports
	// whether that period's quota is exhausted.
	track := func(limit *float64, used float64) bool {
		left := *limit - used
		if left <= 0 {
			return true
		}
		if !seen || left < lowest {
			lowest = left
		}
		seen = true
		return false
	}

	// Limits are dereferenced only after the matching Has*Limit check passes.
	if group.HasDailyLimit() && track(group.DailyLimitUSD, sub.DailyUsageUSD) {
		return 0
	}
	if group.HasWeeklyLimit() && track(group.WeeklyLimitUSD, sub.WeeklyUsageUSD) {
		return 0
	}
	if group.HasMonthlyLimit() && track(group.MonthlyLimitUSD, sub.MonthlyUsageUSD) {
		return 0
	}
	return lowest
}

// handleConcurrencyError reports a failed concurrency-slot acquisition as a
// 429 rate_limit_error. slotType names the kind of slot in the message; err
// itself is not included in the response.
func (h *GatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotType string, streamStarted bool) {
	message := fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType)
	h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", message, streamStarted)
}

// handleFailoverExhausted writes the final error once account failover has
// run out of options.
//
// If an error passthrough rule matches the upstream status and body, the rule
// decides the response code and message. Otherwise the default mapping from
// mapUpstreamError is used.
func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
	upstreamStatus := failoverErr.StatusCode
	upstreamBody := failoverErr.ResponseBody

	// useDefaultMapping answers with the built-in status/type/message mapping.
	useDefaultMapping := func() {
		status, errType, errMsg := h.mapUpstreamError(upstreamStatus)
		h.handleStreamingAwareError(c, status, errType, errMsg, streamStarted)
	}

	// Passthrough rules are only consulted when there is a body to match on.
	if h.errorPassthroughService == nil || len(upstreamBody) == 0 {
		useDefaultMapping()
		return
	}
	rule := h.errorPassthroughService.MatchRule(platform, upstreamStatus, upstreamBody)
	if rule == nil {
		useDefaultMapping()
		return
	}

	// Response status: the upstream code unless the rule substitutes its own.
	outStatus := upstreamStatus
	if rule.ResponseCode != nil && !rule.PassthroughCode {
		outStatus = *rule.ResponseCode
	}

	// Response message: the upstream message unless the rule supplies a custom one.
	outMsg := service.ExtractUpstreamErrorMessage(upstreamBody)
	if rule.CustomMessage != nil && !rule.PassthroughBody {
		outMsg = *rule.CustomMessage
	}

	h.handleStreamingAwareError(c, outStatus, "upstream_error", outMsg, streamStarted)
}

// handleFailoverExhaustedSimple is the simplified variant used when there is
// no upstream response body: it maps statusCode through mapUpstreamError and
// writes the result.
func (h *GatewayHandler) handleFailoverExhaustedSimple(c *gin.Context, statusCode int, streamStarted bool) {
	mappedStatus, mappedType, mappedMsg := h.mapUpstreamError(statusCode)
	h.handleStreamingAwareError(c, mappedStatus, mappedType, mappedMsg, streamStarted)
}

// mapUpstreamError translates an upstream HTTP status code into the status,
// error type and message returned to the client.
//
// Upstream 429 stays 429 (rate_limit_error) and 529 becomes 503
// (overloaded_error). Every other code is reported as 502 upstream_error with
// a message that depends on the upstream status.
func (h *GatewayHandler) mapUpstreamError(statusCode int) (int, string, string) {
	// The two cases that do not map to 502 are handled first.
	if statusCode == http.StatusTooManyRequests {
		return http.StatusTooManyRequests, "rate_limit_error", "Upstream rate limit exceeded, please retry later"
	}
	if statusCode == 529 {
		return http.StatusServiceUnavailable, "overloaded_error", "Upstream service overloaded, please retry later"
	}

	// Everything else is a 502 upstream_error; only the message varies.
	message := "Upstream request failed"
	switch statusCode {
	case http.StatusUnauthorized:
		message = "Upstream authentication failed, please contact administrator"
	case http.StatusForbidden:
		message = "Upstream access forbidden, please contact administrator"
	case http.StatusInternalServerError,
		http.StatusBadGateway,
		http.StatusServiceUnavailable,
		http.StatusGatewayTimeout:
		message = "Upstream service temporarily unavailable"
	}
	return http.StatusBadGateway, "upstream_error", message
}

// handleStreamingAwareError writes an error in the form that matches the
// current response state.
//
// Before streaming has started it sends a regular JSON error with the given
// status. Once streaming has started the status code can no longer be changed,
// so the error is emitted as an SSE "data:" event and flushed; if the writer
// does not support flushing, nothing is written.
func (h *GatewayHandler) handleStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
	// Normal case: return a JSON response with the proper status code.
	if !streamStarted {
		h.errorResponse(c, status, errType, message)
		return
	}

	flusher, canFlush := c.Writer.(http.Flusher)
	if !canFlush {
		return
	}

	// Marshal through encoding/json so the message is escaped correctly.
	payload, err := json.Marshal(map[string]any{
		"type": "error",
		"error": map[string]string{
			"type":    errType,
			"message": message,
		},
	})
	if err != nil {
		_ = c.Error(err)
		return
	}

	if _, writeErr := fmt.Fprintf(c.Writer, "data: %s\n\n", payload); writeErr != nil {
		_ = c.Error(writeErr)
	}
	flusher.Flush()
}

// errorResponse writes an error response in the Claude API format:
// {"type": "error", "error": {"type": errType, "message": message}}.
func (h *GatewayHandler) errorResponse(c *gin.Context, status int, errType, message string) {
	detail := gin.H{
		"type":    errType,
		"message": message,
	}
	envelope := gin.H{
		"type":  "error",
		"error": detail,
	}
	c.JSON(status, envelope)
}

// CountTokens handles the token counting endpoint.
// POST /v1/messages/count_tokens
//
// Unlike the regular messages flow, this handler validates billing eligibility
// (subscription/balance) but does not take concurrency slots and does not
// record usage. On any validation failure it writes a Claude-format error
// response and returns; errors from the upstream forward are only logged here
// because ForwardCountTokens writes the response itself.
func (h *GatewayHandler) CountTokens(c *gin.Context) {
	// Fetch the API key and user from the context (set by the ApiKeyAuth middleware).
	apiKey, ok := middleware2.GetAPIKeyFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	// Only the presence of the auth subject is checked; its value is not used here.
	_, ok = middleware2.GetAuthSubjectFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
		return
	}

	// Read the request body.
	body, err := io.ReadAll(c.Request.Body)
	if err != nil {
		// A body-size-limit error is reported as 413 with the configured limit.
		if maxErr, ok := extractMaxBytesError(err); ok {
			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
			return
		}
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
		return
	}

	if len(body) == 0 {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
		return
	}

	// Detect whether the caller is a Claude Code client and store the result in the context.
	SetClaudeCodeClientContext(c, body)

	// Record the ops request context early (model not yet known) so parse failures are still attributed.
	setOpsRequestContext(c, "", false, body)

	parsedReq, err := service.ParseGatewayRequest(body)
	if err != nil {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
		return
	}
	// Record the thinking state in the request context; it is used for Antigravity
	// final model key derivation and per-model rate limiting.
	c.Request = c.Request.WithContext(context.WithValue(c.Request.Context(), ctxkey.ThinkingEnabled, parsedReq.ThinkingEnabled))

	// The model field is required.
	if parsedReq.Model == "" {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
		return
	}

	// Refresh the ops request context now that model and stream flag are known.
	setOpsRequestContext(c, parsedReq.Model, parsedReq.Stream, body)

	// Fetch subscription info (may be nil).
	subscription, _ := middleware2.GetSubscriptionFromContext(c)

	// Check billing eligibility (subscription/balance).
	// NOTE: concurrency is not counted here, but subscription/balance must still be validated.
	if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
		status, code, message := billingErrorDetails(err)
		h.errorResponse(c, status, code, message)
		return
	}

	// Compute the sticky-session hash.
	parsedReq.SessionContext = &service.SessionContext{
		ClientIP:  ip.GetClientIP(c),
		UserAgent: c.GetHeader("User-Agent"),
		APIKeyID:  apiKey.ID,
	}
	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)

	// Select an account that supports the requested model.
	account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model)
	if err != nil {
		h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error())
		return
	}
	setOpsSelectedAccount(c, account.ID)

	// Forward the request (usage is not recorded).
	if err := h.gatewayService.ForwardCountTokens(c.Request.Context(), c, account, parsedReq); err != nil {
		log.Printf("Forward count_tokens request failed: %v", err)
		// The error response has already been written inside ForwardCountTokens.
		return
	}
}

// InterceptType identifies which kind of request interception applies,
// i.e. which mock response is returned instead of forwarding upstream.
type InterceptType int

const (
	InterceptTypeNone              InterceptType = iota
	InterceptTypeWarmup                          // warmup request (mock returns "New Conversation")
	InterceptTypeSuggestionMode                  // SUGGESTION MODE request (mock returns an empty string)
	InterceptTypeMaxTokensOneHaiku               // max_tokens=1 + haiku probe request (mock returns "#")
)

// isHaikuModel reports whether the model name contains "haiku",
// compared case-insensitively.
func isHaikuModel(model string) bool {
	const marker = "haiku"
	normalized := strings.ToLower(model)
	return strings.Contains(normalized, marker)
}

// isMaxTokensOneHaikuRequest reports whether the request is a
// "max_tokens=1 + haiku model" probe, which Claude Code uses to verify API
// connectivity. All three conditions must hold: max_tokens == 1, the model
// name contains "haiku", and the request is non-streaming.
func isMaxTokensOneHaikuRequest(model string, maxTokens int, isStream bool) bool {
	if maxTokens != 1 {
		return false
	}
	if !isHaikuModel(model) {
		return false
	}
	return !isStream
}

// detectInterceptType reports whether a request should be intercepted (answered
// with a mock response) and, if so, which kind of interception applies.
//
// Parameters:
//   - body: raw request body bytes
//   - model: requested model name
//   - maxTokens: the request's max_tokens value
//   - isStream: whether the request is a streaming request
//   - isClaudeCodeClient: whether the request already passed Claude Code client validation
//
// Both `content` (per message) and `system` may be given either as an array of
// blocks or as a plain string; a plain string is treated as a single text
// block. A field with an unexpected shape is ignored instead of aborting
// detection for the whole request. If the body as a whole cannot be parsed,
// InterceptTypeNone is returned.
func detectInterceptType(body []byte, model string, maxTokens int, isStream bool, isClaudeCodeClient bool) InterceptType {
	// The max_tokens=1 + haiku probe takes priority (non-streaming only).
	if isClaudeCodeClient && isMaxTokensOneHaikuRequest(model, maxTokens, isStream) {
		return InterceptTypeMaxTokensOneHaiku
	}

	// Fast path: skip JSON parsing entirely when no keyword appears in the body.
	bodyStr := string(body)
	hasSuggestionMode := strings.Contains(bodyStr, "[SUGGESTION MODE:")
	hasWarmupKeyword := strings.Contains(bodyStr, "title") || strings.Contains(bodyStr, "Warmup")

	if !hasSuggestionMode && !hasWarmupKeyword {
		return InterceptTypeNone
	}

	type textBlock struct {
		Type string `json:"type"`
		Text string `json:"text"`
	}

	// decodeContent accepts a message `content` value in either array-of-blocks
	// or plain-string form. Anything else yields no blocks.
	decodeContent := func(raw json.RawMessage) []textBlock {
		if len(raw) == 0 {
			return nil
		}
		var blocks []textBlock
		if err := json.Unmarshal(raw, &blocks); err == nil {
			return blocks
		}
		var text string
		if err := json.Unmarshal(raw, &text); err == nil {
			return []textBlock{{Type: "text", Text: text}}
		}
		return nil
	}

	// decodeSystem accepts the `system` value in either array-of-blocks or
	// plain-string form and returns the texts it carries.
	decodeSystem := func(raw json.RawMessage) []string {
		if len(raw) == 0 {
			return nil
		}
		var blocks []struct {
			Text string `json:"text"`
		}
		if err := json.Unmarshal(raw, &blocks); err == nil {
			texts := make([]string, 0, len(blocks))
			for _, block := range blocks {
				texts = append(texts, block.Text)
			}
			return texts
		}
		var text string
		if err := json.Unmarshal(raw, &text); err == nil {
			return []string{text}
		}
		return nil
	}

	// Parse the envelope once; content/system stay raw so that their shape
	// (string vs. array) cannot fail the whole parse.
	var req struct {
		Messages []struct {
			Role    string          `json:"role"`
			Content json.RawMessage `json:"content"`
		} `json:"messages"`
		System json.RawMessage `json:"system"`
	}
	if err := json.Unmarshal(body, &req); err != nil {
		return InterceptTypeNone
	}

	// SUGGESTION MODE: the last message must be a user message whose first
	// block is text starting with the marker.
	if hasSuggestionMode && len(req.Messages) > 0 {
		lastMsg := req.Messages[len(req.Messages)-1]
		if lastMsg.Role == "user" {
			blocks := decodeContent(lastMsg.Content)
			if len(blocks) > 0 &&
				blocks[0].Type == "text" &&
				strings.HasPrefix(blocks[0].Text, "[SUGGESTION MODE:") {
				return InterceptTypeSuggestionMode
			}
		}
	}

	// Warmup requests.
	if hasWarmupKeyword {
		// Title-generation prompt or a literal "Warmup" in any message text block.
		for _, msg := range req.Messages {
			for _, block := range decodeContent(msg.Content) {
				if block.Type != "text" {
					continue
				}
				if strings.Contains(block.Text, "Please write a 5-10 word title for the following conversation:") ||
					block.Text == "Warmup" {
					return InterceptTypeWarmup
				}
			}
		}
		// Title-extraction prompt in the system prompt.
		for _, text := range decodeSystem(req.System) {
			if strings.Contains(text, "nalyze if this message indicates a new conversation topic. If it does, extract a 2-3 word title") {
				return InterceptTypeWarmup
			}
		}
	}

	return InterceptTypeNone
}

// sendMockInterceptStream writes a mock streaming (SSE) response for an
// intercepted request.
//
// InterceptTypeSuggestionMode produces a single empty text delta; every other
// intercept type is treated as a warmup and produces "New Conversation" split
// into two deltas. Events are flushed one by one with a 20ms pause between
// them. If a write fails, the error is recorded on the gin context and the
// remaining events are not sent.
func sendMockInterceptStream(c *gin.Context, model string, interceptType InterceptType) {
	c.Header("Content-Type", "text/event-stream")
	c.Header("Cache-Control", "no-cache")
	c.Header("Connection", "keep-alive")
	c.Header("X-Accel-Buffering", "no")

	// Pick the response content for the intercept type.
	var msgID string
	var outputTokens int
	var textDeltas []string

	switch interceptType {
	case InterceptTypeSuggestionMode:
		msgID = "msg_mock_suggestion"
		outputTokens = 1
		textDeltas = []string{""} // empty content
	default: // InterceptTypeWarmup
		msgID = "msg_mock_warmup"
		outputTokens = 2
		textDeltas = []string{"New", " Conversation"}
	}

	// Marshal errors are ignored below: every payload is a map of strings,
	// ints, nil and empty slices, which encoding/json always encodes.

	// Build the message_start event with proper JSON marshaling.
	messageStart := map[string]any{
		"type": "message_start",
		"message": map[string]any{
			"id":            msgID,
			"type":          "message",
			"role":          "assistant",
			"model":         model,
			"content":       []any{},
			"stop_reason":   nil,
			"stop_sequence": nil,
			"usage": map[string]int{
				"input_tokens":  10,
				"output_tokens": 0,
			},
		},
	}
	messageStartJSON, _ := json.Marshal(messageStart)

	// Opening events.
	events := []string{
		`event: message_start` + "\n" + `data: ` + string(messageStartJSON),
		`event: content_block_start` + "\n" + `data: {"content_block":{"text":"","type":"text"},"index":0,"type":"content_block_start"}`,
	}

	// One content_block_delta event per text fragment.
	for _, text := range textDeltas {
		delta := map[string]any{
			"type":  "content_block_delta",
			"index": 0,
			"delta": map[string]string{
				"type": "text_delta",
				"text": text,
			},
		}
		deltaJSON, _ := json.Marshal(delta)
		events = append(events, `event: content_block_delta`+"\n"+`data: `+string(deltaJSON))
	}

	// Closing events.
	messageDelta := map[string]any{
		"type": "message_delta",
		"delta": map[string]any{
			"stop_reason":   "end_turn",
			"stop_sequence": nil,
		},
		"usage": map[string]int{
			"input_tokens":  10,
			"output_tokens": outputTokens,
		},
	}
	messageDeltaJSON, _ := json.Marshal(messageDelta)

	events = append(events,
		`event: content_block_stop`+"\n"+`data: {"index":0,"type":"content_block_stop"}`,
		`event: message_delta`+"\n"+`data: `+string(messageDeltaJSON),
		`event: message_stop`+"\n"+`data: {"type":"message_stop"}`,
	)

	lastIdx := len(events) - 1
	for i, event := range events {
		if _, err := c.Writer.WriteString(event + "\n\n"); err != nil {
			// The write failed (typically the client went away); further
			// writes and pauses would only waste time.
			_ = c.Error(err)
			return
		}
		c.Writer.Flush()
		// Pace the events, but do not delay the handler after the final one.
		if i < lastIdx {
			time.Sleep(20 * time.Millisecond)
		}
	}
}

// generateRealisticMsgID returns a realistic-looking message ID of the form
// "msg_bdrk_" followed by 24 random alphanumeric characters, mimicking the
// shape of IDs in real Claude API responses. If the random source fails, it
// falls back to "msg_bdrk_" followed by the current Unix time in nanoseconds.
func generateRealisticMsgID() string {
	const (
		alphabet  = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
		suffixLen = 24
	)

	suffix := make([]byte, suffixLen)
	if _, err := rand.Read(suffix); err != nil {
		return fmt.Sprintf("msg_bdrk_%d", time.Now().UnixNano())
	}

	// Map every random byte onto the alphabet in place.
	for i, v := range suffix {
		suffix[i] = alphabet[int(v)%len(alphabet)]
	}
	return "msg_bdrk_" + string(suffix)
}

// sendMockInterceptResponse writes a non-streaming mock response for an
// intercepted request, shaped like a Claude API message response.
//
// The text, message ID, stop reason and output token count depend on the
// intercept type; any type other than suggestion mode or the max_tokens=1
// haiku probe is answered as a warmup.
func sendMockInterceptResponse(c *gin.Context, model string, interceptType InterceptType) {
	// Start from the warmup defaults and override for the special cases.
	msgID := "msg_mock_warmup"
	text := "New Conversation"
	stopReason := "end_turn"
	outputTokens := 2

	switch interceptType {
	case InterceptTypeSuggestionMode:
		msgID, text, outputTokens = "msg_mock_suggestion", "", 1
	case InterceptTypeMaxTokensOneHaiku:
		msgID, text, outputTokens = generateRealisticMsgID(), "#", 1
		// A max_tokens=1 probe is cut off by the token limit, so report that as the stop reason.
		stopReason = "max_tokens"
	}

	const inputTokens = 10

	usage := gin.H{
		"input_tokens":                inputTokens,
		"cache_creation_input_tokens": 0,
		"cache_read_input_tokens":     0,
		"cache_creation": gin.H{
			"ephemeral_5m_input_tokens": 0,
			"ephemeral_1h_input_tokens": 0,
		},
		"output_tokens": outputTokens,
		"total_tokens":  inputTokens + outputTokens,
	}

	// Full response body, matching the Claude API response layout.
	c.JSON(http.StatusOK, gin.H{
		"model":         model,
		"id":            msgID,
		"type":          "message",
		"role":          "assistant",
		"content":       []gin.H{{"type": "text", "text": text}},
		"stop_reason":   stopReason,
		"stop_sequence": nil,
		"usage":         usage,
	})
}

// billingErrorDetails maps a billing eligibility error to the HTTP status,
// error code and message returned to the client.
//
// Errors matching service.ErrBillingServiceUnavailable map to 503 with code
// "billing_service_error"; every other error maps to 403 with code
// "billing_error". The message is taken from pkgerrors.Message(err), with a
// fixed fallback text (503 case) or err.Error() (403 case) when that is empty.
func billingErrorDetails(err error) (status int, code, message string) {
	unavailable := errors.Is(err, service.ErrBillingServiceUnavailable)
	message = pkgerrors.Message(err)

	switch {
	case unavailable:
		if message == "" {
			message = "Billing service temporarily unavailable. Please retry later."
		}
		return http.StatusServiceUnavailable, "billing_service_error", message
	default:
		if message == "" {
			message = err.Error()
		}
		return http.StatusForbidden, "billing_error", message
	}
}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								package handler
 								import (
 									"context"
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									"crypto/rand"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									"encoding/json"
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									"errors"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									"fmt"
 									"io"
 									"log"
 									"net/http"
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									"strings"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									"time"
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/config"
-												feat(antigravity): 添加 models 端点支持

- /antigravity/models: 返回全部模型（Claude + Gemini）
- /antigravity/v1/models: 返回全部模型（Claude API 格式）
- /antigravity/v1beta/models: 仅返回 Gemini 模型（v1beta 格式）

统一管理 antigravity 模型定义，避免重复代码

											
										
										
											2026-01-02 10:21:05 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
-												fix(后端): 修复 lint 失败并清理无用代码

修正测试中的 APIKey 名称引用
移除不可达返回与未使用函数
统一 gofmt 格式并处理 Close 错误

											
										
										
											2026-01-04 22:10:32 +08:00
+									pkgerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
-												fix(gateway): 修复 usage_logs 记录 IP 不正确的问题

在 nginx 反向代理场景下，使用 ip.GetClientIP() 替代 c.ClientIP()
以正确获取客户端真实 IP 地址

											
										
										
											2026-01-12 15:35:54 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/service"
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
 									"github.com/gin-gonic/gin"
 								)
 								// GatewayHandler handles API gateway requests
 								type GatewayHandler struct {
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+									gatewayService            *service.GatewayService
 									geminiCompatService       *service.GeminiMessagesCompatService
 									antigravityGatewayService *service.AntigravityGatewayService
 									userService               *service.UserService
 									billingCacheService       *service.BillingCacheService
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									usageService              *service.UsageService
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+									apiKeyService             *service.APIKeyService
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+									errorPassthroughService   *service.ErrorPassthroughService
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+									concurrencyHelper         *ConcurrencyHelper
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+									maxAccountSwitches        int
 									maxAccountSwitchesGemini  int
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								}
 								// NewGatewayHandler creates a new GatewayHandler
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+								func NewGatewayHandler(
 									gatewayService *service.GatewayService,
 									geminiCompatService *service.GeminiMessagesCompatService,
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+									antigravityGatewayService *service.AntigravityGatewayService,
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+									userService *service.UserService,
 									concurrencyService *service.ConcurrencyService,
 									billingCacheService *service.BillingCacheService,
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									usageService *service.UsageService,
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+									apiKeyService *service.APIKeyService,
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+									errorPassthroughService *service.ErrorPassthroughService,
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									cfg *config.Config,
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+								) *GatewayHandler {
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									pingInterval := time.Duration(0)
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+									maxAccountSwitches := 10
 									maxAccountSwitchesGemini := 3
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									if cfg != nil {
 										pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+										if cfg.Gateway.MaxAccountSwitches > 0 {
 											maxAccountSwitches = cfg.Gateway.MaxAccountSwitches
 										}
 										if cfg.Gateway.MaxAccountSwitchesGemini > 0 {
 											maxAccountSwitchesGemini = cfg.Gateway.MaxAccountSwitchesGemini
 										}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									return &GatewayHandler{
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+										gatewayService:            gatewayService,
 										geminiCompatService:       geminiCompatService,
 										antigravityGatewayService: antigravityGatewayService,
 										userService:               userService,
 										billingCacheService:       billingCacheService,
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+										usageService:              usageService,
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+										apiKeyService:             apiKeyService,
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+										errorPassthroughService:   errorPassthroughService,
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+										concurrencyHelper:         NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval),
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+										maxAccountSwitches:        maxAccountSwitches,
 										maxAccountSwitchesGemini:  maxAccountSwitchesGemini,
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									}
 								}
 								// Messages handles Claude API compatible messages endpoint
 								// POST /v1/messages
 								func (h *GatewayHandler) Messages(c *gin.Context) {
 									// 从context获取apiKey和user（ApiKeyAuth中间件已设置）
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, ok := middleware2.GetAPIKeyFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									subject, ok := middleware2.GetAuthSubjectFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
 										return
 									}
 									// 读取请求体
 									body, err := io.ReadAll(c.Request.Body)
 									if err != nil {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										if maxErr, ok := extractMaxBytesError(err); ok {
 											h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
 											return
 										}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 										return
 									}
 									if len(body) == 0 {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, "", false, body)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									parsedReq, err := service.ParseGatewayRequest(body)
 									if err != nil {
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
 										return
 									}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									reqModel := parsedReq.Model
 									reqStream := parsedReq.Stream
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									// 设置 max_tokens=1 + haiku 探测请求标识到 context 中
 									// 必须在 SetClaudeCodeClientContext 之前设置，因为 ClaudeCodeValidator 需要读取此标识进行绕过判断
 									if isMaxTokensOneHaikuRequest(reqModel, parsedReq.MaxTokens, reqStream) {
 										ctx := context.WithValue(c.Request.Context(), ctxkey.IsMaxTokensOneHaikuRequest, true)
 										c.Request = c.Request.WithContext(ctx)
 									}
 									// 检查是否为 Claude Code 客户端，设置到 context 中
 									SetClaudeCodeClientContext(c, body)
 									isClaudeCodeClient := service.IsClaudeCodeClient(c.Request.Context())
 									// 在请求上下文中记录 thinking 状态，供 Antigravity 最终模型 key 推导/模型维度限流使用
 									c.Request = c.Request.WithContext(context.WithValue(c.Request.Context(), ctxkey.ThinkingEnabled, parsedReq.ThinkingEnabled))
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, reqModel, reqStream, body)
-												feat(groups): add Claude Code client restriction and session isolation

- Add claude_code_only field to restrict groups to Claude Code clients only
- Add fallback_group_id for non-Claude Code requests to use alternate group
- Implement ClaudeCodeValidator for User-Agent detection
- Add group-level session binding isolation (groupID in Redis key)
- Prevent cross-group sticky session pollution
- Update frontend with Claude Code restriction controls

											
										
										
											2026-01-08 23:07:00 +08:00
-												fix(网关): 添加 model 参数必填验证

在以下端点添加 model 参数的必填验证，缺失时直接返回 400 错误：
- /v1/messages
- /v1/messages/count_tokens
- /openai/v1/responses

修复前：空 model 会进入账号选择流程，最终由上游 API 返回错误
修复后：入口处直接拒绝，避免浪费资源和不明确的错误信息

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2025-12-31 16:17:45 +08:00
+									// 验证 model 必填
 									if reqModel == "" {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
 										return
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									// Track if we've started streaming (for error handling)
 									streamStarted := false
-												fix: restore non-failover error passthrough from 7b156489

											
										
										
											2026-02-07 14:24:55 +08:00
+									// 绑定错误透传服务，允许 service 层在非 failover 错误场景复用规则。
 									if h.errorPassthroughService != nil {
 										service.BindErrorPassthroughService(c, h.errorPassthroughService)
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									// 获取订阅信息（可能为nil）- 提前获取用于后续检查
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									subscription, _ := middleware2.GetSubscriptionFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
 									// 0. 检查wait队列是否已满
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									maxWait := service.CalculateMaxWait(subject.Concurrency)
 									canWait, err := h.concurrencyHelper.IncrementWaitCount(c.Request.Context(), subject.UserID, maxWait)
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									waitCounted := false
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if err != nil {
 										log.Printf("Increment wait count failed: %v", err)
 										// On error, allow request to proceed
 									} else if !canWait {
 										h.errorResponse(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									if err == nil && canWait {
 										waitCounted = true
 									}
 									// Ensure we decrement if we exit before acquiring the user slot.
 									defer func() {
 										if waitCounted {
 											h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
 										}
 									}()
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
 									// 1. 首先获取用户并发槽位
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if err != nil {
 										log.Printf("User concurrency acquire failed: %v", err)
 										h.handleConcurrencyError(c, err, "user", streamStarted)
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									// User slot acquired: no longer waiting in the queue.
 									if waitCounted {
 										h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
 										waitCounted = false
 									}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									// 在请求结束或 Context 取消时确保释放槽位，避免客户端断开造成泄漏
 									userReleaseFunc = wrapReleaseOnDone(c.Request.Context(), userReleaseFunc)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if userReleaseFunc != nil {
 										defer userReleaseFunc()
 									}
 									// 2. 【新增】Wait后二次检查余额/订阅
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										log.Printf("Billing eligibility check failed after wait: %v", err)
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+										status, code, message := billingErrorDetails(err)
 										h.handleStreamingAwareError(c, status, code, message, streamStarted)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										return
 									}
 									// 计算粘性会话hash
-												fix: prevent sessionHash collision for different users with same messages

Mix SessionContext (ClientIP, UserAgent, APIKeyID) into
GenerateSessionHash 3rd-level fallback to differentiate requests
from different users sending identical content.

Also switch hashContent from SHA256-truncated to XXHash64 for
better performance, and optimize Trie Lua script to match from
longest prefix first.

											
										
										
											2026-02-09 06:46:32 +08:00
+									parsedReq.SessionContext = &service.SessionContext{
 										ClientIP:  ip.GetClientIP(c),
 										UserAgent: c.GetHeader("User-Agent"),
 										APIKeyID:  apiKey.ID,
 									}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												feat(antigravity): 添加专用路由，支持仅使用 antigravity 账户

添加 /antigravity/v1/* 和 /antigravity/v1beta/* 路由：
- 通过 ForcePlatform 中间件强制使用 antigravity 平台
- 跳过混合调度逻辑，仅调度 antigravity 账户
- 支持按分组优先查找，找不到时回退查询全部 antigravity 账户

修复 context key 类型不匹配问题：
- middleware 和 service 统一使用字符串常量 "ctx_force_platform"
- 解决 Go context.Value() 类型+值匹配导致的读取失败

其他改动：
- 嵌入式前端中间件白名单添加 /antigravity/ 路径
- e2e 测试 Gemini 端点 URL 添加 endpointPrefix 支持

											
										
										
											2025-12-29 16:52:55 +08:00
+									// 获取平台：优先使用强制平台（/antigravity 路由，中间件已设置 request.Context），否则使用分组平台
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+									platform := ""
-												feat(antigravity): 添加专用路由，支持仅使用 antigravity 账户

添加 /antigravity/v1/* 和 /antigravity/v1beta/* 路由：
- 通过 ForcePlatform 中间件强制使用 antigravity 平台
- 跳过混合调度逻辑，仅调度 antigravity 账户
- 支持按分组优先查找，找不到时回退查询全部 antigravity 账户

修复 context key 类型不匹配问题：
- middleware 和 service 统一使用字符串常量 "ctx_force_platform"
- 解决 Go context.Value() 类型+值匹配导致的读取失败

其他改动：
- 嵌入式前端中间件白名单添加 /antigravity/ 路径
- e2e 测试 Gemini 端点 URL 添加 endpointPrefix 支持

											
										
										
											2025-12-29 16:52:55 +08:00
+									if forcePlatform, ok := middleware2.GetForcePlatformFromContext(c); ok {
 										platform = forcePlatform
 									} else if apiKey.Group != nil {
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
+										platform = apiKey.Group.Platform
 									}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									sessionKey := sessionHash
 									if platform == service.PlatformGemini && sessionHash != "" {
 										sessionKey = "gemini:" + sessionHash
 									}
-												feat(handler): 添加 Gemini OAuth Handler 和完善依赖注入

- 新增 Gemini OAuth 授权处理器
- 扩展账号和网关处理器支持 Gemini
- 注册 Gemini 相关路由
- 更新 Wire 依赖注入配置（所有层）
- 更新 Docker Compose 配置

											
										
										
											2025-12-25 06:45:03 -08:00
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									// 查询粘性会话绑定的账号 ID
 									var sessionBoundAccountID int64
 									if sessionKey != "" {
 										sessionBoundAccountID, _ = h.gatewayService.GetCachedSessionAccountID(c.Request.Context(), apiKey.GroupID, sessionKey)
 									}
 									// 判断是否真的绑定了粘性会话：有 sessionKey 且已经绑定到某个账号
 									hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0
-												fix(backend): 适配重构后的架构修复 Gemini OAuth 集成

## 主要修改

1. **移除 model 包引用**
   - 删除所有 `internal/model` 包的 import
   - 使用 service 包中的类型定义（Account, Platform常量等）

2. **修复类型转换**
   - JSONB → map[string]any
   - 添加 mergeJSONB 辅助函数
   - 添加 Account.IsGemini() 方法

3. **更新中间件调用**
   - GetUserFromContext → GetAuthSubjectFromContext
   - 适配新的并发控制签名（传递 ID 和 Concurrency 而不是完整对象）

4. **修复 handler 层**
   - 更新 gemini_v1beta_handler.go
   - 修正 billing 检查和 usage 记录

## 影响范围
- backend/internal/service/gemini_*.go
- backend/internal/service/account_test_service.go
- backend/internal/service/crs_sync_service.go
- backend/internal/handler/gemini_v1beta_handler.go
- backend/internal/handler/gateway_handler.go
- backend/internal/handler/admin/account_handler.go

											
										
										
											2025-12-26 22:07:55 +08:00
+									if platform == service.PlatformGemini {
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+										maxAccountSwitches := h.maxAccountSwitchesGemini
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+										switchCount := 0
 										failedAccountIDs := make(map[int64]struct{})
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+										var lastFailoverErr *service.UpstreamFailoverError
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+										var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
 										for {
-												feat: 添加5h窗口费用控制和会话数量限制

- 支持Anthropic OAuth/SetupToken账号的5h窗口费用阈值控制
- 支持账号级别的并发会话数量限制
- 使用Redis缓存窗口费用(30秒TTL)减少数据库压力
- 费用计算基于标准费用(不含账号倍率)

											
										
										
											2026-01-16 23:36:52 +08:00
+											selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											if err != nil {
 												if len(failedAccountIDs) == 0 {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
 													return
 												}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+												if lastFailoverErr != nil {
 													h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted)
 												} else {
 													h.handleFailoverExhaustedSimple(c, 502, streamStarted)
 												}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												return
 											}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											account := selection.Account
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											setOpsSelectedAccount(c, account.ID)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+											// 检查请求拦截（预热请求、SUGGESTION MODE等）
 											if account.IsInterceptWarmupEnabled() {
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+												interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+												if interceptType != InterceptTypeNone {
 													if selection.Acquired && selection.ReleaseFunc != nil {
 														selection.ReleaseFunc()
 													}
 													if reqStream {
 														sendMockInterceptStream(c, reqModel, interceptType)
 													} else {
 														sendMockInterceptResponse(c, reqModel, interceptType)
 													}
 													return
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												}
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											}
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											// 3. 获取账号并发槽位
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											accountReleaseFunc := selection.ReleaseFunc
 											if !selection.Acquired {
 												if selection.WaitPlan == nil {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
 													return
 												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												accountWaitCounted := false
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+												canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
 												if err != nil {
 													log.Printf("Increment account wait count failed: %v", err)
 												} else if !canWait {
 													log.Printf("Account wait queue full: account=%d", account.ID)
 													h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
 													return
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												}
 												if err == nil && canWait {
 													accountWaitCounted = true
 												}
 												// Ensure the wait counter is decremented if we exit before acquiring the slot.
 												defer func() {
 													if accountWaitCounted {
-												fix(lint): 修复 golangci-lint 报错

- 修复 gofmt 格式问题
- 修复 staticcheck SA4031 nil check 问题（只在成功时设置 release 函数）
- 删除未使用的 sortAccountsByPriority 函数

											
										
										
											2026-01-01 04:26:01 +08:00
+														h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
 													}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												}()
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
 												accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
 													c,
 													account.ID,
 													selection.WaitPlan.MaxConcurrency,
 													selection.WaitPlan.Timeout,
 													reqStream,
 													&streamStarted,
 												)
 												if err != nil {
 													log.Printf("Account concurrency acquire failed: %v", err)
 													h.handleConcurrencyError(c, err, "account", streamStarted)
 													return
 												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												// Slot acquired: no longer waiting in queue.
 												if accountWaitCounted {
 													h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
 													accountWaitCounted = false
 												}
-												feat(groups): add Claude Code client restriction and session isolation

- Add claude_code_only field to restrict groups to Claude Code clients only
- Add fallback_group_id for non-Claude Code requests to use alternate group
- Implement ClaudeCodeValidator for User-Agent detection
- Add group-level session binding isolation (groupID in Redis key)
- Prevent cross-group sticky session pollution
- Update frontend with Claude Code restriction controls

											
										
										
											2026-01-08 23:07:00 +08:00
+												if err := h.gatewayService.BindStickySession(c.Request.Context(), apiKey.GroupID, sessionKey, account.ID); err != nil {
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+													log.Printf("Bind sticky session failed: %v", err)
 												}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+											// 账号槽位/等待计数需要在超时或断开时安全回收
 											accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											// 转发请求 - 根据账号平台分流
 											var result *service.ForwardResult
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+											requestCtx := c.Request.Context()
 											if switchCount > 0 {
 												requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
 											}
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											if account.Platform == service.PlatformAntigravity {
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+												result, err = h.antigravityGatewayService.ForwardGemini(requestCtx, c, account, reqModel, "generateContent", reqStream, body, hasBoundSession)
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											} else {
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+												result, err = h.geminiCompatService.Forward(requestCtx, c, account, body)
-												feat: 完善 Antigravity 多平台网关支持，修复 Gemini handler 分流逻辑

											
										
										
											2025-12-28 17:48:52 +08:00
+											}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											if accountReleaseFunc != nil {
 												accountReleaseFunc()
 											}
 											if err != nil {
 												var failoverErr *service.UpstreamFailoverError
 												if errors.As(err, &failoverErr) {
 													failedAccountIDs[account.ID] = struct{}{}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+													lastFailoverErr = failoverErr
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													if failoverErr.ForceCacheBilling {
 														forceCacheBilling = true
 													}
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+													if switchCount >= maxAccountSwitches {
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+														h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted)
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+														return
 													}
 													switchCount++
 													log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
 													continue
 												}
 												// 错误响应已在Forward中处理，这里只记录日志
 												log.Printf("Forward request failed: %v", err)
 												return
 											}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
+											// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
 											userAgent := c.GetHeader("User-Agent")
-												fix(gateway): 修复 usage_logs 记录 IP 不正确的问题

在 nginx 反向代理场景下，使用 ip.GetClientIP() 替代 c.ClientIP()
以正确获取客户端真实 IP 地址

											
										
										
											2026-01-12 15:35:54 +08:00
+											clientIP := ip.GetClientIP(c)
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+											// 异步记录使用量（subscription已在函数开头获取）
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+											go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string, fcb bool) {
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 												defer cancel()
 												if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													Result:            result,
 													APIKey:            apiKey,
 													User:              apiKey.User,
 													Account:           usedAccount,
 													Subscription:      subscription,
 													UserAgent:         ua,
 													IPAddress:         clientIP,
 													ForceCacheBilling: fcb,
 													APIKeyService:     h.apiKeyService,
-												feat: cc/codex/gemini 增加账号重试

											
										
										
											2025-12-27 12:27:47 +08:00
+												}); err != nil {
 													log.Printf("Record usage failed: %v", err)
 												}
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+											}(result, account, userAgent, clientIP, forceCacheBilling)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											return
 										}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+									currentAPIKey := apiKey
 									currentSubscription := subscription
 									var fallbackGroupID *int64
 									if apiKey.Group != nil {
 										fallbackGroupID = apiKey.Group.FallbackGroupIDOnInvalidRequest
 									}
 									fallbackUsed := false
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
 									for {
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+										maxAccountSwitches := h.maxAccountSwitches
 										switchCount := 0
 										failedAccountIDs := make(map[int64]struct{})
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+										var lastFailoverErr *service.UpstreamFailoverError
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+										retryWithFallback := false
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+										var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+										for {
 											// 选择支持该模型的账号
 											selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, failedAccountIDs, parsedReq.MetadataUserID)
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											if err != nil {
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												if len(failedAccountIDs) == 0 {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
 													return
 												}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+												if lastFailoverErr != nil {
 													h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted)
 												} else {
 													h.handleFailoverExhaustedSimple(c, 502, streamStarted)
 												}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+												return
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											account := selection.Account
 											setOpsSelectedAccount(c, account.ID)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+											// 检查请求拦截（预热请求、SUGGESTION MODE等）
 											if account.IsInterceptWarmupEnabled() {
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+												interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+												if interceptType != InterceptTypeNone {
 													if selection.Acquired && selection.ReleaseFunc != nil {
 														selection.ReleaseFunc()
 													}
 													if reqStream {
 														sendMockInterceptStream(c, reqModel, interceptType)
 													} else {
 														sendMockInterceptResponse(c, reqModel, interceptType)
 													}
 													return
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
 											// 3. 获取账号并发槽位
 											accountReleaseFunc := selection.ReleaseFunc
 											if !selection.Acquired {
 												if selection.WaitPlan == nil {
 													h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
 													return
 												}
 												accountWaitCounted := false
 												canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
 												if err != nil {
 													log.Printf("Increment account wait count failed: %v", err)
 												} else if !canWait {
 													log.Printf("Account wait queue full: account=%d", account.ID)
 													h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
 													return
 												}
 												if err == nil && canWait {
 													accountWaitCounted = true
 												}
 												defer func() {
 													if accountWaitCounted {
 														h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
 													}
 												}()
 												accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
 													c,
 													account.ID,
 													selection.WaitPlan.MaxConcurrency,
 													selection.WaitPlan.Timeout,
 													reqStream,
 													&streamStarted,
 												)
 												if err != nil {
 													log.Printf("Account concurrency acquire failed: %v", err)
 													h.handleConcurrencyError(c, err, "account", streamStarted)
 													return
 												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+												if accountWaitCounted {
-												fix(lint): 修复 golangci-lint 报错

- 修复 gofmt 格式问题
- 修复 staticcheck SA4031 nil check 问题（只在成功时设置 release 函数）
- 删除未使用的 sortAccountsByPriority 函数

											
										
										
											2026-01-01 04:26:01 +08:00
+													h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													accountWaitCounted = false
 												}
 												if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
 													log.Printf("Bind sticky session failed: %v", err)
-												fix(lint): 修复 golangci-lint 报错

- 修复 gofmt 格式问题
- 修复 staticcheck SA4031 nil check 问题（只在成功时设置 release 函数）
- 删除未使用的 sortAccountsByPriority 函数

											
										
										
											2026-01-01 04:26:01 +08:00
+												}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											// 账号槽位/等待计数需要在超时或断开时安全回收
 											accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
 											// 转发请求 - 根据账号平台分流
 											var result *service.ForwardResult
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+											requestCtx := c.Request.Context()
 											if switchCount > 0 {
 												requestCtx = context.WithValue(requestCtx, ctxkey.AccountSwitchCount, switchCount)
 											}
-												refactor(upstream): replace upstream account type with apikey, auto-append /antigravity

Upstream accounts now use the standard APIKey type instead of a dedicated
upstream type. GetBaseURL() and new GetGeminiBaseURL() automatically append
/antigravity for Antigravity platform APIKey accounts, eliminating the need
for separate upstream forwarding methods.

- Remove ForwardUpstream, ForwardUpstreamGemini, testUpstreamConnection
- Remove upstream branch guards in Forward/ForwardGemini/TestConnection
- Add migration 052 to convert existing upstream accounts to apikey
- Update frontend CreateAccountModal to create apikey type
- Add unit tests for GetBaseURL and GetGeminiBaseURL

											
										
										
											2026-02-08 13:06:25 +08:00
+											if account.Platform == service.PlatformAntigravity && account.Type != service.AccountTypeAPIKey {
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+												result, err = h.antigravityGatewayService.Forward(requestCtx, c, account, body, hasBoundSession)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											} else {
-												antigravity: 区分切换后重试次数

											
										
										
											2026-01-28 00:01:03 +08:00
+												result, err = h.gatewayService.Forward(requestCtx, c, account, parsedReq)
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											if accountReleaseFunc != nil {
 												accountReleaseFunc()
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+											if err != nil {
 												var promptTooLongErr *service.PromptTooLongError
 												if errors.As(err, &promptTooLongErr) {
 													log.Printf("Prompt too long from antigravity: group=%d fallback_group_id=%v fallback_used=%v", currentAPIKey.GroupID, fallbackGroupID, fallbackUsed)
 													if !fallbackUsed && fallbackGroupID != nil && *fallbackGroupID > 0 {
 														fallbackGroup, err := h.gatewayService.ResolveGroupByID(c.Request.Context(), *fallbackGroupID)
 														if err != nil {
 															log.Printf("Resolve fallback group failed: %v", err)
 															_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
 															return
 														}
 														if fallbackGroup.Platform != service.PlatformAnthropic ||
 															fallbackGroup.SubscriptionType == service.SubscriptionTypeSubscription ||
 															fallbackGroup.FallbackGroupIDOnInvalidRequest != nil {
 															log.Printf("Fallback group invalid: group=%d platform=%s subscription=%s", fallbackGroup.ID, fallbackGroup.Platform, fallbackGroup.SubscriptionType)
 															_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
 															return
 														}
 														fallbackAPIKey := cloneAPIKeyWithGroup(apiKey, fallbackGroup)
 														if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), fallbackAPIKey.User, fallbackAPIKey, fallbackGroup, nil); err != nil {
 															status, code, message := billingErrorDetails(err)
 															h.handleStreamingAwareError(c, status, code, message, streamStarted)
 															return
 														}
 														// 兜底重试按“直接请求兜底分组”处理：清除强制平台，允许按分组平台调度
 														ctx := context.WithValue(c.Request.Context(), ctxkey.ForcePlatform, "")
 														c.Request = c.Request.WithContext(ctx)
 														currentAPIKey = fallbackAPIKey
 														currentSubscription = nil
 														fallbackUsed = true
 														retryWithFallback = true
 														break
 													}
 													_ = h.antigravityGatewayService.WriteMappedClaudeError(c, account, promptTooLongErr.StatusCode, promptTooLongErr.RequestID, promptTooLongErr.Body)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+													return
 												}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												var failoverErr *service.UpstreamFailoverError
 												if errors.As(err, &failoverErr) {
 													failedAccountIDs[account.ID] = struct{}{}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+													lastFailoverErr = failoverErr
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													if failoverErr.ForceCacheBilling {
 														forceCacheBilling = true
 													}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+													if switchCount >= maxAccountSwitches {
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+														h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted)
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+														return
 													}
 													switchCount++
 													log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
 													continue
 												}
 												// 错误响应已在Forward中处理，这里只记录日志
 												log.Printf("Account %d: Forward request failed: %v", account.ID, err)
 												return
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
 											// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
 											userAgent := c.GetHeader("User-Agent")
 											clientIP := ip.GetClientIP(c)
 											// 异步记录使用量（subscription已在函数开头获取）
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+											go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string, fcb bool) {
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 												defer cancel()
 												if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													Result:            result,
 													APIKey:            currentAPIKey,
 													User:              currentAPIKey.User,
 													Account:           usedAccount,
 													Subscription:      currentSubscription,
 													UserAgent:         ua,
 													IPAddress:         clientIP,
 													ForceCacheBilling: fcb,
 													APIKeyService:     h.apiKeyService,
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+												}); err != nil {
 													log.Printf("Record usage failed: %v", err)
 												}
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+											}(result, account, userAgent, clientIP, forceCacheBilling)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											return
 										}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+										if !retryWithFallback {
 											return
 										}
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								}
 								// Models handles listing available models
 								// GET /v1/models
-												feat: 添加模型白名单选择器组件，同步 new-api 模型列表

- 新增 ModelWhitelistSelector.vue 支持模型白名单多选
- 新增 ModelIcon.vue 显示品牌图标（基于 @lobehub/icons）
- 新增 useModelWhitelist.ts 硬编码各平台模型列表
- 更新账号编辑表单支持模型白名单配置
- 支持 Claude/OpenAI/Gemini/智谱/百度/讯飞等主流平台

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-01 16:03:48 +08:00
+								// Returns models based on account configurations (model_mapping whitelist)
 								// Falls back to default models if no whitelist is configured
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								func (h *GatewayHandler) Models(c *gin.Context) {
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, _ := middleware2.GetAPIKeyFromContext(c)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
-												feat: 添加模型白名单选择器组件，同步 new-api 模型列表

- 新增 ModelWhitelistSelector.vue 支持模型白名单多选
- 新增 ModelIcon.vue 显示品牌图标（基于 @lobehub/icons）
- 新增 useModelWhitelist.ts 硬编码各平台模型列表
- 更新账号编辑表单支持模型白名单配置
- 支持 Claude/OpenAI/Gemini/智谱/百度/讯飞等主流平台

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-01 16:03:48 +08:00
+									var groupID *int64
 									var platform string
 									if apiKey != nil && apiKey.Group != nil {
 										groupID = &apiKey.Group.ID
 										platform = apiKey.Group.Platform
 									}
 									// Get available models from account configurations (without platform filter)
 									availableModels := h.gatewayService.GetAvailableModels(c.Request.Context(), groupID, "")
 									if len(availableModels) > 0 {
 										// Build model list from whitelist
 										models := make([]claude.Model, 0, len(availableModels))
 										for _, modelID := range availableModels {
 											models = append(models, claude.Model{
 												ID:          modelID,
 												Type:        "model",
 												DisplayName: modelID,
 												CreatedAt:   "2024-01-01T00:00:00Z",
 											})
 										}
 										c.JSON(http.StatusOK, gin.H{
 											"object": "list",
 											"data":   models,
 										})
 										return
 									}
 									// Fallback to default models
 									if platform == "openai" {
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										c.JSON(http.StatusOK, gin.H{
 											"object": "list",
 											"data":   openai.DefaultModels,
 										})
 										return
 									}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									c.JSON(http.StatusOK, gin.H{
 										"object": "list",
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										"data":   claude.DefaultModels,
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									})
 								}
-												feat(antigravity): 添加 models 端点支持

- /antigravity/models: 返回全部模型（Claude + Gemini）
- /antigravity/v1/models: 返回全部模型（Claude API 格式）
- /antigravity/v1beta/models: 仅返回 Gemini 模型（v1beta 格式）

统一管理 antigravity 模型定义，避免重复代码

											
										
										
											2026-01-02 10:21:05 +08:00
+								// AntigravityModels 返回 Antigravity 支持的全部模型
 								// GET /antigravity/models
 								func (h *GatewayHandler) AntigravityModels(c *gin.Context) {
 									// Assemble the list envelope first, then serialize it with a 200 status.
 									payload := gin.H{
 										"object": "list",
 										"data":   antigravity.DefaultModels(),
 									}
 									c.JSON(http.StatusOK, payload)
 								}
-												Add invalid-request fallback routing

											
										
										
											2026-01-23 22:24:46 +08:00
+								func cloneAPIKeyWithGroup(apiKey *service.APIKey, group *service.Group) *service.APIKey {
 									// cloneAPIKeyWithGroup returns a copy of apiKey whose GroupID and Group
 									// refer to the given group. When either argument is nil there is nothing
 									// to rebind, so the original pointer (possibly nil) is returned unchanged.
 									if apiKey == nil || group == nil {
 										return apiKey
 									}
 									// Copy the struct by value so the caller's key is not mutated. This is a
 									// shallow copy: any pointer-typed fields are shared with the original.
 									cloned := *apiKey
 									// Take the ID into a local so GroupID points at its own variable rather
 									// than at a field inside group.
 									groupID := group.ID
 									cloned.GroupID = &groupID
 									// The group itself is referenced, not copied.
 									cloned.Group = group
 									return &cloned
 								}
-												chore: gofmt

											
										
										
											2026-02-03 16:55:13 +08:00
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+								// Usage handles getting account balance and usage statistics for CC Switch integration
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								// GET /v1/usage
 								func (h *GatewayHandler) Usage(c *gin.Context) {
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, ok := middleware2.GetAPIKeyFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									subject, ok := middleware2.GetAuthSubjectFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												feat(gateway): filter /v1/usage stats by API Key instead of UserID

Previously the /v1/usage endpoint aggregated usage stats (today/total
tokens, cost, RPM/TPM) across all API Keys belonging to the user.
This made it impossible to distinguish usage from different API Keys
(e.g. balance vs subscription keys).

Now the usage stats are filtered by the current request's API Key ID,
so each key only sees its own usage data. The balance/remaining fields
are unaffected and still reflect the user-level wallet balance.

Changes:
- Add GetAPIKeyDashboardStats to repository interface and implementation
- Add getPerformanceStatsByAPIKey helper (also fixes TPM to include
  cache_creation_tokens and cache_read_tokens)
- Add GetAPIKeyDashboardStats to UsageService
- Update Usage handler to call GetAPIKeyDashboardStats(apiKey.ID)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-05 11:41:25 +08:00
+									// Best-effort: 获取用量统计（按当前 API Key 过滤），失败不影响基础响应
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									var usageData gin.H
 									if h.usageService != nil {
-												feat(gateway): filter /v1/usage stats by API Key instead of UserID

Previously the /v1/usage endpoint aggregated usage stats (today/total
tokens, cost, RPM/TPM) across all API Keys belonging to the user.
This made it impossible to distinguish usage from different API Keys
(e.g. balance vs subscription keys).

Now the usage stats are filtered by the current request's API Key ID,
so each key only sees its own usage data. The balance/remaining fields
are unaffected and still reflect the user-level wallet balance.

Changes:
- Add GetAPIKeyDashboardStats to repository interface and implementation
- Add getPerformanceStatsByAPIKey helper (also fixes TPM to include
  cache_creation_tokens and cache_read_tokens)
- Add GetAPIKeyDashboardStats to UsageService
- Update Usage handler to call GetAPIKeyDashboardStats(apiKey.ID)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-02-05 11:41:25 +08:00
+										dashStats, err := h.usageService.GetAPIKeyDashboardStats(c.Request.Context(), apiKey.ID)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+										if err == nil && dashStats != nil {
 											usageData = gin.H{
 												"today": gin.H{
 													"requests":              dashStats.TodayRequests,
 													"input_tokens":          dashStats.TodayInputTokens,
 													"output_tokens":         dashStats.TodayOutputTokens,
 													"cache_creation_tokens": dashStats.TodayCacheCreationTokens,
 													"cache_read_tokens":     dashStats.TodayCacheReadTokens,
 													"total_tokens":          dashStats.TodayTokens,
 													"cost":                  dashStats.TodayCost,
 													"actual_cost":           dashStats.TodayActualCost,
 												},
 												"total": gin.H{
 													"requests":              dashStats.TotalRequests,
 													"input_tokens":          dashStats.TotalInputTokens,
 													"output_tokens":         dashStats.TotalOutputTokens,
 													"cache_creation_tokens": dashStats.TotalCacheCreationTokens,
 													"cache_read_tokens":     dashStats.TotalCacheReadTokens,
 													"total_tokens":          dashStats.TotalTokens,
 													"cost":                  dashStats.TotalCost,
 													"actual_cost":           dashStats.TotalActualCost,
 												},
 												"average_duration_ms": dashStats.AverageDurationMs,
 												"rpm":                 dashStats.Rpm,
 												"tpm":                 dashStats.Tpm,
 											}
 										}
 									}
 									// 订阅模式：返回订阅限额信息 + 用量统计
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if apiKey.Group != nil && apiKey.Group.IsSubscriptionType() {
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+										subscription, ok := middleware2.GetSubscriptionFromContext(c)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										if !ok {
 											h.errorResponse(c, http.StatusForbidden, "subscription_error", "No active subscription")
 											return
 										}
 										remaining := h.calculateSubscriptionRemaining(apiKey.Group, subscription)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+										resp := gin.H{
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+											"isValid":   true,
 											"planName":  apiKey.Group.Name,
 											"remaining": remaining,
 											"unit":      "USD",
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+											"subscription": gin.H{
 												"daily_usage_usd":   subscription.DailyUsageUSD,
 												"weekly_usage_usd":  subscription.WeeklyUsageUSD,
 												"monthly_usage_usd": subscription.MonthlyUsageUSD,
 												"daily_limit_usd":   apiKey.Group.DailyLimitUSD,
 												"weekly_limit_usd":  apiKey.Group.WeeklyLimitUSD,
 												"monthly_limit_usd": apiKey.Group.MonthlyLimitUSD,
 												"expires_at":        subscription.ExpiresAt,
 											},
 										}
 										if usageData != nil {
 											resp["usage"] = usageData
 										}
 										c.JSON(http.StatusOK, resp)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										return
 									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// 余额模式：返回钱包余额 + 用量统计
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									latestUser, err := h.userService.GetByID(c.Request.Context(), subject.UserID)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									if err != nil {
 										h.errorResponse(c, http.StatusInternalServerError, "api_error", "Failed to get user info")
 										return
 									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									resp := gin.H{
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+										"isValid":   true,
 										"planName":  "钱包余额",
 										"remaining": latestUser.Balance,
 										"unit":      "USD",
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+										"balance":   latestUser.Balance,
 									}
 									if usageData != nil {
 										resp["usage"] = usageData
 									}
 									c.JSON(http.StatusOK, resp)
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								}
 								// calculateSubscriptionRemaining 计算订阅剩余可用额度
 								// 逻辑：
 								// 1. 如果日/周/月任一限额达到100%，返回0
 								// 2. 否则返回所有已配置周期中剩余额度的最小值
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+								func (h *GatewayHandler) calculateSubscriptionRemaining(group *service.Group, sub *service.UserSubscription) float64 {
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+									var remainingValues []float64
 									// 检查日限额
 									if group.HasDailyLimit() {
 										remaining := *group.DailyLimitUSD - sub.DailyUsageUSD
 										if remaining <= 0 {
 											return 0
 										}
 										remainingValues = append(remainingValues, remaining)
 									}
 									// 检查周限额
 									if group.HasWeeklyLimit() {
 										remaining := *group.WeeklyLimitUSD - sub.WeeklyUsageUSD
 										if remaining <= 0 {
 											return 0
 										}
 										remainingValues = append(remainingValues, remaining)
 									}
 									// 检查月限额
 									if group.HasMonthlyLimit() {
 										remaining := *group.MonthlyLimitUSD - sub.MonthlyUsageUSD
 										if remaining <= 0 {
 											return 0
 										}
 										remainingValues = append(remainingValues, remaining)
 									}
 									// 如果没有配置任何限额，返回-1表示无限制
 									if len(remainingValues) == 0 {
 										return -1
 									}
 									// 返回最小值
 									min := remainingValues[0]
 									for _, v := range remainingValues[1:] {
 										if v < min {
 											min = v
 										}
 									}
 									return min
 								}
 								// handleConcurrencyError reports a concurrency-limit failure to the client as
 								// HTTP 429 with error type "rate_limit_error". slotType is interpolated into
 								// the message; err is accepted but not used when building the response.
 								// streamStarted is forwarded so the error is emitted in the right format.
 								func (h *GatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotType string, streamStarted bool) {
 									detail := fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType)
 									h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", detail, streamStarted)
 								}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+								func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
 									statusCode := failoverErr.StatusCode
 									responseBody := failoverErr.ResponseBody
 									// 先检查透传规则
 									if h.errorPassthroughService != nil && len(responseBody) > 0 {
 										if rule := h.errorPassthroughService.MatchRule(platform, statusCode, responseBody); rule != nil {
 											// 确定响应状态码
 											respCode := statusCode
 											if !rule.PassthroughCode && rule.ResponseCode != nil {
 												respCode = *rule.ResponseCode
 											}
 											// 确定响应消息
 											msg := service.ExtractUpstreamErrorMessage(responseBody)
 											if !rule.PassthroughBody && rule.CustomMessage != nil {
 												msg = *rule.CustomMessage
 											}
 											h.handleStreamingAwareError(c, respCode, "upstream_error", msg, streamStarted)
 											return
 										}
 									}
 									// 使用默认的错误映射
 									status, errType, errMsg := h.mapUpstreamError(statusCode)
 									h.handleStreamingAwareError(c, status, errType, errMsg, streamStarted)
 								}
 								// handleFailoverExhaustedSimple 简化版本，用于没有响应体的情况
 								func (h *GatewayHandler) handleFailoverExhaustedSimple(c *gin.Context, statusCode int, streamStarted bool) {
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									status, errType, errMsg := h.mapUpstreamError(statusCode)
 									h.handleStreamingAwareError(c, status, errType, errMsg, streamStarted)
 								}
 								// mapUpstreamError translates an upstream HTTP status code into the triple
 								// (client status code, error type, error message) returned to the caller.
 								// Every status that is not listed explicitly maps to 502 / "upstream_error" /
 								// "Upstream request failed".
 								func (h *GatewayHandler) mapUpstreamError(statusCode int) (int, string, string) {
 									// Start from the fallback triple and override only what differs.
 									status, errType, message := http.StatusBadGateway, "upstream_error", "Upstream request failed"
 									switch statusCode {
 									case http.StatusUnauthorized:
 										message = "Upstream authentication failed, please contact administrator"
 									case http.StatusForbidden:
 										message = "Upstream access forbidden, please contact administrator"
 									case http.StatusTooManyRequests:
 										status, errType, message = http.StatusTooManyRequests, "rate_limit_error", "Upstream rate limit exceeded, please retry later"
 									case 529: // net/http has no named constant for 529
 										status, errType, message = http.StatusServiceUnavailable, "overloaded_error", "Upstream service overloaded, please retry later"
 									case http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout:
 										message = "Upstream service temporarily unavailable"
 									}
 									return status, errType, message
 								}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+								// handleStreamingAwareError handles errors that may occur after streaming has started
 								func (h *GatewayHandler) handleStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
 									if streamStarted {
 										// Stream already started, send error as SSE event then close
 										flusher, ok := c.Writer.(http.Flusher)
 										if ok {
-												fix: 修复 SSE/JSON 转义和 nil 安全问题

基于 Codex 审查建议修复关键安全问题。

SSE/JSON 转义修复：
- handleStreamingAwareError: 使用 json.Marshal 替代字符串拼接
- sendMockWarmupStream: 使用 json.Marshal 生成 message_start 事件
- 防止错误消息中的特殊字符导致无效 JSON

Nil 安全检查：
- SelectAccountWithLoadAwareness: 粘性会话层添加 s.cache != nil 检查
- BindStickySession: 添加 s.cache == nil 检查
- 防止 cache 未初始化时的运行时 panic

影响：
- 提升 SSE 错误处理的健壮性
- 避免客户端 JSON 解析失败
- 增强代码防御性编程

											
										
										
											2026-01-01 19:47:26 -08:00
+											// Send error event in SSE format with proper JSON marshaling
 											errorData := map[string]any{
 												"type": "error",
 												"error": map[string]string{
 													"type":    errType,
 													"message": message,
 												},
 											}
 											jsonBytes, err := json.Marshal(errorData)
 											if err != nil {
 												_ = c.Error(err)
 												return
 											}
 											errorEvent := fmt.Sprintf("data: %s\n\n", string(jsonBytes))
-												ci(backend): 添加 github actions (#10)

## 变更内容

### CI/CD
- 添加 GitHub Actions 工作流（test + golangci-lint）
- 添加 golangci-lint 配置，启用 errcheck/govet/staticcheck/unused/depguard
- 通过 depguard 强制 service 层不能直接导入 repository

### 错误处理修复
- 修复 CSV 写入、SSE 流式输出、随机数生成等未处理的错误
- GenerateRedeemCode() 现在返回 error

### 资源泄露修复
- 统一使用 defer func() { _ = xxx.Close() }() 模式

### 代码清理
- 移除未使用的常量
- 简化 nil map 检查
- 统一代码格式
											
										
										
											2025-12-20 15:29:52 +08:00
+											if _, err := fmt.Fprint(c.Writer, errorEvent); err != nil {
 												_ = c.Error(err)
 											}
-												First commit

											
										
										
											2025-12-18 13:50:39 +08:00
+											flusher.Flush()
 										}
 										return
 									}
 									// Normal case: return JSON response with proper status code
 									h.errorResponse(c, status, errType, message)
 								}
 								// errorResponse writes an error response in the Claude API format:
 								// {"type": "error", "error": {"type": errType, "message": message}},
 								// using status as the HTTP status code.
 								func (h *GatewayHandler) errorResponse(c *gin.Context, status int, errType, message string) {
 									detail := gin.H{
 										"type":    errType,
 										"message": message,
 									}
 									payload := gin.H{
 										"type":  "error",
 										"error": detail,
 									}
 									c.JSON(status, payload)
 								}
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 								// CountTokens handles token counting endpoint
 								// POST /v1/messages/count_tokens
 								// 特点：校验订阅/余额，但不计算并发、不记录使用量
 								func (h *GatewayHandler) CountTokens(c *gin.Context) {
 									// 从context获取apiKey和user（ApiKeyAuth中间件已设置）
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, ok := middleware2.GetAPIKeyFromContext(c)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									_, ok = middleware2.GetAuthSubjectFromContext(c)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
 										return
 									}
 									// 读取请求体
 									body, err := io.ReadAll(c.Request.Body)
 									if err != nil {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										if maxErr, ok := extractMaxBytesError(err); ok {
 											h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
 											return
 										}
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 										return
 									}
 									if len(body) == 0 {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
 										return
 									}
-												fix(网关): 区分 Claude Code OAuth 适配

											
										
										
											2026-01-15 19:17:07 +08:00
+									// 检查是否为 Claude Code 客户端，设置到 context 中
 									SetClaudeCodeClientContext(c, body)
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, "", false, body)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									parsedReq, err := service.ParseGatewayRequest(body)
 									if err != nil {
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
 										return
 									}
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									// 在请求上下文中记录 thinking 状态，供 Antigravity 最终模型 key 推导/模型维度限流使用
 									c.Request = c.Request.WithContext(context.WithValue(c.Request.Context(), ctxkey.ThinkingEnabled, parsedReq.ThinkingEnabled))
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
-												fix(网关): 添加 model 参数必填验证

在以下端点添加 model 参数的必填验证，缺失时直接返回 400 错误：
- /v1/messages
- /v1/messages/count_tokens
- /openai/v1/responses

修复前：空 model 会进入账号选择流程，最终由上游 API 返回错误
修复后：入口处直接拒绝，避免浪费资源和不明确的错误信息

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2025-12-31 16:17:45 +08:00
+									// 验证 model 必填
 									if parsedReq.Model == "" {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, parsedReq.Model, parsedReq.Stream, body)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									// 获取订阅信息（可能为nil）
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									subscription, _ := middleware2.GetSubscriptionFromContext(c)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 									// 校验 billing eligibility（订阅/余额）
 									// 【注意】不计算并发，但需要校验订阅/余额
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+										status, code, message := billingErrorDetails(err)
 										h.errorResponse(c, status, code, message)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										return
 									}
 									// 计算粘性会话 hash
-												fix: prevent sessionHash collision for different users with same messages

Mix SessionContext (ClientIP, UserAgent, APIKeyID) into
GenerateSessionHash 3rd-level fallback to differentiate requests
from different users sending identical content.

Also switch hashContent from SHA256-truncated to XXHash64 for
better performance, and optimize Trie Lua script to match from
longest prefix first.

											
										
										
											2026-02-09 06:46:32 +08:00
+									parsedReq.SessionContext = &service.SessionContext{
 										ClientIP:  ip.GetClientIP(c),
 										UserAgent: c.GetHeader("User-Agent"),
 										APIKeyID:  apiKey.ID,
 									}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 									// 选择支持该模型的账号
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+									if err != nil {
 										h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error())
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsSelectedAccount(c, account.ID)
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
 									// 转发请求（不记录使用量）
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									if err := h.gatewayService.ForwardCountTokens(c.Request.Context(), c, account, parsedReq); err != nil {
-												feat(gateway): 添加 /v1/messages/count_tokens 端点

实现 Claude API 的 token 计数功能，支持 OAuth、SetupToken 和 ApiKey 三种账号类型。

特点：
- 校验订阅/余额（不扣费）
- 不计算用户和账号并发
- 不记录使用量
- 支持模型映射（ApiKey 账号）
- 支持 OAuth 账号的指纹管理和 401 重试

											
										
										
											2025-12-19 11:12:41 +08:00
+										log.Printf("Forward count_tokens request failed: %v", err)
 										// 错误响应已在 ForwardCountTokens 中处理
 										return
 									}
 								}
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+								// InterceptType 表示请求拦截类型
 								type InterceptType int
 								const (
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									InterceptTypeNone              InterceptType = iota
 									InterceptTypeWarmup                          // 预热请求（返回 "New Conversation"）
 									InterceptTypeSuggestionMode                  // SUGGESTION MODE（返回空字符串）
 									InterceptTypeMaxTokensOneHaiku               // max_tokens=1 + haiku 探测请求（返回 "#"）
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+								)
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+								// isHaikuModel 检查模型名称是否包含 "haiku"（大小写不敏感）
 								func isHaikuModel(model string) bool {
 									return strings.Contains(strings.ToLower(model), "haiku")
 								}
 								// isMaxTokensOneHaikuRequest reports whether a request looks like the
 								// max_tokens=1 + haiku probe (per the original note, sent by Claude Code to
 								// verify API connectivity). All three conditions must hold: the request is
 								// not streaming, max_tokens equals 1, and the model is a haiku model.
 								func isMaxTokensOneHaikuRequest(model string, maxTokens int, isStream bool) bool {
 									if isStream || maxTokens != 1 {
 										return false
 									}
 									return isHaikuModel(model)
 								}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+								// detectInterceptType 检测请求是否需要拦截，返回拦截类型
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+								// 参数说明：
 								//   - body: 请求体字节
 								//   - model: 请求的模型名称
 								//   - maxTokens: max_tokens 值
 								//   - isStream: 是否为流式请求
 								//   - isClaudeCodeClient: 是否已通过 Claude Code 客户端校验
 								func detectInterceptType(body []byte, model string, maxTokens int, isStream bool, isClaudeCodeClient bool) InterceptType {
 									// 优先检查 max_tokens=1 + haiku 探测请求（仅非流式）
 									if isClaudeCodeClient && isMaxTokensOneHaikuRequest(model, maxTokens, isStream) {
 										return InterceptTypeMaxTokensOneHaiku
 									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// 快速检查：如果不包含任何关键字，直接返回
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									bodyStr := string(body)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									hasSuggestionMode := strings.Contains(bodyStr, "[SUGGESTION MODE:")
 									hasWarmupKeyword := strings.Contains(bodyStr, "title") || strings.Contains(bodyStr, "Warmup")
 									if !hasSuggestionMode && !hasWarmupKeyword {
 										return InterceptTypeNone
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// 解析请求（只解析一次）
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									var req struct {
 										Messages []struct {
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+											Role    string `json:"role"`
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+											Content []struct {
 												Type string `json:"type"`
 												Text string `json:"text"`
 											} `json:"content"`
 										} `json:"messages"`
 										System []struct {
 											Text string `json:"text"`
 										} `json:"system"`
 									}
 									if err := json.Unmarshal(body, &req); err != nil {
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+										return InterceptTypeNone
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// 检查 SUGGESTION MODE（最后一条 user 消息）
 									if hasSuggestionMode && len(req.Messages) > 0 {
 										lastMsg := req.Messages[len(req.Messages)-1]
 										if lastMsg.Role == "user" && len(lastMsg.Content) > 0 &&
 											lastMsg.Content[0].Type == "text" &&
 											strings.HasPrefix(lastMsg.Content[0].Text, "[SUGGESTION MODE:") {
 											return InterceptTypeSuggestionMode
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										}
 									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// 检查 Warmup 请求
 									if hasWarmupKeyword {
 										// 检查 messages 中的标题提示模式
 										for _, msg := range req.Messages {
 											for _, content := range msg.Content {
 												if content.Type == "text" {
 													if strings.Contains(content.Text, "Please write a 5-10 word title for the following conversation:") ||
 														content.Text == "Warmup" {
 														return InterceptTypeWarmup
 													}
 												}
 											}
 										}
 										// 检查 system 中的标题提取模式
 										for _, sys := range req.System {
 											if strings.Contains(sys.Text, "nalyze if this message indicates a new conversation topic. If it does, extract a 2-3 word title") {
 												return InterceptTypeWarmup
 											}
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										}
 									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									return InterceptTypeNone
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+								}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+								// sendMockInterceptStream 发送流式 mock 响应（用于请求拦截）
 								func sendMockInterceptStream(c *gin.Context, model string, interceptType InterceptType) {
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									c.Header("Content-Type", "text/event-stream")
 									c.Header("Cache-Control", "no-cache")
 									c.Header("Connection", "keep-alive")
 									c.Header("X-Accel-Buffering", "no")
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// 根据拦截类型决定响应内容
 									var msgID string
 									var outputTokens int
 									var textDeltas []string
 									switch interceptType {
 									case InterceptTypeSuggestionMode:
 										msgID = "msg_mock_suggestion"
 										outputTokens = 1
 										textDeltas = []string{""} // 空内容
 									default: // InterceptTypeWarmup
 										msgID = "msg_mock_warmup"
 										outputTokens = 2
 										textDeltas = []string{"New", " Conversation"}
 									}
-												fix: 修复 SSE/JSON 转义和 nil 安全问题

基于 Codex 审查建议修复关键安全问题。

SSE/JSON 转义修复：
- handleStreamingAwareError: 使用 json.Marshal 替代字符串拼接
- sendMockWarmupStream: 使用 json.Marshal 生成 message_start 事件
- 防止错误消息中的特殊字符导致无效 JSON

Nil 安全检查：
- SelectAccountWithLoadAwareness: 粘性会话层添加 s.cache != nil 检查
- BindStickySession: 添加 s.cache == nil 检查
- 防止 cache 未初始化时的运行时 panic

影响：
- 提升 SSE 错误处理的健壮性
- 避免客户端 JSON 解析失败
- 增强代码防御性编程

											
										
										
											2026-01-01 19:47:26 -08:00
+									// Build message_start event with proper JSON marshaling
 									messageStart := map[string]any{
 										"type": "message_start",
 										"message": map[string]any{
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+											"id":            msgID,
-												fix: 修复 SSE/JSON 转义和 nil 安全问题

基于 Codex 审查建议修复关键安全问题。

SSE/JSON 转义修复：
- handleStreamingAwareError: 使用 json.Marshal 替代字符串拼接
- sendMockWarmupStream: 使用 json.Marshal 生成 message_start 事件
- 防止错误消息中的特殊字符导致无效 JSON

Nil 安全检查：
- SelectAccountWithLoadAwareness: 粘性会话层添加 s.cache != nil 检查
- BindStickySession: 添加 s.cache == nil 检查
- 防止 cache 未初始化时的运行时 panic

影响：
- 提升 SSE 错误处理的健壮性
- 避免客户端 JSON 解析失败
- 增强代码防御性编程

											
										
										
											2026-01-01 19:47:26 -08:00
+											"type":          "message",
 											"role":          "assistant",
 											"model":         model,
 											"content":       []any{},
 											"stop_reason":   nil,
 											"stop_sequence": nil,
 											"usage": map[string]int{
 												"input_tokens":  10,
 												"output_tokens": 0,
 											},
 										},
 									}
 									messageStartJSON, _ := json.Marshal(messageStart)
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// Build events
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									events := []string{
-												fix: 修复 SSE/JSON 转义和 nil 安全问题

基于 Codex 审查建议修复关键安全问题。

SSE/JSON 转义修复：
- handleStreamingAwareError: 使用 json.Marshal 替代字符串拼接
- sendMockWarmupStream: 使用 json.Marshal 生成 message_start 事件
- 防止错误消息中的特殊字符导致无效 JSON

Nil 安全检查：
- SelectAccountWithLoadAwareness: 粘性会话层添加 s.cache != nil 检查
- BindStickySession: 添加 s.cache == nil 检查
- 防止 cache 未初始化时的运行时 panic

影响：
- 提升 SSE 错误处理的健壮性
- 避免客户端 JSON 解析失败
- 增强代码防御性编程

											
										
										
											2026-01-01 19:47:26 -08:00
+										`event: message_start` + "\n" + `data: ` + string(messageStartJSON),
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										`event: content_block_start` + "\n" + `data: {"content_block":{"text":"","type":"text"},"index":0,"type":"content_block_start"}`,
 									}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									// Add text deltas
 									for _, text := range textDeltas {
 										delta := map[string]any{
 											"type":  "content_block_delta",
 											"index": 0,
 											"delta": map[string]string{
 												"type": "text_delta",
 												"text": text,
 											},
 										}
 										deltaJSON, _ := json.Marshal(delta)
 										events = append(events, `event: content_block_delta`+"\n"+`data: `+string(deltaJSON))
 									}
 									// Add final events
 									messageDelta := map[string]any{
 										"type": "message_delta",
 										"delta": map[string]any{
 											"stop_reason":   "end_turn",
 											"stop_sequence": nil,
 										},
 										"usage": map[string]int{
 											"input_tokens":  10,
 											"output_tokens": outputTokens,
 										},
 									}
 									messageDeltaJSON, _ := json.Marshal(messageDelta)
 									events = append(events,
 										`event: content_block_stop`+"\n"+`data: {"index":0,"type":"content_block_stop"}`,
 										`event: message_delta`+"\n"+`data: `+string(messageDeltaJSON),
 										`event: message_stop`+"\n"+`data: {"type":"message_stop"}`,
 									)
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+									for _, event := range events {
 										_, _ = c.Writer.WriteString(event + "\n\n")
 										c.Writer.Flush()
 										time.Sleep(20 * time.Millisecond)
 									}
 								}
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+								// generateRealisticMsgID 生成仿真的消息 ID（msg_bdrk_XXXXXXX 格式）
 								// The result is the prefix "msg_bdrk_" followed by 24 random characters
 								// drawn from [0-9A-Za-z]; the shape is intended to resemble a real Claude
 								// API message ID.
 								//
 								// Characters are picked with rejection sampling so that each of the 62
 								// symbols is equally likely; a plain byte%62 would favour the first eight
 								// symbols because 256 is not a multiple of 62.
 								//
 								// If the random source fails, the function falls back to
 								// "msg_bdrk_<unix-nanoseconds>".
 								func generateRealisticMsgID() string {
 									const charset = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 									const idLen = 24
 									// Largest multiple of len(charset) that fits in one byte (248); random
 									// bytes at or above it are discarded to avoid modulo bias.
 									const limit = 256 - 256%len(charset)

 									id := make([]byte, 0, idLen)
 									// Over-read so that a single pass almost always yields enough bytes.
 									randomBytes := make([]byte, idLen*2)
 									for len(id) < idLen {
 										if _, err := rand.Read(randomBytes); err != nil {
 											return fmt.Sprintf("msg_bdrk_%d", time.Now().UnixNano())
 										}
 										for _, v := range randomBytes {
 											if int(v) >= limit {
 												continue
 											}
 											id = append(id, charset[int(v)%len(charset)])
 											if len(id) == idLen {
 												break
 											}
 										}
 									}
 									return "msg_bdrk_" + string(id)
 								}
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+								// sendMockInterceptResponse 发送非流式 mock 响应（用于请求拦截）
 								func sendMockInterceptResponse(c *gin.Context, model string, interceptType InterceptType) {
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									var msgID, text, stopReason string
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									var outputTokens int
 									switch interceptType {
 									case InterceptTypeSuggestionMode:
 										msgID = "msg_mock_suggestion"
 										text = ""
 										outputTokens = 1
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+										stopReason = "end_turn"
 									case InterceptTypeMaxTokensOneHaiku:
 										msgID = generateRealisticMsgID()
 										text = "#"
 										outputTokens = 1
 										stopReason = "max_tokens" // max_tokens=1 探测请求的 stop_reason 应为 max_tokens
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									default: // InterceptTypeWarmup
 										msgID = "msg_mock_warmup"
 										text = "New Conversation"
 										outputTokens = 2
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+										stopReason = "end_turn"
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+									}
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									// 构建完整的响应格式（与 Claude API 响应格式一致）
 									response := gin.H{
 										"model":         model,
 										"id":            msgID,
 										"type":          "message",
 										"role":          "assistant",
 										"content":       []gin.H{{"type": "text", "text": text}},
 										"stop_reason":   stopReason,
 										"stop_sequence": nil,
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										"usage": gin.H{
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+											"input_tokens":                10,
 											"cache_creation_input_tokens": 0,
 											"cache_read_input_tokens":     0,
 											"cache_creation": gin.H{
 												"ephemeral_5m_input_tokens": 0,
 												"ephemeral_1h_input_tokens": 0,
 											},
-												merge upstream main

											
										
										
											2026-02-02 22:13:50 +08:00
+											"output_tokens": outputTokens,
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+											"total_tokens":  10 + outputTokens,
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+										},
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									}
 									c.JSON(http.StatusOK, response)
-												feat(account): 支持账号级别拦截预热请求

- 新增 intercept_warmup_requests 配置项，存储在 credentials 字段
- 启用后，标题生成、Warmup 等预热请求返回 mock 响应，不消耗上游 token
- 前端支持所有账号类型（OAuth、Setup Token、API Key）的开关配置
- 修复 OAuth 凭证刷新时丢失非 token 配置的问题

											
										
										
											2025-12-19 16:39:25 +08:00
+								}
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
 								func billingErrorDetails(err error) (status int, code, message string) {
 									if errors.Is(err, service.ErrBillingServiceUnavailable) {
-												merge: 合并 test 分支到 test-dev，解决冲突

解决的冲突文件：
- wire_gen.go: 合并 ConcurrencyService/CRSSyncService 参数和 userAttributeHandler
- gateway_handler.go: 合并 pkg/errors 和 antigravity 导入
- gateway_service.go: 合并 validateUpstreamBaseURL 和 GetAvailableModels
- config.example.yaml: 合并 billing/turnstile 配置和额外 gateway 选项

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-03 11:36:31 +08:00
+										msg := pkgerrors.Message(err)
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+										if msg == "" {
 											msg = "Billing service temporarily unavailable. Please retry later."
 										}
 										return http.StatusServiceUnavailable, "billing_service_error", msg
 									}
-												merge: 合并 test 分支到 test-dev，解决冲突

解决的冲突文件：
- wire_gen.go: 合并 ConcurrencyService/CRSSyncService 参数和 userAttributeHandler
- gateway_handler.go: 合并 pkg/errors 和 antigravity 导入
- gateway_service.go: 合并 validateUpstreamBaseURL 和 GetAvailableModels
- config.example.yaml: 合并 billing/turnstile 配置和额外 gateway 选项

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2026-01-03 11:36:31 +08:00
+									msg := pkgerrors.Message(err)
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+									if msg == "" {
 										msg = err.Error()
 									}
 									return http.StatusForbidden, "billing_error", msg
 								}