backend/internal/handler/openai_gateway_handler.go

package handler

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
	"time"

	"github.com/Wei-Shaw/sub2api/internal/config"
	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
	"github.com/Wei-Shaw/sub2api/internal/service"

	"github.com/gin-gonic/gin"
)

// OpenAIGatewayHandler handles OpenAI API gateway requests.
// It holds the services and settings used by the Responses endpoint and its
// error-reporting helpers.
type OpenAIGatewayHandler struct {
	// gatewayService generates session hashes, selects accounts, binds sticky
	// sessions, forwards requests upstream, and records usage.
	gatewayService          *service.OpenAIGatewayService
	// billingCacheService checks billing eligibility before a request is forwarded.
	billingCacheService     *service.BillingCacheService
	// apiKeyService is handed to the usage recorder together with each result.
	apiKeyService           *service.APIKeyService
	// errorPassthroughService matches upstream errors against passthrough rules.
	// It may be nil, in which case only the default error mapping is used.
	errorPassthroughService *service.ErrorPassthroughService
	// concurrencyHelper manages wait counts and user/account concurrency slots.
	concurrencyHelper       *ConcurrencyHelper
	// maxAccountSwitches caps how many times a request may switch to another
	// account after an upstream failover error.
	maxAccountSwitches      int
	// cfg is the application configuration; it may be nil.
	cfg                     *config.Config
}

// NewOpenAIGatewayHandler builds an OpenAIGatewayHandler from its dependencies.
//
// cfg may be nil. In that case the concurrency helper is created with a zero
// ping interval and the account-switch limit stays at its default of 3. With a
// non-nil cfg, the ping interval is cfg.Concurrency.PingInterval seconds and
// the switch limit is cfg.Gateway.MaxAccountSwitches when that value is
// positive.
func NewOpenAIGatewayHandler(
	gatewayService *service.OpenAIGatewayService,
	concurrencyService *service.ConcurrencyService,
	billingCacheService *service.BillingCacheService,
	apiKeyService *service.APIKeyService,
	errorPassthroughService *service.ErrorPassthroughService,
	cfg *config.Config,
) *OpenAIGatewayHandler {
	const defaultMaxAccountSwitches = 3

	h := &OpenAIGatewayHandler{
		gatewayService:          gatewayService,
		billingCacheService:     billingCacheService,
		apiKeyService:           apiKeyService,
		errorPassthroughService: errorPassthroughService,
		maxAccountSwitches:      defaultMaxAccountSwitches,
		cfg:                     cfg,
	}

	// Zero unless a configuration is supplied.
	var ssePingInterval time.Duration
	if cfg != nil {
		ssePingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second
		if configured := cfg.Gateway.MaxAccountSwitches; configured > 0 {
			h.maxAccountSwitches = configured
		}
	}

	h.concurrencyHelper = NewConcurrencyHelper(concurrencyService, SSEPingFormatComment, ssePingInterval)
	return h
}

// Responses handles the OpenAI Responses API endpoint.
// POST /openai/v1/responses
//
// Flow, in order:
//   - Read the API key and auth subject from the gin context (401 / 500 if absent).
//   - Read and parse the JSON body; reject oversized (413), unreadable, empty,
//     or unparseable bodies and bodies without a model (400).
//   - For requests that are not treated as Codex CLI, fill in default
//     instructions when the request has none and re-serialize the body.
//   - Reject function_call_output inputs that have no linkable context (400).
//   - Enforce the user's wait-queue limit, acquire a user concurrency slot, and
//     re-check billing eligibility.
//   - Select an account, acquire its concurrency slot, and forward the request,
//     switching to another account on upstream failover errors up to
//     maxAccountSwitches times.
//   - On success, record usage asynchronously.
func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
	// Get apiKey and user from context (set by ApiKeyAuth middleware)
	apiKey, ok := middleware2.GetAPIKeyFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
		return
	}

	subject, ok := middleware2.GetAuthSubjectFromContext(c)
	if !ok {
		h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
		return
	}

	// Read request body
	body, err := io.ReadAll(c.Request.Body)
	if err != nil {
		// A max-bytes error means the body exceeded the configured limit: report 413.
		if maxErr, ok := extractMaxBytesError(err); ok {
			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
			return
		}
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
		return
	}

	if len(body) == 0 {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
		return
	}

	// Record the raw body now; model and stream are not known until it is parsed.
	setOpsRequestContext(c, "", false, body)

	// Parse request body to map for potential modification
	var reqBody map[string]any
	if err := json.Unmarshal(body, &reqBody); err != nil {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
		return
	}

	// Extract model and stream (missing or wrongly typed values fall back to "" / false)
	reqModel, _ := reqBody["model"].(string)
	reqStream, _ := reqBody["stream"].(bool)

	// Validate that model is provided (it is required).
	if reqModel == "" {
		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
		return
	}

	// A request counts as Codex CLI when its User-Agent matches or when the
	// configuration forces Codex CLI mode. Only other requests get default
	// instructions injected, and only when theirs are missing or blank.
	userAgent := c.GetHeader("User-Agent")
	isCodexCLI := openai.IsCodexCLIRequest(userAgent) || (h.cfg != nil && h.cfg.Gateway.ForceCodexCLI)
	if !isCodexCLI {
		existingInstructions, _ := reqBody["instructions"].(string)
		if strings.TrimSpace(existingInstructions) == "" {
			if instructions := strings.TrimSpace(service.GetOpenCodeInstructions()); instructions != "" {
				reqBody["instructions"] = instructions
				// Re-serialize body
				body, err = json.Marshal(reqBody)
				if err != nil {
					h.errorResponse(c, http.StatusInternalServerError, "api_error", "Failed to process request")
					return
				}
			}
		}
	}

	// Record again with the parsed model/stream and the (possibly rewritten) body.
	setOpsRequestContext(c, reqModel, reqStream, body)

	// Validate up front that function_call_output has context it can be linked
	// to, so the upstream does not answer with a 400.
	// This requires previous_response_id, or a tool_call/function_call with a
	// call_id inside input, or an item_reference whose id matches the call_id.
	if service.HasFunctionCallOutput(reqBody) {
		previousResponseID, _ := reqBody["previous_response_id"].(string)
		if strings.TrimSpace(previousResponseID) == "" && !service.HasToolCallContext(reqBody) {
			if service.HasFunctionCallOutputMissingCallID(reqBody) {
				log.Printf("[OpenAI Handler] function_call_output 缺少 call_id: model=%s", reqModel)
				h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "function_call_output requires call_id or previous_response_id; if relying on history, ensure store=true and reuse previous_response_id")
				return
			}
			callIDs := service.FunctionCallOutputCallIDs(reqBody)
			if !service.HasItemReferenceForCallIDs(reqBody, callIDs) {
				log.Printf("[OpenAI Handler] function_call_output 缺少匹配的 item_reference: model=%s", reqModel)
				h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "function_call_output requires item_reference ids matching each call_id, or previous_response_id/tool_call context; if relying on history, ensure store=true and reuse previous_response_id")
				return
			}
		}
	}

	// Track if we've started streaming (for error handling).
	// The slot-acquisition helpers receive a pointer to this flag.
	streamStarted := false

	// Get subscription info (may be nil)
	subscription, _ := middleware2.GetSubscriptionFromContext(c)

	// 0. Check if wait queue is full
	maxWait := service.CalculateMaxWait(subject.Concurrency)
	canWait, err := h.concurrencyHelper.IncrementWaitCount(c.Request.Context(), subject.UserID, maxWait)
	// waitCounted is true only while this request holds an increment on the
	// user's wait count that still has to be undone.
	waitCounted := false
	if err != nil {
		log.Printf("Increment wait count failed: %v", err)
		// On error, allow request to proceed
	} else if !canWait {
		h.errorResponse(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later")
		return
	}
	if err == nil && canWait {
		waitCounted = true
	}
	// Safety net: undo the wait count on any return path that has not done so already.
	defer func() {
		if waitCounted {
			h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
		}
	}()

	// 1. First acquire user concurrency slot
	userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted)
	if err != nil {
		log.Printf("User concurrency acquire failed: %v", err)
		h.handleConcurrencyError(c, err, "user", streamStarted)
		return
	}
	// User slot acquired: no longer waiting.
	if waitCounted {
		h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
		waitCounted = false
	}
	// Make sure the slot is also released when the request is canceled, so a
	// long-lived connection that is dropped passively does not leak it.
	userReleaseFunc = wrapReleaseOnDone(c.Request.Context(), userReleaseFunc)
	if userReleaseFunc != nil {
		defer userReleaseFunc()
	}

	// 2. Re-check billing eligibility after wait
	if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
		log.Printf("Billing eligibility check failed after wait: %v", err)
		status, code, message := billingErrorDetails(err)
		h.handleStreamingAwareError(c, status, code, message, streamStarted)
		return
	}

	// Generate session hash (header first; fallback to prompt_cache_key)
	sessionHash := h.gatewayService.GenerateSessionHash(c, reqBody)

	// Failover bookkeeping: how many switches have happened, which accounts
	// already failed (passed to account selection), and the most recent
	// failover error for the final client response.
	maxAccountSwitches := h.maxAccountSwitches
	switchCount := 0
	failedAccountIDs := make(map[int64]struct{})
	var lastFailoverErr *service.UpstreamFailoverError

	// Each iteration tries one account; the loop repeats only on failover errors.
	for {
		// Select account supporting the requested model
		log.Printf("[OpenAI Handler] Selecting account: groupID=%v model=%s", apiKey.GroupID, reqModel)
		selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, failedAccountIDs)
		if err != nil {
			log.Printf("[OpenAI Handler] SelectAccount failed: %v", err)
			// No account has failed yet: selection failed on the first attempt.
			if len(failedAccountIDs) == 0 {
				h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
				return
			}
			// Otherwise earlier accounts failed and selection found no further one:
			// report based on the last upstream failure.
			if lastFailoverErr != nil {
				h.handleFailoverExhausted(c, lastFailoverErr, streamStarted)
			} else {
				h.handleFailoverExhaustedSimple(c, 502, streamStarted)
			}
			return
		}
		account := selection.Account
		log.Printf("[OpenAI Handler] Selected account: id=%d name=%s", account.ID, account.Name)
		setOpsSelectedAccount(c, account.ID)

		// 3. Acquire account concurrency slot
		accountReleaseFunc := selection.ReleaseFunc
		if !selection.Acquired {
			// The selection did not come with a slot; wait for one per its wait plan.
			if selection.WaitPlan == nil {
				h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
				return
			}
			// Same bookkeeping as waitCounted above, for the account's wait count.
			accountWaitCounted := false
			canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
			if err != nil {
				log.Printf("Increment account wait count failed: %v", err)
			} else if !canWait {
				log.Printf("Account wait queue full: account=%d", account.ID)
				h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
				return
			}
			if err == nil && canWait {
				accountWaitCounted = true
			}
			// NOTE(review): this defer sits inside the for loop, so it runs when
			// Responses returns, not at the end of the iteration. The flag is
			// cleared below once the slot has been acquired.
			defer func() {
				if accountWaitCounted {
					h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
				}
			}()

			accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
				c,
				account.ID,
				selection.WaitPlan.MaxConcurrency,
				selection.WaitPlan.Timeout,
				reqStream,
				&streamStarted,
			)
			if err != nil {
				log.Printf("Account concurrency acquire failed: %v", err)
				h.handleConcurrencyError(c, err, "account", streamStarted)
				return
			}
			// Account slot acquired: no longer waiting.
			if accountWaitCounted {
				h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
				accountWaitCounted = false
			}
			// A sticky-session bind failure is only logged; the request continues.
			if err := h.gatewayService.BindStickySession(c.Request.Context(), apiKey.GroupID, sessionHash, account.ID); err != nil {
				log.Printf("Bind sticky session failed: %v", err)
			}
		}
		// The account slot / wait count must be reclaimed safely on timeout or disconnect.
		accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)

		// Forward request
		result, err := h.gatewayService.Forward(c.Request.Context(), c, account, body)
		// Release the account slot as soon as forwarding returns, whatever the outcome.
		if accountReleaseFunc != nil {
			accountReleaseFunc()
		}
		if err != nil {
			var failoverErr *service.UpstreamFailoverError
			if errors.As(err, &failoverErr) {
				// Remember the failed account so it is passed to the next selection.
				failedAccountIDs[account.ID] = struct{}{}
				lastFailoverErr = failoverErr
				if switchCount >= maxAccountSwitches {
					h.handleFailoverExhausted(c, failoverErr, streamStarted)
					return
				}
				switchCount++
				log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
				continue
			}
			// Error response already handled in Forward, just log
			log.Printf("Account %d: Forward request failed: %v", account.ID, err)
			return
		}

		// Capture request details here for the asynchronous recording below, so
		// the goroutine does not access gin.Context.
		userAgent := c.GetHeader("User-Agent")
		clientIP := ip.GetClientIP(c)

		// Async record usage, on a background context with a 10-second timeout
		// instead of the request context.
		go func(result *service.OpenAIForwardResult, usedAccount *service.Account, ua, ip string) {
			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
			defer cancel()
			if err := h.gatewayService.RecordUsage(ctx, &service.OpenAIRecordUsageInput{
				Result:        result,
				APIKey:        apiKey,
				User:          apiKey.User,
				Account:       usedAccount,
				Subscription:  subscription,
				UserAgent:     ua,
				IPAddress:     ip,
				APIKeyService: h.apiKeyService,
			}); err != nil {
				log.Printf("Record usage failed: %v", err)
			}
		}(result, account, userAgent, clientIP)
		return
	}
}

// handleConcurrencyError reports a failed concurrency-slot acquisition to the
// client as a 429 rate_limit_error. slotType is inserted into the message to
// say which limit was hit; err is accepted but not used by this method.
func (h *OpenAIGatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotType string, streamStarted bool) {
	message := fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType)
	h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", message, streamStarted)
}

// handleFailoverExhausted writes the final client error once account failover
// has given up, based on the given upstream failure.
//
// When an error passthrough service is configured and the upstream response
// body is non-empty, the "openai" passthrough rules are consulted first. A
// matching rule decides the status code (the upstream code, unless the rule
// disables code passthrough and supplies its own) and the message (the one
// extracted from the upstream body, unless the rule disables body passthrough
// and supplies a custom one); the error type is then "upstream_error".
// Without a matching rule the default mapping from mapUpstreamError applies.
func (h *OpenAIGatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, streamStarted bool) {
	upstreamStatus := failoverErr.StatusCode
	upstreamBody := failoverErr.ResponseBody

	// Passthrough rules take precedence over the default mapping.
	canMatchRules := h.errorPassthroughService != nil && len(upstreamBody) > 0
	if canMatchRules {
		rule := h.errorPassthroughService.MatchRule("openai", upstreamStatus, upstreamBody)
		if rule != nil {
			// Status code sent to the client.
			clientStatus := upstreamStatus
			if rule.ResponseCode != nil && !rule.PassthroughCode {
				clientStatus = *rule.ResponseCode
			}

			// Message sent to the client.
			clientMessage := service.ExtractUpstreamErrorMessage(upstreamBody)
			if rule.CustomMessage != nil && !rule.PassthroughBody {
				clientMessage = *rule.CustomMessage
			}

			h.handleStreamingAwareError(c, clientStatus, "upstream_error", clientMessage, streamStarted)
			return
		}
	}

	// No rule applied: fall back to the default error mapping.
	fallbackStatus, fallbackType, fallbackMessage := h.mapUpstreamError(upstreamStatus)
	h.handleStreamingAwareError(c, fallbackStatus, fallbackType, fallbackMessage, streamStarted)
}

// handleFailoverExhaustedSimple is the reduced variant of the failover-exhausted
// handling for cases where no upstream response body is available: it maps
// statusCode through mapUpstreamError and writes the resulting error.
func (h *OpenAIGatewayHandler) handleFailoverExhaustedSimple(c *gin.Context, statusCode int, streamStarted bool) {
	clientStatus, clientErrType, clientMessage := h.mapUpstreamError(statusCode)
	h.handleStreamingAwareError(c, clientStatus, clientErrType, clientMessage, streamStarted)
}

// mapUpstreamError translates an upstream HTTP status code into the status
// code, error type, and message returned to the client.
//
// Upstream 429 stays a 429 "rate_limit_error" and upstream 529 becomes a 503;
// every other code, recognized or not, is reported as a 502. All mappings
// except 429 use the "upstream_error" type.
func (h *OpenAIGatewayHandler) mapUpstreamError(statusCode int) (int, string, string) {
	// Start from the mapping used for unrecognized codes and override per case.
	status, errType, message := http.StatusBadGateway, "upstream_error", "Upstream request failed"

	switch statusCode {
	case http.StatusUnauthorized:
		message = "Upstream authentication failed, please contact administrator"
	case http.StatusForbidden:
		message = "Upstream access forbidden, please contact administrator"
	case http.StatusTooManyRequests:
		status, errType = http.StatusTooManyRequests, "rate_limit_error"
		message = "Upstream rate limit exceeded, please retry later"
	case 529: // net/http defines no constant for this code
		status = http.StatusServiceUnavailable
		message = "Upstream service overloaded, please retry later"
	case http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout:
		message = "Upstream service temporarily unavailable"
	}

	return status, errType, message
}

// handleStreamingAwareError reports an error to the client in the form the
// current response state allows.
//
// When streamStarted is false, it writes a regular JSON error response with
// the given HTTP status. When streamStarted is true, status is not used: the
// error is written as an SSE "error" event and flushed. If the writer does not
// implement http.Flusher, nothing is written in the streaming case.
//
// errType and message are JSON-encoded before being placed in the event, so
// quotes, backslashes, or line breaks in them (for example from upstream error
// text) cannot produce invalid JSON or split the SSE data line.
func (h *OpenAIGatewayHandler) handleStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
	if !streamStarted {
		// Normal case: return JSON response with proper status code
		h.errorResponse(c, status, errType, message)
		return
	}

	// Stream already started, send error as SSE event then close
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		return
	}

	// jsonString renders s as a JSON string literal, including the quotes.
	jsonString := func(s string) string {
		encoded, err := json.Marshal(s)
		if err != nil {
			// Not expected for a plain string; record it and emit an empty
			// string so the event itself stays well-formed.
			_ = c.Error(err)
			return `""`
		}
		return string(encoded)
	}

	// Send error event in OpenAI SSE format
	errorEvent := fmt.Sprintf(`event: error`+"\n"+`data: {"error": {"type": %s, "message": %s}}`+"\n\n", jsonString(errType), jsonString(message))
	if _, err := fmt.Fprint(c.Writer, errorEvent); err != nil {
		_ = c.Error(err)
	}
	flusher.Flush()
}

// errorResponse writes an error in the OpenAI API format: a JSON object with a
// single "error" member carrying the error type and message, sent with the
// given HTTP status.
func (h *OpenAIGatewayHandler) errorResponse(c *gin.Context, status int, errType, message string) {
	detail := gin.H{
		"type":    errType,
		"message": message,
	}
	c.JSON(status, gin.H{"error": detail})
}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+								package handler
 								import (
 									"context"
 									"encoding/json"
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									"errors"
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									"fmt"
 									"io"
 									"log"
 									"net/http"
-												Merge up/main

											
										
										
											2026-01-10 21:57:57 +08:00
+									"strings"
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									"time"
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/config"
-												fix: 修复反向代理下客户端 IP 获取错误

											
										
										
											2026-01-12 20:44:38 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
-												refactor: 重命名 go module

											
										
										
											2025-12-24 21:07:21 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/service"
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
 									"github.com/gin-gonic/gin"
 								)
 								// OpenAIGatewayHandler handles OpenAI API gateway requests
 								type OpenAIGatewayHandler struct {
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+									gatewayService          *service.OpenAIGatewayService
 									billingCacheService     *service.BillingCacheService
 									apiKeyService           *service.APIKeyService
 									errorPassthroughService *service.ErrorPassthroughService
 									concurrencyHelper       *ConcurrencyHelper
 									maxAccountSwitches      int
-												feat(gateway): 支持强制 Codex CLI 模式并伪装 UA

- Codex CLI 请求仅使用内置 instructions，不再读取 opencode 缓存/回源\n- 新增 gateway.force_codex_cli（环境变量 GATEWAY_FORCE_CODEX_CLI）\n- ForceCodexCLI=true 时转发上游强制 User-Agent=codex_cli_rs/0.0.0\n- 更新 deploy 示例配置

											
										
										
											2026-02-07 09:21:15 +08:00
+									cfg                     *config.Config
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+								}
 								// NewOpenAIGatewayHandler creates a new OpenAIGatewayHandler
 								func NewOpenAIGatewayHandler(
 									gatewayService *service.OpenAIGatewayService,
 									concurrencyService *service.ConcurrencyService,
 									billingCacheService *service.BillingCacheService,
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+									apiKeyService *service.APIKeyService,
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+									errorPassthroughService *service.ErrorPassthroughService,
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									cfg *config.Config,
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+								) *OpenAIGatewayHandler {
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									pingInterval := time.Duration(0)
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+									maxAccountSwitches := 3
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									if cfg != nil {
 										pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+										if cfg.Gateway.MaxAccountSwitches > 0 {
 											maxAccountSwitches = cfg.Gateway.MaxAccountSwitches
 										}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									return &OpenAIGatewayHandler{
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+										gatewayService:          gatewayService,
 										billingCacheService:     billingCacheService,
 										apiKeyService:           apiKeyService,
 										errorPassthroughService: errorPassthroughService,
 										concurrencyHelper:       NewConcurrencyHelper(concurrencyService, SSEPingFormatComment, pingInterval),
 										maxAccountSwitches:      maxAccountSwitches,
-												feat(gateway): 支持强制 Codex CLI 模式并伪装 UA

- Codex CLI 请求仅使用内置 instructions，不再读取 opencode 缓存/回源\n- 新增 gateway.force_codex_cli（环境变量 GATEWAY_FORCE_CODEX_CLI）\n- ForceCodexCLI=true 时转发上游强制 User-Agent=codex_cli_rs/0.0.0\n- 更新 deploy 示例配置

											
										
										
											2026-02-07 09:21:15 +08:00
+										cfg:                     cfg,
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									}
 								}
 								// Responses handles OpenAI Responses API endpoint
 								// POST /openai/v1/responses
 								func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 									// Get apiKey and user from context (set by ApiKeyAuth middleware)
-												fix(lint): 修复所有 Go 命名规范问题

- 全局替换 ApiKey → APIKey（类型、字段、方法、变量）
- 修复所有 initialism 命名（API, SMTP, HTML, URL 等）
- 添加所有缺失的包注释
- 修复导出符号的注释格式

主要修改：
- ApiKey → APIKey（所有出现的地方）
- ApiKeyID → APIKeyID
- ApiKeyIDs → APIKeyIDs
- TestSmtpConnection → TestSMTPConnection
- HtmlURL → HTMLURL
- 添加 20+ 个包注释
- 修复 10+ 个导出符号注释格式

验证结果：
- ✓ golangci-lint: 0 issues
- ✓ 单元测试: 通过
- ✓ 集成测试: 通过

											
										
										
											2026-01-04 19:27:53 +08:00
+									apiKey, ok := middleware2.GetAPIKeyFromContext(c)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
 										return
 									}
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									subject, ok := middleware2.GetAuthSubjectFromContext(c)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									if !ok {
 										h.errorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
 										return
 									}
 									// Read request body
 									body, err := io.ReadAll(c.Request.Body)
 									if err != nil {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										if maxErr, ok := extractMaxBytesError(err); ok {
 											h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
 											return
 										}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 										return
 									}
 									if len(body) == 0 {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, "", false, body)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									// Parse request body to map for potential modification
 									var reqBody map[string]any
 									if err := json.Unmarshal(body, &reqBody); err != nil {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
 										return
 									}
 									// Extract model and stream
 									reqModel, _ := reqBody["model"].(string)
 									reqStream, _ := reqBody["stream"].(bool)
-												fix(网关): 添加 model 参数必填验证

在以下端点添加 model 参数的必填验证，缺失时直接返回 400 错误：
- /v1/messages
- /v1/messages/count_tokens
- /openai/v1/responses

修复前：空 model 会进入账号选择流程，最终由上游 API 返回错误
修复后：入口处直接拒绝，避免浪费资源和不明确的错误信息

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

											
										
										
											2025-12-31 16:17:45 +08:00
+									// 验证 model 必填
 									if reqModel == "" {
 										h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
 										return
 									}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									userAgent := c.GetHeader("User-Agent")
-												feat(gateway): 支持强制 Codex CLI 模式并伪装 UA

- Codex CLI 请求仅使用内置 instructions，不再读取 opencode 缓存/回源\n- 新增 gateway.force_codex_cli（环境变量 GATEWAY_FORCE_CODEX_CLI）\n- ForceCodexCLI=true 时转发上游强制 User-Agent=codex_cli_rs/0.0.0\n- 更新 deploy 示例配置

											
										
										
											2026-02-07 09:21:15 +08:00
+									isCodexCLI := openai.IsCodexCLIRequest(userAgent) || (h.cfg != nil && h.cfg.Gateway.ForceCodexCLI)
 									if !isCodexCLI {
-												Merge up/main

											
										
										
											2026-01-10 21:57:57 +08:00
+										existingInstructions, _ := reqBody["instructions"].(string)
 										if strings.TrimSpace(existingInstructions) == "" {
 											if instructions := strings.TrimSpace(service.GetOpenCodeInstructions()); instructions != "" {
 												reqBody["instructions"] = instructions
 												// Re-serialize body
 												body, err = json.Marshal(reqBody)
 												if err != nil {
 													h.errorResponse(c, http.StatusInternalServerError, "api_error", "Failed to process request")
 													return
 												}
 											}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										}
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									setOpsRequestContext(c, reqModel, reqStream, body)
-												fix(网关): 修复工具续链校验与存储策略

完善 function_call_output 续链校验与引用匹配
续链场景强制 store=true，过滤 input 时避免副作用
补充续链判断与过滤相关单元测试

测试: go test ./...

											
										
										
											2026-01-13 16:47:35 +08:00
+									// 提前校验 function_call_output 是否具备可关联上下文，避免上游 400。
 									// 要求 previous_response_id，或 input 内存在带 call_id 的 tool_call/function_call，
 									// 或带 id 且与 call_id 匹配的 item_reference。
 									if service.HasFunctionCallOutput(reqBody) {
 										previousResponseID, _ := reqBody["previous_response_id"].(string)
 										if strings.TrimSpace(previousResponseID) == "" && !service.HasToolCallContext(reqBody) {
 											if service.HasFunctionCallOutputMissingCallID(reqBody) {
 												log.Printf("[OpenAI Handler] function_call_output 缺少 call_id: model=%s", reqModel)
 												h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "function_call_output requires call_id or previous_response_id; if relying on history, ensure store=true and reuse previous_response_id")
 												return
 											}
 											callIDs := service.FunctionCallOutputCallIDs(reqBody)
 											if !service.HasItemReferenceForCallIDs(reqBody, callIDs) {
 												log.Printf("[OpenAI Handler] function_call_output 缺少匹配的 item_reference: model=%s", reqModel)
 												h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "function_call_output requires item_reference ids matching each call_id, or previous_response_id/tool_call context; if relying on history, ensure store=true and reuse previous_response_id")
 												return
 											}
 										}
 									}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									// Track if we've started streaming (for error handling)
 									streamStarted := false
 									// Get subscription info (may be nil)
-												refactor: 调整 server 目录结构

											
										
										
											2025-12-26 10:42:08 +08:00
+									subscription, _ := middleware2.GetSubscriptionFromContext(c)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
 									// 0. Check if wait queue is full
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									maxWait := service.CalculateMaxWait(subject.Concurrency)
 									canWait, err := h.concurrencyHelper.IncrementWaitCount(c.Request.Context(), subject.UserID, maxWait)
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									waitCounted := false
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									if err != nil {
 										log.Printf("Increment wait count failed: %v", err)
 										// On error, allow request to proceed
 									} else if !canWait {
 										h.errorResponse(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later")
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									if err == nil && canWait {
 										waitCounted = true
 									}
 									defer func() {
 										if waitCounted {
 											h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
 										}
 									}()
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
 									// 1. First acquire user concurrency slot
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									if err != nil {
 										log.Printf("User concurrency acquire failed: %v", err)
 										h.handleConcurrencyError(c, err, "user", streamStarted)
 										return
 									}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+									// User slot acquired: no longer waiting.
 									if waitCounted {
 										h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)
 										waitCounted = false
 									}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+									// 确保请求取消时也会释放槽位，避免长连接被动中断造成泄漏
 									userReleaseFunc = wrapReleaseOnDone(c.Request.Context(), userReleaseFunc)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+									if userReleaseFunc != nil {
 										defer userReleaseFunc()
 									}
 									// 2. Re-check billing eligibility after wait
-												refactor: 调整项目结构为单向依赖

											
										
										
											2025-12-26 15:40:24 +08:00
+									if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										log.Printf("Billing eligibility check failed after wait: %v", err)
-												feat(安全): 强化安全策略与配置校验

- 增加 CORS/CSP/安全响应头与代理信任配置

- 引入 URL 白名单与私网开关，校验上游与价格源

- 改善 API Key 处理与网关错误返回

- 管理端设置隐藏敏感字段并优化前端提示

- 增加计费熔断与相关配置示例

测试: go test ./...

											
										
										
											2026-01-02 17:40:57 +08:00
+										status, code, message := billingErrorDetails(err)
 										h.handleStreamingAwareError(c, status, code, message, streamStarted)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										return
 									}
-												fix(openai): 使用 prompt_cache_key 兜底粘性会话

opencode 请求不带 session_id/conversation_id，导致粘性会话失效。现在按 header 优先、prompt_cache_key 兜底生成 session hash，并补充单测验证优先级。

											
										
										
											2026-01-17 02:31:16 +08:00
+									// Generate session hash (header first; fallback to prompt_cache_key)
 									sessionHash := h.gatewayService.GenerateSessionHash(c, reqBody)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
-												feat(gateway): 账户切换次数和 Antigravity 限流时间可配置

- gateway.max_account_switches: 账户切换最大次数，默认 10
- gateway.max_account_switches_gemini: Gemini 账户切换次数，默认 3
- gateway.antigravity_fallback_cooldown_minutes: Antigravity 429 fallback 限流时间，默认 5 分钟
- Antigravity 429 不再重试，直接标记账户限流

											
										
										
											2026-01-16 20:18:30 +08:00
+									maxAccountSwitches := h.maxAccountSwitches
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									switchCount := 0
 									failedAccountIDs := make(map[int64]struct{})
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+									var lastFailoverErr *service.UpstreamFailoverError
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									for {
 										// Select account supporting the requested model
 										log.Printf("[OpenAI Handler] Selecting account: groupID=%v model=%s", apiKey.GroupID, reqModel)
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+										selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, failedAccountIDs)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+										if err != nil {
 											log.Printf("[OpenAI Handler] SelectAccount failed: %v", err)
 											if len(failedAccountIDs) == 0 {
 												h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
 												return
 											}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+											if lastFailoverErr != nil {
 												h.handleFailoverExhausted(c, lastFailoverErr, streamStarted)
 											} else {
 												h.handleFailoverExhaustedSimple(c, 502, streamStarted)
 											}
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											return
 										}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+										account := selection.Account
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+										log.Printf("[OpenAI Handler] Selected account: id=%d name=%s", account.ID, account.Name)
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+										setOpsSelectedAccount(c, account.ID)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+										// 3. Acquire account concurrency slot
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+										accountReleaseFunc := selection.ReleaseFunc
 										if !selection.Acquired {
 											if selection.WaitPlan == nil {
 												h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
 												return
 											}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											accountWaitCounted := false
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
 											if err != nil {
 												log.Printf("Increment account wait count failed: %v", err)
 											} else if !canWait {
 												log.Printf("Account wait queue full: account=%d", account.ID)
 												h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
 												return
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}
 											if err == nil && canWait {
 												accountWaitCounted = true
 											}
 											defer func() {
 												if accountWaitCounted {
-												fix(lint): 修复 openai_gateway_handler 的 staticcheck 问题

											
										
										
											2026-01-01 04:30:42 +08:00
+													h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
 												}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											}()
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
 											accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
 												c,
 												account.ID,
 												selection.WaitPlan.MaxConcurrency,
 												selection.WaitPlan.Timeout,
 												reqStream,
 												&streamStarted,
 											)
 											if err != nil {
 												log.Printf("Account concurrency acquire failed: %v", err)
 												h.handleConcurrencyError(c, err, "account", streamStarted)
 												return
 											}
-												feat(网关): 集成运维监控到 API 网关处理器

- 在 gateway_handler 中添加请求监控和错误追踪
- 在 openai_gateway_handler 中集成 ops 指标采集
- 在 gemini_v1beta_handler 中集成 ops 指标采集
- 更新 handler 基类支持 ops 错误日志记录

											
										
										
											2026-01-09 20:56:37 +08:00
+											if accountWaitCounted {
 												h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
 												accountWaitCounted = false
 											}
-												fix: 临时保存编译错误修复

- 添加 LinuxDo 和 Update 配置（从 main 分支缺失）
- 添加 LinuxDoConnectSyntheticEmailDomain 常量
- 添加 IsClaudeCodeClient context key
- 添加 GetLinuxDoConnectOAuthConfig 方法
- 修复 BindStickySession 调用签名
- 修复前端 i18n 重复属性
- 重新生成 wire 依赖注入代码

这个提交准备被合并替换，先保存以防丢失。

											
										
										
											2026-01-11 10:59:01 +08:00
+											if err := h.gatewayService.BindStickySession(c.Request.Context(), apiKey.GroupID, sessionHash, account.ID); err != nil {
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+												log.Printf("Bind sticky session failed: %v", err)
 											}
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+										}
-												fix(流式): 提升SSE稳定性并统一超时配置

- 扩展SSE行长与间隔超时处理，补充keepalive

- 写入失败与超长行时发送错误事件，修复并发释放

- 同步默认配置与示例配置，更新Caddy超时/压缩规则

- 新增OpenAI流式超时与超长行测试

测试: go test ./...

											
										
										
											2026-01-04 19:49:59 +08:00
+										// 账号槽位/等待计数需要在超时或断开时安全回收
 										accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+										// Forward request
 										result, err := h.gatewayService.Forward(c.Request.Context(), c, account, body)
 										if accountReleaseFunc != nil {
 											accountReleaseFunc()
 										}
 										if err != nil {
 											var failoverErr *service.UpstreamFailoverError
 											if errors.As(err, &failoverErr) {
 												failedAccountIDs[account.ID] = struct{}{}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+												lastFailoverErr = failoverErr
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+												if switchCount >= maxAccountSwitches {
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+													h.handleFailoverExhausted(c, failoverErr, streamStarted)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+													return
 												}
 												switchCount++
 												log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
 												continue
 											}
 											// Error response already handled in Forward, just log
-												fix: 转发失败日志添加账户ID信息

											
										
										
											2026-01-04 16:45:11 +08:00
+											log.Printf("Account %d: Forward request failed: %v", account.ID, err)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											return
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+										}
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
+										// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
 										userAgent := c.GetHeader("User-Agent")
-												fix: 修复反向代理下客户端 IP 获取错误

											
										
										
											2026-01-12 20:44:38 +08:00
+										clientIP := ip.GetClientIP(c)
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+										// Async record usage
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
+										go func(result *service.OpenAIForwardResult, usedAccount *service.Account, ua, ip string) {
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 											defer cancel()
 											if err := h.gatewayService.RecordUsage(ctx, &service.OpenAIRecordUsageInput{
-												feat(api-key): add independent quota and expiration support

This feature allows API Keys to have their own quota limits and expiration
times, independent of the user's balance.

Backend:
- Add quota, quota_used, expires_at fields to api_key schema
- Implement IsExpired() and IsQuotaExhausted() checks in middleware
- Add ResetQuota and ClearExpiration API endpoints
- Integrate quota billing in gateway handlers (OpenAI, Anthropic, Gemini)
- Include quota/expiration fields in auth cache for performance
- Expiration check returns 403, quota exhausted returns 429

Frontend:
- Add quota and expiration inputs to key create/edit dialog
- Add quick-select buttons for expiration (+7, +30, +90 days)
- Add reset quota confirmation dialog
- Add expires_at column to keys list
- Add i18n translations for new features (en/zh)

Migration:
- Add 045_add_api_key_quota.sql for new columns

											
										
										
											2026-02-03 19:01:49 +08:00
+												Result:        result,
 												APIKey:        apiKey,
 												User:          apiKey.User,
 												Account:       usedAccount,
 												Subscription:  subscription,
 												UserAgent:     ua,
 												IPAddress:     ip,
 												APIKeyService: h.apiKeyService,
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+											}); err != nil {
 												log.Printf("Record usage failed: %v", err)
 											}
-												fix(gateway): 修复 Claude Code 客户端检测和请求信息记录

- 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测
- 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题

											
										
										
											2026-01-12 15:19:40 +08:00
+										}(result, account, userAgent, clientIP)
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+										return
 									}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+								}
 								// handleConcurrencyError answers a failed concurrency-slot acquisition with a
 								// 429 "rate_limit_error". slotType is interpolated into the client-facing
 								// message (callers in this file pass "user" or "account"). The err argument
 								// is accepted for the callers' convenience but is not inspected here.
 								func (h *OpenAIGatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotType string, streamStarted bool) {
 									msg := fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType)
 									h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", msg, streamStarted)
 								}
-												feat: 新增全局错误透传规则功能

支持管理员配置上游错误如何返回给客户端：
- 新增 ErrorPassthroughRule 数据模型和 Ent Schema
- 实现规则的 CRUD API（/admin/error-passthrough-rules）
- 支持按错误码、关键词匹配，支持 any/all 匹配模式
- 支持按平台过滤（anthropic/openai/gemini/antigravity）
- 支持透传或自定义响应状态码和错误消息
- 实现两级缓存（Redis + 本地内存）和多实例同步
- 集成到 gateway_handler 的错误处理流程
- 新增前端管理界面组件
- 新增单元测试覆盖核心匹配逻辑

优化：
- 移除 refreshLocalCache 中的冗余排序（数据库已排序）
- 后端 Validate() 增加匹配条件非空校验

											
										
										
											2026-02-05 21:52:54 +08:00
+								func (h *OpenAIGatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, streamStarted bool) {
 									statusCode := failoverErr.StatusCode
 									responseBody := failoverErr.ResponseBody
 									// 先检查透传规则
 									if h.errorPassthroughService != nil && len(responseBody) > 0 {
 										if rule := h.errorPassthroughService.MatchRule("openai", statusCode, responseBody); rule != nil {
 											// 确定响应状态码
 											respCode := statusCode
 											if !rule.PassthroughCode && rule.ResponseCode != nil {
 												respCode = *rule.ResponseCode
 											}
 											// 确定响应消息
 											msg := service.ExtractUpstreamErrorMessage(responseBody)
 											if !rule.PassthroughBody && rule.CustomMessage != nil {
 												msg = *rule.CustomMessage
 											}
 											h.handleStreamingAwareError(c, respCode, "upstream_error", msg, streamStarted)
 											return
 										}
 									}
 									// 使用默认的错误映射
 									status, errType, errMsg := h.mapUpstreamError(statusCode)
 									h.handleStreamingAwareError(c, status, errType, errMsg, streamStarted)
 								}
 								// handleFailoverExhaustedSimple 简化版本，用于没有响应体的情况
 								func (h *OpenAIGatewayHandler) handleFailoverExhaustedSimple(c *gin.Context, statusCode int, streamStarted bool) {
-												feat: cc/codex support account retry

											
										
										
											2025-12-27 11:44:00 +08:00
+									status, errType, errMsg := h.mapUpstreamError(statusCode)
 									h.handleStreamingAwareError(c, status, errType, errMsg, streamStarted)
 								}
 								// mapUpstreamError translates an upstream HTTP status code into the
 								// (status, error type, message) triple returned to the client.
 								//
 								// 429 is reported as 429 "rate_limit_error" and 529 as 503; every other
 								// code, including unrecognised ones, is reported as 502 "upstream_error"
 								// with a message chosen by the upstream code.
 								func (h *OpenAIGatewayHandler) mapUpstreamError(statusCode int) (int, string, string) {
 									const upstreamErrType = "upstream_error"
 									// The two codes that do not collapse to 502 are handled first.
 									if statusCode == http.StatusTooManyRequests {
 										return http.StatusTooManyRequests, "rate_limit_error", "Upstream rate limit exceeded, please retry later"
 									}
 									if statusCode == 529 {
 										return http.StatusServiceUnavailable, upstreamErrType, "Upstream service overloaded, please retry later"
 									}
 									// Everything else becomes 502; only the message differs.
 									msg := "Upstream request failed"
 									switch statusCode {
 									case http.StatusUnauthorized:
 										msg = "Upstream authentication failed, please contact administrator"
 									case http.StatusForbidden:
 										msg = "Upstream access forbidden, please contact administrator"
 									case http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout:
 										msg = "Upstream service temporarily unavailable"
 									}
 									return http.StatusBadGateway, upstreamErrType, msg
 								}
-												feat: 新增支持codex转发

											
										
										
											2025-12-22 22:58:31 +08:00
+								// handleStreamingAwareError handles errors that may occur after streaming has started
 								func (h *OpenAIGatewayHandler) handleStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
 									if streamStarted {
 										// Stream already started, send error as SSE event then close
 										flusher, ok := c.Writer.(http.Flusher)
 										if ok {
 											// Send error event in OpenAI SSE format
 											errorEvent := fmt.Sprintf(`event: error`+"\n"+`data: {"error": {"type": "%s", "message": "%s"}}`+"\n\n", errType, message)
 											if _, err := fmt.Fprint(c.Writer, errorEvent); err != nil {
 												_ = c.Error(err)
 											}
 											flusher.Flush()
 										}
 										return
 									}
 									// Normal case: return JSON response with proper status code
 									h.errorResponse(c, status, errType, message)
 								}
 								// errorResponse writes a JSON error body of the form
 								// {"error": {"type": ..., "message": ...}} with the given HTTP status.
 								func (h *OpenAIGatewayHandler) errorResponse(c *gin.Context, status int, errType, message string) {
 									detail := gin.H{
 										"type":    errType,
 										"message": message,
 									}
 									c.JSON(status, gin.H{"error": detail})
 								}