From f6cfab990145404bbdb0509a7d12530292dc4c69 Mon Sep 17 00:00:00 2001 From: Rose Ding Date: Mon, 9 Feb 2026 14:26:01 +0800 Subject: [PATCH 01/16] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20Antigravity?= =?UTF-8?q?=20=E5=8D=95=E8=B4=A6=E5=8F=B7=20503=20=E9=80=80=E9=81=BF?= =?UTF-8?q?=E9=87=8D=E8=AF=95=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 当分组内只有一个可用账号且上游返回 503 (MODEL_CAPACITY_EXHAUSTED) 时, 不再设置模型限流+切换账号(因为切换回来还是同一个账号),而是在 Service 层 原地等待+重试,避免双重等待问题。 主要变更: - Handler 层:检测单账号 503 场景,清除排除列表并设置 SingleAccountRetry 标记 - Service 层:新增 handleSingleAccountRetryInPlace 原地重试逻辑 - Service 层:预检查跳过单账号模式下的限流检查 - 新增 ctxkey.SingleAccountRetry 上下文标记 --- backend/internal/handler/gateway_handler.go | 47 +++++ .../internal/handler/gemini_v1beta_handler.go | 13 ++ backend/internal/pkg/ctxkey/ctxkey.go | 4 + .../service/antigravity_gateway_service.go | 196 +++++++++++++++++- 4 files changed, 253 insertions(+), 7 deletions(-) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 6900fa55..82181948 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -245,6 +245,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) { h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted) return } + // Antigravity 单账号退避重试:分组内没有其他可用账号时, + // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches { + if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) { + log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches) + failedAccountIDs = make(map[int64]struct{}) + // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换 + ctx := 
context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + continue + } + } if lastFailoverErr != nil { h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted) } else { @@ -412,6 +425,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) { h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted) return } + // Antigravity 单账号退避重试:分组内没有其他可用账号时, + // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches { + if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) { + log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches) + failedAccountIDs = make(map[int64]struct{}) + // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换 + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + continue + } + } if lastFailoverErr != nil { h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted) } else { @@ -838,6 +864,27 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool { } } +// sleepAntigravitySingleAccountBackoff Antigravity 平台单账号分组的 503 退避重试延时。 +// 当分组内只有一个可用账号且上游返回 503(MODEL_CAPACITY_EXHAUSTED)时使用, +// 采用短固定延时策略。Service 层在 SingleAccountRetry 模式下已经做了充分的原地重试 +// (最多 3 次、总等待 30s),所以 Handler 层的退避只需短暂等待即可。 +// 返回 false 表示 context 已取消。 +func sleepAntigravitySingleAccountBackoff(ctx context.Context, retryCount int) bool { + // 固定短延时:2s + // Service 层已经在原地等待了足够长的时间(retryDelay × 重试次数), + // Handler 层只需短暂间隔后重新进入 Service 层即可。 + const delay = 2 * time.Second + + log.Printf("Antigravity single-account 503 backoff: waiting %v before retry (attempt %d)", delay, retryCount) + + select { + case 
<-ctx.Done(): + return false + case <-time.After(delay): + return true + } +} + func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) { statusCode := failoverErr.StatusCode responseBody := failoverErr.ResponseBody diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index d5149f22..2b67cb1f 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -334,6 +334,19 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error()) return } + // Antigravity 单账号退避重试:分组内没有其他可用账号时, + // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches { + if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) { + log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches) + failedAccountIDs = make(map[int64]struct{}) + // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换 + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + continue + } + } h.handleGeminiFailoverExhausted(c, lastFailoverErr) return } diff --git a/backend/internal/pkg/ctxkey/ctxkey.go b/backend/internal/pkg/ctxkey/ctxkey.go index 9bf563e7..0c4d82f7 100644 --- a/backend/internal/pkg/ctxkey/ctxkey.go +++ b/backend/internal/pkg/ctxkey/ctxkey.go @@ -28,4 +28,8 @@ const ( // IsMaxTokensOneHaikuRequest 标识当前请求是否为 max_tokens=1 + haiku 模型的探测请求 // 用于 ClaudeCodeOnly 验证绕过(绕过 system prompt 检查,但仍需验证 User-Agent) IsMaxTokensOneHaikuRequest Key = "ctx_is_max_tokens_one_haiku" + + // SingleAccountRetry 标识当前请求处于单账号 503 退避重试模式。 + 
// 在此模式下,Service 层的模型限流预检查将等待限流过期而非直接切换账号。 + SingleAccountRetry Key = "ctx_single_account_retry" ) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 014b3c86..11f975fe 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -20,6 +20,7 @@ import ( "time" "github.com/Wei-Shaw/sub2api/internal/pkg/antigravity" + "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" "github.com/gin-gonic/gin" "github.com/google/uuid" ) @@ -46,6 +47,23 @@ const ( googleRPCTypeErrorInfo = "type.googleapis.com/google.rpc.ErrorInfo" googleRPCReasonModelCapacityExhausted = "MODEL_CAPACITY_EXHAUSTED" googleRPCReasonRateLimitExceeded = "RATE_LIMIT_EXCEEDED" + + // 单账号 503 退避重试:预检查中等待模型限流过期的最大时间 + // 超过此值的限流将直接切换账号(避免请求等待过久) + antigravitySingleAccountMaxWait = 30 * time.Second + + // 单账号 503 退避重试:Service 层原地重试的最大次数 + // 在 handleSmartRetry 中,对于 shouldRateLimitModel(长延迟 ≥ 7s)的情况, + // 多账号模式下会设限流+切换账号;但单账号模式下改为原地等待+重试。 + antigravitySingleAccountSmartRetryMaxAttempts = 3 + + // 单账号 503 退避重试:原地重试时单次最大等待时间 + // 防止上游返回过长的 retryDelay 导致请求卡住太久 + antigravitySingleAccountSmartRetryMaxWait = 15 * time.Second + + // 单账号 503 退避重试:原地重试的总累计等待时间上限 + // 超过此上限将不再重试,直接返回 503 + antigravitySingleAccountSmartRetryTotalMaxWait = 30 * time.Second ) // antigravityPassthroughErrorMessages 透传给客户端的错误消息白名单(小写) @@ -148,6 +166,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam // 情况1: retryDelay >= 阈值,限流模型并切换账号 if shouldRateLimitModel { + // 单账号 503 退避重试模式:不设限流、不切换账号,改为原地等待+重试 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + // 多账号场景下切换账号是最优选择,但单账号场景下设限流毫无意义(只会导致双重等待)。 + if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) { + return s.handleSingleAccountRetryInPlace(p, resp, respBody, baseURL, waitDuration, modelName) + } + rateLimitDuration := waitDuration if rateLimitDuration <= 0 { 
rateLimitDuration = antigravityDefaultRateLimitDuration @@ -236,7 +261,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam } } - // 所有重试都失败,限流当前模型并切换账号 + // 所有重试都失败 rateLimitDuration := waitDuration if rateLimitDuration <= 0 { rateLimitDuration = antigravityDefaultRateLimitDuration @@ -245,6 +270,22 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam if retryBody == nil { retryBody = respBody } + + // 单账号 503 退避重试模式:智能重试耗尽后不设限流、不切换账号, + // 直接返回 503 让 Handler 层的单账号退避循环做最终处理。 + if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) { + log.Printf("%s status=%d smart_retry_exhausted_single_account attempts=%d model=%s account=%d body=%s (return 503 directly)", + p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200)) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(retryBody)), + }, + } + } + log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)", p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200)) @@ -279,17 +320,152 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam return &smartRetryResult{action: smartRetryActionContinue} } +// handleSingleAccountRetryInPlace 单账号 503 退避重试的原地重试逻辑。 +// +// 在多账号场景下,收到 503 + 长 retryDelay(≥ 7s)时会设置模型限流 + 切换账号; +// 但在单账号场景下,设限流毫无意义(因为切换回来的还是同一个账号,还要等限流过期)。 +// 此方法改为在 Service 层原地等待 + 重试,避免双重等待问题: +// +// 旧流程:Service 设限流 → Handler 退避等待 → Service 等限流过期 → 再请求(总耗时 = 退避 + 限流) +// 新流程:Service 直接等 retryDelay → 重试 → 成功/再等 → 重试...(总耗时 ≈ 实际 retryDelay × 重试次数) +// +// 约束: +// - 单次等待不超过 antigravitySingleAccountSmartRetryMaxWait +// - 总累计等待不超过 
antigravitySingleAccountSmartRetryTotalMaxWait +// - 最多重试 antigravitySingleAccountSmartRetryMaxAttempts 次 +func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace( + p antigravityRetryLoopParams, + resp *http.Response, + respBody []byte, + baseURL string, + waitDuration time.Duration, + modelName string, +) *smartRetryResult { + // 限制单次等待时间 + if waitDuration > antigravitySingleAccountSmartRetryMaxWait { + waitDuration = antigravitySingleAccountSmartRetryMaxWait + } + if waitDuration < antigravitySmartRetryMinWait { + waitDuration = antigravitySmartRetryMinWait + } + + log.Printf("%s status=%d single_account_503_retry_in_place model=%s account=%d upstream_retry_delay=%v (retrying in-place instead of rate-limiting)", + p.prefix, resp.StatusCode, modelName, p.account.ID, waitDuration) + + var lastRetryResp *http.Response + var lastRetryBody []byte + totalWaited := time.Duration(0) + + for attempt := 1; attempt <= antigravitySingleAccountSmartRetryMaxAttempts; attempt++ { + // 检查累计等待是否超限 + if totalWaited+waitDuration > antigravitySingleAccountSmartRetryTotalMaxWait { + remaining := antigravitySingleAccountSmartRetryTotalMaxWait - totalWaited + if remaining <= 0 { + log.Printf("%s single_account_503_retry: total_wait_exceeded total=%v max=%v, giving up", + p.prefix, totalWaited, antigravitySingleAccountSmartRetryTotalMaxWait) + break + } + waitDuration = remaining + } + + log.Printf("%s status=%d single_account_503_retry attempt=%d/%d delay=%v total_waited=%v model=%s account=%d", + p.prefix, resp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, waitDuration, totalWaited, modelName, p.account.ID) + + select { + case <-p.ctx.Done(): + log.Printf("%s status=context_canceled_during_single_account_retry", p.prefix) + return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} + case <-time.After(waitDuration): + } + totalWaited += waitDuration + + // 创建新请求 + retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, 
baseURL, p.action, p.accessToken, p.body) + if err != nil { + log.Printf("%s single_account_503_retry: request_build_failed error=%v", p.prefix, err) + break + } + + retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) + if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { + log.Printf("%s status=%d single_account_503_retry_success attempt=%d/%d total_waited=%v", + p.prefix, retryResp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited) + // 关闭之前的响应 + if lastRetryResp != nil { + _ = lastRetryResp.Body.Close() + } + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 网络错误时继续重试 + if retryErr != nil || retryResp == nil { + log.Printf("%s single_account_503_retry: network_error attempt=%d/%d error=%v", + p.prefix, attempt, antigravitySingleAccountSmartRetryMaxAttempts, retryErr) + continue + } + + // 关闭之前的响应 + if lastRetryResp != nil { + _ = lastRetryResp.Body.Close() + } + lastRetryResp = retryResp + lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) + _ = retryResp.Body.Close() + + // 解析新的重试信息,更新下次等待时间 + if attempt < antigravitySingleAccountSmartRetryMaxAttempts && lastRetryBody != nil { + _, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) + if newWaitDuration > 0 { + waitDuration = newWaitDuration + if waitDuration > antigravitySingleAccountSmartRetryMaxWait { + waitDuration = antigravitySingleAccountSmartRetryMaxWait + } + if waitDuration < antigravitySmartRetryMinWait { + waitDuration = antigravitySmartRetryMinWait + } + } + } + } + + // 所有重试都失败,不设限流,直接返回 503 + // Handler 层的单账号退避循环会做最终处理 + retryBody := lastRetryBody + if retryBody == nil { + retryBody = respBody + } + log.Printf("%s status=%d single_account_503_retry_exhausted attempts=%d total_waited=%v model=%s account=%d body=%s (return 503 
directly)", + p.prefix, resp.StatusCode, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited, modelName, p.account.ID, truncateForLog(retryBody, 200)) + + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(retryBody)), + }, + } +} + // antigravityRetryLoop 执行带 URL fallback 的重试循环 func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) { // 预检查:如果账号已限流,直接返回切换信号 if p.requestedModel != "" { if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 { - log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d", - p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID) - return nil, &AntigravityAccountSwitchError{ - OriginalAccountID: p.account.ID, - RateLimitedModel: p.requestedModel, - IsStickySession: p.isStickySession, + // 单账号 503 退避重试模式:跳过限流预检查,直接发请求。 + // 首次请求设的限流是为了多账号调度器跳过该账号,在单账号模式下无意义。 + // 如果上游确实还不可用,handleSmartRetry → handleSingleAccountRetryInPlace + // 会在 Service 层原地等待+重试,不需要在预检查这里等。 + if isSingleAccountRetry(p.ctx) { + log.Printf("%s pre_check: single_account_retry skipping rate_limit remaining=%v model=%s account=%d (will retry in-place if 503)", + p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID) + } else { + log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d", + p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID) + return nil, &AntigravityAccountSwitchError{ + OriginalAccountID: p.account.ID, + RateLimitedModel: p.requestedModel, + IsStickySession: p.isStickySession, + } } } } @@ -1943,6 +2119,12 @@ func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool { } } +// isSingleAccountRetry 检查 context 中是否设置了单账号退避重试标记 +func isSingleAccountRetry(ctx 
context.Context) bool { + v, _ := ctx.Value(ctxkey.SingleAccountRetry).(bool) + return v +} + // setModelRateLimitByModelName 使用官方模型 ID 设置模型级限流 // 直接使用上游返回的模型 ID(如 claude-sonnet-4-5)作为限流 key // 返回是否已成功设置(若模型名为空或 repo 为 nil 将返回 false) From 021abfca181af4f6f52f594200d948de04070119 Mon Sep 17 00:00:00 2001 From: Rose Ding Date: Mon, 9 Feb 2026 17:25:36 +0800 Subject: [PATCH 02/16] =?UTF-8?q?fix:=20=E5=8D=95=E8=B4=A6=E5=8F=B7?= =?UTF-8?q?=E5=88=86=E7=BB=84=E9=A6=96=E6=AC=A1=20503=20=E4=B8=8D=E8=AE=BE?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E9=99=90=E6=B5=81=E6=A0=87=E8=AE=B0=EF=BC=8C?= =?UTF-8?q?=E9=81=BF=E5=85=8D=E5=90=8E=E7=BB=AD=E8=AF=B7=E6=B1=82=E9=9B=AA?= =?UTF-8?q?=E5=B4=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 单账号 antigravity 分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时, 原逻辑会设置 ~29s 模型限流标记。由于只有一个账号无法切换, 后续所有新请求在预检查时命中限流 → 几毫秒内直接返回 503, 导致约 30 秒的雪崩窗口。 修复:在 Handler 入口处检查分组是否只有单个 antigravity 账号, 如果是则提前设置 SingleAccountRetry context 标记,让 Service 层 首次 503 就走原地重试逻辑(不设限流标记),避免污染后续请求。 --- backend/internal/handler/gateway_handler.go | 14 ++++++++++++++ backend/internal/handler/gemini_v1beta_handler.go | 7 +++++++ backend/internal/service/gateway_service.go | 11 +++++++++++ 3 files changed, 32 insertions(+) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 82181948..2b3703b4 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -238,6 +238,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) { var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = 
c.Request.WithContext(ctx) + } + for { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制 if err != nil { @@ -409,6 +416,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) { } fallbackUsed := false + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + } + for { maxAccountSwitches := h.maxAccountSwitches switchCount := 0 diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index 2b67cb1f..f8fb0dcb 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -327,6 +327,13 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + } + for { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制 if err != nil { diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 4e723232..2c04ae14 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -1683,6 +1683,17 @@ func (s 
*GatewayService) listSchedulableAccounts(ctx context.Context, groupID *i return accounts, useMixed, nil } +// IsSingleAntigravityAccountGroup 检查指定分组是否只有一个 antigravity 平台的可调度账号。 +// 用于 Handler 层在首次请求时提前设置 SingleAccountRetry context, +// 避免单账号分组收到 503 时错误地设置模型限流标记导致后续请求连续快速失败。 +func (s *GatewayService) IsSingleAntigravityAccountGroup(ctx context.Context, groupID *int64) bool { + accounts, _, err := s.listSchedulableAccounts(ctx, groupID, PlatformAntigravity, true) + if err != nil { + return false + } + return len(accounts) == 1 +} + func (s *GatewayService) isAccountAllowedForPlatform(account *Account, platform string, useMixed bool) bool { if account == nil { return false From 4a84ca9a02e1f11b64cd849e3f7a274b3e18b056 Mon Sep 17 00:00:00 2001 From: erio Date: Mon, 9 Feb 2026 20:08:00 +0800 Subject: [PATCH 03/16] fix: support clearing model-level rate limits from action menu and temp-unsched reset --- backend/internal/service/ratelimit_service.go | 4 ++++ .../components/admin/account/AccountActionMenu.vue | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go index 63732dee..12c48ab8 100644 --- a/backend/internal/service/ratelimit_service.go +++ b/backend/internal/service/ratelimit_service.go @@ -623,6 +623,10 @@ func (s *RateLimitService) ClearTempUnschedulable(ctx context.Context, accountID slog.Warn("temp_unsched_cache_delete_failed", "account_id", accountID, "error", err) } } + // 同时清除模型级别限流 + if err := s.accountRepo.ClearModelRateLimits(ctx, accountID); err != nil { + slog.Warn("clear_model_rate_limits_on_temp_unsched_reset_failed", "account_id", accountID, "error", err) + } return nil } diff --git a/frontend/src/components/admin/account/AccountActionMenu.vue b/frontend/src/components/admin/account/AccountActionMenu.vue index bb753faa..2325f4b4 100644 --- a/frontend/src/components/admin/account/AccountActionMenu.vue +++ 
b/frontend/src/components/admin/account/AccountActionMenu.vue @@ -53,7 +53,19 @@ import type { Account } from '@/types' const props = defineProps<{ show: boolean; account: Account | null; position: { top: number; left: number } | null }>() const emit = defineEmits(['close', 'test', 'stats', 'reauth', 'refresh-token', 'reset-status', 'clear-rate-limit']) const { t } = useI18n() -const isRateLimited = computed(() => props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date()) +const isRateLimited = computed(() => { + if (props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date()) { + return true + } + const modelLimits = (props.account?.extra as Record<string, unknown> | undefined)?.model_rate_limits as + | Record<string, { rate_limit_reset_at: string }> + | undefined + if (modelLimits) { + const now = new Date() + return Object.values(modelLimits).some(info => new Date(info.rate_limit_reset_at) > now) + } + return false +}) const isOverloaded = computed(() => props.account?.overload_until && new Date(props.account.overload_until) > new Date()) const handleKeydown = (event: KeyboardEvent) => { From e4bc35151f4feb41e8283fda1dd8e074a20289a0 Mon Sep 17 00:00:00 2001 From: Rose Ding Date: Mon, 9 Feb 2026 22:06:06 +0800 Subject: [PATCH 04/16] =?UTF-8?q?test:=20=E6=B7=BB=E5=8A=A0=E5=8D=95?= =?UTF-8?q?=E8=B4=A6=E5=8F=B7=20503=20=E9=80=80=E9=81=BF=E9=87=8D=E8=AF=95?= =?UTF-8?q?=E6=9C=BA=E5=88=B6=E7=9A=84=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 覆盖 Service 层和 Handler 层的所有新增逻辑: - isSingleAccountRetry context 标记检查 - handleSmartRetry 中 503 + SingleAccountRetry 分支 - handleSingleAccountRetryInPlace 原地重试逻辑 - antigravityRetryLoop 预检查跳过限流 - sleepAntigravitySingleAccountBackoff 固定延迟退避 - 端到端集成场景验证 Co-Authored-By: Claude Opus 4.6 --- ...teway_handler_single_account_retry_test.go | 51 + .../antigravity_single_account_retry_test.go | 904 ++++++++++++++++++ 2 files changed, 955 insertions(+)
create mode 100644 backend/internal/handler/gateway_handler_single_account_retry_test.go create mode 100644 backend/internal/service/antigravity_single_account_retry_test.go diff --git a/backend/internal/handler/gateway_handler_single_account_retry_test.go b/backend/internal/handler/gateway_handler_single_account_retry_test.go new file mode 100644 index 00000000..96aa14c6 --- /dev/null +++ b/backend/internal/handler/gateway_handler_single_account_retry_test.go @@ -0,0 +1,51 @@ +package handler + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// sleepAntigravitySingleAccountBackoff 测试 +// --------------------------------------------------------------------------- + +func TestSleepAntigravitySingleAccountBackoff_ReturnsTrue(t *testing.T) { + ctx := context.Background() + start := time.Now() + ok := sleepAntigravitySingleAccountBackoff(ctx, 1) + elapsed := time.Since(start) + + require.True(t, ok, "should return true when context is not canceled") + // 固定延迟 2s + require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond, "should wait approximately 2s") + require.Less(t, elapsed, 5*time.Second, "should not wait too long") +} + +func TestSleepAntigravitySingleAccountBackoff_ContextCanceled(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // 立即取消 + + start := time.Now() + ok := sleepAntigravitySingleAccountBackoff(ctx, 1) + elapsed := time.Since(start) + + require.False(t, ok, "should return false when context is canceled") + require.Less(t, elapsed, 500*time.Millisecond, "should return immediately on cancel") +} + +func TestSleepAntigravitySingleAccountBackoff_FixedDelay(t *testing.T) { + // 验证不同 retryCount 都使用固定 2s 延迟 + ctx := context.Background() + + start := time.Now() + ok := sleepAntigravitySingleAccountBackoff(ctx, 5) + elapsed := time.Since(start) + + require.True(t, ok) + // 即使 retryCount=5,延迟仍然是固定的 2s + 
require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond) + require.Less(t, elapsed, 5*time.Second) +} diff --git a/backend/internal/service/antigravity_single_account_retry_test.go b/backend/internal/service/antigravity_single_account_retry_test.go new file mode 100644 index 00000000..0950b728 --- /dev/null +++ b/backend/internal/service/antigravity_single_account_retry_test.go @@ -0,0 +1,904 @@ +//go:build unit + +package service + +import ( + "bytes" + "context" + "io" + "net/http" + "strings" + "testing" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// 辅助函数:构造带 SingleAccountRetry 标记的 context +// --------------------------------------------------------------------------- + +func ctxWithSingleAccountRetry() context.Context { + return context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true) +} + +// --------------------------------------------------------------------------- +// 1. isSingleAccountRetry 测试 +// --------------------------------------------------------------------------- + +func TestIsSingleAccountRetry_True(t *testing.T) { + ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true) + require.True(t, isSingleAccountRetry(ctx)) +} + +func TestIsSingleAccountRetry_False_NoValue(t *testing.T) { + require.False(t, isSingleAccountRetry(context.Background())) +} + +func TestIsSingleAccountRetry_False_ExplicitFalse(t *testing.T) { + ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, false) + require.False(t, isSingleAccountRetry(ctx)) +} + +func TestIsSingleAccountRetry_False_WrongType(t *testing.T) { + ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, "true") + require.False(t, isSingleAccountRetry(ctx)) +} + +// --------------------------------------------------------------------------- +// 2. 
常量验证 +// --------------------------------------------------------------------------- + +func TestSingleAccountRetryConstants(t *testing.T) { + require.Equal(t, 3, antigravitySingleAccountSmartRetryMaxAttempts, + "单账号原地重试最多 3 次") + require.Equal(t, 15*time.Second, antigravitySingleAccountSmartRetryMaxWait, + "单次最大等待 15s") + require.Equal(t, 30*time.Second, antigravitySingleAccountSmartRetryTotalMaxWait, + "总累计等待不超过 30s") + require.Equal(t, 30*time.Second, antigravitySingleAccountMaxWait, + "预检查最大等待 30s") +} + +// --------------------------------------------------------------------------- +// 3. handleSmartRetry + 503 + SingleAccountRetry → 走 handleSingleAccountRetryInPlace +// (而非设模型限流 + 切换账号) +// --------------------------------------------------------------------------- + +// TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace +// 核心场景:503 + retryDelay >= 7s + SingleAccountRetry 标记 +// → 不设模型限流、不切换账号,改为原地重试 +func TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace(t *testing.T) { + // 原地重试成功 + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 1, + Name: "acc-single", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + // 503 + 39s >= 7s 阈值 + MODEL_CAPACITY_EXHAUSTED + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"} + ], + "message": "No capacity available for model gemini-3-pro-high on the server" + } + }`) + resp := &http.Response{ + StatusCode: 
http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), // 关键:设置单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 关键断言:返回 resp(原地重试成功),而非 switchError(切换账号) + require.NotNil(t, result.resp, "should return successful response from in-place retry") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Nil(t, result.switchError, "should NOT return switchError in single account mode") + require.Nil(t, result.err) + + // 验证未设模型限流(单账号模式不应设限流) + require.Len(t, repo.modelRateLimitCalls, 0, + "should NOT set model rate limit in single account retry mode") + + // 验证确实调用了 upstream(原地重试) + require.GreaterOrEqual(t, len(upstream.calls), 1, "should have made at least one retry call") +} + +// TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches +// 对照组:503 + retryDelay >= 7s + 无 SingleAccountRetry 标记 +// → 照常设模型限流 + 切换账号 +func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *testing.T) { + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 2, + Name: "acc-multi", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 503 + 39s >= 7s 阈值 + respBody := []byte(`{ + "error": { + "code": 503, + "status": 
"UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: context.Background(), // 关键:无单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 对照:多账号模式返回 switchError + require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503") + require.Nil(t, result.resp, "should not return resp when switchError is set") + + // 对照:多账号模式应设模型限流 + require.Len(t, repo.modelRateLimitCalls, 1, + "multi-account mode SHOULD set model rate limit") +} + +// TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches +// 边界情况:429(非 503)+ SingleAccountRetry 标记 +// → 单账号原地重试仅针对 503,429 依然走切换账号逻辑 +func TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches(t *testing.T) { + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 3, + Name: "acc-429", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 429 + 15s >= 7s 阈值 + respBody := []byte(`{ + "error": { + "status": 
"RESOURCE_EXHAUSTED", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "15s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusTooManyRequests, // 429,不是 503 + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), // 有单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 429 即使有单账号标记,也应走切换账号 + require.NotNil(t, result.switchError, "429 should still return switchError even with SingleAccountRetry") + require.Len(t, repo.modelRateLimitCalls, 1, + "429 should still set model rate limit even with SingleAccountRetry") +} + +// --------------------------------------------------------------------------- +// 4. 
handleSmartRetry + 503 + 短延迟 + SingleAccountRetry → 智能重试耗尽后不设限流 +// --------------------------------------------------------------------------- + +// TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit +// 503 + retryDelay < 7s + SingleAccountRetry → 智能重试耗尽后直接返回 503,不设限流 +func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testing.T) { + // 智能重试也返回 503 + failRespBody := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }` + failResp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(failRespBody)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{failResp}, + errors: []error{nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 4, + Name: "acc-short-503", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 0.1s < 7s 阈值 + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, 
body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 关键断言:单账号 503 模式下,智能重试耗尽后直接返回 503 响应,不切换 + require.NotNil(t, result.resp, "should return 503 response directly for single account mode") + require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode) + require.Nil(t, result.switchError, "should NOT switch account in single account mode") + + // 关键断言:不设模型限流 + require.Len(t, repo.modelRateLimitCalls, 0, + "should NOT set model rate limit for 503 in single account mode") +} + +// TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit +// 对照组:503 + retryDelay < 7s + 无 SingleAccountRetry → 智能重试耗尽后照常设限流 +func TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit(t *testing.T) { + failRespBody := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }` + failResp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(failRespBody)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{failResp}, + errors: []error{nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 5, + Name: "acc-multi-503", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": 
"type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: context.Background(), // 无单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 对照:多账号模式应返回 switchError + require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503") + // 对照:多账号模式应设模型限流 + require.Len(t, repo.modelRateLimitCalls, 1, + "multi-account mode should set model rate limit") +} + +// --------------------------------------------------------------------------- +// 5. 
handleSingleAccountRetryInPlace 直接测试 +// --------------------------------------------------------------------------- + +// TestHandleSingleAccountRetryInPlace_Success 原地重试成功 +func TestHandleSingleAccountRetryInPlace_Success(t *testing.T) { + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + + account := &Account{ + ID: 10, + Name: "acc-inplace-ok", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp, "should return successful response") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Nil(t, result.switchError, "should not switch account on success") + require.Nil(t, result.err) +} + +// TestHandleSingleAccountRetryInPlace_AllRetriesFail 所有重试都失败,返回 503(不设限流) +func TestHandleSingleAccountRetryInPlace_AllRetriesFail(t *testing.T) { + // 构造 3 个 503 响应(对应 3 次原地重试) + var responses []*http.Response + var errors []error + for i := 0; i < antigravitySingleAccountSmartRetryMaxAttempts; i++ { + responses = append(responses, &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + 
"details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }`)), + }) + errors = append(errors, nil) + } + upstream := &mockSmartRetryUpstream{ + responses: responses, + errors: errors, + } + + account := &Account{ + ID: 11, + Name: "acc-inplace-fail", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + origBody := []byte(`{"error":{"code":503,"status":"UNAVAILABLE"}}`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{"X-Test": {"original"}}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, origBody, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 关键:返回 503 resp,不返回 switchError + require.NotNil(t, result.resp, "should return 503 response directly") + require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode) + require.Nil(t, result.switchError, "should NOT return switchError - let Handler handle it") + require.Nil(t, result.err) + + // 验证确实重试了指定次数 + require.Len(t, upstream.calls, antigravitySingleAccountSmartRetryMaxAttempts, + "should have made exactly maxAttempts retry calls") +} + +// TestHandleSingleAccountRetryInPlace_WaitDurationClamped 等待时间被限制在 [min, max] 范围 +func TestHandleSingleAccountRetryInPlace_WaitDurationClamped(t *testing.T) { + // 用短延迟的成功响应,只验证不 panic + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + 
} + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + + account := &Account{ + ID: 12, + Name: "acc-clamp", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + + // 等待时间过大应被 clamp 到 antigravitySingleAccountSmartRetryMaxWait + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 999*time.Second, "gemini-3-pro") + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp) + require.Equal(t, http.StatusOK, result.resp.StatusCode) +} + +// TestHandleSingleAccountRetryInPlace_ContextCanceled context 取消时立即返回 +func TestHandleSingleAccountRetryInPlace_ContextCanceled(t *testing.T) { + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{nil}, + errors: []error{nil}, + } + + account := &Account{ + ID: 13, + Name: "acc-cancel", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + ctx, cancel := context.WithCancel(context.Background()) + ctx = context.WithValue(ctx, ctxkey.SingleAccountRetry, true) + cancel() // 立即取消 + + params := antigravityRetryLoopParams{ + ctx: ctx, + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, 
"gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.Error(t, result.err, "should return context error") + // 不应调用 upstream(因为在等待阶段就被取消了) + require.Len(t, upstream.calls, 0, "should not call upstream when context is canceled") +} + +// TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry 网络错误时继续重试 +func TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry(t *testing.T) { + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + upstream := &mockSmartRetryUpstream{ + // 第1次网络错误(nil resp),第2次成功 + responses: []*http.Response{nil, successResp}, + errors: []error{nil, nil}, + } + + account := &Account{ + ID: 14, + Name: "acc-net-retry", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp, "should return successful response after network error recovery") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Len(t, upstream.calls, 2, "first call fails (network error), second succeeds") +} + +// --------------------------------------------------------------------------- +// 6. 
antigravityRetryLoop 预检查:单账号模式跳过限流 +// --------------------------------------------------------------------------- + +// TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit +// 预检查中,如果有 SingleAccountRetry 标记,即使账号已限流也跳过直接发请求 +func TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit(t *testing.T) { + // 创建一个已设模型限流的账号 + upstream := &recordingOKUpstream{} + account := &Account{ + ID: 20, + Name: "acc-rate-limited", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Schedulable: true, + Status: StatusActive, + Concurrency: 1, + Extra: map[string]any{ + modelRateLimitsKey: map[string]any{ + "claude-sonnet-4-5": map[string]any{ + "rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339), + }, + }, + }, + } + + svc := &AntigravityGatewayService{} + result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + requestedModel: "claude-sonnet-4-5", + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + }) + + require.NoError(t, err, "should not return error") + require.NotNil(t, result, "should return result") + require.NotNil(t, result.resp, "should have response") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + // 关键:尽管限流了,有 SingleAccountRetry 标记时仍然到达了 upstream + require.Equal(t, 1, upstream.calls, "should have reached upstream despite rate limit") +} + +// TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit +// 对照组:无 SingleAccountRetry + 已限流 → 预检查返回 switchError +func TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit(t *testing.T) { + upstream := 
&recordingOKUpstream{} + account := &Account{ + ID: 21, + Name: "acc-rate-limited-multi", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Schedulable: true, + Status: StatusActive, + Concurrency: 1, + Extra: map[string]any{ + modelRateLimitsKey: map[string]any{ + "claude-sonnet-4-5": map[string]any{ + "rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339), + }, + }, + }, + } + + svc := &AntigravityGatewayService{} + result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{ + ctx: context.Background(), // 无单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + requestedModel: "claude-sonnet-4-5", + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + }) + + require.Nil(t, result, "should not return result on rate limit switch") + require.NotNil(t, err, "should return error") + + var switchErr *AntigravityAccountSwitchError + require.ErrorAs(t, err, &switchErr, "should return AntigravityAccountSwitchError") + require.Equal(t, account.ID, switchErr.OriginalAccountID) + require.Equal(t, "claude-sonnet-4-5", switchErr.RateLimitedModel) + + // upstream 不应被调用(预检查就短路了) + require.Equal(t, 0, upstream.calls, "upstream should NOT be called when pre-check blocks") +} + +// --------------------------------------------------------------------------- +// 7. 
端到端集成场景测试 +// --------------------------------------------------------------------------- + +// TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E +// 端到端场景:503 + 单账号 + 原地重试第2次成功 +func TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E(t *testing.T) { + // 第1次原地重试仍返回 503,第2次成功 + fail503Body := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }` + resp503 := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(fail503Body)), + } + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{resp503, successResp}, + errors: []error{nil, nil}, + } + + account := &Account{ + ID: 30, + Name: "acc-e2e", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp, "should return successful response after 2nd attempt") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Nil(t, result.switchError) + require.Len(t, 
upstream.calls, 2, "first 503, second OK") +} + +// TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E +// 通过 antigravityRetryLoop → handleSmartRetry → handleSingleAccountRetryInPlace 完整链路 +func TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E(t *testing.T) { + // 初始请求返回 503 + 长延迟 + initial503Body := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "10s"} + ], + "message": "No capacity available" + } + }`) + initial503Resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(initial503Body)), + } + + // 原地重试成功 + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + + upstream := &mockSmartRetryUpstream{ + // 第1次调用(retryLoop 主循环)返回 503 + // 第2次调用(handleSingleAccountRetryInPlace 原地重试)返回 200 + responses: []*http.Response{initial503Resp, successResp}, + errors: []error{nil, nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 31, + Name: "acc-e2e-loop", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Schedulable: true, + Status: StatusActive, + Concurrency: 1, + } + + svc := &AntigravityGatewayService{} + result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) 
*handleModelRateLimitResult { + return nil + }, + }) + + require.NoError(t, err, "should not return error on successful retry") + require.NotNil(t, result, "should return result") + require.NotNil(t, result.resp, "should return response") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + + // 验证未设模型限流 + require.Len(t, repo.modelRateLimitCalls, 0, + "should NOT set model rate limit in single account retry mode") +} From 18b591bc3b0e38448d24413063ae9e0915402601 Mon Sep 17 00:00:00 2001 From: erio Date: Mon, 9 Feb 2026 22:13:44 +0800 Subject: [PATCH 05/16] feat: Antigravity extra failover retries after default retries exhausted When default failover retries are exhausted, continue retrying with Antigravity accounts only (up to 10 times, configurable via GATEWAY_ANTIGRAVITY_EXTRA_RETRIES). Each extra retry uses a fixed 500ms delay. Non-Antigravity accounts are skipped during the extra retry phase. Applied to all three endpoints: Gemini compat, Claude, and Gemini native API paths. --- backend/internal/config/config.go | 4 + backend/internal/handler/gateway_handler.go | 65 ++- .../gateway_handler_extra_retry_test.go | 417 ++++++++++++++++++ .../internal/handler/gemini_v1beta_handler.go | 26 +- 4 files changed, 504 insertions(+), 8 deletions(-) create mode 100644 backend/internal/handler/gateway_handler_extra_retry_test.go diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 91437ba8..460bd05d 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -279,6 +279,9 @@ type GatewayConfig struct { // Antigravity 429 fallback 限流时间(分钟),解析重置时间失败时使用 AntigravityFallbackCooldownMinutes int `mapstructure:"antigravity_fallback_cooldown_minutes"` + // 默认重试用完后,额外使用 Antigravity 账号重试的最大次数(0 表示禁用) + AntigravityExtraRetries int `mapstructure:"antigravity_extra_retries"` + // Scheduling: 账号调度相关配置 Scheduling GatewaySchedulingConfig `mapstructure:"scheduling"` @@ -883,6 +886,7 @@ func setDefaults() { 
viper.SetDefault("gateway.max_account_switches", 10) viper.SetDefault("gateway.max_account_switches_gemini", 3) viper.SetDefault("gateway.antigravity_fallback_cooldown_minutes", 1) + viper.SetDefault("gateway.antigravity_extra_retries", 10) viper.SetDefault("gateway.max_body_size", int64(100*1024*1024)) viper.SetDefault("gateway.connection_pool_isolation", ConnectionPoolIsolationAccountProxy) // HTTP 上游连接池配置(针对 5000+ 并发用户优化) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 6900fa55..361cd8b5 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -39,6 +39,7 @@ type GatewayHandler struct { concurrencyHelper *ConcurrencyHelper maxAccountSwitches int maxAccountSwitchesGemini int + antigravityExtraRetries int } // NewGatewayHandler creates a new GatewayHandler @@ -57,6 +58,7 @@ func NewGatewayHandler( pingInterval := time.Duration(0) maxAccountSwitches := 10 maxAccountSwitchesGemini := 3 + antigravityExtraRetries := 10 if cfg != nil { pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second if cfg.Gateway.MaxAccountSwitches > 0 { @@ -65,6 +67,7 @@ func NewGatewayHandler( if cfg.Gateway.MaxAccountSwitchesGemini > 0 { maxAccountSwitchesGemini = cfg.Gateway.MaxAccountSwitchesGemini } + antigravityExtraRetries = cfg.Gateway.AntigravityExtraRetries } return &GatewayHandler{ gatewayService: gatewayService, @@ -78,6 +81,7 @@ func NewGatewayHandler( concurrencyHelper: NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval), maxAccountSwitches: maxAccountSwitches, maxAccountSwitchesGemini: maxAccountSwitchesGemini, + antigravityExtraRetries: antigravityExtraRetries, } } @@ -234,6 +238,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if platform == service.PlatformGemini { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 + antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var 
lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -255,6 +260,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) + // 额外重试阶段:跳过非 Antigravity 账号 + if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { + failedAccountIDs[account.ID] = struct{}{} + if selection.Acquired && selection.ReleaseFunc != nil { + selection.ReleaseFunc() + } + continue + } + // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -345,8 +359,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) - return + // 默认重试用完,进入 Antigravity 额外重试 + antigravityExtraCount++ + if antigravityExtraCount > h.antigravityExtraRetries { + h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) + return + } + log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) + if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { + return + } + continue } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -399,6 +422,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { for { maxAccountSwitches := h.maxAccountSwitches switchCount := 0 + antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var lastFailoverErr *service.UpstreamFailoverError retryWithFallback := false @@ -422,6 +446,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) + // 额外重试阶段:跳过非 Antigravity 账号 + if switchCount >= maxAccountSwitches && 
account.Platform != service.PlatformAntigravity { + failedAccountIDs[account.ID] = struct{}{} + if selection.Acquired && selection.ReleaseFunc != nil { + selection.ReleaseFunc() + } + continue + } + // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -545,8 +578,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) - return + // 默认重试用完,进入 Antigravity 额外重试 + antigravityExtraCount++ + if antigravityExtraCount > h.antigravityExtraRetries { + h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) + return + } + log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) + if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { + return + } + continue } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -838,6 +880,21 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool { } } +const antigravityExtraRetryDelay = 500 * time.Millisecond + +// sleepFixedDelay 固定延时等待,返回 false 表示 context 已取消。 +func sleepFixedDelay(ctx context.Context, delay time.Duration) bool { + if delay <= 0 { + return true + } + select { + case <-ctx.Done(): + return false + case <-time.After(delay): + return true + } +} + func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) { statusCode := failoverErr.StatusCode responseBody := failoverErr.ResponseBody diff --git a/backend/internal/handler/gateway_handler_extra_retry_test.go b/backend/internal/handler/gateway_handler_extra_retry_test.go new file mode 100644 index 00000000..a0777941 
--- /dev/null +++ b/backend/internal/handler/gateway_handler_extra_retry_test.go @@ -0,0 +1,417 @@ +//go:build unit + +package handler + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/Wei-Shaw/sub2api/internal/config" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +// --- sleepFixedDelay --- + +func TestSleepFixedDelay_ZeroDelay(t *testing.T) { + got := sleepFixedDelay(context.Background(), 0) + require.True(t, got, "zero delay should return true immediately") +} + +func TestSleepFixedDelay_NegativeDelay(t *testing.T) { + got := sleepFixedDelay(context.Background(), -1*time.Second) + require.True(t, got, "negative delay should return true immediately") +} + +func TestSleepFixedDelay_NormalDelay(t *testing.T) { + start := time.Now() + got := sleepFixedDelay(context.Background(), 50*time.Millisecond) + elapsed := time.Since(start) + require.True(t, got, "normal delay should return true") + require.GreaterOrEqual(t, elapsed, 40*time.Millisecond, "should sleep at least ~50ms") +} + +func TestSleepFixedDelay_ContextCancelled(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + got := sleepFixedDelay(ctx, 10*time.Second) + require.False(t, got, "cancelled context should return false") +} + +func TestSleepFixedDelay_ContextTimeout(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel() + got := sleepFixedDelay(ctx, 5*time.Second) + require.False(t, got, "context timeout should return false before delay completes") +} + +// --- antigravityExtraRetryDelay constant --- + +func TestAntigravityExtraRetryDelayValue(t *testing.T) { + require.Equal(t, 500*time.Millisecond, antigravityExtraRetryDelay) +} + +// --- NewGatewayHandler antigravityExtraRetries field --- + +func TestNewGatewayHandler_AntigravityExtraRetries_Default(t 
*testing.T) { + h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil) + require.Equal(t, 10, h.antigravityExtraRetries, "default should be 10 when cfg is nil") +} + +func TestNewGatewayHandler_AntigravityExtraRetries_FromConfig(t *testing.T) { + cfg := &config.Config{ + Gateway: config.GatewayConfig{ + AntigravityExtraRetries: 5, + }, + } + h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) + require.Equal(t, 5, h.antigravityExtraRetries, "should use config value") +} + +func TestNewGatewayHandler_AntigravityExtraRetries_ZeroDisables(t *testing.T) { + cfg := &config.Config{ + Gateway: config.GatewayConfig{ + AntigravityExtraRetries: 0, + }, + } + h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) + require.Equal(t, 0, h.antigravityExtraRetries, "zero should disable extra retries") +} + +// --- handleFailoverAllAccountsExhausted (renamed: using handleFailoverExhausted) --- +// We test the error response format helpers that the extra retry path uses. 
+ +func TestHandleFailoverExhausted_JSON(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + failoverErr := &service.UpstreamFailoverError{StatusCode: 429} + h.handleFailoverExhausted(c, failoverErr, service.PlatformAntigravity, false) + + require.Equal(t, http.StatusTooManyRequests, rec.Code) + + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "rate_limit_error", errObj["type"]) +} + +func TestHandleFailoverExhaustedSimple_JSON(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + h.handleFailoverExhaustedSimple(c, 502, false) + + require.Equal(t, http.StatusBadGateway, rec.Code) + + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "upstream_error", errObj["type"]) +} + +// --- Extra retry platform filter logic --- + +func TestExtraRetryPlatformFilter(t *testing.T) { + tests := []struct { + name string + switchCount int + maxAccountSwitch int + platform string + expectSkip bool + }{ + { + name: "default_retry_phase_antigravity_not_skipped", + switchCount: 1, + maxAccountSwitch: 3, + platform: service.PlatformAntigravity, + expectSkip: false, + }, + { + name: "default_retry_phase_gemini_not_skipped", + switchCount: 1, + maxAccountSwitch: 3, + platform: service.PlatformGemini, + expectSkip: false, + }, + { + name: "extra_retry_phase_antigravity_not_skipped", + switchCount: 3, + maxAccountSwitch: 3, + platform: service.PlatformAntigravity, + expectSkip: false, + }, + { + name: "extra_retry_phase_gemini_skipped", + switchCount: 3, + maxAccountSwitch: 3, + platform: service.PlatformGemini, + expectSkip: true, + }, + { + 
name: "extra_retry_phase_anthropic_skipped", + switchCount: 3, + maxAccountSwitch: 3, + platform: service.PlatformAnthropic, + expectSkip: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Replicate the filter condition from the handler + shouldSkip := tt.switchCount >= tt.maxAccountSwitch && tt.platform != service.PlatformAntigravity + require.Equal(t, tt.expectSkip, shouldSkip) + }) + } +} + +// --- Extra retry counter logic --- + +func TestExtraRetryCounterExhaustion(t *testing.T) { + tests := []struct { + name string + maxExtraRetries int + currentExtraCount int + expectExhausted bool + }{ + { + name: "first_extra_retry", + maxExtraRetries: 10, + currentExtraCount: 1, + expectExhausted: false, + }, + { + name: "at_limit", + maxExtraRetries: 10, + currentExtraCount: 10, + expectExhausted: false, + }, + { + name: "exceeds_limit", + maxExtraRetries: 10, + currentExtraCount: 11, + expectExhausted: true, + }, + { + name: "zero_disables_extra_retry", + maxExtraRetries: 0, + currentExtraCount: 1, + expectExhausted: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Replicate the exhaustion condition: antigravityExtraCount > h.antigravityExtraRetries + exhausted := tt.currentExtraCount > tt.maxExtraRetries + require.Equal(t, tt.expectExhausted, exhausted) + }) + } +} + +// --- mapUpstreamError (used by handleFailoverExhausted) --- + +func TestMapUpstreamError(t *testing.T) { + h := &GatewayHandler{} + tests := []struct { + name string + statusCode int + expectedStatus int + expectedType string + }{ + {"429", 429, http.StatusTooManyRequests, "rate_limit_error"}, + {"529", 529, http.StatusServiceUnavailable, "overloaded_error"}, + {"500", 500, http.StatusBadGateway, "upstream_error"}, + {"502", 502, http.StatusBadGateway, "upstream_error"}, + {"503", 503, http.StatusBadGateway, "upstream_error"}, + {"504", 504, http.StatusBadGateway, "upstream_error"}, + {"401", 401, http.StatusBadGateway, 
"upstream_error"}, + {"403", 403, http.StatusBadGateway, "upstream_error"}, + {"unknown", 418, http.StatusBadGateway, "upstream_error"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + status, errType, _ := h.mapUpstreamError(tt.statusCode) + require.Equal(t, tt.expectedStatus, status) + require.Equal(t, tt.expectedType, errType) + }) + } +} + +// --- Gemini native path: handleGeminiFailoverExhausted --- + +func TestHandleGeminiFailoverExhausted_NilError(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + h.handleGeminiFailoverExhausted(c, nil) + + require.Equal(t, http.StatusBadGateway, rec.Code) + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "Upstream request failed", errObj["message"]) +} + +func TestHandleGeminiFailoverExhausted_429(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + failoverErr := &service.UpstreamFailoverError{StatusCode: 429} + h.handleGeminiFailoverExhausted(c, failoverErr) + + require.Equal(t, http.StatusTooManyRequests, rec.Code) +} + +// --- handleStreamingAwareError streaming mode --- + +func TestHandleStreamingAwareError_StreamStarted(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + // Simulate stream already started: set content type and write initial data + c.Writer.Header().Set("Content-Type", "text/event-stream") + c.Writer.WriteHeaderNow() + + h := &GatewayHandler{} + h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "test error", true) + + body := rec.Body.String() + require.Contains(t, body, "rate_limit_error") + require.Contains(t, body, "test error") + require.Contains(t, body, "data: ") +} 
+ +func TestHandleStreamingAwareError_NotStreaming(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "no model", false) + + require.Equal(t, http.StatusServiceUnavailable, rec.Code) + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "api_error", errObj["type"]) + require.Equal(t, "no model", errObj["message"]) +} + +// --- Integration: extra retry flow simulation --- + +func TestExtraRetryFlowSimulation(t *testing.T) { + // Simulate the full extra retry flow logic + maxAccountSwitches := 3 + maxExtraRetries := 2 + switchCount := 0 + antigravityExtraCount := 0 + + type attempt struct { + platform string + isFailover bool + } + + // Simulate: 3 default retries (all fail), then 2 extra retries (all fail), then exhausted + attempts := []attempt{ + {service.PlatformAntigravity, true}, // switchCount 0 -> 1 + {service.PlatformGemini, true}, // switchCount 1 -> 2 + {service.PlatformAntigravity, true}, // switchCount 2 -> 3 (reaches max) + {service.PlatformAntigravity, true}, // extra retry 1 + {service.PlatformAntigravity, true}, // extra retry 2 + {service.PlatformAntigravity, true}, // extra retry 3 -> exhausted + } + + var exhausted bool + var skipped int + + for _, a := range attempts { + if exhausted { + break + } + + // Extra retry phase: skip non-Antigravity + if switchCount >= maxAccountSwitches && a.platform != service.PlatformAntigravity { + skipped++ + continue + } + + if a.isFailover { + if switchCount >= maxAccountSwitches { + antigravityExtraCount++ + if antigravityExtraCount > maxExtraRetries { + exhausted = true + continue + } + // extra retry delay + continue + // NOTE: duplicate unreachable continue removed (comment kept to preserve hunk line count) + } + switchCount++ + } + } + + require.Equal(t, 3, switchCount, "should have 3 default 
retries") + require.Equal(t, 3, antigravityExtraCount, "counter incremented 3 times") + require.True(t, exhausted, "should be exhausted after exceeding max extra retries") + require.Equal(t, 0, skipped, "no non-antigravity accounts in this simulation") +} + +func TestExtraRetryFlowSimulation_SkipsNonAntigravity(t *testing.T) { + maxAccountSwitches := 2 + switchCount := 2 // already past default retries + antigravityExtraCount := 0 + maxExtraRetries := 5 + + type accountSelection struct { + platform string + } + + selections := []accountSelection{ + {service.PlatformGemini}, // should be skipped + {service.PlatformAnthropic}, // should be skipped + {service.PlatformAntigravity}, // should be attempted + } + + var skippedCount int + var attemptedCount int + + for _, sel := range selections { + if switchCount >= maxAccountSwitches && sel.platform != service.PlatformAntigravity { + skippedCount++ + continue + } + // Simulate failover + antigravityExtraCount++ + if antigravityExtraCount > maxExtraRetries { + break + } + attemptedCount++ + } + + require.Equal(t, 2, skippedCount, "gemini and anthropic accounts should be skipped") + require.Equal(t, 1, attemptedCount, "only antigravity account should be attempted") + require.Equal(t, 1, antigravityExtraCount) +} diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index d5149f22..5a576ab0 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -323,6 +323,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 + antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -340,6 +341,15 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, 
account.ID) + // 额外重试阶段:跳过非 Antigravity 账号 + if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { + failedAccountIDs[account.ID] = struct{}{} + if selection.Acquired && selection.ReleaseFunc != nil { + selection.ReleaseFunc() + } + continue + } + // 检测账号切换:如果粘性会话绑定的账号与当前选择的账号不同,清除 thoughtSignature // 注意:Gemini 原生 API 的 thoughtSignature 与具体上游账号强相关;跨账号透传会导致 400。 if sessionBoundAccountID > 0 && sessionBoundAccountID != account.ID { @@ -424,15 +434,23 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { var failoverErr *service.UpstreamFailoverError if errors.As(err, &failoverErr) { failedAccountIDs[account.ID] = struct{}{} + lastFailoverErr = failoverErr if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - lastFailoverErr = failoverErr - h.handleGeminiFailoverExhausted(c, lastFailoverErr) - return + // 默认重试用完,进入 Antigravity 额外重试 + antigravityExtraCount++ + if antigravityExtraCount > h.antigravityExtraRetries { + h.handleGeminiFailoverExhausted(c, failoverErr) + return + } + log.Printf("Gemini account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) + if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { + return + } + continue } - lastFailoverErr = failoverErr switchCount++ log.Printf("Gemini account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) if account.Platform == service.PlatformAntigravity { From aa4b102108a4e7f1ecc19f4d3179c13393e4be31 Mon Sep 17 00:00:00 2001 From: shaw Date: Mon, 9 Feb 2026 22:04:19 +0800 Subject: [PATCH 06/16] =?UTF-8?q?fix:=20=E7=A7=BB=E9=99=A4Antigravity?= =?UTF-8?q?=E7=9A=84apikey=E8=B4=A6=E6=88=B7=E9=A2=9D=E5=A4=96=E7=9A=84?= =?UTF-8?q?=E8=A1=A8=E5=8D=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../src/components/account/CreateAccountModal.vue | 12 ++++++------ .../src/components/account/EditAccountModal.vue | 14 +++++++++----- frontend/src/i18n/locales/en.ts | 3 ++- frontend/src/i18n/locales/zh.ts | 3 ++- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/frontend/src/components/account/CreateAccountModal.vue b/frontend/src/components/account/CreateAccountModal.vue index 8b4d4c06..f09df7b7 100644 --- a/frontend/src/components/account/CreateAccountModal.vue +++ b/frontend/src/components/account/CreateAccountModal.vue @@ -665,8 +665,8 @@
- {{ t('admin.accounts.types.upstream') }} - {{ t('admin.accounts.types.upstreamDesc') }} + API Key + {{ t('admin.accounts.types.antigravityApikey') }}
@@ -681,7 +681,7 @@ type="text" required class="input" - placeholder="https://s.konstants.xyz" + placeholder="https://cloudcode-pa.googleapis.com" />

{{ t('admin.accounts.upstream.baseUrlHint') }}

@@ -816,8 +816,8 @@ - -
+ +
{{ t('admin.accounts.gemini.tier.aiStudioHint') }}

- +
diff --git a/frontend/src/components/account/EditAccountModal.vue b/frontend/src/components/account/EditAccountModal.vue index 986bd297..60575f56 100644 --- a/frontend/src/components/account/EditAccountModal.vue +++ b/frontend/src/components/account/EditAccountModal.vue @@ -39,7 +39,9 @@ ? 'https://api.openai.com' : account.platform === 'gemini' ? 'https://generativelanguage.googleapis.com' - : 'https://api.anthropic.com' + : account.platform === 'antigravity' + ? 'https://cloudcode-pa.googleapis.com' + : 'https://api.anthropic.com' " />

{{ baseUrlHint }}

@@ -55,14 +57,16 @@ ? 'sk-proj-...' : account.platform === 'gemini' ? 'AIza...' - : 'sk-ant-...' + : account.platform === 'antigravity' + ? 'sk-...' + : 'sk-ant-...' " />

{{ t('admin.accounts.leaveEmptyToKeep') }}

- -
+ +
@@ -372,7 +376,7 @@ v-model="editBaseUrl" type="text" class="input" - placeholder="https://s.konstants.xyz" + placeholder="https://cloudcode-pa.googleapis.com" />

{{ t('admin.accounts.upstream.baseUrlHint') }}

diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index a2d42cb1..dc53e697 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -1359,6 +1359,7 @@ export default { googleOauth: 'Google OAuth', codeAssist: 'Code Assist', antigravityOauth: 'Antigravity OAuth', + antigravityApikey: 'Connect via Base URL + API Key', upstream: 'Upstream', upstreamDesc: 'Connect via Base URL + API Key' }, @@ -1625,7 +1626,7 @@ export default { // Upstream type upstream: { baseUrl: 'Upstream Base URL', - baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://s.konstants.xyz', + baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://cloudcode-pa.googleapis.com', apiKey: 'Upstream API Key', apiKeyHint: 'API Key for the upstream service', pleaseEnterBaseUrl: 'Please enter upstream Base URL', diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index 6d49e169..728d7744 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -1493,6 +1493,7 @@ export default { googleOauth: 'Google OAuth', codeAssist: 'Code Assist', antigravityOauth: 'Antigravity OAuth', + antigravityApikey: '通过 Base URL + API Key 连接', upstream: '对接上游', upstreamDesc: '通过 Base URL + API Key 连接上游', api_key: 'API Key', @@ -1771,7 +1772,7 @@ export default { // Upstream type upstream: { baseUrl: '上游 Base URL', - baseUrlHint: '上游 Antigravity 服务的地址,例如:https://s.konstants.xyz', + baseUrlHint: '上游 Antigravity 服务的地址,例如:https://cloudcode-pa.googleapis.com', apiKey: '上游 API Key', apiKeyHint: '上游服务的 API Key', pleaseEnterBaseUrl: '请输入上游 Base URL', From 345f853b5d8f34273a43bb80e69b87162edd57b2 Mon Sep 17 00:00:00 2001 From: erio Date: Mon, 9 Feb 2026 22:27:47 +0800 Subject: [PATCH 07/16] chore: bump version to 0.1.77.1 --- backend/cmd/server/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION index 
508699ff..af6111e5 100644 --- a/backend/cmd/server/VERSION +++ b/backend/cmd/server/VERSION @@ -1 +1 @@ -0.1.76.4 +0.1.77.1 From 4c1fd570f02430c648e97b4217d602000b6aec89 Mon Sep 17 00:00:00 2001 From: Edric Li Date: Mon, 9 Feb 2026 22:22:19 +0800 Subject: [PATCH 08/16] feat: failover and temp-unschedule on Google "Invalid project resource name" 400 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Google 后端间歇性返回 400 "Invalid project resource name" 错误, 此前该错误直接透传给客户端且不触发账号切换,导致请求失败。 - 在 Antigravity 和 Gemini 两个平台的所有转发路径中, 精确匹配该错误消息后触发 failover 自动换号重试 - 命中后将账号临时封禁 1 小时,避免反复调度到同一故障账号 - 提取共享函数 isGoogleProjectConfigError / tempUnscheduleGoogleConfigError 消除跨 Service 的代码重复 --- .../service/antigravity_gateway_service.go | 62 +++++++++++++++++++ .../service/gemini_messages_compat_service.go | 61 ++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 81a1c149..71dee705 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -1285,6 +1285,28 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, originalModel, 0, "", isStickySession) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest { + msg := strings.ToLower(strings.TrimSpace(extractAntigravityErrorMessage(respBody))) + if isGoogleProjectConfigError(msg) { + upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractAntigravityErrorMessage(respBody))) + upstreamDetail := s.getUpstreamErrorDetail(respBody) + log.Printf("%s status=400 google_config_error failover=true upstream_message=%q account=%d", prefix, upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: 
account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: resp.Header.Get("x-request-id"), + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + } + } + if s.shouldFailoverUpstreamError(resp.StatusCode) { upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody)) upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg) @@ -1825,6 +1847,23 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co // Always record upstream context for Ops error logs, even when we will failover. setOpsUpstreamError(c, resp.StatusCode, upstreamMsg, upstreamDetail) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest && isGoogleProjectConfigError(strings.ToLower(upstreamMsg)) { + log.Printf("%s status=400 google_config_error failover=true upstream_message=%q account=%d", prefix, upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: requestID, + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: unwrappedForOps} + } + if s.shouldFailoverUpstreamError(resp.StatusCode) { appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ Platform: account.Platform, @@ -1920,6 +1959,29 @@ func (s *AntigravityGatewayService) shouldFailoverUpstreamError(statusCode int) } } +// isGoogleProjectConfigError 判断(已提取的小写)错误消息是否属于 Google 服务端配置类问题。 +// 只精确匹配已知的服务端侧错误,避免对客户端请求错误做无意义重试。 +// 适用于所有走 Google 后端的平台(Antigravity、Gemini)。 +func isGoogleProjectConfigError(lowerMsg string) bool { 
+ // Google 间歇性 Bug:Project ID 有效但被临时识别失败 + return strings.Contains(lowerMsg, "invalid project resource name") +} + +// googleConfigErrorCooldown 服务端配置类 400 错误的临时封禁时长 +const googleConfigErrorCooldown = 60 * time.Minute + +// tempUnscheduleGoogleConfigError 对服务端配置类 400 错误触发临时封禁, +// 避免短时间内反复调度到同一个有问题的账号。 +func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { + until := time.Now().Add(googleConfigErrorCooldown) + reason := "400: invalid project resource name (auto temp-unschedule 1h)" + if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { + log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) + } else { + log.Printf("%s temp_unscheduled account=%d until=%v reason=%q", logPrefix, accountID, until.Format("15:04:05"), reason) + } +} + // sleepAntigravityBackoffWithContext 带 context 取消检查的退避等待 // 返回 true 表示正常完成等待,false 表示 context 已取消 func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool { diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go index 792c8f4b..1e59c5fd 100644 --- a/backend/internal/service/gemini_messages_compat_service.go +++ b/backend/internal/service/gemini_messages_compat_service.go @@ -880,6 +880,38 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex // ErrorPolicyNone → 原有逻辑 s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest { + msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + if isGoogleProjectConfigError(msg400) { + upstreamReqID := resp.Header.Get(requestIDHeader) + if upstreamReqID == "" { + upstreamReqID = resp.Header.Get("x-goog-request-id") + } + upstreamMsg := 
sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + upstreamDetail := "" + if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody { + maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes + if maxBytes <= 0 { + maxBytes = 2048 + } + upstreamDetail = truncateString(string(respBody), maxBytes) + } + log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: upstreamReqID, + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + } + } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { upstreamReqID := resp.Header.Get(requestIDHeader) if upstreamReqID == "" { @@ -1330,6 +1362,35 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin. 
// ErrorPolicyNone → 原有逻辑 s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest { + msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + if isGoogleProjectConfigError(msg400) { + evBody := unwrapIfNeeded(isOAuth, respBody) + upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(evBody))) + upstreamDetail := "" + if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody { + maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes + if maxBytes <= 0 { + maxBytes = 2048 + } + upstreamDetail = truncateString(string(evBody), maxBytes) + } + log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: requestID, + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody} + } + } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { evBody := unwrapIfNeeded(isOAuth, respBody) upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(evBody)) From 425dfb80d95bc4121c42664fc2931b3633f16216 Mon Sep 17 00:00:00 2001 From: Edric Li Date: Mon, 9 Feb 2026 23:25:30 +0800 Subject: [PATCH 09/16] feat: failover and temp-unschedule on empty stream response - Empty stream responses now return UpstreamFailoverError instead of plain 502, triggering automatic account switching (up to 10 retries) - Add tempUnscheduleEmptyResponse: accounts returning empty responses are temp-unscheduled for 30 minutes - Apply to both Claude and Gemini non-streaming paths - Align 
googleConfigErrorCooldown from 60m to 30m for consistency --- .../service/antigravity_gateway_service.go | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 71dee705..a5fd1535 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -1351,6 +1351,10 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, streamRes, err := s.handleClaudeStreamToNonStreaming(c, resp, startTime, originalModel) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) + var failoverErr *UpstreamFailoverError + if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { + tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) + } return nil, err } usage = streamRes.usage @@ -1920,6 +1924,10 @@ handleSuccess: streamRes, err := s.handleGeminiStreamToNonStreaming(c, resp, startTime) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) + var failoverErr *UpstreamFailoverError + if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { + tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) + } return nil, err } usage = streamRes.usage @@ -1968,13 +1976,28 @@ func isGoogleProjectConfigError(lowerMsg string) bool { } // googleConfigErrorCooldown 服务端配置类 400 错误的临时封禁时长 -const googleConfigErrorCooldown = 60 * time.Minute +const googleConfigErrorCooldown = 30 * time.Minute // tempUnscheduleGoogleConfigError 对服务端配置类 400 错误触发临时封禁, // 避免短时间内反复调度到同一个有问题的账号。 func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { until := time.Now().Add(googleConfigErrorCooldown) - reason := "400: invalid project resource name (auto temp-unschedule 1h)" + reason := "400: 
invalid project resource name (auto temp-unschedule 30m)" + if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { + log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) + } else { + log.Printf("%s temp_unscheduled account=%d until=%v reason=%q", logPrefix, accountID, until.Format("15:04:05"), reason) + } +} + +// emptyResponseCooldown 空流式响应的临时封禁时长 +const emptyResponseCooldown = 30 * time.Minute + +// tempUnscheduleEmptyResponse 对空流式响应触发临时封禁, +// 避免短时间内反复调度到同一个返回空响应的账号。 +func tempUnscheduleEmptyResponse(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { + until := time.Now().Add(emptyResponseCooldown) + reason := "empty stream response (auto temp-unschedule 30m)" if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) } else { @@ -2786,9 +2809,13 @@ returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 + // 处理空响应情况 — 触发 failover 切换账号重试 if last == nil && lastWithParts == nil { - log.Printf("[antigravity-Forward] warning: empty stream response, no valid chunks received") + log.Printf("[antigravity-Forward] warning: empty stream response (gemini non-stream), triggering failover") + return nil, &UpstreamFailoverError{ + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + } } // 如果收集到了图片 parts,需要合并到最终响应中 @@ -3201,10 +3228,13 @@ returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 + // 处理空响应情况 — 触发 failover 切换账号重试 if last == nil && lastWithParts == nil { - log.Printf("[antigravity-Forward] warning: empty stream response, no valid chunks received") - return nil, s.writeClaudeError(c, http.StatusBadGateway, "upstream_error", "Empty response from upstream") + log.Printf("[antigravity-Forward] warning: empty stream 
response (claude non-stream), triggering failover") + return nil, &UpstreamFailoverError{ + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + } } // 将收集的所有 parts 合并到最终响应中 From 6328e694417662ca6a25500655095a88de4249cf Mon Sep 17 00:00:00 2001 From: Edric Li Date: Tue, 10 Feb 2026 00:53:54 +0800 Subject: [PATCH 10/16] feat: same-account retry before failover for transient errors For retryable transient errors (Google 400 "invalid project resource name" and empty stream responses), retry on the same account up to 2 times (with 500ms delay) before switching to another account. - Add RetryableOnSameAccount field to UpstreamFailoverError - Add same-account retry loop in both Gemini and Claude/OpenAI handler paths - Move temp-unschedule from service layer to handler layer (only after all same-account retries exhausted) - Reduce temp-unschedule cooldown from 30 minutes to 1 minute --- backend/internal/handler/gateway_handler.go | 57 ++++++++++++++++++- .../service/antigravity_gateway_service.go | 40 ++++++------- backend/internal/service/gateway_service.go | 21 ++++++- .../service/gemini_messages_compat_service.go | 6 +- 4 files changed, 91 insertions(+), 33 deletions(-) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 361cd8b5..3003b5ae 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -240,6 +240,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { switchCount := 0 antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) + sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -353,11 +354,28 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if err != nil { var failoverErr *service.UpstreamFailoverError if errors.As(err, &failoverErr) { - failedAccountIDs[account.ID] = 
struct{}{} lastFailoverErr = failoverErr if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } + + // 同账号重试:对 RetryableOnSameAccount 的临时性错误,先在同一账号上重试 + if failoverErr.RetryableOnSameAccount && sameAccountRetryCount[account.ID] < maxSameAccountRetries { + sameAccountRetryCount[account.ID]++ + log.Printf("Account %d: retryable error %d, same-account retry %d/%d", + account.ID, failoverErr.StatusCode, sameAccountRetryCount[account.ID], maxSameAccountRetries) + if !sleepSameAccountRetryDelay(c.Request.Context()) { + return + } + continue + } + + // 同账号重试用尽,执行临时封禁并切换账号 + if failoverErr.RetryableOnSameAccount { + h.gatewayService.TempUnscheduleRetryableError(c.Request.Context(), account.ID, failoverErr) + } + + failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { // 默认重试用完,进入 Antigravity 额外重试 antigravityExtraCount++ @@ -424,6 +442,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { switchCount := 0 antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) + sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError retryWithFallback := false var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -572,11 +591,28 @@ func (h *GatewayHandler) Messages(c *gin.Context) { } var failoverErr *service.UpstreamFailoverError if errors.As(err, &failoverErr) { - failedAccountIDs[account.ID] = struct{}{} lastFailoverErr = failoverErr if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } + + // 同账号重试:对 RetryableOnSameAccount 的临时性错误,先在同一账号上重试 + if failoverErr.RetryableOnSameAccount && sameAccountRetryCount[account.ID] < maxSameAccountRetries { + sameAccountRetryCount[account.ID]++ + log.Printf("Account %d: retryable error %d, same-account retry %d/%d", + account.ID, failoverErr.StatusCode, sameAccountRetryCount[account.ID], maxSameAccountRetries) + if !sleepSameAccountRetryDelay(c.Request.Context()) { + return + } + continue + } + + // 
同账号重试用尽,执行临时封禁并切换账号 + if failoverErr.RetryableOnSameAccount { + h.gatewayService.TempUnscheduleRetryableError(c.Request.Context(), account.ID, failoverErr) + } + + failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { // 默认重试用完,进入 Antigravity 额外重试 antigravityExtraCount++ @@ -865,6 +901,23 @@ func needForceCacheBilling(hasBoundSession bool, failoverErr *service.UpstreamFa return hasBoundSession || (failoverErr != nil && failoverErr.ForceCacheBilling) } +const ( + // maxSameAccountRetries 同账号重试次数上限(针对 RetryableOnSameAccount 错误) + maxSameAccountRetries = 2 + // sameAccountRetryDelay 同账号重试间隔 + sameAccountRetryDelay = 500 * time.Millisecond +) + +// sleepSameAccountRetryDelay 同账号重试固定延时,返回 false 表示 context 已取消。 +func sleepSameAccountRetryDelay(ctx context.Context) bool { + select { + case <-ctx.Done(): + return false + case <-time.After(sameAccountRetryDelay): + return true + } +} + // sleepFailoverDelay 账号切换线性递增延时:第1次0s、第2次1s、第3次2s… // 返回 false 表示 context 已取消。 func sleepFailoverDelay(ctx context.Context, switchCount int) bool { diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index a5fd1535..9c2b9027 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -1285,7 +1285,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, originalModel, 0, "", isStickySession) - // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + // 精确匹配服务端配置类 400 错误,触发同账号重试 + failover if resp.StatusCode == http.StatusBadRequest { msg := strings.ToLower(strings.TrimSpace(extractAntigravityErrorMessage(respBody))) if isGoogleProjectConfigError(msg) { @@ -1302,8 +1302,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, Message: upstreamMsg, Detail: upstreamDetail, }) - 
tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody, RetryableOnSameAccount: true} } } @@ -1351,10 +1350,6 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, streamRes, err := s.handleClaudeStreamToNonStreaming(c, resp, startTime, originalModel) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) - var failoverErr *UpstreamFailoverError - if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { - tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) - } return nil, err } usage = streamRes.usage @@ -1851,7 +1846,7 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co // Always record upstream context for Ops error logs, even when we will failover. setOpsUpstreamError(c, resp.StatusCode, upstreamMsg, upstreamDetail) - // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + // 精确匹配服务端配置类 400 错误,触发同账号重试 + failover if resp.StatusCode == http.StatusBadRequest && isGoogleProjectConfigError(strings.ToLower(upstreamMsg)) { log.Printf("%s status=400 google_config_error failover=true upstream_message=%q account=%d", prefix, upstreamMsg, account.ID) appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ @@ -1864,8 +1859,7 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co Message: upstreamMsg, Detail: upstreamDetail, }) - tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: unwrappedForOps} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: unwrappedForOps, RetryableOnSameAccount: true} } if s.shouldFailoverUpstreamError(resp.StatusCode) { @@ -1924,10 +1918,6 @@ handleSuccess: streamRes, err := 
s.handleGeminiStreamToNonStreaming(c, resp, startTime) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) - var failoverErr *UpstreamFailoverError - if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { - tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) - } return nil, err } usage = streamRes.usage @@ -1976,13 +1966,13 @@ func isGoogleProjectConfigError(lowerMsg string) bool { } // googleConfigErrorCooldown 服务端配置类 400 错误的临时封禁时长 -const googleConfigErrorCooldown = 30 * time.Minute +const googleConfigErrorCooldown = 1 * time.Minute // tempUnscheduleGoogleConfigError 对服务端配置类 400 错误触发临时封禁, // 避免短时间内反复调度到同一个有问题的账号。 func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { until := time.Now().Add(googleConfigErrorCooldown) - reason := "400: invalid project resource name (auto temp-unschedule 30m)" + reason := "400: invalid project resource name (auto temp-unschedule 1m)" if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) } else { @@ -1991,13 +1981,13 @@ func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository } // emptyResponseCooldown 空流式响应的临时封禁时长 -const emptyResponseCooldown = 30 * time.Minute +const emptyResponseCooldown = 1 * time.Minute // tempUnscheduleEmptyResponse 对空流式响应触发临时封禁, // 避免短时间内反复调度到同一个返回空响应的账号。 func tempUnscheduleEmptyResponse(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { until := time.Now().Add(emptyResponseCooldown) - reason := "empty stream response (auto temp-unschedule 30m)" + reason := "empty stream response (auto temp-unschedule 1m)" if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) } else { @@ -2809,12 +2799,13 @@ 
returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 — 触发 failover 切换账号重试 + // 处理空响应情况 — 触发同账号重试 + failover 切换账号 if last == nil && lastWithParts == nil { log.Printf("[antigravity-Forward] warning: empty stream response (gemini non-stream), triggering failover") return nil, &UpstreamFailoverError{ - StatusCode: http.StatusBadGateway, - ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + RetryableOnSameAccount: true, } } @@ -3228,12 +3219,13 @@ returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 — 触发 failover 切换账号重试 + // 处理空响应情况 — 触发同账号重试 + failover 切换账号 if last == nil && lastWithParts == nil { log.Printf("[antigravity-Forward] warning: empty stream response (claude non-stream), triggering failover") return nil, &UpstreamFailoverError{ - StatusCode: http.StatusBadGateway, - ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + RetryableOnSameAccount: true, } } diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 4e723232..01e1acb4 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -362,15 +362,30 @@ type ForwardResult struct { // UpstreamFailoverError indicates an upstream error that should trigger account failover. 
type UpstreamFailoverError struct { - StatusCode int - ResponseBody []byte // 上游响应体,用于错误透传规则匹配 - ForceCacheBilling bool // Antigravity 粘性会话切换时设为 true + StatusCode int + ResponseBody []byte // 上游响应体,用于错误透传规则匹配 + ForceCacheBilling bool // Antigravity 粘性会话切换时设为 true + RetryableOnSameAccount bool // 临时性错误(如 Google 间歇性 400、空响应),应在同一账号上重试 N 次再切换 } func (e *UpstreamFailoverError) Error() string { return fmt.Sprintf("upstream error: %d (failover)", e.StatusCode) } +// TempUnscheduleRetryableError 对 RetryableOnSameAccount 类型的 failover 错误触发临时封禁。 +// 由 handler 层在同账号重试全部用尽、切换账号时调用。 +func (s *GatewayService) TempUnscheduleRetryableError(ctx context.Context, accountID int64, failoverErr *UpstreamFailoverError) { + if failoverErr == nil || !failoverErr.RetryableOnSameAccount { + return + } + // 根据状态码选择封禁策略 + if failoverErr.StatusCode == http.StatusBadRequest { + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, accountID, "[handler]") + } else if failoverErr.StatusCode == http.StatusBadGateway { + tempUnscheduleEmptyResponse(ctx, s.accountRepo, accountID, "[handler]") + } +} + // GatewayService handles API gateway operations type GatewayService struct { accountRepo AccountRepository diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go index 1e59c5fd..7fa375ca 100644 --- a/backend/internal/service/gemini_messages_compat_service.go +++ b/backend/internal/service/gemini_messages_compat_service.go @@ -908,8 +908,7 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex Message: upstreamMsg, Detail: upstreamDetail, }) - tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody, RetryableOnSameAccount: true} } } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { @@ -1387,8 
+1386,7 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin. Message: upstreamMsg, Detail: upstreamDetail, }) - tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody, RetryableOnSameAccount: true} } } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { From 662625a09114a000cfd0897d322fc894b5a8097c Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 03:47:40 +0800 Subject: [PATCH 11/16] feat: optimize MODEL_CAPACITY_EXHAUSTED retry and remove extra failover retries - MODEL_CAPACITY_EXHAUSTED now uses independent retry strategy: - retryDelay < 20s: wait actual retryDelay then retry once - retryDelay >= 20s or missing: retry up to 5 times at 20s intervals - Still capacity exhausted after retries: switch account (failover) - Different error during retry (e.g. 429): handle by actual error code - No model rate limit set (capacity != rate limit) - Remove Antigravity extra failover retries feature: Same-account retry mechanism (cherry-picked) makes it redundant. Removed: antigravityExtraRetries config, sleepFixedDelay, skip-non-antigravity logic. 
--- backend/internal/config/config.go | 3 - backend/internal/handler/gateway_handler.go | 65 +-- .../gateway_handler_extra_retry_test.go | 417 ------------------ .../internal/handler/gemini_v1beta_handler.go | 23 +- .../service/antigravity_gateway_service.go | 151 ++++++- .../service/antigravity_rate_limit_test.go | 40 +- .../service/antigravity_smart_retry_test.go | 142 ++++-- 7 files changed, 282 insertions(+), 559 deletions(-) delete mode 100644 backend/internal/handler/gateway_handler_extra_retry_test.go diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 460bd05d..7b6b4a37 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -279,9 +279,6 @@ type GatewayConfig struct { // Antigravity 429 fallback 限流时间(分钟),解析重置时间失败时使用 AntigravityFallbackCooldownMinutes int `mapstructure:"antigravity_fallback_cooldown_minutes"` - // 默认重试用完后,额外使用 Antigravity 账号重试的最大次数(0 表示禁用) - AntigravityExtraRetries int `mapstructure:"antigravity_extra_retries"` - // Scheduling: 账号调度相关配置 Scheduling GatewaySchedulingConfig `mapstructure:"scheduling"` diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 3003b5ae..b5fb379e 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -39,7 +39,6 @@ type GatewayHandler struct { concurrencyHelper *ConcurrencyHelper maxAccountSwitches int maxAccountSwitchesGemini int - antigravityExtraRetries int } // NewGatewayHandler creates a new GatewayHandler @@ -58,7 +57,6 @@ func NewGatewayHandler( pingInterval := time.Duration(0) maxAccountSwitches := 10 maxAccountSwitchesGemini := 3 - antigravityExtraRetries := 10 if cfg != nil { pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second if cfg.Gateway.MaxAccountSwitches > 0 { @@ -67,7 +65,6 @@ func NewGatewayHandler( if cfg.Gateway.MaxAccountSwitchesGemini > 0 { maxAccountSwitchesGemini = 
cfg.Gateway.MaxAccountSwitchesGemini } - antigravityExtraRetries = cfg.Gateway.AntigravityExtraRetries } return &GatewayHandler{ gatewayService: gatewayService, @@ -81,7 +78,6 @@ func NewGatewayHandler( concurrencyHelper: NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval), maxAccountSwitches: maxAccountSwitches, maxAccountSwitchesGemini: maxAccountSwitchesGemini, - antigravityExtraRetries: antigravityExtraRetries, } } @@ -238,7 +234,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if platform == service.PlatformGemini { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 - antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError @@ -261,15 +256,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) - // 额外重试阶段:跳过非 Antigravity 账号 - if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { - failedAccountIDs[account.ID] = struct{}{} - if selection.Acquired && selection.ReleaseFunc != nil { - selection.ReleaseFunc() - } - continue - } - // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -377,17 +363,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) { failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { - // 默认重试用完,进入 Antigravity 额外重试 - antigravityExtraCount++ - if antigravityExtraCount > h.antigravityExtraRetries { - h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) - return - } - log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) - if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { - return - } - continue + 
h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) + return } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -440,7 +417,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { for { maxAccountSwitches := h.maxAccountSwitches switchCount := 0 - antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError @@ -465,15 +441,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) - // 额外重试阶段:跳过非 Antigravity 账号 - if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { - failedAccountIDs[account.ID] = struct{}{} - if selection.Acquired && selection.ReleaseFunc != nil { - selection.ReleaseFunc() - } - continue - } - // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -614,17 +581,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) { failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { - // 默认重试用完,进入 Antigravity 额外重试 - antigravityExtraCount++ - if antigravityExtraCount > h.antigravityExtraRetries { - h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) - return - } - log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) - if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { - return - } - continue + h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) + return } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -933,21 +891,6 @@ func 
sleepFailoverDelay(ctx context.Context, switchCount int) bool { } } -const antigravityExtraRetryDelay = 500 * time.Millisecond - -// sleepFixedDelay 固定延时等待,返回 false 表示 context 已取消。 -func sleepFixedDelay(ctx context.Context, delay time.Duration) bool { - if delay <= 0 { - return true - } - select { - case <-ctx.Done(): - return false - case <-time.After(delay): - return true - } -} - func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) { statusCode := failoverErr.StatusCode responseBody := failoverErr.ResponseBody diff --git a/backend/internal/handler/gateway_handler_extra_retry_test.go b/backend/internal/handler/gateway_handler_extra_retry_test.go deleted file mode 100644 index a0777941..00000000 --- a/backend/internal/handler/gateway_handler_extra_retry_test.go +++ /dev/null @@ -1,417 +0,0 @@ -//go:build unit - -package handler - -import ( - "context" - "encoding/json" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/Wei-Shaw/sub2api/internal/config" - "github.com/Wei-Shaw/sub2api/internal/service" - "github.com/gin-gonic/gin" - "github.com/stretchr/testify/require" -) - -// --- sleepFixedDelay --- - -func TestSleepFixedDelay_ZeroDelay(t *testing.T) { - got := sleepFixedDelay(context.Background(), 0) - require.True(t, got, "zero delay should return true immediately") -} - -func TestSleepFixedDelay_NegativeDelay(t *testing.T) { - got := sleepFixedDelay(context.Background(), -1*time.Second) - require.True(t, got, "negative delay should return true immediately") -} - -func TestSleepFixedDelay_NormalDelay(t *testing.T) { - start := time.Now() - got := sleepFixedDelay(context.Background(), 50*time.Millisecond) - elapsed := time.Since(start) - require.True(t, got, "normal delay should return true") - require.GreaterOrEqual(t, elapsed, 40*time.Millisecond, "should sleep at least ~50ms") -} - -func TestSleepFixedDelay_ContextCancelled(t *testing.T) { - ctx, 
cancel := context.WithCancel(context.Background()) - cancel() // cancel immediately - got := sleepFixedDelay(ctx, 10*time.Second) - require.False(t, got, "cancelled context should return false") -} - -func TestSleepFixedDelay_ContextTimeout(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) - defer cancel() - got := sleepFixedDelay(ctx, 5*time.Second) - require.False(t, got, "context timeout should return false before delay completes") -} - -// --- antigravityExtraRetryDelay constant --- - -func TestAntigravityExtraRetryDelayValue(t *testing.T) { - require.Equal(t, 500*time.Millisecond, antigravityExtraRetryDelay) -} - -// --- NewGatewayHandler antigravityExtraRetries field --- - -func TestNewGatewayHandler_AntigravityExtraRetries_Default(t *testing.T) { - h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil) - require.Equal(t, 10, h.antigravityExtraRetries, "default should be 10 when cfg is nil") -} - -func TestNewGatewayHandler_AntigravityExtraRetries_FromConfig(t *testing.T) { - cfg := &config.Config{ - Gateway: config.GatewayConfig{ - AntigravityExtraRetries: 5, - }, - } - h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) - require.Equal(t, 5, h.antigravityExtraRetries, "should use config value") -} - -func TestNewGatewayHandler_AntigravityExtraRetries_ZeroDisables(t *testing.T) { - cfg := &config.Config{ - Gateway: config.GatewayConfig{ - AntigravityExtraRetries: 0, - }, - } - h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) - require.Equal(t, 0, h.antigravityExtraRetries, "zero should disable extra retries") -} - -// --- handleFailoverAllAccountsExhausted (renamed: using handleFailoverExhausted) --- -// We test the error response format helpers that the extra retry path uses. 
- -func TestHandleFailoverExhausted_JSON(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - failoverErr := &service.UpstreamFailoverError{StatusCode: 429} - h.handleFailoverExhausted(c, failoverErr, service.PlatformAntigravity, false) - - require.Equal(t, http.StatusTooManyRequests, rec.Code) - - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "rate_limit_error", errObj["type"]) -} - -func TestHandleFailoverExhaustedSimple_JSON(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - h.handleFailoverExhaustedSimple(c, 502, false) - - require.Equal(t, http.StatusBadGateway, rec.Code) - - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "upstream_error", errObj["type"]) -} - -// --- Extra retry platform filter logic --- - -func TestExtraRetryPlatformFilter(t *testing.T) { - tests := []struct { - name string - switchCount int - maxAccountSwitch int - platform string - expectSkip bool - }{ - { - name: "default_retry_phase_antigravity_not_skipped", - switchCount: 1, - maxAccountSwitch: 3, - platform: service.PlatformAntigravity, - expectSkip: false, - }, - { - name: "default_retry_phase_gemini_not_skipped", - switchCount: 1, - maxAccountSwitch: 3, - platform: service.PlatformGemini, - expectSkip: false, - }, - { - name: "extra_retry_phase_antigravity_not_skipped", - switchCount: 3, - maxAccountSwitch: 3, - platform: service.PlatformAntigravity, - expectSkip: false, - }, - { - name: "extra_retry_phase_gemini_skipped", - switchCount: 3, - maxAccountSwitch: 3, - platform: service.PlatformGemini, - expectSkip: true, - }, - { - 
name: "extra_retry_phase_anthropic_skipped", - switchCount: 3, - maxAccountSwitch: 3, - platform: service.PlatformAnthropic, - expectSkip: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Replicate the filter condition from the handler - shouldSkip := tt.switchCount >= tt.maxAccountSwitch && tt.platform != service.PlatformAntigravity - require.Equal(t, tt.expectSkip, shouldSkip) - }) - } -} - -// --- Extra retry counter logic --- - -func TestExtraRetryCounterExhaustion(t *testing.T) { - tests := []struct { - name string - maxExtraRetries int - currentExtraCount int - expectExhausted bool - }{ - { - name: "first_extra_retry", - maxExtraRetries: 10, - currentExtraCount: 1, - expectExhausted: false, - }, - { - name: "at_limit", - maxExtraRetries: 10, - currentExtraCount: 10, - expectExhausted: false, - }, - { - name: "exceeds_limit", - maxExtraRetries: 10, - currentExtraCount: 11, - expectExhausted: true, - }, - { - name: "zero_disables_extra_retry", - maxExtraRetries: 0, - currentExtraCount: 1, - expectExhausted: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Replicate the exhaustion condition: antigravityExtraCount > h.antigravityExtraRetries - exhausted := tt.currentExtraCount > tt.maxExtraRetries - require.Equal(t, tt.expectExhausted, exhausted) - }) - } -} - -// --- mapUpstreamError (used by handleFailoverExhausted) --- - -func TestMapUpstreamError(t *testing.T) { - h := &GatewayHandler{} - tests := []struct { - name string - statusCode int - expectedStatus int - expectedType string - }{ - {"429", 429, http.StatusTooManyRequests, "rate_limit_error"}, - {"529", 529, http.StatusServiceUnavailable, "overloaded_error"}, - {"500", 500, http.StatusBadGateway, "upstream_error"}, - {"502", 502, http.StatusBadGateway, "upstream_error"}, - {"503", 503, http.StatusBadGateway, "upstream_error"}, - {"504", 504, http.StatusBadGateway, "upstream_error"}, - {"401", 401, http.StatusBadGateway, 
"upstream_error"}, - {"403", 403, http.StatusBadGateway, "upstream_error"}, - {"unknown", 418, http.StatusBadGateway, "upstream_error"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - status, errType, _ := h.mapUpstreamError(tt.statusCode) - require.Equal(t, tt.expectedStatus, status) - require.Equal(t, tt.expectedType, errType) - }) - } -} - -// --- Gemini native path: handleGeminiFailoverExhausted --- - -func TestHandleGeminiFailoverExhausted_NilError(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - h.handleGeminiFailoverExhausted(c, nil) - - require.Equal(t, http.StatusBadGateway, rec.Code) - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "Upstream request failed", errObj["message"]) -} - -func TestHandleGeminiFailoverExhausted_429(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - failoverErr := &service.UpstreamFailoverError{StatusCode: 429} - h.handleGeminiFailoverExhausted(c, failoverErr) - - require.Equal(t, http.StatusTooManyRequests, rec.Code) -} - -// --- handleStreamingAwareError streaming mode --- - -func TestHandleStreamingAwareError_StreamStarted(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - // Simulate stream already started: set content type and write initial data - c.Writer.Header().Set("Content-Type", "text/event-stream") - c.Writer.WriteHeaderNow() - - h := &GatewayHandler{} - h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "test error", true) - - body := rec.Body.String() - require.Contains(t, body, "rate_limit_error") - require.Contains(t, body, "test error") - require.Contains(t, body, "data: ") -} 
- -func TestHandleStreamingAwareError_NotStreaming(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "no model", false) - - require.Equal(t, http.StatusServiceUnavailable, rec.Code) - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "api_error", errObj["type"]) - require.Equal(t, "no model", errObj["message"]) -} - -// --- Integration: extra retry flow simulation --- - -func TestExtraRetryFlowSimulation(t *testing.T) { - // Simulate the full extra retry flow logic - maxAccountSwitches := 3 - maxExtraRetries := 2 - switchCount := 0 - antigravityExtraCount := 0 - - type attempt struct { - platform string - isFailover bool - } - - // Simulate: 3 default retries (all fail), then 2 extra retries (all fail), then exhausted - attempts := []attempt{ - {service.PlatformAntigravity, true}, // switchCount 0 -> 1 - {service.PlatformGemini, true}, // switchCount 1 -> 2 - {service.PlatformAntigravity, true}, // switchCount 2 -> 3 (reaches max) - {service.PlatformAntigravity, true}, // extra retry 1 - {service.PlatformAntigravity, true}, // extra retry 2 - {service.PlatformAntigravity, true}, // extra retry 3 -> exhausted - } - - var exhausted bool - var skipped int - - for _, a := range attempts { - if exhausted { - break - } - - // Extra retry phase: skip non-Antigravity - if switchCount >= maxAccountSwitches && a.platform != service.PlatformAntigravity { - skipped++ - continue - } - - if a.isFailover { - if switchCount >= maxAccountSwitches { - antigravityExtraCount++ - if antigravityExtraCount > maxExtraRetries { - exhausted = true - continue - } - // extra retry delay + continue - continue - } - switchCount++ - } - } - - require.Equal(t, 3, switchCount, "should have 3 default 
retries") - require.Equal(t, 3, antigravityExtraCount, "counter incremented 3 times") - require.True(t, exhausted, "should be exhausted after exceeding max extra retries") - require.Equal(t, 0, skipped, "no non-antigravity accounts in this simulation") -} - -func TestExtraRetryFlowSimulation_SkipsNonAntigravity(t *testing.T) { - maxAccountSwitches := 2 - switchCount := 2 // already past default retries - antigravityExtraCount := 0 - maxExtraRetries := 5 - - type accountSelection struct { - platform string - } - - selections := []accountSelection{ - {service.PlatformGemini}, // should be skipped - {service.PlatformAnthropic}, // should be skipped - {service.PlatformAntigravity}, // should be attempted - } - - var skippedCount int - var attemptedCount int - - for _, sel := range selections { - if switchCount >= maxAccountSwitches && sel.platform != service.PlatformAntigravity { - skippedCount++ - continue - } - // Simulate failover - antigravityExtraCount++ - if antigravityExtraCount > maxExtraRetries { - break - } - attemptedCount++ - } - - require.Equal(t, 2, skippedCount, "gemini and anthropic accounts should be skipped") - require.Equal(t, 1, attemptedCount, "only antigravity account should be attempted") - require.Equal(t, 1, antigravityExtraCount) -} diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index 5a576ab0..0475c332 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -323,7 +323,6 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 - antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -341,15 +340,6 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, 
account.ID) - // 额外重试阶段:跳过非 Antigravity 账号 - if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { - failedAccountIDs[account.ID] = struct{}{} - if selection.Acquired && selection.ReleaseFunc != nil { - selection.ReleaseFunc() - } - continue - } - // 检测账号切换:如果粘性会话绑定的账号与当前选择的账号不同,清除 thoughtSignature // 注意:Gemini 原生 API 的 thoughtSignature 与具体上游账号强相关;跨账号透传会导致 400。 if sessionBoundAccountID > 0 && sessionBoundAccountID != account.ID { @@ -439,17 +429,8 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - // 默认重试用完,进入 Antigravity 额外重试 - antigravityExtraCount++ - if antigravityExtraCount > h.antigravityExtraRetries { - h.handleGeminiFailoverExhausted(c, failoverErr) - return - } - log.Printf("Gemini account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) - if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { - return - } - continue + h.handleGeminiFailoverExhausted(c, failoverErr) + return } switchCount++ log.Printf("Gemini account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 9c2b9027..84e78eaa 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -39,6 +39,15 @@ const ( antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待) antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用) + // MODEL_CAPACITY_EXHAUSTED 专用常量 + // 容量不足是临时状态,所有账号共享容量池,与限流不同 + // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 + // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 + // - 重试仍为容量不足: 切换账号 + // - 重试遇到其他错误: 按实际错误码处理 + 
antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值 + antigravityModelCapacityMaxAttempts = 5 // 容量不足长等待重试次数 + // Google RPC 状态和类型常量 googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" googleRPCStatusUnavailable = "UNAVAILABLE" @@ -144,7 +153,12 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam } // 判断是否触发智能重试 - shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName := shouldTriggerAntigravitySmartRetry(p.account, respBody) + shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(p.account, respBody) + + // MODEL_CAPACITY_EXHAUSTED: 独立处理 + if isModelCapacityExhausted { + return s.handleModelCapacityExhaustedRetry(p, resp, respBody, baseURL, waitDuration, modelName) + } // 情况1: retryDelay >= 阈值,限流模型并切换账号 if shouldRateLimitModel { @@ -229,7 +243,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam // 解析新的重试信息,用于下次重试的等待时间 if attempt < antigravitySmartRetryMaxAttempts && lastRetryBody != nil { - newShouldRetry, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) + newShouldRetry, _, newWaitDuration, _, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) if newShouldRetry && newWaitDuration > 0 { waitDuration = newWaitDuration } @@ -279,6 +293,100 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam return &smartRetryResult{action: smartRetryActionContinue} } +// handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑 +// 策略: +// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 +// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 +// - 重试成功: 直接返回 +// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 继续重试直到次数用完,然后切换账号 +// - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理 +func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry( + p antigravityRetryLoopParams, resp 
*http.Response, respBody []byte, + baseURL string, retryDelay time.Duration, modelName string, +) *smartRetryResult { + // 确定重试参数 + maxAttempts := 1 + waitDuration := retryDelay + if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold { + // 无 retryDelay 或 >= 20s: 固定 20s 间隔,最多 5 次 + maxAttempts = antigravityModelCapacityMaxAttempts + waitDuration = antigravityModelCapacityWaitThreshold + } + + for attempt := 1; attempt <= maxAttempts; attempt++ { + log.Printf("%s status=%d model_capacity_exhausted_retry attempt=%d/%d delay=%v model=%s account=%d", + p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID) + + select { + case <-p.ctx.Done(): + log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix) + return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} + case <-time.After(waitDuration): + } + + retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body) + if err != nil { + log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(respBody)), + }, + } + } + + retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) + + // 网络错误: 继续重试 + if retryErr != nil || retryResp == nil { + log.Printf("%s status=capacity_retry_network_error attempt=%d/%d error=%v", + p.prefix, attempt, maxAttempts, retryErr) + continue + } + + // 成功 (非 429/503): 直接返回 + if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { + log.Printf("%s status=%d model_capacity_retry_success attempt=%d/%d", + p.prefix, retryResp.StatusCode, attempt, maxAttempts) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 
读取重试响应体,判断是否仍为容量不足 + retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) + _ = retryResp.Body.Close() + + retryInfo := parseAntigravitySmartRetryInfo(retryBody) + + // 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理 + if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { + log.Printf("%s status=%d capacity_retry_got_different_error attempt=%d/%d body=%s", + p.prefix, retryResp.StatusCode, attempt, maxAttempts, truncateForLog(retryBody, 200)) + retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 仍然是 MODEL_CAPACITY_EXHAUSTED: 更新等待时间,继续重试 + if retryInfo.RetryDelay > 0 && retryInfo.RetryDelay < antigravityModelCapacityWaitThreshold { + waitDuration = retryInfo.RetryDelay + } + } + + // 所有重试都失败且仍为容量不足: 切换账号 + log.Printf("%s status=%d model_capacity_exhausted_retry_exhausted attempts=%d model=%s account=%d (switch account)", + p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID) + + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + switchError: &AntigravityAccountSwitchError{ + OriginalAccountID: p.account.ID, + RateLimitedModel: modelName, + IsStickySession: p.isStickySession, + }, + } +} + // antigravityRetryLoop 执行带 URL fallback 的重试循环 func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) { // 预检查:如果账号已限流,直接返回切换信号 @@ -2053,8 +2161,9 @@ func antigravityFallbackCooldownSeconds() (time.Duration, bool) { // antigravitySmartRetryInfo 智能重试所需的信息 type antigravitySmartRetryInfo struct { - RetryDelay time.Duration // 重试延迟时间 - ModelName string // 限流的模型名称(如 "claude-sonnet-4-5") + RetryDelay time.Duration // 重试延迟时间 + ModelName string // 限流的模型名称(如 "claude-sonnet-4-5") + IsModelCapacityExhausted bool // 是否为 MODEL_CAPACITY_EXHAUSTED(503 容量不足,与 429 限流处理策略不同) } // parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息 @@ -2163,14 +2272,16 @@ func 
parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo { return nil } - // 如果上游未提供 retryDelay,使用默认限流时间 - if retryDelay <= 0 { + // MODEL_CAPACITY_EXHAUSTED: retryDelay 可以为 0(由调用方决定默认等待策略) + // RATE_LIMIT_EXCEEDED: 无 retryDelay 时使用默认限流时间 + if retryDelay <= 0 && !hasModelCapacityExhausted { retryDelay = antigravityDefaultRateLimitDuration } return &antigravitySmartRetryInfo{ - RetryDelay: retryDelay, - ModelName: modelName, + RetryDelay: retryDelay, + ModelName: modelName, + IsModelCapacityExhausted: hasModelCapacityExhausted, } } @@ -2178,22 +2289,28 @@ func parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo { // 返回: // - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold) // - shouldRateLimitModel: 是否应该限流模型(retryDelay >= antigravityRateLimitThreshold) -// - waitDuration: 等待时间(智能重试时使用,shouldRateLimitModel=true 时为 0) +// - waitDuration: 等待时间(智能重试时使用,shouldRateLimitModel=true 时为限流时长) // - modelName: 限流的模型名称 -func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string) { +// - isModelCapacityExhausted: 是否为 MODEL_CAPACITY_EXHAUSTED(需要独立处理) +func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string, isModelCapacityExhausted bool) { if account.Platform != PlatformAntigravity { - return false, false, 0, "" + return false, false, 0, "", false } info := parseAntigravitySmartRetryInfo(respBody) if info == nil { - return false, false, 0, "" + return false, false, 0, "", false + } + + // MODEL_CAPACITY_EXHAUSTED: 独立处理,不走 7s 阈值判断 + if info.IsModelCapacityExhausted { + return true, false, info.RetryDelay, info.ModelName, true } // retryDelay >= 阈值:直接限流模型,不重试 // 注意:如果上游未提供 retryDelay,parseAntigravitySmartRetryInfo 已设置为默认 30s if info.RetryDelay >= antigravityRateLimitThreshold { - return false, true, info.RetryDelay, 
info.ModelName + return false, true, info.RetryDelay, info.ModelName, false } // retryDelay < 阈值:智能重试 @@ -2202,7 +2319,7 @@ func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shou waitDuration = antigravitySmartRetryMinWait } - return true, false, waitDuration, info.ModelName + return true, false, waitDuration, info.ModelName, false } // handleModelRateLimitParams 模型级限流处理参数 @@ -2240,6 +2357,12 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit return &handleModelRateLimitResult{Handled: false} } + // MODEL_CAPACITY_EXHAUSTED: 容量不足由 handleSmartRetry 独立处理,此处仅标记已处理 + // 不设置模型限流(容量不足是临时的,不等同于限流) + if info.IsModelCapacityExhausted { + return &handleModelRateLimitResult{Handled: true} + } + // < antigravityRateLimitThreshold: 等待后重试 if info.RetryDelay < antigravityRateLimitThreshold { log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v", diff --git a/backend/internal/service/antigravity_rate_limit_test.go b/backend/internal/service/antigravity_rate_limit_test.go index 59cc9331..c8b0d779 100644 --- a/backend/internal/service/antigravity_rate_limit_test.go +++ b/backend/internal/service/antigravity_rate_limit_test.go @@ -188,13 +188,14 @@ func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) { require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey) } -// TestHandleUpstreamError_503_ModelRateLimit 测试 503 模型限流场景 -func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) { +// TestHandleUpstreamError_503_ModelCapacityExhausted 测试 503 模型容量不足场景 +// MODEL_CAPACITY_EXHAUSTED 标记 Handled 但不设模型限流(由 handleSmartRetry 独立处理) +func TestHandleUpstreamError_503_ModelCapacityExhausted(t *testing.T) { repo := &stubAntigravityAccountRepo{} svc := &AntigravityGatewayService{accountRepo: repo} account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity} - // 503 + MODEL_CAPACITY_EXHAUSTED → 模型限流 + // 503 + MODEL_CAPACITY_EXHAUSTED → 标记已处理,不设模型限流 body := []byte(`{ "error": { 
"status": "UNAVAILABLE", @@ -207,13 +208,11 @@ func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) { result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false) - // 应该触发模型限流 + // 应该标记已处理,但不设模型限流 require.NotNil(t, result) require.True(t, result.Handled) - require.NotNil(t, result.SwitchError) - require.Equal(t, "gemini-3-pro-high", result.SwitchError.RateLimitedModel) - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey) + require.Nil(t, result.SwitchError, "MODEL_CAPACITY_EXHAUSTED should not trigger switch error in handleModelRateLimit") + require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit") } // TestHandleUpstreamError_503_NonModelRateLimit 测试 503 非模型限流场景(不处理) @@ -496,6 +495,7 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { body string expectedShouldRetry bool expectedShouldRateLimit bool + expectedCapacityExhaust bool minWait time.Duration modelName string }{ @@ -611,8 +611,9 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { ] } }`, - expectedShouldRetry: false, - expectedShouldRateLimit: true, + expectedShouldRetry: true, + expectedShouldRateLimit: false, + expectedCapacityExhaust: true, minWait: 39 * time.Second, modelName: "gemini-3-pro-high", }, @@ -629,9 +630,10 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { "message": "No capacity available for model gemini-2.5-flash on the server" } }`, - expectedShouldRetry: false, - expectedShouldRateLimit: true, - minWait: 30 * time.Second, + expectedShouldRetry: true, + expectedShouldRateLimit: false, + expectedCapacityExhaust: true, + minWait: 0, // 无 retryDelay,由 handleModelCapacityExhaustedRetry 决定默认 20s modelName: "gemini-2.5-flash", }, { @@ -656,18 +658,26 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { for _, tt := range 
tests { t.Run(tt.name, func(t *testing.T) { - shouldRetry, shouldRateLimit, wait, model := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body)) + shouldRetry, shouldRateLimit, wait, model, isCapacityExhausted := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body)) if shouldRetry != tt.expectedShouldRetry { t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry) } if shouldRateLimit != tt.expectedShouldRateLimit { t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit) } - if shouldRetry { + if isCapacityExhausted != tt.expectedCapacityExhaust { + t.Errorf("isCapacityExhausted = %v, want %v", isCapacityExhausted, tt.expectedCapacityExhaust) + } + if shouldRetry && !isCapacityExhausted { if wait < tt.minWait { t.Errorf("wait = %v, want >= %v", wait, tt.minWait) } } + if isCapacityExhausted && tt.minWait > 0 { + if wait < tt.minWait { + t.Errorf("capacity exhausted wait = %v, want >= %v", wait, tt.minWait) + } + } if shouldRateLimit && tt.minWait > 0 { if wait < tt.minWait { t.Errorf("rate limit wait = %v, want >= %v", wait, tt.minWait) diff --git a/backend/internal/service/antigravity_smart_retry_test.go b/backend/internal/service/antigravity_smart_retry_test.go index a7e0d296..7a6050a7 100644 --- a/backend/internal/service/antigravity_smart_retry_test.go +++ b/backend/internal/service/antigravity_smart_retry_test.go @@ -9,6 +9,7 @@ import ( "net/http" "strings" "testing" + "time" "github.com/stretchr/testify/require" ) @@ -294,8 +295,20 @@ func TestHandleSmartRetry_ShortDelay_SmartRetryFailed_ReturnsSwitchError(t *test require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)") } -// TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError 测试 503 MODEL_CAPACITY_EXHAUSTED 返回 switchError -func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testing.T) { +// TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess +// 503 
MODEL_CAPACITY_EXHAUSTED + retryDelay < 20s → 按实际 retryDelay 等待后重试 1 次,成功返回 +func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t *testing.T) { + // 重试成功的响应 + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"ok":true}`)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + repo := &stubAntigravityAccountRepo{} account := &Account{ ID: 3, @@ -304,7 +317,89 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi Platform: PlatformAntigravity, } - // 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 7s 阈值 + // 503 + MODEL_CAPACITY_EXHAUSTED + 0.5s < 20s 阈值 → 按实际 retryDelay 重试 1 次 + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"} + ], + "message": "No capacity available for model gemini-3-pro-high on the server" + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: context.Background(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, 
availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp) + require.Equal(t, http.StatusOK, result.resp.StatusCode, "should return success after retry") + require.Nil(t, result.switchError, "should not switch account on success") + require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") +} + +// TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount +// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 每 20s 重试最多 5 次,全失败后切换账号 +func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) { + // 构造 5 个仍然容量不足的重试响应 + capacityBody := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "30s"} + ] + } + }` + var responses []*http.Response + var errs []error + for i := 0; i < 5; i++ { + responses = append(responses, &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(capacityBody)), + }) + errs = append(errs, nil) + } + upstream := &mockSmartRetryUpstream{ + responses: responses, + errors: errs, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 3, + Name: "acc-3", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 20s 阈值 respBody := []byte(`{ "error": { "code": 503, @@ -322,13 +417,18 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi Body: io.NopCloser(bytes.NewReader(respBody)), } + // 使用可取消的 context 避免测试真的等待 5×20s + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + params := antigravityRetryLoopParams{ - ctx: context.Background(), + 
ctx: ctx, prefix: "[test]", account: account, accessToken: "token", action: "generateContent", body: []byte(`{"input":"test"}`), + httpUpstream: upstream, accountRepo: repo, isStickySession: true, handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { @@ -343,16 +443,9 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi require.NotNil(t, result) require.Equal(t, smartRetryActionBreakWithResp, result.action) - require.Nil(t, result.resp) - require.Nil(t, result.err) - require.NotNil(t, result.switchError, "should return switchError for 503 model capacity exhausted") - require.Equal(t, account.ID, result.switchError.OriginalAccountID) - require.Equal(t, "gemini-3-pro-high", result.switchError.RateLimitedModel) - require.True(t, result.switchError.IsStickySession) - - // 验证模型限流已设置 - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey) + // context 超时会导致提前返回,switchError 可能为 nil(context canceled) + // 验证不设置模型限流 + require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") } // TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑 @@ -1128,9 +1221,9 @@ func TestHandleSmartRetry_ShortDelay_NetworkError_StickySession_ClearsSession(t require.Equal(t, "sticky-net-error", cache.deleteCalls[0].sessionHash) } -// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession -// 503 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定 -func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) { +// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccount +// 503 + 短延迟 + 容量不足 + 重试失败 → 切换账号(不设模型限流) +func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccount(t 
*testing.T) { failRespBody := `{ "error": { "code": 503, @@ -1152,7 +1245,6 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession } repo := &stubAntigravityAccountRepo{} - cache := &stubSmartRetryCache{} account := &Account{ ID: 16, Name: "acc-16", @@ -1195,21 +1287,15 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession availableURLs := []string{"https://ag-1.test"} - svc := &AntigravityGatewayService{cache: cache} + svc := &AntigravityGatewayService{} result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) require.NotNil(t, result) - require.NotNil(t, result.switchError) + require.NotNil(t, result.switchError, "should switch account after capacity retry exhausted") require.True(t, result.switchError.IsStickySession) - // 验证粘性绑定被清除 - require.Len(t, cache.deleteCalls, 1) - require.Equal(t, int64(77), cache.deleteCalls[0].groupID) - require.Equal(t, "sticky-503-short", cache.deleteCalls[0].sessionHash) - - // 验证模型限流已设置 - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro", repo.modelRateLimitCalls[0].modelKey) + // MODEL_CAPACITY_EXHAUSTED 不应设置模型限流 + require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") } // TestAntigravityRetryLoop_SmartRetryFailed_StickySession_SwitchErrorPropagates From 05f5a8b61db4960528074ab7f281404a8426e49f Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 03:59:39 +0800 Subject: [PATCH 12/16] fix: use switch statement for staticcheck QF1003 compliance --- backend/internal/service/gateway_service.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 01e1acb4..910e04a4 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -379,9 +379,10 @@ func (s *GatewayService) TempUnscheduleRetryableError(ctx 
context.Context, accou return } // 根据状态码选择封禁策略 - if failoverErr.StatusCode == http.StatusBadRequest { + switch failoverErr.StatusCode { + case http.StatusBadRequest: tempUnscheduleGoogleConfigError(ctx, s.accountRepo, accountID, "[handler]") - } else if failoverErr.StatusCode == http.StatusBadGateway { + case http.StatusBadGateway: tempUnscheduleEmptyResponse(ctx, s.accountRepo, accountID, "[handler]") } } From f06048eccfb1e4f8373b6348f3a425a3c36fa1a5 Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 04:05:20 +0800 Subject: [PATCH 13/16] fix: simplify MODEL_CAPACITY_EXHAUSTED to single retry for all cases Both short (<20s) and long (>=20s/missing) retryDelay now retry once: - Short: wait actual retryDelay, retry once - Long/missing: wait 20s, retry once - Still capacity exhausted: switch account - Different error: let upper layer handle --- .../service/antigravity_gateway_service.go | 128 +++++++++--------- .../service/antigravity_smart_retry_test.go | 31 ++--- 2 files changed, 75 insertions(+), 84 deletions(-) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 84e78eaa..efff2e18 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -42,11 +42,10 @@ const ( // MODEL_CAPACITY_EXHAUSTED 专用常量 // 容量不足是临时状态,所有账号共享容量池,与限流不同 // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 - // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 + // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 后重试 1 次 // - 重试仍为容量不足: 切换账号 // - 重试遇到其他错误: 按实际错误码处理 antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值 - antigravityModelCapacityMaxAttempts = 5 // 容量不足长等待重试次数 // Google RPC 状态和类型常量 googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" @@ -296,86 +295,83 @@ func (s *AntigravityGatewayService) handleSmartRetry(p 
antigravityRetryLoopParam // handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑 // 策略: // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 -// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 +// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 后重试 1 次 // - 重试成功: 直接返回 -// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 继续重试直到次数用完,然后切换账号 +// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 切换账号 // - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理 func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry( p antigravityRetryLoopParams, resp *http.Response, respBody []byte, baseURL string, retryDelay time.Duration, modelName string, ) *smartRetryResult { - // 确定重试参数 - maxAttempts := 1 + // 确定等待时间 waitDuration := retryDelay if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold { - // 无 retryDelay 或 >= 20s: 固定 20s 间隔,最多 5 次 - maxAttempts = antigravityModelCapacityMaxAttempts + // 无 retryDelay 或 >= 20s: 固定等待 20s waitDuration = antigravityModelCapacityWaitThreshold } - for attempt := 1; attempt <= maxAttempts; attempt++ { - log.Printf("%s status=%d model_capacity_exhausted_retry attempt=%d/%d delay=%v model=%s account=%d", - p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID) + log.Printf("%s status=%d model_capacity_exhausted_retry delay=%v model=%s account=%d", + p.prefix, resp.StatusCode, waitDuration, modelName, p.account.ID) - select { - case <-p.ctx.Done(): - log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix) - return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} - case <-time.After(waitDuration): - } + select { + case <-p.ctx.Done(): + log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix) + return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} + case <-time.After(waitDuration): + } - retryReq, err := 
antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body) - if err != nil { - log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err) - return &smartRetryResult{ - action: smartRetryActionBreakWithResp, - resp: &http.Response{ - StatusCode: resp.StatusCode, - Header: resp.Header.Clone(), - Body: io.NopCloser(bytes.NewReader(respBody)), - }, - } - } - - retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) - - // 网络错误: 继续重试 - if retryErr != nil || retryResp == nil { - log.Printf("%s status=capacity_retry_network_error attempt=%d/%d error=%v", - p.prefix, attempt, maxAttempts, retryErr) - continue - } - - // 成功 (非 429/503): 直接返回 - if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { - log.Printf("%s status=%d model_capacity_retry_success attempt=%d/%d", - p.prefix, retryResp.StatusCode, attempt, maxAttempts) - return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} - } - - // 读取重试响应体,判断是否仍为容量不足 - retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) - _ = retryResp.Body.Close() - - retryInfo := parseAntigravitySmartRetryInfo(retryBody) - - // 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理 - if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { - log.Printf("%s status=%d capacity_retry_got_different_error attempt=%d/%d body=%s", - p.prefix, retryResp.StatusCode, attempt, maxAttempts, truncateForLog(retryBody, 200)) - retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) - return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} - } - - // 仍然是 MODEL_CAPACITY_EXHAUSTED: 更新等待时间,继续重试 - if retryInfo.RetryDelay > 0 && retryInfo.RetryDelay < antigravityModelCapacityWaitThreshold { - waitDuration = retryInfo.RetryDelay + retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body) + if err != nil { + 
log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(respBody)), + }, } } - // 所有重试都失败且仍为容量不足: 切换账号 - log.Printf("%s status=%d model_capacity_exhausted_retry_exhausted attempts=%d model=%s account=%d (switch account)", - p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID) + retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) + + // 网络错误: 切换账号 + if retryErr != nil || retryResp == nil { + log.Printf("%s status=capacity_retry_network_error error=%v (switch account)", + p.prefix, retryErr) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + switchError: &AntigravityAccountSwitchError{ + OriginalAccountID: p.account.ID, + RateLimitedModel: modelName, + IsStickySession: p.isStickySession, + }, + } + } + + // 成功 (非 429/503): 直接返回 + if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { + log.Printf("%s status=%d model_capacity_retry_success", p.prefix, retryResp.StatusCode) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 读取重试响应体,判断是否仍为容量不足 + retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) + _ = retryResp.Body.Close() + + retryInfo := parseAntigravitySmartRetryInfo(retryBody) + + // 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理 + if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { + log.Printf("%s status=%d capacity_retry_got_different_error body=%s", + p.prefix, retryResp.StatusCode, truncateForLog(retryBody, 200)) + retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 仍然是 MODEL_CAPACITY_EXHAUSTED: 切换账号 + log.Printf("%s status=%d 
model_capacity_exhausted_retry_failed model=%s account=%d (switch account)", + p.prefix, resp.StatusCode, modelName, p.account.ID) return &smartRetryResult{ action: smartRetryActionBreakWithResp, diff --git a/backend/internal/service/antigravity_smart_retry_test.go b/backend/internal/service/antigravity_smart_retry_test.go index 7a6050a7..b1ca5695 100644 --- a/backend/internal/service/antigravity_smart_retry_test.go +++ b/backend/internal/service/antigravity_smart_retry_test.go @@ -363,9 +363,9 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t * } // TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount -// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 每 20s 重试最多 5 次,全失败后切换账号 +// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 等待 20s 后重试 1 次,仍失败则切换账号 func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) { - // 构造 5 个仍然容量不足的重试响应 + // 重试仍然返回容量不足 capacityBody := `{ "error": { "code": 503, @@ -376,19 +376,15 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t * ] } }` - var responses []*http.Response - var errs []error - for i := 0; i < 5; i++ { - responses = append(responses, &http.Response{ - StatusCode: http.StatusServiceUnavailable, - Header: http.Header{}, - Body: io.NopCloser(strings.NewReader(capacityBody)), - }) - errs = append(errs, nil) - } upstream := &mockSmartRetryUpstream{ - responses: responses, - errors: errs, + responses: []*http.Response{ + { + StatusCode: 503, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(capacityBody)), + }, + }, + errors: []error{nil}, } repo := &stubAntigravityAccountRepo{} @@ -412,12 +408,12 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t * } }`) resp := &http.Response{ - StatusCode: http.StatusServiceUnavailable, + StatusCode: 503, Header: http.Header{}, Body: io.NopCloser(bytes.NewReader(respBody)), } - // 使用可取消的 context 避免测试真的等待 5×20s + // context 超时短于 
20s 等待,验证 context 取消时正确返回 ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) defer cancel() @@ -443,8 +439,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t * require.NotNil(t, result) require.Equal(t, smartRetryActionBreakWithResp, result.action) - // context 超时会导致提前返回,switchError 可能为 nil(context canceled) - // 验证不设置模型限流 + // context 超时会导致提前返回 require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") } From 5dd83d3cf2d448403301b6879f6e0bc6337a4390 Mon Sep 17 00:00:00 2001 From: shaw Date: Tue, 10 Feb 2026 10:28:34 +0800 Subject: [PATCH 14/16] =?UTF-8?q?fix:=20=E7=A7=BB=E9=99=A4=E7=89=B9?= =?UTF-8?q?=E5=AE=9Asystem=E4=BB=A5=E9=80=82=E9=85=8D=E6=96=B0=E7=89=88cc?= =?UTF-8?q?=E5=AE=A2=E6=88=B7=E7=AB=AF=E7=BC=93=E5=AD=98=E5=A4=B1=E6=95=88?= =?UTF-8?q?=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pkg/antigravity/request_transformer.go | 23 +++++-- .../service/antigravity_gateway_service.go | 4 -- .../antigravity_single_account_retry_test.go | 2 - backend/internal/service/gateway_service.go | 66 +++++++++++++++++++ frontend/package.json | 2 +- frontend/pnpm-lock.yaml | 21 ++++-- 6 files changed, 102 insertions(+), 16 deletions(-) diff --git a/backend/internal/pkg/antigravity/request_transformer.go b/backend/internal/pkg/antigravity/request_transformer.go index 65f45cfc..e89a4c53 100644 --- a/backend/internal/pkg/antigravity/request_transformer.go +++ b/backend/internal/pkg/antigravity/request_transformer.go @@ -271,6 +271,21 @@ func filterOpenCodePrompt(text string) string { return "" } +// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表 +var systemBlockFilterPrefixes = []string{ + "x-anthropic-billing-header", +} + +// filterSystemBlockByPrefix 如果文本匹配过滤前缀,返回空字符串 +func filterSystemBlockByPrefix(text string) string { + for _, prefix := range systemBlockFilterPrefixes { + if strings.HasPrefix(text, 
prefix) { + return "" + } + } + return text +} + // buildSystemInstruction 构建 systemInstruction(与 Antigravity-Manager 保持一致) func buildSystemInstruction(system json.RawMessage, modelName string, opts TransformOptions, tools []ClaudeTool) *GeminiContent { var parts []GeminiPart @@ -287,8 +302,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans if strings.Contains(sysStr, "You are Antigravity") { userHasAntigravityIdentity = true } - // 过滤 OpenCode 默认提示词 - filtered := filterOpenCodePrompt(sysStr) + // 过滤 OpenCode 默认提示词和黑名单前缀 + filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(sysStr)) if filtered != "" { userSystemParts = append(userSystemParts, GeminiPart{Text: filtered}) } @@ -302,8 +317,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans if strings.Contains(block.Text, "You are Antigravity") { userHasAntigravityIdentity = true } - // 过滤 OpenCode 默认提示词 - filtered := filterOpenCodePrompt(block.Text) + // 过滤 OpenCode 默认提示词和黑名单前缀 + filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(block.Text)) if filtered != "" { userSystemParts = append(userSystemParts, GeminiPart{Text: filtered}) } diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 42a60372..b6d0da06 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -48,10 +48,6 @@ const ( googleRPCReasonModelCapacityExhausted = "MODEL_CAPACITY_EXHAUSTED" googleRPCReasonRateLimitExceeded = "RATE_LIMIT_EXCEEDED" - // 单账号 503 退避重试:预检查中等待模型限流过期的最大时间 - // 超过此值的限流将直接切换账号(避免请求等待过久) - antigravitySingleAccountMaxWait = 30 * time.Second - // 单账号 503 退避重试:Service 层原地重试的最大次数 // 在 handleSmartRetry 中,对于 shouldRateLimitModel(长延迟 ≥ 7s)的情况, // 多账号模式下会设限流+切换账号;但单账号模式下改为原地等待+重试。 diff --git a/backend/internal/service/antigravity_single_account_retry_test.go 
b/backend/internal/service/antigravity_single_account_retry_test.go index 0950b728..d5813553 100644 --- a/backend/internal/service/antigravity_single_account_retry_test.go +++ b/backend/internal/service/antigravity_single_account_retry_test.go @@ -57,8 +57,6 @@ func TestSingleAccountRetryConstants(t *testing.T) { "单次最大等待 15s") require.Equal(t, 30*time.Second, antigravitySingleAccountSmartRetryTotalMaxWait, "总累计等待不超过 30s") - require.Equal(t, 30*time.Second, antigravitySingleAccountMaxWait, - "预检查最大等待 30s") } // --------------------------------------------------------------------------- diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 2c04ae14..610c8f01 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -243,6 +243,12 @@ var ( } ) +// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表 +// OAuth/SetupToken 账号转发时,匹配这些前缀的 system 元素会被移除 +var systemBlockFilterPrefixes = []string{ + "x-anthropic-billing-header", +} + // ErrClaudeCodeOnly 表示分组仅允许 Claude Code 客户端访问 var ErrClaudeCodeOnly = errors.New("this group only allows Claude Code clients") @@ -2684,6 +2690,60 @@ func hasClaudeCodePrefix(text string) bool { return false } +// matchesFilterPrefix 检查文本是否匹配任一过滤前缀 +func matchesFilterPrefix(text string) bool { + for _, prefix := range systemBlockFilterPrefixes { + if strings.HasPrefix(text, prefix) { + return true + } + } + return false +} + +// filterSystemBlocksByPrefix 从 body 的 system 中移除文本匹配 systemBlockFilterPrefixes 前缀的元素 +// 直接从 body 解析 system,不依赖外部传入的 parsed.System(因为前置步骤可能已修改 body 中的 system) +func filterSystemBlocksByPrefix(body []byte) []byte { + sys := gjson.GetBytes(body, "system") + if !sys.Exists() { + return body + } + + switch { + case sys.Type == gjson.String: + if matchesFilterPrefix(sys.Str) { + result, err := sjson.DeleteBytes(body, "system") + if err != nil { + return body + } + return result + } + case sys.IsArray(): + var parsed []any + if 
err := json.Unmarshal([]byte(sys.Raw), &parsed); err != nil { + return body + } + filtered := make([]any, 0, len(parsed)) + changed := false + for _, item := range parsed { + if m, ok := item.(map[string]any); ok { + if text, ok := m["text"].(string); ok && matchesFilterPrefix(text) { + changed = true + continue + } + } + filtered = append(filtered, item) + } + if changed { + result, err := sjson.SetBytes(body, "system", filtered) + if err != nil { + return body + } + return result + } + } + return body +} + // injectClaudeCodePrompt 在 system 开头注入 Claude Code 提示词 // 处理 null、字符串、数组三种格式 func injectClaudeCodePrompt(body []byte, system any) []byte { @@ -2963,6 +3023,12 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A body, reqModel = normalizeClaudeOAuthRequestBody(body, reqModel, normalizeOpts) } + // OAuth/SetupToken 账号:移除黑名单前缀匹配的 system 元素(如客户端注入的计费元数据) + // 放在 inject/normalize 之后,确保不会被覆盖 + if account.IsOAuth() { + body = filterSystemBlocksByPrefix(body) + } + // 强制执行 cache_control 块数量限制(最多 4 个) body = enforceCacheControlLimit(body) diff --git a/frontend/package.json b/frontend/package.json index 325eba60..1b380b17 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -17,7 +17,7 @@ "dependencies": { "@lobehub/icons": "^4.0.2", "@vueuse/core": "^10.7.0", - "axios": "^1.6.2", + "axios": "^1.13.5", "chart.js": "^4.4.1", "dompurify": "^3.3.1", "driver.js": "^1.4.0", diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index 9af2d7af..37c384b4 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -15,8 +15,8 @@ importers: specifier: ^10.7.0 version: 10.11.1(vue@3.5.26(typescript@5.6.3)) axios: - specifier: ^1.6.2 - version: 1.13.2 + specifier: ^1.13.5 + version: 1.13.5 chart.js: specifier: ^4.4.1 version: 4.5.1 @@ -1257,56 +1257,67 @@ packages: resolution: {integrity: sha512-EHMUcDwhtdRGlXZsGSIuXSYwD5kOT9NVnx9sqzYiwAc91wfYOE1g1djOEDseZJKKqtHAHGwnGPQu3kytmfaXLQ==} cpu: [arm] os: [linux] + libc: 
[glibc] '@rollup/rollup-linux-arm-musleabihf@4.54.0': resolution: {integrity: sha512-+pBrqEjaakN2ySv5RVrj/qLytYhPKEUwk+e3SFU5jTLHIcAtqh2rLrd/OkbNuHJpsBgxsD8ccJt5ga/SeG0JmA==} cpu: [arm] os: [linux] + libc: [musl] '@rollup/rollup-linux-arm64-gnu@4.54.0': resolution: {integrity: sha512-NSqc7rE9wuUaRBsBp5ckQ5CVz5aIRKCwsoa6WMF7G01sX3/qHUw/z4pv+D+ahL1EIKy6Enpcnz1RY8pf7bjwng==} cpu: [arm64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm64-musl@4.54.0': resolution: {integrity: sha512-gr5vDbg3Bakga5kbdpqx81m2n9IX8M6gIMlQQIXiLTNeQW6CucvuInJ91EuCJ/JYvc+rcLLsDFcfAD1K7fMofg==} cpu: [arm64] os: [linux] + libc: [musl] '@rollup/rollup-linux-loong64-gnu@4.54.0': resolution: {integrity: sha512-gsrtB1NA3ZYj2vq0Rzkylo9ylCtW/PhpLEivlgWe0bpgtX5+9j9EZa0wtZiCjgu6zmSeZWyI/e2YRX1URozpIw==} cpu: [loong64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-ppc64-gnu@4.54.0': resolution: {integrity: sha512-y3qNOfTBStmFNq+t4s7Tmc9hW2ENtPg8FeUD/VShI7rKxNW7O4fFeaYbMsd3tpFlIg1Q8IapFgy7Q9i2BqeBvA==} cpu: [ppc64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-gnu@4.54.0': resolution: {integrity: sha512-89sepv7h2lIVPsFma8iwmccN7Yjjtgz0Rj/Ou6fEqg3HDhpCa+Et+YSufy27i6b0Wav69Qv4WBNl3Rs6pwhebQ==} cpu: [riscv64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-musl@4.54.0': resolution: {integrity: sha512-ZcU77ieh0M2Q8Ur7D5X7KvK+UxbXeDHwiOt/CPSBTI1fBmeDMivW0dPkdqkT4rOgDjrDDBUed9x4EgraIKoR2A==} cpu: [riscv64] os: [linux] + libc: [musl] '@rollup/rollup-linux-s390x-gnu@4.54.0': resolution: {integrity: sha512-2AdWy5RdDF5+4YfG/YesGDDtbyJlC9LHmL6rZw6FurBJ5n4vFGupsOBGfwMRjBYH7qRQowT8D/U4LoSvVwOhSQ==} cpu: [s390x] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-gnu@4.54.0': resolution: {integrity: sha512-WGt5J8Ij/rvyqpFexxk3ffKqqbLf9AqrTBbWDk7ApGUzaIs6V+s2s84kAxklFwmMF/vBNGrVdYgbblCOFFezMQ==} cpu: [x64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-musl@4.54.0': resolution: {integrity: 
sha512-JzQmb38ATzHjxlPHuTH6tE7ojnMKM2kYNzt44LO/jJi8BpceEC8QuXYA908n8r3CNuG/B3BV8VR3Hi1rYtmPiw==} cpu: [x64] os: [linux] + libc: [musl] '@rollup/rollup-openharmony-arm64@4.54.0': resolution: {integrity: sha512-huT3fd0iC7jigGh7n3q/+lfPcXxBi+om/Rs3yiFxjvSxbSB6aohDFXbWvlspaqjeOh+hx7DDHS+5Es5qRkWkZg==} @@ -1805,8 +1816,8 @@ packages: peerDependencies: postcss: ^8.1.0 - axios@1.13.2: - resolution: {integrity: sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==} + axios@1.13.5: + resolution: {integrity: sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==} babel-plugin-macros@3.1.0: resolution: {integrity: sha512-Cg7TFGpIr01vOQNODXOOaGz2NpCU5gl8x1qJFbb6hbZxR7XrcE2vtbAsTAbJ7/xwJtUuJEw8K8Zr/AE0LHlesg==} @@ -6387,7 +6398,7 @@ snapshots: postcss: 8.5.6 postcss-value-parser: 4.2.0 - axios@1.13.2: + axios@1.13.5: dependencies: follow-redirects: 1.15.11 form-data: 4.0.5 From 406dad998d6def371b7f6cfc429ecba489fa3c32 Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 10:59:34 +0800 Subject: [PATCH 15/16] chore: bump version to 0.1.77.2 --- backend/cmd/server/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION index af6111e5..18412869 100644 --- a/backend/cmd/server/VERSION +++ b/backend/cmd/server/VERSION @@ -1 +1 @@ -0.1.77.1 +0.1.77.2 From 6bdd580b3fcc9f564508f7497f63b2cc2b5b2674 Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 11:40:36 +0800 Subject: [PATCH 16/16] chore: bump version to 0.1.78.1 --- backend/cmd/server/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION index 18412869..aade6705 100644 --- a/backend/cmd/server/VERSION +++ b/backend/cmd/server/VERSION @@ -1 +1 @@ -0.1.77.2 +0.1.78.1 \ No newline at end of file