From f6cfab990145404bbdb0509a7d12530292dc4c69 Mon Sep 17 00:00:00 2001 From: Rose Ding Date: Mon, 9 Feb 2026 14:26:01 +0800 Subject: [PATCH 01/16] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20Antigravity?= =?UTF-8?q?=20=E5=8D=95=E8=B4=A6=E5=8F=B7=20503=20=E9=80=80=E9=81=BF?= =?UTF-8?q?=E9=87=8D=E8=AF=95=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 当分组内只有一个可用账号且上游返回 503 (MODEL_CAPACITY_EXHAUSTED) 时, 不再设置模型限流+切换账号(因为切换回来还是同一个账号),而是在 Service 层 原地等待+重试,避免双重等待问题。 主要变更: - Handler 层:检测单账号 503 场景,清除排除列表并设置 SingleAccountRetry 标记 - Service 层:新增 handleSingleAccountRetryInPlace 原地重试逻辑 - Service 层:预检查跳过单账号模式下的限流检查 - 新增 ctxkey.SingleAccountRetry 上下文标记 --- backend/internal/handler/gateway_handler.go | 47 +++++ .../internal/handler/gemini_v1beta_handler.go | 13 ++ backend/internal/pkg/ctxkey/ctxkey.go | 4 + .../service/antigravity_gateway_service.go | 196 +++++++++++++++++- 4 files changed, 253 insertions(+), 7 deletions(-) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 6900fa55..82181948 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -245,6 +245,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) { h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted) return } + // Antigravity 单账号退避重试:分组内没有其他可用账号时, + // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches { + if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) { + log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches) + failedAccountIDs = make(map[int64]struct{}) + // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换 + ctx := 
context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + continue + } + } if lastFailoverErr != nil { h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted) } else { @@ -412,6 +425,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) { h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted) return } + // Antigravity 单账号退避重试:分组内没有其他可用账号时, + // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches { + if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) { + log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches) + failedAccountIDs = make(map[int64]struct{}) + // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换 + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + continue + } + } if lastFailoverErr != nil { h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted) } else { @@ -838,6 +864,27 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool { } } +// sleepAntigravitySingleAccountBackoff Antigravity 平台单账号分组的 503 退避重试延时。 +// 当分组内只有一个可用账号且上游返回 503(MODEL_CAPACITY_EXHAUSTED)时使用, +// 采用短固定延时策略。Service 层在 SingleAccountRetry 模式下已经做了充分的原地重试 +// (最多 3 次、总等待 30s),所以 Handler 层的退避只需短暂等待即可。 +// 返回 false 表示 context 已取消。 +func sleepAntigravitySingleAccountBackoff(ctx context.Context, retryCount int) bool { + // 固定短延时:2s + // Service 层已经在原地等待了足够长的时间(retryDelay × 重试次数), + // Handler 层只需短暂间隔后重新进入 Service 层即可。 + const delay = 2 * time.Second + + log.Printf("Antigravity single-account 503 backoff: waiting %v before retry (attempt %d)", delay, retryCount) + + select { + case 
<-ctx.Done(): + return false + case <-time.After(delay): + return true + } +} + func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) { statusCode := failoverErr.StatusCode responseBody := failoverErr.ResponseBody diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index d5149f22..2b67cb1f 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -334,6 +334,19 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error()) return } + // Antigravity 单账号退避重试:分组内没有其他可用账号时, + // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches { + if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) { + log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches) + failedAccountIDs = make(map[int64]struct{}) + // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换 + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + continue + } + } h.handleGeminiFailoverExhausted(c, lastFailoverErr) return } diff --git a/backend/internal/pkg/ctxkey/ctxkey.go b/backend/internal/pkg/ctxkey/ctxkey.go index 9bf563e7..0c4d82f7 100644 --- a/backend/internal/pkg/ctxkey/ctxkey.go +++ b/backend/internal/pkg/ctxkey/ctxkey.go @@ -28,4 +28,8 @@ const ( // IsMaxTokensOneHaikuRequest 标识当前请求是否为 max_tokens=1 + haiku 模型的探测请求 // 用于 ClaudeCodeOnly 验证绕过(绕过 system prompt 检查,但仍需验证 User-Agent) IsMaxTokensOneHaikuRequest Key = "ctx_is_max_tokens_one_haiku" + + // SingleAccountRetry 标识当前请求处于单账号 503 退避重试模式。 + 
// 在此模式下,Service 层的模型限流预检查将等待限流过期而非直接切换账号。 + SingleAccountRetry Key = "ctx_single_account_retry" ) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 014b3c86..11f975fe 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -20,6 +20,7 @@ import ( "time" "github.com/Wei-Shaw/sub2api/internal/pkg/antigravity" + "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" "github.com/gin-gonic/gin" "github.com/google/uuid" ) @@ -46,6 +47,23 @@ const ( googleRPCTypeErrorInfo = "type.googleapis.com/google.rpc.ErrorInfo" googleRPCReasonModelCapacityExhausted = "MODEL_CAPACITY_EXHAUSTED" googleRPCReasonRateLimitExceeded = "RATE_LIMIT_EXCEEDED" + + // 单账号 503 退避重试:预检查中等待模型限流过期的最大时间 + // 超过此值的限流将直接切换账号(避免请求等待过久) + antigravitySingleAccountMaxWait = 30 * time.Second + + // 单账号 503 退避重试:Service 层原地重试的最大次数 + // 在 handleSmartRetry 中,对于 shouldRateLimitModel(长延迟 ≥ 7s)的情况, + // 多账号模式下会设限流+切换账号;但单账号模式下改为原地等待+重试。 + antigravitySingleAccountSmartRetryMaxAttempts = 3 + + // 单账号 503 退避重试:原地重试时单次最大等待时间 + // 防止上游返回过长的 retryDelay 导致请求卡住太久 + antigravitySingleAccountSmartRetryMaxWait = 15 * time.Second + + // 单账号 503 退避重试:原地重试的总累计等待时间上限 + // 超过此上限将不再重试,直接返回 503 + antigravitySingleAccountSmartRetryTotalMaxWait = 30 * time.Second ) // antigravityPassthroughErrorMessages 透传给客户端的错误消息白名单(小写) @@ -148,6 +166,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam // 情况1: retryDelay >= 阈值,限流模型并切换账号 if shouldRateLimitModel { + // 单账号 503 退避重试模式:不设限流、不切换账号,改为原地等待+重试 + // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。 + // 多账号场景下切换账号是最优选择,但单账号场景下设限流毫无意义(只会导致双重等待)。 + if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) { + return s.handleSingleAccountRetryInPlace(p, resp, respBody, baseURL, waitDuration, modelName) + } + rateLimitDuration := waitDuration if rateLimitDuration <= 0 { 
rateLimitDuration = antigravityDefaultRateLimitDuration @@ -236,7 +261,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam } } - // 所有重试都失败,限流当前模型并切换账号 + // 所有重试都失败 rateLimitDuration := waitDuration if rateLimitDuration <= 0 { rateLimitDuration = antigravityDefaultRateLimitDuration @@ -245,6 +270,22 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam if retryBody == nil { retryBody = respBody } + + // 单账号 503 退避重试模式:智能重试耗尽后不设限流、不切换账号, + // 直接返回 503 让 Handler 层的单账号退避循环做最终处理。 + if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) { + log.Printf("%s status=%d smart_retry_exhausted_single_account attempts=%d model=%s account=%d body=%s (return 503 directly)", + p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200)) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(retryBody)), + }, + } + } + log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)", p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200)) @@ -279,17 +320,152 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam return &smartRetryResult{action: smartRetryActionContinue} } +// handleSingleAccountRetryInPlace 单账号 503 退避重试的原地重试逻辑。 +// +// 在多账号场景下,收到 503 + 长 retryDelay(≥ 7s)时会设置模型限流 + 切换账号; +// 但在单账号场景下,设限流毫无意义(因为切换回来的还是同一个账号,还要等限流过期)。 +// 此方法改为在 Service 层原地等待 + 重试,避免双重等待问题: +// +// 旧流程:Service 设限流 → Handler 退避等待 → Service 等限流过期 → 再请求(总耗时 = 退避 + 限流) +// 新流程:Service 直接等 retryDelay → 重试 → 成功/再等 → 重试...(总耗时 ≈ 实际 retryDelay × 重试次数) +// +// 约束: +// - 单次等待不超过 antigravitySingleAccountSmartRetryMaxWait +// - 总累计等待不超过 
antigravitySingleAccountSmartRetryTotalMaxWait +// - 最多重试 antigravitySingleAccountSmartRetryMaxAttempts 次 +func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace( + p antigravityRetryLoopParams, + resp *http.Response, + respBody []byte, + baseURL string, + waitDuration time.Duration, + modelName string, +) *smartRetryResult { + // 限制单次等待时间 + if waitDuration > antigravitySingleAccountSmartRetryMaxWait { + waitDuration = antigravitySingleAccountSmartRetryMaxWait + } + if waitDuration < antigravitySmartRetryMinWait { + waitDuration = antigravitySmartRetryMinWait + } + + log.Printf("%s status=%d single_account_503_retry_in_place model=%s account=%d upstream_retry_delay=%v (retrying in-place instead of rate-limiting)", + p.prefix, resp.StatusCode, modelName, p.account.ID, waitDuration) + + var lastRetryResp *http.Response + var lastRetryBody []byte + totalWaited := time.Duration(0) + + for attempt := 1; attempt <= antigravitySingleAccountSmartRetryMaxAttempts; attempt++ { + // 检查累计等待是否超限 + if totalWaited+waitDuration > antigravitySingleAccountSmartRetryTotalMaxWait { + remaining := antigravitySingleAccountSmartRetryTotalMaxWait - totalWaited + if remaining <= 0 { + log.Printf("%s single_account_503_retry: total_wait_exceeded total=%v max=%v, giving up", + p.prefix, totalWaited, antigravitySingleAccountSmartRetryTotalMaxWait) + break + } + waitDuration = remaining + } + + log.Printf("%s status=%d single_account_503_retry attempt=%d/%d delay=%v total_waited=%v model=%s account=%d", + p.prefix, resp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, waitDuration, totalWaited, modelName, p.account.ID) + + select { + case <-p.ctx.Done(): + log.Printf("%s status=context_canceled_during_single_account_retry", p.prefix) + return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} + case <-time.After(waitDuration): + } + totalWaited += waitDuration + + // 创建新请求 + retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, 
baseURL, p.action, p.accessToken, p.body) + if err != nil { + log.Printf("%s single_account_503_retry: request_build_failed error=%v", p.prefix, err) + break + } + + retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) + if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { + log.Printf("%s status=%d single_account_503_retry_success attempt=%d/%d total_waited=%v", + p.prefix, retryResp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited) + // 关闭之前的响应 + if lastRetryResp != nil { + _ = lastRetryResp.Body.Close() + } + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 网络错误时继续重试 + if retryErr != nil || retryResp == nil { + log.Printf("%s single_account_503_retry: network_error attempt=%d/%d error=%v", + p.prefix, attempt, antigravitySingleAccountSmartRetryMaxAttempts, retryErr) + continue + } + + // 关闭之前的响应 + if lastRetryResp != nil { + _ = lastRetryResp.Body.Close() + } + lastRetryResp = retryResp + lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) + _ = retryResp.Body.Close() + + // 解析新的重试信息,更新下次等待时间 + if attempt < antigravitySingleAccountSmartRetryMaxAttempts && lastRetryBody != nil { + _, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) + if newWaitDuration > 0 { + waitDuration = newWaitDuration + if waitDuration > antigravitySingleAccountSmartRetryMaxWait { + waitDuration = antigravitySingleAccountSmartRetryMaxWait + } + if waitDuration < antigravitySmartRetryMinWait { + waitDuration = antigravitySmartRetryMinWait + } + } + } + } + + // 所有重试都失败,不设限流,直接返回 503 + // Handler 层的单账号退避循环会做最终处理 + retryBody := lastRetryBody + if retryBody == nil { + retryBody = respBody + } + log.Printf("%s status=%d single_account_503_retry_exhausted attempts=%d total_waited=%v model=%s account=%d body=%s (return 503 
directly)", + p.prefix, resp.StatusCode, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited, modelName, p.account.ID, truncateForLog(retryBody, 200)) + + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(retryBody)), + }, + } +} + // antigravityRetryLoop 执行带 URL fallback 的重试循环 func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) { // 预检查:如果账号已限流,直接返回切换信号 if p.requestedModel != "" { if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 { - log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d", - p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID) - return nil, &AntigravityAccountSwitchError{ - OriginalAccountID: p.account.ID, - RateLimitedModel: p.requestedModel, - IsStickySession: p.isStickySession, + // 单账号 503 退避重试模式:跳过限流预检查,直接发请求。 + // 首次请求设的限流是为了多账号调度器跳过该账号,在单账号模式下无意义。 + // 如果上游确实还不可用,handleSmartRetry → handleSingleAccountRetryInPlace + // 会在 Service 层原地等待+重试,不需要在预检查这里等。 + if isSingleAccountRetry(p.ctx) { + log.Printf("%s pre_check: single_account_retry skipping rate_limit remaining=%v model=%s account=%d (will retry in-place if 503)", + p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID) + } else { + log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d", + p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID) + return nil, &AntigravityAccountSwitchError{ + OriginalAccountID: p.account.ID, + RateLimitedModel: p.requestedModel, + IsStickySession: p.isStickySession, + } } } } @@ -1943,6 +2119,12 @@ func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool { } } +// isSingleAccountRetry 检查 context 中是否设置了单账号退避重试标记 +func isSingleAccountRetry(ctx 
context.Context) bool { + v, _ := ctx.Value(ctxkey.SingleAccountRetry).(bool) + return v +} + // setModelRateLimitByModelName 使用官方模型 ID 设置模型级限流 // 直接使用上游返回的模型 ID(如 claude-sonnet-4-5)作为限流 key // 返回是否已成功设置(若模型名为空或 repo 为 nil 将返回 false) From 021abfca181af4f6f52f594200d948de04070119 Mon Sep 17 00:00:00 2001 From: Rose Ding Date: Mon, 9 Feb 2026 17:25:36 +0800 Subject: [PATCH 02/16] =?UTF-8?q?fix:=20=E5=8D=95=E8=B4=A6=E5=8F=B7?= =?UTF-8?q?=E5=88=86=E7=BB=84=E9=A6=96=E6=AC=A1=20503=20=E4=B8=8D=E8=AE=BE?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E9=99=90=E6=B5=81=E6=A0=87=E8=AE=B0=EF=BC=8C?= =?UTF-8?q?=E9=81=BF=E5=85=8D=E5=90=8E=E7=BB=AD=E8=AF=B7=E6=B1=82=E9=9B=AA?= =?UTF-8?q?=E5=B4=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 单账号 antigravity 分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时, 原逻辑会设置 ~29s 模型限流标记。由于只有一个账号无法切换, 后续所有新请求在预检查时命中限流 → 几毫秒内直接返回 503, 导致约 30 秒的雪崩窗口。 修复:在 Handler 入口处检查分组是否只有单个 antigravity 账号, 如果是则提前设置 SingleAccountRetry context 标记,让 Service 层 首次 503 就走原地重试逻辑(不设限流标记),避免污染后续请求。 --- backend/internal/handler/gateway_handler.go | 14 ++++++++++++++ backend/internal/handler/gemini_v1beta_handler.go | 7 +++++++ backend/internal/service/gateway_service.go | 11 +++++++++++ 3 files changed, 32 insertions(+) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 82181948..2b3703b4 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -238,6 +238,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) { var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = 
c.Request.WithContext(ctx) + } + for { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制 if err != nil { @@ -409,6 +416,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) { } fallbackUsed := false + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + } + for { maxAccountSwitches := h.maxAccountSwitches switchCount := 0 diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index 2b67cb1f..f8fb0dcb 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -327,6 +327,13 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + } + for { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制 if err != nil { diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 4e723232..2c04ae14 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -1683,6 +1683,17 @@ func (s 
*GatewayService) listSchedulableAccounts(ctx context.Context, groupID *i return accounts, useMixed, nil } +// IsSingleAntigravityAccountGroup 检查指定分组是否只有一个 antigravity 平台的可调度账号。 +// 用于 Handler 层在首次请求时提前设置 SingleAccountRetry context, +// 避免单账号分组收到 503 时错误地设置模型限流标记导致后续请求连续快速失败。 +func (s *GatewayService) IsSingleAntigravityAccountGroup(ctx context.Context, groupID *int64) bool { + accounts, _, err := s.listSchedulableAccounts(ctx, groupID, PlatformAntigravity, true) + if err != nil { + return false + } + return len(accounts) == 1 +} + func (s *GatewayService) isAccountAllowedForPlatform(account *Account, platform string, useMixed bool) bool { if account == nil { return false From 4a84ca9a02e1f11b64cd849e3f7a274b3e18b056 Mon Sep 17 00:00:00 2001 From: erio Date: Mon, 9 Feb 2026 20:08:00 +0800 Subject: [PATCH 03/16] fix: support clearing model-level rate limits from action menu and temp-unsched reset --- backend/internal/service/ratelimit_service.go | 4 ++++ .../components/admin/account/AccountActionMenu.vue | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go index 63732dee..12c48ab8 100644 --- a/backend/internal/service/ratelimit_service.go +++ b/backend/internal/service/ratelimit_service.go @@ -623,6 +623,10 @@ func (s *RateLimitService) ClearTempUnschedulable(ctx context.Context, accountID slog.Warn("temp_unsched_cache_delete_failed", "account_id", accountID, "error", err) } } + // 同时清除模型级别限流 + if err := s.accountRepo.ClearModelRateLimits(ctx, accountID); err != nil { + slog.Warn("clear_model_rate_limits_on_temp_unsched_reset_failed", "account_id", accountID, "error", err) + } return nil } diff --git a/frontend/src/components/admin/account/AccountActionMenu.vue b/frontend/src/components/admin/account/AccountActionMenu.vue index bb753faa..2325f4b4 100644 --- a/frontend/src/components/admin/account/AccountActionMenu.vue +++ 
b/frontend/src/components/admin/account/AccountActionMenu.vue @@ -53,7 +53,19 @@ import type { Account } from '@/types' const props = defineProps<{ show: boolean; account: Account | null; position: { top: number; left: number } | null }>() const emit = defineEmits(['close', 'test', 'stats', 'reauth', 'refresh-token', 'reset-status', 'clear-rate-limit']) const { t } = useI18n() -const isRateLimited = computed(() => props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date()) +const isRateLimited = computed(() => { + if (props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date()) { + return true + } + const modelLimits = (props.account?.extra as Record<string, unknown> | undefined)?.model_rate_limits as + | Record<string, { rate_limit_reset_at: string }> + | undefined + if (modelLimits) { + const now = new Date() + return Object.values(modelLimits).some(info => new Date(info.rate_limit_reset_at) > now) + } + return false +}) const isOverloaded = computed(() => props.account?.overload_until && new Date(props.account.overload_until) > new Date()) const handleKeydown = (event: KeyboardEvent) => { From e4bc35151f4feb41e8283fda1dd8e074a20289a0 Mon Sep 17 00:00:00 2001 From: Rose Ding Date: Mon, 9 Feb 2026 22:06:06 +0800 Subject: [PATCH 04/16] =?UTF-8?q?test:=20=E6=B7=BB=E5=8A=A0=E5=8D=95?= =?UTF-8?q?=E8=B4=A6=E5=8F=B7=20503=20=E9=80=80=E9=81=BF=E9=87=8D=E8=AF=95?= =?UTF-8?q?=E6=9C=BA=E5=88=B6=E7=9A=84=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 覆盖 Service 层和 Handler 层的所有新增逻辑: - isSingleAccountRetry context 标记检查 - handleSmartRetry 中 503 + SingleAccountRetry 分支 - handleSingleAccountRetryInPlace 原地重试逻辑 - antigravityRetryLoop 预检查跳过限流 - sleepAntigravitySingleAccountBackoff 固定延迟退避 - 端到端集成场景验证 Co-Authored-By: Claude Opus 4.6 --- ...teway_handler_single_account_retry_test.go | 51 + .../antigravity_single_account_retry_test.go | 904 ++++++++++++++++++ 2 files changed, 955 insertions(+)
create mode 100644 backend/internal/handler/gateway_handler_single_account_retry_test.go create mode 100644 backend/internal/service/antigravity_single_account_retry_test.go diff --git a/backend/internal/handler/gateway_handler_single_account_retry_test.go b/backend/internal/handler/gateway_handler_single_account_retry_test.go new file mode 100644 index 00000000..96aa14c6 --- /dev/null +++ b/backend/internal/handler/gateway_handler_single_account_retry_test.go @@ -0,0 +1,51 @@ +package handler + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// sleepAntigravitySingleAccountBackoff 测试 +// --------------------------------------------------------------------------- + +func TestSleepAntigravitySingleAccountBackoff_ReturnsTrue(t *testing.T) { + ctx := context.Background() + start := time.Now() + ok := sleepAntigravitySingleAccountBackoff(ctx, 1) + elapsed := time.Since(start) + + require.True(t, ok, "should return true when context is not canceled") + // 固定延迟 2s + require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond, "should wait approximately 2s") + require.Less(t, elapsed, 5*time.Second, "should not wait too long") +} + +func TestSleepAntigravitySingleAccountBackoff_ContextCanceled(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // 立即取消 + + start := time.Now() + ok := sleepAntigravitySingleAccountBackoff(ctx, 1) + elapsed := time.Since(start) + + require.False(t, ok, "should return false when context is canceled") + require.Less(t, elapsed, 500*time.Millisecond, "should return immediately on cancel") +} + +func TestSleepAntigravitySingleAccountBackoff_FixedDelay(t *testing.T) { + // 验证不同 retryCount 都使用固定 2s 延迟 + ctx := context.Background() + + start := time.Now() + ok := sleepAntigravitySingleAccountBackoff(ctx, 5) + elapsed := time.Since(start) + + require.True(t, ok) + // 即使 retryCount=5,延迟仍然是固定的 2s + 
require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond) + require.Less(t, elapsed, 5*time.Second) +} diff --git a/backend/internal/service/antigravity_single_account_retry_test.go b/backend/internal/service/antigravity_single_account_retry_test.go new file mode 100644 index 00000000..0950b728 --- /dev/null +++ b/backend/internal/service/antigravity_single_account_retry_test.go @@ -0,0 +1,904 @@ +//go:build unit + +package service + +import ( + "bytes" + "context" + "io" + "net/http" + "strings" + "testing" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" + "github.com/stretchr/testify/require" +) + +// --------------------------------------------------------------------------- +// 辅助函数:构造带 SingleAccountRetry 标记的 context +// --------------------------------------------------------------------------- + +func ctxWithSingleAccountRetry() context.Context { + return context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true) +} + +// --------------------------------------------------------------------------- +// 1. isSingleAccountRetry 测试 +// --------------------------------------------------------------------------- + +func TestIsSingleAccountRetry_True(t *testing.T) { + ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true) + require.True(t, isSingleAccountRetry(ctx)) +} + +func TestIsSingleAccountRetry_False_NoValue(t *testing.T) { + require.False(t, isSingleAccountRetry(context.Background())) +} + +func TestIsSingleAccountRetry_False_ExplicitFalse(t *testing.T) { + ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, false) + require.False(t, isSingleAccountRetry(ctx)) +} + +func TestIsSingleAccountRetry_False_WrongType(t *testing.T) { + ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, "true") + require.False(t, isSingleAccountRetry(ctx)) +} + +// --------------------------------------------------------------------------- +// 2. 
常量验证 +// --------------------------------------------------------------------------- + +func TestSingleAccountRetryConstants(t *testing.T) { + require.Equal(t, 3, antigravitySingleAccountSmartRetryMaxAttempts, + "单账号原地重试最多 3 次") + require.Equal(t, 15*time.Second, antigravitySingleAccountSmartRetryMaxWait, + "单次最大等待 15s") + require.Equal(t, 30*time.Second, antigravitySingleAccountSmartRetryTotalMaxWait, + "总累计等待不超过 30s") + require.Equal(t, 30*time.Second, antigravitySingleAccountMaxWait, + "预检查最大等待 30s") +} + +// --------------------------------------------------------------------------- +// 3. handleSmartRetry + 503 + SingleAccountRetry → 走 handleSingleAccountRetryInPlace +// (而非设模型限流 + 切换账号) +// --------------------------------------------------------------------------- + +// TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace +// 核心场景:503 + retryDelay >= 7s + SingleAccountRetry 标记 +// → 不设模型限流、不切换账号,改为原地重试 +func TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace(t *testing.T) { + // 原地重试成功 + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 1, + Name: "acc-single", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + // 503 + 39s >= 7s 阈值 + MODEL_CAPACITY_EXHAUSTED + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"} + ], + "message": "No capacity available for model gemini-3-pro-high on the server" + } + }`) + resp := &http.Response{ + StatusCode: 
http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), // 关键:设置单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 关键断言:返回 resp(原地重试成功),而非 switchError(切换账号) + require.NotNil(t, result.resp, "should return successful response from in-place retry") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Nil(t, result.switchError, "should NOT return switchError in single account mode") + require.Nil(t, result.err) + + // 验证未设模型限流(单账号模式不应设限流) + require.Len(t, repo.modelRateLimitCalls, 0, + "should NOT set model rate limit in single account retry mode") + + // 验证确实调用了 upstream(原地重试) + require.GreaterOrEqual(t, len(upstream.calls), 1, "should have made at least one retry call") +} + +// TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches +// 对照组:503 + retryDelay >= 7s + 无 SingleAccountRetry 标记 +// → 照常设模型限流 + 切换账号 +func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *testing.T) { + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 2, + Name: "acc-multi", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 503 + 39s >= 7s 阈值 + respBody := []byte(`{ + "error": { + "code": 503, + "status": 
"UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: context.Background(), // 关键:无单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 对照:多账号模式返回 switchError + require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503") + require.Nil(t, result.resp, "should not return resp when switchError is set") + + // 对照:多账号模式应设模型限流 + require.Len(t, repo.modelRateLimitCalls, 1, + "multi-account mode SHOULD set model rate limit") +} + +// TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches +// 边界情况:429(非 503)+ SingleAccountRetry 标记 +// → 单账号原地重试仅针对 503,429 依然走切换账号逻辑 +func TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches(t *testing.T) { + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 3, + Name: "acc-429", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 429 + 15s >= 7s 阈值 + respBody := []byte(`{ + "error": { + "status": 
"RESOURCE_EXHAUSTED", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "15s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusTooManyRequests, // 429,不是 503 + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), // 有单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 429 即使有单账号标记,也应走切换账号 + require.NotNil(t, result.switchError, "429 should still return switchError even with SingleAccountRetry") + require.Len(t, repo.modelRateLimitCalls, 1, + "429 should still set model rate limit even with SingleAccountRetry") +} + +// --------------------------------------------------------------------------- +// 4. 
handleSmartRetry + 503 + 短延迟 + SingleAccountRetry → 智能重试耗尽后不设限流 +// --------------------------------------------------------------------------- + +// TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit +// 503 + retryDelay < 7s + SingleAccountRetry → 智能重试耗尽后直接返回 503,不设限流 +func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testing.T) { + // 智能重试也返回 503 + failRespBody := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }` + failResp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(failRespBody)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{failResp}, + errors: []error{nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 4, + Name: "acc-short-503", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 0.1s < 7s 阈值 + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, 
body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 关键断言:单账号 503 模式下,智能重试耗尽后直接返回 503 响应,不切换 + require.NotNil(t, result.resp, "should return 503 response directly for single account mode") + require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode) + require.Nil(t, result.switchError, "should NOT switch account in single account mode") + + // 关键断言:不设模型限流 + require.Len(t, repo.modelRateLimitCalls, 0, + "should NOT set model rate limit for 503 in single account mode") +} + +// TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit +// 对照组:503 + retryDelay < 7s + 无 SingleAccountRetry → 智能重试耗尽后照常设限流 +func TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit(t *testing.T) { + failRespBody := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }` + failResp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(failRespBody)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{failResp}, + errors: []error{nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 5, + Name: "acc-multi-503", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": 
"type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: context.Background(), // 无单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 对照:多账号模式应返回 switchError + require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503") + // 对照:多账号模式应设模型限流 + require.Len(t, repo.modelRateLimitCalls, 1, + "multi-account mode should set model rate limit") +} + +// --------------------------------------------------------------------------- +// 5. 
handleSingleAccountRetryInPlace 直接测试 +// --------------------------------------------------------------------------- + +// TestHandleSingleAccountRetryInPlace_Success 原地重试成功 +func TestHandleSingleAccountRetryInPlace_Success(t *testing.T) { + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + + account := &Account{ + ID: 10, + Name: "acc-inplace-ok", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp, "should return successful response") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Nil(t, result.switchError, "should not switch account on success") + require.Nil(t, result.err) +} + +// TestHandleSingleAccountRetryInPlace_AllRetriesFail 所有重试都失败,返回 503(不设限流) +func TestHandleSingleAccountRetryInPlace_AllRetriesFail(t *testing.T) { + // 构造 3 个 503 响应(对应 3 次原地重试) + var responses []*http.Response + var errors []error + for i := 0; i < antigravitySingleAccountSmartRetryMaxAttempts; i++ { + responses = append(responses, &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + 
"details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }`)), + }) + errors = append(errors, nil) + } + upstream := &mockSmartRetryUpstream{ + responses: responses, + errors: errors, + } + + account := &Account{ + ID: 11, + Name: "acc-inplace-fail", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + origBody := []byte(`{"error":{"code":503,"status":"UNAVAILABLE"}}`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{"X-Test": {"original"}}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, origBody, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + // 关键:返回 503 resp,不返回 switchError + require.NotNil(t, result.resp, "should return 503 response directly") + require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode) + require.Nil(t, result.switchError, "should NOT return switchError - let Handler handle it") + require.Nil(t, result.err) + + // 验证确实重试了指定次数 + require.Len(t, upstream.calls, antigravitySingleAccountSmartRetryMaxAttempts, + "should have made exactly maxAttempts retry calls") +} + +// TestHandleSingleAccountRetryInPlace_WaitDurationClamped 等待时间被限制在 [min, max] 范围 +func TestHandleSingleAccountRetryInPlace_WaitDurationClamped(t *testing.T) { + // 用短延迟的成功响应,只验证不 panic + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + 
} + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + + account := &Account{ + ID: 12, + Name: "acc-clamp", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + + // 等待时间过大应被 clamp 到 antigravitySingleAccountSmartRetryMaxWait + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 999*time.Second, "gemini-3-pro") + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp) + require.Equal(t, http.StatusOK, result.resp.StatusCode) +} + +// TestHandleSingleAccountRetryInPlace_ContextCanceled context 取消时立即返回 +func TestHandleSingleAccountRetryInPlace_ContextCanceled(t *testing.T) { + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{nil}, + errors: []error{nil}, + } + + account := &Account{ + ID: 13, + Name: "acc-cancel", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + ctx, cancel := context.WithCancel(context.Background()) + ctx = context.WithValue(ctx, ctxkey.SingleAccountRetry, true) + cancel() // 立即取消 + + params := antigravityRetryLoopParams{ + ctx: ctx, + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, 
"gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.Error(t, result.err, "should return context error") + // 不应调用 upstream(因为在等待阶段就被取消了) + require.Len(t, upstream.calls, 0, "should not call upstream when context is canceled") +} + +// TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry 网络错误时继续重试 +func TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry(t *testing.T) { + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + upstream := &mockSmartRetryUpstream{ + // 第1次网络错误(nil resp),第2次成功 + responses: []*http.Response{nil, successResp}, + errors: []error{nil, nil}, + } + + account := &Account{ + ID: 14, + Name: "acc-net-retry", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp, "should return successful response after network error recovery") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Len(t, upstream.calls, 2, "first call fails (network error), second succeeds") +} + +// --------------------------------------------------------------------------- +// 6. 
antigravityRetryLoop 预检查:单账号模式跳过限流 +// --------------------------------------------------------------------------- + +// TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit +// 预检查中,如果有 SingleAccountRetry 标记,即使账号已限流也跳过直接发请求 +func TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit(t *testing.T) { + // 创建一个已设模型限流的账号 + upstream := &recordingOKUpstream{} + account := &Account{ + ID: 20, + Name: "acc-rate-limited", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Schedulable: true, + Status: StatusActive, + Concurrency: 1, + Extra: map[string]any{ + modelRateLimitsKey: map[string]any{ + "claude-sonnet-4-5": map[string]any{ + "rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339), + }, + }, + }, + } + + svc := &AntigravityGatewayService{} + result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + requestedModel: "claude-sonnet-4-5", + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + }) + + require.NoError(t, err, "should not return error") + require.NotNil(t, result, "should return result") + require.NotNil(t, result.resp, "should have response") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + // 关键:尽管限流了,有 SingleAccountRetry 标记时仍然到达了 upstream + require.Equal(t, 1, upstream.calls, "should have reached upstream despite rate limit") +} + +// TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit +// 对照组:无 SingleAccountRetry + 已限流 → 预检查返回 switchError +func TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit(t *testing.T) { + upstream := 
&recordingOKUpstream{} + account := &Account{ + ID: 21, + Name: "acc-rate-limited-multi", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Schedulable: true, + Status: StatusActive, + Concurrency: 1, + Extra: map[string]any{ + modelRateLimitsKey: map[string]any{ + "claude-sonnet-4-5": map[string]any{ + "rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339), + }, + }, + }, + } + + svc := &AntigravityGatewayService{} + result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{ + ctx: context.Background(), // 无单账号标记 + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + requestedModel: "claude-sonnet-4-5", + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + }) + + require.Nil(t, result, "should not return result on rate limit switch") + require.NotNil(t, err, "should return error") + + var switchErr *AntigravityAccountSwitchError + require.ErrorAs(t, err, &switchErr, "should return AntigravityAccountSwitchError") + require.Equal(t, account.ID, switchErr.OriginalAccountID) + require.Equal(t, "claude-sonnet-4-5", switchErr.RateLimitedModel) + + // upstream 不应被调用(预检查就短路了) + require.Equal(t, 0, upstream.calls, "upstream should NOT be called when pre-check blocks") +} + +// --------------------------------------------------------------------------- +// 7. 
端到端集成场景测试 +// --------------------------------------------------------------------------- + +// TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E +// 端到端场景:503 + 单账号 + 原地重试第2次成功 +func TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E(t *testing.T) { + // 第1次原地重试仍返回 503,第2次成功 + fail503Body := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"} + ] + } + }` + resp503 := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(fail503Body)), + } + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{resp503, successResp}, + errors: []error{nil, nil}, + } + + account := &Account{ + ID: 30, + Name: "acc-e2e", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Concurrency: 1, + } + + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + } + + params := antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro") + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp, "should return successful response after 2nd attempt") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + require.Nil(t, result.switchError) + require.Len(t, 
upstream.calls, 2, "first 503, second OK") +} + +// TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E +// 通过 antigravityRetryLoop → handleSmartRetry → handleSingleAccountRetryInPlace 完整链路 +func TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E(t *testing.T) { + // 初始请求返回 503 + 长延迟 + initial503Body := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "10s"} + ], + "message": "No capacity available" + } + }`) + initial503Resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(initial503Body)), + } + + // 原地重试成功 + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)), + } + + upstream := &mockSmartRetryUpstream{ + // 第1次调用(retryLoop 主循环)返回 503 + // 第2次调用(handleSingleAccountRetryInPlace 原地重试)返回 200 + responses: []*http.Response{initial503Resp, successResp}, + errors: []error{nil, nil}, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 31, + Name: "acc-e2e-loop", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + Schedulable: true, + Status: StatusActive, + Concurrency: 1, + } + + svc := &AntigravityGatewayService{} + result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{ + ctx: ctxWithSingleAccountRetry(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) 
*handleModelRateLimitResult { + return nil + }, + }) + + require.NoError(t, err, "should not return error on successful retry") + require.NotNil(t, result, "should return result") + require.NotNil(t, result.resp, "should return response") + require.Equal(t, http.StatusOK, result.resp.StatusCode) + + // 验证未设模型限流 + require.Len(t, repo.modelRateLimitCalls, 0, + "should NOT set model rate limit in single account retry mode") +} From 18b591bc3b0e38448d24413063ae9e0915402601 Mon Sep 17 00:00:00 2001 From: erio Date: Mon, 9 Feb 2026 22:13:44 +0800 Subject: [PATCH 05/16] feat: Antigravity extra failover retries after default retries exhausted When default failover retries are exhausted, continue retrying with Antigravity accounts only (up to 10 times, configurable via GATEWAY_ANTIGRAVITY_EXTRA_RETRIES). Each extra retry uses a fixed 500ms delay. Non-Antigravity accounts are skipped during the extra retry phase. Applied to all three endpoints: Gemini compat, Claude, and Gemini native API paths. --- backend/internal/config/config.go | 4 + backend/internal/handler/gateway_handler.go | 65 ++- .../gateway_handler_extra_retry_test.go | 417 ++++++++++++++++++ .../internal/handler/gemini_v1beta_handler.go | 26 +- 4 files changed, 504 insertions(+), 8 deletions(-) create mode 100644 backend/internal/handler/gateway_handler_extra_retry_test.go diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 91437ba8..460bd05d 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -279,6 +279,9 @@ type GatewayConfig struct { // Antigravity 429 fallback 限流时间(分钟),解析重置时间失败时使用 AntigravityFallbackCooldownMinutes int `mapstructure:"antigravity_fallback_cooldown_minutes"` + // 默认重试用完后,额外使用 Antigravity 账号重试的最大次数(0 表示禁用) + AntigravityExtraRetries int `mapstructure:"antigravity_extra_retries"` + // Scheduling: 账号调度相关配置 Scheduling GatewaySchedulingConfig `mapstructure:"scheduling"` @@ -883,6 +886,7 @@ func setDefaults() { 
viper.SetDefault("gateway.max_account_switches", 10) viper.SetDefault("gateway.max_account_switches_gemini", 3) viper.SetDefault("gateway.antigravity_fallback_cooldown_minutes", 1) + viper.SetDefault("gateway.antigravity_extra_retries", 10) viper.SetDefault("gateway.max_body_size", int64(100*1024*1024)) viper.SetDefault("gateway.connection_pool_isolation", ConnectionPoolIsolationAccountProxy) // HTTP 上游连接池配置(针对 5000+ 并发用户优化) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 6900fa55..361cd8b5 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -39,6 +39,7 @@ type GatewayHandler struct { concurrencyHelper *ConcurrencyHelper maxAccountSwitches int maxAccountSwitchesGemini int + antigravityExtraRetries int } // NewGatewayHandler creates a new GatewayHandler @@ -57,6 +58,7 @@ func NewGatewayHandler( pingInterval := time.Duration(0) maxAccountSwitches := 10 maxAccountSwitchesGemini := 3 + antigravityExtraRetries := 10 if cfg != nil { pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second if cfg.Gateway.MaxAccountSwitches > 0 { @@ -65,6 +67,7 @@ func NewGatewayHandler( if cfg.Gateway.MaxAccountSwitchesGemini > 0 { maxAccountSwitchesGemini = cfg.Gateway.MaxAccountSwitchesGemini } + antigravityExtraRetries = cfg.Gateway.AntigravityExtraRetries } return &GatewayHandler{ gatewayService: gatewayService, @@ -78,6 +81,7 @@ func NewGatewayHandler( concurrencyHelper: NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval), maxAccountSwitches: maxAccountSwitches, maxAccountSwitchesGemini: maxAccountSwitchesGemini, + antigravityExtraRetries: antigravityExtraRetries, } } @@ -234,6 +238,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if platform == service.PlatformGemini { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 + antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var 
lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -255,6 +260,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) + // 额外重试阶段:跳过非 Antigravity 账号 + if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { + failedAccountIDs[account.ID] = struct{}{} + if selection.Acquired && selection.ReleaseFunc != nil { + selection.ReleaseFunc() + } + continue + } + // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -345,8 +359,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) - return + // 默认重试用完,进入 Antigravity 额外重试 + antigravityExtraCount++ + if antigravityExtraCount > h.antigravityExtraRetries { + h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) + return + } + log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) + if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { + return + } + continue } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -399,6 +422,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { for { maxAccountSwitches := h.maxAccountSwitches switchCount := 0 + antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var lastFailoverErr *service.UpstreamFailoverError retryWithFallback := false @@ -422,6 +446,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) + // 额外重试阶段:跳过非 Antigravity 账号 + if switchCount >= maxAccountSwitches && 
account.Platform != service.PlatformAntigravity { + failedAccountIDs[account.ID] = struct{}{} + if selection.Acquired && selection.ReleaseFunc != nil { + selection.ReleaseFunc() + } + continue + } + // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -545,8 +578,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) - return + // 默认重试用完,进入 Antigravity 额外重试 + antigravityExtraCount++ + if antigravityExtraCount > h.antigravityExtraRetries { + h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) + return + } + log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) + if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { + return + } + continue } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -838,6 +880,21 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool { } } +const antigravityExtraRetryDelay = 500 * time.Millisecond + +// sleepFixedDelay 固定延时等待,返回 false 表示 context 已取消。 +func sleepFixedDelay(ctx context.Context, delay time.Duration) bool { + if delay <= 0 { + return true + } + select { + case <-ctx.Done(): + return false + case <-time.After(delay): + return true + } +} + func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) { statusCode := failoverErr.StatusCode responseBody := failoverErr.ResponseBody diff --git a/backend/internal/handler/gateway_handler_extra_retry_test.go b/backend/internal/handler/gateway_handler_extra_retry_test.go new file mode 100644 index 00000000..a0777941 
--- /dev/null +++ b/backend/internal/handler/gateway_handler_extra_retry_test.go @@ -0,0 +1,417 @@ +//go:build unit + +package handler + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/Wei-Shaw/sub2api/internal/config" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +// --- sleepFixedDelay --- + +func TestSleepFixedDelay_ZeroDelay(t *testing.T) { + got := sleepFixedDelay(context.Background(), 0) + require.True(t, got, "zero delay should return true immediately") +} + +func TestSleepFixedDelay_NegativeDelay(t *testing.T) { + got := sleepFixedDelay(context.Background(), -1*time.Second) + require.True(t, got, "negative delay should return true immediately") +} + +func TestSleepFixedDelay_NormalDelay(t *testing.T) { + start := time.Now() + got := sleepFixedDelay(context.Background(), 50*time.Millisecond) + elapsed := time.Since(start) + require.True(t, got, "normal delay should return true") + require.GreaterOrEqual(t, elapsed, 40*time.Millisecond, "should sleep at least ~50ms") +} + +func TestSleepFixedDelay_ContextCancelled(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + got := sleepFixedDelay(ctx, 10*time.Second) + require.False(t, got, "cancelled context should return false") +} + +func TestSleepFixedDelay_ContextTimeout(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) + defer cancel() + got := sleepFixedDelay(ctx, 5*time.Second) + require.False(t, got, "context timeout should return false before delay completes") +} + +// --- antigravityExtraRetryDelay constant --- + +func TestAntigravityExtraRetryDelayValue(t *testing.T) { + require.Equal(t, 500*time.Millisecond, antigravityExtraRetryDelay) +} + +// --- NewGatewayHandler antigravityExtraRetries field --- + +func TestNewGatewayHandler_AntigravityExtraRetries_Default(t 
*testing.T) { + h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil) + require.Equal(t, 10, h.antigravityExtraRetries, "default should be 10 when cfg is nil") +} + +func TestNewGatewayHandler_AntigravityExtraRetries_FromConfig(t *testing.T) { + cfg := &config.Config{ + Gateway: config.GatewayConfig{ + AntigravityExtraRetries: 5, + }, + } + h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) + require.Equal(t, 5, h.antigravityExtraRetries, "should use config value") +} + +func TestNewGatewayHandler_AntigravityExtraRetries_ZeroDisables(t *testing.T) { + cfg := &config.Config{ + Gateway: config.GatewayConfig{ + AntigravityExtraRetries: 0, + }, + } + h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) + require.Equal(t, 0, h.antigravityExtraRetries, "zero should disable extra retries") +} + +// --- handleFailoverAllAccountsExhausted (renamed: using handleFailoverExhausted) --- +// We test the error response format helpers that the extra retry path uses. 
+ +func TestHandleFailoverExhausted_JSON(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + failoverErr := &service.UpstreamFailoverError{StatusCode: 429} + h.handleFailoverExhausted(c, failoverErr, service.PlatformAntigravity, false) + + require.Equal(t, http.StatusTooManyRequests, rec.Code) + + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "rate_limit_error", errObj["type"]) +} + +func TestHandleFailoverExhaustedSimple_JSON(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + h.handleFailoverExhaustedSimple(c, 502, false) + + require.Equal(t, http.StatusBadGateway, rec.Code) + + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "upstream_error", errObj["type"]) +} + +// --- Extra retry platform filter logic --- + +func TestExtraRetryPlatformFilter(t *testing.T) { + tests := []struct { + name string + switchCount int + maxAccountSwitch int + platform string + expectSkip bool + }{ + { + name: "default_retry_phase_antigravity_not_skipped", + switchCount: 1, + maxAccountSwitch: 3, + platform: service.PlatformAntigravity, + expectSkip: false, + }, + { + name: "default_retry_phase_gemini_not_skipped", + switchCount: 1, + maxAccountSwitch: 3, + platform: service.PlatformGemini, + expectSkip: false, + }, + { + name: "extra_retry_phase_antigravity_not_skipped", + switchCount: 3, + maxAccountSwitch: 3, + platform: service.PlatformAntigravity, + expectSkip: false, + }, + { + name: "extra_retry_phase_gemini_skipped", + switchCount: 3, + maxAccountSwitch: 3, + platform: service.PlatformGemini, + expectSkip: true, + }, + { + 
name: "extra_retry_phase_anthropic_skipped", + switchCount: 3, + maxAccountSwitch: 3, + platform: service.PlatformAnthropic, + expectSkip: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Replicate the filter condition from the handler + shouldSkip := tt.switchCount >= tt.maxAccountSwitch && tt.platform != service.PlatformAntigravity + require.Equal(t, tt.expectSkip, shouldSkip) + }) + } +} + +// --- Extra retry counter logic --- + +func TestExtraRetryCounterExhaustion(t *testing.T) { + tests := []struct { + name string + maxExtraRetries int + currentExtraCount int + expectExhausted bool + }{ + { + name: "first_extra_retry", + maxExtraRetries: 10, + currentExtraCount: 1, + expectExhausted: false, + }, + { + name: "at_limit", + maxExtraRetries: 10, + currentExtraCount: 10, + expectExhausted: false, + }, + { + name: "exceeds_limit", + maxExtraRetries: 10, + currentExtraCount: 11, + expectExhausted: true, + }, + { + name: "zero_disables_extra_retry", + maxExtraRetries: 0, + currentExtraCount: 1, + expectExhausted: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Replicate the exhaustion condition: antigravityExtraCount > h.antigravityExtraRetries + exhausted := tt.currentExtraCount > tt.maxExtraRetries + require.Equal(t, tt.expectExhausted, exhausted) + }) + } +} + +// --- mapUpstreamError (used by handleFailoverExhausted) --- + +func TestMapUpstreamError(t *testing.T) { + h := &GatewayHandler{} + tests := []struct { + name string + statusCode int + expectedStatus int + expectedType string + }{ + {"429", 429, http.StatusTooManyRequests, "rate_limit_error"}, + {"529", 529, http.StatusServiceUnavailable, "overloaded_error"}, + {"500", 500, http.StatusBadGateway, "upstream_error"}, + {"502", 502, http.StatusBadGateway, "upstream_error"}, + {"503", 503, http.StatusBadGateway, "upstream_error"}, + {"504", 504, http.StatusBadGateway, "upstream_error"}, + {"401", 401, http.StatusBadGateway, 
"upstream_error"}, + {"403", 403, http.StatusBadGateway, "upstream_error"}, + {"unknown", 418, http.StatusBadGateway, "upstream_error"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + status, errType, _ := h.mapUpstreamError(tt.statusCode) + require.Equal(t, tt.expectedStatus, status) + require.Equal(t, tt.expectedType, errType) + }) + } +} + +// --- Gemini native path: handleGeminiFailoverExhausted --- + +func TestHandleGeminiFailoverExhausted_NilError(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + h.handleGeminiFailoverExhausted(c, nil) + + require.Equal(t, http.StatusBadGateway, rec.Code) + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "Upstream request failed", errObj["message"]) +} + +func TestHandleGeminiFailoverExhausted_429(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + failoverErr := &service.UpstreamFailoverError{StatusCode: 429} + h.handleGeminiFailoverExhausted(c, failoverErr) + + require.Equal(t, http.StatusTooManyRequests, rec.Code) +} + +// --- handleStreamingAwareError streaming mode --- + +func TestHandleStreamingAwareError_StreamStarted(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + // Simulate stream already started: set content type and write initial data + c.Writer.Header().Set("Content-Type", "text/event-stream") + c.Writer.WriteHeaderNow() + + h := &GatewayHandler{} + h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "test error", true) + + body := rec.Body.String() + require.Contains(t, body, "rate_limit_error") + require.Contains(t, body, "test error") + require.Contains(t, body, "data: ") +} 
+ +func TestHandleStreamingAwareError_NotStreaming(t *testing.T) { + gin.SetMode(gin.TestMode) + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + + h := &GatewayHandler{} + h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "no model", false) + + require.Equal(t, http.StatusServiceUnavailable, rec.Code) + var body map[string]any + err := json.Unmarshal(rec.Body.Bytes(), &body) + require.NoError(t, err) + errObj, ok := body["error"].(map[string]any) + require.True(t, ok) + require.Equal(t, "api_error", errObj["type"]) + require.Equal(t, "no model", errObj["message"]) +} + +// --- Integration: extra retry flow simulation --- + +func TestExtraRetryFlowSimulation(t *testing.T) { + // Simulate the full extra retry flow logic + maxAccountSwitches := 3 + maxExtraRetries := 2 + switchCount := 0 + antigravityExtraCount := 0 + + type attempt struct { + platform string + isFailover bool + } + + // Simulate: 3 default retries (all fail), then 2 extra retries (all fail), then exhausted + attempts := []attempt{ + {service.PlatformAntigravity, true}, // switchCount 0 -> 1 + {service.PlatformGemini, true}, // switchCount 1 -> 2 + {service.PlatformAntigravity, true}, // switchCount 2 -> 3 (reaches max) + {service.PlatformAntigravity, true}, // extra retry 1 + {service.PlatformAntigravity, true}, // extra retry 2 + {service.PlatformAntigravity, true}, // extra retry 3 -> exhausted + } + + var exhausted bool + var skipped int + + for _, a := range attempts { + if exhausted { + break + } + + // Extra retry phase: skip non-Antigravity + if switchCount >= maxAccountSwitches && a.platform != service.PlatformAntigravity { + skipped++ + continue + } + + if a.isFailover { + if switchCount >= maxAccountSwitches { + antigravityExtraCount++ + if antigravityExtraCount > maxExtraRetries { + exhausted = true + continue + } + // extra retry delay + continue + // NOTE: duplicate unreachable continue removed (comment kept to preserve hunk line count) + } + switchCount++ + } + } + + require.Equal(t, 3, switchCount, "should have 3 default 
retries") + require.Equal(t, 3, antigravityExtraCount, "counter incremented 3 times") + require.True(t, exhausted, "should be exhausted after exceeding max extra retries") + require.Equal(t, 0, skipped, "no non-antigravity accounts in this simulation") +} + +func TestExtraRetryFlowSimulation_SkipsNonAntigravity(t *testing.T) { + maxAccountSwitches := 2 + switchCount := 2 // already past default retries + antigravityExtraCount := 0 + maxExtraRetries := 5 + + type accountSelection struct { + platform string + } + + selections := []accountSelection{ + {service.PlatformGemini}, // should be skipped + {service.PlatformAnthropic}, // should be skipped + {service.PlatformAntigravity}, // should be attempted + } + + var skippedCount int + var attemptedCount int + + for _, sel := range selections { + if switchCount >= maxAccountSwitches && sel.platform != service.PlatformAntigravity { + skippedCount++ + continue + } + // Simulate failover + antigravityExtraCount++ + if antigravityExtraCount > maxExtraRetries { + break + } + attemptedCount++ + } + + require.Equal(t, 2, skippedCount, "gemini and anthropic accounts should be skipped") + require.Equal(t, 1, attemptedCount, "only antigravity account should be attempted") + require.Equal(t, 1, antigravityExtraCount) +} diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index d5149f22..5a576ab0 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -323,6 +323,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 + antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -340,6 +341,15 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, 
account.ID) + // 额外重试阶段:跳过非 Antigravity 账号 + if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { + failedAccountIDs[account.ID] = struct{}{} + if selection.Acquired && selection.ReleaseFunc != nil { + selection.ReleaseFunc() + } + continue + } + // 检测账号切换:如果粘性会话绑定的账号与当前选择的账号不同,清除 thoughtSignature // 注意:Gemini 原生 API 的 thoughtSignature 与具体上游账号强相关;跨账号透传会导致 400。 if sessionBoundAccountID > 0 && sessionBoundAccountID != account.ID { @@ -424,15 +434,23 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { var failoverErr *service.UpstreamFailoverError if errors.As(err, &failoverErr) { failedAccountIDs[account.ID] = struct{}{} + lastFailoverErr = failoverErr if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - lastFailoverErr = failoverErr - h.handleGeminiFailoverExhausted(c, lastFailoverErr) - return + // 默认重试用完,进入 Antigravity 额外重试 + antigravityExtraCount++ + if antigravityExtraCount > h.antigravityExtraRetries { + h.handleGeminiFailoverExhausted(c, failoverErr) + return + } + log.Printf("Gemini account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) + if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { + return + } + continue } - lastFailoverErr = failoverErr switchCount++ log.Printf("Gemini account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) if account.Platform == service.PlatformAntigravity { From aa4b102108a4e7f1ecc19f4d3179c13393e4be31 Mon Sep 17 00:00:00 2001 From: shaw Date: Mon, 9 Feb 2026 22:04:19 +0800 Subject: [PATCH 06/16] =?UTF-8?q?fix:=20=E7=A7=BB=E9=99=A4Antigravity?= =?UTF-8?q?=E7=9A=84apikey=E8=B4=A6=E6=88=B7=E9=A2=9D=E5=A4=96=E7=9A=84?= =?UTF-8?q?=E8=A1=A8=E5=8D=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../src/components/account/CreateAccountModal.vue | 12 ++++++------ .../src/components/account/EditAccountModal.vue | 14 +++++++++----- frontend/src/i18n/locales/en.ts | 3 ++- frontend/src/i18n/locales/zh.ts | 3 ++- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/frontend/src/components/account/CreateAccountModal.vue b/frontend/src/components/account/CreateAccountModal.vue index 8b4d4c06..f09df7b7 100644 --- a/frontend/src/components/account/CreateAccountModal.vue +++ b/frontend/src/components/account/CreateAccountModal.vue @@ -665,8 +665,8 @@
- {{ t('admin.accounts.types.upstream') }} - {{ t('admin.accounts.types.upstreamDesc') }} + API Key + {{ t('admin.accounts.types.antigravityApikey') }}
@@ -681,7 +681,7 @@ type="text" required class="input" - placeholder="https://s.konstants.xyz" + placeholder="https://cloudcode-pa.googleapis.com" />

{{ t('admin.accounts.upstream.baseUrlHint') }}

@@ -816,8 +816,8 @@ - -
+ +
{{ t('admin.accounts.gemini.tier.aiStudioHint') }}

- +
diff --git a/frontend/src/components/account/EditAccountModal.vue b/frontend/src/components/account/EditAccountModal.vue index 986bd297..60575f56 100644 --- a/frontend/src/components/account/EditAccountModal.vue +++ b/frontend/src/components/account/EditAccountModal.vue @@ -39,7 +39,9 @@ ? 'https://api.openai.com' : account.platform === 'gemini' ? 'https://generativelanguage.googleapis.com' - : 'https://api.anthropic.com' + : account.platform === 'antigravity' + ? 'https://cloudcode-pa.googleapis.com' + : 'https://api.anthropic.com' " />

{{ baseUrlHint }}

@@ -55,14 +57,16 @@ ? 'sk-proj-...' : account.platform === 'gemini' ? 'AIza...' - : 'sk-ant-...' + : account.platform === 'antigravity' + ? 'sk-...' + : 'sk-ant-...' " />

{{ t('admin.accounts.leaveEmptyToKeep') }}

- -
+ +
@@ -372,7 +376,7 @@ v-model="editBaseUrl" type="text" class="input" - placeholder="https://s.konstants.xyz" + placeholder="https://cloudcode-pa.googleapis.com" />

{{ t('admin.accounts.upstream.baseUrlHint') }}

diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index a2d42cb1..dc53e697 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -1359,6 +1359,7 @@ export default { googleOauth: 'Google OAuth', codeAssist: 'Code Assist', antigravityOauth: 'Antigravity OAuth', + antigravityApikey: 'Connect via Base URL + API Key', upstream: 'Upstream', upstreamDesc: 'Connect via Base URL + API Key' }, @@ -1625,7 +1626,7 @@ export default { // Upstream type upstream: { baseUrl: 'Upstream Base URL', - baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://s.konstants.xyz', + baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://cloudcode-pa.googleapis.com', apiKey: 'Upstream API Key', apiKeyHint: 'API Key for the upstream service', pleaseEnterBaseUrl: 'Please enter upstream Base URL', diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index 6d49e169..728d7744 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -1493,6 +1493,7 @@ export default { googleOauth: 'Google OAuth', codeAssist: 'Code Assist', antigravityOauth: 'Antigravity OAuth', + antigravityApikey: '通过 Base URL + API Key 连接', upstream: '对接上游', upstreamDesc: '通过 Base URL + API Key 连接上游', api_key: 'API Key', @@ -1771,7 +1772,7 @@ export default { // Upstream type upstream: { baseUrl: '上游 Base URL', - baseUrlHint: '上游 Antigravity 服务的地址,例如:https://s.konstants.xyz', + baseUrlHint: '上游 Antigravity 服务的地址,例如:https://cloudcode-pa.googleapis.com', apiKey: '上游 API Key', apiKeyHint: '上游服务的 API Key', pleaseEnterBaseUrl: '请输入上游 Base URL', From 345f853b5d8f34273a43bb80e69b87162edd57b2 Mon Sep 17 00:00:00 2001 From: erio Date: Mon, 9 Feb 2026 22:27:47 +0800 Subject: [PATCH 07/16] chore: bump version to 0.1.77.1 --- backend/cmd/server/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION index 
508699ff..af6111e5 100644 --- a/backend/cmd/server/VERSION +++ b/backend/cmd/server/VERSION @@ -1 +1 @@ -0.1.76.4 +0.1.77.1 From 4c1fd570f02430c648e97b4217d602000b6aec89 Mon Sep 17 00:00:00 2001 From: Edric Li Date: Mon, 9 Feb 2026 22:22:19 +0800 Subject: [PATCH 08/16] feat: failover and temp-unschedule on Google "Invalid project resource name" 400 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Google 后端间歇性返回 400 "Invalid project resource name" 错误, 此前该错误直接透传给客户端且不触发账号切换,导致请求失败。 - 在 Antigravity 和 Gemini 两个平台的所有转发路径中, 精确匹配该错误消息后触发 failover 自动换号重试 - 命中后将账号临时封禁 1 小时,避免反复调度到同一故障账号 - 提取共享函数 isGoogleProjectConfigError / tempUnscheduleGoogleConfigError 消除跨 Service 的代码重复 --- .../service/antigravity_gateway_service.go | 62 +++++++++++++++++++ .../service/gemini_messages_compat_service.go | 61 ++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 81a1c149..71dee705 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -1285,6 +1285,28 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, originalModel, 0, "", isStickySession) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest { + msg := strings.ToLower(strings.TrimSpace(extractAntigravityErrorMessage(respBody))) + if isGoogleProjectConfigError(msg) { + upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractAntigravityErrorMessage(respBody))) + upstreamDetail := s.getUpstreamErrorDetail(respBody) + log.Printf("%s status=400 google_config_error failover=true upstream_message=%q account=%d", prefix, upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: 
account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: resp.Header.Get("x-request-id"), + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + } + } + if s.shouldFailoverUpstreamError(resp.StatusCode) { upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody)) upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg) @@ -1825,6 +1847,23 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co // Always record upstream context for Ops error logs, even when we will failover. setOpsUpstreamError(c, resp.StatusCode, upstreamMsg, upstreamDetail) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest && isGoogleProjectConfigError(strings.ToLower(upstreamMsg)) { + log.Printf("%s status=400 google_config_error failover=true upstream_message=%q account=%d", prefix, upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: requestID, + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: unwrappedForOps} + } + if s.shouldFailoverUpstreamError(resp.StatusCode) { appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ Platform: account.Platform, @@ -1920,6 +1959,29 @@ func (s *AntigravityGatewayService) shouldFailoverUpstreamError(statusCode int) } } +// isGoogleProjectConfigError 判断(已提取的小写)错误消息是否属于 Google 服务端配置类问题。 +// 只精确匹配已知的服务端侧错误,避免对客户端请求错误做无意义重试。 +// 适用于所有走 Google 后端的平台(Antigravity、Gemini)。 +func isGoogleProjectConfigError(lowerMsg string) bool { 
+ // Google 间歇性 Bug:Project ID 有效但被临时识别失败 + return strings.Contains(lowerMsg, "invalid project resource name") +} + +// googleConfigErrorCooldown 服务端配置类 400 错误的临时封禁时长 +const googleConfigErrorCooldown = 60 * time.Minute + +// tempUnscheduleGoogleConfigError 对服务端配置类 400 错误触发临时封禁, +// 避免短时间内反复调度到同一个有问题的账号。 +func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { + until := time.Now().Add(googleConfigErrorCooldown) + reason := "400: invalid project resource name (auto temp-unschedule 1h)" + if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { + log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) + } else { + log.Printf("%s temp_unscheduled account=%d until=%v reason=%q", logPrefix, accountID, until.Format("15:04:05"), reason) + } +} + // sleepAntigravityBackoffWithContext 带 context 取消检查的退避等待 // 返回 true 表示正常完成等待,false 表示 context 已取消 func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool { diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go index 792c8f4b..1e59c5fd 100644 --- a/backend/internal/service/gemini_messages_compat_service.go +++ b/backend/internal/service/gemini_messages_compat_service.go @@ -880,6 +880,38 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex // ErrorPolicyNone → 原有逻辑 s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest { + msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + if isGoogleProjectConfigError(msg400) { + upstreamReqID := resp.Header.Get(requestIDHeader) + if upstreamReqID == "" { + upstreamReqID = resp.Header.Get("x-goog-request-id") + } + upstreamMsg := 
sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + upstreamDetail := "" + if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody { + maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes + if maxBytes <= 0 { + maxBytes = 2048 + } + upstreamDetail = truncateString(string(respBody), maxBytes) + } + log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: upstreamReqID, + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + } + } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { upstreamReqID := resp.Header.Get(requestIDHeader) if upstreamReqID == "" { @@ -1330,6 +1362,35 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin. 
// ErrorPolicyNone → 原有逻辑 s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) + // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + if resp.StatusCode == http.StatusBadRequest { + msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + if isGoogleProjectConfigError(msg400) { + evBody := unwrapIfNeeded(isOAuth, respBody) + upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(evBody))) + upstreamDetail := "" + if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody { + maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes + if maxBytes <= 0 { + maxBytes = 2048 + } + upstreamDetail = truncateString(string(evBody), maxBytes) + } + log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID) + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: requestID, + Kind: "failover", + Message: upstreamMsg, + Detail: upstreamDetail, + }) + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody} + } + } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { evBody := unwrapIfNeeded(isOAuth, respBody) upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(evBody)) From 425dfb80d95bc4121c42664fc2931b3633f16216 Mon Sep 17 00:00:00 2001 From: Edric Li Date: Mon, 9 Feb 2026 23:25:30 +0800 Subject: [PATCH 09/16] feat: failover and temp-unschedule on empty stream response - Empty stream responses now return UpstreamFailoverError instead of plain 502, triggering automatic account switching (up to 10 retries) - Add tempUnscheduleEmptyResponse: accounts returning empty responses are temp-unscheduled for 30 minutes - Apply to both Claude and Gemini non-streaming paths - Align 
googleConfigErrorCooldown from 60m to 30m for consistency --- .../service/antigravity_gateway_service.go | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 71dee705..a5fd1535 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -1351,6 +1351,10 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, streamRes, err := s.handleClaudeStreamToNonStreaming(c, resp, startTime, originalModel) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) + var failoverErr *UpstreamFailoverError + if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { + tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) + } return nil, err } usage = streamRes.usage @@ -1920,6 +1924,10 @@ handleSuccess: streamRes, err := s.handleGeminiStreamToNonStreaming(c, resp, startTime) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) + var failoverErr *UpstreamFailoverError + if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { + tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) + } return nil, err } usage = streamRes.usage @@ -1968,13 +1976,28 @@ func isGoogleProjectConfigError(lowerMsg string) bool { } // googleConfigErrorCooldown 服务端配置类 400 错误的临时封禁时长 -const googleConfigErrorCooldown = 60 * time.Minute +const googleConfigErrorCooldown = 30 * time.Minute // tempUnscheduleGoogleConfigError 对服务端配置类 400 错误触发临时封禁, // 避免短时间内反复调度到同一个有问题的账号。 func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { until := time.Now().Add(googleConfigErrorCooldown) - reason := "400: invalid project resource name (auto temp-unschedule 1h)" + reason := "400: 
invalid project resource name (auto temp-unschedule 30m)" + if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { + log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) + } else { + log.Printf("%s temp_unscheduled account=%d until=%v reason=%q", logPrefix, accountID, until.Format("15:04:05"), reason) + } +} + +// emptyResponseCooldown 空流式响应的临时封禁时长 +const emptyResponseCooldown = 30 * time.Minute + +// tempUnscheduleEmptyResponse 对空流式响应触发临时封禁, +// 避免短时间内反复调度到同一个返回空响应的账号。 +func tempUnscheduleEmptyResponse(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { + until := time.Now().Add(emptyResponseCooldown) + reason := "empty stream response (auto temp-unschedule 30m)" if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) } else { @@ -2786,9 +2809,13 @@ returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 + // 处理空响应情况 — 触发 failover 切换账号重试 if last == nil && lastWithParts == nil { - log.Printf("[antigravity-Forward] warning: empty stream response, no valid chunks received") + log.Printf("[antigravity-Forward] warning: empty stream response (gemini non-stream), triggering failover") + return nil, &UpstreamFailoverError{ + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + } } // 如果收集到了图片 parts,需要合并到最终响应中 @@ -3201,10 +3228,13 @@ returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 + // 处理空响应情况 — 触发 failover 切换账号重试 if last == nil && lastWithParts == nil { - log.Printf("[antigravity-Forward] warning: empty stream response, no valid chunks received") - return nil, s.writeClaudeError(c, http.StatusBadGateway, "upstream_error", "Empty response from upstream") + log.Printf("[antigravity-Forward] warning: empty stream 
response (claude non-stream), triggering failover") + return nil, &UpstreamFailoverError{ + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + } } // 将收集的所有 parts 合并到最终响应中 From 6328e694417662ca6a25500655095a88de4249cf Mon Sep 17 00:00:00 2001 From: Edric Li Date: Tue, 10 Feb 2026 00:53:54 +0800 Subject: [PATCH 10/16] feat: same-account retry before failover for transient errors For retryable transient errors (Google 400 "invalid project resource name" and empty stream responses), retry on the same account up to 2 times (with 500ms delay) before switching to another account. - Add RetryableOnSameAccount field to UpstreamFailoverError - Add same-account retry loop in both Gemini and Claude/OpenAI handler paths - Move temp-unschedule from service layer to handler layer (only after all same-account retries exhausted) - Reduce temp-unschedule cooldown from 30 minutes to 1 minute --- backend/internal/handler/gateway_handler.go | 57 ++++++++++++++++++- .../service/antigravity_gateway_service.go | 40 ++++++------- backend/internal/service/gateway_service.go | 21 ++++++- .../service/gemini_messages_compat_service.go | 6 +- 4 files changed, 91 insertions(+), 33 deletions(-) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 361cd8b5..3003b5ae 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -240,6 +240,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { switchCount := 0 antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) + sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -353,11 +354,28 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if err != nil { var failoverErr *service.UpstreamFailoverError if errors.As(err, &failoverErr) { - failedAccountIDs[account.ID] = 
struct{}{} lastFailoverErr = failoverErr if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } + + // 同账号重试:对 RetryableOnSameAccount 的临时性错误,先在同一账号上重试 + if failoverErr.RetryableOnSameAccount && sameAccountRetryCount[account.ID] < maxSameAccountRetries { + sameAccountRetryCount[account.ID]++ + log.Printf("Account %d: retryable error %d, same-account retry %d/%d", + account.ID, failoverErr.StatusCode, sameAccountRetryCount[account.ID], maxSameAccountRetries) + if !sleepSameAccountRetryDelay(c.Request.Context()) { + return + } + continue + } + + // 同账号重试用尽,执行临时封禁并切换账号 + if failoverErr.RetryableOnSameAccount { + h.gatewayService.TempUnscheduleRetryableError(c.Request.Context(), account.ID, failoverErr) + } + + failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { // 默认重试用完,进入 Antigravity 额外重试 antigravityExtraCount++ @@ -424,6 +442,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { switchCount := 0 antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) + sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError retryWithFallback := false var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -572,11 +591,28 @@ func (h *GatewayHandler) Messages(c *gin.Context) { } var failoverErr *service.UpstreamFailoverError if errors.As(err, &failoverErr) { - failedAccountIDs[account.ID] = struct{}{} lastFailoverErr = failoverErr if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } + + // 同账号重试:对 RetryableOnSameAccount 的临时性错误,先在同一账号上重试 + if failoverErr.RetryableOnSameAccount && sameAccountRetryCount[account.ID] < maxSameAccountRetries { + sameAccountRetryCount[account.ID]++ + log.Printf("Account %d: retryable error %d, same-account retry %d/%d", + account.ID, failoverErr.StatusCode, sameAccountRetryCount[account.ID], maxSameAccountRetries) + if !sleepSameAccountRetryDelay(c.Request.Context()) { + return + } + continue + } + + // 
同账号重试用尽,执行临时封禁并切换账号 + if failoverErr.RetryableOnSameAccount { + h.gatewayService.TempUnscheduleRetryableError(c.Request.Context(), account.ID, failoverErr) + } + + failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { // 默认重试用完,进入 Antigravity 额外重试 antigravityExtraCount++ @@ -865,6 +901,23 @@ func needForceCacheBilling(hasBoundSession bool, failoverErr *service.UpstreamFa return hasBoundSession || (failoverErr != nil && failoverErr.ForceCacheBilling) } +const ( + // maxSameAccountRetries 同账号重试次数上限(针对 RetryableOnSameAccount 错误) + maxSameAccountRetries = 2 + // sameAccountRetryDelay 同账号重试间隔 + sameAccountRetryDelay = 500 * time.Millisecond +) + +// sleepSameAccountRetryDelay 同账号重试固定延时,返回 false 表示 context 已取消。 +func sleepSameAccountRetryDelay(ctx context.Context) bool { + select { + case <-ctx.Done(): + return false + case <-time.After(sameAccountRetryDelay): + return true + } +} + // sleepFailoverDelay 账号切换线性递增延时:第1次0s、第2次1s、第3次2s… // 返回 false 表示 context 已取消。 func sleepFailoverDelay(ctx context.Context, switchCount int) bool { diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index a5fd1535..9c2b9027 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -1285,7 +1285,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, originalModel, 0, "", isStickySession) - // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + // 精确匹配服务端配置类 400 错误,触发同账号重试 + failover if resp.StatusCode == http.StatusBadRequest { msg := strings.ToLower(strings.TrimSpace(extractAntigravityErrorMessage(respBody))) if isGoogleProjectConfigError(msg) { @@ -1302,8 +1302,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, Message: upstreamMsg, Detail: upstreamDetail, }) - 
tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody, RetryableOnSameAccount: true} } } @@ -1351,10 +1350,6 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, streamRes, err := s.handleClaudeStreamToNonStreaming(c, resp, startTime, originalModel) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) - var failoverErr *UpstreamFailoverError - if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { - tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) - } return nil, err } usage = streamRes.usage @@ -1851,7 +1846,7 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co // Always record upstream context for Ops error logs, even when we will failover. setOpsUpstreamError(c, resp.StatusCode, upstreamMsg, upstreamDetail) - // 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁 + // 精确匹配服务端配置类 400 错误,触发同账号重试 + failover if resp.StatusCode == http.StatusBadRequest && isGoogleProjectConfigError(strings.ToLower(upstreamMsg)) { log.Printf("%s status=400 google_config_error failover=true upstream_message=%q account=%d", prefix, upstreamMsg, account.ID) appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ @@ -1864,8 +1859,7 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co Message: upstreamMsg, Detail: upstreamDetail, }) - tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, prefix) - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: unwrappedForOps} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: unwrappedForOps, RetryableOnSameAccount: true} } if s.shouldFailoverUpstreamError(resp.StatusCode) { @@ -1924,10 +1918,6 @@ handleSuccess: streamRes, err := 
s.handleGeminiStreamToNonStreaming(c, resp, startTime) if err != nil { log.Printf("%s status=stream_collect_error error=%v", prefix, err) - var failoverErr *UpstreamFailoverError - if errors.As(err, &failoverErr) && failoverErr.StatusCode == http.StatusBadGateway { - tempUnscheduleEmptyResponse(ctx, s.accountRepo, account.ID, prefix) - } return nil, err } usage = streamRes.usage @@ -1976,13 +1966,13 @@ func isGoogleProjectConfigError(lowerMsg string) bool { } // googleConfigErrorCooldown 服务端配置类 400 错误的临时封禁时长 -const googleConfigErrorCooldown = 30 * time.Minute +const googleConfigErrorCooldown = 1 * time.Minute // tempUnscheduleGoogleConfigError 对服务端配置类 400 错误触发临时封禁, // 避免短时间内反复调度到同一个有问题的账号。 func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { until := time.Now().Add(googleConfigErrorCooldown) - reason := "400: invalid project resource name (auto temp-unschedule 30m)" + reason := "400: invalid project resource name (auto temp-unschedule 1m)" if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) } else { @@ -1991,13 +1981,13 @@ func tempUnscheduleGoogleConfigError(ctx context.Context, repo AccountRepository } // emptyResponseCooldown 空流式响应的临时封禁时长 -const emptyResponseCooldown = 30 * time.Minute +const emptyResponseCooldown = 1 * time.Minute // tempUnscheduleEmptyResponse 对空流式响应触发临时封禁, // 避免短时间内反复调度到同一个返回空响应的账号。 func tempUnscheduleEmptyResponse(ctx context.Context, repo AccountRepository, accountID int64, logPrefix string) { until := time.Now().Add(emptyResponseCooldown) - reason := "empty stream response (auto temp-unschedule 30m)" + reason := "empty stream response (auto temp-unschedule 1m)" if err := repo.SetTempUnschedulable(ctx, accountID, until, reason); err != nil { log.Printf("%s temp_unschedule_failed account=%d error=%v", logPrefix, accountID, err) } else { @@ -2809,12 +2799,13 @@ 
returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 — 触发 failover 切换账号重试 + // 处理空响应情况 — 触发同账号重试 + failover 切换账号 if last == nil && lastWithParts == nil { log.Printf("[antigravity-Forward] warning: empty stream response (gemini non-stream), triggering failover") return nil, &UpstreamFailoverError{ - StatusCode: http.StatusBadGateway, - ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + RetryableOnSameAccount: true, } } @@ -3228,12 +3219,13 @@ returnResponse: // 选择最后一个有效响应 finalResponse := pickGeminiCollectResult(last, lastWithParts) - // 处理空响应情况 — 触发 failover 切换账号重试 + // 处理空响应情况 — 触发同账号重试 + failover 切换账号 if last == nil && lastWithParts == nil { log.Printf("[antigravity-Forward] warning: empty stream response (claude non-stream), triggering failover") return nil, &UpstreamFailoverError{ - StatusCode: http.StatusBadGateway, - ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + StatusCode: http.StatusBadGateway, + ResponseBody: []byte(`{"error":"empty stream response from upstream"}`), + RetryableOnSameAccount: true, } } diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 4e723232..01e1acb4 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -362,15 +362,30 @@ type ForwardResult struct { // UpstreamFailoverError indicates an upstream error that should trigger account failover. 
type UpstreamFailoverError struct { - StatusCode int - ResponseBody []byte // 上游响应体,用于错误透传规则匹配 - ForceCacheBilling bool // Antigravity 粘性会话切换时设为 true + StatusCode int + ResponseBody []byte // 上游响应体,用于错误透传规则匹配 + ForceCacheBilling bool // Antigravity 粘性会话切换时设为 true + RetryableOnSameAccount bool // 临时性错误(如 Google 间歇性 400、空响应),应在同一账号上重试 N 次再切换 } func (e *UpstreamFailoverError) Error() string { return fmt.Sprintf("upstream error: %d (failover)", e.StatusCode) } +// TempUnscheduleRetryableError 对 RetryableOnSameAccount 类型的 failover 错误触发临时封禁。 +// 由 handler 层在同账号重试全部用尽、切换账号时调用。 +func (s *GatewayService) TempUnscheduleRetryableError(ctx context.Context, accountID int64, failoverErr *UpstreamFailoverError) { + if failoverErr == nil || !failoverErr.RetryableOnSameAccount { + return + } + // 根据状态码选择封禁策略 + if failoverErr.StatusCode == http.StatusBadRequest { + tempUnscheduleGoogleConfigError(ctx, s.accountRepo, accountID, "[handler]") + } else if failoverErr.StatusCode == http.StatusBadGateway { + tempUnscheduleEmptyResponse(ctx, s.accountRepo, accountID, "[handler]") + } +} + // GatewayService handles API gateway operations type GatewayService struct { accountRepo AccountRepository diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go index 1e59c5fd..7fa375ca 100644 --- a/backend/internal/service/gemini_messages_compat_service.go +++ b/backend/internal/service/gemini_messages_compat_service.go @@ -908,8 +908,7 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex Message: upstreamMsg, Detail: upstreamDetail, }) - tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody, RetryableOnSameAccount: true} } } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { @@ -1387,8 
+1386,7 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin. Message: upstreamMsg, Detail: upstreamDetail, }) - tempUnscheduleGoogleConfigError(ctx, s.accountRepo, account.ID, "[Gemini]") - return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody} + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody, RetryableOnSameAccount: true} } } if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { From 662625a09114a000cfd0897d322fc894b5a8097c Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 03:47:40 +0800 Subject: [PATCH 11/16] feat: optimize MODEL_CAPACITY_EXHAUSTED retry and remove extra failover retries - MODEL_CAPACITY_EXHAUSTED now uses independent retry strategy: - retryDelay < 20s: wait actual retryDelay then retry once - retryDelay >= 20s or missing: retry up to 5 times at 20s intervals - Still capacity exhausted after retries: switch account (failover) - Different error during retry (e.g. 429): handle by actual error code - No model rate limit set (capacity != rate limit) - Remove Antigravity extra failover retries feature: Same-account retry mechanism (cherry-picked) makes it redundant. Removed: antigravityExtraRetries config, sleepFixedDelay, skip-non-antigravity logic. 
--- backend/internal/config/config.go | 3 - backend/internal/handler/gateway_handler.go | 65 +-- .../gateway_handler_extra_retry_test.go | 417 ------------------ .../internal/handler/gemini_v1beta_handler.go | 23 +- .../service/antigravity_gateway_service.go | 151 ++++++- .../service/antigravity_rate_limit_test.go | 40 +- .../service/antigravity_smart_retry_test.go | 142 ++++-- 7 files changed, 282 insertions(+), 559 deletions(-) delete mode 100644 backend/internal/handler/gateway_handler_extra_retry_test.go diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 460bd05d..7b6b4a37 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -279,9 +279,6 @@ type GatewayConfig struct { // Antigravity 429 fallback 限流时间(分钟),解析重置时间失败时使用 AntigravityFallbackCooldownMinutes int `mapstructure:"antigravity_fallback_cooldown_minutes"` - // 默认重试用完后,额外使用 Antigravity 账号重试的最大次数(0 表示禁用) - AntigravityExtraRetries int `mapstructure:"antigravity_extra_retries"` - // Scheduling: 账号调度相关配置 Scheduling GatewaySchedulingConfig `mapstructure:"scheduling"` diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 3003b5ae..b5fb379e 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -39,7 +39,6 @@ type GatewayHandler struct { concurrencyHelper *ConcurrencyHelper maxAccountSwitches int maxAccountSwitchesGemini int - antigravityExtraRetries int } // NewGatewayHandler creates a new GatewayHandler @@ -58,7 +57,6 @@ func NewGatewayHandler( pingInterval := time.Duration(0) maxAccountSwitches := 10 maxAccountSwitchesGemini := 3 - antigravityExtraRetries := 10 if cfg != nil { pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second if cfg.Gateway.MaxAccountSwitches > 0 { @@ -67,7 +65,6 @@ func NewGatewayHandler( if cfg.Gateway.MaxAccountSwitchesGemini > 0 { maxAccountSwitchesGemini = 
cfg.Gateway.MaxAccountSwitchesGemini } - antigravityExtraRetries = cfg.Gateway.AntigravityExtraRetries } return &GatewayHandler{ gatewayService: gatewayService, @@ -81,7 +78,6 @@ func NewGatewayHandler( concurrencyHelper: NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval), maxAccountSwitches: maxAccountSwitches, maxAccountSwitchesGemini: maxAccountSwitchesGemini, - antigravityExtraRetries: antigravityExtraRetries, } } @@ -238,7 +234,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if platform == service.PlatformGemini { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 - antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError @@ -261,15 +256,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) - // 额外重试阶段:跳过非 Antigravity 账号 - if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { - failedAccountIDs[account.ID] = struct{}{} - if selection.Acquired && selection.ReleaseFunc != nil { - selection.ReleaseFunc() - } - continue - } - // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -377,17 +363,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) { failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { - // 默认重试用完,进入 Antigravity 额外重试 - antigravityExtraCount++ - if antigravityExtraCount > h.antigravityExtraRetries { - h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) - return - } - log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) - if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { - return - } - continue + 
h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted) + return } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -440,7 +417,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { for { maxAccountSwitches := h.maxAccountSwitches switchCount := 0 - antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) sameAccountRetryCount := make(map[int64]int) // 同账号重试计数 var lastFailoverErr *service.UpstreamFailoverError @@ -465,15 +441,6 @@ func (h *GatewayHandler) Messages(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, account.ID) - // 额外重试阶段:跳过非 Antigravity 账号 - if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { - failedAccountIDs[account.ID] = struct{}{} - if selection.Acquired && selection.ReleaseFunc != nil { - selection.ReleaseFunc() - } - continue - } - // 检查请求拦截(预热请求、SUGGESTION MODE等) if account.IsInterceptWarmupEnabled() { interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient) @@ -614,17 +581,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) { failedAccountIDs[account.ID] = struct{}{} if switchCount >= maxAccountSwitches { - // 默认重试用完,进入 Antigravity 额外重试 - antigravityExtraCount++ - if antigravityExtraCount > h.antigravityExtraRetries { - h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) - return - } - log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) - if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { - return - } - continue + h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted) + return } switchCount++ log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) @@ -933,21 +891,6 @@ func 
sleepFailoverDelay(ctx context.Context, switchCount int) bool { } } -const antigravityExtraRetryDelay = 500 * time.Millisecond - -// sleepFixedDelay 固定延时等待,返回 false 表示 context 已取消。 -func sleepFixedDelay(ctx context.Context, delay time.Duration) bool { - if delay <= 0 { - return true - } - select { - case <-ctx.Done(): - return false - case <-time.After(delay): - return true - } -} - func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) { statusCode := failoverErr.StatusCode responseBody := failoverErr.ResponseBody diff --git a/backend/internal/handler/gateway_handler_extra_retry_test.go b/backend/internal/handler/gateway_handler_extra_retry_test.go deleted file mode 100644 index a0777941..00000000 --- a/backend/internal/handler/gateway_handler_extra_retry_test.go +++ /dev/null @@ -1,417 +0,0 @@ -//go:build unit - -package handler - -import ( - "context" - "encoding/json" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/Wei-Shaw/sub2api/internal/config" - "github.com/Wei-Shaw/sub2api/internal/service" - "github.com/gin-gonic/gin" - "github.com/stretchr/testify/require" -) - -// --- sleepFixedDelay --- - -func TestSleepFixedDelay_ZeroDelay(t *testing.T) { - got := sleepFixedDelay(context.Background(), 0) - require.True(t, got, "zero delay should return true immediately") -} - -func TestSleepFixedDelay_NegativeDelay(t *testing.T) { - got := sleepFixedDelay(context.Background(), -1*time.Second) - require.True(t, got, "negative delay should return true immediately") -} - -func TestSleepFixedDelay_NormalDelay(t *testing.T) { - start := time.Now() - got := sleepFixedDelay(context.Background(), 50*time.Millisecond) - elapsed := time.Since(start) - require.True(t, got, "normal delay should return true") - require.GreaterOrEqual(t, elapsed, 40*time.Millisecond, "should sleep at least ~50ms") -} - -func TestSleepFixedDelay_ContextCancelled(t *testing.T) { - ctx, 
cancel := context.WithCancel(context.Background()) - cancel() // cancel immediately - got := sleepFixedDelay(ctx, 10*time.Second) - require.False(t, got, "cancelled context should return false") -} - -func TestSleepFixedDelay_ContextTimeout(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond) - defer cancel() - got := sleepFixedDelay(ctx, 5*time.Second) - require.False(t, got, "context timeout should return false before delay completes") -} - -// --- antigravityExtraRetryDelay constant --- - -func TestAntigravityExtraRetryDelayValue(t *testing.T) { - require.Equal(t, 500*time.Millisecond, antigravityExtraRetryDelay) -} - -// --- NewGatewayHandler antigravityExtraRetries field --- - -func TestNewGatewayHandler_AntigravityExtraRetries_Default(t *testing.T) { - h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil) - require.Equal(t, 10, h.antigravityExtraRetries, "default should be 10 when cfg is nil") -} - -func TestNewGatewayHandler_AntigravityExtraRetries_FromConfig(t *testing.T) { - cfg := &config.Config{ - Gateway: config.GatewayConfig{ - AntigravityExtraRetries: 5, - }, - } - h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) - require.Equal(t, 5, h.antigravityExtraRetries, "should use config value") -} - -func TestNewGatewayHandler_AntigravityExtraRetries_ZeroDisables(t *testing.T) { - cfg := &config.Config{ - Gateway: config.GatewayConfig{ - AntigravityExtraRetries: 0, - }, - } - h := NewGatewayHandler(nil, nil, nil, nil, nil, nil, nil, nil, nil, cfg) - require.Equal(t, 0, h.antigravityExtraRetries, "zero should disable extra retries") -} - -// --- handleFailoverAllAccountsExhausted (renamed: using handleFailoverExhausted) --- -// We test the error response format helpers that the extra retry path uses. 
- -func TestHandleFailoverExhausted_JSON(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - failoverErr := &service.UpstreamFailoverError{StatusCode: 429} - h.handleFailoverExhausted(c, failoverErr, service.PlatformAntigravity, false) - - require.Equal(t, http.StatusTooManyRequests, rec.Code) - - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "rate_limit_error", errObj["type"]) -} - -func TestHandleFailoverExhaustedSimple_JSON(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - h.handleFailoverExhaustedSimple(c, 502, false) - - require.Equal(t, http.StatusBadGateway, rec.Code) - - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "upstream_error", errObj["type"]) -} - -// --- Extra retry platform filter logic --- - -func TestExtraRetryPlatformFilter(t *testing.T) { - tests := []struct { - name string - switchCount int - maxAccountSwitch int - platform string - expectSkip bool - }{ - { - name: "default_retry_phase_antigravity_not_skipped", - switchCount: 1, - maxAccountSwitch: 3, - platform: service.PlatformAntigravity, - expectSkip: false, - }, - { - name: "default_retry_phase_gemini_not_skipped", - switchCount: 1, - maxAccountSwitch: 3, - platform: service.PlatformGemini, - expectSkip: false, - }, - { - name: "extra_retry_phase_antigravity_not_skipped", - switchCount: 3, - maxAccountSwitch: 3, - platform: service.PlatformAntigravity, - expectSkip: false, - }, - { - name: "extra_retry_phase_gemini_skipped", - switchCount: 3, - maxAccountSwitch: 3, - platform: service.PlatformGemini, - expectSkip: true, - }, - { - 
name: "extra_retry_phase_anthropic_skipped", - switchCount: 3, - maxAccountSwitch: 3, - platform: service.PlatformAnthropic, - expectSkip: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Replicate the filter condition from the handler - shouldSkip := tt.switchCount >= tt.maxAccountSwitch && tt.platform != service.PlatformAntigravity - require.Equal(t, tt.expectSkip, shouldSkip) - }) - } -} - -// --- Extra retry counter logic --- - -func TestExtraRetryCounterExhaustion(t *testing.T) { - tests := []struct { - name string - maxExtraRetries int - currentExtraCount int - expectExhausted bool - }{ - { - name: "first_extra_retry", - maxExtraRetries: 10, - currentExtraCount: 1, - expectExhausted: false, - }, - { - name: "at_limit", - maxExtraRetries: 10, - currentExtraCount: 10, - expectExhausted: false, - }, - { - name: "exceeds_limit", - maxExtraRetries: 10, - currentExtraCount: 11, - expectExhausted: true, - }, - { - name: "zero_disables_extra_retry", - maxExtraRetries: 0, - currentExtraCount: 1, - expectExhausted: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Replicate the exhaustion condition: antigravityExtraCount > h.antigravityExtraRetries - exhausted := tt.currentExtraCount > tt.maxExtraRetries - require.Equal(t, tt.expectExhausted, exhausted) - }) - } -} - -// --- mapUpstreamError (used by handleFailoverExhausted) --- - -func TestMapUpstreamError(t *testing.T) { - h := &GatewayHandler{} - tests := []struct { - name string - statusCode int - expectedStatus int - expectedType string - }{ - {"429", 429, http.StatusTooManyRequests, "rate_limit_error"}, - {"529", 529, http.StatusServiceUnavailable, "overloaded_error"}, - {"500", 500, http.StatusBadGateway, "upstream_error"}, - {"502", 502, http.StatusBadGateway, "upstream_error"}, - {"503", 503, http.StatusBadGateway, "upstream_error"}, - {"504", 504, http.StatusBadGateway, "upstream_error"}, - {"401", 401, http.StatusBadGateway, 
"upstream_error"}, - {"403", 403, http.StatusBadGateway, "upstream_error"}, - {"unknown", 418, http.StatusBadGateway, "upstream_error"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - status, errType, _ := h.mapUpstreamError(tt.statusCode) - require.Equal(t, tt.expectedStatus, status) - require.Equal(t, tt.expectedType, errType) - }) - } -} - -// --- Gemini native path: handleGeminiFailoverExhausted --- - -func TestHandleGeminiFailoverExhausted_NilError(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - h.handleGeminiFailoverExhausted(c, nil) - - require.Equal(t, http.StatusBadGateway, rec.Code) - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "Upstream request failed", errObj["message"]) -} - -func TestHandleGeminiFailoverExhausted_429(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - failoverErr := &service.UpstreamFailoverError{StatusCode: 429} - h.handleGeminiFailoverExhausted(c, failoverErr) - - require.Equal(t, http.StatusTooManyRequests, rec.Code) -} - -// --- handleStreamingAwareError streaming mode --- - -func TestHandleStreamingAwareError_StreamStarted(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - // Simulate stream already started: set content type and write initial data - c.Writer.Header().Set("Content-Type", "text/event-stream") - c.Writer.WriteHeaderNow() - - h := &GatewayHandler{} - h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "test error", true) - - body := rec.Body.String() - require.Contains(t, body, "rate_limit_error") - require.Contains(t, body, "test error") - require.Contains(t, body, "data: ") -} 
- -func TestHandleStreamingAwareError_NotStreaming(t *testing.T) { - gin.SetMode(gin.TestMode) - rec := httptest.NewRecorder() - c, _ := gin.CreateTestContext(rec) - - h := &GatewayHandler{} - h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "no model", false) - - require.Equal(t, http.StatusServiceUnavailable, rec.Code) - var body map[string]any - err := json.Unmarshal(rec.Body.Bytes(), &body) - require.NoError(t, err) - errObj, ok := body["error"].(map[string]any) - require.True(t, ok) - require.Equal(t, "api_error", errObj["type"]) - require.Equal(t, "no model", errObj["message"]) -} - -// --- Integration: extra retry flow simulation --- - -func TestExtraRetryFlowSimulation(t *testing.T) { - // Simulate the full extra retry flow logic - maxAccountSwitches := 3 - maxExtraRetries := 2 - switchCount := 0 - antigravityExtraCount := 0 - - type attempt struct { - platform string - isFailover bool - } - - // Simulate: 3 default retries (all fail), then 2 extra retries (all fail), then exhausted - attempts := []attempt{ - {service.PlatformAntigravity, true}, // switchCount 0 -> 1 - {service.PlatformGemini, true}, // switchCount 1 -> 2 - {service.PlatformAntigravity, true}, // switchCount 2 -> 3 (reaches max) - {service.PlatformAntigravity, true}, // extra retry 1 - {service.PlatformAntigravity, true}, // extra retry 2 - {service.PlatformAntigravity, true}, // extra retry 3 -> exhausted - } - - var exhausted bool - var skipped int - - for _, a := range attempts { - if exhausted { - break - } - - // Extra retry phase: skip non-Antigravity - if switchCount >= maxAccountSwitches && a.platform != service.PlatformAntigravity { - skipped++ - continue - } - - if a.isFailover { - if switchCount >= maxAccountSwitches { - antigravityExtraCount++ - if antigravityExtraCount > maxExtraRetries { - exhausted = true - continue - } - // extra retry delay + continue - continue - } - switchCount++ - } - } - - require.Equal(t, 3, switchCount, "should have 3 default 
retries") - require.Equal(t, 3, antigravityExtraCount, "counter incremented 3 times") - require.True(t, exhausted, "should be exhausted after exceeding max extra retries") - require.Equal(t, 0, skipped, "no non-antigravity accounts in this simulation") -} - -func TestExtraRetryFlowSimulation_SkipsNonAntigravity(t *testing.T) { - maxAccountSwitches := 2 - switchCount := 2 // already past default retries - antigravityExtraCount := 0 - maxExtraRetries := 5 - - type accountSelection struct { - platform string - } - - selections := []accountSelection{ - {service.PlatformGemini}, // should be skipped - {service.PlatformAnthropic}, // should be skipped - {service.PlatformAntigravity}, // should be attempted - } - - var skippedCount int - var attemptedCount int - - for _, sel := range selections { - if switchCount >= maxAccountSwitches && sel.platform != service.PlatformAntigravity { - skippedCount++ - continue - } - // Simulate failover - antigravityExtraCount++ - if antigravityExtraCount > maxExtraRetries { - break - } - attemptedCount++ - } - - require.Equal(t, 2, skippedCount, "gemini and anthropic accounts should be skipped") - require.Equal(t, 1, attemptedCount, "only antigravity account should be attempted") - require.Equal(t, 1, antigravityExtraCount) -} diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index 5a576ab0..0475c332 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -323,7 +323,6 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { maxAccountSwitches := h.maxAccountSwitchesGemini switchCount := 0 - antigravityExtraCount := 0 failedAccountIDs := make(map[int64]struct{}) var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 @@ -341,15 +340,6 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { account := selection.Account setOpsSelectedAccount(c, 
account.ID) - // 额外重试阶段:跳过非 Antigravity 账号 - if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity { - failedAccountIDs[account.ID] = struct{}{} - if selection.Acquired && selection.ReleaseFunc != nil { - selection.ReleaseFunc() - } - continue - } - // 检测账号切换:如果粘性会话绑定的账号与当前选择的账号不同,清除 thoughtSignature // 注意:Gemini 原生 API 的 thoughtSignature 与具体上游账号强相关;跨账号透传会导致 400。 if sessionBoundAccountID > 0 && sessionBoundAccountID != account.ID { @@ -439,17 +429,8 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { - // 默认重试用完,进入 Antigravity 额外重试 - antigravityExtraCount++ - if antigravityExtraCount > h.antigravityExtraRetries { - h.handleGeminiFailoverExhausted(c, failoverErr) - return - } - log.Printf("Gemini account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries) - if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) { - return - } - continue + h.handleGeminiFailoverExhausted(c, failoverErr) + return } switchCount++ log.Printf("Gemini account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 9c2b9027..84e78eaa 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -39,6 +39,15 @@ const ( antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待) antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用) + // MODEL_CAPACITY_EXHAUSTED 专用常量 + // 容量不足是临时状态,所有账号共享容量池,与限流不同 + // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 + // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 + // - 重试仍为容量不足: 切换账号 + // - 重试遇到其他错误: 按实际错误码处理 + 
antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值 + antigravityModelCapacityMaxAttempts = 5 // 容量不足长等待重试次数 + // Google RPC 状态和类型常量 googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" googleRPCStatusUnavailable = "UNAVAILABLE" @@ -144,7 +153,12 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam } // 判断是否触发智能重试 - shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName := shouldTriggerAntigravitySmartRetry(p.account, respBody) + shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(p.account, respBody) + + // MODEL_CAPACITY_EXHAUSTED: 独立处理 + if isModelCapacityExhausted { + return s.handleModelCapacityExhaustedRetry(p, resp, respBody, baseURL, waitDuration, modelName) + } // 情况1: retryDelay >= 阈值,限流模型并切换账号 if shouldRateLimitModel { @@ -229,7 +243,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam // 解析新的重试信息,用于下次重试的等待时间 if attempt < antigravitySmartRetryMaxAttempts && lastRetryBody != nil { - newShouldRetry, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) + newShouldRetry, _, newWaitDuration, _, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) if newShouldRetry && newWaitDuration > 0 { waitDuration = newWaitDuration } @@ -279,6 +293,100 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam return &smartRetryResult{action: smartRetryActionContinue} } +// handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑 +// 策略: +// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 +// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 +// - 重试成功: 直接返回 +// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 继续重试直到次数用完,然后切换账号 +// - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理 +func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry( + p antigravityRetryLoopParams, resp 
*http.Response, respBody []byte, + baseURL string, retryDelay time.Duration, modelName string, +) *smartRetryResult { + // 确定重试参数 + maxAttempts := 1 + waitDuration := retryDelay + if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold { + // 无 retryDelay 或 >= 20s: 固定 20s 间隔,最多 5 次 + maxAttempts = antigravityModelCapacityMaxAttempts + waitDuration = antigravityModelCapacityWaitThreshold + } + + for attempt := 1; attempt <= maxAttempts; attempt++ { + log.Printf("%s status=%d model_capacity_exhausted_retry attempt=%d/%d delay=%v model=%s account=%d", + p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID) + + select { + case <-p.ctx.Done(): + log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix) + return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} + case <-time.After(waitDuration): + } + + retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body) + if err != nil { + log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(respBody)), + }, + } + } + + retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) + + // 网络错误: 继续重试 + if retryErr != nil || retryResp == nil { + log.Printf("%s status=capacity_retry_network_error attempt=%d/%d error=%v", + p.prefix, attempt, maxAttempts, retryErr) + continue + } + + // 成功 (非 429/503): 直接返回 + if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { + log.Printf("%s status=%d model_capacity_retry_success attempt=%d/%d", + p.prefix, retryResp.StatusCode, attempt, maxAttempts) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 
读取重试响应体,判断是否仍为容量不足 + retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) + _ = retryResp.Body.Close() + + retryInfo := parseAntigravitySmartRetryInfo(retryBody) + + // 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理 + if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { + log.Printf("%s status=%d capacity_retry_got_different_error attempt=%d/%d body=%s", + p.prefix, retryResp.StatusCode, attempt, maxAttempts, truncateForLog(retryBody, 200)) + retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 仍然是 MODEL_CAPACITY_EXHAUSTED: 更新等待时间,继续重试 + if retryInfo.RetryDelay > 0 && retryInfo.RetryDelay < antigravityModelCapacityWaitThreshold { + waitDuration = retryInfo.RetryDelay + } + } + + // 所有重试都失败且仍为容量不足: 切换账号 + log.Printf("%s status=%d model_capacity_exhausted_retry_exhausted attempts=%d model=%s account=%d (switch account)", + p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID) + + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + switchError: &AntigravityAccountSwitchError{ + OriginalAccountID: p.account.ID, + RateLimitedModel: modelName, + IsStickySession: p.isStickySession, + }, + } +} + // antigravityRetryLoop 执行带 URL fallback 的重试循环 func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) { // 预检查:如果账号已限流,直接返回切换信号 @@ -2053,8 +2161,9 @@ func antigravityFallbackCooldownSeconds() (time.Duration, bool) { // antigravitySmartRetryInfo 智能重试所需的信息 type antigravitySmartRetryInfo struct { - RetryDelay time.Duration // 重试延迟时间 - ModelName string // 限流的模型名称(如 "claude-sonnet-4-5") + RetryDelay time.Duration // 重试延迟时间 + ModelName string // 限流的模型名称(如 "claude-sonnet-4-5") + IsModelCapacityExhausted bool // 是否为 MODEL_CAPACITY_EXHAUSTED(503 容量不足,与 429 限流处理策略不同) } // parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息 @@ -2163,14 +2272,16 @@ func 
parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo { return nil } - // 如果上游未提供 retryDelay,使用默认限流时间 - if retryDelay <= 0 { + // MODEL_CAPACITY_EXHAUSTED: retryDelay 可以为 0(由调用方决定默认等待策略) + // RATE_LIMIT_EXCEEDED: 无 retryDelay 时使用默认限流时间 + if retryDelay <= 0 && !hasModelCapacityExhausted { retryDelay = antigravityDefaultRateLimitDuration } return &antigravitySmartRetryInfo{ - RetryDelay: retryDelay, - ModelName: modelName, + RetryDelay: retryDelay, + ModelName: modelName, + IsModelCapacityExhausted: hasModelCapacityExhausted, } } @@ -2178,22 +2289,28 @@ func parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo { // 返回: // - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold) // - shouldRateLimitModel: 是否应该限流模型(retryDelay >= antigravityRateLimitThreshold) -// - waitDuration: 等待时间(智能重试时使用,shouldRateLimitModel=true 时为 0) +// - waitDuration: 等待时间(智能重试时使用,shouldRateLimitModel=true 时为限流时长) // - modelName: 限流的模型名称 -func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string) { +// - isModelCapacityExhausted: 是否为 MODEL_CAPACITY_EXHAUSTED(需要独立处理) +func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string, isModelCapacityExhausted bool) { if account.Platform != PlatformAntigravity { - return false, false, 0, "" + return false, false, 0, "", false } info := parseAntigravitySmartRetryInfo(respBody) if info == nil { - return false, false, 0, "" + return false, false, 0, "", false + } + + // MODEL_CAPACITY_EXHAUSTED: 独立处理,不走 7s 阈值判断 + if info.IsModelCapacityExhausted { + return true, false, info.RetryDelay, info.ModelName, true } // retryDelay >= 阈值:直接限流模型,不重试 // 注意:如果上游未提供 retryDelay,parseAntigravitySmartRetryInfo 已设置为默认 30s if info.RetryDelay >= antigravityRateLimitThreshold { - return false, true, info.RetryDelay, 
info.ModelName + return false, true, info.RetryDelay, info.ModelName, false } // retryDelay < 阈值:智能重试 @@ -2202,7 +2319,7 @@ func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shou waitDuration = antigravitySmartRetryMinWait } - return true, false, waitDuration, info.ModelName + return true, false, waitDuration, info.ModelName, false } // handleModelRateLimitParams 模型级限流处理参数 @@ -2240,6 +2357,12 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit return &handleModelRateLimitResult{Handled: false} } + // MODEL_CAPACITY_EXHAUSTED: 容量不足由 handleSmartRetry 独立处理,此处仅标记已处理 + // 不设置模型限流(容量不足是临时的,不等同于限流) + if info.IsModelCapacityExhausted { + return &handleModelRateLimitResult{Handled: true} + } + // < antigravityRateLimitThreshold: 等待后重试 if info.RetryDelay < antigravityRateLimitThreshold { log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v", diff --git a/backend/internal/service/antigravity_rate_limit_test.go b/backend/internal/service/antigravity_rate_limit_test.go index 59cc9331..c8b0d779 100644 --- a/backend/internal/service/antigravity_rate_limit_test.go +++ b/backend/internal/service/antigravity_rate_limit_test.go @@ -188,13 +188,14 @@ func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) { require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey) } -// TestHandleUpstreamError_503_ModelRateLimit 测试 503 模型限流场景 -func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) { +// TestHandleUpstreamError_503_ModelCapacityExhausted 测试 503 模型容量不足场景 +// MODEL_CAPACITY_EXHAUSTED 标记 Handled 但不设模型限流(由 handleSmartRetry 独立处理) +func TestHandleUpstreamError_503_ModelCapacityExhausted(t *testing.T) { repo := &stubAntigravityAccountRepo{} svc := &AntigravityGatewayService{accountRepo: repo} account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity} - // 503 + MODEL_CAPACITY_EXHAUSTED → 模型限流 + // 503 + MODEL_CAPACITY_EXHAUSTED → 标记已处理,不设模型限流 body := []byte(`{ "error": { 
"status": "UNAVAILABLE", @@ -207,13 +208,11 @@ func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) { result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false) - // 应该触发模型限流 + // 应该标记已处理,但不设模型限流 require.NotNil(t, result) require.True(t, result.Handled) - require.NotNil(t, result.SwitchError) - require.Equal(t, "gemini-3-pro-high", result.SwitchError.RateLimitedModel) - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey) + require.Nil(t, result.SwitchError, "MODEL_CAPACITY_EXHAUSTED should not trigger switch error in handleModelRateLimit") + require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit") } // TestHandleUpstreamError_503_NonModelRateLimit 测试 503 非模型限流场景(不处理) @@ -496,6 +495,7 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { body string expectedShouldRetry bool expectedShouldRateLimit bool + expectedCapacityExhaust bool minWait time.Duration modelName string }{ @@ -611,8 +611,9 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { ] } }`, - expectedShouldRetry: false, - expectedShouldRateLimit: true, + expectedShouldRetry: true, + expectedShouldRateLimit: false, + expectedCapacityExhaust: true, minWait: 39 * time.Second, modelName: "gemini-3-pro-high", }, @@ -629,9 +630,10 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { "message": "No capacity available for model gemini-2.5-flash on the server" } }`, - expectedShouldRetry: false, - expectedShouldRateLimit: true, - minWait: 30 * time.Second, + expectedShouldRetry: true, + expectedShouldRateLimit: false, + expectedCapacityExhaust: true, + minWait: 0, // 无 retryDelay,由 handleModelCapacityExhaustedRetry 决定默认 20s modelName: "gemini-2.5-flash", }, { @@ -656,18 +658,26 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { for _, tt := range 
tests { t.Run(tt.name, func(t *testing.T) { - shouldRetry, shouldRateLimit, wait, model := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body)) + shouldRetry, shouldRateLimit, wait, model, isCapacityExhausted := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body)) if shouldRetry != tt.expectedShouldRetry { t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry) } if shouldRateLimit != tt.expectedShouldRateLimit { t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit) } - if shouldRetry { + if isCapacityExhausted != tt.expectedCapacityExhaust { + t.Errorf("isCapacityExhausted = %v, want %v", isCapacityExhausted, tt.expectedCapacityExhaust) + } + if shouldRetry && !isCapacityExhausted { if wait < tt.minWait { t.Errorf("wait = %v, want >= %v", wait, tt.minWait) } } + if isCapacityExhausted && tt.minWait > 0 { + if wait < tt.minWait { + t.Errorf("capacity exhausted wait = %v, want >= %v", wait, tt.minWait) + } + } if shouldRateLimit && tt.minWait > 0 { if wait < tt.minWait { t.Errorf("rate limit wait = %v, want >= %v", wait, tt.minWait) diff --git a/backend/internal/service/antigravity_smart_retry_test.go b/backend/internal/service/antigravity_smart_retry_test.go index a7e0d296..7a6050a7 100644 --- a/backend/internal/service/antigravity_smart_retry_test.go +++ b/backend/internal/service/antigravity_smart_retry_test.go @@ -9,6 +9,7 @@ import ( "net/http" "strings" "testing" + "time" "github.com/stretchr/testify/require" ) @@ -294,8 +295,20 @@ func TestHandleSmartRetry_ShortDelay_SmartRetryFailed_ReturnsSwitchError(t *test require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)") } -// TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError 测试 503 MODEL_CAPACITY_EXHAUSTED 返回 switchError -func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testing.T) { +// TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess +// 503 
MODEL_CAPACITY_EXHAUSTED + retryDelay < 20s → 按实际 retryDelay 等待后重试 1 次,成功返回 +func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t *testing.T) { + // 重试成功的响应 + successResp := &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(`{"ok":true}`)), + } + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{successResp}, + errors: []error{nil}, + } + repo := &stubAntigravityAccountRepo{} account := &Account{ ID: 3, @@ -304,7 +317,89 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi Platform: PlatformAntigravity, } - // 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 7s 阈值 + // 503 + MODEL_CAPACITY_EXHAUSTED + 0.5s < 20s 阈值 → 按实际 retryDelay 重试 1 次 + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"} + ], + "message": "No capacity available for model gemini-3-pro-high on the server" + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + params := antigravityRetryLoopParams{ + ctx: context.Background(), + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + httpUpstream: upstream, + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + availableURLs := []string{"https://ag-1.test"} + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, 
availableURLs) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.NotNil(t, result.resp) + require.Equal(t, http.StatusOK, result.resp.StatusCode, "should return success after retry") + require.Nil(t, result.switchError, "should not switch account on success") + require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") +} + +// TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount +// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 每 20s 重试最多 5 次,全失败后切换账号 +func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) { + // 构造 5 个仍然容量不足的重试响应 + capacityBody := `{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "30s"} + ] + } + }` + var responses []*http.Response + var errs []error + for i := 0; i < 5; i++ { + responses = append(responses, &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(capacityBody)), + }) + errs = append(errs, nil) + } + upstream := &mockSmartRetryUpstream{ + responses: responses, + errors: errs, + } + + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 3, + Name: "acc-3", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + // 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 20s 阈值 respBody := []byte(`{ "error": { "code": 503, @@ -322,13 +417,18 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi Body: io.NopCloser(bytes.NewReader(respBody)), } + // 使用可取消的 context 避免测试真的等待 5×20s + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + params := antigravityRetryLoopParams{ - ctx: context.Background(), + 
ctx: ctx, prefix: "[test]", account: account, accessToken: "token", action: "generateContent", body: []byte(`{"input":"test"}`), + httpUpstream: upstream, accountRepo: repo, isStickySession: true, handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { @@ -343,16 +443,9 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi require.NotNil(t, result) require.Equal(t, smartRetryActionBreakWithResp, result.action) - require.Nil(t, result.resp) - require.Nil(t, result.err) - require.NotNil(t, result.switchError, "should return switchError for 503 model capacity exhausted") - require.Equal(t, account.ID, result.switchError.OriginalAccountID) - require.Equal(t, "gemini-3-pro-high", result.switchError.RateLimitedModel) - require.True(t, result.switchError.IsStickySession) - - // 验证模型限流已设置 - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey) + // context 超时会导致提前返回,switchError 可能为 nil(context canceled) + // 验证不设置模型限流 + require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") } // TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑 @@ -1128,9 +1221,9 @@ func TestHandleSmartRetry_ShortDelay_NetworkError_StickySession_ClearsSession(t require.Equal(t, "sticky-net-error", cache.deleteCalls[0].sessionHash) } -// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession -// 503 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定 -func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) { +// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccount +// 503 + 短延迟 + 容量不足 + 重试失败 → 切换账号(不设模型限流) +func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_SwitchesAccount(t 
*testing.T) { failRespBody := `{ "error": { "code": 503, @@ -1152,7 +1245,6 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession } repo := &stubAntigravityAccountRepo{} - cache := &stubSmartRetryCache{} account := &Account{ ID: 16, Name: "acc-16", @@ -1195,21 +1287,15 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession availableURLs := []string{"https://ag-1.test"} - svc := &AntigravityGatewayService{cache: cache} + svc := &AntigravityGatewayService{} result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs) require.NotNil(t, result) - require.NotNil(t, result.switchError) + require.NotNil(t, result.switchError, "should switch account after capacity retry exhausted") require.True(t, result.switchError.IsStickySession) - // 验证粘性绑定被清除 - require.Len(t, cache.deleteCalls, 1) - require.Equal(t, int64(77), cache.deleteCalls[0].groupID) - require.Equal(t, "sticky-503-short", cache.deleteCalls[0].sessionHash) - - // 验证模型限流已设置 - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro", repo.modelRateLimitCalls[0].modelKey) + // MODEL_CAPACITY_EXHAUSTED 不应设置模型限流 + require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") } // TestAntigravityRetryLoop_SmartRetryFailed_StickySession_SwitchErrorPropagates From 05f5a8b61db4960528074ab7f281404a8426e49f Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 03:59:39 +0800 Subject: [PATCH 12/16] fix: use switch statement for staticcheck QF1003 compliance --- backend/internal/service/gateway_service.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 01e1acb4..910e04a4 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -379,9 +379,10 @@ func (s *GatewayService) TempUnscheduleRetryableError(ctx 
context.Context, accou return } // 根据状态码选择封禁策略 - if failoverErr.StatusCode == http.StatusBadRequest { + switch failoverErr.StatusCode { + case http.StatusBadRequest: tempUnscheduleGoogleConfigError(ctx, s.accountRepo, accountID, "[handler]") - } else if failoverErr.StatusCode == http.StatusBadGateway { + case http.StatusBadGateway: tempUnscheduleEmptyResponse(ctx, s.accountRepo, accountID, "[handler]") } } From f06048eccfb1e4f8373b6348f3a425a3c36fa1a5 Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 04:05:20 +0800 Subject: [PATCH 13/16] fix: simplify MODEL_CAPACITY_EXHAUSTED to single retry for all cases Both short (<20s) and long (>=20s/missing) retryDelay now retry once: - Short: wait actual retryDelay, retry once - Long/missing: wait 20s, retry once - Still capacity exhausted: switch account - Different error: let upper layer handle --- .../service/antigravity_gateway_service.go | 128 +++++++++--------- .../service/antigravity_smart_retry_test.go | 31 ++--- 2 files changed, 75 insertions(+), 84 deletions(-) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 84e78eaa..efff2e18 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -42,11 +42,10 @@ const ( // MODEL_CAPACITY_EXHAUSTED 专用常量 // 容量不足是临时状态,所有账号共享容量池,与限流不同 // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 - // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 + // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 后重试 1 次 // - 重试仍为容量不足: 切换账号 // - 重试遇到其他错误: 按实际错误码处理 antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值 - antigravityModelCapacityMaxAttempts = 5 // 容量不足长等待重试次数 // Google RPC 状态和类型常量 googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" @@ -296,86 +295,83 @@ func (s *AntigravityGatewayService) handleSmartRetry(p 
antigravityRetryLoopParam // handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑 // 策略: // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 -// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 每 20s 重试最多 5 次 +// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 后重试 1 次 // - 重试成功: 直接返回 -// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 继续重试直到次数用完,然后切换账号 +// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 切换账号 // - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理 func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry( p antigravityRetryLoopParams, resp *http.Response, respBody []byte, baseURL string, retryDelay time.Duration, modelName string, ) *smartRetryResult { - // 确定重试参数 - maxAttempts := 1 + // 确定等待时间 waitDuration := retryDelay if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold { - // 无 retryDelay 或 >= 20s: 固定 20s 间隔,最多 5 次 - maxAttempts = antigravityModelCapacityMaxAttempts + // 无 retryDelay 或 >= 20s: 固定等待 20s waitDuration = antigravityModelCapacityWaitThreshold } - for attempt := 1; attempt <= maxAttempts; attempt++ { - log.Printf("%s status=%d model_capacity_exhausted_retry attempt=%d/%d delay=%v model=%s account=%d", - p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID) + log.Printf("%s status=%d model_capacity_exhausted_retry delay=%v model=%s account=%d", + p.prefix, resp.StatusCode, waitDuration, modelName, p.account.ID) - select { - case <-p.ctx.Done(): - log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix) - return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} - case <-time.After(waitDuration): - } + select { + case <-p.ctx.Done(): + log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix) + return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()} + case <-time.After(waitDuration): + } - retryReq, err := 
antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body) - if err != nil { - log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err) - return &smartRetryResult{ - action: smartRetryActionBreakWithResp, - resp: &http.Response{ - StatusCode: resp.StatusCode, - Header: resp.Header.Clone(), - Body: io.NopCloser(bytes.NewReader(respBody)), - }, - } - } - - retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) - - // 网络错误: 继续重试 - if retryErr != nil || retryResp == nil { - log.Printf("%s status=capacity_retry_network_error attempt=%d/%d error=%v", - p.prefix, attempt, maxAttempts, retryErr) - continue - } - - // 成功 (非 429/503): 直接返回 - if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { - log.Printf("%s status=%d model_capacity_retry_success attempt=%d/%d", - p.prefix, retryResp.StatusCode, attempt, maxAttempts) - return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} - } - - // 读取重试响应体,判断是否仍为容量不足 - retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) - _ = retryResp.Body.Close() - - retryInfo := parseAntigravitySmartRetryInfo(retryBody) - - // 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理 - if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { - log.Printf("%s status=%d capacity_retry_got_different_error attempt=%d/%d body=%s", - p.prefix, retryResp.StatusCode, attempt, maxAttempts, truncateForLog(retryBody, 200)) - retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) - return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} - } - - // 仍然是 MODEL_CAPACITY_EXHAUSTED: 更新等待时间,继续重试 - if retryInfo.RetryDelay > 0 && retryInfo.RetryDelay < antigravityModelCapacityWaitThreshold { - waitDuration = retryInfo.RetryDelay + retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body) + if err != nil { + 
log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(respBody)), + }, } } - // 所有重试都失败且仍为容量不足: 切换账号 - log.Printf("%s status=%d model_capacity_exhausted_retry_exhausted attempts=%d model=%s account=%d (switch account)", - p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID) + retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) + + // 网络错误: 切换账号 + if retryErr != nil || retryResp == nil { + log.Printf("%s status=capacity_retry_network_error error=%v (switch account)", + p.prefix, retryErr) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + switchError: &AntigravityAccountSwitchError{ + OriginalAccountID: p.account.ID, + RateLimitedModel: modelName, + IsStickySession: p.isStickySession, + }, + } + } + + // 成功 (非 429/503): 直接返回 + if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { + log.Printf("%s status=%d model_capacity_retry_success", p.prefix, retryResp.StatusCode) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 读取重试响应体,判断是否仍为容量不足 + retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20)) + _ = retryResp.Body.Close() + + retryInfo := parseAntigravitySmartRetryInfo(retryBody) + + // 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理 + if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { + log.Printf("%s status=%d capacity_retry_got_different_error body=%s", + p.prefix, retryResp.StatusCode, truncateForLog(retryBody, 200)) + retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) + return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} + } + + // 仍然是 MODEL_CAPACITY_EXHAUSTED: 切换账号 + log.Printf("%s status=%d 
model_capacity_exhausted_retry_failed model=%s account=%d (switch account)", + p.prefix, resp.StatusCode, modelName, p.account.ID) return &smartRetryResult{ action: smartRetryActionBreakWithResp, diff --git a/backend/internal/service/antigravity_smart_retry_test.go b/backend/internal/service/antigravity_smart_retry_test.go index 7a6050a7..b1ca5695 100644 --- a/backend/internal/service/antigravity_smart_retry_test.go +++ b/backend/internal/service/antigravity_smart_retry_test.go @@ -363,9 +363,9 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t * } // TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount -// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 每 20s 重试最多 5 次,全失败后切换账号 +// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 等待 20s 后重试 1 次,仍失败则切换账号 func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) { - // 构造 5 个仍然容量不足的重试响应 + // 重试仍然返回容量不足 capacityBody := `{ "error": { "code": 503, @@ -376,19 +376,15 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t * ] } }` - var responses []*http.Response - var errs []error - for i := 0; i < 5; i++ { - responses = append(responses, &http.Response{ - StatusCode: http.StatusServiceUnavailable, - Header: http.Header{}, - Body: io.NopCloser(strings.NewReader(capacityBody)), - }) - errs = append(errs, nil) - } upstream := &mockSmartRetryUpstream{ - responses: responses, - errors: errs, + responses: []*http.Response{ + { + StatusCode: 503, + Header: http.Header{}, + Body: io.NopCloser(strings.NewReader(capacityBody)), + }, + }, + errors: []error{nil}, } repo := &stubAntigravityAccountRepo{} @@ -412,12 +408,12 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t * } }`) resp := &http.Response{ - StatusCode: http.StatusServiceUnavailable, + StatusCode: 503, Header: http.Header{}, Body: io.NopCloser(bytes.NewReader(respBody)), } - // 使用可取消的 context 避免测试真的等待 5×20s + // context 超时短于 
20s 等待,验证 context 取消时正确返回 ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) defer cancel() @@ -443,8 +439,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t * require.NotNil(t, result) require.Equal(t, smartRetryActionBreakWithResp, result.action) - // context 超时会导致提前返回,switchError 可能为 nil(context canceled) - // 验证不设置模型限流 + // context 超时会导致提前返回 require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") } From 5dd83d3cf2d448403301b6879f6e0bc6337a4390 Mon Sep 17 00:00:00 2001 From: shaw Date: Tue, 10 Feb 2026 10:28:34 +0800 Subject: [PATCH 14/16] =?UTF-8?q?fix:=20=E7=A7=BB=E9=99=A4=E7=89=B9?= =?UTF-8?q?=E5=AE=9Asystem=E4=BB=A5=E9=80=82=E9=85=8D=E6=96=B0=E7=89=88cc?= =?UTF-8?q?=E5=AE=A2=E6=88=B7=E7=AB=AF=E7=BC=93=E5=AD=98=E5=A4=B1=E6=95=88?= =?UTF-8?q?=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pkg/antigravity/request_transformer.go | 23 +++++-- .../service/antigravity_gateway_service.go | 4 -- .../antigravity_single_account_retry_test.go | 2 - backend/internal/service/gateway_service.go | 66 +++++++++++++++++++ frontend/package.json | 2 +- frontend/pnpm-lock.yaml | 21 ++++-- 6 files changed, 102 insertions(+), 16 deletions(-) diff --git a/backend/internal/pkg/antigravity/request_transformer.go b/backend/internal/pkg/antigravity/request_transformer.go index 65f45cfc..e89a4c53 100644 --- a/backend/internal/pkg/antigravity/request_transformer.go +++ b/backend/internal/pkg/antigravity/request_transformer.go @@ -271,6 +271,21 @@ func filterOpenCodePrompt(text string) string { return "" } +// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表 +var systemBlockFilterPrefixes = []string{ + "x-anthropic-billing-header", +} + +// filterSystemBlockByPrefix 如果文本匹配过滤前缀,返回空字符串 +func filterSystemBlockByPrefix(text string) string { + for _, prefix := range systemBlockFilterPrefixes { + if strings.HasPrefix(text, 
prefix) { + return "" + } + } + return text +} + // buildSystemInstruction 构建 systemInstruction(与 Antigravity-Manager 保持一致) func buildSystemInstruction(system json.RawMessage, modelName string, opts TransformOptions, tools []ClaudeTool) *GeminiContent { var parts []GeminiPart @@ -287,8 +302,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans if strings.Contains(sysStr, "You are Antigravity") { userHasAntigravityIdentity = true } - // 过滤 OpenCode 默认提示词 - filtered := filterOpenCodePrompt(sysStr) + // 过滤 OpenCode 默认提示词和黑名单前缀 + filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(sysStr)) if filtered != "" { userSystemParts = append(userSystemParts, GeminiPart{Text: filtered}) } @@ -302,8 +317,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans if strings.Contains(block.Text, "You are Antigravity") { userHasAntigravityIdentity = true } - // 过滤 OpenCode 默认提示词 - filtered := filterOpenCodePrompt(block.Text) + // 过滤 OpenCode 默认提示词和黑名单前缀 + filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(block.Text)) if filtered != "" { userSystemParts = append(userSystemParts, GeminiPart{Text: filtered}) } diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 42a60372..b6d0da06 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -48,10 +48,6 @@ const ( googleRPCReasonModelCapacityExhausted = "MODEL_CAPACITY_EXHAUSTED" googleRPCReasonRateLimitExceeded = "RATE_LIMIT_EXCEEDED" - // 单账号 503 退避重试:预检查中等待模型限流过期的最大时间 - // 超过此值的限流将直接切换账号(避免请求等待过久) - antigravitySingleAccountMaxWait = 30 * time.Second - // 单账号 503 退避重试:Service 层原地重试的最大次数 // 在 handleSmartRetry 中,对于 shouldRateLimitModel(长延迟 ≥ 7s)的情况, // 多账号模式下会设限流+切换账号;但单账号模式下改为原地等待+重试。 diff --git a/backend/internal/service/antigravity_single_account_retry_test.go 
b/backend/internal/service/antigravity_single_account_retry_test.go index 0950b728..d5813553 100644 --- a/backend/internal/service/antigravity_single_account_retry_test.go +++ b/backend/internal/service/antigravity_single_account_retry_test.go @@ -57,8 +57,6 @@ func TestSingleAccountRetryConstants(t *testing.T) { "单次最大等待 15s") require.Equal(t, 30*time.Second, antigravitySingleAccountSmartRetryTotalMaxWait, "总累计等待不超过 30s") - require.Equal(t, 30*time.Second, antigravitySingleAccountMaxWait, - "预检查最大等待 30s") } // --------------------------------------------------------------------------- diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 2c04ae14..610c8f01 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -243,6 +243,12 @@ var ( } ) +// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表 +// OAuth/SetupToken 账号转发时,匹配这些前缀的 system 元素会被移除 +var systemBlockFilterPrefixes = []string{ + "x-anthropic-billing-header", +} + // ErrClaudeCodeOnly 表示分组仅允许 Claude Code 客户端访问 var ErrClaudeCodeOnly = errors.New("this group only allows Claude Code clients") @@ -2684,6 +2690,60 @@ func hasClaudeCodePrefix(text string) bool { return false } +// matchesFilterPrefix 检查文本是否匹配任一过滤前缀 +func matchesFilterPrefix(text string) bool { + for _, prefix := range systemBlockFilterPrefixes { + if strings.HasPrefix(text, prefix) { + return true + } + } + return false +} + +// filterSystemBlocksByPrefix 从 body 的 system 中移除文本匹配 systemBlockFilterPrefixes 前缀的元素 +// 直接从 body 解析 system,不依赖外部传入的 parsed.System(因为前置步骤可能已修改 body 中的 system) +func filterSystemBlocksByPrefix(body []byte) []byte { + sys := gjson.GetBytes(body, "system") + if !sys.Exists() { + return body + } + + switch { + case sys.Type == gjson.String: + if matchesFilterPrefix(sys.Str) { + result, err := sjson.DeleteBytes(body, "system") + if err != nil { + return body + } + return result + } + case sys.IsArray(): + var parsed []any + if 
err := json.Unmarshal([]byte(sys.Raw), &parsed); err != nil { + return body + } + filtered := make([]any, 0, len(parsed)) + changed := false + for _, item := range parsed { + if m, ok := item.(map[string]any); ok { + if text, ok := m["text"].(string); ok && matchesFilterPrefix(text) { + changed = true + continue + } + } + filtered = append(filtered, item) + } + if changed { + result, err := sjson.SetBytes(body, "system", filtered) + if err != nil { + return body + } + return result + } + } + return body +} + // injectClaudeCodePrompt 在 system 开头注入 Claude Code 提示词 // 处理 null、字符串、数组三种格式 func injectClaudeCodePrompt(body []byte, system any) []byte { @@ -2963,6 +3023,12 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A body, reqModel = normalizeClaudeOAuthRequestBody(body, reqModel, normalizeOpts) } + // OAuth/SetupToken 账号:移除黑名单前缀匹配的 system 元素(如客户端注入的计费元数据) + // 放在 inject/normalize 之后,确保不会被覆盖 + if account.IsOAuth() { + body = filterSystemBlocksByPrefix(body) + } + // 强制执行 cache_control 块数量限制(最多 4 个) body = enforceCacheControlLimit(body) diff --git a/frontend/package.json b/frontend/package.json index 325eba60..1b380b17 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -17,7 +17,7 @@ "dependencies": { "@lobehub/icons": "^4.0.2", "@vueuse/core": "^10.7.0", - "axios": "^1.6.2", + "axios": "^1.13.5", "chart.js": "^4.4.1", "dompurify": "^3.3.1", "driver.js": "^1.4.0", diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index 9af2d7af..37c384b4 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -15,8 +15,8 @@ importers: specifier: ^10.7.0 version: 10.11.1(vue@3.5.26(typescript@5.6.3)) axios: - specifier: ^1.6.2 - version: 1.13.2 + specifier: ^1.13.5 + version: 1.13.5 chart.js: specifier: ^4.4.1 version: 4.5.1 @@ -1257,56 +1257,67 @@ packages: resolution: {integrity: sha512-EHMUcDwhtdRGlXZsGSIuXSYwD5kOT9NVnx9sqzYiwAc91wfYOE1g1djOEDseZJKKqtHAHGwnGPQu3kytmfaXLQ==} cpu: [arm] os: [linux] + libc: 
[glibc] '@rollup/rollup-linux-arm-musleabihf@4.54.0': resolution: {integrity: sha512-+pBrqEjaakN2ySv5RVrj/qLytYhPKEUwk+e3SFU5jTLHIcAtqh2rLrd/OkbNuHJpsBgxsD8ccJt5ga/SeG0JmA==} cpu: [arm] os: [linux] + libc: [musl] '@rollup/rollup-linux-arm64-gnu@4.54.0': resolution: {integrity: sha512-NSqc7rE9wuUaRBsBp5ckQ5CVz5aIRKCwsoa6WMF7G01sX3/qHUw/z4pv+D+ahL1EIKy6Enpcnz1RY8pf7bjwng==} cpu: [arm64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm64-musl@4.54.0': resolution: {integrity: sha512-gr5vDbg3Bakga5kbdpqx81m2n9IX8M6gIMlQQIXiLTNeQW6CucvuInJ91EuCJ/JYvc+rcLLsDFcfAD1K7fMofg==} cpu: [arm64] os: [linux] + libc: [musl] '@rollup/rollup-linux-loong64-gnu@4.54.0': resolution: {integrity: sha512-gsrtB1NA3ZYj2vq0Rzkylo9ylCtW/PhpLEivlgWe0bpgtX5+9j9EZa0wtZiCjgu6zmSeZWyI/e2YRX1URozpIw==} cpu: [loong64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-ppc64-gnu@4.54.0': resolution: {integrity: sha512-y3qNOfTBStmFNq+t4s7Tmc9hW2ENtPg8FeUD/VShI7rKxNW7O4fFeaYbMsd3tpFlIg1Q8IapFgy7Q9i2BqeBvA==} cpu: [ppc64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-gnu@4.54.0': resolution: {integrity: sha512-89sepv7h2lIVPsFma8iwmccN7Yjjtgz0Rj/Ou6fEqg3HDhpCa+Et+YSufy27i6b0Wav69Qv4WBNl3Rs6pwhebQ==} cpu: [riscv64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-musl@4.54.0': resolution: {integrity: sha512-ZcU77ieh0M2Q8Ur7D5X7KvK+UxbXeDHwiOt/CPSBTI1fBmeDMivW0dPkdqkT4rOgDjrDDBUed9x4EgraIKoR2A==} cpu: [riscv64] os: [linux] + libc: [musl] '@rollup/rollup-linux-s390x-gnu@4.54.0': resolution: {integrity: sha512-2AdWy5RdDF5+4YfG/YesGDDtbyJlC9LHmL6rZw6FurBJ5n4vFGupsOBGfwMRjBYH7qRQowT8D/U4LoSvVwOhSQ==} cpu: [s390x] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-gnu@4.54.0': resolution: {integrity: sha512-WGt5J8Ij/rvyqpFexxk3ffKqqbLf9AqrTBbWDk7ApGUzaIs6V+s2s84kAxklFwmMF/vBNGrVdYgbblCOFFezMQ==} cpu: [x64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-musl@4.54.0': resolution: {integrity: 
sha512-JzQmb38ATzHjxlPHuTH6tE7ojnMKM2kYNzt44LO/jJi8BpceEC8QuXYA908n8r3CNuG/B3BV8VR3Hi1rYtmPiw==} cpu: [x64] os: [linux] + libc: [musl] '@rollup/rollup-openharmony-arm64@4.54.0': resolution: {integrity: sha512-huT3fd0iC7jigGh7n3q/+lfPcXxBi+om/Rs3yiFxjvSxbSB6aohDFXbWvlspaqjeOh+hx7DDHS+5Es5qRkWkZg==} @@ -1805,8 +1816,8 @@ packages: peerDependencies: postcss: ^8.1.0 - axios@1.13.2: - resolution: {integrity: sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==} + axios@1.13.5: + resolution: {integrity: sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==} babel-plugin-macros@3.1.0: resolution: {integrity: sha512-Cg7TFGpIr01vOQNODXOOaGz2NpCU5gl8x1qJFbb6hbZxR7XrcE2vtbAsTAbJ7/xwJtUuJEw8K8Zr/AE0LHlesg==} @@ -6387,7 +6398,7 @@ snapshots: postcss: 8.5.6 postcss-value-parser: 4.2.0 - axios@1.13.2: + axios@1.13.5: dependencies: follow-redirects: 1.15.11 form-data: 4.0.5 From 406dad998d6def371b7f6cfc429ecba489fa3c32 Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 10:59:34 +0800 Subject: [PATCH 15/16] chore: bump version to 0.1.77.2 --- backend/cmd/server/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION index af6111e5..18412869 100644 --- a/backend/cmd/server/VERSION +++ b/backend/cmd/server/VERSION @@ -1 +1 @@ -0.1.77.1 +0.1.77.2 From 6bdd580b3fcc9f564508f7497f63b2cc2b5b2674 Mon Sep 17 00:00:00 2001 From: erio Date: Tue, 10 Feb 2026 11:40:36 +0800 Subject: [PATCH 16/16] chore: bump version to 0.1.78.1 --- backend/cmd/server/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/cmd/server/VERSION b/backend/cmd/server/VERSION index 18412869..aade6705 100644 --- a/backend/cmd/server/VERSION +++ b/backend/cmd/server/VERSION @@ -1 +1 @@ -0.1.77.2 +0.1.78.1 \ No newline at end of file