feat: Antigravity extra failover retries after default retries exhausted

When default failover retries are exhausted, continue retrying with
Antigravity accounts only (up to 10 times, configurable via
GATEWAY_ANTIGRAVITY_EXTRA_RETRIES). Each extra retry uses a fixed
500ms delay. Non-Antigravity accounts are skipped during the extra
retry phase. Applied to all three endpoints: Gemini compat, Claude,
and Gemini native API paths.
This commit is contained in:
erio
2026-02-09 22:13:44 +08:00
parent 228aca9523
commit 18b591bc3b
4 changed files with 504 additions and 8 deletions

View File

@@ -39,6 +39,7 @@ type GatewayHandler struct {
concurrencyHelper *ConcurrencyHelper
maxAccountSwitches int
maxAccountSwitchesGemini int
antigravityExtraRetries int
}
// NewGatewayHandler creates a new GatewayHandler
@@ -57,6 +58,7 @@ func NewGatewayHandler(
pingInterval := time.Duration(0)
maxAccountSwitches := 10
maxAccountSwitchesGemini := 3
antigravityExtraRetries := 10
if cfg != nil {
pingInterval = time.Duration(cfg.Concurrency.PingInterval) * time.Second
if cfg.Gateway.MaxAccountSwitches > 0 {
@@ -65,6 +67,7 @@ func NewGatewayHandler(
if cfg.Gateway.MaxAccountSwitchesGemini > 0 {
maxAccountSwitchesGemini = cfg.Gateway.MaxAccountSwitchesGemini
}
antigravityExtraRetries = cfg.Gateway.AntigravityExtraRetries
}
return &GatewayHandler{
gatewayService: gatewayService,
@@ -78,6 +81,7 @@ func NewGatewayHandler(
concurrencyHelper: NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval),
maxAccountSwitches: maxAccountSwitches,
maxAccountSwitchesGemini: maxAccountSwitchesGemini,
antigravityExtraRetries: antigravityExtraRetries,
}
}
@@ -234,6 +238,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
if platform == service.PlatformGemini {
maxAccountSwitches := h.maxAccountSwitchesGemini
switchCount := 0
antigravityExtraCount := 0
failedAccountIDs := make(map[int64]struct{})
var lastFailoverErr *service.UpstreamFailoverError
var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
@@ -255,6 +260,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
account := selection.Account
setOpsSelectedAccount(c, account.ID)
// 额外重试阶段:跳过非 Antigravity 账号
if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity {
failedAccountIDs[account.ID] = struct{}{}
if selection.Acquired && selection.ReleaseFunc != nil {
selection.ReleaseFunc()
}
continue
}
// 检查请求拦截预热请求、SUGGESTION MODE等
if account.IsInterceptWarmupEnabled() {
interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
@@ -345,8 +359,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
forceCacheBilling = true
}
if switchCount >= maxAccountSwitches {
h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted)
return
// 默认重试用完,进入 Antigravity 额外重试
antigravityExtraCount++
if antigravityExtraCount > h.antigravityExtraRetries {
h.handleFailoverExhausted(c, failoverErr, service.PlatformGemini, streamStarted)
return
}
log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries)
if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) {
return
}
continue
}
switchCount++
log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
@@ -399,6 +422,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
for {
maxAccountSwitches := h.maxAccountSwitches
switchCount := 0
antigravityExtraCount := 0
failedAccountIDs := make(map[int64]struct{})
var lastFailoverErr *service.UpstreamFailoverError
retryWithFallback := false
@@ -422,6 +446,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
account := selection.Account
setOpsSelectedAccount(c, account.ID)
// 额外重试阶段:跳过非 Antigravity 账号
if switchCount >= maxAccountSwitches && account.Platform != service.PlatformAntigravity {
failedAccountIDs[account.ID] = struct{}{}
if selection.Acquired && selection.ReleaseFunc != nil {
selection.ReleaseFunc()
}
continue
}
// 检查请求拦截预热请求、SUGGESTION MODE等
if account.IsInterceptWarmupEnabled() {
interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
@@ -545,8 +578,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
forceCacheBilling = true
}
if switchCount >= maxAccountSwitches {
h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted)
return
// 默认重试用完,进入 Antigravity 额外重试
antigravityExtraCount++
if antigravityExtraCount > h.antigravityExtraRetries {
h.handleFailoverExhausted(c, failoverErr, account.Platform, streamStarted)
return
}
log.Printf("Account %d: antigravity extra retry %d/%d", account.ID, antigravityExtraCount, h.antigravityExtraRetries)
if !sleepFixedDelay(c.Request.Context(), antigravityExtraRetryDelay) {
return
}
continue
}
switchCount++
log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
@@ -838,6 +880,21 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool {
}
}
// antigravityExtraRetryDelay is the fixed pause passed to sleepFixedDelay
// before each Antigravity extra retry attempt.
const antigravityExtraRetryDelay = 500 * time.Millisecond
// sleepFixedDelay blocks for delay or until ctx is cancelled, whichever
// happens first.
//
// It returns true when the full delay elapsed and false when ctx was
// cancelled before the delay elapsed. A non-positive delay returns true
// immediately without consulting ctx.
func sleepFixedDelay(ctx context.Context, delay time.Duration) bool {
	if delay <= 0 {
		return true
	}
	// Use an explicit timer instead of time.After so it can be stopped as soon
	// as ctx is cancelled rather than lingering until it fires.
	timer := time.NewTimer(delay)
	defer timer.Stop()
	select {
	case <-ctx.Done():
		return false
	case <-timer.C:
		return true
	}
}
func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
statusCode := failoverErr.StatusCode
responseBody := failoverErr.ResponseBody