fix: simplify MODEL_CAPACITY_EXHAUSTED to single retry for all cases

Both short (<20s) and long (>=20s/missing) retryDelay now retry once:
- Short: wait actual retryDelay, retry once
- Long/missing: wait 20s, retry once
- Still capacity exhausted: switch account
- Different error: let upper layer handle
This commit is contained in:
erio
2026-02-10 04:05:20 +08:00
parent 05f5a8b61d
commit f06048eccf
2 changed files with 75 additions and 84 deletions

View File

@@ -42,11 +42,10 @@ const (
// MODEL_CAPACITY_EXHAUSTED 专用常量 // MODEL_CAPACITY_EXHAUSTED 专用常量
// 容量不足是临时状态,所有账号共享容量池,与限流不同 // 容量不足是临时状态,所有账号共享容量池,与限流不同
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 20s 重试最多 5 次 // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 重试 1 次
// - 重试仍为容量不足: 切换账号 // - 重试仍为容量不足: 切换账号
// - 重试遇到其他错误: 按实际错误码处理 // - 重试遇到其他错误: 按实际错误码处理
antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值 antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值
antigravityModelCapacityMaxAttempts = 5 // 容量不足长等待重试次数
// Google RPC 状态和类型常量 // Google RPC 状态和类型常量
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
@@ -296,26 +295,23 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
// handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑 // handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑
// 策略: // 策略:
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 20s 重试最多 5 次 // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 重试 1 次
// - 重试成功: 直接返回 // - 重试成功: 直接返回
// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 继续重试直到次数用完,然后切换账号 // - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 切换账号
// - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理 // - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理
func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry( func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
p antigravityRetryLoopParams, resp *http.Response, respBody []byte, p antigravityRetryLoopParams, resp *http.Response, respBody []byte,
baseURL string, retryDelay time.Duration, modelName string, baseURL string, retryDelay time.Duration, modelName string,
) *smartRetryResult { ) *smartRetryResult {
// 确定重试参数 // 确定等待时间
maxAttempts := 1
waitDuration := retryDelay waitDuration := retryDelay
if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold { if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold {
// 无 retryDelay 或 >= 20s: 固定 20s 间隔,最多 5 次 // 无 retryDelay 或 >= 20s: 固定等待 20s
maxAttempts = antigravityModelCapacityMaxAttempts
waitDuration = antigravityModelCapacityWaitThreshold waitDuration = antigravityModelCapacityWaitThreshold
} }
for attempt := 1; attempt <= maxAttempts; attempt++ { log.Printf("%s status=%d model_capacity_exhausted_retry delay=%v model=%s account=%d",
log.Printf("%s status=%d model_capacity_exhausted_retry attempt=%d/%d delay=%v model=%s account=%d", p.prefix, resp.StatusCode, waitDuration, modelName, p.account.ID)
p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
select { select {
case <-p.ctx.Done(): case <-p.ctx.Done():
@@ -339,17 +335,23 @@ func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
// 网络错误: 继续重试 // 网络错误: 切换账号
if retryErr != nil || retryResp == nil { if retryErr != nil || retryResp == nil {
log.Printf("%s status=capacity_retry_network_error attempt=%d/%d error=%v", log.Printf("%s status=capacity_retry_network_error error=%v (switch account)",
p.prefix, attempt, maxAttempts, retryErr) p.prefix, retryErr)
continue return &smartRetryResult{
action: smartRetryActionBreakWithResp,
switchError: &AntigravityAccountSwitchError{
OriginalAccountID: p.account.ID,
RateLimitedModel: modelName,
IsStickySession: p.isStickySession,
},
}
} }
// 成功 (非 429/503): 直接返回 // 成功 (非 429/503): 直接返回
if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
log.Printf("%s status=%d model_capacity_retry_success attempt=%d/%d", log.Printf("%s status=%d model_capacity_retry_success", p.prefix, retryResp.StatusCode)
p.prefix, retryResp.StatusCode, attempt, maxAttempts)
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
} }
@@ -361,21 +363,15 @@ func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
// 不再是 MODEL_CAPACITY_EXHAUSTED例如变成了 429 限流): 返回该响应让上层处理 // 不再是 MODEL_CAPACITY_EXHAUSTED例如变成了 429 限流): 返回该响应让上层处理
if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { if retryInfo == nil || !retryInfo.IsModelCapacityExhausted {
log.Printf("%s status=%d capacity_retry_got_different_error attempt=%d/%d body=%s", log.Printf("%s status=%d capacity_retry_got_different_error body=%s",
p.prefix, retryResp.StatusCode, attempt, maxAttempts, truncateForLog(retryBody, 200)) p.prefix, retryResp.StatusCode, truncateForLog(retryBody, 200))
retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) retryResp.Body = io.NopCloser(bytes.NewReader(retryBody))
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
} }
// 仍然是 MODEL_CAPACITY_EXHAUSTED: 更新等待时间,继续重试 // 仍然是 MODEL_CAPACITY_EXHAUSTED: 切换账号
if retryInfo.RetryDelay > 0 && retryInfo.RetryDelay < antigravityModelCapacityWaitThreshold { log.Printf("%s status=%d model_capacity_exhausted_retry_failed model=%s account=%d (switch account)",
waitDuration = retryInfo.RetryDelay p.prefix, resp.StatusCode, modelName, p.account.ID)
}
}
// 所有重试都失败且仍为容量不足: 切换账号
log.Printf("%s status=%d model_capacity_exhausted_retry_exhausted attempts=%d model=%s account=%d (switch account)",
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID)
return &smartRetryResult{ return &smartRetryResult{
action: smartRetryActionBreakWithResp, action: smartRetryActionBreakWithResp,

View File

@@ -363,9 +363,9 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t *
} }
// TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount // TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount
// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 20s 重试最多 5 次,失败切换账号 // 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 等待 20s 重试 1 次,失败切换账号
func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) { func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) {
// 构造 5 个仍然容量不足的重试响应 // 重试仍然返回容量不足
capacityBody := `{ capacityBody := `{
"error": { "error": {
"code": 503, "code": 503,
@@ -376,19 +376,15 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
] ]
} }
}` }`
var responses []*http.Response upstream := &mockSmartRetryUpstream{
var errs []error responses: []*http.Response{
for i := 0; i < 5; i++ { {
responses = append(responses, &http.Response{ StatusCode: 503,
StatusCode: http.StatusServiceUnavailable,
Header: http.Header{}, Header: http.Header{},
Body: io.NopCloser(strings.NewReader(capacityBody)), Body: io.NopCloser(strings.NewReader(capacityBody)),
}) },
errs = append(errs, nil) },
} errors: []error{nil},
upstream := &mockSmartRetryUpstream{
responses: responses,
errors: errs,
} }
repo := &stubAntigravityAccountRepo{} repo := &stubAntigravityAccountRepo{}
@@ -412,12 +408,12 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
} }
}`) }`)
resp := &http.Response{ resp := &http.Response{
StatusCode: http.StatusServiceUnavailable, StatusCode: 503,
Header: http.Header{}, Header: http.Header{},
Body: io.NopCloser(bytes.NewReader(respBody)), Body: io.NopCloser(bytes.NewReader(respBody)),
} }
// 使用可取消的 context 避免测试真的等待 5×20s // context 超时短于 20s 等待,验证 context 取消时正确返回
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel() defer cancel()
@@ -443,8 +439,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
require.NotNil(t, result) require.NotNil(t, result)
require.Equal(t, smartRetryActionBreakWithResp, result.action) require.Equal(t, smartRetryActionBreakWithResp, result.action)
// context 超时会导致提前返回switchError 可能为 nilcontext canceled // context 超时会导致提前返回
// 验证不设置模型限流
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted")
} }