fix: simplify MODEL_CAPACITY_EXHAUSTED to single retry for all cases

Both short (<20s) and long (>=20s/missing) retryDelay now retry once:
- Short: wait actual retryDelay, retry once
- Long/missing: wait 20s, retry once
- Still capacity exhausted: switch account
- Different error: let upper layer handle
This commit is contained in:
erio
2026-02-10 04:05:20 +08:00
parent 05f5a8b61d
commit f06048eccf
2 changed files with 75 additions and 84 deletions

View File

@@ -42,11 +42,10 @@ const (
// MODEL_CAPACITY_EXHAUSTED 专用常量
// 容量不足是临时状态,所有账号共享容量池,与限流不同
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 20s 重试最多 5 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 重试 1 次
// - 重试仍为容量不足: 切换账号
// - 重试遇到其他错误: 按实际错误码处理
antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值
antigravityModelCapacityMaxAttempts = 5 // 容量不足长等待重试次数
// Google RPC 状态和类型常量
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
@@ -296,86 +295,83 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
// handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑
// 策略:
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 20s 重试最多 5 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 重试 1 次
// - 重试成功: 直接返回
// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 继续重试直到次数用完,然后切换账号
// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 切换账号
// - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理
func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
p antigravityRetryLoopParams, resp *http.Response, respBody []byte,
baseURL string, retryDelay time.Duration, modelName string,
) *smartRetryResult {
// 确定重试参数
maxAttempts := 1
// 确定等待时间
waitDuration := retryDelay
if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold {
// 无 retryDelay 或 >= 20s: 固定 20s 间隔,最多 5 次
maxAttempts = antigravityModelCapacityMaxAttempts
// 无 retryDelay 或 >= 20s: 固定等待 20s
waitDuration = antigravityModelCapacityWaitThreshold
}
for attempt := 1; attempt <= maxAttempts; attempt++ {
log.Printf("%s status=%d model_capacity_exhausted_retry attempt=%d/%d delay=%v model=%s account=%d",
p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
log.Printf("%s status=%d model_capacity_exhausted_retry delay=%v model=%s account=%d",
p.prefix, resp.StatusCode, waitDuration, modelName, p.account.ID)
select {
case <-p.ctx.Done():
log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix)
return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
case <-time.After(waitDuration):
}
select {
case <-p.ctx.Done():
log.Printf("%s status=context_canceled_during_capacity_retry", p.prefix)
return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
case <-time.After(waitDuration):
}
retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body)
if err != nil {
log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err)
return &smartRetryResult{
action: smartRetryActionBreakWithResp,
resp: &http.Response{
StatusCode: resp.StatusCode,
Header: resp.Header.Clone(),
Body: io.NopCloser(bytes.NewReader(respBody)),
},
}
}
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
// 网络错误: 继续重试
if retryErr != nil || retryResp == nil {
log.Printf("%s status=capacity_retry_network_error attempt=%d/%d error=%v",
p.prefix, attempt, maxAttempts, retryErr)
continue
}
// 成功 (非 429/503): 直接返回
if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
log.Printf("%s status=%d model_capacity_retry_success attempt=%d/%d",
p.prefix, retryResp.StatusCode, attempt, maxAttempts)
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
}
// 读取重试响应体,判断是否仍为容量不足
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
_ = retryResp.Body.Close()
retryInfo := parseAntigravitySmartRetryInfo(retryBody)
// 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理
if retryInfo == nil || !retryInfo.IsModelCapacityExhausted {
log.Printf("%s status=%d capacity_retry_got_different_error attempt=%d/%d body=%s",
p.prefix, retryResp.StatusCode, attempt, maxAttempts, truncateForLog(retryBody, 200))
retryResp.Body = io.NopCloser(bytes.NewReader(retryBody))
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
}
// 仍然是 MODEL_CAPACITY_EXHAUSTED: 更新等待时间,继续重试
if retryInfo.RetryDelay > 0 && retryInfo.RetryDelay < antigravityModelCapacityWaitThreshold {
waitDuration = retryInfo.RetryDelay
retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body)
if err != nil {
log.Printf("%s status=capacity_retry_request_build_failed error=%v", p.prefix, err)
return &smartRetryResult{
action: smartRetryActionBreakWithResp,
resp: &http.Response{
StatusCode: resp.StatusCode,
Header: resp.Header.Clone(),
Body: io.NopCloser(bytes.NewReader(respBody)),
},
}
}
// 所有重试都失败且仍为容量不足: 切换账号
log.Printf("%s status=%d model_capacity_exhausted_retry_exhausted attempts=%d model=%s account=%d (switch account)",
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID)
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
// 网络错误: 切换账号
if retryErr != nil || retryResp == nil {
log.Printf("%s status=capacity_retry_network_error error=%v (switch account)",
p.prefix, retryErr)
return &smartRetryResult{
action: smartRetryActionBreakWithResp,
switchError: &AntigravityAccountSwitchError{
OriginalAccountID: p.account.ID,
RateLimitedModel: modelName,
IsStickySession: p.isStickySession,
},
}
}
// 成功 (非 429/503): 直接返回
if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
log.Printf("%s status=%d model_capacity_retry_success", p.prefix, retryResp.StatusCode)
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
}
// 读取重试响应体,判断是否仍为容量不足
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
_ = retryResp.Body.Close()
retryInfo := parseAntigravitySmartRetryInfo(retryBody)
// 不再是 MODEL_CAPACITY_EXHAUSTED(例如变成了 429 限流): 返回该响应让上层处理
if retryInfo == nil || !retryInfo.IsModelCapacityExhausted {
log.Printf("%s status=%d capacity_retry_got_different_error body=%s",
p.prefix, retryResp.StatusCode, truncateForLog(retryBody, 200))
retryResp.Body = io.NopCloser(bytes.NewReader(retryBody))
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
}
// 仍然是 MODEL_CAPACITY_EXHAUSTED: 切换账号
log.Printf("%s status=%d model_capacity_exhausted_retry_failed model=%s account=%d (switch account)",
p.prefix, resp.StatusCode, modelName, p.account.ID)
return &smartRetryResult{
action: smartRetryActionBreakWithResp,

View File

@@ -363,9 +363,9 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t *
}
// TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount
// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 20s 重试最多 5 次,失败切换账号
// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 等待 20s 重试 1 次,失败切换账号
func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) {
// 构造 5 个仍然容量不足的重试响应
// 重试仍然返回容量不足
capacityBody := `{
"error": {
"code": 503,
@@ -376,19 +376,15 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
]
}
}`
var responses []*http.Response
var errs []error
for i := 0; i < 5; i++ {
responses = append(responses, &http.Response{
StatusCode: http.StatusServiceUnavailable,
Header: http.Header{},
Body: io.NopCloser(strings.NewReader(capacityBody)),
})
errs = append(errs, nil)
}
upstream := &mockSmartRetryUpstream{
responses: responses,
errors: errs,
responses: []*http.Response{
{
StatusCode: 503,
Header: http.Header{},
Body: io.NopCloser(strings.NewReader(capacityBody)),
},
},
errors: []error{nil},
}
repo := &stubAntigravityAccountRepo{}
@@ -412,12 +408,12 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
}
}`)
resp := &http.Response{
StatusCode: http.StatusServiceUnavailable,
StatusCode: 503,
Header: http.Header{},
Body: io.NopCloser(bytes.NewReader(respBody)),
}
// 使用可取消的 context 避免测试真的等待 5×20s
// context 超时短于 20s 等待,验证 context 取消时正确返回
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel()
@@ -443,8 +439,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
require.NotNil(t, result)
require.Equal(t, smartRetryActionBreakWithResp, result.action)
// context 超时会导致提前返回(switchError 可能为 nil,context canceled)
// 验证不设置模型限流
// context 超时会导致提前返回
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted")
}