fix: simplify MODEL_CAPACITY_EXHAUSTED to single retry for all cases

Both short (<20s) and long (>=20s/missing) retryDelay now retry once:
- Short: wait actual retryDelay, retry once
- Long/missing: wait 20s, retry once
- Still capacity exhausted: switch account
- Different error: let upper layer handle
This commit is contained in:
erio
2026-02-10 04:05:20 +08:00
parent 05f5a8b61d
commit f06048eccf
2 changed files with 75 additions and 84 deletions

View File

@@ -42,11 +42,10 @@ const (
// MODEL_CAPACITY_EXHAUSTED 专用常量 // MODEL_CAPACITY_EXHAUSTED 专用常量
// 容量不足是临时状态,所有账号共享容量池,与限流不同 // 容量不足是临时状态,所有账号共享容量池,与限流不同
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 20s 重试最多 5 次 // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 重试 1 次
// - 重试仍为容量不足: 切换账号 // - 重试仍为容量不足: 切换账号
// - 重试遇到其他错误: 按实际错误码处理 // - 重试遇到其他错误: 按实际错误码处理
antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值 antigravityModelCapacityWaitThreshold = 20 * time.Second // 容量不足等待阈值
antigravityModelCapacityMaxAttempts = 5 // 容量不足长等待重试次数
// Google RPC 状态和类型常量 // Google RPC 状态和类型常量
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
@@ -296,26 +295,23 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
// handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑 // handleModelCapacityExhaustedRetry 处理 MODEL_CAPACITY_EXHAUSTED 的重试逻辑
// 策略: // 策略:
// - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次 // - retryDelay < antigravityModelCapacityWaitThreshold: 按实际 retryDelay 等待后重试 1 次
// - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 20s 重试最多 5 次 // - retryDelay >= antigravityModelCapacityWaitThreshold 或无 retryDelay: 等待 20s 重试 1 次
// - 重试成功: 直接返回 // - 重试成功: 直接返回
// - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 继续重试直到次数用完,然后切换账号 // - 重试仍为 MODEL_CAPACITY_EXHAUSTED: 切换账号
// - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理 // - 重试遇到其他错误 (429 限流等): 返回该响应,让上层按实际错误码处理
func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry( func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
p antigravityRetryLoopParams, resp *http.Response, respBody []byte, p antigravityRetryLoopParams, resp *http.Response, respBody []byte,
baseURL string, retryDelay time.Duration, modelName string, baseURL string, retryDelay time.Duration, modelName string,
) *smartRetryResult { ) *smartRetryResult {
// 确定重试参数 // 确定等待时间
maxAttempts := 1
waitDuration := retryDelay waitDuration := retryDelay
if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold { if retryDelay <= 0 || retryDelay >= antigravityModelCapacityWaitThreshold {
// 无 retryDelay 或 >= 20s: 固定 20s 间隔,最多 5 次 // 无 retryDelay 或 >= 20s: 固定等待 20s
maxAttempts = antigravityModelCapacityMaxAttempts
waitDuration = antigravityModelCapacityWaitThreshold waitDuration = antigravityModelCapacityWaitThreshold
} }
for attempt := 1; attempt <= maxAttempts; attempt++ { log.Printf("%s status=%d model_capacity_exhausted_retry delay=%v model=%s account=%d",
log.Printf("%s status=%d model_capacity_exhausted_retry attempt=%d/%d delay=%v model=%s account=%d", p.prefix, resp.StatusCode, waitDuration, modelName, p.account.ID)
p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
select { select {
case <-p.ctx.Done(): case <-p.ctx.Done():
@@ -339,17 +335,23 @@ func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
// 网络错误: 继续重试 // 网络错误: 切换账号
if retryErr != nil || retryResp == nil { if retryErr != nil || retryResp == nil {
log.Printf("%s status=capacity_retry_network_error attempt=%d/%d error=%v", log.Printf("%s status=capacity_retry_network_error error=%v (switch account)",
p.prefix, attempt, maxAttempts, retryErr) p.prefix, retryErr)
continue return &smartRetryResult{
action: smartRetryActionBreakWithResp,
switchError: &AntigravityAccountSwitchError{
OriginalAccountID: p.account.ID,
RateLimitedModel: modelName,
IsStickySession: p.isStickySession,
},
}
} }
// 成功 (非 429/503): 直接返回 // 成功 (非 429/503): 直接返回
if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { if retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
log.Printf("%s status=%d model_capacity_retry_success attempt=%d/%d", log.Printf("%s status=%d model_capacity_retry_success", p.prefix, retryResp.StatusCode)
p.prefix, retryResp.StatusCode, attempt, maxAttempts)
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
} }
@@ -361,21 +363,15 @@ func (s *AntigravityGatewayService) handleModelCapacityExhaustedRetry(
// 不再是 MODEL_CAPACITY_EXHAUSTED例如变成了 429 限流): 返回该响应让上层处理 // 不再是 MODEL_CAPACITY_EXHAUSTED例如变成了 429 限流): 返回该响应让上层处理
if retryInfo == nil || !retryInfo.IsModelCapacityExhausted { if retryInfo == nil || !retryInfo.IsModelCapacityExhausted {
log.Printf("%s status=%d capacity_retry_got_different_error attempt=%d/%d body=%s", log.Printf("%s status=%d capacity_retry_got_different_error body=%s",
p.prefix, retryResp.StatusCode, attempt, maxAttempts, truncateForLog(retryBody, 200)) p.prefix, retryResp.StatusCode, truncateForLog(retryBody, 200))
retryResp.Body = io.NopCloser(bytes.NewReader(retryBody)) retryResp.Body = io.NopCloser(bytes.NewReader(retryBody))
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
} }
// 仍然是 MODEL_CAPACITY_EXHAUSTED: 更新等待时间,继续重试 // 仍然是 MODEL_CAPACITY_EXHAUSTED: 切换账号
if retryInfo.RetryDelay > 0 && retryInfo.RetryDelay < antigravityModelCapacityWaitThreshold { log.Printf("%s status=%d model_capacity_exhausted_retry_failed model=%s account=%d (switch account)",
waitDuration = retryInfo.RetryDelay p.prefix, resp.StatusCode, modelName, p.account.ID)
}
}
// 所有重试都失败且仍为容量不足: 切换账号
log.Printf("%s status=%d model_capacity_exhausted_retry_exhausted attempts=%d model=%s account=%d (switch account)",
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID)
return &smartRetryResult{ return &smartRetryResult{
action: smartRetryActionBreakWithResp, action: smartRetryActionBreakWithResp,

View File

@@ -363,9 +363,9 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ShortDelay_RetrySuccess(t *
} }
// TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount // TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount
// 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 20s 重试最多 5 次,失败切换账号 // 503 MODEL_CAPACITY_EXHAUSTED + retryDelay >= 20s → 等待 20s 重试 1 次,失败切换账号
func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) { func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *testing.T) {
// 构造 5 个仍然容量不足的重试响应 // 重试仍然返回容量不足
capacityBody := `{ capacityBody := `{
"error": { "error": {
"code": 503, "code": 503,
@@ -376,19 +376,15 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
] ]
} }
}` }`
var responses []*http.Response upstream := &mockSmartRetryUpstream{
var errs []error responses: []*http.Response{
for i := 0; i < 5; i++ { {
responses = append(responses, &http.Response{ StatusCode: 503,
StatusCode: http.StatusServiceUnavailable,
Header: http.Header{}, Header: http.Header{},
Body: io.NopCloser(strings.NewReader(capacityBody)), Body: io.NopCloser(strings.NewReader(capacityBody)),
}) },
errs = append(errs, nil) },
} errors: []error{nil},
upstream := &mockSmartRetryUpstream{
responses: responses,
errors: errs,
} }
repo := &stubAntigravityAccountRepo{} repo := &stubAntigravityAccountRepo{}
@@ -412,12 +408,12 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
} }
}`) }`)
resp := &http.Response{ resp := &http.Response{
StatusCode: http.StatusServiceUnavailable, StatusCode: 503,
Header: http.Header{}, Header: http.Header{},
Body: io.NopCloser(bytes.NewReader(respBody)), Body: io.NopCloser(bytes.NewReader(respBody)),
} }
// 使用可取消的 context 避免测试真的等待 5×20s // context 超时短于 20s 等待,验证 context 取消时正确返回
ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
defer cancel() defer cancel()
@@ -443,8 +439,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_LongDelay_SwitchAccount(t *
require.NotNil(t, result) require.NotNil(t, result)
require.Equal(t, smartRetryActionBreakWithResp, result.action) require.Equal(t, smartRetryActionBreakWithResp, result.action)
// context 超时会导致提前返回switchError 可能为 nilcontext canceled // context 超时会导致提前返回
// 验证不设置模型限流
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted") require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit for capacity exhausted")
} }