Merge pull request #1095 from LvyuanW/lvyuan/dev

fix(admin/accounts): reset edit modal state on reopen
Merge pull request #1096 from Ethan0x0000/pr/fix-idle-usage-windows
2026-04-07 00:40:22 +08:00 · 2026-03-18 11:37:07 +08:00 · 2026-03-18 11:32:50 +08:00 · 2026-03-18 11:32:35 +08:00 · 2026-03-18 11:31:32 +08:00 · 2026-03-18 11:12:43 +08:00
155 changed files with 8834 additions and 1916 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -4,6 +4,13 @@ backend/migrations/*.sql text eol=lf
 # Go 源代码文件
 *.go text eol=lf

+# 前端 源代码文件
+*.ts text eol=lf
+*.tsx text eol=lf
+*.js text eol=lf
+*.jsx text eol=lf
+*.vue text eol=lf
+
 # Shell 脚本
 *.sh text eol=lf

--- a/.goreleaser.simple.yaml
+++ b/.goreleaser.simple.yaml
@@ -47,6 +47,8 @@ dockers:
      - "ghcr.io/{{ .Env.GITHUB_REPO_OWNER_LOWER }}/sub2api:latest"
    dockerfile: Dockerfile.goreleaser
    use: buildx
+    extra_files:
+      - deploy/docker-entrypoint.sh
    build_flag_templates:
      - "--platform=linux/amd64"
      - "--label=org.opencontainers.image.version={{ .Version }}"
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -63,6 +63,8 @@ dockers:
      - "{{ .Env.DOCKERHUB_USERNAME }}/sub2api:{{ .Version }}-amd64"
    dockerfile: Dockerfile.goreleaser
    use: buildx
+    extra_files:
+      - deploy/docker-entrypoint.sh
    build_flag_templates:
      - "--platform=linux/amd64"
      - "--label=org.opencontainers.image.version={{ .Version }}"
@@ -76,6 +78,8 @@ dockers:
      - "{{ .Env.DOCKERHUB_USERNAME }}/sub2api:{{ .Version }}-arm64"
    dockerfile: Dockerfile.goreleaser
    use: buildx
+    extra_files:
+      - deploy/docker-entrypoint.sh
    build_flag_templates:
      - "--platform=linux/arm64"
      - "--label=org.opencontainers.image.version={{ .Version }}"
@@ -89,6 +93,8 @@ dockers:
      - "ghcr.io/{{ .Env.GITHUB_REPO_OWNER_LOWER }}/sub2api:{{ .Version }}-amd64"
    dockerfile: Dockerfile.goreleaser
    use: buildx
+    extra_files:
+      - deploy/docker-entrypoint.sh
    build_flag_templates:
      - "--platform=linux/amd64"
      - "--label=org.opencontainers.image.version={{ .Version }}"
@@ -102,6 +108,8 @@ dockers:
      - "ghcr.io/{{ .Env.GITHUB_REPO_OWNER_LOWER }}/sub2api:{{ .Version }}-arm64"
    dockerfile: Dockerfile.goreleaser
    use: buildx
+    extra_files:
+      - deploy/docker-entrypoint.sh
    build_flag_templates:
      - "--platform=linux/arm64"
      - "--label=org.opencontainers.image.version={{ .Version }}"
--- a/Dockerfile
+++ b/Dockerfile
@@ -92,6 +92,7 @@ LABEL org.opencontainers.image.source="https://github.com/Wei-Shaw/sub2api"
 RUN apk add --no-cache \
    ca-certificates \
    tzdata \
+    su-exec \
    libpq \
    zstd-libs \
    lz4-libs \
@@ -120,8 +121,9 @@ COPY --from=backend-builder --chown=sub2api:sub2api /app/backend/resources /app/
 # Create data directory
 RUN mkdir -p /app/data && chown sub2api:sub2api /app/data

-# Switch to non-root user
-USER sub2api
+# Copy entrypoint script (fixes volume permissions then drops to sub2api)
+COPY deploy/docker-entrypoint.sh /app/docker-entrypoint.sh
+RUN chmod +x /app/docker-entrypoint.sh

 # Expose port (can be overridden by SERVER_PORT env var)
 EXPOSE 8080
@@ -130,5 +132,6 @@ EXPOSE 8080
 HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD wget -q -T 5 -O /dev/null http://localhost:${SERVER_PORT:-8080}/health || exit 1

-# Run the application
-ENTRYPOINT ["/app/sub2api"]
+# Run the application (entrypoint fixes /app/data ownership then execs as sub2api)
+ENTRYPOINT ["/app/docker-entrypoint.sh"]
+CMD ["/app/sub2api"]
--- a/Dockerfile.goreleaser
+++ b/Dockerfile.goreleaser
@@ -21,6 +21,7 @@ RUN apk add --no-cache \
    ca-certificates \
    tzdata \
    curl \
+    su-exec \
    libpq \
    zstd-libs \
    lz4-libs \
@@ -47,11 +48,15 @@ COPY sub2api /app/sub2api
 # Create data directory
 RUN mkdir -p /app/data && chown -R sub2api:sub2api /app

-USER sub2api
+# Copy entrypoint script (fixes volume permissions then drops to sub2api)
+COPY deploy/docker-entrypoint.sh /app/docker-entrypoint.sh
+RUN chmod +x /app/docker-entrypoint.sh

 EXPOSE 8080

 HEALTHCHECK --interval=30s --timeout=10s --start-period=10s --retries=3 \
    CMD curl -f http://localhost:${SERVER_PORT:-8080}/health || exit 1

-ENTRYPOINT ["/app/sub2api"]
+# Run the application (entrypoint fixes /app/data ownership then execs as sub2api)
+ENTRYPOINT ["/app/docker-entrypoint.sh"]
+CMD ["/app/sub2api"]
--- a/README.md
+++ b/README.md
@@ -8,27 +8,31 @@
 [![Redis](https://img.shields.io/badge/Redis-7+-DC382D.svg)](https://redis.io/)
 [![Docker](https://img.shields.io/badge/Docker-Ready-2496ED.svg)](https://www.docker.com/)

+<a href="https://trendshift.io/repositories/21823" target="_blank"><img src="https://trendshift.io/api/badge/repositories/21823" alt="Wei-Shaw%2Fsub2api | Trendshift" width="250" height="55"/></a>
+
 **AI API Gateway Platform for Subscription Quota Distribution**

 English | [中文](README_CN.md)

 </div>

+> **Sub2API officially uses only the domains `sub2api.org` and `pincc.ai`. Other websites using the Sub2API name may be third-party deployments or services and are not affiliated with this project. Please verify and exercise your own judgment.**
+
 ---

 ## Demo

-Try Sub2API online: **https://demo.sub2api.org/**
+Try Sub2API online: **[https://demo.sub2api.org/](https://demo.sub2api.org/)**

 Demo credentials (shared demo environment; **not** created automatically for self-hosted installs):

 | Email | Password |
 |-------|----------|
-| admin@sub2api.com | admin123 |
+| admin@sub2api.org | admin123 |

 ## Overview

-Sub2API is an AI API gateway platform designed to distribute and manage API quotas from AI product subscriptions (like Claude Code $200/month). Users can access upstream AI services through platform-generated API Keys, while the platform handles authentication, billing, load balancing, and request forwarding.
+Sub2API is an AI API gateway platform designed to distribute and manage API quotas from AI product subscriptions. Users can access upstream AI services through platform-generated API Keys, while the platform handles authentication, billing, load balancing, and request forwarding.

 ## Features

@@ -41,6 +45,15 @@ Sub2API is an AI API gateway platform designed to distribute and manage API quot
 - **Admin Dashboard** - Web interface for monitoring and management
 - **External System Integration** - Embed external systems (e.g. payment, ticketing) via iframe to extend the admin dashboard

+## Don't Want to Self-Host?
+
+<table>
+<tr>
+<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="120"></a></td>
+<td valign="middle"><b><a href="https://shop.pincc.ai/">PinCC</a></b> is the official relay service built on Sub2API, offering stable access to Claude Code, Codex, Gemini and other popular models — ready to use, no deployment or maintenance required.</td>
+</tr>
+</table>
+
 ## Ecosystem

 Community projects that extend or integrate with Sub2API:
@@ -61,10 +74,15 @@ Community projects that extend or integrate with Sub2API:

 ---

-## Documentation
+## Nginx Reverse Proxy Note

- Dependency Security: `docs/dependency-security.md`
- Admin Payment Integration API: `docs/ADMIN_PAYMENT_INTEGRATION_API.md`
+When using Nginx as a reverse proxy for Sub2API (or CRS) with Codex CLI, add the following to the `http` block in your Nginx configuration:
+
+```nginx
+underscores_in_headers on;
+```
+
+Nginx drops headers containing underscores by default (e.g. `session_id`), which breaks sticky session routing in multi-account setups.

 ---

--- a/README_CN.md
+++ b/README_CN.md
@@ -8,27 +8,30 @@
 [![Redis](https://img.shields.io/badge/Redis-7+-DC382D.svg)](https://redis.io/)
 [![Docker](https://img.shields.io/badge/Docker-Ready-2496ED.svg)](https://www.docker.com/)

+<a href="https://trendshift.io/repositories/21823" target="_blank"><img src="https://trendshift.io/api/badge/repositories/21823" alt="Wei-Shaw%2Fsub2api | Trendshift" width="250" height="55"/></a>
+
 **AI API 网关平台 - 订阅配额分发管理**

 [English](README.md) | 中文

 </div>

+> **Sub2API 官方仅使用 `sub2api.org` 与 `pincc.ai` 两个域名。其他使用 Sub2API 名义的网站可能为第三方部署或服务，与本项目无关，请自行甄别。**
 ---

 ## 在线体验

-体验地址：**https://v2.pincc.ai/**
+体验地址：**[https://demo.sub2api.org/](https://demo.sub2api.org/)**

 演示账号（共享演示环境；自建部署不会自动创建该账号）：

 | 邮箱 | 密码 |
 |------|------|
-| admin@sub2api.com | admin123 |
+| admin@sub2api.org | admin123 |

 ## 项目概述

-Sub2API 是一个 AI API 网关平台，用于分发和管理 AI 产品订阅（如 Claude Code $200/月）的 API 配额。用户通过平台生成的 API Key 调用上游 AI 服务，平台负责鉴权、计费、负载均衡和请求转发。
+Sub2API 是一个 AI API 网关平台，用于分发和管理 AI 产品订阅的 API 配额。用户通过平台生成的 API Key 调用上游 AI 服务，平台负责鉴权、计费、负载均衡和请求转发。

 ## 核心功能

@@ -41,6 +44,15 @@ Sub2API 是一个 AI API 网关平台，用于分发和管理 AI 产品订阅（
 - **管理后台** - Web 界面进行监控和管理
 - **外部系统集成** - 支持通过 iframe 嵌入外部系统（如支付、工单等），扩展管理后台功能

+## 不想自建？试试官方中转
+
+<table>
+<tr>
+<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="120"></a></td>
+<td valign="middle"><b><a href="https://shop.pincc.ai/">PinCC</a></b> 是基于 Sub2API 搭建的官方中转服务，提供 Claude Code、Codex、Gemini 等主流模型的稳定中转，开箱即用，免去自建部署与运维烦恼。</td>
+</tr>
+</table>
+
 ## 生态项目

 围绕 Sub2API 的社区扩展与集成项目：
@@ -61,17 +73,18 @@ Sub2API 是一个 AI API 网关平台，用于分发和管理 AI 产品订阅（

 ---

-## 文档
+## Nginx 反向代理注意事项

- 依赖安全：`docs/dependency-security.md`
+通过 Nginx 反向代理 Sub2API（或 CRS 服务）并搭配 Codex CLI 使用时，需要在 Nginx 配置的 `http` 块中添加：
+
+```nginx
+underscores_in_headers on;
+```
+
+Nginx 默认会丢弃名称中含下划线的请求头（如 `session_id`），这会导致多账号环境下的粘性会话功能失效。

 ---

-## OpenAI Responses 兼容注意事项
-
- 当请求包含 `function_call_output` 时，需要携带 `previous_response_id`，或在 `input` 中包含带 `call_id` 的 `tool_call`/`function_call`，或带非空 `id` 且与 `function_call_output.call_id` 匹配的 `item_reference`。
- 若依赖上游历史记录，网关会强制 `store=true` 并需要复用 `previous_response_id`，以避免出现 “No tool call found for function call output” 错误。
-
 ## 部署方式

 ### 方式一：脚本安装（推荐）
--- a/assets/partners/logos/pincc-logo.png
+++ b/assets/partners/logos/pincc-logo.png
--- a/backend/cmd/server/wire_gen.go
+++ b/backend/cmd/server/wire_gen.go
@@ -110,7 +110,6 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	concurrencyCache := repository.ProvideConcurrencyCache(redisClient, configConfig)
 	concurrencyService := service.ProvideConcurrencyService(concurrencyCache, accountRepository, configConfig)
 	adminUserHandler := admin.NewUserHandler(adminService, concurrencyService)
-	groupHandler := admin.NewGroupHandler(adminService)
 	claudeOAuthClient := repository.NewClaudeOAuthClient()
 	oAuthService := service.NewOAuthService(proxyRepository, claudeOAuthClient)
 	openAIOAuthClient := repository.NewOpenAIOAuthClient()
@@ -124,6 +123,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	tempUnschedCache := repository.NewTempUnschedCache(redisClient)
 	timeoutCounterCache := repository.NewTimeoutCounterCache(redisClient)
 	geminiTokenCache := repository.NewGeminiTokenCache(redisClient)
+	oauthRefreshAPI := service.NewOAuthRefreshAPI(accountRepository, geminiTokenCache)
 	compositeTokenCacheInvalidator := service.NewCompositeTokenCacheInvalidator(geminiTokenCache)
 	rateLimitService := service.ProvideRateLimitService(accountRepository, usageLogRepository, configConfig, geminiQuotaService, tempUnschedCache, timeoutCounterCache, settingService, compositeTokenCacheInvalidator)
 	httpUpstream := repository.NewHTTPUpstream(configConfig)
@@ -132,16 +132,18 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	usageCache := service.NewUsageCache()
 	identityCache := repository.NewIdentityCache(redisClient)
 	accountUsageService := service.NewAccountUsageService(accountRepository, usageLogRepository, claudeUsageFetcher, geminiQuotaService, antigravityQuotaFetcher, usageCache, identityCache)
-	geminiTokenProvider := service.NewGeminiTokenProvider(accountRepository, geminiTokenCache, geminiOAuthService)
+	geminiTokenProvider := service.ProvideGeminiTokenProvider(accountRepository, geminiTokenCache, geminiOAuthService, oauthRefreshAPI)
 	gatewayCache := repository.NewGatewayCache(redisClient)
 	schedulerOutboxRepository := repository.NewSchedulerOutboxRepository(db)
 	schedulerSnapshotService := service.ProvideSchedulerSnapshotService(schedulerCache, schedulerOutboxRepository, accountRepository, groupRepository, configConfig)
-	antigravityTokenProvider := service.NewAntigravityTokenProvider(accountRepository, geminiTokenCache, antigravityOAuthService)
+	antigravityTokenProvider := service.ProvideAntigravityTokenProvider(accountRepository, geminiTokenCache, antigravityOAuthService, oauthRefreshAPI)
 	antigravityGatewayService := service.NewAntigravityGatewayService(accountRepository, gatewayCache, schedulerSnapshotService, antigravityTokenProvider, rateLimitService, httpUpstream, settingService)
 	accountTestService := service.NewAccountTestService(accountRepository, geminiTokenProvider, antigravityGatewayService, httpUpstream, configConfig)
 	crsSyncService := service.NewCRSSyncService(accountRepository, proxyRepository, oAuthService, openAIOAuthService, geminiOAuthService, configConfig)
 	sessionLimitCache := repository.ProvideSessionLimitCache(redisClient, configConfig)
 	rpmCache := repository.NewRPMCache(redisClient)
+	groupCapacityService := service.NewGroupCapacityService(accountRepository, groupRepository, concurrencyService, sessionLimitCache, rpmCache)
+	groupHandler := admin.NewGroupHandler(adminService, dashboardService, groupCapacityService)
 	accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService, sessionLimitCache, rpmCache, compositeTokenCacheInvalidator)
 	adminAnnouncementHandler := admin.NewAnnouncementHandler(announcementService)
 	dataManagementService := service.NewDataManagementService()
@@ -166,10 +168,10 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	billingService := service.NewBillingService(configConfig, pricingService)
 	identityService := service.NewIdentityService(identityCache)
 	deferredService := service.ProvideDeferredService(accountRepository, timingWheelService)
-	claudeTokenProvider := service.NewClaudeTokenProvider(accountRepository, geminiTokenCache, oAuthService)
+	claudeTokenProvider := service.ProvideClaudeTokenProvider(accountRepository, geminiTokenCache, oAuthService, oauthRefreshAPI)
 	digestSessionStore := service.NewDigestSessionStore()
 	gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, rpmCache, digestSessionStore, settingService)
-	openAITokenProvider := service.NewOpenAITokenProvider(accountRepository, geminiTokenCache, openAIOAuthService)
+	openAITokenProvider := service.ProvideOpenAITokenProvider(accountRepository, geminiTokenCache, openAIOAuthService, oauthRefreshAPI)
 	openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, usageBillingRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService, openAITokenProvider)
 	geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
 	opsSystemLogSink := service.ProvideOpsSystemLogSink(opsRepository)
@@ -232,7 +234,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	opsCleanupService := service.ProvideOpsCleanupService(opsRepository, db, redisClient, configConfig)
 	opsScheduledReportService := service.ProvideOpsScheduledReportService(opsService, userService, emailService, redisClient, configConfig)
 	soraMediaCleanupService := service.ProvideSoraMediaCleanupService(soraMediaStorage, configConfig)
-	tokenRefreshService := service.ProvideTokenRefreshService(accountRepository, soraAccountRepository, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, compositeTokenCacheInvalidator, schedulerCache, configConfig, tempUnschedCache, privacyClientFactory, proxyRepository)
+	tokenRefreshService := service.ProvideTokenRefreshService(accountRepository, soraAccountRepository, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, compositeTokenCacheInvalidator, schedulerCache, configConfig, tempUnschedCache, privacyClientFactory, proxyRepository, oauthRefreshAPI)
 	accountExpiryService := service.ProvideAccountExpiryService(accountRepository)
 	subscriptionExpiryService := service.ProvideSubscriptionExpiryService(userSubscriptionRepository)
 	scheduledTestRunnerService := service.ProvideScheduledTestRunnerService(scheduledTestPlanRepository, scheduledTestService, accountTestService, rateLimitService, configConfig)
--- a/backend/internal/handler/admin/admin_basic_handlers_test.go
+++ b/backend/internal/handler/admin/admin_basic_handlers_test.go
@@ -17,7 +17,7 @@ func setupAdminRouter() (*gin.Engine, *stubAdminService) {
 	adminSvc := newStubAdminService()

 	userHandler := NewUserHandler(adminSvc, nil)
-	groupHandler := NewGroupHandler(adminSvc)
+	groupHandler := NewGroupHandler(adminSvc, nil, nil)
 	proxyHandler := NewProxyHandler(adminSvc)
 	redeemHandler := NewRedeemHandler(adminSvc, nil)

--- a/backend/internal/handler/admin/backup_handler.go
+++ b/backend/internal/handler/admin/backup_handler.go
@@ -98,12 +98,12 @@ func (h *BackupHandler) CreateBackup(c *gin.Context) {
 		expireDays = *req.ExpireDays
 	}

-	record, err := h.backupService.CreateBackup(c.Request.Context(), "manual", expireDays)
+	record, err := h.backupService.StartBackup(c.Request.Context(), "manual", expireDays)
 	if err != nil {
 		response.ErrorFrom(c, err)
 		return
 	}
-	response.Success(c, record)
+	response.Accepted(c, record)
 }

 func (h *BackupHandler) ListBackups(c *gin.Context) {
@@ -196,9 +196,10 @@ func (h *BackupHandler) RestoreBackup(c *gin.Context) {
 		return
 	}

-	if err := h.backupService.RestoreBackup(c.Request.Context(), backupID); err != nil {
+	record, err := h.backupService.StartRestore(c.Request.Context(), backupID)
+	if err != nil {
 		response.ErrorFrom(c, err)
 		return
 	}
-	response.Success(c, gin.H{"restored": true})
+	response.Accepted(c, record)
 }
--- a/backend/internal/handler/admin/dashboard_handler.go
+++ b/backend/internal/handler/admin/dashboard_handler.go
@@ -9,6 +9,7 @@ import (

 	"github.com/Wei-Shaw/sub2api/internal/pkg/response"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/timezone"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/usagestats"
 	"github.com/Wei-Shaw/sub2api/internal/service"

 	"github.com/gin-gonic/gin"
@@ -512,6 +513,8 @@ func (h *DashboardHandler) GetUserSpendingRanking(c *gin.Context) {
 	payload := gin.H{
 		"ranking":           ranking.Ranking,
 		"total_actual_cost": ranking.TotalActualCost,
+		"total_requests":    ranking.TotalRequests,
+		"total_tokens":      ranking.TotalTokens,
 		"start_date":        startTime.Format("2006-01-02"),
 		"end_date":          endTime.Add(-24 * time.Hour).Format("2006-01-02"),
 	}
@@ -602,3 +605,41 @@ func (h *DashboardHandler) GetBatchAPIKeysUsage(c *gin.Context) {
 	c.Header("X-Snapshot-Cache", "miss")
 	response.Success(c, payload)
 }
+
+// GetUserBreakdown handles getting per-user usage breakdown within a dimension.
+// GET /api/v1/admin/dashboard/user-breakdown
+// Query params: start_date, end_date, group_id, model, endpoint, endpoint_type (default "inbound"), limit (1-200, default 50)
+func (h *DashboardHandler) GetUserBreakdown(c *gin.Context) {
+	startTime, endTime := parseTimeRange(c)
+
+	dim := usagestats.UserBreakdownDimension{}
+	if v := c.Query("group_id"); v != "" {
+		if id, err := strconv.ParseInt(v, 10, 64); err == nil { // a non-numeric group_id is silently ignored; GroupID stays 0
+			dim.GroupID = id
+		}
+	}
+	dim.Model = c.Query("model")
+	dim.Endpoint = c.Query("endpoint")
+	dim.EndpointType = c.DefaultQuery("endpoint_type", "inbound")
+
+	limit := 50 // used when limit is absent, non-numeric, or out of range
+	if v := c.Query("limit"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n > 0 && n <= 200 { // out-of-range values are not clamped; they keep the default
+			limit = n
+		}
+	}
+
+	stats, err := h.dashboardService.GetUserBreakdownStats(
+		c.Request.Context(), startTime, endTime, dim, limit,
+	)
+	if err != nil {
+		response.Error(c, 500, "Failed to get user breakdown stats") // generic message only; err itself is not passed on
+		return
+	}
+
+	response.Success(c, gin.H{
+		"users":      stats,
+		"start_date": startTime.Format("2006-01-02"),
+		"end_date":   endTime.Add(-24 * time.Hour).Format("2006-01-02"), // presumably endTime is an exclusive next-day bound from parseTimeRange — TODO confirm
+	})
+}
--- a/backend/internal/handler/admin/dashboard_handler_request_type_test.go
+++ b/backend/internal/handler/admin/dashboard_handler_request_type_test.go
@@ -61,6 +61,8 @@ func (s *dashboardUsageRepoCapture) GetUserSpendingRanking(
 	return &usagestats.UserSpendingRankingResponse{
 		Ranking:         s.ranking,
 		TotalActualCost: s.rankingTotal,
+		TotalRequests:   44,
+		TotalTokens:     1234,
 	}, nil
 }

@@ -164,6 +166,8 @@ func TestDashboardUsersRankingLimitAndCache(t *testing.T) {
 	require.Equal(t, http.StatusOK, rec.Code)
 	require.Equal(t, 50, repo.rankingLimit)
 	require.Contains(t, rec.Body.String(), "\"total_actual_cost\":88.8")
+	require.Contains(t, rec.Body.String(), "\"total_requests\":44")
+	require.Contains(t, rec.Body.String(), "\"total_tokens\":1234")
 	require.Equal(t, "miss", rec.Header().Get("X-Snapshot-Cache"))

 	req2 := httptest.NewRequest(http.MethodGet, "/admin/dashboard/users-ranking?limit=100&start_date=2025-01-01&end_date=2025-01-02", nil)
--- a/backend/internal/handler/admin/dashboard_handler_user_breakdown_test.go
+++ b/backend/internal/handler/admin/dashboard_handler_user_breakdown_test.go
@@ -0,0 +1,203 @@
+package admin
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/usagestats"
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+// --- mock repo ---
+
+type userBreakdownRepoCapture struct {
+	service.UsageLogRepository
+	capturedDim   usagestats.UserBreakdownDimension
+	capturedLimit int
+	result        []usagestats.UserBreakdownItem
+}
+
+func (r *userBreakdownRepoCapture) GetUserBreakdownStats(
+	_ context.Context, _, _ time.Time,
+	dim usagestats.UserBreakdownDimension, limit int,
+) ([]usagestats.UserBreakdownItem, error) {
+	r.capturedDim = dim
+	r.capturedLimit = limit
+	if r.result != nil {
+		return r.result, nil
+	}
+	return []usagestats.UserBreakdownItem{}, nil
+}
+
+func newUserBreakdownRouter(repo *userBreakdownRepoCapture) *gin.Engine {
+	gin.SetMode(gin.TestMode)
+	svc := service.NewDashboardService(repo, nil, nil, nil)
+	h := NewDashboardHandler(svc, nil)
+	router := gin.New()
+	router.GET("/admin/dashboard/user-breakdown", h.GetUserBreakdown)
+	return router
+}
+
+// --- tests ---
+
+func TestGetUserBreakdown_GroupIDFilter(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&group_id=42", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+	require.Equal(t, int64(42), repo.capturedDim.GroupID)
+	require.Empty(t, repo.capturedDim.Model)
+	require.Empty(t, repo.capturedDim.Endpoint)
+	require.Equal(t, 50, repo.capturedLimit) // default limit
+}
+
+func TestGetUserBreakdown_ModelFilter(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&model=claude-opus-4-6", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+	require.Equal(t, "claude-opus-4-6", repo.capturedDim.Model)
+	require.Equal(t, int64(0), repo.capturedDim.GroupID)
+}
+
+func TestGetUserBreakdown_EndpointFilter(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&endpoint=/v1/messages&endpoint_type=upstream", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+	require.Equal(t, "/v1/messages", repo.capturedDim.Endpoint)
+	require.Equal(t, "upstream", repo.capturedDim.EndpointType)
+}
+
+func TestGetUserBreakdown_DefaultEndpointType(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&endpoint=/chat", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+	require.Equal(t, "inbound", repo.capturedDim.EndpointType)
+}
+
+func TestGetUserBreakdown_CustomLimit(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&model=test&limit=100", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+	require.Equal(t, 100, repo.capturedLimit)
+}
+
+func TestGetUserBreakdown_LimitClamped(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	// limit > 200 should fall back to default 50
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&model=test&limit=999", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+	require.Equal(t, 50, repo.capturedLimit)
+}
+
+func TestGetUserBreakdown_ResponseFormat(t *testing.T) {
+	repo := &userBreakdownRepoCapture{
+		result: []usagestats.UserBreakdownItem{
+			{UserID: 1, Email: "alice@test.com", Requests: 100, TotalTokens: 50000, Cost: 1.5, ActualCost: 1.2},
+			{UserID: 2, Email: "bob@test.com", Requests: 50, TotalTokens: 25000, Cost: 0.8, ActualCost: 0.6},
+		},
+	}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&group_id=1", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+
+	var resp struct {
+		Code int `json:"code"`
+		Data struct {
+			Users     []usagestats.UserBreakdownItem `json:"users"`
+			StartDate string                         `json:"start_date"`
+			EndDate   string                         `json:"end_date"`
+		} `json:"data"`
+	}
+	err := json.Unmarshal(w.Body.Bytes(), &resp)
+	require.NoError(t, err)
+	require.Equal(t, 0, resp.Code)
+	require.Len(t, resp.Data.Users, 2)
+	require.Equal(t, int64(1), resp.Data.Users[0].UserID)
+	require.Equal(t, "alice@test.com", resp.Data.Users[0].Email)
+	require.Equal(t, int64(100), resp.Data.Users[0].Requests)
+	require.InDelta(t, 1.2, resp.Data.Users[0].ActualCost, 0.001)
+	require.Equal(t, "2026-03-01", resp.Data.StartDate)
+	require.Equal(t, "2026-03-16", resp.Data.EndDate)
+}
+
+func TestGetUserBreakdown_EmptyResult(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16&group_id=999", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+
+	var resp struct {
+		Data struct {
+			Users []usagestats.UserBreakdownItem `json:"users"`
+		} `json:"data"`
+	}
+	err := json.Unmarshal(w.Body.Bytes(), &resp)
+	require.NoError(t, err)
+	require.Empty(t, resp.Data.Users)
+}
+
+func TestGetUserBreakdown_NoFilters(t *testing.T) {
+	repo := &userBreakdownRepoCapture{}
+	router := newUserBreakdownRouter(repo)
+
+	req := httptest.NewRequest(http.MethodGet,
+		"/admin/dashboard/user-breakdown?start_date=2026-03-01&end_date=2026-03-16", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+
+	require.Equal(t, http.StatusOK, w.Code)
+	require.Equal(t, int64(0), repo.capturedDim.GroupID)
+	require.Empty(t, repo.capturedDim.Model)
+	require.Empty(t, repo.capturedDim.Endpoint)
+}
--- a/backend/internal/handler/admin/group_handler.go
+++ b/backend/internal/handler/admin/group_handler.go
@@ -9,6 +9,7 @@ import (

 	"github.com/Wei-Shaw/sub2api/internal/handler/dto"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/response"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/timezone"
 	"github.com/Wei-Shaw/sub2api/internal/service"

 	"github.com/gin-gonic/gin"
@@ -16,7 +17,9 @@ import (

 // GroupHandler handles admin group management
 type GroupHandler struct {
-	adminService service.AdminService
+	adminService         service.AdminService
+	dashboardService     *service.DashboardService
+	groupCapacityService *service.GroupCapacityService
 }

 type optionalLimitField struct {
@@ -69,9 +72,11 @@ func (f optionalLimitField) ToServiceInput() *float64 {
 }

 // NewGroupHandler creates a new admin group handler
-func NewGroupHandler(adminService service.AdminService) *GroupHandler {
+func NewGroupHandler(adminService service.AdminService, dashboardService *service.DashboardService, groupCapacityService *service.GroupCapacityService) *GroupHandler {
 	return &GroupHandler{
-		adminService: adminService,
+		adminService:         adminService,
+		dashboardService:     dashboardService,
+		groupCapacityService: groupCapacityService,
 	}
 }

@@ -363,6 +368,33 @@ func (h *GroupHandler) GetStats(c *gin.Context) {
 	_ = groupID // TODO: implement actual stats
 }

+// GetUsageSummary returns today's and cumulative cost for all groups; "today" starts at the value computed from the optional timezone query param.
+// GET /api/v1/admin/groups/usage-summary?timezone=Asia/Shanghai
+func (h *GroupHandler) GetUsageSummary(c *gin.Context) {
+	userTZ := c.Query("timezone") // empty string when the param is omitted; fallback is up to the timezone helpers — TODO confirm
+	now := timezone.NowInUserLocation(userTZ)
+	todayStart := timezone.StartOfDayInUserLocation(now, userTZ)
+
+	results, err := h.dashboardService.GetGroupUsageSummary(c.Request.Context(), todayStart) // NOTE(review): dashboardService is nil when built via NewGroupHandler(adminSvc, nil, nil) — confirm this route is not exercised there
+	if err != nil {
+		response.Error(c, 500, "Failed to get group usage summary") // generic message only; err itself is not passed on
+		return
+	}
+
+	response.Success(c, results)
+}
+
+// GetCapacitySummary returns aggregated capacity (concurrency/sessions/RPM) for all active groups.
+// GET /api/v1/admin/groups/capacity-summary
+func (h *GroupHandler) GetCapacitySummary(c *gin.Context) {
+	results, err := h.groupCapacityService.GetAllGroupCapacity(c.Request.Context()) // NOTE(review): groupCapacityService is nil when built via NewGroupHandler(adminSvc, nil, nil) — confirm this route is not exercised there
+	if err != nil {
+		response.Error(c, 500, "Failed to get group capacity summary") // generic message only; err itself is not passed on
+		return
+	}
+	response.Success(c, results)
+}
+
 // GetGroupAPIKeys handles getting API keys in a group
 // GET /api/v1/admin/groups/:id/api-keys
 func (h *GroupHandler) GetGroupAPIKeys(c *gin.Context) {
--- a/backend/internal/handler/admin/subscription_handler.go
+++ b/backend/internal/handler/admin/subscription_handler.go
@@ -77,12 +77,13 @@ func (h *SubscriptionHandler) List(c *gin.Context) {
 		}
 	}
 	status := c.Query("status")
+	platform := c.Query("platform")

 	// Parse sorting parameters
 	sortBy := c.DefaultQuery("sort_by", "created_at")
 	sortOrder := c.DefaultQuery("sort_order", "desc")

-	subscriptions, pagination, err := h.subscriptionService.List(c.Request.Context(), page, pageSize, userID, groupID, status, sortBy, sortOrder)
+	subscriptions, pagination, err := h.subscriptionService.List(c.Request.Context(), page, pageSize, userID, groupID, status, platform, sortBy, sortOrder)
 	if err != nil {
 		response.ErrorFrom(c, err)
 		return
--- a/backend/internal/handler/admin/usage_handler.go
+++ b/backend/internal/handler/admin/usage_handler.go
@@ -159,8 +159,8 @@ func (h *UsageHandler) List(c *gin.Context) {
 			response.BadRequest(c, "Invalid end_date format, use YYYY-MM-DD")
 			return
 		}
-		// Set end time to end of day
-		t = t.Add(24*time.Hour - time.Nanosecond)
+		// Use half-open range [start, end), move to next calendar day start (DST-safe).
+		t = t.AddDate(0, 0, 1)
 		endTime = &t
 	}

@@ -285,7 +285,8 @@ func (h *UsageHandler) Stats(c *gin.Context) {
 			response.BadRequest(c, "Invalid end_date format, use YYYY-MM-DD")
 			return
 		}
-		endTime = endTime.Add(24*time.Hour - time.Nanosecond)
+		// 与 SQL 条件 created_at < end 对齐，使用次日 00:00 作为上边界（DST-safe）。
+		endTime = endTime.AddDate(0, 0, 1)
 	} else {
 		period := c.DefaultQuery("period", "today")
 		switch period {
--- a/backend/internal/handler/dto/mappers.go
+++ b/backend/internal/handler/dto/mappers.go
@@ -135,14 +135,16 @@ func GroupFromServiceAdmin(g *service.Group) *AdminGroup {
 		return nil
 	}
 	out := &AdminGroup{
-		Group:                groupFromServiceBase(g),
-		ModelRouting:         g.ModelRouting,
-		ModelRoutingEnabled:  g.ModelRoutingEnabled,
-		MCPXMLInject:         g.MCPXMLInject,
-		DefaultMappedModel:   g.DefaultMappedModel,
-		SupportedModelScopes: g.SupportedModelScopes,
-		AccountCount:         g.AccountCount,
-		SortOrder:            g.SortOrder,
+		Group:                   groupFromServiceBase(g),
+		ModelRouting:            g.ModelRouting,
+		ModelRoutingEnabled:     g.ModelRoutingEnabled,
+		MCPXMLInject:            g.MCPXMLInject,
+		DefaultMappedModel:      g.DefaultMappedModel,
+		SupportedModelScopes:    g.SupportedModelScopes,
+		AccountCount:            g.AccountCount,
+		ActiveAccountCount:      g.ActiveAccountCount,
+		RateLimitedAccountCount: g.RateLimitedAccountCount,
+		SortOrder:               g.SortOrder,
 	}
 	if len(g.AccountGroups) > 0 {
 		out.AccountGroups = make([]AccountGroup, 0, len(g.AccountGroups))
--- a/backend/internal/handler/dto/types.go
+++ b/backend/internal/handler/dto/types.go
@@ -122,9 +122,11 @@ type AdminGroup struct {
 	DefaultMappedModel string `json:"default_mapped_model"`

 	// 支持的模型系列（仅 antigravity 平台使用）
-	SupportedModelScopes []string       `json:"supported_model_scopes"`
-	AccountGroups        []AccountGroup `json:"account_groups,omitempty"`
-	AccountCount         int64          `json:"account_count,omitempty"`
+	SupportedModelScopes    []string       `json:"supported_model_scopes"`
+	AccountGroups           []AccountGroup `json:"account_groups,omitempty"`
+	AccountCount            int64          `json:"account_count,omitempty"`
+	ActiveAccountCount      int64          `json:"active_account_count,omitempty"`
+	RateLimitedAccountCount int64          `json:"rate_limited_account_count,omitempty"`

 	// 分组排序
 	SortOrder int `json:"sort_order"`
--- a/backend/internal/handler/endpoint.go
+++ b/backend/internal/handler/endpoint.go
@@ -0,0 +1,174 @@
+package handler
+
+import (
+	"strings"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/gin-gonic/gin"
+)
+
+// ──────────────────────────────────────────────────────────
+// Canonical inbound / upstream endpoint paths.
+// All normalization and derivation reference this single set
+// of constants — add new paths HERE when a new API surface
+// is introduced.
+// ──────────────────────────────────────────────────────────
+
+const (
+	EndpointMessages        = "/v1/messages"         // Messages endpoint (Anthropic upstream)
+	EndpointChatCompletions = "/v1/chat/completions" // Chat Completions endpoint (Sora upstream)
+	EndpointResponses       = "/v1/responses"        // Responses endpoint (OpenAI upstream)
+	EndpointGeminiModels    = "/v1beta/models"       // Gemini models endpoint (Gemini upstream)
+)
+
+// gin.Context keys used by the middleware and helpers below.
+const (
+	ctxKeyInboundEndpoint = "_gateway_inbound_endpoint"
+)
+
+// ──────────────────────────────────────────────────────────
+// Normalization functions
+// ──────────────────────────────────────────────────────────
+
+// NormalizeInboundEndpoint reduces a raw request path or route pattern (possibly prefixed
+// with /antigravity, /openai, /sora) to the canonical endpoint it contains, else the trimmed path.
+//
+//	"/antigravity/v1/messages"   → "/v1/messages"
+//	"/v1/chat/completions"       → "/v1/chat/completions"
+//	"/openai/v1/responses/foo"   → "/v1/responses"
+//	"/v1beta/models/gemini:gen"  → "/v1beta/models"
+func NormalizeInboundEndpoint(path string) string {
+	trimmed := strings.TrimSpace(path)
+	candidates := [...]string{
+		EndpointChatCompletions,
+		EndpointMessages,
+		EndpointResponses,
+		EndpointGeminiModels,
+	}
+	for _, canonical := range candidates {
+		if strings.Contains(trimmed, canonical) {
+			return canonical
+		}
+	}
+	return trimmed
+}
+
+// DeriveUpstreamEndpoint picks the upstream endpoint for a request from
+// the account platform, the normalized inbound endpoint and the raw
+// request path.
+//
+// Per-platform behaviour:
+//   - OpenAI      → /v1/responses, keeping any subpath that follows
+//     "/responses" in the raw URL (e.g. /v1/responses/compact).
+//   - Anthropic   → /v1/messages
+//   - Gemini      → /v1beta/models
+//   - Sora        → /v1/chat/completions
+//   - Antigravity → /v1beta/models when the inbound endpoint is the
+//     Gemini one, /v1/messages otherwise.
+//   - any other platform → the trimmed inbound endpoint.
+func DeriveUpstreamEndpoint(inbound, rawRequestPath, platform string) string {
+	switch platform {
+	case service.PlatformOpenAI:
+		// OpenAI traffic always targets the Responses API. The helper
+		// yields "" when there is no subresource, which leaves the bare
+		// endpoint unchanged.
+		return EndpointResponses + responsesSubpathSuffix(rawRequestPath)
+
+	case service.PlatformAnthropic:
+		return EndpointMessages
+
+	case service.PlatformGemini:
+		return EndpointGeminiModels
+
+	case service.PlatformSora:
+		return EndpointChatCompletions
+
+	case service.PlatformAntigravity:
+		// Antigravity accounts serve both Claude and Gemini, so the
+		// inbound endpoint decides which upstream applies.
+		if strings.TrimSpace(inbound) == EndpointGeminiModels {
+			return EndpointGeminiModels
+		}
+		return EndpointMessages
+
+	default:
+		// Unknown platform: echo the inbound endpoint.
+		return strings.TrimSpace(inbound)
+	}
+}
+
+// responsesSubpathSuffix extracts the part after "/responses" in a raw
+// request path, e.g. "/openai/v1/responses/compact" → "/compact".
+// Returns "" when there is no meaningful suffix.
+func responsesSubpathSuffix(rawPath string) string {
+	trimmed := strings.TrimRight(strings.TrimSpace(rawPath), "/")
+	idx := strings.LastIndex(trimmed, "/responses")
+	if idx < 0 {
+		return ""
+	}
+	suffix := trimmed[idx+len("/responses"):]
+	if suffix == "" || suffix == "/" {
+		return ""
+	}
+	if !strings.HasPrefix(suffix, "/") {
+		return ""
+	}
+	return suffix
+}
+
+// ──────────────────────────────────────────────────────────
+// Middleware
+// ──────────────────────────────────────────────────────────
+
+// InboundEndpointMiddleware returns a gin handler that stores the canonical
+// inbound endpoint in the context (read back via GetInboundEndpoint) and then
+// continues the chain. Apply it to all gateway route groups.
+func InboundEndpointMiddleware() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		// Prefer the matched route pattern; use the raw URL path when gin
+		// reports none and a request is attached.
+		route := c.FullPath()
+		if route == "" && c.Request != nil && c.Request.URL != nil {
+			route = c.Request.URL.Path
+		}
+		c.Set(ctxKeyInboundEndpoint, NormalizeInboundEndpoint(route))
+		c.Next()
+	}
+}
+
+// ──────────────────────────────────────────────────────────
+// Context helpers — used by handlers before building
+// RecordUsageInput / RecordUsageLongContextInput.
+// ──────────────────────────────────────────────────────────
+
+// GetInboundEndpoint returns the canonical inbound endpoint stored by
+// InboundEndpointMiddleware. If the middleware did not run (e.g. in
+// tests), it normalizes c.FullPath() or the raw URL path; nil c yields "".
+func GetInboundEndpoint(c *gin.Context) string {
+	if c == nil {
+		return NormalizeInboundEndpoint("")
+	}
+	if v, ok := c.Get(ctxKeyInboundEndpoint); ok {
+		if s, ok := v.(string); ok && s != "" {
+			return s
+		}
+	}
+	// Fallback: normalize on the fly.
+	path := c.FullPath()
+	if path == "" && c.Request != nil && c.Request.URL != nil {
+		path = c.Request.URL.Path
+	}
+	return NormalizeInboundEndpoint(path)
+}
+
+// GetUpstreamEndpoint derives the upstream endpoint from the context
+// and the account platform. Handlers call this after scheduling an
+// account, passing account.Platform.
+func GetUpstreamEndpoint(c *gin.Context, platform string) string {
+	inbound := GetInboundEndpoint(c)
+	rawPath := ""
+	if c != nil && c.Request != nil && c.Request.URL != nil {
+		rawPath = c.Request.URL.Path
+	}
+	return DeriveUpstreamEndpoint(inbound, rawPath, platform)
+}
--- a/backend/internal/handler/endpoint_test.go
+++ b/backend/internal/handler/endpoint_test.go
@@ -0,0 +1,159 @@
+package handler
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+func init() { gin.SetMode(gin.TestMode) }
+
+// ──────────────────────────────────────────────────────────
+// NormalizeInboundEndpoint
+// ──────────────────────────────────────────────────────────
+
+func TestNormalizeInboundEndpoint(t *testing.T) {
+	cases := []struct {
+		in   string
+		want string
+	}{
+		// Already-canonical paths map to themselves.
+		{in: "/v1/messages", want: EndpointMessages},
+		{in: "/v1/chat/completions", want: EndpointChatCompletions},
+		{in: "/v1/responses", want: EndpointResponses},
+		{in: "/v1beta/models", want: EndpointGeminiModels},
+
+		// Platform prefixes (antigravity, openai, sora) and suffixes are dropped.
+		{in: "/antigravity/v1/messages", want: EndpointMessages},
+		{in: "/openai/v1/responses", want: EndpointResponses},
+		{in: "/openai/v1/responses/compact", want: EndpointResponses},
+		{in: "/sora/v1/chat/completions", want: EndpointChatCompletions},
+		{in: "/antigravity/v1beta/models/gemini:generateContent", want: EndpointGeminiModels},
+
+		// Gin route patterns containing wildcards.
+		{in: "/v1beta/models/*modelAction", want: EndpointGeminiModels},
+		{in: "/v1/responses/*subpath", want: EndpointResponses},
+
+		// Unrecognised paths pass through; surrounding whitespace is trimmed.
+		{in: "/v1/embeddings", want: "/v1/embeddings"},
+		{in: "", want: ""},
+		{in: "  /v1/messages  ", want: EndpointMessages},
+	}
+	for _, tc := range cases {
+		t.Run(tc.in, func(t *testing.T) {
+			require.Equal(t, tc.want, NormalizeInboundEndpoint(tc.in))
+		})
+	}
+}
+
+// ──────────────────────────────────────────────────────────
+// DeriveUpstreamEndpoint
+// ──────────────────────────────────────────────────────────
+
+func TestDeriveUpstreamEndpoint(t *testing.T) {
+	cases := []struct {
+		name     string
+		inbound  string
+		rawPath  string
+		platform string
+		want     string
+	}{
+		// Anthropic accounts always target the Messages endpoint.
+		{"anthropic messages", EndpointMessages, "/v1/messages", service.PlatformAnthropic, EndpointMessages},
+
+		// Gemini accounts always target the models endpoint.
+		{"gemini models", EndpointGeminiModels, "/v1beta/models/gemini:gen", service.PlatformGemini, EndpointGeminiModels},
+
+		// Sora accounts always target Chat Completions.
+		{"sora completions", EndpointChatCompletions, "/sora/v1/chat/completions", service.PlatformSora, EndpointChatCompletions},
+
+		// OpenAI accounts target /v1/responses whatever the inbound endpoint is.
+		{"openai responses root", EndpointResponses, "/v1/responses", service.PlatformOpenAI, EndpointResponses},
+		{"openai responses compact", EndpointResponses, "/openai/v1/responses/compact", service.PlatformOpenAI, "/v1/responses/compact"},
+		{"openai responses nested", EndpointResponses, "/openai/v1/responses/compact/detail", service.PlatformOpenAI, "/v1/responses/compact/detail"},
+		{"openai from messages", EndpointMessages, "/v1/messages", service.PlatformOpenAI, EndpointResponses},
+		{"openai from completions", EndpointChatCompletions, "/v1/chat/completions", service.PlatformOpenAI, EndpointResponses},
+
+		// Antigravity accounts choose Claude vs Gemini from the inbound endpoint.
+		{"antigravity claude", EndpointMessages, "/antigravity/v1/messages", service.PlatformAntigravity, EndpointMessages},
+		{"antigravity gemini", EndpointGeminiModels, "/antigravity/v1beta/models", service.PlatformAntigravity, EndpointGeminiModels},
+
+		// An unknown platform echoes the inbound endpoint.
+		{"unknown platform", "/v1/embeddings", "/v1/embeddings", "unknown", "/v1/embeddings"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.want, DeriveUpstreamEndpoint(tc.inbound, tc.rawPath, tc.platform))
+		})
+	}
+}
+
+// ──────────────────────────────────────────────────────────
+// responsesSubpathSuffix
+// ──────────────────────────────────────────────────────────
+
+func TestResponsesSubpathSuffix(t *testing.T) {
+	cases := []struct {
+		in   string
+		want string
+	}{
+		{in: "/v1/responses", want: ""},
+		{in: "/v1/responses/", want: ""},
+		{in: "/v1/responses/compact", want: "/compact"},
+		{in: "/openai/v1/responses/compact/detail", want: "/compact/detail"},
+		{in: "/v1/messages", want: ""},
+		{in: "", want: ""},
+	}
+	for _, tc := range cases {
+		t.Run(tc.in, func(t *testing.T) {
+			require.Equal(t, tc.want, responsesSubpathSuffix(tc.in))
+		})
+	}
+}
+
+// ──────────────────────────────────────────────────────────
+// InboundEndpointMiddleware + context helpers
+// ──────────────────────────────────────────────────────────
+
+func TestInboundEndpointMiddleware(t *testing.T) {
+	var seen string
+
+	engine := gin.New()
+	engine.Use(InboundEndpointMiddleware())
+	engine.POST("/v1/messages", func(c *gin.Context) {
+		seen = GetInboundEndpoint(c)
+		c.Status(http.StatusOK)
+	})
+
+	recorder := httptest.NewRecorder()
+	request := httptest.NewRequest(http.MethodPost, "/v1/messages", nil)
+	engine.ServeHTTP(recorder, request)
+
+	require.Equal(t, EndpointMessages, seen)
+}
+
+func TestGetInboundEndpoint_FallbackWithoutMiddleware(t *testing.T) {
+	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())
+	ginCtx.Request = httptest.NewRequest(http.MethodPost, "/antigravity/v1/messages", nil)
+
+	// The middleware never ran and no route pattern is attached, so the
+	// helper has to derive the endpoint from the raw request URL path
+	// (c.Request.URL.Path).
+	require.Equal(t, EndpointMessages, GetInboundEndpoint(ginCtx))
+}
+
+func TestGetUpstreamEndpoint_FullFlow(t *testing.T) {
+	rec := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(rec)
+	c.Request = httptest.NewRequest(http.MethodPost, "/openai/v1/responses/compact", nil)
+
+	// Simulate middleware.
+	c.Set(ctxKeyInboundEndpoint, NormalizeInboundEndpoint(c.Request.URL.Path))
+
+	got := GetUpstreamEndpoint(c, service.PlatformOpenAI)
+	require.Equal(t, "/v1/responses/compact", got)
+}
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -442,6 +442,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			userAgent := c.GetHeader("User-Agent")
 			clientIP := ip.GetClientIP(c)
 			requestPayloadHash := service.HashUsageRequestPayload(body)
+			inboundEndpoint := GetInboundEndpoint(c)
+			upstreamEndpoint := GetUpstreamEndpoint(c, account.Platform)

 			if result.ReasoningEffort == nil {
 				result.ReasoningEffort = service.NormalizeClaudeOutputEffort(parsedReq.OutputEffort)
@@ -455,6 +457,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					User:               apiKey.User,
 					Account:            account,
 					Subscription:       subscription,
+					InboundEndpoint:    inboundEndpoint,
+					UpstreamEndpoint:   upstreamEndpoint,
 					UserAgent:          userAgent,
 					IPAddress:          clientIP,
 					RequestPayloadHash: requestPayloadHash,
@@ -757,6 +761,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			userAgent := c.GetHeader("User-Agent")
 			clientIP := ip.GetClientIP(c)
 			requestPayloadHash := service.HashUsageRequestPayload(body)
+			inboundEndpoint := GetInboundEndpoint(c)
+			upstreamEndpoint := GetUpstreamEndpoint(c, account.Platform)

 			if result.ReasoningEffort == nil {
 				result.ReasoningEffort = service.NormalizeClaudeOutputEffort(parsedReq.OutputEffort)
@@ -770,6 +776,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					User:               currentAPIKey.User,
 					Account:            account,
 					Subscription:       currentSubscription,
+					InboundEndpoint:    inboundEndpoint,
+					UpstreamEndpoint:   upstreamEndpoint,
 					UserAgent:          userAgent,
 					IPAddress:          clientIP,
 					RequestPayloadHash: requestPayloadHash,
@@ -935,7 +943,7 @@ func (h *GatewayHandler) parseUsageDateRange(c *gin.Context) (time.Time, time.Ti
 	}
 	if s := c.Query("end_date"); s != "" {
 		if t, err := timezone.ParseInLocation("2006-01-02", s); err == nil {
-			endTime = t.Add(24*time.Hour - time.Second) // end of day
+			endTime = t.AddDate(0, 0, 1) // half-open range upper bound
 		}
 	}
 	return startTime, endTime
--- a/backend/internal/handler/gateway_handler_warmup_intercept_unit_test.go
+++ b/backend/internal/handler/gateway_handler_warmup_intercept_unit_test.go
@@ -76,7 +76,7 @@ func (f *fakeGroupRepo) ListActiveByPlatform(context.Context, string) ([]service
 	return nil, nil
 }
 func (f *fakeGroupRepo) ExistsByName(context.Context, string) (bool, error)    { return false, nil }
-func (f *fakeGroupRepo) GetAccountCount(context.Context, int64) (int64, error) { return 0, nil }
+func (f *fakeGroupRepo) GetAccountCount(context.Context, int64) (int64, int64, error) { return 0, 0, nil }
 func (f *fakeGroupRepo) DeleteAccountGroupsByGroupID(context.Context, int64) (int64, error) {
 	return 0, nil
 }
--- a/backend/internal/handler/gateway_helper_hotpath_test.go
+++ b/backend/internal/handler/gateway_helper_hotpath_test.go
@@ -136,7 +136,7 @@ func validClaudeCodeBodyJSON() []byte {
 	return []byte(`{
 		"model":"claude-3-5-sonnet-20241022",
 		"system":[{"text":"You are Claude Code, Anthropic's official CLI for Claude."}],
-		"metadata":{"user_id":"user_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_account__session_abc-123"}
+		"metadata":{"user_id":"user_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_account__session_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"}
 	}`)
 }

@@ -190,7 +190,7 @@ func TestSetClaudeCodeClientContext_ReuseParsedRequestAndContextCache(t *testing
 			System: []any{
 				map[string]any{"text": "You are Claude Code, Anthropic's official CLI for Claude."},
 			},
-			MetadataUserID: "user_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_account__session_abc-123",
+			MetadataUserID: "user_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_account__session_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
 		}

 		// body 非法 JSON，如果函数复用 parsedReq 成功则仍应判定为 Claude Code。
@@ -209,7 +209,7 @@ func TestSetClaudeCodeClientContext_ReuseParsedRequestAndContextCache(t *testing
 			"system": []any{
 				map[string]any{"text": "You are Claude Code, Anthropic's official CLI for Claude."},
 			},
-			"metadata": map[string]any{"user_id": "user_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_account__session_abc-123"},
+			"metadata": map[string]any{"user_id": "user_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_account__session_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"},
 		})

 		SetClaudeCodeClientContext(c, []byte(`{invalid`), nil)
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -504,6 +504,8 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {

 		// 使用量记录通过有界 worker 池提交，避免请求热路径创建无界 goroutine。
 		requestPayloadHash := service.HashUsageRequestPayload(body)
+		inboundEndpoint := GetInboundEndpoint(c)
+		upstreamEndpoint := GetUpstreamEndpoint(c, account.Platform)
 		h.submitUsageRecordTask(func(ctx context.Context) {
 			if err := h.gatewayService.RecordUsageWithLongContext(ctx, &service.RecordUsageLongContextInput{
 				Result:                result,
@@ -511,6 +513,8 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 				User:                  apiKey.User,
 				Account:               account,
 				Subscription:          subscription,
+				InboundEndpoint:       inboundEndpoint,
+				UpstreamEndpoint:      upstreamEndpoint,
 				UserAgent:             userAgent,
 				IPAddress:             clientIP,
 				RequestPayloadHash:    requestPayloadHash,
--- a/backend/internal/handler/openai_chat_completions.go
+++ b/backend/internal/handler/openai_chat_completions.go
@@ -261,8 +261,8 @@ func (h *OpenAIGatewayHandler) ChatCompletions(c *gin.Context) {
 				User:             apiKey.User,
 				Account:          account,
 				Subscription:     subscription,
-				InboundEndpoint:  normalizedOpenAIInboundEndpoint(c, openAIInboundEndpointChatCompletions),
-				UpstreamEndpoint: normalizedOpenAIUpstreamEndpoint(c, openAIUpstreamEndpointResponses),
+				InboundEndpoint:  GetInboundEndpoint(c),
+				UpstreamEndpoint: GetUpstreamEndpoint(c, account.Platform),
 				UserAgent:        userAgent,
 				IPAddress:        clientIP,
 				APIKeyService:    h.apiKeyService,
--- a/backend/internal/handler/openai_gateway_endpoint_normalization_test.go
+++ b/backend/internal/handler/openai_gateway_endpoint_normalization_test.go
@@ -5,42 +5,41 @@ import (
 	"net/http/httptest"
 	"testing"

+	"github.com/Wei-Shaw/sub2api/internal/service"
 	"github.com/gin-gonic/gin"
 	"github.com/stretchr/testify/require"
 )

-func TestNormalizedOpenAIUpstreamEndpoint(t *testing.T) {
+// TestOpenAIUpstreamEndpoint_ViaGetUpstreamEndpoint verifies that the
+// unified GetUpstreamEndpoint helper produces the same results as the
+// former normalizedOpenAIUpstreamEndpoint for OpenAI platform requests.
+func TestOpenAIUpstreamEndpoint_ViaGetUpstreamEndpoint(t *testing.T) {
 	gin.SetMode(gin.TestMode)

 	tests := []struct {
-		name     string
-		path     string
-		fallback string
-		want     string
+		name string
+		path string
+		want string
 	}{
 		{
-			name:     "responses root maps to responses upstream",
-			path:     "/v1/responses",
-			fallback: openAIUpstreamEndpointResponses,
-			want:     "/v1/responses",
+			name: "responses root maps to responses upstream",
+			path: "/v1/responses",
+			want: EndpointResponses,
 		},
 		{
-			name:     "responses compact keeps compact suffix",
-			path:     "/openai/v1/responses/compact",
-			fallback: openAIUpstreamEndpointResponses,
-			want:     "/v1/responses/compact",
+			name: "responses compact keeps compact suffix",
+			path: "/openai/v1/responses/compact",
+			want: "/v1/responses/compact",
 		},
 		{
-			name:     "responses nested suffix preserved",
-			path:     "/openai/v1/responses/compact/detail",
-			fallback: openAIUpstreamEndpointResponses,
-			want:     "/v1/responses/compact/detail",
+			name: "responses nested suffix preserved",
+			path: "/openai/v1/responses/compact/detail",
+			want: "/v1/responses/compact/detail",
 		},
 		{
-			name:     "non responses path uses fallback",
-			path:     "/v1/messages",
-			fallback: openAIUpstreamEndpointResponses,
-			want:     "/v1/responses",
+			name: "non responses path uses platform fallback",
+			path: "/v1/messages",
+			want: EndpointResponses,
 		},
 	}

@@ -50,7 +49,7 @@ func TestNormalizedOpenAIUpstreamEndpoint(t *testing.T) {
 			c, _ := gin.CreateTestContext(rec)
 			c.Request = httptest.NewRequest(http.MethodPost, tt.path, nil)

-			got := normalizedOpenAIUpstreamEndpoint(c, tt.fallback)
+			got := GetUpstreamEndpoint(c, service.PlatformOpenAI)
 			require.Equal(t, tt.want, got)
 		})
 	}
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -37,13 +37,6 @@ type OpenAIGatewayHandler struct {
 	cfg                     *config.Config
 }

-const (
-	openAIInboundEndpointResponses       = "/v1/responses"
-	openAIInboundEndpointMessages        = "/v1/messages"
-	openAIInboundEndpointChatCompletions = "/v1/chat/completions"
-	openAIUpstreamEndpointResponses      = "/v1/responses"
-)
-
 // NewOpenAIGatewayHandler creates a new OpenAIGatewayHandler
 func NewOpenAIGatewayHandler(
 	gatewayService *service.OpenAIGatewayService,
@@ -369,8 +362,8 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 				User:               apiKey.User,
 				Account:            account,
 				Subscription:       subscription,
-				InboundEndpoint:    normalizedOpenAIInboundEndpoint(c, openAIInboundEndpointResponses),
-				UpstreamEndpoint:   normalizedOpenAIUpstreamEndpoint(c, openAIUpstreamEndpointResponses),
+				InboundEndpoint:    GetInboundEndpoint(c),
+				UpstreamEndpoint:   GetUpstreamEndpoint(c, account.Platform),
 				UserAgent:          userAgent,
 				IPAddress:          clientIP,
 				RequestPayloadHash: requestPayloadHash,
@@ -747,8 +740,8 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 				User:               apiKey.User,
 				Account:            account,
 				Subscription:       subscription,
-				InboundEndpoint:    normalizedOpenAIInboundEndpoint(c, openAIInboundEndpointMessages),
-				UpstreamEndpoint:   normalizedOpenAIUpstreamEndpoint(c, openAIUpstreamEndpointResponses),
+				InboundEndpoint:    GetInboundEndpoint(c),
+				UpstreamEndpoint:   GetUpstreamEndpoint(c, account.Platform),
 				UserAgent:          userAgent,
 				IPAddress:          clientIP,
 				RequestPayloadHash: requestPayloadHash,
@@ -1246,8 +1239,8 @@ func (h *OpenAIGatewayHandler) ResponsesWebSocket(c *gin.Context) {
 					User:               apiKey.User,
 					Account:            account,
 					Subscription:       subscription,
-					InboundEndpoint:    normalizedOpenAIInboundEndpoint(c, openAIInboundEndpointResponses),
-					UpstreamEndpoint:   normalizedOpenAIUpstreamEndpoint(c, openAIUpstreamEndpointResponses),
+					InboundEndpoint:    GetInboundEndpoint(c),
+					UpstreamEndpoint:   GetUpstreamEndpoint(c, account.Platform),
 					UserAgent:          userAgent,
 					IPAddress:          clientIP,
 					RequestPayloadHash: service.HashUsageRequestPayload(firstMessage),
@@ -1543,62 +1536,6 @@ func openAIWSIngressFallbackSessionSeed(userID, apiKeyID int64, groupID *int64)
 	return fmt.Sprintf("openai_ws_ingress:%d:%d:%d", gid, userID, apiKeyID)
 }

-func normalizedOpenAIInboundEndpoint(c *gin.Context, fallback string) string {
-	path := strings.TrimSpace(fallback)
-	if c != nil {
-		if fullPath := strings.TrimSpace(c.FullPath()); fullPath != "" {
-			path = fullPath
-		} else if c.Request != nil && c.Request.URL != nil {
-			if requestPath := strings.TrimSpace(c.Request.URL.Path); requestPath != "" {
-				path = requestPath
-			}
-		}
-	}
-
-	switch {
-	case strings.Contains(path, openAIInboundEndpointChatCompletions):
-		return openAIInboundEndpointChatCompletions
-	case strings.Contains(path, openAIInboundEndpointMessages):
-		return openAIInboundEndpointMessages
-	case strings.Contains(path, openAIInboundEndpointResponses):
-		return openAIInboundEndpointResponses
-	default:
-		return path
-	}
-}
-
-func normalizedOpenAIUpstreamEndpoint(c *gin.Context, fallback string) string {
-	base := strings.TrimSpace(fallback)
-	if base == "" {
-		base = openAIUpstreamEndpointResponses
-	}
-	base = strings.TrimRight(base, "/")
-
-	if c == nil || c.Request == nil || c.Request.URL == nil {
-		return base
-	}
-
-	path := strings.TrimRight(strings.TrimSpace(c.Request.URL.Path), "/")
-	if path == "" {
-		return base
-	}
-
-	idx := strings.LastIndex(path, "/responses")
-	if idx < 0 {
-		return base
-	}
-
-	suffix := strings.TrimSpace(path[idx+len("/responses"):])
-	if suffix == "" || suffix == "/" {
-		return base
-	}
-	if !strings.HasPrefix(suffix, "/") {
-		return base
-	}
-
-	return base + suffix
-}
-
 func isOpenAIWSUpgradeRequest(r *http.Request) bool {
 	if r == nil {
 		return false
--- a/backend/internal/handler/sora_gateway_handler.go
+++ b/backend/internal/handler/sora_gateway_handler.go
@@ -400,6 +400,8 @@ func (h *SoraGatewayHandler) ChatCompletions(c *gin.Context) {
 		userAgent := c.GetHeader("User-Agent")
 		clientIP := ip.GetClientIP(c)
 		requestPayloadHash := service.HashUsageRequestPayload(body)
+		inboundEndpoint := GetInboundEndpoint(c)
+		upstreamEndpoint := GetUpstreamEndpoint(c, account.Platform)

 		// 使用量记录通过有界 worker 池提交，避免请求热路径创建无界 goroutine。
 		h.submitUsageRecordTask(func(ctx context.Context) {
@@ -409,6 +411,8 @@ func (h *SoraGatewayHandler) ChatCompletions(c *gin.Context) {
 				User:               apiKey.User,
 				Account:            account,
 				Subscription:       subscription,
+				InboundEndpoint:    inboundEndpoint,
+				UpstreamEndpoint:   upstreamEndpoint,
 				UserAgent:          userAgent,
 				IPAddress:          clientIP,
 				RequestPayloadHash: requestPayloadHash,
--- a/backend/internal/handler/sora_gateway_handler_test.go
+++ b/backend/internal/handler/sora_gateway_handler_test.go
@@ -273,8 +273,8 @@ func (r *stubGroupRepo) ListActiveByPlatform(ctx context.Context, platform strin
 func (r *stubGroupRepo) ExistsByName(ctx context.Context, name string) (bool, error) {
 	return false, nil
 }
-func (r *stubGroupRepo) GetAccountCount(ctx context.Context, groupID int64) (int64, error) {
-	return 0, nil
+func (r *stubGroupRepo) GetAccountCount(ctx context.Context, groupID int64) (int64, int64, error) {
+	return 0, 0, nil
 }
 func (r *stubGroupRepo) DeleteAccountGroupsByGroupID(ctx context.Context, groupID int64) (int64, error) {
 	return 0, nil
@@ -345,6 +345,12 @@ func (s *stubUsageLogRepo) GetUpstreamEndpointStatsWithFilters(ctx context.Conte
 func (s *stubUsageLogRepo) GetGroupStatsWithFilters(ctx context.Context, startTime, endTime time.Time, userID, apiKeyID, accountID, groupID int64, requestType *int16, stream *bool, billingType *int8) ([]usagestats.GroupStat, error) {
 	return nil, nil
 }
+func (s *stubUsageLogRepo) GetUserBreakdownStats(ctx context.Context, startTime, endTime time.Time, dim usagestats.UserBreakdownDimension, limit int) ([]usagestats.UserBreakdownItem, error) {
+	return nil, nil
+}
+func (s *stubUsageLogRepo) GetAllGroupUsageSummary(ctx context.Context, todayStart time.Time) ([]usagestats.GroupUsageSummary, error) {
+	return nil, nil
+}
 func (s *stubUsageLogRepo) GetAPIKeyUsageTrend(ctx context.Context, startTime, endTime time.Time, granularity string, limit int) ([]usagestats.APIKeyUsageTrendPoint, error) {
 	return nil, nil
 }
--- a/backend/internal/handler/usage_handler.go
+++ b/backend/internal/handler/usage_handler.go
@@ -114,8 +114,8 @@ func (h *UsageHandler) List(c *gin.Context) {
 			response.BadRequest(c, "Invalid end_date format, use YYYY-MM-DD")
 			return
 		}
-		// Set end time to end of day
-		t = t.Add(24*time.Hour - time.Nanosecond)
+		// Use half-open range [start, end), move to next calendar day start (DST-safe).
+		t = t.AddDate(0, 0, 1)
 		endTime = &t
 	}

@@ -227,8 +227,8 @@ func (h *UsageHandler) Stats(c *gin.Context) {
 			response.BadRequest(c, "Invalid end_date format, use YYYY-MM-DD")
 			return
 		}
-		// 设置结束时间为当天结束
-		endTime = endTime.Add(24*time.Hour - time.Nanosecond)
+		// 与 SQL 条件 created_at < end 对齐，使用次日 00:00 作为上边界（DST-safe）。
+		endTime = endTime.AddDate(0, 0, 1)
 	} else {
 		// 使用 period 参数
 		period := c.DefaultQuery("period", "today")
--- a/backend/internal/pkg/antigravity/client.go
+++ b/backend/internal/pkg/antigravity/client.go
@@ -124,10 +124,68 @@ type IneligibleTier struct {
 type LoadCodeAssistResponse struct {
 	CloudAICompanionProject string            `json:"cloudaicompanionProject"`
 	CurrentTier             *TierInfo         `json:"currentTier,omitempty"`
-	PaidTier                *TierInfo         `json:"paidTier,omitempty"`
+	PaidTier                *PaidTierInfo     `json:"paidTier,omitempty"`
 	IneligibleTiers         []*IneligibleTier `json:"ineligibleTiers,omitempty"`
 }

+// PaidTierInfo describes the paid tier, including its AI Credits balances.
+type PaidTierInfo struct {
+	ID               string            `json:"id"`
+	Name             string            `json:"name"`
+	Description      string            `json:"description"`
+	AvailableCredits []AvailableCredit `json:"availableCredits,omitempty"`
+}
+
+// UnmarshalJSON accepts paidTier encoded either as a JSON string or as a JSON object.
+func (p *PaidTierInfo) UnmarshalJSON(data []byte) error {
+	data = bytes.TrimSpace(data)
+	if len(data) == 0 || string(data) == "null" {
+		return nil // empty or null input leaves p untouched
+	}
+	if data[0] == '"' { // string form: the decoded value becomes the tier ID
+		var id string
+		if err := json.Unmarshal(data, &id); err != nil {
+			return err
+		}
+		p.ID = id
+		return nil
+	}
+	type alias PaidTierInfo // local type without methods, so decoding it does not re-enter UnmarshalJSON
+	var raw alias
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+	*p = PaidTierInfo(raw) // object form: overwrite p with the decoded fields
+	return nil
+}
+
+// AvailableCredit is a single AI Credits balance record; amounts are carried as strings.
+type AvailableCredit struct {
+	CreditType                  string `json:"creditType,omitempty"`
+	CreditAmount                string `json:"creditAmount,omitempty"`
+	MinimumCreditAmountForUsage string `json:"minimumCreditAmountForUsage,omitempty"`
+}
+
+// GetAmount parses creditAmount as a float64; an empty string yields 0.
+func (c *AvailableCredit) GetAmount() float64 {
+	if c.CreditAmount == "" {
+		return 0
+	}
+	var value float64
+	_, _ = fmt.Sscanf(c.CreditAmount, "%f", &value) // scan errors are discarded; value stays 0 if nothing parses
+	return value
+}
+
+// GetMinimumAmount parses minimumCreditAmountForUsage as a float64; an empty string yields 0.
+func (c *AvailableCredit) GetMinimumAmount() float64 {
+	if c.MinimumCreditAmountForUsage == "" {
+		return 0
+	}
+	var value float64
+	_, _ = fmt.Sscanf(c.MinimumCreditAmountForUsage, "%f", &value) // scan errors are discarded; value stays 0 if nothing parses
+	return value
+}
+
 // OnboardUserRequest onboardUser 请求
 type OnboardUserRequest struct {
 	TierID   string `json:"tierId"`
@@ -157,6 +215,14 @@ func (r *LoadCodeAssistResponse) GetTier() string {
 	return ""
 }

+// GetAvailableCredits returns the paid tier's AI Credits balance list, or nil when there is no paid tier.
+func (r *LoadCodeAssistResponse) GetAvailableCredits() []AvailableCredit {
+	if r.PaidTier == nil {
+		return nil
+	}
+	return r.PaidTier.AvailableCredits
+}
+
 // Client Antigravity API 客户端
 type Client struct {
 	httpClient *http.Client
--- a/backend/internal/pkg/antigravity/client_test.go
+++ b/backend/internal/pkg/antigravity/client_test.go
@@ -190,7 +190,7 @@ func TestTierInfo_UnmarshalJSON_通过JSON嵌套结构(t *testing.T) {
 func TestGetTier_PaidTier优先(t *testing.T) {
 	resp := &LoadCodeAssistResponse{
 		CurrentTier: &TierInfo{ID: "free-tier"},
-		PaidTier:    &TierInfo{ID: "g1-pro-tier"},
+		PaidTier:    &PaidTierInfo{ID: "g1-pro-tier"},
 	}
 	if got := resp.GetTier(); got != "g1-pro-tier" {
 		t.Errorf("应返回 paidTier: got %s", got)
@@ -209,7 +209,7 @@ func TestGetTier_回退到CurrentTier(t *testing.T) {
 func TestGetTier_PaidTier为空ID(t *testing.T) {
 	resp := &LoadCodeAssistResponse{
 		CurrentTier: &TierInfo{ID: "free-tier"},
-		PaidTier:    &TierInfo{ID: ""},
+		PaidTier:    &PaidTierInfo{ID: ""},
 	}
 	// paidTier.ID 为空时应回退到 currentTier
 	if got := resp.GetTier(); got != "free-tier" {
@@ -217,6 +217,32 @@ func TestGetTier_PaidTier为空ID(t *testing.T) {
 	}
 }

+func TestGetAvailableCredits(t *testing.T) {
+	resp := &LoadCodeAssistResponse{
+		PaidTier: &PaidTierInfo{
+			ID: "g1-pro-tier",
+			AvailableCredits: []AvailableCredit{
+				{
+					CreditType:                  "GOOGLE_ONE_AI",
+					CreditAmount:                "25",
+					MinimumCreditAmountForUsage: "5",
+				},
+			},
+		},
+	}
+
+	credits := resp.GetAvailableCredits() // expected to hand back the paid tier's single credit record
+	if len(credits) != 1 {
+		t.Fatalf("AI Credits 数量不匹配: got %d", len(credits))
+	}
+	if credits[0].GetAmount() != 25 { // the string "25" must parse to 25
+		t.Errorf("CreditAmount 解析不正确: got %v", credits[0].GetAmount())
+	}
+	if credits[0].GetMinimumAmount() != 5 { // the string "5" must parse to 5
+		t.Errorf("MinimumCreditAmountForUsage 解析不正确: got %v", credits[0].GetMinimumAmount())
+	}
+}
+
 func TestGetTier_两者都为nil(t *testing.T) {
 	resp := &LoadCodeAssistResponse{}
 	if got := resp.GetTier(); got != "" {
--- a/backend/internal/pkg/antigravity/oauth.go
+++ b/backend/internal/pkg/antigravity/oauth.go
@@ -49,8 +49,8 @@ const (
 	antigravityDailyBaseURL = "https://daily-cloudcode-pa.sandbox.googleapis.com"
 )

-// defaultUserAgentVersion 可通过环境变量 ANTIGRAVITY_USER_AGENT_VERSION 配置，默认 1.20.4
-var defaultUserAgentVersion = "1.20.4"
+// defaultUserAgentVersion 可通过环境变量 ANTIGRAVITY_USER_AGENT_VERSION 配置，默认 1.20.5
+var defaultUserAgentVersion = "1.20.5"

 // defaultClientSecret 可通过环境变量 ANTIGRAVITY_OAUTH_CLIENT_SECRET 配置
 var defaultClientSecret = "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"
--- a/backend/internal/pkg/antigravity/oauth_test.go
+++ b/backend/internal/pkg/antigravity/oauth_test.go
@@ -690,7 +690,7 @@ func TestConstants_值正确(t *testing.T) {
 	if RedirectURI != "http://localhost:8085/callback" {
 		t.Errorf("RedirectURI 不匹配: got %s", RedirectURI)
 	}
-	if GetUserAgent() != "antigravity/1.20.4 windows/amd64" {
+	if GetUserAgent() != "antigravity/1.20.5 windows/amd64" {
 		t.Errorf("UserAgent 不匹配: got %s", GetUserAgent())
 	}
 	if SessionTTL != 30*time.Minute {
--- a/backend/internal/pkg/response/response.go
+++ b/backend/internal/pkg/response/response.go
@@ -47,6 +47,15 @@ func Created(c *gin.Context, data any) {
 	})
 }

+// Accepted writes an HTTP 202 JSON response (code 0, message "accepted") for an asynchronously accepted request.
+func Accepted(c *gin.Context, data any) {
+	c.JSON(http.StatusAccepted, Response{
+		Code:    0,
+		Message: "accepted",
+		Data:    data,
+	})
+}
+
 // Error 返回错误响应
 func Error(c *gin.Context, statusCode int, message string) {
 	c.JSON(statusCode, Response{
--- a/backend/internal/pkg/usagestats/usage_log_types.go
+++ b/backend/internal/pkg/usagestats/usage_log_types.go
@@ -90,6 +90,13 @@ type EndpointStat struct {
 	ActualCost  float64 `json:"actual_cost"` // 实际扣除
 }

+// GroupUsageSummary represents today's and cumulative cost for a single group.
+type GroupUsageSummary struct {
+	GroupID   int64   `json:"group_id"`
+	TodayCost float64 `json:"today_cost"` // actual_cost summed from the start of today
+	TotalCost float64 `json:"total_cost"` // actual_cost summed over all usage logs
+}
+
 // GroupStat represents usage statistics for a single group
 type GroupStat struct {
 	GroupID     int64   `json:"group_id"`
@@ -125,6 +132,26 @@ type UserSpendingRankingItem struct {
 type UserSpendingRankingResponse struct {
 	Ranking         []UserSpendingRankingItem `json:"ranking"`
 	TotalActualCost float64                   `json:"total_actual_cost"`
+	TotalRequests   int64                     `json:"total_requests"`
+	TotalTokens     int64                     `json:"total_tokens"`
+}
+
+// UserBreakdownItem represents per-user usage breakdown within a dimension (group, model, endpoint).
+type UserBreakdownItem struct {
+	UserID      int64   `json:"user_id"`
+	Email       string  `json:"email"`
+	Requests    int64   `json:"requests"`
+	TotalTokens int64   `json:"total_tokens"`
+	Cost        float64 `json:"cost"`        // standard billed cost
+	ActualCost  float64 `json:"actual_cost"` // amount actually deducted
+}
+
+// UserBreakdownDimension selects the optional filters for a per-user breakdown; zero-valued fields are not applied.
+type UserBreakdownDimension struct {
+	GroupID      int64  // filter by group_id (>0 to enable)
+	Model        string // filter by model name (non-empty to enable)
+	Endpoint     string // filter by endpoint value (non-empty to enable)
+	EndpointType string // what Endpoint is matched against: "inbound", "upstream", or "path"
 }

 // APIKeyUsageTrendPoint represents API key usage trend data point
--- a/backend/internal/repository/backup_s3_store.go
+++ b/backend/internal/repository/backup_s3_store.go
@@ -57,6 +57,7 @@ func NewS3BackupStoreFactory() service.BackupObjectStoreFactory {

 func (s *S3BackupStore) Upload(ctx context.Context, key string, body io.Reader, contentType string) (int64, error) {
 	// 读取全部内容以获取大小（S3 PutObject 需要知道内容长度）
+	// 注意：阿里云 OSS 不兼容 s3manager 分片上传的签名方式，因此使用 PutObject
 	data, err := io.ReadAll(body)
 	if err != nil {
 		return 0, fmt.Errorf("read body: %w", err)
--- a/backend/internal/repository/billing_cache.go
+++ b/backend/internal/repository/billing_cache.go
@@ -20,6 +20,11 @@ const (
 	billingCacheTTL           = 5 * time.Minute
 	billingCacheJitter        = 30 * time.Second
 	rateLimitCacheTTL         = 7 * 24 * time.Hour // 7 days matches the longest window
+
+	// Rate limit window durations — must match service.RateLimitWindow* constants.
+	rateLimitWindow5h = 5 * time.Hour
+	rateLimitWindow1d = 24 * time.Hour
+	rateLimitWindow7d = 7 * 24 * time.Hour
 )

 // jitteredTTL 返回带随机抖动的 TTL，防止缓存雪崩
@@ -90,17 +95,40 @@ var (
 		return 1
 	`)

-	// updateRateLimitUsageScript atomically increments all three rate limit usage counters.
-	// Returns 0 if the key doesn't exist (cache miss), 1 on success.
+	// updateRateLimitUsageScript atomically increments all three rate limit usage counters
+	// with window expiration checking. If a window has expired, its usage is reset to cost
+	// (instead of accumulated) and the window timestamp is updated, matching the DB-side
+	// IncrementRateLimitUsage semantics.
+	//
+	// ARGV: [1]=cost, [2]=ttl_seconds, [3]=now_unix, [4]=window_5h_seconds, [5]=window_1d_seconds, [6]=window_7d_seconds
 	updateRateLimitUsageScript = redis.NewScript(`
 		local exists = redis.call('EXISTS', KEYS[1])
 		if exists == 0 then
 			return 0
 		end
 		local cost = tonumber(ARGV[1])
-		redis.call('HINCRBYFLOAT', KEYS[1], 'usage_5h', cost)
-		redis.call('HINCRBYFLOAT', KEYS[1], 'usage_1d', cost)
-		redis.call('HINCRBYFLOAT', KEYS[1], 'usage_7d', cost)
+		local now = tonumber(ARGV[3])
+		local win5h = tonumber(ARGV[4])
+		local win1d = tonumber(ARGV[5])
+		local win7d = tonumber(ARGV[6])
+
+		-- Helper: check if window is expired and update usage + window accordingly
+		-- Returns nothing, modifies the hash in-place.
+		local function update_window(usage_field, window_field, window_duration)
+			local w = tonumber(redis.call('HGET', KEYS[1], window_field) or 0)
+			if w == 0 or (now - w) >= window_duration then
+				-- Window expired or never started: reset usage to cost, start new window
+				redis.call('HSET', KEYS[1], usage_field, tostring(cost))
+				redis.call('HSET', KEYS[1], window_field, tostring(now))
+			else
+				-- Window still valid: accumulate
+				redis.call('HINCRBYFLOAT', KEYS[1], usage_field, cost)
+			end
+		end
+
+		update_window('usage_5h', 'window_5h', win5h)
+		update_window('usage_1d', 'window_1d', win1d)
+		update_window('usage_7d', 'window_7d', win7d)
 		redis.call('EXPIRE', KEYS[1], ARGV[2])
 		return 1
 	`)
@@ -280,7 +308,15 @@ func (c *billingCache) SetAPIKeyRateLimit(ctx context.Context, keyID int64, data

 func (c *billingCache) UpdateAPIKeyRateLimitUsage(ctx context.Context, keyID int64, cost float64) error {
 	key := billingRateLimitKey(keyID)
-	_, err := updateRateLimitUsageScript.Run(ctx, c.rdb, []string{key}, cost, int(rateLimitCacheTTL.Seconds())).Result()
+	now := time.Now().Unix()
+	_, err := updateRateLimitUsageScript.Run(ctx, c.rdb, []string{key},
+		cost,
+		int(rateLimitCacheTTL.Seconds()),
+		now,
+		int(rateLimitWindow5h.Seconds()),
+		int(rateLimitWindow1d.Seconds()),
+		int(rateLimitWindow7d.Seconds()),
+	).Result()
 	if err != nil && !errors.Is(err, redis.Nil) {
 		log.Printf("Warning: update rate limit usage cache failed for api key %d: %v", keyID, err)
 		return err
--- a/backend/internal/repository/group_repo.go
+++ b/backend/internal/repository/group_repo.go
@@ -88,8 +88,9 @@ func (r *groupRepository) GetByID(ctx context.Context, id int64) (*service.Group
 	if err != nil {
 		return nil, err
 	}
-	count, _ := r.GetAccountCount(ctx, out.ID)
-	out.AccountCount = count
+	total, active, _ := r.GetAccountCount(ctx, out.ID)
+	out.AccountCount = total
+	out.ActiveAccountCount = active
 	return out, nil
 }

@@ -256,7 +257,10 @@ func (r *groupRepository) ListWithFilters(ctx context.Context, params pagination
 	counts, err := r.loadAccountCounts(ctx, groupIDs)
 	if err == nil {
 		for i := range outGroups {
-			outGroups[i].AccountCount = counts[outGroups[i].ID]
+			c := counts[outGroups[i].ID]
+			outGroups[i].AccountCount = c.Total
+			outGroups[i].ActiveAccountCount = c.Active
+			outGroups[i].RateLimitedAccountCount = c.RateLimited
 		}
 	}

@@ -283,7 +287,10 @@ func (r *groupRepository) ListActive(ctx context.Context) ([]service.Group, erro
 	counts, err := r.loadAccountCounts(ctx, groupIDs)
 	if err == nil {
 		for i := range outGroups {
-			outGroups[i].AccountCount = counts[outGroups[i].ID]
+			c := counts[outGroups[i].ID]
+			outGroups[i].AccountCount = c.Total
+			outGroups[i].ActiveAccountCount = c.Active
+			outGroups[i].RateLimitedAccountCount = c.RateLimited
 		}
 	}

@@ -310,7 +317,10 @@ func (r *groupRepository) ListActiveByPlatform(ctx context.Context, platform str
 	counts, err := r.loadAccountCounts(ctx, groupIDs)
 	if err == nil {
 		for i := range outGroups {
-			outGroups[i].AccountCount = counts[outGroups[i].ID]
+			c := counts[outGroups[i].ID]
+			outGroups[i].AccountCount = c.Total
+			outGroups[i].ActiveAccountCount = c.Active
+			outGroups[i].RateLimitedAccountCount = c.RateLimited
 		}
 	}

@@ -369,12 +379,20 @@ func (r *groupRepository) ExistsByIDs(ctx context.Context, ids []int64) (map[int
 	return result, nil
 }

-func (r *groupRepository) GetAccountCount(ctx context.Context, groupID int64) (int64, error) {
-	var count int64
-	if err := scanSingleRow(ctx, r.sql, "SELECT COUNT(*) FROM account_groups WHERE group_id = $1", []any{groupID}, &count); err != nil {
-		return 0, err
-	}
-	return count, nil
+func (r *groupRepository) GetAccountCount(ctx context.Context, groupID int64) (total int64, active int64, err error) {
+	var rateLimited int64 // receives the query's third column; NOTE(review): computed but never returned to the caller
+	err = scanSingleRow(ctx, r.sql,
+		`SELECT COUNT(*),
+			COUNT(*) FILTER (WHERE a.status = 'active' AND a.schedulable = true),
+			COUNT(*) FILTER (WHERE a.status = 'active' AND (
+				a.rate_limit_reset_at > NOW() OR
+				a.overload_until > NOW() OR
+				a.temp_unschedulable_until > NOW()
+			))
+		FROM account_groups ag JOIN accounts a ON a.id = ag.account_id
+		WHERE ag.group_id = $1`,
+		[]any{groupID}, &total, &active, &rateLimited)
+	return // naked return of the named results total, active and err
 }

 func (r *groupRepository) DeleteAccountGroupsByGroupID(ctx context.Context, groupID int64) (int64, error) {
@@ -500,15 +518,32 @@ func (r *groupRepository) DeleteCascade(ctx context.Context, id int64) ([]int64,
 	return affectedUserIDs, nil
 }

-func (r *groupRepository) loadAccountCounts(ctx context.Context, groupIDs []int64) (counts map[int64]int64, err error) {
-	counts = make(map[int64]int64, len(groupIDs))
+type groupAccountCounts struct {
+	Total       int64 // every account linked to the group
+	Active      int64 // accounts with status 'active' and schedulable = true
+	RateLimited int64 // active accounts whose rate-limit, overload, or temp-unschedulable deadline is still in the future
+}
+
+func (r *groupRepository) loadAccountCounts(ctx context.Context, groupIDs []int64) (counts map[int64]groupAccountCounts, err error) {
+	counts = make(map[int64]groupAccountCounts, len(groupIDs))
 	if len(groupIDs) == 0 {
 		return counts, nil
 	}

 	rows, err := r.sql.QueryContext(
 		ctx,
-		"SELECT group_id, COUNT(*) FROM account_groups WHERE group_id = ANY($1) GROUP BY group_id",
+		`SELECT ag.group_id,
+			COUNT(*) AS total,
+			COUNT(*) FILTER (WHERE a.status = 'active' AND a.schedulable = true) AS active,
+			COUNT(*) FILTER (WHERE a.status = 'active' AND (
+				a.rate_limit_reset_at > NOW() OR
+				a.overload_until > NOW() OR
+				a.temp_unschedulable_until > NOW()
+			)) AS rate_limited
+		FROM account_groups ag
+		JOIN accounts a ON a.id = ag.account_id
+		WHERE ag.group_id = ANY($1)
+		GROUP BY ag.group_id`,
 		pq.Array(groupIDs),
 	)
 	if err != nil {
@@ -523,11 +558,11 @@ func (r *groupRepository) loadAccountCounts(ctx context.Context, groupIDs []int6

 	for rows.Next() {
 		var groupID int64
-		var count int64
-		if err = rows.Scan(&groupID, &count); err != nil {
+		var c groupAccountCounts
+		if err = rows.Scan(&groupID, &c.Total, &c.Active, &c.RateLimited); err != nil {
 			return nil, err
 		}
-		counts[groupID] = count
+		counts[groupID] = c
 	}
 	if err = rows.Err(); err != nil {
 		return nil, err
--- a/backend/internal/repository/group_repo_integration_test.go
+++ b/backend/internal/repository/group_repo_integration_test.go
@@ -603,7 +603,7 @@ func (s *GroupRepoSuite) TestGetAccountCount() {
 	_, err = s.tx.ExecContext(s.ctx, "INSERT INTO account_groups (account_id, group_id, priority, created_at) VALUES ($1, $2, $3, NOW())", a2, group.ID, 2)
 	s.Require().NoError(err)

-	count, err := s.repo.GetAccountCount(s.ctx, group.ID)
+	count, _, err := s.repo.GetAccountCount(s.ctx, group.ID)
 	s.Require().NoError(err, "GetAccountCount")
 	s.Require().Equal(int64(2), count)
 }
@@ -619,7 +619,7 @@ func (s *GroupRepoSuite) TestGetAccountCount_Empty() {
 	}
 	s.Require().NoError(s.repo.Create(s.ctx, group))

-	count, err := s.repo.GetAccountCount(s.ctx, group.ID)
+	count, _, err := s.repo.GetAccountCount(s.ctx, group.ID)
 	s.Require().NoError(err)
 	s.Require().Zero(count)
 }
@@ -651,7 +651,7 @@ func (s *GroupRepoSuite) TestDeleteAccountGroupsByGroupID() {
 	s.Require().NoError(err, "DeleteAccountGroupsByGroupID")
 	s.Require().Equal(int64(1), affected, "expected 1 affected row")

-	count, err := s.repo.GetAccountCount(s.ctx, g.ID)
+	count, _, err := s.repo.GetAccountCount(s.ctx, g.ID)
 	s.Require().NoError(err, "GetAccountCount")
 	s.Require().Equal(int64(0), count, "expected 0 account groups")
 }
@@ -692,7 +692,7 @@ func (s *GroupRepoSuite) TestDeleteAccountGroupsByGroupID_MultipleAccounts() {
 	s.Require().NoError(err)
 	s.Require().Equal(int64(3), affected)

-	count, _ := s.repo.GetAccountCount(s.ctx, g.ID)
+	count, _, _ := s.repo.GetAccountCount(s.ctx, g.ID)
 	s.Require().Zero(count)
 }

--- a/backend/internal/repository/usage_log_repo.go
+++ b/backend/internal/repository/usage_log_repo.go
@@ -2161,7 +2161,9 @@ func (r *usageLogRepository) GetUserSpendingRanking(ctx context.Context, startTi
 				actual_cost,
 				requests,
 				tokens,
-				COALESCE(SUM(actual_cost) OVER (), 0) as total_actual_cost
+				COALESCE(SUM(actual_cost) OVER (), 0) as total_actual_cost,
+				COALESCE(SUM(requests) OVER (), 0) as total_requests,
+				COALESCE(SUM(tokens) OVER (), 0) as total_tokens
 			FROM user_spend
 			ORDER BY actual_cost DESC, tokens DESC, user_id ASC
 			LIMIT $3
@@ -2172,7 +2174,9 @@ func (r *usageLogRepository) GetUserSpendingRanking(ctx context.Context, startTi
 			actual_cost,
 			requests,
 			tokens,
-			total_actual_cost
+			total_actual_cost,
+			total_requests,
+			total_tokens
 		FROM ranked
 		ORDER BY actual_cost DESC, tokens DESC, user_id ASC
 	`
@@ -2190,9 +2194,11 @@ func (r *usageLogRepository) GetUserSpendingRanking(ctx context.Context, startTi

 	ranking := make([]UserSpendingRankingItem, 0)
 	totalActualCost := 0.0
+	totalRequests := int64(0)
+	totalTokens := int64(0)
 	for rows.Next() {
 		var row UserSpendingRankingItem
-		if err = rows.Scan(&row.UserID, &row.Email, &row.ActualCost, &row.Requests, &row.Tokens, &totalActualCost); err != nil {
+		if err = rows.Scan(&row.UserID, &row.Email, &row.ActualCost, &row.Requests, &row.Tokens, &totalActualCost, &totalRequests, &totalTokens); err != nil {
 			return nil, err
 		}
 		ranking = append(ranking, row)
@@ -2204,6 +2210,8 @@ func (r *usageLogRepository) GetUserSpendingRanking(ctx context.Context, startTi
 	return &UserSpendingRankingResponse{
 		Ranking:         ranking,
 		TotalActualCost: totalActualCost,
+		TotalRequests:   totalRequests,
+		TotalTokens:     totalTokens,
 	}, nil
 }

@@ -2992,6 +3000,120 @@ func (r *usageLogRepository) GetGroupStatsWithFilters(ctx context.Context, start
 	return results, nil
 }

+// GetUserBreakdownStats returns per-user usage totals in [startTime, endTime), narrowed by the enabled fields of dim, ordered by actual cost descending.
+func (r *usageLogRepository) GetUserBreakdownStats(ctx context.Context, startTime, endTime time.Time, dim usagestats.UserBreakdownDimension, limit int) (results []usagestats.UserBreakdownItem, err error) {
+	query := `
+		SELECT
+			COALESCE(ul.user_id, 0) as user_id,
+			COALESCE(u.email, '') as email,
+			COUNT(*) as requests,
+			COALESCE(SUM(ul.input_tokens + ul.output_tokens + ul.cache_creation_tokens + ul.cache_read_tokens), 0) as total_tokens,
+			COALESCE(SUM(ul.total_cost), 0) as cost,
+			COALESCE(SUM(ul.actual_cost), 0) as actual_cost
+		FROM usage_logs ul
+		LEFT JOIN users u ON u.id = ul.user_id
+		WHERE ul.created_at >= $1 AND ul.created_at < $2
+	`
+	args := []any{startTime, endTime} // bound to $1 and $2; later filters take the next placeholder index
+
+	if dim.GroupID > 0 {
+		query += fmt.Sprintf(" AND ul.group_id = $%d", len(args)+1)
+		args = append(args, dim.GroupID)
+	}
+	if dim.Model != "" {
+		query += fmt.Sprintf(" AND ul.model = $%d", len(args)+1)
+		args = append(args, dim.Model)
+	}
+	if dim.Endpoint != "" {
+		col := resolveEndpointColumn(dim.EndpointType) // one of a fixed set of column expressions, not caller text
+		query += fmt.Sprintf(" AND %s = $%d", col, len(args)+1)
+		args = append(args, dim.Endpoint)
+	}
+
+	query += " GROUP BY ul.user_id, u.email ORDER BY actual_cost DESC"
+	if limit > 0 { // a non-positive limit leaves the result unbounded
+		query += fmt.Sprintf(" LIMIT %d", limit)
+	}
+
+	rows, err := r.sql.QueryContext(ctx, query, args...)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { // report a Close failure only when nothing else has failed
+		if closeErr := rows.Close(); closeErr != nil && err == nil {
+			err = closeErr
+			results = nil
+		}
+	}()
+
+	results = make([]usagestats.UserBreakdownItem, 0) // non-nil, so no rows yields an empty slice
+	for rows.Next() {
+		var row usagestats.UserBreakdownItem
+		if err := rows.Scan(
+			&row.UserID,
+			&row.Email,
+			&row.Requests,
+			&row.TotalTokens,
+			&row.Cost,
+			&row.ActualCost,
+		); err != nil {
+			return nil, err
+		}
+		results = append(results, row)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return results, nil
+}
+
+// GetAllGroupUsageSummary returns today's and cumulative actual_cost for every group; groups without usage logs report zero for both.
+// todayStart is the start-of-day in the caller's timezone (UTC-based).
+// TODO(perf): This query scans ALL usage_logs rows for total_cost aggregation.
+// When usage_logs exceeds ~1M rows, consider adding a short-lived cache (30s)
+// or a materialized view / pre-aggregation table for cumulative costs.
+func (r *usageLogRepository) GetAllGroupUsageSummary(ctx context.Context, todayStart time.Time) ([]usagestats.GroupUsageSummary, error) {
+	query := `
+		SELECT
+			g.id AS group_id,
+			COALESCE(SUM(ul.actual_cost), 0) AS total_cost,
+			COALESCE(SUM(CASE WHEN ul.created_at >= $1 THEN ul.actual_cost ELSE 0 END), 0) AS today_cost
+		FROM groups g
+		LEFT JOIN usage_logs ul ON ul.group_id = g.id
+		GROUP BY g.id
+	`
+
+	rows, err := r.sql.QueryContext(ctx, query, todayStart)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = rows.Close() }() // Close error is discarded
+	var results []usagestats.GroupUsageSummary // stays nil when the query yields no rows
+	for rows.Next() {
+		var row usagestats.GroupUsageSummary
+		if err := rows.Scan(&row.GroupID, &row.TotalCost, &row.TodayCost); err != nil { // scan order follows the SELECT list: total_cost before today_cost
+			return nil, err
+		}
+		results = append(results, row)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return results, nil
+}
+
+// resolveEndpointColumn maps an endpoint type to the SQL column expression (on the ul alias) used for endpoint filtering.
+func resolveEndpointColumn(endpointType string) string {
+	switch endpointType {
+	case "upstream":
+		return "ul.upstream_endpoint"
+	case "path": // SQL concatenation of the inbound and upstream endpoints
+		return "ul.inbound_endpoint || ' -> ' || ul.upstream_endpoint"
+	default: // "inbound", empty, and any unrecognized type
+		return "ul.inbound_endpoint"
+	}
+}
+
 // GetGlobalStats gets usage statistics for all users within a time range
 func (r *usageLogRepository) GetGlobalStats(ctx context.Context, startTime, endTime time.Time) (*UsageStats, error) {
 	query := `
@@ -3004,7 +3126,7 @@ func (r *usageLogRepository) GetGlobalStats(ctx context.Context, startTime, endT
 			COALESCE(SUM(actual_cost), 0) as total_actual_cost,
 			COALESCE(AVG(duration_ms), 0) as avg_duration_ms
 		FROM usage_logs
-		WHERE created_at >= $1 AND created_at <= $2
+		WHERE created_at >= $1 AND created_at < $2
 	`

 	stats := &UsageStats{}
--- a/backend/internal/repository/usage_log_repo_breakdown_test.go
+++ b/backend/internal/repository/usage_log_repo_breakdown_test.go
@@ -0,0 +1,29 @@
+//go:build unit
+
+package repository
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestResolveEndpointColumn(t *testing.T) {
+	tests := []struct {
+		endpointType string
+		want         string
+	}{
+		{"inbound", "ul.inbound_endpoint"},
+		{"upstream", "ul.upstream_endpoint"},
+		{"path", "ul.inbound_endpoint || ' -> ' || ul.upstream_endpoint"},
+		{"", "ul.inbound_endpoint"},           // empty type falls back to the inbound column
+		{"unknown", "ul.inbound_endpoint"},     // unrecognized type falls back to the inbound column
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.endpointType, func(t *testing.T) {
+			got := resolveEndpointColumn(tc.endpointType)
+			require.Equal(t, tc.want, got)
+		})
+	}
+}
--- a/backend/internal/repository/usage_log_repo_request_type_test.go
+++ b/backend/internal/repository/usage_log_repo_request_type_test.go
@@ -259,10 +259,10 @@ func TestUsageLogRepositoryGetUserSpendingRanking(t *testing.T) {
 	start := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
 	end := start.Add(24 * time.Hour)

-	rows := sqlmock.NewRows([]string{"user_id", "email", "actual_cost", "requests", "tokens", "total_actual_cost"}).
-		AddRow(int64(2), "beta@example.com", 12.5, int64(9), int64(900), 40.0).
-		AddRow(int64(1), "alpha@example.com", 12.5, int64(8), int64(800), 40.0).
-		AddRow(int64(3), "gamma@example.com", 4.25, int64(5), int64(300), 40.0)
+	rows := sqlmock.NewRows([]string{"user_id", "email", "actual_cost", "requests", "tokens", "total_actual_cost", "total_requests", "total_tokens"}).
+		AddRow(int64(2), "beta@example.com", 12.5, int64(9), int64(900), 40.0, int64(30), int64(2600)).
+		AddRow(int64(1), "alpha@example.com", 12.5, int64(8), int64(800), 40.0, int64(30), int64(2600)).
+		AddRow(int64(3), "gamma@example.com", 4.25, int64(5), int64(300), 40.0, int64(30), int64(2600))

 	mock.ExpectQuery("WITH user_spend AS \\(").
 		WithArgs(start, end, 12).
@@ -277,6 +277,8 @@ func TestUsageLogRepositoryGetUserSpendingRanking(t *testing.T) {
 			{UserID: 3, Email: "gamma@example.com", ActualCost: 4.25, Requests: 5, Tokens: 300},
 		},
 		TotalActualCost: 40.0,
+		TotalRequests:   30,
+		TotalTokens:     2600,
 	}, got)
 	require.NoError(t, mock.ExpectationsWereMet())
 }
--- a/backend/internal/repository/user_subscription_repo.go
+++ b/backend/internal/repository/user_subscription_repo.go
@@ -5,6 +5,7 @@ import (
 	"time"

 	dbent "github.com/Wei-Shaw/sub2api/ent"
+	"github.com/Wei-Shaw/sub2api/ent/group"
 	"github.com/Wei-Shaw/sub2api/ent/usersubscription"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -190,7 +191,7 @@ func (r *userSubscriptionRepository) ListByGroupID(ctx context.Context, groupID
 	return userSubscriptionEntitiesToService(subs), paginationResultFromTotal(int64(total), params), nil
 }

-func (r *userSubscriptionRepository) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
+func (r *userSubscriptionRepository) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, platform, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
 	client := clientFromContext(ctx, r.client)
 	q := client.UserSubscription.Query()
 	if userID != nil {
@@ -199,6 +200,9 @@ func (r *userSubscriptionRepository) List(ctx context.Context, params pagination
 	if groupID != nil {
 		q = q.Where(usersubscription.GroupIDEQ(*groupID))
 	}
+	if platform != "" {
+		q = q.Where(usersubscription.HasGroupWith(group.PlatformEQ(platform)))
+	}

 	// Status filtering with real-time expiration check
 	now := time.Now()
--- a/backend/internal/repository/user_subscription_repo_integration_test.go
+++ b/backend/internal/repository/user_subscription_repo_integration_test.go
@@ -271,7 +271,7 @@ func (s *UserSubscriptionRepoSuite) TestList_NoFilters() {
 	group := s.mustCreateGroup("g-list")
 	s.mustCreateSubscription(user.ID, group.ID, nil)

-	subs, page, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, nil, nil, "", "", "")
+	subs, page, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, nil, nil, "", "", "", "")
 	s.Require().NoError(err, "List")
 	s.Require().Len(subs, 1)
 	s.Require().Equal(int64(1), page.Total)
@@ -285,7 +285,7 @@ func (s *UserSubscriptionRepoSuite) TestList_FilterByUserID() {
 	s.mustCreateSubscription(user1.ID, group.ID, nil)
 	s.mustCreateSubscription(user2.ID, group.ID, nil)

-	subs, _, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, &user1.ID, nil, "", "", "")
+	subs, _, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, &user1.ID, nil, "", "", "", "")
 	s.Require().NoError(err)
 	s.Require().Len(subs, 1)
 	s.Require().Equal(user1.ID, subs[0].UserID)
@@ -299,7 +299,7 @@ func (s *UserSubscriptionRepoSuite) TestList_FilterByGroupID() {
 	s.mustCreateSubscription(user.ID, g1.ID, nil)
 	s.mustCreateSubscription(user.ID, g2.ID, nil)

-	subs, _, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, nil, &g1.ID, "", "", "")
+	subs, _, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, nil, &g1.ID, "", "", "", "")
 	s.Require().NoError(err)
 	s.Require().Len(subs, 1)
 	s.Require().Equal(g1.ID, subs[0].GroupID)
@@ -320,7 +320,7 @@ func (s *UserSubscriptionRepoSuite) TestList_FilterByStatus() {
 		c.SetExpiresAt(time.Now().Add(-24 * time.Hour))
 	})

-	subs, _, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, nil, nil, service.SubscriptionStatusExpired, "", "")
+	subs, _, err := s.repo.List(s.ctx, pagination.PaginationParams{Page: 1, PageSize: 10}, nil, nil, service.SubscriptionStatusExpired, "", "", "")
 	s.Require().NoError(err)
 	s.Require().Len(subs, 1)
 	s.Require().Equal(service.SubscriptionStatusExpired, subs[0].Status)
--- a/backend/internal/server/api_contract_test.go
+++ b/backend/internal/server/api_contract_test.go
@@ -924,8 +924,8 @@ func (stubGroupRepo) ExistsByName(ctx context.Context, name string) (bool, error
 	return false, errors.New("not implemented")
 }

-func (stubGroupRepo) GetAccountCount(ctx context.Context, groupID int64) (int64, error) {
-	return 0, errors.New("not implemented")
+func (stubGroupRepo) GetAccountCount(ctx context.Context, groupID int64) (int64, int64, error) {
+	return 0, 0, errors.New("not implemented")
 }

 func (stubGroupRepo) DeleteAccountGroupsByGroupID(ctx context.Context, groupID int64) (int64, error) {
@@ -1289,7 +1289,7 @@ func (r *stubUserSubscriptionRepo) ListActiveByUserID(ctx context.Context, userI
 func (stubUserSubscriptionRepo) ListByGroupID(ctx context.Context, groupID int64, params pagination.PaginationParams) ([]service.UserSubscription, *pagination.PaginationResult, error) {
 	return nil, nil, errors.New("not implemented")
 }
-func (stubUserSubscriptionRepo) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
+func (stubUserSubscriptionRepo) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, platform, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
 	return nil, nil, errors.New("not implemented")
 }
 func (stubUserSubscriptionRepo) ExistsByUserIDAndGroupID(ctx context.Context, userID, groupID int64) (bool, error) {
@@ -1637,6 +1637,10 @@ func (r *stubUsageLogRepo) GetGroupStatsWithFilters(ctx context.Context, startTi
 	return nil, errors.New("not implemented")
 }

+func (r *stubUsageLogRepo) GetUserBreakdownStats(ctx context.Context, startTime, endTime time.Time, dim usagestats.UserBreakdownDimension, limit int) ([]usagestats.UserBreakdownItem, error) {
+	return nil, errors.New("not implemented") // stub: unimplemented in this test double
+}
+
 func (r *stubUsageLogRepo) GetAPIKeyUsageTrend(ctx context.Context, startTime, endTime time.Time, granularity string, limit int) ([]usagestats.APIKeyUsageTrendPoint, error) {
 	return nil, errors.New("not implemented")
 }
@@ -1782,6 +1786,9 @@ func (r *stubUsageLogRepo) GetAccountUsageStats(ctx context.Context, accountID i
 func (r *stubUsageLogRepo) GetStatsWithFilters(ctx context.Context, filters usagestats.UsageLogFilters) (*usagestats.UsageStats, error) {
 	return nil, errors.New("not implemented")
 }
+func (r *stubUsageLogRepo) GetAllGroupUsageSummary(ctx context.Context, todayStart time.Time) ([]usagestats.GroupUsageSummary, error) {
+	return nil, errors.New("not implemented")
+}

 type stubSettingRepo struct {
 	all map[string]string
--- a/backend/internal/server/middleware/api_key_auth_google_test.go
+++ b/backend/internal/server/middleware/api_key_auth_google_test.go
@@ -135,7 +135,7 @@ func (f fakeGoogleSubscriptionRepo) ListActiveByUserID(ctx context.Context, user
 func (f fakeGoogleSubscriptionRepo) ListByGroupID(ctx context.Context, groupID int64, params pagination.PaginationParams) ([]service.UserSubscription, *pagination.PaginationResult, error) {
 	return nil, nil, errors.New("not implemented")
 }
-func (f fakeGoogleSubscriptionRepo) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
+func (f fakeGoogleSubscriptionRepo) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, platform, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
 	return nil, nil, errors.New("not implemented")
 }
 func (f fakeGoogleSubscriptionRepo) ExistsByUserIDAndGroupID(ctx context.Context, userID, groupID int64) (bool, error) {
--- a/backend/internal/server/middleware/api_key_auth_test.go
+++ b/backend/internal/server/middleware/api_key_auth_test.go
@@ -646,7 +646,7 @@ func (r *stubUserSubscriptionRepo) ListByGroupID(ctx context.Context, groupID in
 	return nil, nil, errors.New("not implemented")
 }

-func (r *stubUserSubscriptionRepo) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
+func (r *stubUserSubscriptionRepo) List(ctx context.Context, params pagination.PaginationParams, userID, groupID *int64, status, platform, sortBy, sortOrder string) ([]service.UserSubscription, *pagination.PaginationResult, error) {
 	return nil, nil, errors.New("not implemented")
 }

--- a/backend/internal/server/routes/admin.go
+++ b/backend/internal/server/routes/admin.go
@@ -198,6 +198,7 @@ func registerDashboardRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		dashboard.GET("/users-ranking", h.Admin.Dashboard.GetUserSpendingRanking)
 		dashboard.POST("/users-usage", h.Admin.Dashboard.GetBatchUsersUsage)
 		dashboard.POST("/api-keys-usage", h.Admin.Dashboard.GetBatchAPIKeysUsage)
+		dashboard.GET("/user-breakdown", h.Admin.Dashboard.GetUserBreakdown)
 		dashboard.POST("/aggregation/backfill", h.Admin.Dashboard.BackfillAggregation)
 	}
 }
@@ -226,6 +227,8 @@ func registerGroupRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 	{
 		groups.GET("", h.Admin.Group.List)
 		groups.GET("/all", h.Admin.Group.GetAll)
+		groups.GET("/usage-summary", h.Admin.Group.GetUsageSummary)
+		groups.GET("/capacity-summary", h.Admin.Group.GetCapacitySummary)
 		groups.PUT("/sort-order", h.Admin.Group.UpdateSortOrder)
 		groups.GET("/:id", h.Admin.Group.GetByID)
 		groups.POST("", h.Admin.Group.Create)
--- a/backend/internal/server/routes/gateway.go
+++ b/backend/internal/server/routes/gateway.go
@@ -30,6 +30,7 @@ func RegisterGatewayRoutes(
 	soraBodyLimit := middleware.RequestBodyLimit(soraMaxBodySize)
 	clientRequestID := middleware.ClientRequestID()
 	opsErrorLogger := handler.OpsErrorLoggerMiddleware(opsService)
+	endpointNorm := handler.InboundEndpointMiddleware()

 	// 未分组 Key 拦截中间件（按协议格式区分错误响应）
 	requireGroupAnthropic := middleware.RequireGroupAssignment(settingService, middleware.AnthropicErrorWriter)
@@ -40,6 +41,7 @@ func RegisterGatewayRoutes(
 	gateway.Use(bodyLimit)
 	gateway.Use(clientRequestID)
 	gateway.Use(opsErrorLogger)
+	gateway.Use(endpointNorm)
 	gateway.Use(gin.HandlerFunc(apiKeyAuth))
 	gateway.Use(requireGroupAnthropic)
 	{
@@ -80,6 +82,7 @@ func RegisterGatewayRoutes(
 	gemini.Use(bodyLimit)
 	gemini.Use(clientRequestID)
 	gemini.Use(opsErrorLogger)
+	gemini.Use(endpointNorm)
 	gemini.Use(middleware.APIKeyAuthWithSubscriptionGoogle(apiKeyService, subscriptionService, cfg))
 	gemini.Use(requireGroupGoogle)
 	{
@@ -90,11 +93,11 @@ func RegisterGatewayRoutes(
 	}

 	// OpenAI Responses API（不带v1前缀的别名）
-	r.POST("/responses", bodyLimit, clientRequestID, opsErrorLogger, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.Responses)
-	r.POST("/responses/*subpath", bodyLimit, clientRequestID, opsErrorLogger, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.Responses)
-	r.GET("/responses", bodyLimit, clientRequestID, opsErrorLogger, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.ResponsesWebSocket)
+	r.POST("/responses", bodyLimit, clientRequestID, opsErrorLogger, endpointNorm, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.Responses)
+	r.POST("/responses/*subpath", bodyLimit, clientRequestID, opsErrorLogger, endpointNorm, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.Responses)
+	r.GET("/responses", bodyLimit, clientRequestID, opsErrorLogger, endpointNorm, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.ResponsesWebSocket)
 	// OpenAI Chat Completions API（不带v1前缀的别名）
-	r.POST("/chat/completions", bodyLimit, clientRequestID, opsErrorLogger, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.ChatCompletions)
+	r.POST("/chat/completions", bodyLimit, clientRequestID, opsErrorLogger, endpointNorm, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.ChatCompletions)

 	// Antigravity 模型列表
 	r.GET("/antigravity/models", gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.Gateway.AntigravityModels)
@@ -104,6 +107,7 @@ func RegisterGatewayRoutes(
 	antigravityV1.Use(bodyLimit)
 	antigravityV1.Use(clientRequestID)
 	antigravityV1.Use(opsErrorLogger)
+	antigravityV1.Use(endpointNorm)
 	antigravityV1.Use(middleware.ForcePlatform(service.PlatformAntigravity))
 	antigravityV1.Use(gin.HandlerFunc(apiKeyAuth))
 	antigravityV1.Use(requireGroupAnthropic)
@@ -118,6 +122,7 @@ func RegisterGatewayRoutes(
 	antigravityV1Beta.Use(bodyLimit)
 	antigravityV1Beta.Use(clientRequestID)
 	antigravityV1Beta.Use(opsErrorLogger)
+	antigravityV1Beta.Use(endpointNorm)
 	antigravityV1Beta.Use(middleware.ForcePlatform(service.PlatformAntigravity))
 	antigravityV1Beta.Use(middleware.APIKeyAuthWithSubscriptionGoogle(apiKeyService, subscriptionService, cfg))
 	antigravityV1Beta.Use(requireGroupGoogle)
@@ -132,6 +137,7 @@ func RegisterGatewayRoutes(
 	soraV1.Use(soraBodyLimit)
 	soraV1.Use(clientRequestID)
 	soraV1.Use(opsErrorLogger)
+	soraV1.Use(endpointNorm)
 	soraV1.Use(middleware.ForcePlatform(service.PlatformSora))
 	soraV1.Use(gin.HandlerFunc(apiKeyAuth))
 	soraV1.Use(requireGroupAnthropic)
--- a/backend/internal/service/account.go
+++ b/backend/internal/service/account.go
@@ -901,6 +901,22 @@ func (a *Account) IsMixedSchedulingEnabled() bool {
 	return false
 }

+// IsOveragesEnabled reports whether AI Credits overage requests are enabled
+// for this account.
+//
+// The result is true only when Platform is PlatformAntigravity and
+// Extra["allow_overages"] holds the bool value true. A nil Extra map, a
+// missing key, or a value that is not a bool all yield false.
+func (a *Account) IsOveragesEnabled() bool {
+	if a.Platform != PlatformAntigravity || a.Extra == nil {
+		return false
+	}
+	// A failed assertion (missing key or non-bool value) leaves enabled
+	// at its zero value, false.
+	enabled, _ := a.Extra["allow_overages"].(bool)
+	return enabled
+}
+
 // IsOpenAIPassthroughEnabled 返回 OpenAI 账号是否启用“自动透传（仅替换认证）”。
 //
 // 新字段：accounts.extra.openai_passthrough。
--- a/backend/internal/service/account_test_service.go
+++ b/backend/internal/service/account_test_service.go
@@ -113,15 +113,18 @@ func (s *AccountTestService) validateUpstreamBaseURL(raw string) (string, error)
 	return normalized, nil
 }

-// generateSessionString generates a Claude Code style session string
+// generateSessionString generates a Claude Code style session string.
+// The output format is determined by the UA version in claude.DefaultHeaders,
+// ensuring consistency between the user_id format and the UA sent to upstream.
 func generateSessionString() (string, error) {
-	bytes := make([]byte, 32)
-	if _, err := rand.Read(bytes); err != nil {
+	b := make([]byte, 32)
+	if _, err := rand.Read(b); err != nil {
 		return "", err
 	}
-	hex64 := hex.EncodeToString(bytes)
+	hex64 := hex.EncodeToString(b)
 	sessionUUID := uuid.New().String()
-	return fmt.Sprintf("user_%s_account__session_%s", hex64, sessionUUID), nil
+	uaVersion := ExtractCLIVersion(claude.DefaultHeaders["User-Agent"])
+	return FormatMetadataUserID(hex64, "", sessionUUID, uaVersion), nil
 }

 // createTestPayload creates a Claude Code style test request payload
--- a/backend/internal/service/account_usage_service.go
+++ b/backend/internal/service/account_usage_service.go
@@ -48,6 +48,8 @@ type UsageLogRepository interface {
 	GetEndpointStatsWithFilters(ctx context.Context, startTime, endTime time.Time, userID, apiKeyID, accountID, groupID int64, model string, requestType *int16, stream *bool, billingType *int8) ([]usagestats.EndpointStat, error)
 	GetUpstreamEndpointStatsWithFilters(ctx context.Context, startTime, endTime time.Time, userID, apiKeyID, accountID, groupID int64, model string, requestType *int16, stream *bool, billingType *int8) ([]usagestats.EndpointStat, error)
 	GetGroupStatsWithFilters(ctx context.Context, startTime, endTime time.Time, userID, apiKeyID, accountID, groupID int64, requestType *int16, stream *bool, billingType *int8) ([]usagestats.GroupStat, error)
+	GetUserBreakdownStats(ctx context.Context, startTime, endTime time.Time, dim usagestats.UserBreakdownDimension, limit int) ([]usagestats.UserBreakdownItem, error)
+	GetAllGroupUsageSummary(ctx context.Context, todayStart time.Time) ([]usagestats.GroupUsageSummary, error)
 	GetAPIKeyUsageTrend(ctx context.Context, startTime, endTime time.Time, granularity string, limit int) ([]usagestats.APIKeyUsageTrendPoint, error)
 	GetUserUsageTrend(ctx context.Context, startTime, endTime time.Time, granularity string, limit int) ([]usagestats.UserUsageTrendPoint, error)
 	GetUserSpendingRanking(ctx context.Context, startTime, endTime time.Time, limit int) (*usagestats.UserSpendingRankingResponse, error)
@@ -166,6 +168,13 @@ type AntigravityModelDetail struct {
 	SupportedMimeTypes map[string]bool `json:"supported_mime_types,omitempty"`
 }

+// AICredit represents AI Credits balance information for an Antigravity account.
+type AICredit struct {
+	CreditType     string  `json:"credit_type,omitempty"`
+	Amount         float64 `json:"amount,omitempty"`
+	MinimumBalance float64 `json:"minimum_balance,omitempty"`
+}
+
 // UsageInfo 账号使用量信息
 type UsageInfo struct {
 	UpdatedAt          *time.Time     `json:"updated_at,omitempty"`           // 更新时间
@@ -189,6 +198,9 @@ type UsageInfo struct {
 	// Antigravity 模型详细能力信息（与 antigravity_quota 同 key）
 	AntigravityQuotaDetails map[string]*AntigravityModelDetail `json:"antigravity_quota_details,omitempty"`

+	// Antigravity AI Credits 余额
+	AICredits []AICredit `json:"ai_credits,omitempty"`
+
 	// Antigravity 废弃模型转发规则 (old_model_id -> new_model_id)
 	ModelForwardingRules map[string]string `json:"model_forwarding_rules,omitempty"`

@@ -436,23 +448,17 @@ func (s *AccountUsageService) getOpenAIUsage(ctx context.Context, account *Accou
 	}

 	if stats, err := s.usageLogRepo.GetAccountWindowStats(ctx, account.ID, now.Add(-5*time.Hour)); err == nil {
-		windowStats := windowStatsFromAccountStats(stats)
-		if hasMeaningfulWindowStats(windowStats) {
-			if usage.FiveHour == nil {
-				usage.FiveHour = &UsageProgress{Utilization: 0}
-			}
-			usage.FiveHour.WindowStats = windowStats
+		if usage.FiveHour == nil {
+			usage.FiveHour = &UsageProgress{Utilization: 0}
 		}
+		usage.FiveHour.WindowStats = windowStatsFromAccountStats(stats)
 	}

 	if stats, err := s.usageLogRepo.GetAccountWindowStats(ctx, account.ID, now.Add(-7*24*time.Hour)); err == nil {
-		windowStats := windowStatsFromAccountStats(stats)
-		if hasMeaningfulWindowStats(windowStats) {
-			if usage.SevenDay == nil {
-				usage.SevenDay = &UsageProgress{Utilization: 0}
-			}
-			usage.SevenDay.WindowStats = windowStats
+		if usage.SevenDay == nil {
+			usage.SevenDay = &UsageProgress{Utilization: 0}
 		}
+		usage.SevenDay.WindowStats = windowStatsFromAccountStats(stats)
 	}

 	return usage, nil
@@ -982,13 +988,6 @@ func windowStatsFromAccountStats(stats *usagestats.AccountStats) *WindowStats {
 	}
 }

-func hasMeaningfulWindowStats(stats *WindowStats) bool {
-	if stats == nil {
-		return false
-	}
-	return stats.Requests > 0 || stats.Tokens > 0 || stats.Cost > 0 || stats.StandardCost > 0 || stats.UserCost > 0
-}
-
 func buildCodexUsageProgressFromExtra(extra map[string]any, window string, now time.Time) *UsageProgress {
 	if len(extra) == 0 {
 		return nil
@@ -1045,6 +1044,11 @@ func buildCodexUsageProgressFromExtra(extra map[string]any, window string, now t
 		}
 	}

+	// 窗口已过期（resetAt 在 now 之前）→ 额度已重置，归零
+	if progress.ResetsAt != nil && !now.Before(*progress.ResetsAt) {
+		progress.Utilization = 0
+	}
+
 	return progress
 }

--- a/backend/internal/service/account_usage_service_test.go
+++ b/backend/internal/service/account_usage_service_test.go
@@ -148,3 +148,54 @@ func TestAccountUsageService_PersistOpenAICodexProbeSnapshotSetsRateLimit(t *tes
 		t.Fatal("waiting for codex probe rate limit persistence timed out")
 	}
 }
+
+func TestBuildCodexUsageProgressFromExtra_ZerosExpiredWindow(t *testing.T) {
+	t.Parallel()
+	now := time.Date(2026, 3, 16, 12, 0, 0, 0, time.UTC)
+
+	t.Run("expired 5h window zeroes utilization", func(t *testing.T) {
+		extra := map[string]any{
+			"codex_5h_used_percent": 42.0,
+			"codex_5h_reset_at":     "2026-03-16T10:00:00Z", // 2h ago
+		}
+		progress := buildCodexUsageProgressFromExtra(extra, "5h", now)
+		if progress == nil {
+			t.Fatal("expected non-nil progress")
+		}
+		if progress.Utilization != 0 {
+			t.Fatalf("expected Utilization=0 for expired window, got %v", progress.Utilization)
+		}
+		if progress.RemainingSeconds != 0 {
+			t.Fatalf("expected RemainingSeconds=0, got %v", progress.RemainingSeconds)
+		}
+	})
+
+	t.Run("active 5h window keeps utilization", func(t *testing.T) {
+		resetAt := now.Add(2 * time.Hour).Format(time.RFC3339)
+		extra := map[string]any{
+			"codex_5h_used_percent": 42.0,
+			"codex_5h_reset_at":     resetAt,
+		}
+		progress := buildCodexUsageProgressFromExtra(extra, "5h", now)
+		if progress == nil {
+			t.Fatal("expected non-nil progress")
+		}
+		if progress.Utilization != 42.0 {
+			t.Fatalf("expected Utilization=42, got %v", progress.Utilization)
+		}
+	})
+
+	t.Run("expired 7d window zeroes utilization", func(t *testing.T) {
+		extra := map[string]any{
+			"codex_7d_used_percent": 88.0,
+			"codex_7d_reset_at":     "2026-03-15T00:00:00Z", // yesterday
+		}
+		progress := buildCodexUsageProgressFromExtra(extra, "7d", now)
+		if progress == nil {
+			t.Fatal("expected non-nil progress")
+		}
+		if progress.Utilization != 0 {
+			t.Fatalf("expected Utilization=0 for expired 7d window, got %v", progress.Utilization)
+		}
+	})
+}
--- a/backend/internal/service/admin_service.go
+++ b/backend/internal/service/admin_service.go
@@ -368,6 +368,10 @@ type ProxyExitInfoProber interface {
 	ProbeProxy(ctx context.Context, proxyURL string) (*ProxyExitInfo, int64, error)
 }

+type groupExistenceBatchReader interface {
+	ExistsByIDs(ctx context.Context, ids []int64) (map[int64]bool, error)
+}
+
 type proxyQualityTarget struct {
 	Target          string
 	URL             string
@@ -445,10 +449,6 @@ type userGroupRateBatchReader interface {
 	GetByUserIDs(ctx context.Context, userIDs []int64) (map[int64]map[int64]float64, error)
 }

-type groupExistenceBatchReader interface {
-	ExistsByIDs(ctx context.Context, ids []int64) (map[int64]bool, error)
-}
-
 // NewAdminService creates a new AdminService
 func NewAdminService(
 	userRepo UserRepository,
@@ -1516,6 +1516,7 @@ func (s *adminServiceImpl) UpdateAccount(ctx context.Context, id int64, input *U
 	if err != nil {
 		return nil, err
 	}
+	wasOveragesEnabled := account.IsOveragesEnabled()

 	if input.Name != "" {
 		account.Name = input.Name
@@ -1529,7 +1530,9 @@ func (s *adminServiceImpl) UpdateAccount(ctx context.Context, id int64, input *U
 	if len(input.Credentials) > 0 {
 		account.Credentials = input.Credentials
 	}
-	if len(input.Extra) > 0 {
+	// Extra 使用 map：需要区分“未提供(nil)”与“显式清空({})”。
+	// 关闭配额限制时前端会删除 quota_* 键并提交 extra:{}，此时也必须落库。
+	if input.Extra != nil {
 		// 保留配额用量字段，防止编辑账号时意外重置
 		for _, key := range []string{"quota_used", "quota_daily_used", "quota_daily_start", "quota_weekly_used", "quota_weekly_start"} {
 			if v, ok := account.Extra[key]; ok {
@@ -1537,6 +1540,17 @@ func (s *adminServiceImpl) UpdateAccount(ctx context.Context, id int64, input *U
 			}
 		}
 		account.Extra = input.Extra
+		if account.Platform == PlatformAntigravity && wasOveragesEnabled && !account.IsOveragesEnabled() {
+			delete(account.Extra, "antigravity_credits_overages") // 清理旧版 overages 运行态
+			// 清除 AICredits 限流 key
+			if rawLimits, ok := account.Extra[modelRateLimitsKey].(map[string]any); ok {
+				delete(rawLimits, creditsExhaustedKey)
+			}
+		}
+		if account.Platform == PlatformAntigravity && !wasOveragesEnabled && account.IsOveragesEnabled() {
+			delete(account.Extra, modelRateLimitsKey)
+			delete(account.Extra, "antigravity_credits_overages") // 清理旧版 overages 运行态
+		}
 		// 校验并预计算固定时间重置的下次重置时间
 		if err := ValidateQuotaResetConfig(account.Extra); err != nil {
 			return nil, err
--- a/backend/internal/service/admin_service_apikey_test.go
+++ b/backend/internal/service/admin_service_apikey_test.go
@@ -194,7 +194,7 @@ func (s *groupRepoStubForGroupUpdate) ListActiveByPlatform(context.Context, stri
 func (s *groupRepoStubForGroupUpdate) ExistsByName(context.Context, string) (bool, error) {
 	panic("unexpected")
 }
-func (s *groupRepoStubForGroupUpdate) GetAccountCount(context.Context, int64) (int64, error) {
+func (s *groupRepoStubForGroupUpdate) GetAccountCount(context.Context, int64) (int64, int64, error) {
 	panic("unexpected")
 }
 func (s *groupRepoStubForGroupUpdate) DeleteAccountGroupsByGroupID(context.Context, int64) (int64, error) {
--- a/backend/internal/service/admin_service_delete_test.go
+++ b/backend/internal/service/admin_service_delete_test.go
@@ -160,7 +160,7 @@ func (s *groupRepoStub) ExistsByName(ctx context.Context, name string) (bool, er
 	panic("unexpected ExistsByName call")
 }

-func (s *groupRepoStub) GetAccountCount(ctx context.Context, groupID int64) (int64, error) {
+func (s *groupRepoStub) GetAccountCount(ctx context.Context, groupID int64) (int64, int64, error) {
 	panic("unexpected GetAccountCount call")
 }

--- a/backend/internal/service/admin_service_group_test.go
+++ b/backend/internal/service/admin_service_group_test.go
@@ -100,7 +100,7 @@ func (s *groupRepoStubForAdmin) ExistsByName(_ context.Context, _ string) (bool,
 	panic("unexpected ExistsByName call")
 }

-func (s *groupRepoStubForAdmin) GetAccountCount(_ context.Context, _ int64) (int64, error) {
+func (s *groupRepoStubForAdmin) GetAccountCount(_ context.Context, _ int64) (int64, int64, error) {
 	panic("unexpected GetAccountCount call")
 }

@@ -383,7 +383,7 @@ func (s *groupRepoStubForFallbackCycle) ExistsByName(_ context.Context, _ string
 	panic("unexpected ExistsByName call")
 }

-func (s *groupRepoStubForFallbackCycle) GetAccountCount(_ context.Context, _ int64) (int64, error) {
+func (s *groupRepoStubForFallbackCycle) GetAccountCount(_ context.Context, _ int64) (int64, int64, error) {
 	panic("unexpected GetAccountCount call")
 }

@@ -458,7 +458,7 @@ func (s *groupRepoStubForInvalidRequestFallback) ExistsByName(_ context.Context,
 	panic("unexpected ExistsByName call")
 }

-func (s *groupRepoStubForInvalidRequestFallback) GetAccountCount(_ context.Context, _ int64) (int64, error) {
+func (s *groupRepoStubForInvalidRequestFallback) GetAccountCount(_ context.Context, _ int64) (int64, int64, error) {
 	panic("unexpected GetAccountCount call")
 }

--- a/backend/internal/service/admin_service_overages_test.go
+++ b/backend/internal/service/admin_service_overages_test.go
@@ -0,0 +1,155 @@
+//go:build unit
+
+package service
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+type updateAccountOveragesRepoStub struct {
+	mockAccountRepoForGemini
+	account     *Account
+	updateCalls int
+}
+
+func (r *updateAccountOveragesRepoStub) GetByID(ctx context.Context, id int64) (*Account, error) {
+	return r.account, nil
+}
+
+func (r *updateAccountOveragesRepoStub) Update(ctx context.Context, account *Account) error {
+	r.updateCalls++
+	r.account = account
+	return nil
+}
+
+func TestUpdateAccount_DisableOveragesClearsAICreditsKey(t *testing.T) {
+	accountID := int64(101)
+	repo := &updateAccountOveragesRepoStub{
+		account: &Account{
+			ID:       accountID,
+			Platform: PlatformAntigravity,
+			Type:     AccountTypeOAuth,
+			Status:   StatusActive,
+			Extra: map[string]any{
+				"allow_overages":   true,
+				"mixed_scheduling": true,
+				modelRateLimitsKey: map[string]any{
+					"claude-sonnet-4-5": map[string]any{
+						"rate_limited_at":     "2026-03-15T00:00:00Z",
+						"rate_limit_reset_at": "2099-03-15T00:00:00Z",
+					},
+					creditsExhaustedKey: map[string]any{
+						"rate_limited_at":     "2026-03-15T00:00:00Z",
+						"rate_limit_reset_at": time.Now().Add(5 * time.Hour).UTC().Format(time.RFC3339),
+					},
+				},
+			},
+		},
+	}
+
+	svc := &adminServiceImpl{accountRepo: repo}
+	updated, err := svc.UpdateAccount(context.Background(), accountID, &UpdateAccountInput{
+		Extra: map[string]any{
+			"mixed_scheduling": true,
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					"rate_limited_at":     "2026-03-15T00:00:00Z",
+					"rate_limit_reset_at": "2099-03-15T00:00:00Z",
+				},
+				creditsExhaustedKey: map[string]any{
+					"rate_limited_at":     "2026-03-15T00:00:00Z",
+					"rate_limit_reset_at": time.Now().Add(5 * time.Hour).UTC().Format(time.RFC3339),
+				},
+			},
+		},
+	})
+
+	require.NoError(t, err)
+	require.NotNil(t, updated)
+	require.Equal(t, 1, repo.updateCalls)
+	require.False(t, updated.IsOveragesEnabled())
+
+	// After overages is disabled, the AICredits key must have been removed.
+	rawLimits, ok := repo.account.Extra[modelRateLimitsKey].(map[string]any)
+	if ok {
+		_, exists := rawLimits[creditsExhaustedKey]
+		require.False(t, exists, "关闭 overages 时应清除 AICredits 限流 key")
+	}
+	// Regular per-model rate limits must be preserved.
+	require.True(t, ok)
+	_, exists := rawLimits["claude-sonnet-4-5"]
+	require.True(t, exists, "普通模型限流应保留")
+}
+
+func TestUpdateAccount_EnableOveragesClearsModelRateLimitsBeforePersist(t *testing.T) {
+	accountID := int64(102)
+	repo := &updateAccountOveragesRepoStub{
+		account: &Account{
+			ID:       accountID,
+			Platform: PlatformAntigravity,
+			Type:     AccountTypeOAuth,
+			Status:   StatusActive,
+			Extra: map[string]any{
+				"mixed_scheduling": true,
+				modelRateLimitsKey: map[string]any{
+					"claude-sonnet-4-5": map[string]any{
+						"rate_limited_at":     "2026-03-15T00:00:00Z",
+						"rate_limit_reset_at": "2099-03-15T00:00:00Z",
+					},
+				},
+			},
+		},
+	}
+
+	svc := &adminServiceImpl{accountRepo: repo}
+	updated, err := svc.UpdateAccount(context.Background(), accountID, &UpdateAccountInput{
+		Extra: map[string]any{
+			"mixed_scheduling": true,
+			"allow_overages":   true,
+		},
+	})
+
+	require.NoError(t, err)
+	require.NotNil(t, updated)
+	require.Equal(t, 1, repo.updateCalls)
+	require.True(t, updated.IsOveragesEnabled())
+
+	_, exists := repo.account.Extra[modelRateLimitsKey]
+	require.False(t, exists, "开启 overages 时应在持久化前清掉旧模型限流")
+}
+
+func TestUpdateAccount_EmptyExtraPayloadCanClearQuotaLimits(t *testing.T) {
+	accountID := int64(103)
+	repo := &updateAccountOveragesRepoStub{
+		account: &Account{
+			ID:       accountID,
+			Platform: PlatformAnthropic,
+			Type:     AccountTypeAPIKey,
+			Status:   StatusActive,
+			Extra: map[string]any{
+				"quota_limit":        100.0,
+				"quota_daily_limit":  10.0,
+				"quota_weekly_limit": 40.0,
+			},
+		},
+	}
+
+	svc := &adminServiceImpl{accountRepo: repo}
+	updated, err := svc.UpdateAccount(context.Background(), accountID, &UpdateAccountInput{
+		// Explicit empty object: means "clear the configurable keys in extra" (e.g. turning off quota limits).
+		Extra: map[string]any{},
+	})
+
+	require.NoError(t, err)
+	require.NotNil(t, updated)
+	require.Equal(t, 1, repo.updateCalls)
+	require.NotNil(t, repo.account.Extra)
+	require.NotContains(t, repo.account.Extra, "quota_limit")
+	require.NotContains(t, repo.account.Extra, "quota_daily_limit")
+	require.NotContains(t, repo.account.Extra, "quota_weekly_limit")
+	require.Len(t, repo.account.Extra, 0)
+}
--- a/backend/internal/service/antigravity_credits_overages.go
+++ b/backend/internal/service/antigravity_credits_overages.go
@@ -0,0 +1,234 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+)
+
+const (
+	// creditsExhaustedKey is the special key in model_rate_limits that marks credits as exhausted.
+	// It has the same shape as a regular model rate limit: read/written via SetModelRateLimit / isRateLimitActiveForKey.
+	creditsExhaustedKey      = "AICredits"
+	creditsExhaustedDuration = 5 * time.Hour
+)
+
+type antigravity429Category string
+
+const (
+	antigravity429Unknown        antigravity429Category = "unknown"
+	antigravity429RateLimited    antigravity429Category = "rate_limited"
+	antigravity429QuotaExhausted antigravity429Category = "quota_exhausted"
+)
+
+var (
+	antigravityQuotaExhaustedKeywords = []string{
+		"quota_exhausted",
+		"quota exhausted",
+	}
+
+	creditsExhaustedKeywords = []string{
+		"google_one_ai",
+		"insufficient credit",
+		"insufficient credits",
+		"not enough credit",
+		"not enough credits",
+		"credit exhausted",
+		"credits exhausted",
+		"credit balance",
+		"minimumcreditamountforusage",
+		"minimum credit amount for usage",
+		"minimum credit",
+	}
+)
+
+// isCreditsExhausted reports whether the account's AICredits rate-limit key
+// is in effect, i.e. whether its credits are treated as exhausted.
+// It delegates to isRateLimitActiveForKey(creditsExhaustedKey); a nil
+// receiver yields false.
+func (a *Account) isCreditsExhausted() bool {
+	return a != nil && a.isRateLimitActiveForKey(creditsExhaustedKey)
+}
+
+// setCreditsExhausted marks the account's credits as exhausted: writes model_rate_limits["AICredits"] and updates the cache.
+func (s *AntigravityGatewayService) setCreditsExhausted(ctx context.Context, account *Account) {
+	if account == nil || account.ID == 0 {
+		return
+	}
+	resetAt := time.Now().Add(creditsExhaustedDuration)
+	if err := s.accountRepo.SetModelRateLimit(ctx, account.ID, creditsExhaustedKey, resetAt); err != nil {
+		logger.LegacyPrintf("service.antigravity_gateway", "set credits exhausted failed: account=%d err=%v", account.ID, err)
+		return
+	}
+	s.updateAccountModelRateLimitInCache(ctx, account, creditsExhaustedKey, resetAt)
+	logger.LegacyPrintf("service.antigravity_gateway", "credits_exhausted_marked account=%d reset_at=%s",
+		account.ID, resetAt.UTC().Format(time.RFC3339))
+}
+
+// clearCreditsExhausted removes the AICredits rate-limit key from the account.
+func (s *AntigravityGatewayService) clearCreditsExhausted(ctx context.Context, account *Account) {
+	if account == nil || account.ID == 0 || account.Extra == nil {
+		return
+	}
+	rawLimits, ok := account.Extra[modelRateLimitsKey].(map[string]any)
+	if !ok {
+		return
+	}
+	if _, exists := rawLimits[creditsExhaustedKey]; !exists {
+		return
+	}
+	delete(rawLimits, creditsExhaustedKey)
+	account.Extra[modelRateLimitsKey] = rawLimits
+	if err := s.accountRepo.UpdateExtra(ctx, account.ID, map[string]any{
+		modelRateLimitsKey: rawLimits,
+	}); err != nil {
+		logger.LegacyPrintf("service.antigravity_gateway", "clear credits exhausted failed: account=%d err=%v", account.ID, err)
+	}
+}
+
+// classifyAntigravity429 classifies an Antigravity 429 body as quota exhausted (keyword match wins), rate limited, or unknown.
+func classifyAntigravity429(body []byte) antigravity429Category {
+	if len(body) == 0 {
+		return antigravity429Unknown
+	}
+	text := strings.ToLower(string(body))
+	for i := range antigravityQuotaExhaustedKeywords {
+		if strings.Contains(text, antigravityQuotaExhaustedKeywords[i]) {
+			return antigravity429QuotaExhausted
+		}
+	}
+	if retryInfo := parseAntigravitySmartRetryInfo(body); retryInfo == nil || retryInfo.IsModelCapacityExhausted {
+		return antigravity429Unknown
+	}
+	return antigravity429RateLimited
+}
+
+// injectEnabledCreditTypes sets enabledCreditTypes=["GOOGLE_ONE_AI"] on a serialized v1internal JSON body.
+// It returns nil if body is not a JSON object (invalid JSON, or a literal null, which decodes to a nil map).
+func injectEnabledCreditTypes(body []byte) []byte {
+	var payload map[string]any
+	if err := json.Unmarshal(body, &payload); err != nil || payload == nil {
+		return nil
+	}
+	payload["enabledCreditTypes"] = []string{"GOOGLE_ONE_AI"}
+	if result, err := json.Marshal(payload); err == nil {
+		return result
+	}
+	return nil
+}
+
+// resolveCreditsOveragesModelKey picks the model key used for this request's overages state:
+// the trimmed upstream name, else the account-resolved key, else the key derived from requestedModel.
+// It returns "" when the upstream name is blank and account is nil.
+func resolveCreditsOveragesModelKey(ctx context.Context, account *Account, upstreamModelName, requestedModel string) string {
+	if trimmed := strings.TrimSpace(upstreamModelName); trimmed != "" {
+		return trimmed
+	}
+	if account == nil {
+		return ""
+	}
+	if resolved := resolveFinalAntigravityModelKey(ctx, account, requestedModel); strings.TrimSpace(resolved) != "" {
+		return resolved
+	}
+	return resolveAntigravityModelKey(requestedModel)
+}
+
+// shouldMarkCreditsExhausted decides whether a failed credits request should mark credits as exhausted.
+// Request errors, a nil response, 5xx/408 statuses, URL-level rate limits and smart-retry bodies never do;
+// otherwise the lowercased body must contain one of creditsExhaustedKeywords.
+func shouldMarkCreditsExhausted(resp *http.Response, respBody []byte, reqErr error) bool {
+	switch {
+	case reqErr != nil || resp == nil:
+		return false
+	case resp.StatusCode >= 500 || resp.StatusCode == http.StatusRequestTimeout:
+		return false
+	case isURLLevelRateLimit(respBody):
+		return false
+	case parseAntigravitySmartRetryInfo(respBody) != nil:
+		return false
+	}
+	lowered := strings.ToLower(string(respBody))
+	for i := range creditsExhaustedKeywords {
+		if strings.Contains(lowered, creditsExhaustedKeywords[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+type creditsOveragesRetryResult struct {
+	handled bool
+	resp    *http.Response
+}
+
+// attemptCreditsOveragesRetry retries the request with AI Credits injected once free-quota exhaustion has been confirmed.
+func (s *AntigravityGatewayService) attemptCreditsOveragesRetry(
+	p antigravityRetryLoopParams,
+	baseURL string,
+	modelName string,
+	waitDuration time.Duration,
+	originalStatusCode int,
+	respBody []byte,
+) *creditsOveragesRetryResult {
+	creditsBody := injectEnabledCreditTypes(p.body)
+	if creditsBody == nil {
+		return &creditsOveragesRetryResult{handled: false}
+	}
+	modelKey := resolveCreditsOveragesModelKey(p.ctx, p.account, modelName, p.requestedModel)
+	logger.LegacyPrintf("service.antigravity_gateway", "%s status=429 credit_overages_retry model=%s account=%d (injecting enabledCreditTypes)",
+		p.prefix, modelKey, p.account.ID)
+
+	creditsReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, creditsBody)
+	if err != nil {
+		logger.LegacyPrintf("service.antigravity_gateway", "%s credit_overages_failed model=%s account=%d build_request_err=%v",
+			p.prefix, modelKey, p.account.ID, err)
+		return &creditsOveragesRetryResult{handled: true}
+	}
+
+	creditsResp, err := p.httpUpstream.Do(creditsReq, p.proxyURL, p.account.ID, p.account.Concurrency)
+	if err == nil && creditsResp != nil && creditsResp.StatusCode < 400 {
+		s.clearCreditsExhausted(p.ctx, p.account)
+		logger.LegacyPrintf("service.antigravity_gateway", "%s status=%d credit_overages_success model=%s account=%d",
+			p.prefix, creditsResp.StatusCode, modelKey, p.account.ID)
+		return &creditsOveragesRetryResult{handled: true, resp: creditsResp}
+	}
+
+	s.handleCreditsRetryFailure(p.ctx, p.prefix, modelKey, p.account, creditsResp, err)
+	return &creditsOveragesRetryResult{handled: true}
+}
+
+// handleCreditsRetryFailure reads (up to 64 KiB) and closes the failed credits response, marks credits exhausted when warranted, and logs.
+func (s *AntigravityGatewayService) handleCreditsRetryFailure(
+	ctx context.Context,
+	prefix string,
+	modelKey string,
+	account *Account,
+	creditsResp *http.Response,
+	reqErr error,
+) {
+	var body []byte
+	status := 0
+	if creditsResp != nil {
+		status = creditsResp.StatusCode
+		if creditsResp.Body != nil {
+			body, _ = io.ReadAll(io.LimitReader(creditsResp.Body, 64<<10))
+			_ = creditsResp.Body.Close()
+		}
+	}
+	exhausted := shouldMarkCreditsExhausted(creditsResp, body, reqErr)
+	if account == nil {
+		return
+	}
+	if !exhausted {
+		logger.LegacyPrintf("service.antigravity_gateway", "%s credit_overages_failed model=%s account=%d marked_exhausted=false status=%d err=%v body=%s", prefix, modelKey, account.ID, status, reqErr, truncateForLog(body, 200))
+		return
+	}
+	s.setCreditsExhausted(ctx, account)
+	logger.LegacyPrintf("service.antigravity_gateway", "%s credit_overages_failed model=%s account=%d marked_exhausted=true status=%d body=%s", prefix, modelKey, account.ID, status, truncateForLog(body, 200))
+}
--- a/backend/internal/service/antigravity_credits_overages_test.go
+++ b/backend/internal/service/antigravity_credits_overages_test.go
@@ -0,0 +1,538 @@
+//go:build unit
+
+package service
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+	"github.com/stretchr/testify/require"
+)
+
+// TestClassifyAntigravity429 covers the three 429 categories: explicit quota
+// exhaustion, structured rate limiting (ErrorInfo + RetryInfo), and unknown.
+func TestClassifyAntigravity429(t *testing.T) {
+	t.Run("明确配额耗尽", func(t *testing.T) {
+		require.Equal(t, antigravity429QuotaExhausted, classifyAntigravity429([]byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`)))
+	})
+
+	t.Run("结构化限流", func(t *testing.T) {
+		payload := []byte(`{
+			"error": {
+				"status": "RESOURCE_EXHAUSTED",
+				"details": [
+					{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"},
+					{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
+				]
+			}
+		}`)
+		require.Equal(t, antigravity429RateLimited, classifyAntigravity429(payload))
+	})
+
+	t.Run("未知429", func(t *testing.T) {
+		require.Equal(t, antigravity429Unknown, classifyAntigravity429([]byte(`{"error":{"message":"too many requests"}}`)))
+	})
+}
+
+// TestIsCreditsExhausted_UsesAICreditsKey verifies that isCreditsExhausted is
+// decided by the creditsExhaustedKey entry under modelRateLimitsKey: a missing
+// or expired entry leaves credits usable, an active one reports exhaustion.
+func TestIsCreditsExhausted_UsesAICreditsKey(t *testing.T) {
+	// window builds a rate-limit entry whose timestamps are offsets from now.
+	window := func(limitedAt, resetAt time.Duration) map[string]any {
+		return map[string]any{
+			"rate_limited_at":     time.Now().Add(limitedAt).UTC().Format(time.RFC3339),
+			"rate_limit_reset_at": time.Now().Add(resetAt).UTC().Format(time.RFC3339),
+		}
+	}
+
+	t.Run("无 AICredits key 则积分可用", func(t *testing.T) {
+		account := &Account{
+			ID:       1,
+			Platform: PlatformAntigravity,
+			Extra:    map[string]any{"allow_overages": true},
+		}
+		require.False(t, account.isCreditsExhausted())
+	})
+
+	t.Run("AICredits key 生效则积分耗尽", func(t *testing.T) {
+		account := &Account{
+			ID:       2,
+			Platform: PlatformAntigravity,
+			Extra: map[string]any{
+				"allow_overages":   true,
+				modelRateLimitsKey: map[string]any{creditsExhaustedKey: window(0, 5*time.Hour)},
+			},
+		}
+		require.True(t, account.isCreditsExhausted())
+	})
+
+	t.Run("AICredits key 过期则积分可用", func(t *testing.T) {
+		// The reset time lies one hour in the past.
+		account := &Account{
+			ID:       3,
+			Platform: PlatformAntigravity,
+			Extra: map[string]any{
+				"allow_overages":   true,
+				modelRateLimitsKey: map[string]any{creditsExhaustedKey: window(-6*time.Hour, -1*time.Hour)},
+			},
+		}
+		require.False(t, account.isCreditsExhausted())
+	})
+}
+
+// TestHandleSmartRetry_QuotaExhausted_UsesCreditsAndStoresIndependentState feeds
+// handleSmartRetry a QUOTA_EXHAUSTED 429 for an overages-enabled account and
+// expects exactly one upstream request carrying enabledCreditTypes, with no
+// ordinary model rate limit written to the repo stub.
+func TestHandleSmartRetry_QuotaExhausted_UsesCreditsAndStoresIndependentState(t *testing.T) {
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{{
+			StatusCode: http.StatusOK,
+			Header:     http.Header{},
+			Body:       io.NopCloser(strings.NewReader(`{"ok":true}`)),
+		}},
+		errors: []error{nil},
+	}
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:       101,
+		Name:     "acc-101",
+		Type:     AccountTypeOAuth,
+		Platform: PlatformAntigravity,
+		Extra:    map[string]any{"allow_overages": true},
+		Credentials: map[string]any{
+			"model_mapping": map[string]any{"claude-opus-4-6": "claude-sonnet-4-5"},
+		},
+	}
+
+	// The 429 under test explicitly reports exhausted quota.
+	quotaBody := []byte(`{"error":{"status":"RESOURCE_EXHAUSTED","message":"QUOTA_EXHAUSTED"}}`)
+	quotaResp := &http.Response{
+		StatusCode: http.StatusTooManyRequests,
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(quotaBody)),
+	}
+	params := antigravityRetryLoopParams{
+		ctx:            context.Background(),
+		prefix:         "[test]",
+		account:        account,
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"model":"claude-opus-4-6","request":{}}`),
+		httpUpstream:   upstream,
+		accountRepo:    repo,
+		requestedModel: "claude-opus-4-6",
+		handleError: func(context.Context, string, *Account, int, http.Header, []byte, string, int64, string, bool) *handleModelRateLimitResult {
+			return nil
+		},
+	}
+
+	svc := &AntigravityGatewayService{}
+	got := svc.handleSmartRetry(params, quotaResp, quotaBody, "https://ag-1.test", 0, []string{"https://ag-1.test"})
+
+	require.NotNil(t, got)
+	require.Equal(t, smartRetryActionBreakWithResp, got.action)
+	require.NotNil(t, got.resp)
+	require.Nil(t, got.switchError)
+	require.Len(t, upstream.requestBodies, 1)
+	require.Contains(t, string(upstream.requestBodies[0]), "enabledCreditTypes")
+	require.Empty(t, repo.modelRateLimitCalls, "overages 成功后不应写入普通 model_rate_limits")
+}
+
+// TestHandleSmartRetry_RateLimited_DoesNotUseCredits feeds handleSmartRetry a
+// structured RATE_LIMIT_EXCEEDED 429 (retryDelay 0.1s) and expects the retry to
+// go out without enabledCreditTypes and without any write to the repo stub.
+func TestHandleSmartRetry_RateLimited_DoesNotUseCredits(t *testing.T) {
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{{
+			StatusCode: http.StatusOK,
+			Header:     http.Header{},
+			Body:       io.NopCloser(strings.NewReader(`{"ok":true}`)),
+		}},
+		errors: []error{nil},
+	}
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:       102,
+		Name:     "acc-102",
+		Type:     AccountTypeOAuth,
+		Platform: PlatformAntigravity,
+		Extra:    map[string]any{"allow_overages": true},
+	}
+
+	rateLimitBody := []byte(`{
+		"error": {
+			"status": "RESOURCE_EXHAUSTED",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+			]
+		}
+	}`)
+	rateLimitResp := &http.Response{
+		StatusCode: http.StatusTooManyRequests,
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(rateLimitBody)),
+	}
+	params := antigravityRetryLoopParams{
+		ctx:          context.Background(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"model":"claude-sonnet-4-5","request":{}}`),
+		httpUpstream: upstream,
+		accountRepo:  repo,
+		handleError: func(context.Context, string, *Account, int, http.Header, []byte, string, int64, string, bool) *handleModelRateLimitResult {
+			return nil
+		},
+	}
+
+	svc := &AntigravityGatewayService{}
+	got := svc.handleSmartRetry(params, rateLimitResp, rateLimitBody, "https://ag-1.test", 0, []string{"https://ag-1.test"})
+
+	require.NotNil(t, got)
+	require.Equal(t, smartRetryActionBreakWithResp, got.action)
+	require.NotNil(t, got.resp)
+	require.Len(t, upstream.requestBodies, 1)
+	require.NotContains(t, string(upstream.requestBodies[0]), "enabledCreditTypes")
+	require.Empty(t, repo.extraUpdateCalls)
+	require.Empty(t, repo.modelRateLimitCalls)
+}
+
+// TestAntigravityRetryLoop_ModelRateLimited_InjectsCredits exercises the retry
+// loop with a model that is already rate limited on an overages-enabled account
+// holding no AICredits key: the request must go out with enabledCreditTypes.
+func TestAntigravityRetryLoop_ModelRateLimited_InjectsCredits(t *testing.T) {
+	savedURLs := append([]string(nil), antigravity.BaseURLs...)
+	savedAvailability := antigravity.DefaultURLAvailability
+	t.Cleanup(func() {
+		antigravity.BaseURLs = savedURLs
+		antigravity.DefaultURLAvailability = savedAvailability
+	})
+	antigravity.BaseURLs = []string{"https://ag-1.test"}
+	antigravity.DefaultURLAvailability = antigravity.NewURLAvailability(time.Minute)
+
+	upstream := &queuedHTTPUpstreamStub{
+		responses: []*http.Response{{
+			StatusCode: http.StatusOK,
+			Header:     http.Header{},
+			Body:       io.NopCloser(strings.NewReader(`{"ok":true}`)),
+		}},
+		errors: []error{nil},
+	}
+	// Model rate limited + overages enabled + no AICredits key → credits should be injected directly.
+	account := &Account{
+		ID:          103,
+		Name:        "acc-103",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Status:      StatusActive,
+		Schedulable: true,
+		Extra: map[string]any{
+			"allow_overages": true,
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					"rate_limited_at":     time.Now().UTC().Format(time.RFC3339),
+					"rate_limit_reset_at": time.Now().Add(30 * time.Minute).UTC().Format(time.RFC3339),
+				},
+			},
+		},
+	}
+
+	svc := &AntigravityGatewayService{}
+	loopResult, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:            context.Background(),
+		prefix:         "[test]",
+		account:        account,
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"model":"claude-sonnet-4-5","request":{}}`),
+		httpUpstream:   upstream,
+		requestedModel: "claude-sonnet-4-5",
+		handleError: func(context.Context, string, *Account, int, http.Header, []byte, string, int64, string, bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	require.NoError(t, err)
+	require.NotNil(t, loopResult)
+	require.Len(t, upstream.requestBodies, 1)
+	require.Contains(t, string(upstream.requestBodies[0]), "enabledCreditTypes")
+}
+
+// TestAntigravityRetryLoop_CreditsExhausted_DoesNotInject: model rate limited + overages
+// enabled + active AICredits key → no credits injection; the loop must request an account switch.
+func TestAntigravityRetryLoop_CreditsExhausted_DoesNotInject(t *testing.T) {
+	savedURLs := append([]string(nil), antigravity.BaseURLs...)
+	savedAvailability := antigravity.DefaultURLAvailability
+	t.Cleanup(func() {
+		antigravity.BaseURLs = savedURLs
+		antigravity.DefaultURLAvailability = savedAvailability
+	})
+	antigravity.BaseURLs = []string{"https://ag-1.test"}
+	antigravity.DefaultURLAvailability = antigravity.NewURLAvailability(time.Minute)
+
+	account := &Account{
+		ID:          104,
+		Name:        "acc-104",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Status:      StatusActive,
+		Schedulable: true,
+		Extra: map[string]any{
+			"allow_overages": true,
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					"rate_limited_at":     time.Now().UTC().Format(time.RFC3339),
+					"rate_limit_reset_at": time.Now().Add(30 * time.Minute).UTC().Format(time.RFC3339),
+				},
+				creditsExhaustedKey: map[string]any{
+					"rate_limited_at":     time.Now().UTC().Format(time.RFC3339),
+					"rate_limit_reset_at": time.Now().Add(5 * time.Hour).UTC().Format(time.RFC3339),
+				},
+			},
+		},
+	}
+
+	svc := &AntigravityGatewayService{}
+	_, loopErr := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:            context.Background(),
+		prefix:         "[test]",
+		account:        account,
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"model":"claude-sonnet-4-5","request":{}}`),
+		requestedModel: "claude-sonnet-4-5",
+		handleError: func(context.Context, string, *Account, int, http.Header, []byte, string, int64, string, bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	// Model rate limited + credits exhausted → an account-switch error is expected.
+	require.Error(t, loopErr)
+	var switchErr *AntigravityAccountSwitchError
+	require.ErrorAs(t, loopErr, &switchErr)
+}
+
+// TestAntigravityRetryLoop_CreditErrorMarksExhausted runs the retry loop with
+// credits injected and an upstream 403 reporting insufficient credits, then
+// expects one SetModelRateLimit call on the repo stub keyed by creditsExhaustedKey.
+func TestAntigravityRetryLoop_CreditErrorMarksExhausted(t *testing.T) {
+	savedURLs := append([]string(nil), antigravity.BaseURLs...)
+	savedAvailability := antigravity.DefaultURLAvailability
+	t.Cleanup(func() {
+		antigravity.BaseURLs = savedURLs
+		antigravity.DefaultURLAvailability = savedAvailability
+	})
+	antigravity.BaseURLs = []string{"https://ag-1.test"}
+	antigravity.DefaultURLAvailability = antigravity.NewURLAvailability(time.Minute)
+
+	repo := &stubAntigravityAccountRepo{}
+	upstream := &queuedHTTPUpstreamStub{
+		responses: []*http.Response{{
+			StatusCode: http.StatusForbidden,
+			Header:     http.Header{},
+			Body:       io.NopCloser(strings.NewReader(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`)),
+		}},
+		errors: []error{nil},
+	}
+	// Model rate limited + overages enabled + credits available → credits are injected, but upstream reports insufficient credits.
+	account := &Account{
+		ID:          105,
+		Name:        "acc-105",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Status:      StatusActive,
+		Schedulable: true,
+		Extra: map[string]any{
+			"allow_overages": true,
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					"rate_limited_at":     time.Now().UTC().Format(time.RFC3339),
+					"rate_limit_reset_at": time.Now().Add(30 * time.Minute).UTC().Format(time.RFC3339),
+				},
+			},
+		},
+	}
+
+	svc := &AntigravityGatewayService{accountRepo: repo}
+	loopResult, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:            context.Background(),
+		prefix:         "[test]",
+		account:        account,
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"model":"claude-sonnet-4-5","request":{}}`),
+		httpUpstream:   upstream,
+		accountRepo:    repo,
+		requestedModel: "claude-sonnet-4-5",
+		handleError: func(context.Context, string, *Account, int, http.Header, []byte, string, int64, string, bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	require.NoError(t, err)
+	require.NotNil(t, loopResult)
+	// The AICredits key must have been persisted through SetModelRateLimit.
+	require.Len(t, repo.modelRateLimitCalls, 1, "应通过 SetModelRateLimit 写入 AICredits key")
+	require.Equal(t, creditsExhaustedKey, repo.modelRateLimitCalls[0].modelKey)
+}
+
+// TestShouldMarkCreditsExhausted pins the marking rules: a transport error, a
+// nil response, a 500, a 408, or a rate-limit style 429 never marks, and a 403
+// marks only when its body contains one of the credits keywords listed below.
+func TestShouldMarkCreditsExhausted(t *testing.T) {
+	// Body mentioning insufficient credits; the guard cases must reject it regardless.
+	insufficient := []byte(`{"error":"Insufficient credits"}`)
+	respWith := func(status int) *http.Response { return &http.Response{StatusCode: status} }
+
+	t.Run("reqErr 不为 nil 时不标记", func(t *testing.T) {
+		require.False(t, shouldMarkCreditsExhausted(respWith(http.StatusForbidden), insufficient, io.ErrUnexpectedEOF))
+	})
+
+	t.Run("resp 为 nil 时不标记", func(t *testing.T) {
+		require.False(t, shouldMarkCreditsExhausted(nil, insufficient, nil))
+	})
+
+	t.Run("5xx 响应不标记", func(t *testing.T) {
+		require.False(t, shouldMarkCreditsExhausted(respWith(http.StatusInternalServerError), insufficient, nil))
+	})
+
+	t.Run("408 RequestTimeout 不标记", func(t *testing.T) {
+		require.False(t, shouldMarkCreditsExhausted(respWith(http.StatusRequestTimeout), insufficient, nil))
+	})
+
+	t.Run("URL 级限流不标记", func(t *testing.T) {
+		payload := []byte(`{"error":{"message":"Resource has been exhausted"}}`)
+		require.False(t, shouldMarkCreditsExhausted(respWith(http.StatusTooManyRequests), payload, nil))
+	})
+
+	t.Run("结构化限流不标记", func(t *testing.T) {
+		payload := []byte(`{"error":{"status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","reason":"RATE_LIMIT_EXCEEDED"},{"@type":"type.googleapis.com/google.rpc.RetryInfo","retryDelay":"0.5s"}]}}`)
+		require.False(t, shouldMarkCreditsExhausted(respWith(http.StatusTooManyRequests), payload, nil))
+	})
+
+	t.Run("含 credits 关键词时标记", func(t *testing.T) {
+		for _, keyword := range []string{
+			"Insufficient GOOGLE_ONE_AI credits",
+			"insufficient credit balance",
+			"not enough credits for this request",
+			"Credits exhausted",
+			"minimumCreditAmountForUsage requirement not met",
+		} {
+			payload := []byte(`{"error":{"message":"` + keyword + `"}}`)
+			require.True(t, shouldMarkCreditsExhausted(respWith(http.StatusForbidden), payload, nil), "should mark for keyword: %s", keyword)
+		}
+	})
+
+	t.Run("无 credits 关键词时不标记", func(t *testing.T) {
+		payload := []byte(`{"error":{"message":"permission denied"}}`)
+		require.False(t, shouldMarkCreditsExhausted(respWith(http.StatusForbidden), payload, nil))
+	})
+}
+
+// TestInjectEnabledCreditTypes checks that valid JSON gains (or has replaced) an
+// enabledCreditTypes entry naming GOOGLE_ONE_AI, and that bad input yields nil.
+func TestInjectEnabledCreditTypes(t *testing.T) {
+	t.Run("正常 JSON 注入成功", func(t *testing.T) {
+		injected := injectEnabledCreditTypes([]byte(`{"model":"claude-sonnet-4-5","request":{}}`))
+		require.NotNil(t, injected)
+		require.Contains(t, string(injected), `"enabledCreditTypes"`)
+		require.Contains(t, string(injected), `GOOGLE_ONE_AI`)
+	})
+
+	t.Run("非法 JSON 返回 nil", func(t *testing.T) {
+		require.Nil(t, injectEnabledCreditTypes([]byte(`not json`)))
+	})
+
+	t.Run("空 body 返回 nil", func(t *testing.T) {
+		require.Nil(t, injectEnabledCreditTypes([]byte{}))
+	})
+
+	t.Run("已有 enabledCreditTypes 会被覆盖", func(t *testing.T) {
+		injected := injectEnabledCreditTypes([]byte(`{"enabledCreditTypes":["OLD"],"model":"test"}`))
+		require.NotNil(t, injected)
+		require.Contains(t, string(injected), `GOOGLE_ONE_AI`)
+		require.NotContains(t, string(injected), `OLD`)
+	})
+}
+
+// TestClearCreditsExhausted checks that clearCreditsExhausted records an extra
+// update on the repo stub only when an AICredits key exists, removing just that key.
+func TestClearCreditsExhausted(t *testing.T) {
+	// newService returns a service wired to a fresh repo stub, plus the stub itself.
+	newService := func() (*AntigravityGatewayService, *stubAntigravityAccountRepo) {
+		repo := &stubAntigravityAccountRepo{}
+		return &AntigravityGatewayService{accountRepo: repo}, repo
+	}
+
+	t.Run("account 为 nil 不操作", func(t *testing.T) {
+		svc, repo := newService()
+		svc.clearCreditsExhausted(context.Background(), nil)
+		require.Empty(t, repo.extraUpdateCalls)
+	})
+
+	t.Run("Extra 为 nil 不操作", func(t *testing.T) {
+		svc, repo := newService()
+		svc.clearCreditsExhausted(context.Background(), &Account{ID: 1})
+		require.Empty(t, repo.extraUpdateCalls)
+	})
+
+	t.Run("无 modelRateLimitsKey 不操作", func(t *testing.T) {
+		svc, repo := newService()
+		svc.clearCreditsExhausted(context.Background(), &Account{ID: 1, Extra: map[string]any{"some_key": "value"}})
+		require.Empty(t, repo.extraUpdateCalls)
+	})
+
+	t.Run("无 AICredits key 不操作", func(t *testing.T) {
+		svc, repo := newService()
+		svc.clearCreditsExhausted(context.Background(), &Account{
+			ID: 1,
+			Extra: map[string]any{
+				modelRateLimitsKey: map[string]any{
+					"claude-sonnet-4-5": map[string]any{
+						"rate_limited_at":     "2026-03-15T00:00:00Z",
+						"rate_limit_reset_at": "2099-03-15T00:00:00Z",
+					},
+				},
+			},
+		})
+		require.Empty(t, repo.extraUpdateCalls)
+	})
+
+	t.Run("有 AICredits key 时删除并调用 UpdateExtra", func(t *testing.T) {
+		svc, repo := newService()
+		account := &Account{
+			ID: 1,
+			Extra: map[string]any{
+				modelRateLimitsKey: map[string]any{
+					"claude-sonnet-4-5": map[string]any{
+						"rate_limited_at":     "2026-03-15T00:00:00Z",
+						"rate_limit_reset_at": "2099-03-15T00:00:00Z",
+					},
+					creditsExhaustedKey: map[string]any{
+						"rate_limited_at":     "2026-03-15T00:00:00Z",
+						"rate_limit_reset_at": time.Now().Add(5 * time.Hour).UTC().Format(time.RFC3339),
+					},
+				},
+			},
+		}
+		svc.clearCreditsExhausted(context.Background(), account)
+		require.Len(t, repo.extraUpdateCalls, 1)
+		// The AICredits key must be gone.
+		limits := account.Extra[modelRateLimitsKey].(map[string]any)
+		_, present := limits[creditsExhaustedKey]
+		require.False(t, present, "AICredits key 应被删除")
+		// The ordinary model rate limit must survive.
+		_, present = limits["claude-sonnet-4-5"]
+		require.True(t, present, "普通模型限流应保留")
+	})
+}
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -188,9 +188,29 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 		return &smartRetryResult{action: smartRetryActionContinueURL}
 	}

+	category := antigravity429Unknown
+	if resp.StatusCode == http.StatusTooManyRequests {
+		category = classifyAntigravity429(respBody)
+	}
+
 	// 判断是否触发智能重试
 	shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(p.account, respBody)

+	// AI Credits 超量请求：
+	// 仅在上游明确返回免费配额耗尽时才允许切换到 credits。
+	if resp.StatusCode == http.StatusTooManyRequests &&
+		category == antigravity429QuotaExhausted &&
+		p.account.IsOveragesEnabled() &&
+		!p.account.isCreditsExhausted() {
+		result := s.attemptCreditsOveragesRetry(p, baseURL, modelName, waitDuration, resp.StatusCode, respBody)
+		if result.handled && result.resp != nil {
+			return &smartRetryResult{
+				action: smartRetryActionBreakWithResp,
+				resp:   result.resp,
+			}
+		}
+	}
+
 	// 情况1: retryDelay >= 阈值，限流模型并切换账号
 	if shouldRateLimitModel {
 		// 单账号 503 退避重试模式：不设限流、不切换账号，改为原地等待+重试
@@ -532,14 +552,31 @@ func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(

 // antigravityRetryLoop 执行带 URL fallback 的重试循环
 func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) {
+	// 预检查：模型限流 + overages 启用 + 积分未耗尽 → 直接注入 AI Credits
+	overagesInjected := false
+	if p.requestedModel != "" && p.account.Platform == PlatformAntigravity &&
+		p.account.IsOveragesEnabled() && !p.account.isCreditsExhausted() &&
+		p.account.isModelRateLimitedWithContext(p.ctx, p.requestedModel) {
+		if creditsBody := injectEnabledCreditTypes(p.body); creditsBody != nil {
+			p.body = creditsBody
+			overagesInjected = true
+			logger.LegacyPrintf("service.antigravity_gateway", "%s pre_check: model_rate_limited_credits_inject model=%s account=%d (injecting enabledCreditTypes)",
+				p.prefix, p.requestedModel, p.account.ID)
+		}
+	}
+
 	// 预检查：如果账号已限流，直接返回切换信号
 	if p.requestedModel != "" {
 		if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 {
-			// 单账号 503 退避重试模式：跳过限流预检查，直接发请求。
-			// 首次请求设的限流是为了多账号调度器跳过该账号，在单账号模式下无意义。
-			// 如果上游确实还不可用，handleSmartRetry → handleSingleAccountRetryInPlace
-			// 会在 Service 层原地等待+重试，不需要在预检查这里等。
-			if isSingleAccountRetry(p.ctx) {
+			// 已注入积分的请求不再受普通模型限流预检查阻断。
+			if overagesInjected {
+				logger.LegacyPrintf("service.antigravity_gateway", "%s pre_check: credits_injected_ignore_rate_limit remaining=%v model=%s account=%d",
+					p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
+			} else if isSingleAccountRetry(p.ctx) {
+				// 单账号 503 退避重试模式：跳过限流预检查，直接发请求。
+				// 首次请求设的限流是为了多账号调度器跳过该账号，在单账号模式下无意义。
+				// 如果上游确实还不可用，handleSmartRetry → handleSingleAccountRetryInPlace
+				// 会在 Service 层原地等待+重试，不需要在预检查这里等。
 				logger.LegacyPrintf("service.antigravity_gateway", "%s pre_check: single_account_retry skipping rate_limit remaining=%v model=%s account=%d (will retry in-place if 503)",
 					p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
 			} else {
@@ -631,6 +668,15 @@ urlFallbackLoop:
 				respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 				_ = resp.Body.Close()

+				if overagesInjected && shouldMarkCreditsExhausted(resp, respBody, nil) {
+					modelKey := resolveCreditsOveragesModelKey(p.ctx, p.account, "", p.requestedModel)
+					s.handleCreditsRetryFailure(p.ctx, p.prefix, modelKey, p.account, &http.Response{
+						StatusCode: resp.StatusCode,
+						Header:     resp.Header.Clone(),
+						Body:       io.NopCloser(bytes.NewReader(respBody)),
+					}, nil)
+				}
+
 				// ★ 统一入口：自定义错误码 + 临时不可调度
 				if handled, outStatus, policyErr := s.applyErrorPolicy(p, resp.StatusCode, resp.Header, respBody); handled {
 					if policyErr != nil {
@@ -884,7 +930,7 @@ func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParam
 	case ErrorPolicyTempUnscheduled:
 		slog.Info("temp_unschedulable_matched",
 			"prefix", p.prefix, "status_code", statusCode, "account_id", p.account.ID)
-		return true, statusCode, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, IsStickySession: p.isStickySession}
+		return true, statusCode, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, RateLimitedModel: p.requestedModel, IsStickySession: p.isStickySession}
 	}
 	return false, statusCode, nil
 }
@@ -955,8 +1001,9 @@ type TestConnectionResult struct {
 	MappedModel string // 实际使用的模型
 }

-// TestConnection 测试 Antigravity 账号连接（非流式，无重试、无计费）
-// 支持 Claude 和 Gemini 两种协议，根据 modelID 前缀自动选择
+// TestConnection 测试 Antigravity 账号连接。
+// 复用 antigravityRetryLoop 的完整重试 / credits overages / 智能重试逻辑，
+// 与真实调度行为一致。差异：不做账号切换（测试指定账号）、不记录 ops 错误。
 func (s *AntigravityGatewayService) TestConnection(ctx context.Context, account *Account, modelID string) (*TestConnectionResult, error) {

 	// 获取 token
@@ -980,10 +1027,8 @@ func (s *AntigravityGatewayService) TestConnection(ctx context.Context, account
 	// 构建请求体
 	var requestBody []byte
 	if strings.HasPrefix(modelID, "gemini-") {
-		// Gemini 模型：直接使用 Gemini 格式
 		requestBody, err = s.buildGeminiTestRequest(projectID, mappedModel)
 	} else {
-		// Claude 模型：使用协议转换
 		requestBody, err = s.buildClaudeTestRequest(projectID, mappedModel)
 	}
 	if err != nil {
@@ -996,64 +1041,63 @@ func (s *AntigravityGatewayService) TestConnection(ctx context.Context, account
 		proxyURL = account.Proxy.URL()
 	}

-	baseURL := resolveAntigravityForwardBaseURL()
-	if baseURL == "" {
-		return nil, errors.New("no antigravity forward base url configured")
-	}
-	availableURLs := []string{baseURL}
-
-	var lastErr error
-	for urlIdx, baseURL := range availableURLs {
-		// 构建 HTTP 请求（总是使用流式 endpoint，与官方客户端一致）
-		req, err := antigravity.NewAPIRequestWithURL(ctx, baseURL, "streamGenerateContent", accessToken, requestBody)
-		if err != nil {
-			lastErr = err
-			continue
-		}
-
-		// 调试日志：Test 请求信息
-		logger.LegacyPrintf("service.antigravity_gateway", "[antigravity-Test] account=%s request_size=%d url=%s", account.Name, len(requestBody), req.URL.String())
-
-		// 发送请求
-		resp, err := s.httpUpstream.Do(req, proxyURL, account.ID, account.Concurrency)
-		if err != nil {
-			lastErr = fmt.Errorf("请求失败: %w", err)
-			if shouldAntigravityFallbackToNextURL(err, 0) && urlIdx < len(availableURLs)-1 {
-				logger.LegacyPrintf("service.antigravity_gateway", "[antigravity-Test] URL fallback: %s -> %s", baseURL, availableURLs[urlIdx+1])
-				continue
-			}
-			return nil, lastErr
-		}
-
-		// 读取响应
-		respBody, err := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
-		_ = resp.Body.Close() // 立即关闭，避免循环内 defer 导致的资源泄漏
-		if err != nil {
-			return nil, fmt.Errorf("读取响应失败: %w", err)
-		}
-
-		// 检查是否需要 URL 降级
-		if shouldAntigravityFallbackToNextURL(nil, resp.StatusCode) && urlIdx < len(availableURLs)-1 {
-			logger.LegacyPrintf("service.antigravity_gateway", "[antigravity-Test] URL fallback (HTTP %d): %s -> %s", resp.StatusCode, baseURL, availableURLs[urlIdx+1])
-			continue
-		}
-
-		if resp.StatusCode >= 400 {
-			return nil, fmt.Errorf("API 返回 %d: %s", resp.StatusCode, string(respBody))
-		}
-
-		// 解析流式响应，提取文本
-		text := extractTextFromSSEResponse(respBody)
-
-		// 标记成功的 URL，下次优先使用
-		antigravity.DefaultURLAvailability.MarkSuccess(baseURL)
-		return &TestConnectionResult{
-			Text:        text,
-			MappedModel: mappedModel,
-		}, nil
+	// 复用 antigravityRetryLoop：完整的重试 / credits overages / 智能重试
+	prefix := fmt.Sprintf("[antigravity-Test] account=%d(%s)", account.ID, account.Name)
+	p := antigravityRetryLoopParams{
+		ctx:            ctx,
+		prefix:         prefix,
+		account:        account,
+		proxyURL:       proxyURL,
+		accessToken:    accessToken,
+		action:         "streamGenerateContent",
+		body:           requestBody,
+		c:              nil, // 无 gin.Context → 跳过 ops 追踪
+		httpUpstream:   s.httpUpstream,
+		settingService: s.settingService,
+		accountRepo:    s.accountRepo,
+		requestedModel: modelID,
+		handleError:    testConnectionHandleError,
 	}

-	return nil, lastErr
+	result, err := s.antigravityRetryLoop(p)
+	if err != nil {
+		// AccountSwitchError → 测试时不切换账号，返回友好提示
+		var switchErr *AntigravityAccountSwitchError
+		if errors.As(err, &switchErr) {
+			return nil, fmt.Errorf("该账号模型 %s 当前限流中，请稍后重试", switchErr.RateLimitedModel)
+		}
+		return nil, err
+	}
+
+	if result == nil || result.resp == nil {
+		return nil, errors.New("upstream returned empty response")
+	}
+	defer func() { _ = result.resp.Body.Close() }()
+
+	respBody, err := io.ReadAll(io.LimitReader(result.resp.Body, 2<<20))
+	if err != nil {
+		return nil, fmt.Errorf("读取响应失败: %w", err)
+	}
+
+	if result.resp.StatusCode >= 400 {
+		return nil, fmt.Errorf("API 返回 %d: %s", result.resp.StatusCode, string(respBody))
+	}
+
+	text := extractTextFromSSEResponse(respBody)
+	return &TestConnectionResult{Text: text, MappedModel: mappedModel}, nil
+}
+
+// testConnectionHandleError is the lightweight handleError callback used by TestConnection.
+// It only writes a log line; it does no ops error tracking and no sticky-session cleanup.
+func testConnectionHandleError(
+	_ context.Context, prefix string, account *Account,
+	statusCode int, _ http.Header, body []byte,
+	requestedModel string, _ int64, _ string, _ bool,
+) *handleModelRateLimitResult {
+	logger.LegacyPrintf("service.antigravity_gateway",
+		"%s test_handle_error status=%d model=%s account=%d body=%s",
+		prefix, statusCode, requestedModel, account.ID, truncateForLog(body, 200))
+	return nil
 }

 // buildGeminiTestRequest 构建 Gemini 格式测试请求
@@ -3033,6 +3077,22 @@ func (s *AntigravityGatewayService) handleGeminiStreamingResponse(c *gin.Context
 		intervalCh = intervalTicker.C
 	}

+	// 下游 keepalive：防止代理/Cloudflare Tunnel 因连接空闲而断开
+	keepaliveInterval := time.Duration(0)
+	if s.settingService.cfg != nil && s.settingService.cfg.Gateway.StreamKeepaliveInterval > 0 {
+		keepaliveInterval = time.Duration(s.settingService.cfg.Gateway.StreamKeepaliveInterval) * time.Second
+	}
+	var keepaliveTicker *time.Ticker
+	if keepaliveInterval > 0 {
+		keepaliveTicker = time.NewTicker(keepaliveInterval)
+		defer keepaliveTicker.Stop()
+	}
+	var keepaliveCh <-chan time.Time
+	if keepaliveTicker != nil {
+		keepaliveCh = keepaliveTicker.C
+	}
+	lastDataAt := time.Now()
+
 	cw := newAntigravityClientWriter(c.Writer, flusher, "antigravity gemini")

 	// 仅发送一次错误事件，避免多次写入导致协议混乱
@@ -3065,6 +3125,8 @@ func (s *AntigravityGatewayService) handleGeminiStreamingResponse(c *gin.Context
 				return nil, ev.err
 			}

+			lastDataAt = time.Now()
+
 			line := ev.line
 			trimmed := strings.TrimRight(line, "\r\n")
 			if strings.HasPrefix(trimmed, "data:") {
@@ -3124,6 +3186,19 @@ func (s *AntigravityGatewayService) handleGeminiStreamingResponse(c *gin.Context
 			logger.LegacyPrintf("service.antigravity_gateway", "Stream data interval timeout (antigravity)")
 			sendErrorEvent("stream_timeout")
 			return &antigravityStreamResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
+
+		case <-keepaliveCh:
+			if cw.Disconnected() {
+				continue
+			}
+			if time.Since(lastDataAt) < keepaliveInterval {
+				continue
+			}
+			// SSE ping/keepalive：保持连接活跃防止 Cloudflare Tunnel 等代理断开
+			if !cw.Fprintf(":\n\n") {
+				logger.LegacyPrintf("service.antigravity_gateway", "Client disconnected during keepalive ping (antigravity gemini), continuing to drain upstream for billing")
+				continue
+			}
 		}
 	}
 }
@@ -3849,6 +3924,22 @@ func (s *AntigravityGatewayService) handleClaudeStreamingResponse(c *gin.Context
 		intervalCh = intervalTicker.C
 	}

+	// 下游 keepalive：防止代理/Cloudflare Tunnel 因连接空闲而断开
+	keepaliveInterval := time.Duration(0)
+	if s.settingService.cfg != nil && s.settingService.cfg.Gateway.StreamKeepaliveInterval > 0 {
+		keepaliveInterval = time.Duration(s.settingService.cfg.Gateway.StreamKeepaliveInterval) * time.Second
+	}
+	var keepaliveTicker *time.Ticker
+	if keepaliveInterval > 0 {
+		keepaliveTicker = time.NewTicker(keepaliveInterval)
+		defer keepaliveTicker.Stop()
+	}
+	var keepaliveCh <-chan time.Time
+	if keepaliveTicker != nil {
+		keepaliveCh = keepaliveTicker.C
+	}
+	lastDataAt := time.Now()
+
 	cw := newAntigravityClientWriter(c.Writer, flusher, "antigravity claude")

 	// 仅发送一次错误事件，避免多次写入导致协议混乱
@@ -3901,6 +3992,8 @@ func (s *AntigravityGatewayService) handleClaudeStreamingResponse(c *gin.Context
 				return nil, fmt.Errorf("stream read error: %w", ev.err)
 			}

+			lastDataAt = time.Now()
+
 			// 处理 SSE 行，转换为 Claude 格式
 			claudeEvents := processor.ProcessLine(strings.TrimRight(ev.line, "\r\n"))
 			if len(claudeEvents) > 0 {
@@ -3923,6 +4016,20 @@ func (s *AntigravityGatewayService) handleClaudeStreamingResponse(c *gin.Context
 			logger.LegacyPrintf("service.antigravity_gateway", "Stream data interval timeout (antigravity)")
 			sendErrorEvent("stream_timeout")
 			return &antigravityStreamResult{usage: convertUsage(nil), firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
+
+		case <-keepaliveCh:
+			if cw.Disconnected() {
+				continue
+			}
+			if time.Since(lastDataAt) < keepaliveInterval {
+				continue
+			}
+			// SSE ping 事件：Anthropic 原生格式，客户端会正确处理，
+			// 同时保持连接活跃防止 Cloudflare Tunnel 等代理断开
+			if !cw.Fprintf("event: ping\ndata: {\"type\": \"ping\"}\n\n") {
+				logger.LegacyPrintf("service.antigravity_gateway", "Client disconnected during keepalive ping (antigravity claude), continuing to drain upstream for billing")
+				continue
+			}
 		}
 	}
 }
@@ -4253,6 +4360,22 @@ func (s *AntigravityGatewayService) streamUpstreamResponse(c *gin.Context, resp
 		intervalCh = intervalTicker.C
 	}

+	// 下游 keepalive：防止代理/Cloudflare Tunnel 因连接空闲而断开
+	keepaliveInterval := time.Duration(0)
+	if s.settingService.cfg != nil && s.settingService.cfg.Gateway.StreamKeepaliveInterval > 0 {
+		keepaliveInterval = time.Duration(s.settingService.cfg.Gateway.StreamKeepaliveInterval) * time.Second
+	}
+	var keepaliveTicker *time.Ticker
+	if keepaliveInterval > 0 {
+		keepaliveTicker = time.NewTicker(keepaliveInterval)
+		defer keepaliveTicker.Stop()
+	}
+	var keepaliveCh <-chan time.Time
+	if keepaliveTicker != nil {
+		keepaliveCh = keepaliveTicker.C
+	}
+	lastDataAt := time.Now()
+
 	flusher, _ := c.Writer.(http.Flusher)
 	cw := newAntigravityClientWriter(c.Writer, flusher, "antigravity upstream")

@@ -4270,6 +4393,8 @@ func (s *AntigravityGatewayService) streamUpstreamResponse(c *gin.Context, resp
 				return &antigravityStreamResult{usage: usage, firstTokenMs: firstTokenMs}
 			}

+			lastDataAt = time.Now()
+
 			line := ev.line

 			// 记录首 token 时间
@@ -4295,6 +4420,20 @@ func (s *AntigravityGatewayService) streamUpstreamResponse(c *gin.Context, resp
 			}
 			logger.LegacyPrintf("service.antigravity_gateway", "Stream data interval timeout (antigravity upstream)")
 			return &antigravityStreamResult{usage: usage, firstTokenMs: firstTokenMs}
+
+		case <-keepaliveCh:
+			if cw.Disconnected() {
+				continue
+			}
+			if time.Since(lastDataAt) < keepaliveInterval {
+				continue
+			}
+			// SSE ping 事件：Anthropic 原生格式，客户端会正确处理，
+			// 同时保持连接活跃防止 Cloudflare Tunnel 等代理断开
+			if !cw.Fprintf("event: ping\ndata: {\"type\": \"ping\"}\n\n") {
+				logger.LegacyPrintf("service.antigravity_gateway", "Client disconnected during keepalive ping (antigravity upstream), continuing to drain upstream for billing")
+				continue
+			}
 		}
 	}
 }
--- a/backend/internal/service/antigravity_quota_fetcher.go
+++ b/backend/internal/service/antigravity_quota_fetcher.go
@@ -78,11 +78,11 @@ func (f *AntigravityQuotaFetcher) FetchQuota(ctx context.Context, account *Accou
 		return nil, err
 	}

-	// 调用 LoadCodeAssist 获取订阅等级（非关键路径，失败不影响主流程）
-	tierRaw, tierNormalized := f.fetchSubscriptionTier(ctx, client, accessToken)
+	// 调用 LoadCodeAssist 获取订阅等级和 AI Credits 余额（非关键路径，失败不影响主流程）
+	tierRaw, tierNormalized, loadResp := f.fetchSubscriptionTier(ctx, client, accessToken)

 	// 转换为 UsageInfo
-	usageInfo := f.buildUsageInfo(modelsResp, tierRaw, tierNormalized)
+	usageInfo := f.buildUsageInfo(modelsResp, tierRaw, tierNormalized, loadResp)

 	return &QuotaResult{
 		UsageInfo: usageInfo,
@@ -90,20 +90,21 @@ func (f *AntigravityQuotaFetcher) FetchQuota(ctx context.Context, account *Accou
 	}, nil
 }

-// fetchSubscriptionTier 获取账号订阅等级，失败返回空字符串
-func (f *AntigravityQuotaFetcher) fetchSubscriptionTier(ctx context.Context, client *antigravity.Client, accessToken string) (raw, normalized string) {
+// fetchSubscriptionTier calls LoadCodeAssist to read the account's subscription tier; on a request error (logged as a warning) or a nil response it returns empty strings and nil.
+// On success it also returns the LoadCodeAssistResponse so the caller can extract the AI Credits balance from it.
+func (f *AntigravityQuotaFetcher) fetchSubscriptionTier(ctx context.Context, client *antigravity.Client, accessToken string) (raw, normalized string, loadResp *antigravity.LoadCodeAssistResponse) {
 	loadResp, _, err := client.LoadCodeAssist(ctx, accessToken)
 	if err != nil {
 		slog.Warn("failed to fetch subscription tier", "error", err)
-		return "", ""
+		return "", "", nil
 	}
 	if loadResp == nil {
-		return "", ""
+		return "", "", nil
 	}

 	raw = loadResp.GetTier() // 已有方法：paidTier > currentTier
 	normalized = normalizeTier(raw)
-	return raw, normalized
+	return raw, normalized, loadResp
 }

 // normalizeTier 将原始 tier 字符串归一化为 FREE/PRO/ULTRA/UNKNOWN
@@ -124,8 +125,8 @@ func normalizeTier(raw string) string {
 	}
 }

-// buildUsageInfo 将 API 响应转换为 UsageInfo
-func (f *AntigravityQuotaFetcher) buildUsageInfo(modelsResp *antigravity.FetchAvailableModelsResponse, tierRaw, tierNormalized string) *UsageInfo {
+// buildUsageInfo 将 API 响应转换为 UsageInfo。
+func (f *AntigravityQuotaFetcher) buildUsageInfo(modelsResp *antigravity.FetchAvailableModelsResponse, tierRaw, tierNormalized string, loadResp *antigravity.LoadCodeAssistResponse) *UsageInfo {
 	now := time.Now()
 	info := &UsageInfo{
 		UpdatedAt:               &now,
@@ -190,6 +191,16 @@ func (f *AntigravityQuotaFetcher) buildUsageInfo(modelsResp *antigravity.FetchAv
 		}
 	}

+	if loadResp != nil {
+		for _, credit := range loadResp.GetAvailableCredits() {
+			info.AICredits = append(info.AICredits, AICredit{
+				CreditType:     credit.CreditType,
+				Amount:         credit.GetAmount(),
+				MinimumBalance: credit.GetMinimumAmount(),
+			})
+		}
+	}
+
 	return info
 }

--- a/backend/internal/service/antigravity_quota_fetcher_test.go
+++ b/backend/internal/service/antigravity_quota_fetcher_test.go
@@ -81,7 +81,7 @@ func TestBuildUsageInfo_BasicModels(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "g1-pro-tier", "PRO")
+	info := fetcher.buildUsageInfo(modelsResp, "g1-pro-tier", "PRO", nil)

 	// 基本字段
 	require.NotNil(t, info.UpdatedAt, "UpdatedAt should be set")
@@ -141,7 +141,7 @@ func TestBuildUsageInfo_DeprecatedModels(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.Len(t, info.ModelForwardingRules, 2)
 	require.Equal(t, "claude-sonnet-4-20250514", info.ModelForwardingRules["claude-3-sonnet-20240229"])
@@ -159,7 +159,7 @@ func TestBuildUsageInfo_NoDeprecatedModels(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.Nil(t, info.ModelForwardingRules, "ModelForwardingRules should be nil when no deprecated models")
 }
@@ -171,7 +171,7 @@ func TestBuildUsageInfo_EmptyModels(t *testing.T) {
 		Models: map[string]antigravity.ModelInfo{},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.NotNil(t, info)
 	require.NotNil(t, info.AntigravityQuota)
@@ -193,7 +193,7 @@ func TestBuildUsageInfo_ModelWithNilQuotaInfo(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.NotNil(t, info)
 	require.Empty(t, info.AntigravityQuota, "models with nil QuotaInfo should be skipped")
@@ -222,7 +222,7 @@ func TestBuildUsageInfo_FiveHourPriorityOrder(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.NotNil(t, info.FiveHour, "FiveHour should be set when a priority model exists")
 	// claude-sonnet-4-20250514 is first in priority list, so it should be used
@@ -251,7 +251,7 @@ func TestBuildUsageInfo_FiveHourFallbackToClaude4(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.NotNil(t, info.FiveHour)
 	expectedUtilization := (1.0 - 0.60) * 100 // 40
@@ -277,7 +277,7 @@ func TestBuildUsageInfo_FiveHourFallbackToGemini(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.NotNil(t, info.FiveHour)
 	expectedUtilization := (1.0 - 0.30) * 100 // 70
@@ -298,7 +298,7 @@ func TestBuildUsageInfo_FiveHourNoPriorityModel(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.Nil(t, info.FiveHour, "FiveHour should be nil when no priority model exists")
 }
@@ -317,7 +317,7 @@ func TestBuildUsageInfo_FiveHourWithEmptyResetTime(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	require.NotNil(t, info.FiveHour)
 	require.Nil(t, info.FiveHour.ResetsAt, "ResetsAt should be nil when ResetTime is empty")
@@ -338,7 +338,7 @@ func TestBuildUsageInfo_FullUtilization(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)

 	quota := info.AntigravityQuota["claude-sonnet-4-20250514"]
 	require.NotNil(t, quota)
@@ -358,13 +358,38 @@ func TestBuildUsageInfo_ZeroUtilization(t *testing.T) {
 		},
 	}

-	info := fetcher.buildUsageInfo(modelsResp, "", "")
-
+	info := fetcher.buildUsageInfo(modelsResp, "", "", nil)
 	quota := info.AntigravityQuota["claude-sonnet-4-20250514"]
 	require.NotNil(t, quota)
 	require.Equal(t, 0, quota.Utilization)
 }

+func TestBuildUsageInfo_AICredits(t *testing.T) {
+	fetcher := &AntigravityQuotaFetcher{}
+	modelsResp := &antigravity.FetchAvailableModelsResponse{
+		Models: map[string]antigravity.ModelInfo{},
+	}
+	loadResp := &antigravity.LoadCodeAssistResponse{
+		PaidTier: &antigravity.PaidTierInfo{
+			ID: "g1-pro-tier",
+			AvailableCredits: []antigravity.AvailableCredit{
+				{
+					CreditType:                  "GOOGLE_ONE_AI",
+					CreditAmount:                "25",
+					MinimumCreditAmountForUsage: "5",
+				},
+			},
+		},
+	}
+
+	info := fetcher.buildUsageInfo(modelsResp, "g1-pro-tier", "PRO", loadResp)
+
+	require.Len(t, info.AICredits, 1)
+	require.Equal(t, "GOOGLE_ONE_AI", info.AICredits[0].CreditType)
+	require.Equal(t, 25.0, info.AICredits[0].Amount)
+	require.Equal(t, 5.0, info.AICredits[0].MinimumBalance)
+}
+
 func TestFetchQuota_ForbiddenReturnsIsForbidden(t *testing.T) {
 	// 模拟 FetchQuota 遇到 403 时的行为：
 	// FetchAvailableModels 返回 ForbiddenError → FetchQuota 应返回 is_forbidden=true
--- a/backend/internal/service/antigravity_quota_scope.go
+++ b/backend/internal/service/antigravity_quota_scope.go
@@ -32,6 +32,10 @@ func (a *Account) IsSchedulableForModelWithContext(ctx context.Context, requeste
 		return false
 	}
 	if a.isModelRateLimitedWithContext(ctx, requestedModel) {
+		// Antigravity + overages 启用 + 积分未耗尽 → 放行（有积分可用）
+		if a.Platform == PlatformAntigravity && a.IsOveragesEnabled() && !a.isCreditsExhausted() {
+			return true
+		}
 		return false
 	}
 	return true
--- a/backend/internal/service/antigravity_rate_limit_test.go
+++ b/backend/internal/service/antigravity_rate_limit_test.go
@@ -76,10 +76,16 @@ type modelRateLimitCall struct {
 	resetAt   time.Time
 }

+type extraUpdateCall struct {
+	accountID int64
+	updates   map[string]any
+}
+
 type stubAntigravityAccountRepo struct {
 	AccountRepository
 	rateCalls           []rateLimitCall
 	modelRateLimitCalls []modelRateLimitCall
+	extraUpdateCalls    []extraUpdateCall
 }

 func (s *stubAntigravityAccountRepo) SetRateLimited(ctx context.Context, id int64, resetAt time.Time) error {
@@ -92,6 +98,11 @@ func (s *stubAntigravityAccountRepo) SetModelRateLimit(ctx context.Context, id i
 	return nil
 }

+func (s *stubAntigravityAccountRepo) UpdateExtra(ctx context.Context, id int64, updates map[string]any) error {
+	s.extraUpdateCalls = append(s.extraUpdateCalls, extraUpdateCall{accountID: id, updates: updates})
+	return nil
+}
+
 func TestAntigravityRetryLoop_NoURLFallback_UsesConfiguredBaseURL(t *testing.T) {
 	t.Setenv(antigravityForwardBaseURLEnv, "")

--- a/backend/internal/service/antigravity_single_account_retry_test.go
+++ b/backend/internal/service/antigravity_single_account_retry_test.go
@@ -260,14 +260,15 @@ func TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches(t *test

 // TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit
 // 503 + retryDelay < 7s + SingleAccountRetry → 智能重试耗尽后直接返回 503，不设限流
+// 使用 RATE_LIMIT_EXCEEDED（走 1 次智能重试），避免 MODEL_CAPACITY_EXHAUSTED 的 60 次重试导致测试超时
 func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testing.T) {
 	// 智能重试也返回 503
 	failRespBody := `{
 		"error": {
 			"code": 503,
-			"status": "UNAVAILABLE",
+			"status": "RESOURCE_EXHAUSTED",
 			"details": [
-				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "RATE_LIMIT_EXCEEDED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 			]
 		}
@@ -278,8 +279,9 @@ func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testi
 		Body:       io.NopCloser(strings.NewReader(failRespBody)),
 	}
 	upstream := &mockSmartRetryUpstream{
-		responses: []*http.Response{failResp},
-		errors:    []error{nil},
+		responses:  []*http.Response{failResp},
+		errors:     []error{nil},
+		repeatLast: true,
 	}

 	repo := &stubAntigravityAccountRepo{}
@@ -294,9 +296,9 @@ func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testi
 	respBody := []byte(`{
 		"error": {
 			"code": 503,
-			"status": "UNAVAILABLE",
+			"status": "RESOURCE_EXHAUSTED",
 			"details": [
-				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "RATE_LIMIT_EXCEEDED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 			]
 		}
@@ -569,8 +571,9 @@ func TestHandleSingleAccountRetryInPlace_WaitDurationClamped(t *testing.T) {

 	svc := &AntigravityGatewayService{}

-	// 等待时间过大应被 clamp 到 antigravitySingleAccountSmartRetryMaxWait
-	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 999*time.Second, "gemini-3-pro")
+	// waitDuration=0 会被 clamp 到 antigravitySmartRetryMinWait=1s。
+	// 首次重试即成功（200），总耗时 ~1s。
+	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 0, "gemini-3-pro")
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	require.NotNil(t, result.resp)
--- a/backend/internal/service/antigravity_smart_retry_test.go
+++ b/backend/internal/service/antigravity_smart_retry_test.go
@@ -32,20 +32,65 @@ func (c *stubSmartRetryCache) DeleteSessionAccountID(_ context.Context, groupID

 // mockSmartRetryUpstream 用于 handleSmartRetry 测试的 mock upstream
 type mockSmartRetryUpstream struct {
-	responses []*http.Response
-	errors    []error
-	callIdx   int
-	calls     []string
+	responses      []*http.Response
+	responseBodies [][]byte // cached response body bytes, so Do can rebuild a fresh body reader (e.g. when repeatLast replays a response)
+	errors         []error
+	callIdx        int
+	calls          []string
+	requestBodies  [][]byte
+	repeatLast     bool // when calls run past len(responses), keep returning the last response
 }

 func (m *mockSmartRetryUpstream) Do(req *http.Request, proxyURL string, accountID int64, accountConcurrency int) (*http.Response, error) {
 	idx := m.callIdx
 	m.calls = append(m.calls, req.URL.String())
-	m.callIdx++
-	if idx < len(m.responses) {
-		return m.responses[idx], m.errors[idx]
+	if req != nil && req.Body != nil {
+		body, _ := io.ReadAll(req.Body)
+		m.requestBodies = append(m.requestBodies, body)
+		req.Body = io.NopCloser(bytes.NewReader(body))
+	} else {
+		m.requestBodies = append(m.requestBodies, nil)
 	}
-	return nil, nil
+	m.callIdx++
+
+	// Pick the response index: the call index, or the last response when repeatLast is set and we ran past the end.
+	respIdx := idx
+	if respIdx >= len(m.responses) {
+		if !m.repeatLast || len(m.responses) == 0 {
+			return nil, nil
+		}
+		respIdx = len(m.responses) - 1
+	}
+
+	resp := m.responses[respIdx]
+	respErr := m.errors[respIdx]
+	if resp == nil {
+		return nil, respErr
+	}
+
+	// Cache the body bytes the first time this response is served (grow the cache slice up to respIdx first).
+	if respIdx >= len(m.responseBodies) {
+		for len(m.responseBodies) <= respIdx {
+			m.responseBodies = append(m.responseBodies, nil)
+		}
+	}
+	if m.responseBodies[respIdx] == nil && resp.Body != nil {
+		bodyBytes, _ := io.ReadAll(resp.Body)
+		_ = resp.Body.Close()
+		m.responseBodies[respIdx] = bodyBytes
+	}
+
+	// Build a fresh reader over the cached bytes so each returned response has its own unread body.
+	var body io.ReadCloser
+	if m.responseBodies[respIdx] != nil {
+		body = io.NopCloser(bytes.NewReader(m.responseBodies[respIdx]))
+	}
+
+	return &http.Response{
+		StatusCode: resp.StatusCode,
+		Header:     resp.Header.Clone(),
+		Body:       body,
+	}, respErr
 }

 func (m *mockSmartRetryUpstream) DoWithTLS(req *http.Request, proxyURL string, accountID int64, accountConcurrency int, enableTLSFingerprint bool) (*http.Response, error) {
--- a/backend/internal/service/antigravity_token_provider.go
+++ b/backend/internal/service/antigravity_token_provider.go
@@ -3,7 +3,6 @@ package service
 import (
 	"context"
 	"errors"
-	"log"
 	"log/slog"
 	"strconv"
 	"strings"
@@ -17,15 +16,18 @@ const (
 	antigravityBackfillCooldown = 5 * time.Minute
 )

-// AntigravityTokenCache Token 缓存接口（复用 GeminiTokenCache 接口定义）
+// AntigravityTokenCache token cache interface.
 type AntigravityTokenCache = GeminiTokenCache

-// AntigravityTokenProvider 管理 Antigravity 账户的 access_token
+// AntigravityTokenProvider manages access_token for antigravity accounts.
 type AntigravityTokenProvider struct {
 	accountRepo             AccountRepository
 	tokenCache              AntigravityTokenCache
 	antigravityOAuthService *AntigravityOAuthService
-	backfillCooldown        sync.Map // key: int64 (account.ID) → value: time.Time
+	backfillCooldown        sync.Map // key: accountID -> last attempt time
+	refreshAPI              *OAuthRefreshAPI
+	executor                OAuthRefreshExecutor
+	refreshPolicy           ProviderRefreshPolicy
 }

 func NewAntigravityTokenProvider(
@@ -37,10 +39,22 @@ func NewAntigravityTokenProvider(
 		accountRepo:             accountRepo,
 		tokenCache:              tokenCache,
 		antigravityOAuthService: antigravityOAuthService,
+		refreshPolicy:           AntigravityProviderRefreshPolicy(),
 	}
 }

-// GetAccessToken 获取有效的 access_token
+// SetRefreshAPI injects unified OAuth refresh API and executor.
+func (p *AntigravityTokenProvider) SetRefreshAPI(api *OAuthRefreshAPI, executor OAuthRefreshExecutor) {
+	p.refreshAPI = api
+	p.executor = executor
+}
+
+// SetRefreshPolicy injects caller-side refresh policy.
+func (p *AntigravityTokenProvider) SetRefreshPolicy(policy ProviderRefreshPolicy) {
+	p.refreshPolicy = policy
+}
+
+// GetAccessToken returns a valid access_token.
 func (p *AntigravityTokenProvider) GetAccessToken(ctx context.Context, account *Account) (string, error) {
 	if account == nil {
 		return "", errors.New("account is nil")
@@ -48,7 +62,8 @@ func (p *AntigravityTokenProvider) GetAccessToken(ctx context.Context, account *
 	if account.Platform != PlatformAntigravity {
 		return "", errors.New("not an antigravity account")
 	}
-	// upstream 类型：直接从 credentials 读取 api_key，不走 OAuth 刷新流程
+
+	// upstream accounts use static api_key and never refresh oauth token.
 	if account.Type == AccountTypeUpstream {
 		apiKey := account.GetCredential("api_key")
 		if apiKey == "" {
@@ -62,46 +77,38 @@ func (p *AntigravityTokenProvider) GetAccessToken(ctx context.Context, account *

 	cacheKey := AntigravityTokenCacheKey(account)

-	// 1. 先尝试缓存
+	// 1) Try cache first.
 	if p.tokenCache != nil {
 		if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
 			return token, nil
 		}
 	}

-	// 2. 如果即将过期则刷新
+	// 2) Refresh if needed (pre-expiry skew).
 	expiresAt := account.GetCredentialAsTime("expires_at")
 	needsRefresh := expiresAt == nil || time.Until(*expiresAt) <= antigravityTokenRefreshSkew
-	if needsRefresh && p.tokenCache != nil {
+	if needsRefresh && p.refreshAPI != nil && p.executor != nil {
+		result, err := p.refreshAPI.RefreshIfNeeded(ctx, account, p.executor, antigravityTokenRefreshSkew)
+		if err != nil {
+			if p.refreshPolicy.OnRefreshError == ProviderRefreshErrorReturn {
+				return "", err
+			}
+		} else if result.LockHeld {
+			if p.refreshPolicy.OnLockHeld == ProviderLockHeldWaitForCache && p.tokenCache != nil {
+				if token, cacheErr := p.tokenCache.GetAccessToken(ctx, cacheKey); cacheErr == nil && strings.TrimSpace(token) != "" {
+					return token, nil
+				}
+			}
+			// default policy: continue with existing token.
+		} else {
+			account = result.Account
+			expiresAt = account.GetCredentialAsTime("expires_at")
+		}
+	} else if needsRefresh && p.tokenCache != nil {
+		// Backward-compatible test path when refreshAPI is not injected.
 		locked, err := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
 		if err == nil && locked {
 			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
-
-			// 拿到锁后再次检查缓存（另一个 worker 可能已刷新）
-			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-				return token, nil
-			}
-
-			// 从数据库获取最新账户信息
-			fresh, err := p.accountRepo.GetByID(ctx, account.ID)
-			if err == nil && fresh != nil {
-				account = fresh
-			}
-			expiresAt = account.GetCredentialAsTime("expires_at")
-			if expiresAt == nil || time.Until(*expiresAt) <= antigravityTokenRefreshSkew {
-				if p.antigravityOAuthService == nil {
-					return "", errors.New("antigravity oauth service not configured")
-				}
-				tokenInfo, err := p.antigravityOAuthService.RefreshAccountToken(ctx, account)
-				if err != nil {
-					return "", err
-				}
-				p.mergeCredentials(account, tokenInfo)
-				if updateErr := p.accountRepo.Update(ctx, account); updateErr != nil {
-					log.Printf("[AntigravityTokenProvider] Failed to update account credentials: %v", updateErr)
-				}
-				expiresAt = account.GetCredentialAsTime("expires_at")
-			}
 		}
 	}

@@ -110,32 +117,31 @@ func (p *AntigravityTokenProvider) GetAccessToken(ctx context.Context, account *
 		return "", errors.New("access_token not found in credentials")
 	}

-	// 如果账号还没有 project_id，尝试在线补齐，避免请求 daily/sandbox 时出现
-	// "Invalid project resource name projects/"。
-	// 仅调用 loadProjectIDWithRetry，不刷新 OAuth token；带冷却机制防止频繁重试。
+	// Backfill project_id online when missing, with cooldown to avoid hammering.
 	if strings.TrimSpace(account.GetCredential("project_id")) == "" && p.antigravityOAuthService != nil {
 		if p.shouldAttemptBackfill(account.ID) {
 			p.markBackfillAttempted(account.ID)
 			if projectID, err := p.antigravityOAuthService.FillProjectID(ctx, account, accessToken); err == nil && projectID != "" {
 				account.Credentials["project_id"] = projectID
 				if updateErr := p.accountRepo.Update(ctx, account); updateErr != nil {
-					log.Printf("[AntigravityTokenProvider] project_id 补齐持久化失败: %v", updateErr)
+					slog.Warn("antigravity_project_id_backfill_persist_failed",
+						"account_id", account.ID,
+						"error", updateErr,
+					)
 				}
 			}
 		}
 	}

-	// 3. 存入缓存（验证版本后再写入，避免异步刷新任务与请求线程的竞态条件）
+	// 3) Populate cache with TTL.
 	if p.tokenCache != nil {
 		latestAccount, isStale := CheckTokenVersion(ctx, account, p.accountRepo)
 		if isStale && latestAccount != nil {
-			// 版本过时，使用 DB 中的最新 token
 			slog.Debug("antigravity_token_version_stale_use_latest", "account_id", account.ID)
 			accessToken = latestAccount.GetCredential("access_token")
 			if strings.TrimSpace(accessToken) == "" {
 				return "", errors.New("access_token not found after version check")
 			}
-			// 不写入缓存，让下次请求重新处理
 		} else {
 			ttl := 30 * time.Minute
 			if expiresAt != nil {
@@ -156,18 +162,7 @@ func (p *AntigravityTokenProvider) GetAccessToken(ctx context.Context, account *
 	return accessToken, nil
 }

-// mergeCredentials 将 tokenInfo 构建的凭证合并到 account 中，保留原有未覆盖的字段
-func (p *AntigravityTokenProvider) mergeCredentials(account *Account, tokenInfo *AntigravityTokenInfo) {
-	newCredentials := p.antigravityOAuthService.BuildAccountCredentials(tokenInfo)
-	for k, v := range account.Credentials {
-		if _, exists := newCredentials[k]; !exists {
-			newCredentials[k] = v
-		}
-	}
-	account.Credentials = newCredentials
-}
-
-// shouldAttemptBackfill 检查是否应该尝试补齐 project_id（冷却期内不重复尝试）
+// shouldAttemptBackfill checks backfill cooldown.
 func (p *AntigravityTokenProvider) shouldAttemptBackfill(accountID int64) bool {
 	if v, ok := p.backfillCooldown.Load(accountID); ok {
 		if lastAttempt, ok := v.(time.Time); ok {
--- a/backend/internal/service/antigravity_token_refresher.go
+++ b/backend/internal/service/antigravity_token_refresher.go
@@ -25,6 +25,11 @@ func NewAntigravityTokenRefresher(antigravityOAuthService *AntigravityOAuthServi
 	}
 }

+// CacheKey returns the cache key used for the distributed lock; it delegates to AntigravityTokenCacheKey.
+func (r *AntigravityTokenRefresher) CacheKey(account *Account) string {
+	return AntigravityTokenCacheKey(account)
+}
+
 // CanRefresh 检查是否可以刷新此账户
 func (r *AntigravityTokenRefresher) CanRefresh(account *Account) bool {
 	return account.Platform == PlatformAntigravity && account.Type == AccountTypeOAuth
@@ -58,11 +63,7 @@ func (r *AntigravityTokenRefresher) Refresh(ctx context.Context, account *Accoun

 	newCredentials := r.antigravityOAuthService.BuildAccountCredentials(tokenInfo)
 	// 合并旧的 credentials，保留新 credentials 中不存在的字段
-	for k, v := range account.Credentials {
-		if _, exists := newCredentials[k]; !exists {
-			newCredentials[k] = v
-		}
-	}
+	newCredentials = MergeCredentials(account.Credentials, newCredentials)

 	// 特殊处理 project_id：如果新值为空但旧值非空，保留旧值
 	// 这确保了即使 LoadCodeAssist 失败，project_id 也不会丢失
--- a/backend/internal/service/backup_service.go
+++ b/backend/internal/service/backup_service.go
@@ -4,11 +4,13 @@ import (
 	"compress/gzip"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"sort"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/google/uuid"
@@ -84,17 +86,21 @@ type BackupScheduleConfig struct {

 // BackupRecord 备份记录
 type BackupRecord struct {
-	ID          string `json:"id"`
-	Status      string `json:"status"`      // pending, running, completed, failed
-	BackupType  string `json:"backup_type"` // postgres
-	FileName    string `json:"file_name"`
-	S3Key       string `json:"s3_key"`
-	SizeBytes   int64  `json:"size_bytes"`
-	TriggeredBy string `json:"triggered_by"` // manual, scheduled
-	ErrorMsg    string `json:"error_message,omitempty"`
-	StartedAt   string `json:"started_at"`
-	FinishedAt  string `json:"finished_at,omitempty"`
-	ExpiresAt   string `json:"expires_at,omitempty"` // 过期时间
+	ID            string `json:"id"`
+	Status        string `json:"status"`      // pending, running, completed, failed
+	BackupType    string `json:"backup_type"` // postgres
+	FileName      string `json:"file_name"`
+	S3Key         string `json:"s3_key"`
+	SizeBytes     int64  `json:"size_bytes"`
+	TriggeredBy   string `json:"triggered_by"` // manual, scheduled
+	ErrorMsg      string `json:"error_message,omitempty"`
+	StartedAt     string `json:"started_at"`
+	FinishedAt    string `json:"finished_at,omitempty"`
+	ExpiresAt     string `json:"expires_at,omitempty"`     // expiration time
+	Progress      string `json:"progress,omitempty"`       // "dumping", "uploading", ""
+	RestoreStatus string `json:"restore_status,omitempty"` // "", "running", "completed", "failed"
+	RestoreError  string `json:"restore_error,omitempty"`
+	RestoredAt    string `json:"restored_at,omitempty"`
 }

 // BackupService 数据库备份恢复服务
@@ -105,17 +111,24 @@ type BackupService struct {
 	storeFactory BackupObjectStoreFactory
 	dumper       DBDumper

-	mu        sync.Mutex
-	store     BackupObjectStore
-	s3Cfg     *BackupS3Config
+	opMu      sync.Mutex // 保护 backingUp/restoring 标志
 	backingUp bool
 	restoring bool

+	storeMu sync.Mutex // 保护 store/s3Cfg 缓存
+	store   BackupObjectStore
+	s3Cfg   *BackupS3Config
+
 	recordsMu sync.Mutex // 保护 records 的 load/save 操作

 	cronMu      sync.Mutex
 	cronSched   *cron.Cron
 	cronEntryID cron.EntryID
+
+	wg           sync.WaitGroup     // 追踪活跃的备份/恢复 goroutine
+	shuttingDown atomic.Bool        // 阻止新备份启动
+	bgCtx        context.Context    // 所有后台操作的 parent context
+	bgCancel     context.CancelFunc // 取消所有活跃后台操作
 }

 func NewBackupService(
@@ -125,20 +138,26 @@ func NewBackupService(
 	storeFactory BackupObjectStoreFactory,
 	dumper DBDumper,
 ) *BackupService {
+	bgCtx, bgCancel := context.WithCancel(context.Background())
 	return &BackupService{
 		settingRepo:  settingRepo,
 		dbCfg:        &cfg.Database,
 		encryptor:    encryptor,
 		storeFactory: storeFactory,
 		dumper:       dumper,
+		bgCtx:        bgCtx,
+		bgCancel:     bgCancel,
 	}
 }

-// Start 启动定时备份调度器
+// Start 启动定时备份调度器并清理孤立记录
 func (s *BackupService) Start() {
 	s.cronSched = cron.New()
 	s.cronSched.Start()

+	// 清理重启后孤立的 running 记录
+	s.recoverStaleRecords()
+
 	// 加载已有的定时配置
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	defer cancel()
@@ -154,13 +173,65 @@ func (s *BackupService) Start() {
 	}
 }

-// Stop 停止定时备份
+// recoverStaleRecords runs at startup and marks records still in "running" state (backup Status or RestoreStatus) as "failed"; load errors abort silently and save errors are discarded.
+func (s *BackupService) recoverStaleRecords() {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	records, err := s.loadRecords(ctx)
+	if err != nil {
+		return
+	}
+	for i := range records {
+		if records[i].Status == "running" {
+			records[i].Status = "failed"
+			records[i].ErrorMsg = "interrupted by server restart"
+			records[i].Progress = ""
+			records[i].FinishedAt = time.Now().Format(time.RFC3339)
+			_ = s.saveRecord(ctx, &records[i])
+			logger.LegacyPrintf("service.backup", "[Backup] recovered stale running record: %s", records[i].ID)
+		}
+		if records[i].RestoreStatus == "running" {
+			records[i].RestoreStatus = "failed"
+			records[i].RestoreError = "interrupted by server restart"
+			_ = s.saveRecord(ctx, &records[i])
+			logger.LegacyPrintf("service.backup", "[Backup] recovered stale restoring record: %s", records[i].ID)
+		}
+	}
+}
+
+// Stop sets the shutting-down flag, stops the cron scheduler, and waits for active backup/restore goroutines tracked by wg to finish.
 func (s *BackupService) Stop() {
+	s.shuttingDown.Store(true)
+
 	s.cronMu.Lock()
-	defer s.cronMu.Unlock()
 	if s.cronSched != nil {
 		s.cronSched.Stop()
 	}
+	s.cronMu.Unlock()
+
+	// Wait for active backups/restores to finish (at most 5 minutes); wg.Wait runs in a goroutine so it can be raced against a timer.
+	done := make(chan struct{})
+	go func() {
+		s.wg.Wait()
+		close(done)
+	}()
+	select {
+	case <-done:
+		logger.LegacyPrintf("service.backup", "[Backup] all active operations finished")
+	case <-time.After(5 * time.Minute):
+		logger.LegacyPrintf("service.backup", "[Backup] shutdown timeout after 5min, cancelling active operations")
+		if s.bgCancel != nil {
+			s.bgCancel() // cancel all background operations derived from bgCtx
+		}
+		// Give the goroutines up to 10 seconds to observe the cancellation and finish their cleanup.
+		select {
+		case <-done:
+			logger.LegacyPrintf("service.backup", "[Backup] active operations cancelled and cleaned up")
+		case <-time.After(10 * time.Second):
+			logger.LegacyPrintf("service.backup", "[Backup] goroutine cleanup timed out")
+		}
+	}
 }

 // ─── S3 配置管理 ───
@@ -203,10 +274,10 @@ func (s *BackupService) UpdateS3Config(ctx context.Context, cfg BackupS3Config)
 	}

 	// 清除缓存的 S3 客户端
-	s.mu.Lock()
+	s.storeMu.Lock()
 	s.store = nil
 	s.s3Cfg = nil
-	s.mu.Unlock()
+	s.storeMu.Unlock()

 	cfg.SecretAccessKey = ""
 	return &cfg, nil
@@ -314,7 +385,10 @@ func (s *BackupService) removeCronSchedule() {
 }

 func (s *BackupService) runScheduledBackup() {
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
+	s.wg.Add(1)
+	defer s.wg.Done()
+
+	ctx, cancel := context.WithTimeout(s.bgCtx, 30*time.Minute)
 	defer cancel()

 	// 读取定时备份配置中的过期天数
@@ -327,7 +401,11 @@ func (s *BackupService) runScheduledBackup() {
 	logger.LegacyPrintf("service.backup", "[Backup] 开始执行定时备份, 过期天数: %d", expireDays)
 	record, err := s.CreateBackup(ctx, "scheduled", expireDays)
 	if err != nil {
-		logger.LegacyPrintf("service.backup", "[Backup] 定时备份失败: %v", err)
+		if errors.Is(err, ErrBackupInProgress) {
+			logger.LegacyPrintf("service.backup", "[Backup] 定时备份跳过: 已有备份正在进行中")
+		} else {
+			logger.LegacyPrintf("service.backup", "[Backup] 定时备份失败: %v", err)
+		}
 		return
 	}
 	logger.LegacyPrintf("service.backup", "[Backup] 定时备份完成: id=%s size=%d", record.ID, record.SizeBytes)
@@ -346,17 +424,21 @@ func (s *BackupService) runScheduledBackup() {
 // CreateBackup 创建全量数据库备份并上传到 S3（流式处理）
 // expireDays: 备份过期天数，0=永不过期，默认14天
 func (s *BackupService) CreateBackup(ctx context.Context, triggeredBy string, expireDays int) (*BackupRecord, error) {
-	s.mu.Lock()
+	if s.shuttingDown.Load() {
+		return nil, infraerrors.ServiceUnavailable("SERVER_SHUTTING_DOWN", "server is shutting down")
+	}
+
+	s.opMu.Lock()
 	if s.backingUp {
-		s.mu.Unlock()
+		s.opMu.Unlock()
 		return nil, ErrBackupInProgress
 	}
 	s.backingUp = true
-	s.mu.Unlock()
+	s.opMu.Unlock()
 	defer func() {
-		s.mu.Lock()
+		s.opMu.Lock()
 		s.backingUp = false
-		s.mu.Unlock()
+		s.opMu.Unlock()
 	}()

 	s3Cfg, err := s.loadS3Config(ctx)
@@ -405,36 +487,47 @@ func (s *BackupService) CreateBackup(ctx context.Context, triggeredBy string, ex

 	// 使用 io.Pipe 将 gzip 压缩数据流式传递给 S3 上传
 	pr, pw := io.Pipe()
-	var gzipErr error
+	gzipDone := make(chan error, 1)
 	go func() {
+		defer func() {
+			if r := recover(); r != nil {
+				pw.CloseWithError(fmt.Errorf("gzip goroutine panic: %v", r)) //nolint:errcheck
+				gzipDone <- fmt.Errorf("gzip goroutine panic: %v", r)
+			}
+		}()
 		gzWriter := gzip.NewWriter(pw)
-		_, gzipErr = io.Copy(gzWriter, dumpReader)
-		if closeErr := gzWriter.Close(); closeErr != nil && gzipErr == nil {
-			gzipErr = closeErr
+		var gzErr error
+		_, gzErr = io.Copy(gzWriter, dumpReader)
+		if closeErr := gzWriter.Close(); closeErr != nil && gzErr == nil {
+			gzErr = closeErr
 		}
-		if closeErr := dumpReader.Close(); closeErr != nil && gzipErr == nil {
-			gzipErr = closeErr
+		if closeErr := dumpReader.Close(); closeErr != nil && gzErr == nil {
+			gzErr = closeErr
 		}
-		if gzipErr != nil {
-			_ = pw.CloseWithError(gzipErr)
+		if gzErr != nil {
+			_ = pw.CloseWithError(gzErr)
 		} else {
 			_ = pw.Close()
 		}
+		gzipDone <- gzErr
 	}()

 	contentType := "application/gzip"
 	sizeBytes, err := objectStore.Upload(ctx, s3Key, pr, contentType)
 	if err != nil {
+		_ = pr.CloseWithError(err) // 确保 gzip goroutine 不会悬挂
+		gzErr := <-gzipDone        // 安全等待 gzip goroutine 完成
 		record.Status = "failed"
 		errMsg := fmt.Sprintf("S3 upload failed: %v", err)
-		if gzipErr != nil {
-			errMsg = fmt.Sprintf("gzip/dump failed: %v", gzipErr)
+		if gzErr != nil {
+			errMsg = fmt.Sprintf("gzip/dump failed: %v", gzErr)
 		}
 		record.ErrorMsg = errMsg
 		record.FinishedAt = time.Now().Format(time.RFC3339)
 		_ = s.saveRecord(ctx, record)
 		return record, fmt.Errorf("backup upload: %w", err)
 	}
+	<-gzipDone // 确保 gzip goroutine 已退出

 	record.SizeBytes = sizeBytes
 	record.Status = "completed"
@@ -446,19 +539,187 @@ func (s *BackupService) CreateBackup(ctx context.Context, triggeredBy string, ex
 	return record, nil
 }

+// StartBackup 异步创建备份，立即返回 running 状态的记录
+func (s *BackupService) StartBackup(ctx context.Context, triggeredBy string, expireDays int) (*BackupRecord, error) {
+	if s.shuttingDown.Load() {
+		return nil, infraerrors.ServiceUnavailable("SERVER_SHUTTING_DOWN", "server is shutting down")
+	}
+
+	s.opMu.Lock()
+	if s.backingUp {
+		s.opMu.Unlock()
+		return nil, ErrBackupInProgress
+	}
+	s.backingUp = true
+	s.opMu.Unlock()
+
+	// 初始化阶段出错时自动重置标志
+	launched := false
+	defer func() {
+		if !launched {
+			s.opMu.Lock()
+			s.backingUp = false
+			s.opMu.Unlock()
+		}
+	}()
+
+	// 在返回前加载 S3 配置和创建 store，避免 goroutine 中配置被修改
+	s3Cfg, err := s.loadS3Config(ctx)
+	if err != nil {
+		return nil, err
+	}
+	if s3Cfg == nil || !s3Cfg.IsConfigured() {
+		return nil, ErrBackupS3NotConfigured
+	}
+
+	objectStore, err := s.getOrCreateStore(ctx, s3Cfg)
+	if err != nil {
+		return nil, fmt.Errorf("init object store: %w", err)
+	}
+
+	now := time.Now()
+	backupID := uuid.New().String()[:8]
+	fileName := fmt.Sprintf("%s_%s.sql.gz", s.dbCfg.DBName, now.Format("20060102_150405"))
+	s3Key := s.buildS3Key(s3Cfg, fileName)
+
+	var expiresAt string
+	if expireDays > 0 {
+		expiresAt = now.AddDate(0, 0, expireDays).Format(time.RFC3339)
+	}
+
+	record := &BackupRecord{
+		ID:          backupID,
+		Status:      "running",
+		BackupType:  "postgres",
+		FileName:    fileName,
+		S3Key:       s3Key,
+		TriggeredBy: triggeredBy,
+		StartedAt:   now.Format(time.RFC3339),
+		ExpiresAt:   expiresAt,
+		Progress:    "pending",
+	}
+
+	if err := s.saveRecord(ctx, record); err != nil {
+		return nil, fmt.Errorf("save initial record: %w", err)
+	}
+
+	launched = true
+	// 在启动 goroutine 前完成拷贝，避免数据竞争
+	result := *record
+
+	s.wg.Add(1)
+	go func() {
+		defer s.wg.Done()
+		defer func() {
+			s.opMu.Lock()
+			s.backingUp = false
+			s.opMu.Unlock()
+		}()
+		defer func() {
+			if r := recover(); r != nil {
+				logger.LegacyPrintf("service.backup", "[Backup] panic recovered: %v", r)
+				record.Status = "failed"
+				record.ErrorMsg = fmt.Sprintf("internal panic: %v", r)
+				record.Progress = ""
+				record.FinishedAt = time.Now().Format(time.RFC3339)
+				_ = s.saveRecord(context.Background(), record)
+			}
+		}()
+		s.executeBackup(record, objectStore)
+	}()
+
+	return &result, nil
+}
+
+// executeBackup 后台执行备份（独立于 HTTP context）
+func (s *BackupService) executeBackup(record *BackupRecord, objectStore BackupObjectStore) {
+	ctx, cancel := context.WithTimeout(s.bgCtx, 30*time.Minute)
+	defer cancel()
+
+	// 阶段1: pg_dump
+	record.Progress = "dumping"
+	_ = s.saveRecord(ctx, record)
+
+	dumpReader, err := s.dumper.Dump(ctx)
+	if err != nil {
+		record.Status = "failed"
+		record.ErrorMsg = fmt.Sprintf("pg_dump failed: %v", err)
+		record.Progress = ""
+		record.FinishedAt = time.Now().Format(time.RFC3339)
+		_ = s.saveRecord(context.Background(), record)
+		return
+	}
+
+	// 阶段2: gzip + upload
+	record.Progress = "uploading"
+	_ = s.saveRecord(ctx, record)
+
+	pr, pw := io.Pipe()
+	gzipDone := make(chan error, 1)
+	go func() {
+		defer func() {
+			if r := recover(); r != nil {
+				pw.CloseWithError(fmt.Errorf("gzip goroutine panic: %v", r)) //nolint:errcheck
+				gzipDone <- fmt.Errorf("gzip goroutine panic: %v", r)
+			}
+		}()
+		gzWriter := gzip.NewWriter(pw)
+		var gzErr error
+		_, gzErr = io.Copy(gzWriter, dumpReader)
+		if closeErr := gzWriter.Close(); closeErr != nil && gzErr == nil {
+			gzErr = closeErr
+		}
+		if closeErr := dumpReader.Close(); closeErr != nil && gzErr == nil {
+			gzErr = closeErr
+		}
+		if gzErr != nil {
+			_ = pw.CloseWithError(gzErr)
+		} else {
+			_ = pw.Close()
+		}
+		gzipDone <- gzErr
+	}()
+
+	contentType := "application/gzip"
+	sizeBytes, err := objectStore.Upload(ctx, record.S3Key, pr, contentType)
+	if err != nil {
+		_ = pr.CloseWithError(err) // 确保 gzip goroutine 不会悬挂
+		gzErr := <-gzipDone        // 安全等待 gzip goroutine 完成
+		record.Status = "failed"
+		errMsg := fmt.Sprintf("S3 upload failed: %v", err)
+		if gzErr != nil {
+			errMsg = fmt.Sprintf("gzip/dump failed: %v", gzErr)
+		}
+		record.ErrorMsg = errMsg
+		record.Progress = ""
+		record.FinishedAt = time.Now().Format(time.RFC3339)
+		_ = s.saveRecord(context.Background(), record)
+		return
+	}
+	<-gzipDone // 确保 gzip goroutine 已退出
+
+	record.SizeBytes = sizeBytes
+	record.Status = "completed"
+	record.Progress = ""
+	record.FinishedAt = time.Now().Format(time.RFC3339)
+	if err := s.saveRecord(context.Background(), record); err != nil {
+		logger.LegacyPrintf("service.backup", "[Backup] 保存备份记录失败: %v", err)
+	}
+}
+
 // RestoreBackup 从 S3 下载备份并流式恢复到数据库
 func (s *BackupService) RestoreBackup(ctx context.Context, backupID string) error {
-	s.mu.Lock()
+	s.opMu.Lock()
 	if s.restoring {
-		s.mu.Unlock()
+		s.opMu.Unlock()
 		return ErrRestoreInProgress
 	}
 	s.restoring = true
-	s.mu.Unlock()
+	s.opMu.Unlock()
 	defer func() {
-		s.mu.Lock()
+		s.opMu.Lock()
 		s.restoring = false
-		s.mu.Unlock()
+		s.opMu.Unlock()
 	}()

 	record, err := s.GetBackupRecord(ctx, backupID)
@@ -500,6 +761,112 @@ func (s *BackupService) RestoreBackup(ctx context.Context, backupID string) erro
 	return nil
 }

+// StartRestore 异步恢复备份，立即返回
+func (s *BackupService) StartRestore(ctx context.Context, backupID string) (*BackupRecord, error) {
+	if s.shuttingDown.Load() {
+		return nil, infraerrors.ServiceUnavailable("SERVER_SHUTTING_DOWN", "server is shutting down")
+	}
+
+	s.opMu.Lock()
+	if s.restoring {
+		s.opMu.Unlock()
+		return nil, ErrRestoreInProgress
+	}
+	s.restoring = true
+	s.opMu.Unlock()
+
+	// 初始化阶段出错时自动重置标志
+	launched := false
+	defer func() {
+		if !launched {
+			s.opMu.Lock()
+			s.restoring = false
+			s.opMu.Unlock()
+		}
+	}()
+
+	record, err := s.GetBackupRecord(ctx, backupID)
+	if err != nil {
+		return nil, err
+	}
+	if record.Status != "completed" {
+		return nil, infraerrors.BadRequest("BACKUP_NOT_COMPLETED", "can only restore from a completed backup")
+	}
+
+	s3Cfg, err := s.loadS3Config(ctx)
+	if err != nil {
+		return nil, err
+	}
+	objectStore, err := s.getOrCreateStore(ctx, s3Cfg)
+	if err != nil {
+		return nil, fmt.Errorf("init object store: %w", err)
+	}
+
+	record.RestoreStatus = "running"
+	_ = s.saveRecord(ctx, record)
+
+	launched = true
+	result := *record
+
+	s.wg.Add(1)
+	go func() {
+		defer s.wg.Done()
+		defer func() {
+			s.opMu.Lock()
+			s.restoring = false
+			s.opMu.Unlock()
+		}()
+		defer func() {
+			if r := recover(); r != nil {
+				logger.LegacyPrintf("service.backup", "[Backup] restore panic recovered: %v", r)
+				record.RestoreStatus = "failed"
+				record.RestoreError = fmt.Sprintf("internal panic: %v", r)
+				_ = s.saveRecord(context.Background(), record)
+			}
+		}()
+		s.executeRestore(record, objectStore)
+	}()
+
+	return &result, nil
+}
+
+// executeRestore 后台执行恢复
+func (s *BackupService) executeRestore(record *BackupRecord, objectStore BackupObjectStore) {
+	ctx, cancel := context.WithTimeout(s.bgCtx, 30*time.Minute)
+	defer cancel()
+
+	body, err := objectStore.Download(ctx, record.S3Key)
+	if err != nil {
+		record.RestoreStatus = "failed"
+		record.RestoreError = fmt.Sprintf("S3 download failed: %v", err)
+		_ = s.saveRecord(context.Background(), record)
+		return
+	}
+	defer func() { _ = body.Close() }()
+
+	gzReader, err := gzip.NewReader(body)
+	if err != nil {
+		record.RestoreStatus = "failed"
+		record.RestoreError = fmt.Sprintf("gzip reader: %v", err)
+		_ = s.saveRecord(context.Background(), record)
+		return
+	}
+	defer func() { _ = gzReader.Close() }()
+
+	if err := s.dumper.Restore(ctx, gzReader); err != nil {
+		record.RestoreStatus = "failed"
+		record.RestoreError = fmt.Sprintf("pg restore: %v", err)
+		_ = s.saveRecord(context.Background(), record)
+		return
+	}
+
+	record.RestoreStatus = "completed"
+	record.RestoredAt = time.Now().Format(time.RFC3339)
+	if err := s.saveRecord(context.Background(), record); err != nil {
+		logger.LegacyPrintf("service.backup", "[Backup] 保存恢复记录失败: %v", err)
+	}
+}
+
 // ─── 备份记录管理 ───

 func (s *BackupService) ListBackups(ctx context.Context) ([]BackupRecord, error) {
@@ -614,8 +981,8 @@ func (s *BackupService) loadS3Config(ctx context.Context) (*BackupS3Config, erro
 }

 func (s *BackupService) getOrCreateStore(ctx context.Context, cfg *BackupS3Config) (BackupObjectStore, error) {
-	s.mu.Lock()
-	defer s.mu.Unlock()
+	s.storeMu.Lock()
+	defer s.storeMu.Unlock()

 	if s.store != nil && s.s3Cfg != nil {
 		return s.store, nil
--- a/backend/internal/service/backup_service_test.go
+++ b/backend/internal/service/backup_service_test.go
@@ -134,6 +134,30 @@ func (m *mockDumper) Restore(_ context.Context, data io.Reader) error {
 	return nil
 }

+// blockingDumper 可控延迟的 dumper，用于测试异步行为
+type blockingDumper struct {
+	blockCh chan struct{}
+	data    []byte
+	restErr error
+}
+
+func (d *blockingDumper) Dump(ctx context.Context) (io.ReadCloser, error) {
+	select {
+	case <-d.blockCh:
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
+	return io.NopCloser(bytes.NewReader(d.data)), nil
+}
+
+func (d *blockingDumper) Restore(_ context.Context, data io.Reader) error {
+	if d.restErr != nil {
+		return d.restErr
+	}
+	_, _ = io.ReadAll(data)
+	return nil
+}
+
 type mockObjectStore struct {
 	objects map[string][]byte
 	mu      sync.Mutex
@@ -179,7 +203,7 @@ func (m *mockObjectStore) HeadBucket(_ context.Context) error {
 	return nil
 }

-func newTestBackupService(repo *mockSettingRepo, dumper *mockDumper, store *mockObjectStore) *BackupService {
+func newTestBackupService(repo *mockSettingRepo, dumper DBDumper, store *mockObjectStore) *BackupService {
 	cfg := &config.Config{
 		Database: config.DatabaseConfig{
 			Host:   "localhost",
@@ -361,9 +385,9 @@ func TestBackupService_CreateBackup_ConcurrentBlocked(t *testing.T) {
 	svc := newTestBackupService(repo, dumper, store)

 	// 手动设置 backingUp 标志
-	svc.mu.Lock()
+	svc.opMu.Lock()
 	svc.backingUp = true
-	svc.mu.Unlock()
+	svc.opMu.Unlock()

 	_, err := svc.CreateBackup(context.Background(), "manual", 14)
 	require.ErrorIs(t, err, ErrBackupInProgress)
@@ -526,3 +550,154 @@ func TestBackupService_LoadS3Config_Corrupted(t *testing.T) {
 	require.Error(t, err)
 	require.Nil(t, cfg)
 }
+
+// ─── Async Backup Tests ───
+
+func TestStartBackup_ReturnsImmediately(t *testing.T) {
+	repo := newMockSettingRepo()
+	seedS3Config(t, repo)
+
+	dumper := &blockingDumper{blockCh: make(chan struct{}), data: []byte("data")}
+	store := newMockObjectStore()
+	svc := newTestBackupService(repo, dumper, store)
+
+	record, err := svc.StartBackup(context.Background(), "manual", 14)
+	require.NoError(t, err)
+	require.Equal(t, "running", record.Status)
+	require.NotEmpty(t, record.ID)
+
+	// 释放 dumper 让后台完成
+	close(dumper.blockCh)
+	svc.wg.Wait()
+
+	// 验证最终状态
+	final, err := svc.GetBackupRecord(context.Background(), record.ID)
+	require.NoError(t, err)
+	require.Equal(t, "completed", final.Status)
+	require.Greater(t, final.SizeBytes, int64(0))
+}
+
+func TestStartBackup_ConcurrentBlocked(t *testing.T) {
+	repo := newMockSettingRepo()
+	seedS3Config(t, repo)
+
+	dumper := &blockingDumper{blockCh: make(chan struct{}), data: []byte("data")}
+	store := newMockObjectStore()
+	svc := newTestBackupService(repo, dumper, store)
+
+	// 第一次启动
+	_, err := svc.StartBackup(context.Background(), "manual", 14)
+	require.NoError(t, err)
+
+	// 第二次应被阻塞
+	_, err = svc.StartBackup(context.Background(), "manual", 14)
+	require.ErrorIs(t, err, ErrBackupInProgress)
+
+	close(dumper.blockCh)
+	svc.wg.Wait()
+}
+
+func TestStartBackup_ShuttingDown(t *testing.T) {
+	repo := newMockSettingRepo()
+	seedS3Config(t, repo)
+	svc := newTestBackupService(repo, &mockDumper{dumpData: []byte("data")}, newMockObjectStore())
+
+	svc.shuttingDown.Store(true)
+
+	_, err := svc.StartBackup(context.Background(), "manual", 14)
+	require.Error(t, err)
+	require.Contains(t, err.Error(), "shutting down")
+}
+
+func TestRecoverStaleRecords(t *testing.T) {
+	repo := newMockSettingRepo()
+	svc := newTestBackupService(repo, &mockDumper{}, newMockObjectStore())
+
+	// 模拟一条孤立的 running 记录
+	_ = svc.saveRecord(context.Background(), &BackupRecord{
+		ID:        "stale-1",
+		Status:    "running",
+		StartedAt: time.Now().Add(-1 * time.Hour).Format(time.RFC3339),
+	})
+	// 模拟一条孤立的恢复中记录
+	_ = svc.saveRecord(context.Background(), &BackupRecord{
+		ID:            "stale-2",
+		Status:        "completed",
+		RestoreStatus: "running",
+		StartedAt:     time.Now().Add(-1 * time.Hour).Format(time.RFC3339),
+	})
+
+	svc.recoverStaleRecords()
+
+	r1, _ := svc.GetBackupRecord(context.Background(), "stale-1")
+	require.Equal(t, "failed", r1.Status)
+	require.Contains(t, r1.ErrorMsg, "server restart")
+
+	r2, _ := svc.GetBackupRecord(context.Background(), "stale-2")
+	require.Equal(t, "failed", r2.RestoreStatus)
+	require.Contains(t, r2.RestoreError, "server restart")
+}
+
+func TestGracefulShutdown(t *testing.T) {
+	repo := newMockSettingRepo()
+	seedS3Config(t, repo)
+
+	dumper := &blockingDumper{blockCh: make(chan struct{}), data: []byte("data")}
+	store := newMockObjectStore()
+	svc := newTestBackupService(repo, dumper, store)
+
+	_, err := svc.StartBackup(context.Background(), "manual", 14)
+	require.NoError(t, err)
+
+	// Stop 应该等待备份完成
+	done := make(chan struct{})
+	go func() {
+		svc.Stop()
+		close(done)
+	}()
+
+	// 短暂等待确认 Stop 还在等待
+	select {
+	case <-done:
+		t.Fatal("Stop returned before backup finished")
+	case <-time.After(100 * time.Millisecond):
+		// 预期：Stop 还在等待
+	}
+
+	// 释放备份
+	close(dumper.blockCh)
+
+	// 现在 Stop 应该完成
+	select {
+	case <-done:
+		// 预期
+	case <-time.After(5 * time.Second):
+		t.Fatal("Stop did not return after backup finished")
+	}
+}
+
+func TestStartRestore_Async(t *testing.T) {
+	repo := newMockSettingRepo()
+	seedS3Config(t, repo)
+
+	dumpContent := "-- PostgreSQL dump\nCREATE TABLE test (id int);\n"
+	dumper := &mockDumper{dumpData: []byte(dumpContent)}
+	store := newMockObjectStore()
+	svc := newTestBackupService(repo, dumper, store)
+
+	// 先创建一个备份（同步方式）
+	record, err := svc.CreateBackup(context.Background(), "manual", 14)
+	require.NoError(t, err)
+
+	// 异步恢复
+	restored, err := svc.StartRestore(context.Background(), record.ID)
+	require.NoError(t, err)
+	require.Equal(t, "running", restored.RestoreStatus)
+
+	svc.wg.Wait()
+
+	// 验证最终状态
+	final, err := svc.GetBackupRecord(context.Background(), record.ID)
+	require.NoError(t, err)
+	require.Equal(t, "completed", final.RestoreStatus)
+}
--- a/backend/internal/service/claude_code_validator.go
+++ b/backend/internal/service/claude_code_validator.go
@@ -21,9 +21,6 @@ var (
 	// 带捕获组的版本提取正则
 	claudeCodeUAVersionPattern = regexp.MustCompile(`(?i)^claude-cli/(\d+\.\d+\.\d+)`)

-	// metadata.user_id 格式: user_{64位hex}_account__session_{uuid}
-	userIDPattern = regexp.MustCompile(`^user_[a-fA-F0-9]{64}_account__session_[\w-]+$`)
-
 	// System prompt 相似度阈值（默认 0.5，和 claude-relay-service 一致）
 	systemPromptThreshold = 0.5
 )
@@ -124,7 +121,7 @@ func (v *ClaudeCodeValidator) Validate(r *http.Request, body map[string]any) boo
 		return false
 	}

-	if !userIDPattern.MatchString(userID) {
+	if ParseMetadataUserID(userID) == nil {
 		return false
 	}

@@ -278,11 +275,7 @@ func SetClaudeCodeClient(ctx context.Context, isClaudeCode bool) context.Context
 // ExtractVersion 从 User-Agent 中提取 Claude Code 版本号
 // 返回 "2.1.22" 形式的版本号，如果不匹配返回空字符串
 func (v *ClaudeCodeValidator) ExtractVersion(ua string) string {
-	matches := claudeCodeUAVersionPattern.FindStringSubmatch(ua)
-	if len(matches) >= 2 {
-		return matches[1]
-	}
-	return ""
+	return ExtractCLIVersion(ua)
 }

 // SetClaudeCodeVersion 将 Claude Code 版本号设置到 context 中
--- a/backend/internal/service/claude_token_provider.go
+++ b/backend/internal/service/claude_token_provider.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"errors"
 	"log/slog"
-	"strconv"
 	"strings"
 	"time"
 )
@@ -15,14 +14,17 @@ const (
 	claudeLockWaitTime     = 200 * time.Millisecond
 )

-// ClaudeTokenCache Token 缓存接口（复用 GeminiTokenCache 接口定义）
+// ClaudeTokenCache token cache interface.
 type ClaudeTokenCache = GeminiTokenCache

-// ClaudeTokenProvider 管理 Claude (Anthropic) OAuth 账户的 access_token
+// ClaudeTokenProvider manages access_token for Claude OAuth accounts.
 type ClaudeTokenProvider struct {
-	accountRepo  AccountRepository
-	tokenCache   ClaudeTokenCache
-	oauthService *OAuthService
+	accountRepo   AccountRepository
+	tokenCache    ClaudeTokenCache
+	oauthService  *OAuthService
+	refreshAPI    *OAuthRefreshAPI
+	executor      OAuthRefreshExecutor
+	refreshPolicy ProviderRefreshPolicy
 }

 func NewClaudeTokenProvider(
@@ -31,13 +33,25 @@ func NewClaudeTokenProvider(
 	oauthService *OAuthService,
 ) *ClaudeTokenProvider {
 	return &ClaudeTokenProvider{
-		accountRepo:  accountRepo,
-		tokenCache:   tokenCache,
-		oauthService: oauthService,
+		accountRepo:   accountRepo,
+		tokenCache:    tokenCache,
+		oauthService:  oauthService,
+		refreshPolicy: ClaudeProviderRefreshPolicy(),
 	}
 }

-// GetAccessToken 获取有效的 access_token
+// SetRefreshAPI injects unified OAuth refresh API and executor.
+func (p *ClaudeTokenProvider) SetRefreshAPI(api *OAuthRefreshAPI, executor OAuthRefreshExecutor) {
+	p.refreshAPI = api
+	p.executor = executor
+}
+
+// SetRefreshPolicy injects caller-side refresh policy.
+func (p *ClaudeTokenProvider) SetRefreshPolicy(policy ProviderRefreshPolicy) {
+	p.refreshPolicy = policy
+}
+
+// GetAccessToken returns a valid access_token.
 func (p *ClaudeTokenProvider) GetAccessToken(ctx context.Context, account *Account) (string, error) {
 	if account == nil {
 		return "", errors.New("account is nil")
@@ -48,7 +62,7 @@ func (p *ClaudeTokenProvider) GetAccessToken(ctx context.Context, account *Accou

 	cacheKey := ClaudeTokenCacheKey(account)

-	// 1. 先尝试缓存
+	// 1) Try cache first.
 	if p.tokenCache != nil {
 		if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
 			slog.Debug("claude_token_cache_hit", "account_id", account.ID)
@@ -60,114 +74,39 @@ func (p *ClaudeTokenProvider) GetAccessToken(ctx context.Context, account *Accou

 	slog.Debug("claude_token_cache_miss", "account_id", account.ID)

-	// 2. 如果即将过期则刷新
+	// 2) Refresh if needed (pre-expiry skew).
 	expiresAt := account.GetCredentialAsTime("expires_at")
 	needsRefresh := expiresAt == nil || time.Until(*expiresAt) <= claudeTokenRefreshSkew
 	refreshFailed := false
-	if needsRefresh && p.tokenCache != nil {
-		locked, lockErr := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
-		if lockErr == nil && locked {
-			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()

-			// 拿到锁后再次检查缓存（另一个 worker 可能已刷新）
-			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-				return token, nil
+	if needsRefresh && p.refreshAPI != nil && p.executor != nil {
+		result, err := p.refreshAPI.RefreshIfNeeded(ctx, account, p.executor, claudeTokenRefreshSkew)
+		if err != nil {
+			if p.refreshPolicy.OnRefreshError == ProviderRefreshErrorReturn {
+				return "", err
 			}
-
-			// 从数据库获取最新账户信息
-			fresh, err := p.accountRepo.GetByID(ctx, account.ID)
-			if err == nil && fresh != nil {
-				account = fresh
-			}
-			expiresAt = account.GetCredentialAsTime("expires_at")
-			if expiresAt == nil || time.Until(*expiresAt) <= claudeTokenRefreshSkew {
-				if p.oauthService == nil {
-					slog.Warn("claude_oauth_service_not_configured", "account_id", account.ID)
-					refreshFailed = true // 无法刷新，标记失败
-				} else {
-					tokenInfo, err := p.oauthService.RefreshAccountToken(ctx, account)
-					if err != nil {
-						// 刷新失败时记录警告，但不立即返回错误，尝试使用现有 token
-						slog.Warn("claude_token_refresh_failed", "account_id", account.ID, "error", err)
-						refreshFailed = true // 刷新失败，标记以使用短 TTL
-					} else {
-						// 构建新 credentials，保留原有字段
-						newCredentials := make(map[string]any)
-						for k, v := range account.Credentials {
-							newCredentials[k] = v
-						}
-						newCredentials["access_token"] = tokenInfo.AccessToken
-						newCredentials["token_type"] = tokenInfo.TokenType
-						newCredentials["expires_in"] = strconv.FormatInt(tokenInfo.ExpiresIn, 10)
-						newCredentials["expires_at"] = strconv.FormatInt(tokenInfo.ExpiresAt, 10)
-						if tokenInfo.RefreshToken != "" {
-							newCredentials["refresh_token"] = tokenInfo.RefreshToken
-						}
-						if tokenInfo.Scope != "" {
-							newCredentials["scope"] = tokenInfo.Scope
-						}
-						account.Credentials = newCredentials
-						if updateErr := p.accountRepo.Update(ctx, account); updateErr != nil {
-							slog.Error("claude_token_provider_update_failed", "account_id", account.ID, "error", updateErr)
-						}
-						expiresAt = account.GetCredentialAsTime("expires_at")
-					}
-				}
-			}
-		} else if lockErr != nil {
-			// Redis 错误导致无法获取锁，降级为无锁刷新（仅在 token 接近过期时）
-			slog.Warn("claude_token_lock_failed_degraded_refresh", "account_id", account.ID, "error", lockErr)
-
-			// 检查 ctx 是否已取消
-			if ctx.Err() != nil {
-				return "", ctx.Err()
-			}
-
-			// 从数据库获取最新账户信息
-			if p.accountRepo != nil {
-				fresh, err := p.accountRepo.GetByID(ctx, account.ID)
-				if err == nil && fresh != nil {
-					account = fresh
-				}
-			}
-			expiresAt = account.GetCredentialAsTime("expires_at")
-
-			// 仅在 expires_at 已过期/接近过期时才执行无锁刷新
-			if expiresAt == nil || time.Until(*expiresAt) <= claudeTokenRefreshSkew {
-				if p.oauthService == nil {
-					slog.Warn("claude_oauth_service_not_configured", "account_id", account.ID)
-					refreshFailed = true
-				} else {
-					tokenInfo, err := p.oauthService.RefreshAccountToken(ctx, account)
-					if err != nil {
-						slog.Warn("claude_token_refresh_failed_degraded", "account_id", account.ID, "error", err)
-						refreshFailed = true
-					} else {
-						// 构建新 credentials，保留原有字段
-						newCredentials := make(map[string]any)
-						for k, v := range account.Credentials {
-							newCredentials[k] = v
-						}
-						newCredentials["access_token"] = tokenInfo.AccessToken
-						newCredentials["token_type"] = tokenInfo.TokenType
-						newCredentials["expires_in"] = strconv.FormatInt(tokenInfo.ExpiresIn, 10)
-						newCredentials["expires_at"] = strconv.FormatInt(tokenInfo.ExpiresAt, 10)
-						if tokenInfo.RefreshToken != "" {
-							newCredentials["refresh_token"] = tokenInfo.RefreshToken
-						}
-						if tokenInfo.Scope != "" {
-							newCredentials["scope"] = tokenInfo.Scope
-						}
-						account.Credentials = newCredentials
-						if updateErr := p.accountRepo.Update(ctx, account); updateErr != nil {
-							slog.Error("claude_token_provider_update_failed", "account_id", account.ID, "error", updateErr)
-						}
-						expiresAt = account.GetCredentialAsTime("expires_at")
-					}
+			slog.Warn("claude_token_refresh_failed", "account_id", account.ID, "error", err)
+			refreshFailed = true
+		} else if result.LockHeld {
+			if p.refreshPolicy.OnLockHeld == ProviderLockHeldWaitForCache && p.tokenCache != nil {
+				time.Sleep(claudeLockWaitTime)
+				if token, cacheErr := p.tokenCache.GetAccessToken(ctx, cacheKey); cacheErr == nil && strings.TrimSpace(token) != "" {
+					slog.Debug("claude_token_cache_hit_after_wait", "account_id", account.ID)
+					return token, nil
 				}
 			}
 		} else {
-			// 锁获取失败（被其他 worker 持有），等待 200ms 后重试读取缓存
+			account = result.Account
+			expiresAt = account.GetCredentialAsTime("expires_at")
+		}
+	} else if needsRefresh && p.tokenCache != nil {
+		// Backward-compatible test path when refreshAPI is not injected.
+		locked, lockErr := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
+		if lockErr == nil && locked {
+			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
+		} else if lockErr != nil {
+			slog.Warn("claude_token_lock_failed", "account_id", account.ID, "error", lockErr)
+		} else {
 			time.Sleep(claudeLockWaitTime)
 			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
 				slog.Debug("claude_token_cache_hit_after_wait", "account_id", account.ID)
@@ -181,22 +120,23 @@ func (p *ClaudeTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 		return "", errors.New("access_token not found in credentials")
 	}

-	// 3. 存入缓存（验证版本后再写入，避免异步刷新任务与请求线程的竞态条件）
+	// 3) Populate cache with TTL.
 	if p.tokenCache != nil {
 		latestAccount, isStale := CheckTokenVersion(ctx, account, p.accountRepo)
 		if isStale && latestAccount != nil {
-			// 版本过时，使用 DB 中的最新 token
 			slog.Debug("claude_token_version_stale_use_latest", "account_id", account.ID)
 			accessToken = latestAccount.GetCredential("access_token")
 			if strings.TrimSpace(accessToken) == "" {
 				return "", errors.New("access_token not found after version check")
 			}
-			// 不写入缓存，让下次请求重新处理
 		} else {
 			ttl := 30 * time.Minute
 			if refreshFailed {
-				// 刷新失败时使用短 TTL，避免失效 token 长时间缓存导致 401 抖动
-				ttl = time.Minute
+				if p.refreshPolicy.FailureTTL > 0 {
+					ttl = p.refreshPolicy.FailureTTL
+				} else {
+					ttl = time.Minute
+				}
 				slog.Debug("claude_token_cache_short_ttl", "account_id", account.ID, "reason", "refresh_failed")
 			} else if expiresAt != nil {
 				until := time.Until(*expiresAt)
--- a/backend/internal/service/dashboard_service.go
+++ b/backend/internal/service/dashboard_service.go
@@ -148,6 +148,15 @@ func (s *DashboardService) GetGroupStatsWithFilters(ctx context.Context, startTi
 	return stats, nil
 }

+// GetGroupUsageSummary returns today's and cumulative cost for all groups.
+func (s *DashboardService) GetGroupUsageSummary(ctx context.Context, todayStart time.Time) ([]usagestats.GroupUsageSummary, error) {
+	results, err := s.usageRepo.GetAllGroupUsageSummary(ctx, todayStart)
+	if err != nil {
+		return nil, fmt.Errorf("get group usage summary: %w", err)
+	}
+	return results, nil
+}
+
 func (s *DashboardService) getCachedDashboardStats(ctx context.Context) (*usagestats.DashboardStats, bool, error) {
 	data, err := s.cache.GetDashboardStats(ctx)
 	if err != nil {
@@ -335,6 +344,14 @@ func (s *DashboardService) GetUserSpendingRanking(ctx context.Context, startTime
 	return ranking, nil
 }

+func (s *DashboardService) GetUserBreakdownStats(ctx context.Context, startTime, endTime time.Time, dim usagestats.UserBreakdownDimension, limit int) ([]usagestats.UserBreakdownItem, error) {
+	stats, err := s.usageRepo.GetUserBreakdownStats(ctx, startTime, endTime, dim, limit)
+	if err != nil {
+		return nil, fmt.Errorf("get user breakdown stats: %w", err)
+	}
+	return stats, nil
+}
+
 func (s *DashboardService) GetBatchUserUsageStats(ctx context.Context, userIDs []int64, startTime, endTime time.Time) (map[int64]*usagestats.BatchUserUsageStats, error) {
 	stats, err := s.usageRepo.GetBatchUserUsageStats(ctx, userIDs, startTime, endTime)
 	if err != nil {
--- a/backend/internal/service/gateway_multiplatform_test.go
+++ b/backend/internal/service/gateway_multiplatform_test.go
@@ -278,8 +278,8 @@ func (m *mockGroupRepoForGateway) ListActiveByPlatform(ctx context.Context, plat
 func (m *mockGroupRepoForGateway) ExistsByName(ctx context.Context, name string) (bool, error) {
 	return false, nil
 }
-func (m *mockGroupRepoForGateway) GetAccountCount(ctx context.Context, groupID int64) (int64, error) {
-	return 0, nil
+func (m *mockGroupRepoForGateway) GetAccountCount(ctx context.Context, groupID int64) (int64, int64, error) {
+	return 0, 0, nil
 }
 func (m *mockGroupRepoForGateway) DeleteAccountGroupsByGroupID(ctx context.Context, groupID int64) (int64, error) {
 	return 0, nil
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -326,7 +326,6 @@ func isClaudeCodeCredentialScopeError(msg string) bool {
 // Some upstream APIs return non-standard "data:" without space (should be "data: ").
 var (
 	sseDataRe            = regexp.MustCompile(`^data:\s*`)
-	sessionIDRegex       = regexp.MustCompile(`session_([a-f0-9-]{36})`)
 	claudeCliUserAgentRe = regexp.MustCompile(`^claude-cli/\d+\.\d+\.\d+`)

 	// claudeCodePromptPrefixes 用于检测 Claude Code 系统提示词的前缀列表
@@ -644,8 +643,8 @@ func (s *GatewayService) GenerateSessionHash(parsed *ParsedRequest) string {

 	// 1. 最高优先级：从 metadata.user_id 提取 session_xxx
 	if parsed.MetadataUserID != "" {
-		if match := sessionIDRegex.FindStringSubmatch(parsed.MetadataUserID); len(match) > 1 {
-			return match[1]
+		if uid := ParseMetadataUserID(parsed.MetadataUserID); uid != nil && uid.SessionID != "" {
+			return uid.SessionID
 		}
 	}

@@ -1026,13 +1025,13 @@ func (s *GatewayService) buildOAuthMetadataUserID(parsed *ParsedRequest, account
 		sessionID = generateSessionUUID(seed)
 	}

-	// Prefer the newer format that includes account_uuid (if present),
-	// otherwise fall back to the legacy Claude Code format.
-	accountUUID := strings.TrimSpace(account.GetExtraString("account_uuid"))
-	if accountUUID != "" {
-		return fmt.Sprintf("user_%s_account_%s_session_%s", userID, accountUUID, sessionID)
+	// 根据指纹 UA 版本选择输出格式
+	var uaVersion string
+	if fp != nil {
+		uaVersion = ExtractCLIVersion(fp.UserAgent)
 	}
-	return fmt.Sprintf("user_%s_account__session_%s", userID, sessionID)
+	accountUUID := strings.TrimSpace(account.GetExtraString("account_uuid"))
+	return FormatMetadataUserID(userID, accountUUID, sessionID, uaVersion)
 }

 // GenerateSessionUUID creates a deterministic UUID4 from a seed string.
@@ -5533,7 +5532,7 @@ func (s *GatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Contex
 			// 如果启用了会话ID伪装，会在重写后替换 session 部分为固定值
 			accountUUID := account.GetExtraString("account_uuid")
 			if accountUUID != "" && fp.ClientID != "" {
-				if newBody, err := s.identityService.RewriteUserIDWithMasking(ctx, body, account, accountUUID, fp.ClientID); err == nil && len(newBody) > 0 {
+				if newBody, err := s.identityService.RewriteUserIDWithMasking(ctx, body, account, accountUUID, fp.ClientID, fp.UserAgent); err == nil && len(newBody) > 0 {
 					body = newBody
 				}
 			}
@@ -7130,6 +7129,8 @@ type RecordUsageInput struct {
 	User               *User
 	Account            *Account
 	Subscription       *UserSubscription  // 可选：订阅信息
+	InboundEndpoint    string             // 入站端点（客户端请求路径）
+	UpstreamEndpoint   string             // 上游端点（标准化后的上游路径）
 	UserAgent          string             // 请求的 User-Agent
 	IPAddress          string             // 请求的客户端 IP 地址
 	RequestPayloadHash string             // 请求体语义哈希，用于降低 request_id 误复用时的静默误去重风险
@@ -7528,6 +7529,8 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu
 		RequestID:             requestID,
 		Model:                 result.Model,
 		ReasoningEffort:       result.ReasoningEffort,
+		InboundEndpoint:       optionalTrimmedStringPtr(input.InboundEndpoint),
+		UpstreamEndpoint:      optionalTrimmedStringPtr(input.UpstreamEndpoint),
 		InputTokens:           result.Usage.InputTokens,
 		OutputTokens:          result.Usage.OutputTokens,
 		CacheCreationTokens:   result.Usage.CacheCreationInputTokens,
@@ -7608,6 +7611,8 @@ type RecordUsageLongContextInput struct {
 	User                  *User
 	Account               *Account
 	Subscription          *UserSubscription  // 可选：订阅信息
+	InboundEndpoint       string             // 入站端点（客户端请求路径）
+	UpstreamEndpoint      string             // 上游端点（标准化后的上游路径）
 	UserAgent             string             // 请求的 User-Agent
 	IPAddress             string             // 请求的客户端 IP 地址
 	RequestPayloadHash    string             // 请求体语义哈希，用于降低 request_id 误复用时的静默误去重风险
@@ -7705,6 +7710,8 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input *
 		RequestID:             requestID,
 		Model:                 result.Model,
 		ReasoningEffort:       result.ReasoningEffort,
+		InboundEndpoint:       optionalTrimmedStringPtr(input.InboundEndpoint),
+		UpstreamEndpoint:      optionalTrimmedStringPtr(input.UpstreamEndpoint),
 		InputTokens:           result.Usage.InputTokens,
 		OutputTokens:          result.Usage.OutputTokens,
 		CacheCreationTokens:   result.Usage.CacheCreationInputTokens,
@@ -8153,7 +8160,7 @@ func (s *GatewayService) buildCountTokensRequest(ctx context.Context, c *gin.Con
 		if err == nil {
 			accountUUID := account.GetExtraString("account_uuid")
 			if accountUUID != "" && fp.ClientID != "" {
-				if newBody, err := s.identityService.RewriteUserIDWithMasking(ctx, body, account, accountUUID, fp.ClientID); err == nil && len(newBody) > 0 {
+				if newBody, err := s.identityService.RewriteUserIDWithMasking(ctx, body, account, accountUUID, fp.ClientID, fp.UserAgent); err == nil && len(newBody) > 0 {
 					body = newBody
 				}
 			}
--- a/backend/internal/service/gemini_multiplatform_test.go
+++ b/backend/internal/service/gemini_multiplatform_test.go
@@ -230,8 +230,8 @@ func (m *mockGroupRepoForGemini) ListActiveByPlatform(ctx context.Context, platf
 func (m *mockGroupRepoForGemini) ExistsByName(ctx context.Context, name string) (bool, error) {
 	return false, nil
 }
-func (m *mockGroupRepoForGemini) GetAccountCount(ctx context.Context, groupID int64) (int64, error) {
-	return 0, nil
+func (m *mockGroupRepoForGemini) GetAccountCount(ctx context.Context, groupID int64) (int64, int64, error) {
+	return 0, 0, nil
 }
 func (m *mockGroupRepoForGemini) DeleteAccountGroupsByGroupID(ctx context.Context, groupID int64) (int64, error) {
 	return 0, nil
--- a/backend/internal/service/gemini_token_provider.go
+++ b/backend/internal/service/gemini_token_provider.go
@@ -15,10 +15,14 @@ const (
 	geminiTokenCacheSkew   = 5 * time.Minute
 )

+// GeminiTokenProvider manages access_token for Gemini OAuth accounts.
 type GeminiTokenProvider struct {
 	accountRepo        AccountRepository
 	tokenCache         GeminiTokenCache
 	geminiOAuthService *GeminiOAuthService
+	refreshAPI         *OAuthRefreshAPI
+	executor           OAuthRefreshExecutor
+	refreshPolicy      ProviderRefreshPolicy
 }

 func NewGeminiTokenProvider(
@@ -30,9 +34,21 @@ func NewGeminiTokenProvider(
 		accountRepo:        accountRepo,
 		tokenCache:         tokenCache,
 		geminiOAuthService: geminiOAuthService,
+		refreshPolicy:      GeminiProviderRefreshPolicy(),
 	}
 }

+// SetRefreshAPI stores the unified OAuth refresh API and the executor handed to it; GetAccessToken takes the unified refresh path only when both are non-nil.
+func (p *GeminiTokenProvider) SetRefreshAPI(api *OAuthRefreshAPI, executor OAuthRefreshExecutor) {
+	p.refreshAPI = api
+	p.executor = executor
+}
+
+// SetRefreshPolicy replaces the caller-side refresh policy (the constructor defaults it to GeminiProviderRefreshPolicy()).
+func (p *GeminiTokenProvider) SetRefreshPolicy(policy ProviderRefreshPolicy) {
+	p.refreshPolicy = policy
+}
+
 func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Account) (string, error) {
 	if account == nil {
 		return "", errors.New("account is nil")
@@ -53,39 +69,31 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 	// 2) Refresh if needed (pre-expiry skew).
 	expiresAt := account.GetCredentialAsTime("expires_at")
 	needsRefresh := expiresAt == nil || time.Until(*expiresAt) <= geminiTokenRefreshSkew
-	if needsRefresh && p.tokenCache != nil {
-		locked, err := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
-		if err == nil && locked {
-			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()

-			// Re-check after lock (another worker may have refreshed).
-			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-				return token, nil
+	if needsRefresh && p.refreshAPI != nil && p.executor != nil {
+		result, err := p.refreshAPI.RefreshIfNeeded(ctx, account, p.executor, geminiTokenRefreshSkew)
+		if err != nil {
+			if p.refreshPolicy.OnRefreshError == ProviderRefreshErrorReturn {
+				return "", err
 			}
-
-			fresh, err := p.accountRepo.GetByID(ctx, account.ID)
-			if err == nil && fresh != nil {
-				account = fresh
+		} else if result.LockHeld {
+			if p.refreshPolicy.OnLockHeld == ProviderLockHeldWaitForCache && p.tokenCache != nil {
+				if token, cacheErr := p.tokenCache.GetAccessToken(ctx, cacheKey); cacheErr == nil && strings.TrimSpace(token) != "" {
+					return token, nil
+				}
 			}
+			slog.Debug("gemini_token_lock_held_use_old", "account_id", account.ID)
+		} else {
+			account = result.Account
 			expiresAt = account.GetCredentialAsTime("expires_at")
-			if expiresAt == nil || time.Until(*expiresAt) <= geminiTokenRefreshSkew {
-				if p.geminiOAuthService == nil {
-					return "", errors.New("gemini oauth service not configured")
-				}
-				tokenInfo, err := p.geminiOAuthService.RefreshAccountToken(ctx, account)
-				if err != nil {
-					return "", err
-				}
-				newCredentials := p.geminiOAuthService.BuildAccountCredentials(tokenInfo)
-				for k, v := range account.Credentials {
-					if _, exists := newCredentials[k]; !exists {
-						newCredentials[k] = v
-					}
-				}
-				account.Credentials = newCredentials
-				_ = p.accountRepo.Update(ctx, account)
-				expiresAt = account.GetCredentialAsTime("expires_at")
-			}
+		}
+	} else if needsRefresh && p.tokenCache != nil {
+		// Backward-compatible test path when refreshAPI is not injected.
+		locked, lockErr := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
+		if lockErr == nil && locked {
+			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
+		} else if lockErr != nil {
+			slog.Warn("gemini_token_lock_failed", "account_id", account.ID, "error", lockErr)
 		}
 	}

@@ -95,15 +103,14 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 	}

 	// project_id is optional now:
-	// - If present: will use Code Assist API (requires project_id)
-	// - If absent: will use AI Studio API with OAuth token (like regular API key mode)
-	// Auto-detect project_id only if explicitly enabled via a credential flag
+	// - If present: use Code Assist API (requires project_id)
+	// - If absent: use AI Studio API with OAuth token.
 	projectID := strings.TrimSpace(account.GetCredential("project_id"))
 	autoDetectProjectID := account.GetCredential("auto_detect_project_id") == "true"

 	if projectID == "" && autoDetectProjectID {
 		if p.geminiOAuthService == nil {
-			return accessToken, nil // Fallback to AI Studio API mode
+			return accessToken, nil
 		}

 		var proxyURL string
@@ -132,17 +139,15 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 		}
 	}

-	// 3) Populate cache with TTL（验证版本后再写入，避免异步刷新任务与请求线程的竞态条件）
+	// 3) Populate cache with TTL.
 	if p.tokenCache != nil {
 		latestAccount, isStale := CheckTokenVersion(ctx, account, p.accountRepo)
 		if isStale && latestAccount != nil {
-			// 版本过时，使用 DB 中的最新 token
 			slog.Debug("gemini_token_version_stale_use_latest", "account_id", account.ID)
 			accessToken = latestAccount.GetCredential("access_token")
 			if strings.TrimSpace(accessToken) == "" {
 				return "", errors.New("access_token not found after version check")
 			}
-			// 不写入缓存，让下次请求重新处理
 		} else {
 			ttl := 30 * time.Minute
 			if expiresAt != nil {
--- a/backend/internal/service/gemini_token_refresher.go
+++ b/backend/internal/service/gemini_token_refresher.go
@@ -13,6 +13,11 @@ func NewGeminiTokenRefresher(geminiOAuthService *GeminiOAuthService) *GeminiToke
 	return &GeminiTokenRefresher{geminiOAuthService: geminiOAuthService}
 }

+// CacheKey returns the cache key used for the distributed lock; it delegates to GeminiTokenCacheKey.
+func (r *GeminiTokenRefresher) CacheKey(account *Account) string {
+	return GeminiTokenCacheKey(account)
+}
+
 func (r *GeminiTokenRefresher) CanRefresh(account *Account) bool {
 	return account.Platform == PlatformGemini && account.Type == AccountTypeOAuth
 }
@@ -35,11 +40,7 @@ func (r *GeminiTokenRefresher) Refresh(ctx context.Context, account *Account) (m
 	}

 	newCredentials := r.geminiOAuthService.BuildAccountCredentials(tokenInfo)
-	for k, v := range account.Credentials {
-		if _, exists := newCredentials[k]; !exists {
-			newCredentials[k] = v
-		}
-	}
+	newCredentials = MergeCredentials(account.Credentials, newCredentials)

 	return newCredentials, nil
 }
--- a/backend/internal/service/generate_session_hash_test.go
+++ b/backend/internal/service/generate_session_hash_test.go
@@ -24,7 +24,7 @@ func TestGenerateSessionHash_MetadataHasHighestPriority(t *testing.T) {
 	svc := &GatewayService{}

 	parsed := &ParsedRequest{
-		MetadataUserID: "session_123e4567-e89b-12d3-a456-426614174000",
+		MetadataUserID: "user_a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2_account__session_123e4567-e89b-12d3-a456-426614174000",
 		System:         "You are a helpful assistant.",
 		HasSystem:      true,
 		Messages: []any{
@@ -196,7 +196,7 @@ func TestGenerateSessionHash_MetadataOverridesSessionContext(t *testing.T) {
 	svc := &GatewayService{}

 	parsed := &ParsedRequest{
-		MetadataUserID: "session_123e4567-e89b-12d3-a456-426614174000",
+		MetadataUserID: "user_a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2_account__session_123e4567-e89b-12d3-a456-426614174000",
 		Messages: []any{
 			map[string]any{"role": "user", "content": "hello"},
 		},
@@ -212,6 +212,22 @@ func TestGenerateSessionHash_MetadataOverridesSessionContext(t *testing.T) {
 		"metadata session_id should take priority over SessionContext")
 }

+func TestGenerateSessionHash_MetadataJSON_HasHighestPriority(t *testing.T) {
+	svc := &GatewayService{}
+
+	parsed := &ParsedRequest{
+		MetadataUserID: `{"device_id":"a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2","account_uuid":"","session_id":"c72554f2-1234-5678-abcd-123456789abc"}`,
+		System:         "You are a helpful assistant.",
+		HasSystem:      true,
+		Messages: []any{
+			map[string]any{"role": "user", "content": "hello"},
+		},
+	}
+
+	hash := svc.GenerateSessionHash(parsed)
+	require.Equal(t, "c72554f2-1234-5678-abcd-123456789abc", hash, "JSON format metadata session_id should have highest priority")
+}
+
 func TestGenerateSessionHash_NilSessionContextBackwardCompatible(t *testing.T) {
 	svc := &GatewayService{}

--- a/backend/internal/service/group.go
+++ b/backend/internal/service/group.go
@@ -64,8 +64,10 @@ type Group struct {
 	CreatedAt time.Time
 	UpdatedAt time.Time

-	AccountGroups []AccountGroup
-	AccountCount  int64
+	AccountGroups           []AccountGroup
+	AccountCount            int64
+	ActiveAccountCount      int64
+	RateLimitedAccountCount int64
 }

 func (g *Group) IsActive() bool {
--- a/backend/internal/service/group_capacity_service.go
+++ b/backend/internal/service/group_capacity_service.go
@@ -0,0 +1,131 @@
+package service
+
+import (
+	"context"
+	"time"
+)
+
+// GroupCapacitySummary holds used/max totals for concurrency, sessions and RPM, summed over one group's schedulable accounts.
+type GroupCapacitySummary struct {
+	GroupID         int64 `json:"group_id"`
+	ConcurrencyUsed int   `json:"concurrency_used"`
+	ConcurrencyMax  int   `json:"concurrency_max"`
+	SessionsUsed    int   `json:"sessions_used"`
+	SessionsMax     int   `json:"sessions_max"`
+	RPMUsed         int   `json:"rpm_used"`
+	RPMMax          int   `json:"rpm_max"`
+}
+
+// GroupCapacityService aggregates per-group capacity from runtime data.
+type GroupCapacityService struct {
+	accountRepo        AccountRepository
+	groupRepo          GroupRepository
+	concurrencyService *ConcurrencyService
+	sessionLimitCache  SessionLimitCache
+	rpmCache           RPMCache
+}
+
+// NewGroupCapacityService wires the repositories, concurrency service and caches into a GroupCapacityService; sessionLimitCache and rpmCache may be nil, in which case the matching usage totals stay at zero.
+func NewGroupCapacityService(
+	accountRepo AccountRepository,
+	groupRepo GroupRepository,
+	concurrencyService *ConcurrencyService,
+	sessionLimitCache SessionLimitCache,
+	rpmCache RPMCache,
+) *GroupCapacityService {
+	return &GroupCapacityService{
+		accountRepo:        accountRepo,
+		groupRepo:          groupRepo,
+		concurrencyService: concurrencyService,
+		sessionLimitCache:  sessionLimitCache,
+		rpmCache:           rpmCache,
+	}
+}
+
+// GetAllGroupCapacity returns capacity summaries for all active groups. Only a failure to list the groups is returned as an error; groups whose own lookup fails are omitted, so the result may be partial.
+func (s *GroupCapacityService) GetAllGroupCapacity(ctx context.Context) ([]GroupCapacitySummary, error) {
+	groups, err := s.groupRepo.ListActive(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	results := make([]GroupCapacitySummary, 0, len(groups))
+	for i := range groups {
+		summary, err := s.getGroupCapacity(ctx, groups[i].ID)
+		if err != nil {
+			// Best effort: skip groups whose capacity lookup failed and return partial results.
+			continue
+		}
+		summary.GroupID = groups[i].ID
+		results = append(results, summary)
+	}
+	return results, nil
+}
+
+func (s *GroupCapacityService) getGroupCapacity(ctx context.Context, groupID int64) (GroupCapacitySummary, error) {
+	accounts, err := s.accountRepo.ListSchedulableByGroupID(ctx, groupID)
+	if err != nil {
+		return GroupCapacitySummary{}, err
+	}
+	if len(accounts) == 0 {
+		return GroupCapacitySummary{}, nil
+	}
+
+	// Collect account IDs and sum the per-account limits; a session timeout is recorded only for accounts that have a session limit.
+	accountIDs := make([]int64, 0, len(accounts))
+	sessionTimeouts := make(map[int64]time.Duration)
+	var concurrencyMax, sessionsMax, rpmMax int
+
+	for i := range accounts {
+		acc := &accounts[i]
+		accountIDs = append(accountIDs, acc.ID)
+		concurrencyMax += acc.Concurrency
+
+		if ms := acc.GetMaxSessions(); ms > 0 {
+			sessionsMax += ms
+			timeout := time.Duration(acc.GetSessionIdleTimeoutMinutes()) * time.Minute
+			if timeout <= 0 {
+				timeout = 5 * time.Minute
+			}
+			sessionTimeouts[acc.ID] = timeout
+		}
+
+		if rpm := acc.GetBaseRPM(); rpm > 0 {
+			rpmMax += rpm
+		}
+	}
+
+	// Batch query runtime data (Redis, per the original note); lookup errors are discarded, so a failed lookup contributes zero usage.
+	concurrencyMap, _ := s.concurrencyService.GetAccountConcurrencyBatch(ctx, accountIDs)
+
+	var sessionsMap map[int64]int
+	if sessionsMax > 0 && s.sessionLimitCache != nil {
+		sessionsMap, _ = s.sessionLimitCache.GetActiveSessionCountBatch(ctx, accountIDs, sessionTimeouts)
+	}
+
+	var rpmMap map[int64]int
+	if rpmMax > 0 && s.rpmCache != nil {
+		rpmMap, _ = s.rpmCache.GetRPMBatch(ctx, accountIDs)
+	}
+
+	// Aggregate per-account usage into group totals; session and RPM usage are added only when their map was populated.
+	var concurrencyUsed, sessionsUsed, rpmUsed int
+	for _, id := range accountIDs {
+		concurrencyUsed += concurrencyMap[id]
+		if sessionsMap != nil {
+			sessionsUsed += sessionsMap[id]
+		}
+		if rpmMap != nil {
+			rpmUsed += rpmMap[id]
+		}
+	}
+
+	return GroupCapacitySummary{
+		ConcurrencyUsed: concurrencyUsed,
+		ConcurrencyMax:  concurrencyMax,
+		SessionsUsed:    sessionsUsed,
+		SessionsMax:     sessionsMax,
+		RPMUsed:         rpmUsed,
+		RPMMax:          rpmMax,
+	}, nil
+}
--- a/backend/internal/service/group_service.go
+++ b/backend/internal/service/group_service.go
@@ -27,7 +27,7 @@ type GroupRepository interface {
 	ListActiveByPlatform(ctx context.Context, platform string) ([]Group, error)

 	ExistsByName(ctx context.Context, name string) (bool, error)
-	GetAccountCount(ctx context.Context, groupID int64) (int64, error)
+	GetAccountCount(ctx context.Context, groupID int64) (total int64, active int64, err error)
 	DeleteAccountGroupsByGroupID(ctx context.Context, groupID int64) (int64, error)
 	// GetAccountIDsByGroupIDs 获取多个分组的所有账号 ID（去重）
 	GetAccountIDsByGroupIDs(ctx context.Context, groupIDs []int64) ([]int64, error)
@@ -202,7 +202,7 @@ func (s *GroupService) GetStats(ctx context.Context, id int64) (map[string]any,
 	}

 	// 获取账号数量
-	accountCount, err := s.groupRepo.GetAccountCount(ctx, id)
+	accountCount, _, err := s.groupRepo.GetAccountCount(ctx, id)
 	if err != nil {
 		return nil, fmt.Errorf("get account count: %w", err)
 	}
--- a/backend/internal/service/identity_service.go
+++ b/backend/internal/service/identity_service.go
@@ -19,10 +19,6 @@ import (

 // 预编译正则表达式（避免每次调用重新编译）
 var (
-	// 匹配 user_id 格式:
-	//   旧格式: user_{64位hex}_account__session_{uuid}        (account 后无 UUID)
-	//   新格式: user_{64位hex}_account_{uuid}_session_{uuid}  (account 后有 UUID)
-	userIDRegex = regexp.MustCompile(`^user_[a-f0-9]{64}_account_([a-f0-9-]*)_session_([a-f0-9-]{36})$`)
 	// 匹配 User-Agent 版本号: xxx/x.y.z
 	userAgentVersionRegex = regexp.MustCompile(`/(\d+)\.(\d+)\.(\d+)`)
 )
@@ -209,12 +205,12 @@ func (s *IdentityService) ApplyFingerprint(req *http.Request, fp *Fingerprint) {
 }

 // RewriteUserID 重写body中的metadata.user_id
-// 输入格式：user_{clientId}_account__session_{sessionUUID}
-// 输出格式：user_{cachedClientID}_account_{accountUUID}_session_{newHash}
+// 支持旧拼接格式和新 JSON 格式的 user_id 解析，
+// 根据 fingerprintUA 版本选择输出格式。
 //
 // 重要：此函数使用 json.RawMessage 保留其他字段的原始字节，
 // 避免重新序列化导致 thinking 块等内容被修改。
-func (s *IdentityService) RewriteUserID(body []byte, accountID int64, accountUUID, cachedClientID string) ([]byte, error) {
+func (s *IdentityService) RewriteUserID(body []byte, accountID int64, accountUUID, cachedClientID, fingerprintUA string) ([]byte, error) {
 	if len(body) == 0 || accountUUID == "" || cachedClientID == "" {
 		return body, nil
 	}
@@ -241,24 +237,21 @@ func (s *IdentityService) RewriteUserID(body []byte, accountID int64, accountUUI
 		return body, nil
 	}

-	// 匹配格式:
-	//   旧格式: user_{64位hex}_account__session_{uuid}
-	//   新格式: user_{64位hex}_account_{uuid}_session_{uuid}
-	matches := userIDRegex.FindStringSubmatch(userID)
-	if matches == nil {
+	// 解析 user_id（兼容旧拼接格式和新 JSON 格式）
+	parsed := ParseMetadataUserID(userID)
+	if parsed == nil {
 		return body, nil
 	}

-	// matches[1] = account UUID (可能为空), matches[2] = session UUID
-	sessionTail := matches[2] // 原始session UUID
+	sessionTail := parsed.SessionID // 原始session UUID

 	// 生成新的session hash: SHA256(accountID::sessionTail) -> UUID格式
 	seed := fmt.Sprintf("%d::%s", accountID, sessionTail)
 	newSessionHash := generateUUIDFromSeed(seed)

-	// 构建新的user_id
-	// 格式: user_{cachedClientID}_account_{account_uuid}_session_{newSessionHash}
-	newUserID := fmt.Sprintf("user_%s_account_%s_session_%s", cachedClientID, accountUUID, newSessionHash)
+	// 根据客户端版本选择输出格式
+	version := ExtractCLIVersion(fingerprintUA)
+	newUserID := FormatMetadataUserID(cachedClientID, accountUUID, newSessionHash, version)

 	metadata["user_id"] = newUserID

@@ -278,9 +271,9 @@ func (s *IdentityService) RewriteUserID(body []byte, accountID int64, accountUUI
 //
 // 重要：此函数使用 json.RawMessage 保留其他字段的原始字节，
 // 避免重新序列化导致 thinking 块等内容被修改。
-func (s *IdentityService) RewriteUserIDWithMasking(ctx context.Context, body []byte, account *Account, accountUUID, cachedClientID string) ([]byte, error) {
+func (s *IdentityService) RewriteUserIDWithMasking(ctx context.Context, body []byte, account *Account, accountUUID, cachedClientID, fingerprintUA string) ([]byte, error) {
 	// 先执行常规的 RewriteUserID 逻辑
-	newBody, err := s.RewriteUserID(body, account.ID, accountUUID, cachedClientID)
+	newBody, err := s.RewriteUserID(body, account.ID, accountUUID, cachedClientID, fingerprintUA)
 	if err != nil {
 		return newBody, err
 	}
@@ -312,10 +305,9 @@ func (s *IdentityService) RewriteUserIDWithMasking(ctx context.Context, body []b
 		return newBody, nil
 	}

-	// 查找 _session_ 的位置，替换其后的内容
-	const sessionMarker = "_session_"
-	idx := strings.LastIndex(userID, sessionMarker)
-	if idx == -1 {
+	// 解析已重写的 user_id
+	uidParsed := ParseMetadataUserID(userID)
+	if uidParsed == nil {
 		return newBody, nil
 	}

@@ -337,8 +329,9 @@ func (s *IdentityService) RewriteUserIDWithMasking(ctx context.Context, body []b
 		logger.LegacyPrintf("service.identity", "Warning: failed to set masked session ID for account %d: %v", account.ID, err)
 	}

-	// 替换 session 部分：保留 _session_ 之前的内容，替换之后的内容
-	newUserID := userID[:idx+len(sessionMarker)] + maskedSessionID
+	// 用 FormatMetadataUserID 重建（保持与 RewriteUserID 相同的格式）
+	version := ExtractCLIVersion(fingerprintUA)
+	newUserID := FormatMetadataUserID(uidParsed.DeviceID, uidParsed.AccountUUID, maskedSessionID, version)

 	slog.Debug("session_id_masking_applied",
 		"account_id", account.ID,
--- a/backend/internal/service/metadata_userid.go
+++ b/backend/internal/service/metadata_userid.go
@@ -0,0 +1,104 @@
+package service
+
+import (
+	"encoding/json"
+	"regexp"
+	"strings"
+)
+
+// NewMetadataFormatMinVersion is the minimum Claude Code version that uses
+// JSON-formatted metadata.user_id instead of the legacy concatenated string.
+const NewMetadataFormatMinVersion = "2.1.78"
+
+// ParsedUserID represents the components extracted from a metadata.user_id value.
+type ParsedUserID struct {
+	DeviceID    string // 64-char hex in the legacy format; any non-empty string in the JSON format
+	AccountUUID string // may be empty
+	SessionID   string // session UUID; its shape is enforced only by the legacy regex, JSON input just needs it non-empty
+	IsNewFormat bool   // true if the original was JSON format
+}
+
+// legacyUserIDRegex matches the legacy user_id format:
+//
+//	user_{64hex}_account_{optional_uuid}_session_{uuid}
+var legacyUserIDRegex = regexp.MustCompile(`^user_([a-fA-F0-9]{64})_account_([a-fA-F0-9-]*)_session_([a-fA-F0-9-]{36})$`)
+
+// jsonUserID is the JSON structure for the new metadata.user_id format.
+type jsonUserID struct {
+	DeviceID    string `json:"device_id"`
+	AccountUUID string `json:"account_uuid"`
+	SessionID   string `json:"session_id"`
+}
+
+// ParseMetadataUserID parses a metadata.user_id string in either the JSON or the legacy concatenated format (surrounding whitespace is trimmed first).
+// Returns nil if the input is empty, malformed, or (JSON format) has an empty device_id or session_id.
+func ParseMetadataUserID(raw string) *ParsedUserID {
+	raw = strings.TrimSpace(raw)
+	if raw == "" {
+		return nil
+	}
+
+	// A leading '{' selects the JSON format; such input never falls back to the legacy regex.
+	if raw[0] == '{' {
+		var j jsonUserID
+		if err := json.Unmarshal([]byte(raw), &j); err != nil {
+			return nil
+		}
+		if j.DeviceID == "" || j.SessionID == "" {
+			return nil
+		}
+		return &ParsedUserID{
+			DeviceID:    j.DeviceID,
+			AccountUUID: j.AccountUUID,
+			SessionID:   j.SessionID,
+			IsNewFormat: true,
+		}
+	}
+
+	// Otherwise the whole string must match the legacy format.
+	matches := legacyUserIDRegex.FindStringSubmatch(raw)
+	if matches == nil {
+		return nil
+	}
+	return &ParsedUserID{
+		DeviceID:    matches[1],
+		AccountUUID: matches[2],
+		SessionID:   matches[3],
+		IsNewFormat: false,
+	}
+}
+
+// FormatMetadataUserID builds a metadata.user_id string: JSON when uaVersion satisfies
+// IsNewMetadataFormatVersion, otherwise the legacy concatenated form. Components are the
+// rewritten values (not necessarily the originals). The json.Marshal error is discarded.
+func FormatMetadataUserID(deviceID, accountUUID, sessionID, uaVersion string) string {
+	if IsNewMetadataFormatVersion(uaVersion) {
+		b, _ := json.Marshal(jsonUserID{
+			DeviceID:    deviceID,
+			AccountUUID: accountUUID,
+			SessionID:   sessionID,
+		})
+		return string(b)
+	}
+	// Legacy format; an empty accountUUID leaves a double underscore ("_account__session_").
+	return "user_" + deviceID + "_account_" + accountUUID + "_session_" + sessionID
+}
+
+// IsNewMetadataFormatVersion returns true if the given CLI version uses the new JSON
+// metadata.user_id format (>= NewMetadataFormatMinVersion); an empty version counts as legacy.
+func IsNewMetadataFormatVersion(version string) bool {
+	if version == "" {
+		return false
+	}
+	return CompareVersions(version, NewMetadataFormatMinVersion) >= 0
+}
+
+// ExtractCLIVersion pulls the Claude Code version out of a User-Agent string.
+// It yields "" when claudeCodeUAVersionPattern finds no version capture in ua.
+func ExtractCLIVersion(ua string) string {
+	groups := claudeCodeUAVersionPattern.FindStringSubmatch(ua)
+	if len(groups) < 2 {
+		return ""
+	}
+	return groups[1]
+}
--- a/backend/internal/service/metadata_userid_test.go
+++ b/backend/internal/service/metadata_userid_test.go
@@ -0,0 +1,183 @@
+//go:build unit
+
+package service
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// ============ ParseMetadataUserID Tests ============
+
+func TestParseMetadataUserID_LegacyFormat_WithoutAccountUUID(t *testing.T) {
+	raw := "user_a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2_account__session_123e4567-e89b-12d3-a456-426614174000"
+	parsed := ParseMetadataUserID(raw)
+	require.NotNil(t, parsed)
+	require.Equal(t, "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2", parsed.DeviceID)
+	require.Equal(t, "", parsed.AccountUUID)
+	require.Equal(t, "123e4567-e89b-12d3-a456-426614174000", parsed.SessionID)
+	require.False(t, parsed.IsNewFormat)
+}
+
+func TestParseMetadataUserID_LegacyFormat_WithAccountUUID(t *testing.T) {
+	raw := "user_a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2_account_550e8400-e29b-41d4-a716-446655440000_session_123e4567-e89b-12d3-a456-426614174000"
+	parsed := ParseMetadataUserID(raw)
+	require.NotNil(t, parsed)
+	require.Equal(t, "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2", parsed.DeviceID)
+	require.Equal(t, "550e8400-e29b-41d4-a716-446655440000", parsed.AccountUUID)
+	require.Equal(t, "123e4567-e89b-12d3-a456-426614174000", parsed.SessionID)
+	require.False(t, parsed.IsNewFormat)
+}
+
+func TestParseMetadataUserID_JSONFormat_WithoutAccountUUID(t *testing.T) {
+	raw := `{"device_id":"d61f76d0aabbccdd00112233445566778899aabbccddeeff0011223344556677","account_uuid":"","session_id":"c72554f2-1234-5678-abcd-123456789abc"}`
+	parsed := ParseMetadataUserID(raw)
+	require.NotNil(t, parsed)
+	require.Equal(t, "d61f76d0aabbccdd00112233445566778899aabbccddeeff0011223344556677", parsed.DeviceID)
+	require.Equal(t, "", parsed.AccountUUID)
+	require.Equal(t, "c72554f2-1234-5678-abcd-123456789abc", parsed.SessionID)
+	require.True(t, parsed.IsNewFormat)
+}
+
+func TestParseMetadataUserID_JSONFormat_WithAccountUUID(t *testing.T) {
+	raw := `{"device_id":"d61f76d0aabbccdd00112233445566778899aabbccddeeff0011223344556677","account_uuid":"550e8400-e29b-41d4-a716-446655440000","session_id":"c72554f2-1234-5678-abcd-123456789abc"}`
+	parsed := ParseMetadataUserID(raw)
+	require.NotNil(t, parsed)
+	require.Equal(t, "d61f76d0aabbccdd00112233445566778899aabbccddeeff0011223344556677", parsed.DeviceID)
+	require.Equal(t, "550e8400-e29b-41d4-a716-446655440000", parsed.AccountUUID)
+	require.Equal(t, "c72554f2-1234-5678-abcd-123456789abc", parsed.SessionID)
+	require.True(t, parsed.IsNewFormat)
+}
+
+func TestParseMetadataUserID_InvalidInputs(t *testing.T) {
+	tests := []struct {
+		name string
+		raw  string
+	}{
+		{"empty string", ""},
+		{"whitespace only", "   "},
+		{"random text", "not-a-valid-user-id"},
+		{"partial legacy format", "session_123e4567-e89b-12d3-a456-426614174000"},
+		{"invalid JSON", `{"device_id":}`},
+		{"JSON missing device_id", `{"account_uuid":"","session_id":"c72554f2-1234-5678-abcd-123456789abc"}`},
+		{"JSON missing session_id", `{"device_id":"d61f76d0aabbccdd00112233445566778899aabbccddeeff0011223344556677","account_uuid":""}`},
+		{"JSON empty device_id", `{"device_id":"","account_uuid":"","session_id":"c72554f2-1234-5678-abcd-123456789abc"}`},
+		{"JSON empty session_id", `{"device_id":"d61f76d0aabbccdd00112233445566778899aabbccddeeff0011223344556677","account_uuid":"","session_id":""}`},
+		{"legacy format short hex", "user_a1b2c3d4_account__session_123e4567-e89b-12d3-a456-426614174000"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			require.Nil(t, ParseMetadataUserID(tt.raw), "should return nil for: %s", tt.raw)
+		})
+	}
+}
+
+func TestParseMetadataUserID_HexCaseInsensitive(t *testing.T) {
+	// Legacy format should accept both upper and lower case hex
+	rawUpper := "user_A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2_account__session_123e4567-e89b-12d3-a456-426614174000"
+	parsed := ParseMetadataUserID(rawUpper)
+	require.NotNil(t, parsed, "legacy format should accept uppercase hex")
+	require.Equal(t, "A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4E5F6A1B2", parsed.DeviceID)
+}
+
+// ============ FormatMetadataUserID Tests ============
+
+func TestFormatMetadataUserID_LegacyVersion(t *testing.T) {
+	result := FormatMetadataUserID("deadbeef"+"00112233445566778899aabbccddeeff0011223344556677", "acc-uuid", "sess-uuid", "2.1.77")
+	require.Equal(t, "user_deadbeef00112233445566778899aabbccddeeff0011223344556677_account_acc-uuid_session_sess-uuid", result)
+}
+
+func TestFormatMetadataUserID_NewVersion(t *testing.T) {
+	result := FormatMetadataUserID("deadbeef"+"00112233445566778899aabbccddeeff0011223344556677", "acc-uuid", "sess-uuid", "2.1.78")
+	require.Equal(t, `{"device_id":"deadbeef00112233445566778899aabbccddeeff0011223344556677","account_uuid":"acc-uuid","session_id":"sess-uuid"}`, result)
+}
+
+func TestFormatMetadataUserID_EmptyVersion_Legacy(t *testing.T) {
+	result := FormatMetadataUserID("deadbeef"+"00112233445566778899aabbccddeeff0011223344556677", "", "sess-uuid", "")
+	require.Equal(t, "user_deadbeef00112233445566778899aabbccddeeff0011223344556677_account__session_sess-uuid", result)
+}
+
+func TestFormatMetadataUserID_EmptyAccountUUID(t *testing.T) {
+	// Legacy format with empty account UUID → double underscore
+	result := FormatMetadataUserID("deadbeef"+"00112233445566778899aabbccddeeff0011223344556677", "", "sess-uuid", "2.1.22")
+	require.Contains(t, result, "_account__session_")
+
+	// New format with empty account UUID → empty string in JSON
+	result = FormatMetadataUserID("deadbeef"+"00112233445566778899aabbccddeeff0011223344556677", "", "sess-uuid", "2.1.78")
+	require.Contains(t, result, `"account_uuid":""`)
+}
+
+// ============ IsNewMetadataFormatVersion Tests ============
+
+func TestIsNewMetadataFormatVersion(t *testing.T) {
+	tests := []struct {
+		version string
+		want    bool
+	}{
+		{"", false},
+		{"2.1.77", false},
+		{"2.1.78", true},
+		{"2.1.79", true},
+		{"2.2.0", true},
+		{"3.0.0", true},
+		{"2.0.100", false},
+		{"1.9.99", false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.version, func(t *testing.T) {
+			require.Equal(t, tt.want, IsNewMetadataFormatVersion(tt.version))
+		})
+	}
+}
+
+// ============ Round-trip Tests ============
+
+func TestParseFormat_RoundTrip_Legacy(t *testing.T) {
+	deviceID := "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2"
+	accountUUID := "550e8400-e29b-41d4-a716-446655440000"
+	sessionID := "123e4567-e89b-12d3-a456-426614174000"
+
+	formatted := FormatMetadataUserID(deviceID, accountUUID, sessionID, "2.1.22")
+	parsed := ParseMetadataUserID(formatted)
+	require.NotNil(t, parsed)
+	require.Equal(t, deviceID, parsed.DeviceID)
+	require.Equal(t, accountUUID, parsed.AccountUUID)
+	require.Equal(t, sessionID, parsed.SessionID)
+	require.False(t, parsed.IsNewFormat)
+}
+
+func TestParseFormat_RoundTrip_JSON(t *testing.T) {
+	deviceID := "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2"
+	accountUUID := "550e8400-e29b-41d4-a716-446655440000"
+	sessionID := "123e4567-e89b-12d3-a456-426614174000"
+
+	formatted := FormatMetadataUserID(deviceID, accountUUID, sessionID, "2.1.78")
+	parsed := ParseMetadataUserID(formatted)
+	require.NotNil(t, parsed)
+	require.Equal(t, deviceID, parsed.DeviceID)
+	require.Equal(t, accountUUID, parsed.AccountUUID)
+	require.Equal(t, sessionID, parsed.SessionID)
+	require.True(t, parsed.IsNewFormat)
+}
+
+func TestParseFormat_RoundTrip_EmptyAccountUUID(t *testing.T) {
+	deviceID := "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2"
+	sessionID := "123e4567-e89b-12d3-a456-426614174000"
+
+	// Legacy round-trip with empty account UUID
+	formatted := FormatMetadataUserID(deviceID, "", sessionID, "2.1.22")
+	parsed := ParseMetadataUserID(formatted)
+	require.NotNil(t, parsed)
+	require.Equal(t, deviceID, parsed.DeviceID)
+	require.Equal(t, "", parsed.AccountUUID)
+	require.Equal(t, sessionID, parsed.SessionID)
+
+	// JSON round-trip with empty account UUID
+	formatted = FormatMetadataUserID(deviceID, "", sessionID, "2.1.78")
+	parsed = ParseMetadataUserID(formatted)
+	require.NotNil(t, parsed)
+	require.Equal(t, deviceID, parsed.DeviceID)
+	require.Equal(t, "", parsed.AccountUUID)
+	require.Equal(t, sessionID, parsed.SessionID)
+}
--- a/backend/internal/service/oauth_refresh_api.go
+++ b/backend/internal/service/oauth_refresh_api.go
@@ -0,0 +1,159 @@
+package service
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"strconv"
+	"time"
+)
+
+// OAuthRefreshExecutor is the OAuth refresh executor implemented by each platform.
+// It is a superset of TokenRefresher: it adds CacheKey, which is used for the distributed lock.
+type OAuthRefreshExecutor interface {
+	TokenRefresher
+
+	// CacheKey returns the cache key used for the distributed lock (the same key TokenProvider uses).
+	CacheKey(account *Account) string
+}
+
+const refreshLockTTL = 30 * time.Second // TTL passed to AcquireRefreshLock by RefreshIfNeeded
+
+// OAuthRefreshResult is the unified result of a refresh attempt.
+type OAuthRefreshResult struct {
+	Refreshed      bool           // a refresh was actually performed
+	NewCredentials map[string]any // credentials after the refresh (nil means nothing was refreshed)
+	Account        *Account       // account re-read from the DB (the passed-in account if the re-read fails)
+	LockHeld       bool           // the lock is held by another worker (no refresh was performed)
+}
+
+// OAuthRefreshAPI is the unified entry point for OAuth token refresh.
+// It encapsulates the shared logic: distributed locking, DB re-read and the already-refreshed check.
+type OAuthRefreshAPI struct {
+	accountRepo AccountRepository
+	tokenCache  GeminiTokenCache // optional; nil means no locking
+}
+
+// NewOAuthRefreshAPI builds the unified refresh API; a nil tokenCache disables locking.
+func NewOAuthRefreshAPI(accountRepo AccountRepository, tokenCache GeminiTokenCache) *OAuthRefreshAPI {
+	api := new(OAuthRefreshAPI)
+	api.accountRepo = accountRepo
+	api.tokenCache = tokenCache
+	return api
+}
+
+// RefreshIfNeeded refreshes an OAuth token on demand under the distributed refresh lock.
+//
+// Flow:
+//  1. Acquire the distributed lock (skipped when no token cache is configured).
+//  2. Re-read the account from the DB so a stale refresh_token is never used.
+//  3. Re-check whether a refresh is still needed.
+//  4. Call executor.Refresh() for the platform-specific refresh.
+//  5. Stamp _token_version and persist the new credentials.
+//  6. Release the lock.
+func (api *OAuthRefreshAPI) RefreshIfNeeded(
+	ctx context.Context,
+	account *Account,
+	executor OAuthRefreshExecutor,
+	refreshWindow time.Duration,
+) (*OAuthRefreshResult, error) {
+	cacheKey := executor.CacheKey(account)
+
+	// 1. Acquire the distributed lock.
+	if api.tokenCache != nil {
+		acquired, lockErr := api.tokenCache.AcquireRefreshLock(ctx, cacheKey, refreshLockTTL)
+		switch {
+		case lockErr != nil:
+			// Lock backend error: degrade to an unlocked refresh.
+			slog.Warn("oauth_refresh_lock_failed_degraded",
+				"account_id", account.ID,
+				"cache_key", cacheKey,
+				"error", lockErr,
+			)
+		case !acquired:
+			// Another worker holds the lock.
+			return &OAuthRefreshResult{LockHeld: true}, nil
+		default:
+			// Release on a non-cancelable context: a canceled request must not pin the lock until its TTL.
+			defer func() {
+				if releaseErr := api.tokenCache.ReleaseRefreshLock(context.WithoutCancel(ctx), cacheKey); releaseErr != nil {
+					slog.Warn("oauth_refresh_lock_release_failed", "cache_key", cacheKey, "error", releaseErr)
+				}
+			}()
+		}
+	}
+
+	// 2. Re-read the account under the lock so the latest refresh_token is used.
+	freshAccount, err := api.accountRepo.GetByID(ctx, account.ID)
+	if err != nil {
+		slog.Warn("oauth_refresh_db_reread_failed",
+			"account_id", account.ID,
+			"error", err,
+		)
+	}
+	if err != nil || freshAccount == nil {
+		// Fall back to the account passed in by the caller.
+		freshAccount = account
+	}
+
+	// 3. Re-check: another path may already have refreshed the token.
+	if !executor.NeedsRefresh(freshAccount, refreshWindow) {
+		return &OAuthRefreshResult{Account: freshAccount}, nil
+	}
+
+	// 4. Run the platform-specific refresh.
+	newCredentials, refreshErr := executor.Refresh(ctx, freshAccount)
+	if refreshErr != nil {
+		return nil, refreshErr
+	}
+
+	// 5. Stamp the version and persist; nil credentials are not written to the DB.
+	if newCredentials != nil {
+		newCredentials["_token_version"] = time.Now().UnixMilli()
+		freshAccount.Credentials = newCredentials
+		if updateErr := api.accountRepo.Update(ctx, freshAccount); updateErr != nil {
+			slog.Error("oauth_refresh_update_failed",
+				"account_id", freshAccount.ID,
+				"error", updateErr,
+			)
+			return nil, fmt.Errorf("oauth refresh succeeded but DB update failed: %w", updateErr)
+		}
+	}
+
+	return &OAuthRefreshResult{
+		Refreshed:      true,
+		NewCredentials: newCredentials,
+		Account:        freshAccount,
+	}, nil
+}
+
+// MergeCredentials back-fills newCreds with the keys found only in oldCreds and returns it.
+func MergeCredentials(oldCreds, newCreds map[string]any) map[string]any {
+	if newCreds == nil {
+		newCreds = map[string]any{}
+	}
+	for key := range oldCreds {
+		if _, present := newCreds[key]; !present {
+			newCreds[key] = oldCreds[key]
+		}
+	}
+	return newCreds
+}
+
+// BuildClaudeAccountCredentials builds the OAuth credentials map for the Claude
+// platform, which has no BuildAccountCredentials method of its own. The expiry
+// fields are stored as decimal strings; empty refresh_token and scope are omitted.
+func BuildClaudeAccountCredentials(tokenInfo *TokenInfo) map[string]any {
+	creds := make(map[string]any, 6)
+	creds["access_token"] = tokenInfo.AccessToken
+	creds["token_type"] = tokenInfo.TokenType
+	creds["expires_in"] = strconv.FormatInt(tokenInfo.ExpiresIn, 10)
+	creds["expires_at"] = strconv.FormatInt(tokenInfo.ExpiresAt, 10)
+	if refreshToken := tokenInfo.RefreshToken; refreshToken != "" {
+		creds["refresh_token"] = refreshToken
+	}
+	if scope := tokenInfo.Scope; scope != "" {
+		creds["scope"] = scope
+	}
+	return creds
+}
--- a/backend/internal/service/oauth_refresh_api_test.go
+++ b/backend/internal/service/oauth_refresh_api_test.go
@@ -0,0 +1,395 @@
+//go:build unit
+
+package service
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+// ---------- mock helpers ----------
+
+// refreshAPIAccountRepo implements AccountRepository for OAuthRefreshAPI tests.
+type refreshAPIAccountRepo struct {
+	mockAccountRepoForGemini
+	account     *Account // returned by GetByID
+	getByIDErr  error    // when non-nil, returned by GetByID instead of account
+	updateErr   error    // returned by Update
+	updateCalls int      // number of Update calls
+}
+
+func (r *refreshAPIAccountRepo) GetByID(_ context.Context, _ int64) (*Account, error) {
+	if r.getByIDErr != nil {
+		return nil, r.getByIDErr
+	}
+	return r.account, nil
+}
+
+func (r *refreshAPIAccountRepo) Update(_ context.Context, _ *Account) error {
+	r.updateCalls++
+	return r.updateErr
+}
+
+// refreshAPIExecutorStub implements OAuthRefreshExecutor for tests.
+type refreshAPIExecutorStub struct {
+	needsRefresh bool           // value returned by NeedsRefresh
+	credentials  map[string]any // returned by Refresh when err is nil
+	err          error          // when non-nil, returned by Refresh
+	refreshCalls int            // number of Refresh invocations
+}
+
+func (e *refreshAPIExecutorStub) CanRefresh(_ *Account) bool { return true }
+
+func (e *refreshAPIExecutorStub) NeedsRefresh(_ *Account, _ time.Duration) bool {
+	return e.needsRefresh
+}
+
+func (e *refreshAPIExecutorStub) Refresh(_ context.Context, _ *Account) (map[string]any, error) {
+	e.refreshCalls++
+	if e.err != nil {
+		return nil, e.err
+	}
+	return e.credentials, nil
+}
+
+func (e *refreshAPIExecutorStub) CacheKey(account *Account) string {
+	return "test:api:" + account.Platform
+}
+
+// refreshAPICacheStub implements GeminiTokenCache for OAuthRefreshAPI tests.
+type refreshAPICacheStub struct {
+	lockResult   bool  // "acquired" value returned by AcquireRefreshLock
+	lockErr      error // error returned by AcquireRefreshLock
+	releaseCalls int   // number of ReleaseRefreshLock calls
+}
+
+func (c *refreshAPICacheStub) GetAccessToken(context.Context, string) (string, error) {
+	return "", nil
+}
+
+func (c *refreshAPICacheStub) SetAccessToken(context.Context, string, string, time.Duration) error {
+	return nil
+}
+
+func (c *refreshAPICacheStub) DeleteAccessToken(context.Context, string) error { return nil }
+
+func (c *refreshAPICacheStub) AcquireRefreshLock(context.Context, string, time.Duration) (bool, error) {
+	return c.lockResult, c.lockErr
+}
+
+func (c *refreshAPICacheStub) ReleaseRefreshLock(context.Context, string) error {
+	c.releaseCalls++
+	return nil
+}
+
+// ========== RefreshIfNeeded tests ==========
+
+func TestRefreshIfNeeded_Success(t *testing.T) {
+	account := &Account{ID: 1, Platform: PlatformAnthropic, Type: AccountTypeOAuth}
+	repo := &refreshAPIAccountRepo{account: account}
+	cache := &refreshAPICacheStub{lockResult: true}
+	executor := &refreshAPIExecutorStub{
+		needsRefresh: true,
+		credentials:  map[string]any{"access_token": "new-token"},
+	}
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute)
+
+	require.NoError(t, err)
+	require.True(t, result.Refreshed) // happy path: lock acquired and refresh executed
+	require.NotNil(t, result.NewCredentials)
+	require.Equal(t, "new-token", result.NewCredentials["access_token"])
+	require.NotNil(t, result.NewCredentials["_token_version"]) // version stamp set
+	require.Equal(t, 1, repo.updateCalls)                      // DB updated
+	require.Equal(t, 1, cache.releaseCalls)                    // lock released
+	require.Equal(t, 1, executor.refreshCalls)
+}
+
+func TestRefreshIfNeeded_LockHeld(t *testing.T) {
+	account := &Account{ID: 2, Platform: PlatformAnthropic}
+	repo := &refreshAPIAccountRepo{account: account}
+	cache := &refreshAPICacheStub{lockResult: false} // lock not acquired
+	executor := &refreshAPIExecutorStub{needsRefresh: true}
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute)
+
+	require.NoError(t, err)
+	require.True(t, result.LockHeld) // reported to the caller instead of refreshing
+	require.False(t, result.Refreshed)
+	require.Equal(t, 0, repo.updateCalls)
+	require.Equal(t, 0, executor.refreshCalls)
+}
+
+func TestRefreshIfNeeded_LockErrorDegrades(t *testing.T) {
+	account := &Account{ID: 3, Platform: PlatformGemini, Type: AccountTypeOAuth}
+	repo := &refreshAPIAccountRepo{account: account}
+	cache := &refreshAPICacheStub{lockErr: errors.New("redis down")} // lock error
+	executor := &refreshAPIExecutorStub{
+		needsRefresh: true,
+		credentials:  map[string]any{"access_token": "degraded-token"},
+	}
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute) // a lock error must not abort the refresh
+
+	require.NoError(t, err)
+	require.True(t, result.Refreshed)       // still refreshed (degraded mode)
+	require.Equal(t, 1, repo.updateCalls)   // DB updated
+	require.Equal(t, 0, cache.releaseCalls) // no lock to release
+	require.Equal(t, 1, executor.refreshCalls)
+}
+
+func TestRefreshIfNeeded_NoCacheNoLock(t *testing.T) {
+	account := &Account{ID: 4, Platform: PlatformGemini, Type: AccountTypeOAuth}
+	repo := &refreshAPIAccountRepo{account: account}
+	executor := &refreshAPIExecutorStub{
+		needsRefresh: true,
+		credentials:  map[string]any{"access_token": "no-cache-token"},
+	}
+
+	api := NewOAuthRefreshAPI(repo, nil) // no cache = no lock
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute)
+
+	require.NoError(t, err)
+	require.True(t, result.Refreshed)
+	require.Equal(t, 1, repo.updateCalls) // credentials are still persisted without a lock
+}
+
+func TestRefreshIfNeeded_AlreadyRefreshed(t *testing.T) {
+	account := &Account{ID: 5, Platform: PlatformAnthropic}
+	repo := &refreshAPIAccountRepo{account: account}
+	cache := &refreshAPICacheStub{lockResult: true}
+	executor := &refreshAPIExecutorStub{needsRefresh: false} // already refreshed
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute) // the re-check under the lock finds nothing to do
+
+	require.NoError(t, err)
+	require.False(t, result.Refreshed)
+	require.False(t, result.LockHeld)
+	require.NotNil(t, result.Account) // returns fresh account
+	require.Equal(t, 0, repo.updateCalls)
+	require.Equal(t, 0, executor.refreshCalls)
+}
+
+func TestRefreshIfNeeded_RefreshError(t *testing.T) {
+	account := &Account{ID: 6, Platform: PlatformAnthropic}
+	repo := &refreshAPIAccountRepo{account: account}
+	cache := &refreshAPICacheStub{lockResult: true}
+	executor := &refreshAPIExecutorStub{
+		needsRefresh: true,
+		err:          errors.New("invalid_grant: token revoked"),
+	}
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute)
+
+	require.Error(t, err)
+	require.Nil(t, result)
+	require.Contains(t, err.Error(), "invalid_grant")
+	require.Equal(t, 0, repo.updateCalls)   // no DB update on refresh error
+	require.Equal(t, 1, cache.releaseCalls) // lock still released via defer
+}
+
+func TestRefreshIfNeeded_DBUpdateError(t *testing.T) {
+	account := &Account{ID: 7, Platform: PlatformGemini, Type: AccountTypeOAuth}
+	repo := &refreshAPIAccountRepo{
+		account:   account,
+		updateErr: errors.New("db connection lost"),
+	}
+	cache := &refreshAPICacheStub{lockResult: true}
+	executor := &refreshAPIExecutorStub{
+		needsRefresh: true,
+		credentials:  map[string]any{"access_token": "token"},
+	}
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute) // refresh succeeds, persisting fails
+
+	require.Error(t, err)
+	require.Nil(t, result)
+	require.Contains(t, err.Error(), "DB update failed")
+	require.Equal(t, 1, repo.updateCalls) // attempted
+}
+
+func TestRefreshIfNeeded_DBRereadFails(t *testing.T) {
+	account := &Account{ID: 8, Platform: PlatformAnthropic, Type: AccountTypeOAuth}
+	repo := &refreshAPIAccountRepo{
+		account:    nil, // GetByID returns nil
+		getByIDErr: errors.New("db timeout"),
+	}
+	cache := &refreshAPICacheStub{lockResult: true}
+	executor := &refreshAPIExecutorStub{
+		needsRefresh: true,
+		credentials:  map[string]any{"access_token": "fallback-token"},
+	}
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute) // a failed re-read is not fatal
+
+	require.NoError(t, err)
+	require.True(t, result.Refreshed)
+	require.Equal(t, 1, executor.refreshCalls) // still refreshes using passed-in account
+}
+
+func TestRefreshIfNeeded_NilCredentials(t *testing.T) {
+	account := &Account{ID: 9, Platform: PlatformGemini, Type: AccountTypeOAuth}
+	repo := &refreshAPIAccountRepo{account: account}
+	cache := &refreshAPICacheStub{lockResult: true}
+	executor := &refreshAPIExecutorStub{
+		needsRefresh: true,
+		credentials:  nil, // Refresh returns nil credentials
+	}
+
+	api := NewOAuthRefreshAPI(repo, cache)
+	result, err := api.RefreshIfNeeded(context.Background(), account, executor, 3*time.Minute)
+
+	require.NoError(t, err)
+	require.True(t, result.Refreshed) // still reported as refreshed even though nothing is persisted
+	require.Nil(t, result.NewCredentials)
+	require.Equal(t, 0, repo.updateCalls) // no DB update when credentials are nil
+}
+
+// ========== MergeCredentials tests ==========
+
+func TestMergeCredentials_Basic(t *testing.T) {
+	oldCreds := map[string]any{"a": "1", "b": "2", "c": "3"}
+	newCreds := map[string]any{"a": "new", "d": "4"}
+
+	result := MergeCredentials(oldCreds, newCreds)
+
+	require.Equal(t, "new", result["a"]) // new value preserved
+	require.Equal(t, "2", result["b"])   // old value kept
+	require.Equal(t, "3", result["c"])   // old value kept
+	require.Equal(t, "4", result["d"])   // new value preserved
+}
+
+func TestMergeCredentials_NilNew(t *testing.T) {
+	old := map[string]any{"a": "1"}
+
+	result := MergeCredentials(old, nil)
+
+	require.NotNil(t, result) // a map is allocated when newCreds is nil
+	require.Equal(t, "1", result["a"])
+}
+
+func TestMergeCredentials_NilOld(t *testing.T) {
+	newCreds := map[string]any{"a": "1"}
+
+	result := MergeCredentials(nil, newCreds)
+
+	require.Equal(t, "1", result["a"]) // nil oldCreds leaves newCreds as is
+}
+
+func TestMergeCredentials_BothNil(t *testing.T) {
+	result := MergeCredentials(nil, nil)
+	require.NotNil(t, result) // never returns a nil map
+	require.Empty(t, result)
+}
+
+func TestMergeCredentials_NewOverridesOld(t *testing.T) {
+	oldCreds := map[string]any{"access_token": "old-token", "refresh_token": "old-refresh"}
+	newCreds := map[string]any{"access_token": "new-token"}
+
+	result := MergeCredentials(oldCreds, newCreds)
+
+	require.Equal(t, "new-token", result["access_token"])    // overridden
+	require.Equal(t, "old-refresh", result["refresh_token"]) // preserved
+}
+
+// ========== BuildClaudeAccountCredentials tests ==========
+
+func TestBuildClaudeAccountCredentials_Full(t *testing.T) {
+	tokenInfo := &TokenInfo{
+		AccessToken:  "at-123",
+		TokenType:    "Bearer",
+		ExpiresIn:    3600,
+		ExpiresAt:    1700000000,
+		RefreshToken: "rt-456",
+		Scope:        "openid",
+	}
+
+	creds := BuildClaudeAccountCredentials(tokenInfo)
+
+	require.Equal(t, "at-123", creds["access_token"])
+	require.Equal(t, "Bearer", creds["token_type"])
+	require.Equal(t, "3600", creds["expires_in"]) // numeric fields are stored as decimal strings
+	require.Equal(t, "1700000000", creds["expires_at"])
+	require.Equal(t, "rt-456", creds["refresh_token"])
+	require.Equal(t, "openid", creds["scope"])
+}
+
+func TestBuildClaudeAccountCredentials_Minimal(t *testing.T) {
+	tokenInfo := &TokenInfo{
+		AccessToken: "at-789",
+		TokenType:   "Bearer",
+		ExpiresIn:   7200,
+		ExpiresAt:   1700003600,
+	}
+
+	creds := BuildClaudeAccountCredentials(tokenInfo) // RefreshToken and Scope left empty
+
+	require.Equal(t, "at-789", creds["access_token"])
+	require.Equal(t, "Bearer", creds["token_type"])
+	require.Equal(t, "7200", creds["expires_in"])
+	require.Equal(t, "1700003600", creds["expires_at"])
+	_, hasRefresh := creds["refresh_token"]
+	_, hasScope := creds["scope"]
+	require.False(t, hasRefresh, "refresh_token should not be set when empty")
+	require.False(t, hasScope, "scope should not be set when empty")
+}
+
+// ========== BackgroundRefreshPolicy tests ==========
+
+func TestBackgroundRefreshPolicy_DefaultSkips(t *testing.T) {
+	p := DefaultBackgroundRefreshPolicy() // by default both skip cases must surface errRefreshSkipped
+
+	require.ErrorIs(t, p.handleLockHeld(), errRefreshSkipped)
+	require.ErrorIs(t, p.handleAlreadyRefreshed(), errRefreshSkipped)
+}
+
+func TestBackgroundRefreshPolicy_SuccessOverride(t *testing.T) {
+	p := BackgroundRefreshPolicy{ // both skip cases configured to count as success
+		OnLockHeld:       BackgroundSkipAsSuccess,
+		OnAlreadyRefresh: BackgroundSkipAsSuccess,
+	}
+
+	require.NoError(t, p.handleLockHeld())
+	require.NoError(t, p.handleAlreadyRefreshed())
+}
+
+// ========== ProviderRefreshPolicy tests ==========
+
+func TestClaudeProviderRefreshPolicy(t *testing.T) {
+	p := ClaudeProviderRefreshPolicy() // pins the Claude provider policy values
+	require.Equal(t, ProviderRefreshErrorUseExistingToken, p.OnRefreshError)
+	require.Equal(t, ProviderLockHeldWaitForCache, p.OnLockHeld)
+	require.Equal(t, time.Minute, p.FailureTTL)
+}
+
+func TestOpenAIProviderRefreshPolicy(t *testing.T) {
+	p := OpenAIProviderRefreshPolicy() // pins the OpenAI provider policy values
+	require.Equal(t, ProviderRefreshErrorUseExistingToken, p.OnRefreshError)
+	require.Equal(t, ProviderLockHeldWaitForCache, p.OnLockHeld)
+	require.Equal(t, time.Minute, p.FailureTTL)
+}
+
+func TestGeminiProviderRefreshPolicy(t *testing.T) {
+	p := GeminiProviderRefreshPolicy() // pins the Gemini provider policy values
+	require.Equal(t, ProviderRefreshErrorReturn, p.OnRefreshError)
+	require.Equal(t, ProviderLockHeldUseExistingToken, p.OnLockHeld)
+	require.Equal(t, time.Duration(0), p.FailureTTL)
+}
+
+func TestAntigravityProviderRefreshPolicy(t *testing.T) {
+	p := AntigravityProviderRefreshPolicy() // pins the Antigravity provider policy values
+	require.Equal(t, ProviderRefreshErrorReturn, p.OnRefreshError)
+	require.Equal(t, ProviderLockHeldUseExistingToken, p.OnLockHeld)
+	require.Equal(t, time.Duration(0), p.FailureTTL)
+}
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -172,6 +172,11 @@ func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool, isCompact
 		result.PromptCacheKey = strings.TrimSpace(v)
 	}

+	// 提取 input 中 role:"system" 消息至 instructions（OAuth 上游不支持 system role）。
+	if extractSystemMessagesFromInput(reqBody) {
+		result.Modified = true
+	}
+
 	// instructions 处理逻辑：根据是否是 Codex CLI 分别调用不同方法
 	if applyInstructions(reqBody, isCodexCLI) {
 		result.Modified = true
@@ -301,6 +306,73 @@ func getNormalizedCodexModel(modelID string) string {
 	return ""
 }

+// extractTextFromContent extracts plain text from a content value that is either a Go
+// string or a []any of content-part maps typed "text" or "input_text" (Responses API).
+func extractTextFromContent(content any) string {
+	switch v := content.(type) {
+	case string:
+		return v
+	case []any:
+		var parts []string
+		for _, part := range v {
+			m, ok := part.(map[string]any)
+			if !ok {
+				continue
+			}
+			if t, _ := m["type"].(string); t == "text" || t == "input_text" {
+				if text, ok := m["text"].(string); ok {
+					parts = append(parts, text)
+				}
+			}
+		}
+		return strings.Join(parts, "")
+	default:
+		return ""
+	}
+}
+
+// extractSystemMessagesFromInput removes every role=="system" item from the input
+// array and merges their text into reqBody["instructions"], placing the extracted
+// text before any existing instructions, separated by "\n\n". System items are
+// removed even when they carry no text. Returns true if any item was removed.
+func extractSystemMessagesFromInput(reqBody map[string]any) bool {
+	input, ok := reqBody["input"].([]any)
+	if !ok || len(input) == 0 {
+		return false
+	}
+
+	var systemTexts []string
+	remaining := make([]any, 0, len(input))
+
+	for _, item := range input {
+		m, ok := item.(map[string]any)
+		if !ok {
+			remaining = append(remaining, item)
+			continue
+		}
+		if role, _ := m["role"].(string); role != "system" {
+			remaining = append(remaining, item)
+			continue
+		}
+		if text := extractTextFromContent(m["content"]); text != "" {
+			systemTexts = append(systemTexts, text)
+		}
+	}
+
+	// Nothing was dropped, so the input contained no system messages.
+	if len(remaining) == len(input) {
+		return false
+	}
+	reqBody["input"] = remaining
+	if extracted := strings.Join(systemTexts, "\n\n"); extracted != "" {
+		if existing, ok := reqBody["instructions"].(string); ok && strings.TrimSpace(existing) != "" {
+			extracted += "\n\n" + existing
+		}
+		reqBody["instructions"] = extracted
+	}
+	return true
+}
+
 // applyInstructions 处理 instructions 字段：仅在 instructions 为空时填充默认值。
 func applyInstructions(reqBody map[string]any, isCodexCLI bool) bool {
 	if !isInstructionsEmpty(reqBody) {
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -344,6 +344,135 @@ func TestApplyCodexOAuthTransform_StringInputWithToolsField(t *testing.T) {
 	require.Len(t, input, 1)
 }

+func TestExtractSystemMessagesFromInput(t *testing.T) {
+	t.Run("no system messages", func(t *testing.T) {
+		reqBody := map[string]any{
+			"input": []any{
+				map[string]any{"role": "user", "content": "hello"},
+			},
+		}
+		result := extractSystemMessagesFromInput(reqBody)
+		require.False(t, result) // nothing extracted, reqBody must stay untouched
+		input, ok := reqBody["input"].([]any)
+		require.True(t, ok)
+		require.Len(t, input, 1)
+		_, hasInstructions := reqBody["instructions"]
+		require.False(t, hasInstructions)
+	})
+
+	t.Run("string content system message", func(t *testing.T) {
+		reqBody := map[string]any{
+			"input": []any{
+				map[string]any{"role": "system", "content": "You are an assistant."},
+				map[string]any{"role": "user", "content": "hello"},
+			},
+		}
+		result := extractSystemMessagesFromInput(reqBody)
+		require.True(t, result)
+		input, ok := reqBody["input"].([]any)
+		require.True(t, ok)
+		require.Len(t, input, 1) // only the user message is left
+		msg, ok := input[0].(map[string]any)
+		require.True(t, ok)
+		require.Equal(t, "user", msg["role"])
+		require.Equal(t, "You are an assistant.", reqBody["instructions"])
+	})
+
+	t.Run("array content system message", func(t *testing.T) {
+		reqBody := map[string]any{
+			"input": []any{
+				map[string]any{
+					"role": "system",
+					"content": []any{
+						map[string]any{"type": "text", "text": "Be helpful."},
+					},
+				},
+			},
+		}
+		result := extractSystemMessagesFromInput(reqBody)
+		require.True(t, result)
+		require.Equal(t, "Be helpful.", reqBody["instructions"])
+		input, ok := reqBody["input"].([]any)
+		require.True(t, ok)
+		require.Len(t, input, 0) // the only item was the system message
+	})
+
+	t.Run("multiple system messages concatenated", func(t *testing.T) {
+		reqBody := map[string]any{
+			"input": []any{
+				map[string]any{"role": "system", "content": "First."},
+				map[string]any{"role": "system", "content": "Second."},
+				map[string]any{"role": "user", "content": "hi"},
+			},
+		}
+		result := extractSystemMessagesFromInput(reqBody)
+		require.True(t, result)
+		require.Equal(t, "First.\n\nSecond.", reqBody["instructions"]) // joined in input order with a blank line
+		input, ok := reqBody["input"].([]any)
+		require.True(t, ok)
+		require.Len(t, input, 1)
+	})
+
+	t.Run("mixed system and non-system preserves non-system", func(t *testing.T) {
+		reqBody := map[string]any{
+			"input": []any{
+				map[string]any{"role": "user", "content": "hello"},
+				map[string]any{"role": "system", "content": "Sys prompt."},
+				map[string]any{"role": "assistant", "content": "Hi there"},
+			},
+		}
+		result := extractSystemMessagesFromInput(reqBody)
+		require.True(t, result)
+		input, ok := reqBody["input"].([]any)
+		require.True(t, ok)
+		require.Len(t, input, 2) // relative order of the remaining items is kept
+		first, ok := input[0].(map[string]any)
+		require.True(t, ok)
+		require.Equal(t, "user", first["role"])
+		second, ok := input[1].(map[string]any)
+		require.True(t, ok)
+		require.Equal(t, "assistant", second["role"])
+	})
+
+	t.Run("existing instructions prepended", func(t *testing.T) {
+		reqBody := map[string]any{
+			"input": []any{
+				map[string]any{"role": "system", "content": "Extracted."},
+				map[string]any{"role": "user", "content": "hi"},
+			},
+			"instructions": "Existing instructions.",
+		}
+		result := extractSystemMessagesFromInput(reqBody)
+		require.True(t, result)
+		require.Equal(t, "Extracted.\n\nExisting instructions.", reqBody["instructions"]) // extracted text goes first
+	})
+}
+
+func TestApplyCodexOAuthTransform_ExtractsSystemMessages(t *testing.T) {
+	reqBody := map[string]any{
+		"model": "gpt-5.1",
+		"input": []any{
+			map[string]any{"role": "system", "content": "You are a coding assistant."},
+			map[string]any{"role": "user", "content": "Write a function."},
+		},
+	}
+
+	result := applyCodexOAuthTransform(reqBody, false, false) // the transform must move the system message into instructions
+
+	require.True(t, result.Modified)
+
+	input, ok := reqBody["input"].([]any)
+	require.True(t, ok)
+	require.Len(t, input, 1)
+	msg, ok := input[0].(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "user", msg["role"])
+
+	instructions, ok := reqBody["instructions"].(string)
+	require.True(t, ok)
+	require.Equal(t, "You are a coding assistant.", instructions)
+}
+
 func TestIsInstructionsEmpty(t *testing.T) {
 	tests := []struct {
 		name     string
--- a/backend/internal/service/openai_gateway_messages.go
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -107,10 +107,11 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 		return nil, fmt.Errorf("build upstream request: %w", err)
 	}

-	// Override session_id with a deterministic UUID derived from the sticky
-	// session key (buildUpstreamRequest may have set it to the raw value).
+	// Override session_id with a deterministic UUID derived from the isolated
+	// session key, ensuring different API keys produce different upstream sessions.
 	if promptCacheKey != "" {
-		upstreamReq.Header.Set("session_id", generateSessionUUID(promptCacheKey))
+		apiKeyID := getAPIKeyIDFromContext(c)
+		upstreamReq.Header.Set("session_id", generateSessionUUID(isolateOpenAISessionID(apiKeyID, promptCacheKey)))
 	}

 	// 7. Send request
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -24,6 +24,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
 	"github.com/Wei-Shaw/sub2api/internal/util/urlvalidator"
+	"github.com/cespare/xxhash/v2"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 	"github.com/tidwall/gjson"
@@ -787,6 +788,20 @@ func getAPIKeyIDFromContext(c *gin.Context) int64 {
 	return apiKey.ID
 }

+// isolateOpenAISessionID mixes apiKeyID into a session identifier so that users
+// of different API keys who send the same raw session_id/conversation_id reach
+// the upstream with different identifiers, preventing cross-user collisions.
+func isolateOpenAISessionID(apiKeyID int64, raw string) string {
+	trimmed := strings.TrimSpace(raw)
+	if trimmed == "" {
+		return ""
+	}
+	// Hash "k<apiKeyID>:<trimmed>" and render the sum as 16 zero-padded hex digits.
+	digest := xxhash.New()
+	_, _ = digest.WriteString(fmt.Sprintf("k%d:%s", apiKeyID, trimmed))
+	return fmt.Sprintf("%016x", digest.Sum64())
+}
+
 func logCodexCLIOnlyDetection(ctx context.Context, c *gin.Context, account *Account, apiKeyID int64, result CodexClientRestrictionDetectionResult, body []byte) {
 	if !result.Enabled {
 		return
@@ -2501,13 +2516,17 @@ func (s *OpenAIGatewayService) buildUpstreamRequestOpenAIPassthrough(
 		if chatgptAccountID := account.GetChatGPTAccountID(); chatgptAccountID != "" {
 			req.Header.Set("chatgpt-account-id", chatgptAccountID)
 		}
+		apiKeyID := getAPIKeyIDFromContext(c)
+		// 先保存客户端原始值，再做 compact 补充，避免后续统一隔离时读到已处理的值。
+		clientSessionID := strings.TrimSpace(req.Header.Get("session_id"))
+		clientConversationID := strings.TrimSpace(req.Header.Get("conversation_id"))
 		if isOpenAIResponsesCompactPath(c) {
 			req.Header.Set("accept", "application/json")
 			if req.Header.Get("version") == "" {
 				req.Header.Set("version", codexCLIVersion)
 			}
-			if req.Header.Get("session_id") == "" {
-				req.Header.Set("session_id", resolveOpenAICompactSessionID(c))
+			if clientSessionID == "" {
+				clientSessionID = resolveOpenAICompactSessionID(c)
 			}
 		} else if req.Header.Get("accept") == "" {
 			req.Header.Set("accept", "text/event-stream")
@@ -2518,13 +2537,18 @@ func (s *OpenAIGatewayService) buildUpstreamRequestOpenAIPassthrough(
 		if req.Header.Get("originator") == "" {
 			req.Header.Set("originator", "codex_cli_rs")
 		}
-		if promptCacheKey != "" {
-			if req.Header.Get("conversation_id") == "" {
-				req.Header.Set("conversation_id", promptCacheKey)
-			}
-			if req.Header.Get("session_id") == "" {
-				req.Header.Set("session_id", promptCacheKey)
-			}
+		// 用隔离后的 session 标识符覆盖客户端透传值，防止跨用户会话碰撞。
+		if clientSessionID == "" {
+			clientSessionID = promptCacheKey
+		}
+		if clientConversationID == "" {
+			clientConversationID = promptCacheKey
+		}
+		if clientSessionID != "" {
+			req.Header.Set("session_id", isolateOpenAISessionID(apiKeyID, clientSessionID))
+		}
+		if clientConversationID != "" {
+			req.Header.Set("conversation_id", isolateOpenAISessionID(apiKeyID, clientConversationID))
 		}
 	}

@@ -2887,22 +2911,27 @@ func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.
 		}
 	}
 	if account.Type == AccountTypeOAuth {
+		// 清除客户端透传的 session 头，后续用隔离后的值重新设置，防止跨用户会话碰撞。
+		req.Header.Del("conversation_id")
+		req.Header.Del("session_id")
+
 		req.Header.Set("OpenAI-Beta", "responses=experimental")
 		req.Header.Set("originator", resolveOpenAIUpstreamOriginator(c, isCodexCLI))
+		apiKeyID := getAPIKeyIDFromContext(c)
 		if isOpenAIResponsesCompactPath(c) {
 			req.Header.Set("accept", "application/json")
 			if req.Header.Get("version") == "" {
 				req.Header.Set("version", codexCLIVersion)
 			}
-			if req.Header.Get("session_id") == "" {
-				req.Header.Set("session_id", resolveOpenAICompactSessionID(c))
-			}
+			compactSession := resolveOpenAICompactSessionID(c)
+			req.Header.Set("session_id", isolateOpenAISessionID(apiKeyID, compactSession))
 		} else {
 			req.Header.Set("accept", "text/event-stream")
 		}
 		if promptCacheKey != "" {
-			req.Header.Set("conversation_id", promptCacheKey)
-			req.Header.Set("session_id", promptCacheKey)
+			isolated := isolateOpenAISessionID(apiKeyID, promptCacheKey)
+			req.Header.Set("conversation_id", isolated)
+			req.Header.Set("session_id", isolated)
 		}
 	}

--- a/backend/internal/service/openai_gateway_service_session_isolation_test.go
+++ b/backend/internal/service/openai_gateway_service_session_isolation_test.go
@@ -0,0 +1,50 @@
+package service
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestIsolateOpenAISessionID(t *testing.T) {
+	t.Run("empty_raw_returns_empty", func(t *testing.T) {
+		assert.Equal(t, "", isolateOpenAISessionID(1, ""))
+		assert.Equal(t, "", isolateOpenAISessionID(1, "   "))
+	})
+
+	t.Run("deterministic", func(t *testing.T) {
+		a := isolateOpenAISessionID(42, "sess_abc123")
+		b := isolateOpenAISessionID(42, "sess_abc123")
+		assert.Equal(t, a, b)
+	})
+
+	t.Run("different_apiKeyID_different_result", func(t *testing.T) {
+		a := isolateOpenAISessionID(1, "same_session")
+		b := isolateOpenAISessionID(2, "same_session")
+		require.NotEqual(t, a, b, "不同 API Key 使用相同 session_id 应产生不同隔离值")
+	})
+
+	t.Run("different_raw_different_result", func(t *testing.T) {
+		a := isolateOpenAISessionID(1, "session_a")
+		b := isolateOpenAISessionID(1, "session_b")
+		require.NotEqual(t, a, b)
+	})
+
+	t.Run("format_is_16_hex_chars", func(t *testing.T) {
+		result := isolateOpenAISessionID(99, "test_session")
+		assert.Len(t, result, 16, "应为 16 字符的 hex 字符串")
+		for _, ch := range result {
+			assert.True(t, (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'),
+				"应仅包含 hex 字符: %c", ch)
+		}
+	})
+
+	t.Run("zero_apiKeyID_still_works", func(t *testing.T) {
+		result := isolateOpenAISessionID(0, "session")
+		assert.NotEmpty(t, result)
+		// apiKeyID=0 and apiKeyID=1 must produce different results.
+		other := isolateOpenAISessionID(1, "session")
+		assert.NotEqual(t, result, other)
+	})
+}
--- a/backend/internal/service/openai_token_provider.go
+++ b/backend/internal/service/openai_token_provider.go
@@ -20,7 +20,7 @@ const (
 	openAILockWarnThresholdMs = 250
 )

-// OpenAITokenRuntimeMetrics 表示 OpenAI token 刷新与锁竞争保护指标快照。
+// OpenAITokenRuntimeMetrics is a snapshot of refresh and lock contention metrics.
 type OpenAITokenRuntimeMetrics struct {
 	RefreshRequests    int64
 	RefreshSuccess     int64
@@ -72,15 +72,18 @@ func (m *openAITokenRuntimeMetricsStore) touchNow() {
 	m.lastObservedUnixMs.Store(time.Now().UnixMilli())
 }

-// OpenAITokenCache Token 缓存接口（复用 GeminiTokenCache 接口定义）
+// OpenAITokenCache token cache interface.
 type OpenAITokenCache = GeminiTokenCache

-// OpenAITokenProvider 管理 OpenAI OAuth 账户的 access_token
+// OpenAITokenProvider manages access_token for OpenAI/Sora OAuth accounts.
 type OpenAITokenProvider struct {
 	accountRepo        AccountRepository
 	tokenCache         OpenAITokenCache
 	openAIOAuthService *OpenAIOAuthService
 	metrics            *openAITokenRuntimeMetricsStore
+	// refreshAPI and executor are injected through SetRefreshAPI.
+	// GetAccessToken takes the unified refresh path only when both are non-nil.
+	refreshAPI         *OAuthRefreshAPI
+	executor           OAuthRefreshExecutor
+	// refreshPolicy is initialized to OpenAIProviderRefreshPolicy() by the
+	// constructor and can be replaced through SetRefreshPolicy.
+	refreshPolicy      ProviderRefreshPolicy
 }

 func NewOpenAITokenProvider(
@@ -93,9 +96,21 @@ func NewOpenAITokenProvider(
 		tokenCache:         tokenCache,
 		openAIOAuthService: openAIOAuthService,
 		metrics:            &openAITokenRuntimeMetricsStore{},
+		refreshPolicy:      OpenAIProviderRefreshPolicy(),
 	}
 }

+// SetRefreshAPI injects the unified OAuth refresh API together with the
+// executor that GetAccessToken passes to its RefreshIfNeeded call.
+// GetAccessToken takes the unified refresh path only when both values are
+// non-nil; otherwise it falls back to the token-cache lock path.
+// NOTE(review): these are plain field writes; presumably called once during
+// wiring, before the provider is used concurrently — confirm.
+func (p *OpenAITokenProvider) SetRefreshAPI(api *OAuthRefreshAPI, executor OAuthRefreshExecutor) {
+	p.refreshAPI = api
+	p.executor = executor
+}
+
+// SetRefreshPolicy replaces the caller-side refresh policy, which
+// NewOpenAITokenProvider initializes to OpenAIProviderRefreshPolicy().
+// GetAccessToken reads the policy's OnRefreshError, OnLockHeld and FailureTTL
+// fields.
+func (p *OpenAITokenProvider) SetRefreshPolicy(policy ProviderRefreshPolicy) {
+	p.refreshPolicy = policy
+}
+
 func (p *OpenAITokenProvider) SnapshotRuntimeMetrics() OpenAITokenRuntimeMetrics {
 	if p == nil {
 		return OpenAITokenRuntimeMetrics{}
@@ -110,7 +125,7 @@ func (p *OpenAITokenProvider) ensureMetrics() {
 	}
 }

-// GetAccessToken 获取有效的 access_token
+// GetAccessToken returns a valid access_token.
 func (p *OpenAITokenProvider) GetAccessToken(ctx context.Context, account *Account) (string, error) {
 	p.ensureMetrics()
 	if account == nil {
@@ -122,7 +137,7 @@ func (p *OpenAITokenProvider) GetAccessToken(ctx context.Context, account *Accou

 	cacheKey := OpenAITokenCacheKey(account)

-	// 1. 先尝试缓存
+	// 1) Try cache first.
 	if p.tokenCache != nil {
 		if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
 			slog.Debug("openai_token_cache_hit", "account_id", account.ID)
@@ -134,114 +149,62 @@ func (p *OpenAITokenProvider) GetAccessToken(ctx context.Context, account *Accou

 	slog.Debug("openai_token_cache_miss", "account_id", account.ID)

-	// 2. 如果即将过期则刷新
+	// 2) Refresh if needed (pre-expiry skew).
 	expiresAt := account.GetCredentialAsTime("expires_at")
 	needsRefresh := expiresAt == nil || time.Until(*expiresAt) <= openAITokenRefreshSkew
 	refreshFailed := false
-	if needsRefresh && p.tokenCache != nil {
+
+	if needsRefresh && p.refreshAPI != nil && p.executor != nil {
+		p.metrics.refreshRequests.Add(1)
+		p.metrics.touchNow()
+
+		// Sora accounts skip OpenAI OAuth refresh and keep existing token path.
+		if account.Platform == PlatformSora {
+			slog.Debug("openai_token_refresh_skipped_for_sora", "account_id", account.ID)
+			refreshFailed = true
+		} else {
+			result, err := p.refreshAPI.RefreshIfNeeded(ctx, account, p.executor, openAITokenRefreshSkew)
+			if err != nil {
+				if p.refreshPolicy.OnRefreshError == ProviderRefreshErrorReturn {
+					return "", err
+				}
+				slog.Warn("openai_token_refresh_failed", "account_id", account.ID, "error", err)
+				p.metrics.refreshFailure.Add(1)
+				refreshFailed = true
+			} else if result.LockHeld {
+				if p.refreshPolicy.OnLockHeld == ProviderLockHeldWaitForCache {
+					p.metrics.lockContention.Add(1)
+					p.metrics.touchNow()
+					token, waitErr := p.waitForTokenAfterLockRace(ctx, cacheKey)
+					if waitErr != nil {
+						return "", waitErr
+					}
+					if strings.TrimSpace(token) != "" {
+						slog.Debug("openai_token_cache_hit_after_wait", "account_id", account.ID)
+						return token, nil
+					}
+				}
+			} else if result.Refreshed {
+				p.metrics.refreshSuccess.Add(1)
+				account = result.Account
+				expiresAt = account.GetCredentialAsTime("expires_at")
+			} else {
+				account = result.Account
+				expiresAt = account.GetCredentialAsTime("expires_at")
+			}
+		}
+	} else if needsRefresh && p.tokenCache != nil {
+		// Backward-compatible test path when refreshAPI is not injected.
 		p.metrics.refreshRequests.Add(1)
 		p.metrics.touchNow()
 		locked, lockErr := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
 		if lockErr == nil && locked {
 			defer func() { _ = p.tokenCache.ReleaseRefreshLock(ctx, cacheKey) }()
-
-			// 拿到锁后再次检查缓存（另一个 worker 可能已刷新）
-			if token, err := p.tokenCache.GetAccessToken(ctx, cacheKey); err == nil && strings.TrimSpace(token) != "" {
-				return token, nil
-			}
-
-			// 从数据库获取最新账户信息
-			fresh, err := p.accountRepo.GetByID(ctx, account.ID)
-			if err == nil && fresh != nil {
-				account = fresh
-			}
-			expiresAt = account.GetCredentialAsTime("expires_at")
-			if expiresAt == nil || time.Until(*expiresAt) <= openAITokenRefreshSkew {
-				if account.Platform == PlatformSora {
-					slog.Debug("openai_token_refresh_skipped_for_sora", "account_id", account.ID)
-					// Sora 账号不走 OpenAI OAuth 刷新，交由 Sora 客户端的 ST/RT 恢复链路处理。
-					refreshFailed = true
-				} else if p.openAIOAuthService == nil {
-					slog.Warn("openai_oauth_service_not_configured", "account_id", account.ID)
-					p.metrics.refreshFailure.Add(1)
-					refreshFailed = true // 无法刷新，标记失败
-				} else {
-					tokenInfo, err := p.openAIOAuthService.RefreshAccountToken(ctx, account)
-					if err != nil {
-						// 刷新失败时记录警告，但不立即返回错误，尝试使用现有 token
-						slog.Warn("openai_token_refresh_failed", "account_id", account.ID, "error", err)
-						p.metrics.refreshFailure.Add(1)
-						refreshFailed = true // 刷新失败，标记以使用短 TTL
-					} else {
-						p.metrics.refreshSuccess.Add(1)
-						newCredentials := p.openAIOAuthService.BuildAccountCredentials(tokenInfo)
-						for k, v := range account.Credentials {
-							if _, exists := newCredentials[k]; !exists {
-								newCredentials[k] = v
-							}
-						}
-						account.Credentials = newCredentials
-						if updateErr := p.accountRepo.Update(ctx, account); updateErr != nil {
-							slog.Error("openai_token_provider_update_failed", "account_id", account.ID, "error", updateErr)
-						}
-						expiresAt = account.GetCredentialAsTime("expires_at")
-					}
-				}
-			}
 		} else if lockErr != nil {
-			// Redis 错误导致无法获取锁，降级为无锁刷新（仅在 token 接近过期时）
 			p.metrics.lockAcquireFailure.Add(1)
 			p.metrics.touchNow()
-			slog.Warn("openai_token_lock_failed_degraded_refresh", "account_id", account.ID, "error", lockErr)
-
-			// 检查 ctx 是否已取消
-			if ctx.Err() != nil {
-				return "", ctx.Err()
-			}
-
-			// 从数据库获取最新账户信息
-			if p.accountRepo != nil {
-				fresh, err := p.accountRepo.GetByID(ctx, account.ID)
-				if err == nil && fresh != nil {
-					account = fresh
-				}
-			}
-			expiresAt = account.GetCredentialAsTime("expires_at")
-
-			// 仅在 expires_at 已过期/接近过期时才执行无锁刷新
-			if expiresAt == nil || time.Until(*expiresAt) <= openAITokenRefreshSkew {
-				if account.Platform == PlatformSora {
-					slog.Debug("openai_token_refresh_skipped_for_sora_degraded", "account_id", account.ID)
-					// Sora 账号不走 OpenAI OAuth 刷新，交由 Sora 客户端的 ST/RT 恢复链路处理。
-					refreshFailed = true
-				} else if p.openAIOAuthService == nil {
-					slog.Warn("openai_oauth_service_not_configured", "account_id", account.ID)
-					p.metrics.refreshFailure.Add(1)
-					refreshFailed = true
-				} else {
-					tokenInfo, err := p.openAIOAuthService.RefreshAccountToken(ctx, account)
-					if err != nil {
-						slog.Warn("openai_token_refresh_failed_degraded", "account_id", account.ID, "error", err)
-						p.metrics.refreshFailure.Add(1)
-						refreshFailed = true
-					} else {
-						p.metrics.refreshSuccess.Add(1)
-						newCredentials := p.openAIOAuthService.BuildAccountCredentials(tokenInfo)
-						for k, v := range account.Credentials {
-							if _, exists := newCredentials[k]; !exists {
-								newCredentials[k] = v
-							}
-						}
-						account.Credentials = newCredentials
-						if updateErr := p.accountRepo.Update(ctx, account); updateErr != nil {
-							slog.Error("openai_token_provider_update_failed", "account_id", account.ID, "error", updateErr)
-						}
-						expiresAt = account.GetCredentialAsTime("expires_at")
-					}
-				}
-			}
+			slog.Warn("openai_token_lock_failed", "account_id", account.ID, "error", lockErr)
 		} else {
-			// 锁被其他 worker 持有：使用短轮询+jitter，降低固定等待导致的尾延迟台阶。
 			p.metrics.lockContention.Add(1)
 			p.metrics.touchNow()
 			token, waitErr := p.waitForTokenAfterLockRace(ctx, cacheKey)
@@ -260,22 +223,23 @@ func (p *OpenAITokenProvider) GetAccessToken(ctx context.Context, account *Accou
 		return "", errors.New("access_token not found in credentials")
 	}

-	// 3. 存入缓存（验证版本后再写入，避免异步刷新任务与请求线程的竞态条件）
+	// 3) Populate cache with TTL.
 	if p.tokenCache != nil {
 		latestAccount, isStale := CheckTokenVersion(ctx, account, p.accountRepo)
 		if isStale && latestAccount != nil {
-			// 版本过时，使用 DB 中的最新 token
 			slog.Debug("openai_token_version_stale_use_latest", "account_id", account.ID)
 			accessToken = latestAccount.GetOpenAIAccessToken()
 			if strings.TrimSpace(accessToken) == "" {
 				return "", errors.New("access_token not found after version check")
 			}
-			// 不写入缓存，让下次请求重新处理
 		} else {
 			ttl := 30 * time.Minute
 			if refreshFailed {
-				// 刷新失败时使用短 TTL，避免失效 token 长时间缓存导致 401 抖动
-				ttl = time.Minute
+				if p.refreshPolicy.FailureTTL > 0 {
+					ttl = p.refreshPolicy.FailureTTL
+				} else {
+					ttl = time.Minute
+				}
 				slog.Debug("openai_token_cache_short_ttl", "account_id", account.ID, "reason", "refresh_failed")
 			} else if expiresAt != nil {
 				until := time.Until(*expiresAt)
--- a/backend/internal/service/openai_ws_forwarder.go
+++ b/backend/internal/service/openai_ws_forwarder.go
@@ -1124,11 +1124,22 @@ func (s *OpenAIGatewayService) buildOpenAIWSHeaders(
 			headers.Set("accept-language", v)
 		}
 	}
-	if sessionResolution.SessionID != "" {
-		headers.Set("session_id", sessionResolution.SessionID)
-	}
-	if sessionResolution.ConversationID != "" {
-		headers.Set("conversation_id", sessionResolution.ConversationID)
+	// OAuth 账号：将 apiKeyID 混入 session 标识符，防止跨用户会话碰撞。
+	if account != nil && account.Type == AccountTypeOAuth {
+		apiKeyID := getAPIKeyIDFromContext(c)
+		if sessionResolution.SessionID != "" {
+			headers.Set("session_id", isolateOpenAISessionID(apiKeyID, sessionResolution.SessionID))
+		}
+		if sessionResolution.ConversationID != "" {
+			headers.Set("conversation_id", isolateOpenAISessionID(apiKeyID, sessionResolution.ConversationID))
+		}
+	} else {
+		if sessionResolution.SessionID != "" {
+			headers.Set("session_id", sessionResolution.SessionID)
+		}
+		if sessionResolution.ConversationID != "" {
+			headers.Set("conversation_id", sessionResolution.ConversationID)
+		}
 	}
 	if state := strings.TrimSpace(turnState); state != "" {
 		headers.Set(openAIWSTurnStateHeader, state)
@@ -1859,7 +1870,16 @@ func (s *OpenAIGatewayService) forwardOpenAIWSV2(
 		}
 		return nil, wrapOpenAIWSFallback(classifyOpenAIWSAcquireError(err), err)
 	}
-	defer lease.Release()
+	// cleanExit 标记正常终端事件退出，此时上游不会再发送帧，连接可安全归还复用。
+	// 所有异常路径（读写错误、error 事件等）已在各自分支中提前调用 MarkBroken，
+	// 因此 defer 中只需处理正常退出时不 MarkBroken 即可。
+	cleanExit := false
+	defer func() {
+		if !cleanExit {
+			lease.MarkBroken()
+		}
+		lease.Release()
+	}()
 	connID := strings.TrimSpace(lease.ConnID())
 	logOpenAIWSModeDebug(
 		"connected account_id=%d account_type=%s transport=%s conn_id=%s conn_reused=%v conn_pick_ms=%d queue_wait_ms=%d has_previous_response_id=%v",
@@ -2237,6 +2257,7 @@ func (s *OpenAIGatewayService) forwardOpenAIWSV2(
 		}

 		if isTerminalEvent {
+			cleanExit = true
 			break
 		}
 	}
@@ -2972,12 +2993,15 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
 			pinnedSessionConnID = connID
 		}
 	}
+	// lastTurnClean 标记最后一轮 sendAndRelay 是否正常完成（收到终端事件且客户端未断连）。
+	// 所有异常路径（读写错误、error 事件、客户端断连）已在各自分支或上层（L3403）中 MarkBroken，
+	// 因此 releaseSessionLease 中只需在非正常结束时 MarkBroken。
+	lastTurnClean := false
 	releaseSessionLease := func() {
 		if sessionLease == nil {
 			return
 		}
-		if dedicatedMode {
-			// dedicated 会话结束后主动标记损坏，确保连接不会跨会话复用。
+		if !lastTurnClean {
 			sessionLease.MarkBroken()
 		}
 		unpinSessionConn(sessionConnID)
@@ -3372,6 +3396,7 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(

 		result, relayErr := sendAndRelay(turn, sessionLease, currentPayload, currentPayloadBytes, currentOriginalModel)
 		if relayErr != nil {
+			lastTurnClean = false
 			if recoverIngressPrevResponseNotFound(relayErr, turn, connID) {
 				continue
 			}
@@ -3391,6 +3416,7 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
 		turnRetry = 0
 		turnPrevRecoveryTried = false
 		lastTurnFinishedAt = time.Now()
+		lastTurnClean = true
 		if hooks != nil && hooks.AfterTurn != nil {
 			hooks.AfterTurn(turn, result, nil)
 		}
--- a/backend/internal/service/openai_ws_forwarder_success_test.go
+++ b/backend/internal/service/openai_ws_forwarder_success_test.go
@@ -380,7 +380,8 @@ func TestOpenAIGatewayService_Forward_WSv2_PoolReuseNotOneToOne(t *testing.T) {
 		require.True(t, strings.HasPrefix(result.RequestID, "resp_reuse_"))
 	}

-	require.Equal(t, int64(1), upgradeCount.Load(), "多个客户端请求应复用账号连接池而不是 1:1 对等建链")
+	// 条件式 MarkBroken：正常终端事件退出后连接归还复用，不再无条件销毁。
+	require.Equal(t, int64(1), upgradeCount.Load(), "正常完成后连接应归还复用，不应每次新建")
 	metrics := svc.SnapshotOpenAIWSPoolMetrics()
 	require.GreaterOrEqual(t, metrics.AcquireReuseTotal, int64(1))
 	require.GreaterOrEqual(t, metrics.ConnPickTotal, int64(1))
@@ -454,8 +455,10 @@ func TestOpenAIGatewayService_Forward_WSv2_OAuthStoreFalseByDefault(t *testing.T
 	require.True(t, gjson.Get(requestJSON, "stream").Exists(), "WSv2 payload 应保留 stream 字段")
 	require.True(t, gjson.Get(requestJSON, "stream").Bool(), "OAuth Codex 规范化后应强制 stream=true")
 	require.Equal(t, openAIWSBetaV2Value, captureDialer.lastHeaders.Get("OpenAI-Beta"))
-	require.Equal(t, "sess-oauth-1", captureDialer.lastHeaders.Get("session_id"))
-	require.Equal(t, "conv-oauth-1", captureDialer.lastHeaders.Get("conversation_id"))
+	// OAuth 账号的 session_id/conversation_id 应被 isolateOpenAISessionID 隔离，
+	// 测试中未设置 api_key 到 context，apiKeyID=0。
+	require.Equal(t, isolateOpenAISessionID(0, "sess-oauth-1"), captureDialer.lastHeaders.Get("session_id"))
+	require.Equal(t, isolateOpenAISessionID(0, "conv-oauth-1"), captureDialer.lastHeaders.Get("conversation_id"))
 }

 func TestOpenAIGatewayService_Forward_WSv2_OAuthOriginatorCompatibility(t *testing.T) {
@@ -596,7 +599,8 @@ func TestOpenAIGatewayService_Forward_WSv2_HeaderSessionFallbackFromPromptCacheK
 	require.NotNil(t, result)
 	require.Equal(t, "resp_prompt_cache_key", result.RequestID)

-	require.Equal(t, "pcache_123", captureDialer.lastHeaders.Get("session_id"))
+	// OAuth 账号的 session_id 应被 isolateOpenAISessionID 隔离（apiKeyID=0，未在 context 设置）。
+	require.Equal(t, isolateOpenAISessionID(0, "pcache_123"), captureDialer.lastHeaders.Get("session_id"))
 	require.Empty(t, captureDialer.lastHeaders.Get("conversation_id"))
 	require.NotNil(t, captureConn.lastWrite)
 	require.True(t, gjson.Get(requestToJSONString(captureConn.lastWrite), "stream").Exists())
@@ -961,6 +965,10 @@ func TestOpenAIGatewayService_Forward_WSv2_TurnMetadataInPayloadOnConnReuse(t *t
 	require.NotNil(t, result1)
 	require.Equal(t, "resp_meta_1", result1.RequestID)

+	require.Len(t, captureConn.writes, 1)
+	firstWrite := requestToJSONString(captureConn.writes[0])
+	require.Equal(t, "turn_meta_payload_1", gjson.Get(firstWrite, "client_metadata.x-codex-turn-metadata").String())
+
 	rec2 := httptest.NewRecorder()
 	c2, _ := gin.CreateTestContext(rec2)
 	c2.Request = httptest.NewRequest(http.MethodPost, "/openai/v1/responses", nil)
@@ -974,7 +982,7 @@ func TestOpenAIGatewayService_Forward_WSv2_TurnMetadataInPayloadOnConnReuse(t *t
 	require.Equal(t, 1, captureDialer.DialCount(), "同一账号两轮请求应复用同一 WS 连接")
 	require.Len(t, captureConn.writes, 2)

-	firstWrite := requestToJSONString(captureConn.writes[0])
+	firstWrite = requestToJSONString(captureConn.writes[0])
 	secondWrite := requestToJSONString(captureConn.writes[1])
 	require.Equal(t, "turn_meta_payload_1", gjson.Get(firstWrite, "client_metadata.x-codex-turn-metadata").String())
 	require.Equal(t, "turn_meta_payload_2", gjson.Get(secondWrite, "client_metadata.x-codex-turn-metadata").String())
--- a/backend/internal/service/proxy.go
+++ b/backend/internal/service/proxy.go
@@ -1,7 +1,9 @@
 package service

 import (
-	"fmt"
+	"net"
+	"net/url"
+	"strconv"
 	"time"
 )

@@ -23,10 +25,14 @@ func (p *Proxy) IsActive() bool {
 }

+// URL renders the proxy as "<Protocol>://[user:pass@]<Host>:<Port>".
+// Credentials are included only when both Username and Password are
+// non-empty; a username without a password yields a URL with no userinfo.
+// Building the value through net/url percent-encodes reserved characters in
+// the credentials, so the result parses back to the original values.
 func (p *Proxy) URL() string {
-	if p.Username != "" && p.Password != "" {
-		return fmt.Sprintf("%s://%s:%s@%s:%d", p.Protocol, p.Username, p.Password, p.Host, p.Port)
+	u := &url.URL{
+		Scheme: p.Protocol,
+		// JoinHostPort brackets hosts that contain a colon (IPv6 literals).
+		Host:   net.JoinHostPort(p.Host, strconv.Itoa(p.Port)),
 	}
-	return fmt.Sprintf("%s://%s:%d", p.Protocol, p.Host, p.Port)
+	if p.Username != "" && p.Password != "" {
+		u.User = url.UserPassword(p.Username, p.Password)
+	}
+	return u.String()
 }

 type ProxyWithAccountCount struct {
--- a/backend/internal/service/proxy_test.go
+++ b/backend/internal/service/proxy_test.go
@@ -0,0 +1,95 @@
+package service
+
+import (
+	"net/url"
+	"testing"
+)
+
+// TestProxyURL is a table-driven check of the exact string Proxy.URL returns
+// for proxies with no credentials, full credentials, a username only, and
+// credentials containing characters that must be percent-encoded.
+func TestProxyURL(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name  string
+		proxy Proxy
+		want  string
+	}{
+		{
+			name: "without auth",
+			proxy: Proxy{
+				Protocol: "http",
+				Host:     "proxy.example.com",
+				Port:     8080,
+			},
+			want: "http://proxy.example.com:8080",
+		},
+		{
+			name: "with auth",
+			proxy: Proxy{
+				Protocol: "socks5",
+				Host:     "socks.example.com",
+				Port:     1080,
+				Username: "user",
+				Password: "pass",
+			},
+			want: "socks5://user:pass@socks.example.com:1080",
+		},
+		{
+			// A username without a password is expected to be dropped entirely.
+			name: "username only keeps no auth for compatibility",
+			proxy: Proxy{
+				Protocol: "http",
+				Host:     "proxy.example.com",
+				Port:     8080,
+				Username: "user-only",
+			},
+			want: "http://proxy.example.com:8080",
+		},
+		{
+			// Space, '@', ':' and '#' in credentials must come out percent-encoded.
+			name: "with special characters in credentials",
+			proxy: Proxy{
+				Protocol: "http",
+				Host:     "proxy.example.com",
+				Port:     3128,
+				Username: "first last@corp",
+				Password: "p@ ss:#word",
+			},
+			want: "http://first%20last%40corp:p%40%20ss%3A%23word@proxy.example.com:3128",
+		},
+	}
+
+	for _, tc := range tests {
+		// Rebind the loop variable so each parallel subtest captures its own case.
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			if got := tc.proxy.URL(); got != tc.want {
+				t.Fatalf("Proxy.URL() mismatch: got=%q want=%q", got, tc.want)
+			}
+		})
+	}
+}
+
+// TestProxyURL_SpecialCharactersRoundTrip verifies that a URL produced by
+// Proxy.URL from credentials containing reserved characters can be parsed by
+// url.Parse and yields the original username and password unchanged.
+func TestProxyURL_SpecialCharactersRoundTrip(t *testing.T) {
+	t.Parallel()
+
+	proxy := Proxy{
+		Protocol: "http",
+		Host:     "proxy.example.com",
+		Port:     3128,
+		Username: "first last@corp",
+		Password: "p@ ss:#word",
+	}
+
+	parsed, err := url.Parse(proxy.URL())
+	if err != nil {
+		t.Fatalf("parse proxy URL failed: %v", err)
+	}
+	// Username() and Password() return the decoded values.
+	if got := parsed.User.Username(); got != proxy.Username {
+		t.Fatalf("username mismatch after parse: got=%q want=%q", got, proxy.Username)
+	}
+	pass, ok := parsed.User.Password()
+	if !ok {
+		t.Fatal("password missing after parse")
+	}
+	if pass != proxy.Password {
+		t.Fatalf("password mismatch after parse: got=%q want=%q", pass, proxy.Password)
+	}
+}
--- a/Show More
+++ b/Show More