fix(admin): 修复退款金额精度问题

- 显示完整余额精度，避免四舍五入导致的退款失败
- 添加"全部"按钮，一键填入完整余额
- 移除最小金额限制，支持任意正数金额
Merge pull request #274 from mt21625457/main
2026-04-06 08:20:23 +08:00 · 2026-01-14 10:22:31 +08:00 · 2026-01-14 09:53:09 +08:00 · 2026-01-14 09:46:10 +08:00 · 2026-01-14 09:22:37 +08:00 · 2026-01-14 09:17:58 +08:00
83 changed files with 5916 additions and 695 deletions
--- a/README_CN.md
+++ b/README_CN.md
@@ -57,6 +57,13 @@ Sub2API 是一个 AI API 网关平台，用于分发和管理 AI 产品订阅（

 ---

+## OpenAI Responses 兼容注意事项
+
+- 当请求包含 `function_call_output` 时，需要携带 `previous_response_id`，或在 `input` 中包含带 `call_id` 的 `tool_call`/`function_call`，或带非空 `id` 且与 `function_call_output.call_id` 匹配的 `item_reference`。
+- 若依赖上游历史记录，网关会强制 `store=true` 并需要复用 `previous_response_id`，以避免出现 “No tool call found for function call output” 错误。
+
+---
+
 ## 部署方式

 ### 方式一：脚本安装（推荐）
--- a/backend/cmd/server/wire.go
+++ b/backend/cmd/server/wire.go
@@ -67,6 +67,7 @@ func provideCleanup(
 	opsAlertEvaluator *service.OpsAlertEvaluatorService,
 	opsCleanup *service.OpsCleanupService,
 	opsScheduledReport *service.OpsScheduledReportService,
+	schedulerSnapshot *service.SchedulerSnapshotService,
 	tokenRefresh *service.TokenRefreshService,
 	accountExpiry *service.AccountExpiryService,
 	pricing *service.PricingService,
@@ -116,6 +117,12 @@ func provideCleanup(
 				}
 				return nil
 			}},
+			{"SchedulerSnapshotService", func() error {
+				if schedulerSnapshot != nil {
+					schedulerSnapshot.Stop()
+				}
+				return nil
+			}},
 			{"TokenRefreshService", func() error {
 				tokenRefresh.Stop()
 				return nil
--- a/backend/cmd/server/wire_gen.go
+++ b/backend/cmd/server/wire_gen.go
@@ -97,7 +97,8 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	antigravityOAuthService := service.NewAntigravityOAuthService(proxyRepository)
 	geminiQuotaService := service.NewGeminiQuotaService(configConfig, settingRepository)
 	tempUnschedCache := repository.NewTempUnschedCache(redisClient)
-	rateLimitService := service.NewRateLimitService(accountRepository, usageLogRepository, configConfig, geminiQuotaService, tempUnschedCache)
+	timeoutCounterCache := repository.NewTimeoutCounterCache(redisClient)
+	rateLimitService := service.ProvideRateLimitService(accountRepository, usageLogRepository, configConfig, geminiQuotaService, tempUnschedCache, timeoutCounterCache, settingService)
 	claudeUsageFetcher := repository.NewClaudeUsageFetcher()
 	antigravityQuotaFetcher := service.NewAntigravityQuotaFetcher(proxyRepository)
 	usageCache := service.NewUsageCache()
@@ -111,6 +112,9 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	accountTestService := service.NewAccountTestService(accountRepository, geminiTokenProvider, antigravityGatewayService, httpUpstream, configConfig)
 	concurrencyCache := repository.ProvideConcurrencyCache(redisClient, configConfig)
 	concurrencyService := service.ProvideConcurrencyService(concurrencyCache, accountRepository, configConfig)
+	schedulerCache := repository.NewSchedulerCache(redisClient)
+	schedulerOutboxRepository := repository.NewSchedulerOutboxRepository(db)
+	schedulerSnapshotService := service.ProvideSchedulerSnapshotService(schedulerCache, schedulerOutboxRepository, accountRepository, groupRepository, configConfig)
 	crsSyncService := service.NewCRSSyncService(accountRepository, proxyRepository, oAuthService, openAIOAuthService, geminiOAuthService, configConfig)
 	accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService)
 	oAuthHandler := admin.NewOAuthHandler(oAuthService)
@@ -130,9 +134,9 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	identityCache := repository.NewIdentityCache(redisClient)
 	identityService := service.NewIdentityService(identityCache)
 	deferredService := service.ProvideDeferredService(accountRepository, timingWheelService)
-	gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService)
-	openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService)
-	geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
+	gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService)
+	openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService)
+	geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
 	opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService)
 	settingHandler := admin.NewSettingHandler(settingService, emailService, turnstileService, opsService)
 	opsHandler := admin.NewOpsHandler(opsService)
@@ -164,7 +168,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	opsScheduledReportService := service.ProvideOpsScheduledReportService(opsService, userService, emailService, redisClient, configConfig)
 	tokenRefreshService := service.ProvideTokenRefreshService(accountRepository, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, configConfig)
 	accountExpiryService := service.ProvideAccountExpiryService(accountRepository)
-	v := provideCleanup(client, redisClient, opsMetricsCollector, opsAggregationService, opsAlertEvaluatorService, opsCleanupService, opsScheduledReportService, tokenRefreshService, accountExpiryService, pricingService, emailQueueService, billingCacheService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService)
+	v := provideCleanup(client, redisClient, opsMetricsCollector, opsAggregationService, opsAlertEvaluatorService, opsCleanupService, opsScheduledReportService, schedulerSnapshotService, tokenRefreshService, accountExpiryService, pricingService, emailQueueService, billingCacheService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService)
 	application := &Application{
 		Server:  httpServer,
 		Cleanup: v,
@@ -194,6 +198,7 @@ func provideCleanup(
 	opsAlertEvaluator *service.OpsAlertEvaluatorService,
 	opsCleanup *service.OpsCleanupService,
 	opsScheduledReport *service.OpsScheduledReportService,
+	schedulerSnapshot *service.SchedulerSnapshotService,
 	tokenRefresh *service.TokenRefreshService,
 	accountExpiry *service.AccountExpiryService,
 	pricing *service.PricingService,
@@ -242,6 +247,12 @@ func provideCleanup(
 				}
 				return nil
 			}},
+			{"SchedulerSnapshotService", func() error {
+				if schedulerSnapshot != nil {
+					schedulerSnapshot.Stop()
+				}
+				return nil
+			}},
 			{"TokenRefreshService", func() error {
 				tokenRefresh.Stop()
 				return nil
--- a/backend/internal/config/config.go
+++ b/backend/internal/config/config.go
@@ -270,6 +270,29 @@ type GatewaySchedulingConfig struct {

 	// 过期槽位清理周期（0 表示禁用）
 	SlotCleanupInterval time.Duration `mapstructure:"slot_cleanup_interval"`
+
+	// 受控回源配置
+	DbFallbackEnabled bool `mapstructure:"db_fallback_enabled"`
+	// 受控回源超时（秒），0 表示不额外收紧超时
+	DbFallbackTimeoutSeconds int `mapstructure:"db_fallback_timeout_seconds"`
+	// 受控回源限流（实例级 QPS），0 表示不限制
+	DbFallbackMaxQPS int `mapstructure:"db_fallback_max_qps"`
+
+	// Outbox 轮询与滞后阈值配置
+	// Outbox 轮询周期（秒）
+	OutboxPollIntervalSeconds int `mapstructure:"outbox_poll_interval_seconds"`
+	// Outbox 滞后告警阈值（秒）
+	OutboxLagWarnSeconds int `mapstructure:"outbox_lag_warn_seconds"`
+	// Outbox 触发强制重建阈值（秒）
+	OutboxLagRebuildSeconds int `mapstructure:"outbox_lag_rebuild_seconds"`
+	// Outbox 连续滞后触发次数
+	OutboxLagRebuildFailures int `mapstructure:"outbox_lag_rebuild_failures"`
+	// Outbox 积压触发重建阈值（行数）
+	OutboxBacklogRebuildRows int `mapstructure:"outbox_backlog_rebuild_rows"`
+
+	// 全量重建周期配置
+	// 全量重建周期（秒），0 表示禁用
+	FullRebuildIntervalSeconds int `mapstructure:"full_rebuild_interval_seconds"`
 }

 func (s *ServerConfig) Address() string {
@@ -744,11 +767,20 @@ func setDefaults() {
 	viper.SetDefault("gateway.stream_keepalive_interval", 10)
 	viper.SetDefault("gateway.max_line_size", 10*1024*1024)
 	viper.SetDefault("gateway.scheduling.sticky_session_max_waiting", 3)
-	viper.SetDefault("gateway.scheduling.sticky_session_wait_timeout", 45*time.Second)
+	viper.SetDefault("gateway.scheduling.sticky_session_wait_timeout", 120*time.Second)
 	viper.SetDefault("gateway.scheduling.fallback_wait_timeout", 30*time.Second)
 	viper.SetDefault("gateway.scheduling.fallback_max_waiting", 100)
 	viper.SetDefault("gateway.scheduling.load_batch_enabled", true)
 	viper.SetDefault("gateway.scheduling.slot_cleanup_interval", 30*time.Second)
+	viper.SetDefault("gateway.scheduling.db_fallback_enabled", true)
+	viper.SetDefault("gateway.scheduling.db_fallback_timeout_seconds", 0)
+	viper.SetDefault("gateway.scheduling.db_fallback_max_qps", 0)
+	viper.SetDefault("gateway.scheduling.outbox_poll_interval_seconds", 1)
+	viper.SetDefault("gateway.scheduling.outbox_lag_warn_seconds", 5)
+	viper.SetDefault("gateway.scheduling.outbox_lag_rebuild_seconds", 10)
+	viper.SetDefault("gateway.scheduling.outbox_lag_rebuild_failures", 3)
+	viper.SetDefault("gateway.scheduling.outbox_backlog_rebuild_rows", 10000)
+	viper.SetDefault("gateway.scheduling.full_rebuild_interval_seconds", 300)
 	viper.SetDefault("concurrency.ping_interval", 10)

 	// TokenRefresh
@@ -1021,6 +1053,35 @@ func (c *Config) Validate() error {
 	if c.Gateway.Scheduling.SlotCleanupInterval < 0 {
 		return fmt.Errorf("gateway.scheduling.slot_cleanup_interval must be non-negative")
 	}
+	if c.Gateway.Scheduling.DbFallbackTimeoutSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.db_fallback_timeout_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.DbFallbackMaxQPS < 0 {
+		return fmt.Errorf("gateway.scheduling.db_fallback_max_qps must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxPollIntervalSeconds <= 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_poll_interval_seconds must be positive")
+	}
+	if c.Gateway.Scheduling.OutboxLagWarnSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_warn_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxLagRebuildSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_rebuild_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxLagRebuildFailures <= 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_rebuild_failures must be positive")
+	}
+	if c.Gateway.Scheduling.OutboxBacklogRebuildRows < 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_backlog_rebuild_rows must be non-negative")
+	}
+	if c.Gateway.Scheduling.FullRebuildIntervalSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.full_rebuild_interval_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxLagWarnSeconds > 0 &&
+		c.Gateway.Scheduling.OutboxLagRebuildSeconds > 0 &&
+		c.Gateway.Scheduling.OutboxLagRebuildSeconds < c.Gateway.Scheduling.OutboxLagWarnSeconds {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_rebuild_seconds must be >= outbox_lag_warn_seconds")
+	}
 	if c.Ops.MetricsCollectorCache.TTL < 0 {
 		return fmt.Errorf("ops.metrics_collector_cache.ttl must be non-negative")
 	}
--- a/backend/internal/config/config_test.go
+++ b/backend/internal/config/config_test.go
@@ -39,8 +39,8 @@ func TestLoadDefaultSchedulingConfig(t *testing.T) {
 	if cfg.Gateway.Scheduling.StickySessionMaxWaiting != 3 {
 		t.Fatalf("StickySessionMaxWaiting = %d, want 3", cfg.Gateway.Scheduling.StickySessionMaxWaiting)
 	}
-	if cfg.Gateway.Scheduling.StickySessionWaitTimeout != 45*time.Second {
-		t.Fatalf("StickySessionWaitTimeout = %v, want 45s", cfg.Gateway.Scheduling.StickySessionWaitTimeout)
+	if cfg.Gateway.Scheduling.StickySessionWaitTimeout != 120*time.Second {
+		t.Fatalf("StickySessionWaitTimeout = %v, want 120s", cfg.Gateway.Scheduling.StickySessionWaitTimeout)
 	}
 	if cfg.Gateway.Scheduling.FallbackWaitTimeout != 30*time.Second {
 		t.Fatalf("FallbackWaitTimeout = %v, want 30s", cfg.Gateway.Scheduling.FallbackWaitTimeout)
--- a/backend/internal/handler/admin/ops_realtime_handler.go
+++ b/backend/internal/handler/admin/ops_realtime_handler.go
@@ -118,3 +118,96 @@ func (h *OpsHandler) GetAccountAvailability(c *gin.Context) {
 	}
 	response.Success(c, payload)
 }
+
+func parseOpsRealtimeWindow(v string) (time.Duration, string, bool) {
+	switch strings.ToLower(strings.TrimSpace(v)) {
+	case "", "1min", "1m":
+		return 1 * time.Minute, "1min", true
+	case "5min", "5m":
+		return 5 * time.Minute, "5min", true
+	case "30min", "30m":
+		return 30 * time.Minute, "30min", true
+	case "1h", "60m", "60min":
+		return 1 * time.Hour, "1h", true
+	default:
+		return 0, "", false
+	}
+}
+
+// GetRealtimeTrafficSummary returns QPS/TPS current/peak/avg for the selected window.
+// GET /api/v1/admin/ops/realtime-traffic
+//
+// Query params:
+// - window: 1min|5min|30min|1h (default: 1min)
+// - platform: optional
+// - group_id: optional
+func (h *OpsHandler) GetRealtimeTrafficSummary(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	windowDur, windowLabel, ok := parseOpsRealtimeWindow(c.Query("window"))
+	if !ok {
+		response.BadRequest(c, "Invalid window")
+		return
+	}
+
+	platform := strings.TrimSpace(c.Query("platform"))
+	var groupID *int64
+	if v := strings.TrimSpace(c.Query("group_id")); v != "" {
+		id, err := strconv.ParseInt(v, 10, 64)
+		if err != nil || id <= 0 {
+			response.BadRequest(c, "Invalid group_id")
+			return
+		}
+		groupID = &id
+	}
+
+	endTime := time.Now().UTC()
+	startTime := endTime.Add(-windowDur)
+
+	if !h.opsService.IsRealtimeMonitoringEnabled(c.Request.Context()) {
+		disabledSummary := &service.OpsRealtimeTrafficSummary{
+			Window:    windowLabel,
+			StartTime: startTime,
+			EndTime:   endTime,
+			Platform:  platform,
+			GroupID:   groupID,
+			QPS:       service.OpsRateSummary{},
+			TPS:       service.OpsRateSummary{},
+		}
+		response.Success(c, gin.H{
+			"enabled":   false,
+			"summary":   disabledSummary,
+			"timestamp": endTime,
+		})
+		return
+	}
+
+	filter := &service.OpsDashboardFilter{
+		StartTime: startTime,
+		EndTime:   endTime,
+		Platform:  platform,
+		GroupID:   groupID,
+		QueryMode: service.OpsQueryModeRaw,
+	}
+
+	summary, err := h.opsService.GetRealtimeTrafficSummary(c.Request.Context(), filter)
+	if err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+	if summary != nil {
+		summary.Window = windowLabel
+	}
+	response.Success(c, gin.H{
+		"enabled":   true,
+		"summary":   summary,
+		"timestamp": endTime,
+	})
+}
--- a/backend/internal/handler/admin/ops_settings_handler.go
+++ b/backend/internal/handler/admin/ops_settings_handler.go
@@ -146,3 +146,49 @@ func (h *OpsHandler) UpdateAdvancedSettings(c *gin.Context) {
 	}
 	response.Success(c, updated)
 }
+
+// GetMetricThresholds returns Ops metric thresholds (DB-backed).
+// GET /api/v1/admin/ops/settings/metric-thresholds
+func (h *OpsHandler) GetMetricThresholds(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	cfg, err := h.opsService.GetMetricThresholds(c.Request.Context())
+	if err != nil {
+		response.Error(c, http.StatusInternalServerError, "Failed to get metric thresholds")
+		return
+	}
+	response.Success(c, cfg)
+}
+
+// UpdateMetricThresholds updates Ops metric thresholds (DB-backed).
+// PUT /api/v1/admin/ops/settings/metric-thresholds
+func (h *OpsHandler) UpdateMetricThresholds(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	var req service.OpsMetricThresholds
+	if err := c.ShouldBindJSON(&req); err != nil {
+		response.BadRequest(c, "Invalid request body")
+		return
+	}
+
+	updated, err := h.opsService.UpdateMetricThresholds(c.Request.Context(), &req)
+	if err != nil {
+		response.Error(c, http.StatusBadRequest, err.Error())
+		return
+	}
+	response.Success(c, updated)
+}
--- a/backend/internal/handler/admin/setting_handler.go
+++ b/backend/internal/handler/admin/setting_handler.go
@@ -654,3 +654,68 @@ func (h *SettingHandler) DeleteAdminAPIKey(c *gin.Context) {

 	response.Success(c, gin.H{"message": "Admin API key deleted"})
 }
+
+// GetStreamTimeoutSettings 获取流超时处理配置
+// GET /api/v1/admin/settings/stream-timeout
+func (h *SettingHandler) GetStreamTimeoutSettings(c *gin.Context) {
+	settings, err := h.settingService.GetStreamTimeoutSettings(c.Request.Context())
+	if err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	response.Success(c, dto.StreamTimeoutSettings{
+		Enabled:                settings.Enabled,
+		Action:                 settings.Action,
+		TempUnschedMinutes:     settings.TempUnschedMinutes,
+		ThresholdCount:         settings.ThresholdCount,
+		ThresholdWindowMinutes: settings.ThresholdWindowMinutes,
+	})
+}
+
+// UpdateStreamTimeoutSettingsRequest 更新流超时配置请求
+type UpdateStreamTimeoutSettingsRequest struct {
+	Enabled                bool   `json:"enabled"`
+	Action                 string `json:"action"`
+	TempUnschedMinutes     int    `json:"temp_unsched_minutes"`
+	ThresholdCount         int    `json:"threshold_count"`
+	ThresholdWindowMinutes int    `json:"threshold_window_minutes"`
+}
+
+// UpdateStreamTimeoutSettings 更新流超时处理配置
+// PUT /api/v1/admin/settings/stream-timeout
+func (h *SettingHandler) UpdateStreamTimeoutSettings(c *gin.Context) {
+	var req UpdateStreamTimeoutSettingsRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		response.BadRequest(c, "Invalid request: "+err.Error())
+		return
+	}
+
+	settings := &service.StreamTimeoutSettings{
+		Enabled:                req.Enabled,
+		Action:                 req.Action,
+		TempUnschedMinutes:     req.TempUnschedMinutes,
+		ThresholdCount:         req.ThresholdCount,
+		ThresholdWindowMinutes: req.ThresholdWindowMinutes,
+	}
+
+	if err := h.settingService.SetStreamTimeoutSettings(c.Request.Context(), settings); err != nil {
+		response.BadRequest(c, err.Error())
+		return
+	}
+
+	// 重新获取设置返回
+	updatedSettings, err := h.settingService.GetStreamTimeoutSettings(c.Request.Context())
+	if err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	response.Success(c, dto.StreamTimeoutSettings{
+		Enabled:                updatedSettings.Enabled,
+		Action:                 updatedSettings.Action,
+		TempUnschedMinutes:     updatedSettings.TempUnschedMinutes,
+		ThresholdCount:         updatedSettings.ThresholdCount,
+		ThresholdWindowMinutes: updatedSettings.ThresholdWindowMinutes,
+	})
+}
--- a/backend/internal/handler/auth_handler.go
+++ b/backend/internal/handler/auth_handler.go
@@ -3,6 +3,7 @@ package handler
 import (
 	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/Wei-Shaw/sub2api/internal/handler/dto"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/response"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -76,7 +77,7 @@ func (h *AuthHandler) Register(c *gin.Context) {

 	// Turnstile 验证（当提供了邮箱验证码时跳过，因为发送验证码时已验证过）
 	if req.VerifyCode == "" {
-		if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, c.ClientIP()); err != nil {
+		if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, ip.GetClientIP(c)); err != nil {
 			response.ErrorFrom(c, err)
 			return
 		}
@@ -105,7 +106,7 @@ func (h *AuthHandler) SendVerifyCode(c *gin.Context) {
 	}

 	// Turnstile 验证
-	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, c.ClientIP()); err != nil {
+	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, ip.GetClientIP(c)); err != nil {
 		response.ErrorFrom(c, err)
 		return
 	}
@@ -132,7 +133,7 @@ func (h *AuthHandler) Login(c *gin.Context) {
 	}

 	// Turnstile 验证
-	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, c.ClientIP()); err != nil {
+	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, ip.GetClientIP(c)); err != nil {
 		response.ErrorFrom(c, err)
 		return
 	}
--- a/backend/internal/handler/dto/settings.go
+++ b/backend/internal/handler/dto/settings.go
@@ -66,3 +66,12 @@ type PublicSettings struct {
 	LinuxDoOAuthEnabled bool   `json:"linuxdo_oauth_enabled"`
 	Version             string `json:"version"`
 }
+
+// StreamTimeoutSettings 流超时处理配置 DTO
+type StreamTimeoutSettings struct {
+	Enabled                bool   `json:"enabled"`
+	Action                 string `json:"action"`
+	TempUnschedMinutes     int    `json:"temp_unsched_minutes"`
+	ThresholdCount         int    `json:"threshold_count"`
+	ThresholdWindowMinutes int    `json:"threshold_window_minutes"`
+}
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -15,6 +15,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
 	pkgerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -88,6 +89,9 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		return
 	}

+	// 检查是否为 Claude Code 客户端，设置到 context 中
+	SetClaudeCodeClientContext(c, body)
+
 	setOpsRequestContext(c, "", false, body)

 	parsedReq, err := service.ParseGatewayRequest(body)
@@ -271,12 +275,11 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				var failoverErr *service.UpstreamFailoverError
 				if errors.As(err, &failoverErr) {
 					failedAccountIDs[account.ID] = struct{}{}
+					lastFailoverStatus = failoverErr.StatusCode
 					if switchCount >= maxAccountSwitches {
-						lastFailoverStatus = failoverErr.StatusCode
 						h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
 						return
 					}
-					lastFailoverStatus = failoverErr.StatusCode
 					switchCount++
 					log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
 					continue
@@ -286,8 +289,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				return
 			}

+			// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+			userAgent := c.GetHeader("User-Agent")
+			clientIP := ip.GetClientIP(c)
+
 			// 异步记录使用量（subscription已在函数开头获取）
-			go func(result *service.ForwardResult, usedAccount *service.Account) {
+			go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string) {
 				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 				defer cancel()
 				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
@@ -296,10 +303,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					User:         apiKey.User,
 					Account:      usedAccount,
 					Subscription: subscription,
+					UserAgent:    ua,
+					IPAddress:    clientIP,
 				}); err != nil {
 					log.Printf("Record usage failed: %v", err)
 				}
-			}(result, account)
+			}(result, account, userAgent, clientIP)
 			return
 		}
 	}
@@ -399,12 +408,11 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			var failoverErr *service.UpstreamFailoverError
 			if errors.As(err, &failoverErr) {
 				failedAccountIDs[account.ID] = struct{}{}
+				lastFailoverStatus = failoverErr.StatusCode
 				if switchCount >= maxAccountSwitches {
-					lastFailoverStatus = failoverErr.StatusCode
 					h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
 					return
 				}
-				lastFailoverStatus = failoverErr.StatusCode
 				switchCount++
 				log.Printf("Account %d: upstream error %d, switching account %d/%d", account.ID, failoverErr.StatusCode, switchCount, maxAccountSwitches)
 				continue
@@ -414,8 +422,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			return
 		}

+		// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+		userAgent := c.GetHeader("User-Agent")
+		clientIP := ip.GetClientIP(c)
+
 		// 异步记录使用量（subscription已在函数开头获取）
-		go func(result *service.ForwardResult, usedAccount *service.Account) {
+		go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string) {
 			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 			defer cancel()
 			if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
@@ -424,10 +436,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				User:         apiKey.User,
 				Account:      usedAccount,
 				Subscription: subscription,
+				UserAgent:    ua,
+				IPAddress:    clientIP,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
-		}(result, account)
+		}(result, account, userAgent, clientIP)
 		return
 	}
 }
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -12,6 +12,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/gemini"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/googleapi"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"

@@ -314,8 +315,12 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 			return
 		}

+		// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+		userAgent := c.GetHeader("User-Agent")
+		clientIP := ip.GetClientIP(c)
+
 		// 6) record usage async
-		go func(result *service.ForwardResult, usedAccount *service.Account) {
+		go func(result *service.ForwardResult, usedAccount *service.Account, ua, ip string) {
 			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 			defer cancel()
 			if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
@@ -324,10 +329,12 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 				User:         apiKey.User,
 				Account:      usedAccount,
 				Subscription: subscription,
+				UserAgent:    ua,
+				IPAddress:    ip,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
-		}(result, account)
+		}(result, account, userAgent, clientIP)
 		return
 	}
 }
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -12,6 +12,7 @@ import (
 	"time"

 	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -113,6 +114,26 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {

 	setOpsRequestContext(c, reqModel, reqStream, body)

+	// 提前校验 function_call_output 是否具备可关联上下文，避免上游 400。
+	// 要求 previous_response_id，或 input 内存在带 call_id 的 tool_call/function_call，
+	// 或带 id 且与 call_id 匹配的 item_reference。
+	if service.HasFunctionCallOutput(reqBody) {
+		previousResponseID, _ := reqBody["previous_response_id"].(string)
+		if strings.TrimSpace(previousResponseID) == "" && !service.HasToolCallContext(reqBody) {
+			if service.HasFunctionCallOutputMissingCallID(reqBody) {
+				log.Printf("[OpenAI Handler] function_call_output 缺少 call_id: model=%s", reqModel)
+				h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "function_call_output requires call_id or previous_response_id; if relying on history, ensure store=true and reuse previous_response_id")
+				return
+			}
+			callIDs := service.FunctionCallOutputCallIDs(reqBody)
+			if !service.HasItemReferenceForCallIDs(reqBody, callIDs) {
+				log.Printf("[OpenAI Handler] function_call_output 缺少匹配的 item_reference: model=%s", reqModel)
+				h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "function_call_output requires item_reference ids matching each call_id, or previous_response_id/tool_call context; if relying on history, ensure store=true and reuse previous_response_id")
+				return
+			}
+		}
+	}
+
 	// Track if we've started streaming (for error handling)
 	streamStarted := false

@@ -263,8 +284,12 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 			return
 		}

+		// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+		userAgent := c.GetHeader("User-Agent")
+		clientIP := ip.GetClientIP(c)
+
 		// Async record usage
-		go func(result *service.OpenAIForwardResult, usedAccount *service.Account) {
+		go func(result *service.OpenAIForwardResult, usedAccount *service.Account, ua, ip string) {
 			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 			defer cancel()
 			if err := h.gatewayService.RecordUsage(ctx, &service.OpenAIRecordUsageInput{
@@ -273,10 +298,12 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 				User:         apiKey.User,
 				Account:      usedAccount,
 				Subscription: subscription,
+				UserAgent:    ua,
+				IPAddress:    ip,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
-		}(result, account)
+		}(result, account, userAgent, clientIP)
 		return
 	}
 }
--- a/backend/internal/handler/ops_error_logger.go
+++ b/backend/internal/handler/ops_error_logger.go
@@ -15,6 +15,7 @@ import (
 	"unicode/utf8"

 	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
 	"github.com/gin-gonic/gin"
@@ -489,6 +490,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 				Severity:          classifyOpsSeverity("upstream_error", effectiveUpstreamStatus),
 				StatusCode:        status,
 				IsBusinessLimited: false,
+				IsCountTokens:     isCountTokensRequest(c),

 				ErrorMessage: recoveredMsg,
 				ErrorBody:    "",
@@ -521,7 +523,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 			}

 			var clientIP string
-			if ip := strings.TrimSpace(c.ClientIP()); ip != "" {
+			if ip := strings.TrimSpace(ip.GetClientIP(c)); ip != "" {
 				clientIP = ip
 				entry.ClientIP = &clientIP
 			}
@@ -598,6 +600,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 			Severity:          classifyOpsSeverity(parsed.ErrorType, status),
 			StatusCode:        status,
 			IsBusinessLimited: isBusinessLimited,
+			IsCountTokens:     isCountTokensRequest(c),

 			ErrorMessage: parsed.Message,
 			// Keep the full captured error body (capture is already capped at 64KB) so the
@@ -680,7 +683,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 		}

 		var clientIP string
-		if ip := strings.TrimSpace(c.ClientIP()); ip != "" {
+		if ip := strings.TrimSpace(ip.GetClientIP(c)); ip != "" {
 			clientIP = ip
 			entry.ClientIP = &clientIP
 		}
@@ -704,6 +707,14 @@ var opsRetryRequestHeaderAllowlist = []string{
 	"anthropic-version",
 }

+// isCountTokensRequest reports whether the request URL path contains "/count_tokens"; a nil context, request, or URL yields false.
+func isCountTokensRequest(c *gin.Context) bool {
+	if c != nil && c.Request != nil && c.Request.URL != nil {
+		return strings.Contains(c.Request.URL.Path, "/count_tokens")
+	}
+	return false
+}
+
 func extractOpsRetryRequestHeaders(c *gin.Context) *string {
 	if c == nil || c.Request == nil {
 		return nil
--- a/backend/internal/middleware/rate_limiter.go
+++ b/backend/internal/middleware/rate_limiter.go
@@ -2,7 +2,10 @@ package middleware

 import (
 	"context"
+	"fmt"
+	"log"
 	"net/http"
+	"strconv"
 	"time"

 	"github.com/gin-gonic/gin"
@@ -25,15 +28,34 @@ type RateLimitOptions struct {
 var rateLimitScript = redis.NewScript(`
 local current = redis.call('INCR', KEYS[1])
 local ttl = redis.call('PTTL', KEYS[1])
-if current == 1 or ttl == -1 then
+local repaired = 0
+if current == 1 then
  redis.call('PEXPIRE', KEYS[1], ARGV[1])
+elseif ttl == -1 then
+  redis.call('PEXPIRE', KEYS[1], ARGV[1])
+  repaired = 1
 end
-return current
+return {current, repaired}
 `)

 // rateLimitRun 允许测试覆写脚本执行逻辑
-var rateLimitRun = func(ctx context.Context, client *redis.Client, key string, windowMillis int64) (int64, error) {
-	return rateLimitScript.Run(ctx, client, []string{key}, windowMillis).Int64()
+var rateLimitRun = func(ctx context.Context, client *redis.Client, key string, windowMillis int64) (int64, bool, error) {
+	values, err := rateLimitScript.Run(ctx, client, []string{key}, windowMillis).Slice()
+	if err != nil {
+		return 0, false, err
+	}
+	if len(values) < 2 {
+		return 0, false, fmt.Errorf("rate limit script returned %d values", len(values))
+	}
+	count, err := parseInt64(values[0])
+	if err != nil {
+		return 0, false, err
+	}
+	repaired, err := parseInt64(values[1])
+	if err != nil {
+		return 0, false, err
+	}
+	return count, repaired == 1, nil
 }

 // RateLimiter Redis 速率限制器
@@ -74,8 +96,9 @@ func (r *RateLimiter) LimitWithOptions(key string, limit int, window time.Durati
 		windowMillis := windowTTLMillis(window)

 		// 使用 Lua 脚本原子操作增加计数并设置过期
-		count, err := rateLimitRun(ctx, r.redis, redisKey, windowMillis)
+		count, repaired, err := rateLimitRun(ctx, r.redis, redisKey, windowMillis)
 		if err != nil {
+			log.Printf("[RateLimit] redis error: key=%s mode=%s err=%v", redisKey, failureModeLabel(failureMode), err)
 			if failureMode == RateLimitFailClose {
 				abortRateLimit(c)
 				return
@@ -84,6 +107,9 @@ func (r *RateLimiter) LimitWithOptions(key string, limit int, window time.Durati
 			c.Next()
 			return
 		}
+		if repaired {
+			log.Printf("[RateLimit] ttl repaired: key=%s window_ms=%d", redisKey, windowMillis)
+		}

 		// 超过限制
 		if count > int64(limit) {
@@ -109,3 +135,27 @@ func abortRateLimit(c *gin.Context) {
 		"message": "Too many requests, please try again later",
 	})
 }
+
+func failureModeLabel(mode RateLimitFailureMode) string {
+	if mode != RateLimitFailClose {
+		return "fail-open" // every mode other than RateLimitFailClose is labelled fail-open
+	}
+	return "fail-close"
+}
+
+// parseInt64 converts an int64, int, or base-10 string value to int64; any other dynamic type is an error.
+func parseInt64(value any) (int64, error) {
+	switch typed := value.(type) {
+	case int64:
+		return typed, nil
+	case int:
+		return int64(typed), nil
+	case string:
+		number, err := strconv.ParseInt(typed, 10, 64)
+		if err != nil {
+			return 0, err
+		}
+		return number, nil
+	}
+	return 0, fmt.Errorf("unexpected value type %T", value)
+}
--- a/backend/internal/middleware/rate_limiter_test.go
+++ b/backend/internal/middleware/rate_limiter_test.go
@@ -66,13 +66,13 @@ func TestRateLimiterSuccessAndLimit(t *testing.T) {
 	originalRun := rateLimitRun
 	counts := []int64{1, 2}
 	callIndex := 0
-	rateLimitRun = func(ctx context.Context, client *redis.Client, key string, windowMillis int64) (int64, error) {
+	rateLimitRun = func(ctx context.Context, client *redis.Client, key string, windowMillis int64) (int64, bool, error) {
 		if callIndex >= len(counts) {
-			return counts[len(counts)-1], nil
+			return counts[len(counts)-1], false, nil
 		}
 		value := counts[callIndex]
 		callIndex++
-		return value, nil
+		return value, false, nil
 	}
 	t.Cleanup(func() {
 		rateLimitRun = originalRun
--- a/backend/internal/repository/account_repo.go
+++ b/backend/internal/repository/account_repo.go
@@ -15,6 +15,7 @@ import (
 	"database/sql"
 	"encoding/json"
 	"errors"
+	"log"
 	"strconv"
 	"time"

@@ -115,6 +116,9 @@ func (r *accountRepository) Create(ctx context.Context, account *service.Account
 	account.ID = created.ID
 	account.CreatedAt = created.CreatedAt
 	account.UpdatedAt = created.UpdatedAt
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &account.ID, nil, buildSchedulerGroupPayload(account.GroupIDs)); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue account create failed: account=%d err=%v", account.ID, err)
+	}
 	return nil
 }

@@ -341,10 +345,17 @@ func (r *accountRepository) Update(ctx context.Context, account *service.Account
 		return translatePersistenceError(err, service.ErrAccountNotFound, nil)
 	}
 	account.UpdatedAt = updated.UpdatedAt
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &account.ID, nil, buildSchedulerGroupPayload(account.GroupIDs)); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue account update failed: account=%d err=%v", account.ID, err)
+	}
 	return nil
 }

 func (r *accountRepository) Delete(ctx context.Context, id int64) error {
+	groupIDs, err := r.loadAccountGroupIDs(ctx, id)
+	if err != nil {
+		return err
+	}
 	// 使用事务保证账号与关联分组的删除原子性
 	tx, err := r.client.Tx(ctx)
 	if err != nil && !errors.Is(err, dbent.ErrTxStarted) {
@@ -368,7 +379,12 @@ func (r *accountRepository) Delete(ctx context.Context, id int64) error {
 	}

 	if tx != nil {
-		return tx.Commit()
+		if err := tx.Commit(); err != nil {
+			return err
+		}
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, buildSchedulerGroupPayload(groupIDs)); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue account delete failed: account=%d err=%v", id, err)
 	}
 	return nil
 }
@@ -455,7 +471,18 @@ func (r *accountRepository) UpdateLastUsed(ctx context.Context, id int64) error
 		Where(dbaccount.IDEQ(id)).
 		SetLastUsedAt(now).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	payload := map[string]any{
+		"last_used": map[string]int64{
+			strconv.FormatInt(id, 10): now.Unix(),
+		},
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountLastUsed, &id, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue last used failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) BatchUpdateLastUsed(ctx context.Context, updates map[int64]time.Time) error {
@@ -479,7 +506,18 @@ func (r *accountRepository) BatchUpdateLastUsed(ctx context.Context, updates map
 	args = append(args, pq.Array(ids))

 	_, err := r.sql.ExecContext(ctx, caseSQL, args...)
-	return err
+	if err != nil {
+		return err
+	}
+	lastUsedPayload := make(map[string]int64, len(updates))
+	for id, ts := range updates {
+		lastUsedPayload[strconv.FormatInt(id, 10)] = ts.Unix()
+	}
+	payload := map[string]any{"last_used": lastUsedPayload}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountLastUsed, nil, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue batch last used failed: err=%v", err)
+	}
+	return nil
 }

 func (r *accountRepository) SetError(ctx context.Context, id int64, errorMsg string) error {
@@ -488,7 +526,13 @@ func (r *accountRepository) SetError(ctx context.Context, id int64, errorMsg str
 		SetStatus(service.StatusError).
 		SetErrorMessage(errorMsg).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue set error failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) AddToGroup(ctx context.Context, accountID, groupID int64, priority int) error {
@@ -497,7 +541,14 @@ func (r *accountRepository) AddToGroup(ctx context.Context, accountID, groupID i
 		SetGroupID(groupID).
 		SetPriority(priority).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	payload := buildSchedulerGroupPayload([]int64{groupID})
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountGroupsChanged, &accountID, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue add to group failed: account=%d group=%d err=%v", accountID, groupID, err)
+	}
+	return nil
 }

 func (r *accountRepository) RemoveFromGroup(ctx context.Context, accountID, groupID int64) error {
@@ -507,7 +558,14 @@ func (r *accountRepository) RemoveFromGroup(ctx context.Context, accountID, grou
 			dbaccountgroup.GroupIDEQ(groupID),
 		).
 		Exec(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	payload := buildSchedulerGroupPayload([]int64{groupID})
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountGroupsChanged, &accountID, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue remove from group failed: account=%d group=%d err=%v", accountID, groupID, err)
+	}
+	return nil
 }

 func (r *accountRepository) GetGroups(ctx context.Context, accountID int64) ([]service.Group, error) {
@@ -528,6 +586,10 @@ func (r *accountRepository) GetGroups(ctx context.Context, accountID int64) ([]s
 }

 func (r *accountRepository) BindGroups(ctx context.Context, accountID int64, groupIDs []int64) error {
+	existingGroupIDs, err := r.loadAccountGroupIDs(ctx, accountID)
+	if err != nil {
+		return err
+	}
 	// 使用事务保证删除旧绑定与创建新绑定的原子性
 	tx, err := r.client.Tx(ctx)
 	if err != nil && !errors.Is(err, dbent.ErrTxStarted) {
@@ -568,7 +630,13 @@ func (r *accountRepository) BindGroups(ctx context.Context, accountID int64, gro
 	}

 	if tx != nil {
-		return tx.Commit()
+		if err := tx.Commit(); err != nil {
+			return err
+		}
+	}
+	payload := buildSchedulerGroupPayload(mergeGroupIDs(existingGroupIDs, groupIDs))
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountGroupsChanged, &accountID, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue bind groups failed: account=%d err=%v", accountID, err)
 	}
 	return nil
 }
@@ -672,7 +740,13 @@ func (r *accountRepository) SetRateLimited(ctx context.Context, id int64, resetA
 		SetRateLimitedAt(now).
 		SetRateLimitResetAt(resetAt).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue rate limit failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope service.AntigravityQuotaScope, resetAt time.Time) error {
@@ -706,6 +780,9 @@ func (r *accountRepository) SetAntigravityQuotaScopeLimit(ctx context.Context, i
 	if affected == 0 {
 		return service.ErrAccountNotFound
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue quota scope failed: account=%d err=%v", id, err)
+	}
 	return nil
 }

@@ -714,7 +791,13 @@ func (r *accountRepository) SetOverloaded(ctx context.Context, id int64, until t
 		Where(dbaccount.IDEQ(id)).
 		SetOverloadUntil(until).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue overload failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) SetTempUnschedulable(ctx context.Context, id int64, until time.Time, reason string) error {
@@ -727,7 +810,13 @@ func (r *accountRepository) SetTempUnschedulable(ctx context.Context, id int64,
 			AND deleted_at IS NULL
 			AND (temp_unschedulable_until IS NULL OR temp_unschedulable_until < $1)
 	`, until, reason, id)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue temp unschedulable failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) ClearTempUnschedulable(ctx context.Context, id int64) error {
@@ -739,7 +828,13 @@ func (r *accountRepository) ClearTempUnschedulable(ctx context.Context, id int64
 		WHERE id = $1
 			AND deleted_at IS NULL
 	`, id)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue clear temp unschedulable failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) ClearRateLimit(ctx context.Context, id int64) error {
@@ -749,7 +844,13 @@ func (r *accountRepository) ClearRateLimit(ctx context.Context, id int64) error
 		ClearRateLimitResetAt().
 		ClearOverloadUntil().
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue clear rate limit failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) ClearAntigravityQuotaScopes(ctx context.Context, id int64) error {
@@ -770,6 +871,9 @@ func (r *accountRepository) ClearAntigravityQuotaScopes(ctx context.Context, id
 	if affected == 0 {
 		return service.ErrAccountNotFound
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue clear quota scopes failed: account=%d err=%v", id, err)
+	}
 	return nil
 }

@@ -792,7 +896,13 @@ func (r *accountRepository) SetSchedulable(ctx context.Context, id int64, schedu
 		Where(dbaccount.IDEQ(id)).
 		SetSchedulable(schedulable).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue schedulable change failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) AutoPauseExpiredAccounts(ctx context.Context, now time.Time) (int64, error) {
@@ -813,6 +923,11 @@ func (r *accountRepository) AutoPauseExpiredAccounts(ctx context.Context, now ti
 	if err != nil {
 		return 0, err
 	}
+	if rows > 0 {
+		if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventFullRebuild, nil, nil, nil); err != nil {
+			log.Printf("[SchedulerOutbox] enqueue auto pause rebuild failed: err=%v", err)
+		}
+	}
 	return rows, nil
 }

@@ -844,6 +959,9 @@ func (r *accountRepository) UpdateExtra(ctx context.Context, id int64, updates m
 	if affected == 0 {
 		return service.ErrAccountNotFound
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue extra update failed: account=%d err=%v", id, err)
+	}
 	return nil
 }

@@ -928,6 +1046,12 @@ func (r *accountRepository) BulkUpdate(ctx context.Context, ids []int64, updates
 	if err != nil {
 		return 0, err
 	}
+	if rows > 0 {
+		payload := map[string]any{"account_ids": ids}
+		if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountBulkChanged, nil, nil, payload); err != nil {
+			log.Printf("[SchedulerOutbox] enqueue bulk update failed: err=%v", err)
+		}
+	}
 	return rows, nil
 }

@@ -1170,6 +1294,54 @@ func (r *accountRepository) loadAccountGroups(ctx context.Context, accountIDs []
 	return groupsByAccount, groupIDsByAccount, accountGroupsByAccount, nil
 }

+// loadAccountGroupIDs returns the GroupID of every AccountGroup row whose account ID equals accountID.
+// On success the slice is non-nil (possibly empty); when the query fails it returns nil and the error.
+func (r *accountRepository) loadAccountGroupIDs(ctx context.Context, accountID int64) ([]int64, error) {
+	query := r.client.AccountGroup.Query().Where(dbaccountgroup.AccountIDEQ(accountID))
+	rows, err := query.All(ctx)
+	if err != nil {
+		return nil, err
+	}
+	groupIDs := make([]int64, len(rows))
+	for i := range rows {
+		groupIDs[i] = rows[i].GroupID
+	}
+	return groupIDs, nil
+}
+
+// mergeGroupIDs returns the union of a and b with duplicates removed.
+//
+// IDs keep their first-seen order: every kept element of a comes before the
+// elements that appear only in b. Non-positive IDs are dropped. The result is
+// always a non-nil slice, even when both inputs are empty.
+func mergeGroupIDs(a []int64, b []int64) []int64 {
+	total := len(a) + len(b)
+	visited := make(map[int64]struct{}, total)
+	merged := make([]int64, 0, total)
+	// Walk a first and then b so first-seen ordering matches concatenation order.
+	for _, batch := range [][]int64{a, b} {
+		for _, groupID := range batch {
+			if groupID <= 0 {
+				continue
+			}
+			if _, dup := visited[groupID]; dup {
+				// Already emitted from an earlier position.
+				continue
+			}
+			visited[groupID] = struct{}{}
+			merged = append(merged, groupID)
+		}
+	}
+	return merged
+}
+
+func buildSchedulerGroupPayload(groupIDs []int64) map[string]any {
+	if len(groupIDs) > 0 {
+		return map[string]any{"group_ids": groupIDs}
+	}
+	return nil // no group IDs: return a nil payload instead of a map holding an empty list
+}
+
 func accountEntityToService(m *dbent.Account) *service.Account {
 	if m == nil {
 		return nil
--- a/backend/internal/repository/group_repo.go
+++ b/backend/internal/repository/group_repo.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"database/sql"
 	"errors"
+	"log"

 	dbent "github.com/Wei-Shaw/sub2api/ent"
 	"github.com/Wei-Shaw/sub2api/ent/apikey"
@@ -55,6 +56,9 @@ func (r *groupRepository) Create(ctx context.Context, groupIn *service.Group) er
 		groupIn.ID = created.ID
 		groupIn.CreatedAt = created.CreatedAt
 		groupIn.UpdatedAt = created.UpdatedAt
+		if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &groupIn.ID, nil); err != nil {
+			log.Printf("[SchedulerOutbox] enqueue group create failed: group=%d err=%v", groupIn.ID, err)
+		}
 	}
 	return translatePersistenceError(err, nil, service.ErrGroupExists)
 }
@@ -111,12 +115,21 @@ func (r *groupRepository) Update(ctx context.Context, groupIn *service.Group) er
 		return translatePersistenceError(err, service.ErrGroupNotFound, service.ErrGroupExists)
 	}
 	groupIn.UpdatedAt = updated.UpdatedAt
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &groupIn.ID, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group update failed: group=%d err=%v", groupIn.ID, err)
+	}
 	return nil
 }

 func (r *groupRepository) Delete(ctx context.Context, id int64) error {
 	_, err := r.client.Group.Delete().Where(group.IDEQ(id)).Exec(ctx)
-	return translatePersistenceError(err, service.ErrGroupNotFound, nil)
+	if err != nil {
+		return translatePersistenceError(err, service.ErrGroupNotFound, nil)
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &id, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group delete failed: group=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *groupRepository) List(ctx context.Context, params pagination.PaginationParams) ([]service.Group, *pagination.PaginationResult, error) {
@@ -246,6 +259,9 @@ func (r *groupRepository) DeleteAccountGroupsByGroupID(ctx context.Context, grou
 		return 0, err
 	}
 	affected, _ := res.RowsAffected()
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &groupID, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group account clear failed: group=%d err=%v", groupID, err)
+	}
 	return affected, nil
 }

@@ -353,6 +369,9 @@ func (r *groupRepository) DeleteCascade(ctx context.Context, id int64) ([]int64,
 			return nil, err
 		}
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &id, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group cascade delete failed: group=%d err=%v", id, err)
+	}

 	return affectedUserIDs, nil
 }
--- a/backend/internal/repository/migrations_runner.go
+++ b/backend/internal/repository/migrations_runner.go
@@ -28,6 +28,23 @@ CREATE TABLE IF NOT EXISTS schema_migrations (
 );
 `

+const atlasSchemaRevisionsTableDDL = `
+CREATE TABLE IF NOT EXISTS atlas_schema_revisions (
+	version TEXT PRIMARY KEY,
+	description TEXT NOT NULL,
+	type INTEGER NOT NULL,
+	applied INTEGER NOT NULL DEFAULT 0,
+	total INTEGER NOT NULL DEFAULT 0,
+	executed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+	execution_time BIGINT NOT NULL DEFAULT 0,
+	error TEXT NULL,
+	error_stmt TEXT NULL,
+	hash TEXT NOT NULL DEFAULT '',
+	partial_hashes TEXT[] NULL,
+	operator_version TEXT NULL
+);
+`
+
 // migrationsAdvisoryLockID 是用于序列化迁移操作的 PostgreSQL Advisory Lock ID。
 // 在多实例部署场景下，该锁确保同一时间只有一个实例执行迁移。
 // 任何稳定的 int64 值都可以，只要不与同一数据库中的其他锁冲突即可。
@@ -94,6 +111,11 @@ func applyMigrationsFS(ctx context.Context, db *sql.DB, fsys fs.FS) error {
 		return fmt.Errorf("create schema_migrations: %w", err)
 	}

+	// 自动对齐 Atlas 基线（如果检测到 legacy schema_migrations 且缺失 atlas_schema_revisions）。
+	if err := ensureAtlasBaselineAligned(ctx, db, fsys); err != nil {
+		return err
+	}
+
 	// 获取所有 .sql 迁移文件并按文件名排序。
 	// 命名规范：使用零填充数字前缀（如 001_init.sql, 002_add_users.sql）。
 	files, err := fs.Glob(fsys, "*.sql")
@@ -172,6 +194,80 @@ func applyMigrationsFS(ctx context.Context, db *sql.DB, fsys fs.FS) error {
 	return nil
 }

+// ensureAtlasBaselineAligned seeds atlas_schema_revisions with a single baseline row.
+// It returns early when schema_migrations does not exist or when atlas_schema_revisions
+// already contains rows, and creates atlas_schema_revisions first if it is missing.
+func ensureAtlasBaselineAligned(ctx context.Context, db *sql.DB, fsys fs.FS) error {
+	if legacyPresent, err := tableExists(ctx, db, "schema_migrations"); err != nil {
+		return fmt.Errorf("check schema_migrations: %w", err)
+	} else if !legacyPresent {
+		return nil
+	}
+
+	if atlasPresent, err := tableExists(ctx, db, "atlas_schema_revisions"); err != nil {
+		return fmt.Errorf("check atlas_schema_revisions: %w", err)
+	} else if !atlasPresent {
+		if _, err := db.ExecContext(ctx, atlasSchemaRevisionsTableDDL); err != nil {
+			return fmt.Errorf("create atlas_schema_revisions: %w", err)
+		}
+	}
+
+	var revisions int
+	countRow := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM atlas_schema_revisions")
+	if err := countRow.Scan(&revisions); err != nil {
+		return fmt.Errorf("count atlas_schema_revisions: %w", err)
+	}
+	if revisions > 0 {
+		return nil
+	}
+
+	version, description, hash, err := latestMigrationBaseline(fsys)
+	if err != nil {
+		return fmt.Errorf("atlas baseline version: %w", err)
+	}
+	const insertBaseline = `
+		INSERT INTO atlas_schema_revisions (version, description, type, applied, total, executed_at, execution_time, hash)
+		VALUES ($1, $2, $3, 0, 0, NOW(), 0, $4)
+	`
+	if _, err := db.ExecContext(ctx, insertBaseline, version, description, 1, hash); err != nil {
+		return fmt.Errorf("insert atlas baseline: %w", err)
+	}
+	return nil
+}
+
+func tableExists(ctx context.Context, db *sql.DB, tableName string) (bool, error) {
+	var found bool // true when information_schema.tables lists tableName in schema 'public'
+	scanErr := db.QueryRowContext(ctx, `
+		SELECT EXISTS (
+			SELECT 1
+			FROM information_schema.tables
+			WHERE table_schema = 'public' AND table_name = $1
+		)
+	`, tableName).Scan(&found)
+	return found, scanErr
+}
+
+// latestMigrationBaseline returns version and description (both the last-sorting *.sql file name minus ".sql") and the
+// hex SHA-256 of that file's whitespace-trimmed content; with no *.sql files it returns "baseline", "baseline", "".
+func latestMigrationBaseline(fsys fs.FS) (string, string, string, error) {
+	matches, err := fs.Glob(fsys, "*.sql")
+	switch {
+	case err != nil:
+		return "", "", "", err
+	case len(matches) == 0:
+		return "baseline", "baseline", "", nil
+	}
+	sort.Strings(matches)
+	latest := matches[len(matches)-1]
+	raw, err := fs.ReadFile(fsys, latest)
+	if err != nil {
+		return "", "", "", err
+	}
+	digest := sha256.Sum256([]byte(strings.TrimSpace(string(raw))))
+	version := strings.TrimSuffix(latest, ".sql")
+	return version, version, hex.EncodeToString(digest[:]), nil
+}
+
 // pgAdvisoryLock 获取 PostgreSQL Advisory Lock。
 // Advisory Lock 是一种轻量级的锁机制，不与任何特定的数据库对象关联。
 // 它非常适合用于应用层面的分布式锁场景，如迁移序列化。
--- a/backend/internal/repository/ops_repo.go
+++ b/backend/internal/repository/ops_repo.go
@@ -46,6 +46,7 @@ INSERT INTO ops_error_logs (
  severity,
  status_code,
  is_business_limited,
+  is_count_tokens,
  error_message,
  error_body,
  error_source,
@@ -64,7 +65,7 @@ INSERT INTO ops_error_logs (
  retry_count,
  created_at
 ) VALUES (
-  $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34
+  $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34,$35
 ) RETURNING id`

 	var id int64
@@ -88,6 +89,7 @@ INSERT INTO ops_error_logs (
 		opsNullString(input.Severity),
 		opsNullInt(input.StatusCode),
 		input.IsBusinessLimited,
+		input.IsCountTokens,
 		opsNullString(input.ErrorMessage),
 		opsNullString(input.ErrorBody),
 		opsNullString(input.ErrorSource),
--- a/backend/internal/repository/ops_repo_dashboard.go
+++ b/backend/internal/repository/ops_repo_dashboard.go
@@ -964,8 +964,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
 	}

 	idx := startIndex
-	clauses := make([]string, 0, 4)
-	args = make([]any, 0, 4)
+	clauses := make([]string, 0, 5)
+	args = make([]any, 0, 5)

 	args = append(args, start)
 	clauses = append(clauses, fmt.Sprintf("created_at >= $%d", idx))
@@ -974,6 +974,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
 	clauses = append(clauses, fmt.Sprintf("created_at < $%d", idx))
 	idx++

+	clauses = append(clauses, "is_count_tokens = FALSE")
+
 	if groupID != nil && *groupID > 0 {
 		args = append(args, *groupID)
 		clauses = append(clauses, fmt.Sprintf("group_id = $%d", idx))
--- a/backend/internal/repository/ops_repo_preagg.go
+++ b/backend/internal/repository/ops_repo_preagg.go
@@ -78,7 +78,9 @@ error_base AS (
    status_code AS client_status_code,
    COALESCE(upstream_status_code, status_code, 0) AS effective_status_code
  FROM ops_error_logs
+  -- Exclude count_tokens requests from error metrics as they are informational probes
  WHERE created_at >= $1 AND created_at < $2
+    AND is_count_tokens = FALSE
 ),
 error_agg AS (
  SELECT
--- a/backend/internal/repository/ops_repo_realtime_traffic.go
+++ b/backend/internal/repository/ops_repo_realtime_traffic.go
@@ -0,0 +1,129 @@
+package repository
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+)
+
+// GetRealtimeTrafficSummary reports average, peak and current QPS/TPS for the window
+// between filter.StartTime and filter.EndTime. Successful requests and tokens come from
+// usage_logs, failed requests from ops_error_logs rows with status_code >= 400.
+// The window must be positive and no longer than one hour; a nil receiver, nil filter,
+// zero start/end time or an invalid window yields an error before any query runs.
+func (r *opsRepository) GetRealtimeTrafficSummary(ctx context.Context, filter *service.OpsDashboardFilter) (*service.OpsRealtimeTrafficSummary, error) {
+	// Reject unusable receivers and filters before touching the database.
+	switch {
+	case r == nil || r.db == nil:
+		return nil, fmt.Errorf("nil ops repository")
+	case filter == nil:
+		return nil, fmt.Errorf("nil filter")
+	case filter.StartTime.IsZero() || filter.EndTime.IsZero():
+		return nil, fmt.Errorf("start_time/end_time required")
+	}
+
+	// Work in UTC; the window length drives both the validation and the averages.
+	start, end := filter.StartTime.UTC(), filter.EndTime.UTC()
+	window := end.Sub(start)
+	switch {
+	case start.After(end):
+		return nil, fmt.Errorf("start_time must be <= end_time")
+	case window <= 0:
+		return nil, fmt.Errorf("invalid time window")
+	case window > time.Hour:
+		return nil, fmt.Errorf("window too large")
+	}
+
+	// Placeholders are numbered continuously: usage arguments first, error arguments after.
+	usageJoin, usageWhere, usageArgs, nextIdx := buildUsageWhere(filter, start, end, 1)
+	errorWhere, errorArgs, _ := buildErrorWhere(filter, start, end, nextIdx)
+
+	// Bucket both sources per minute, join the buckets, then take totals and per-minute peaks.
+	query := `
+WITH usage_buckets AS (
+  SELECT
+    date_trunc('minute', ul.created_at) AS bucket,
+    COALESCE(COUNT(*), 0) AS success_count,
+    COALESCE(SUM(input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens), 0) AS token_sum
+  FROM usage_logs ul
+  ` + usageJoin + `
+  ` + usageWhere + `
+  GROUP BY 1
+),
+error_buckets AS (
+  SELECT
+    date_trunc('minute', created_at) AS bucket,
+    COALESCE(COUNT(*), 0) AS error_count
+  FROM ops_error_logs
+  ` + errorWhere + `
+    AND COALESCE(status_code, 0) >= 400
+  GROUP BY 1
+),
+combined AS (
+  SELECT
+    COALESCE(u.bucket, e.bucket) AS bucket,
+    COALESCE(u.success_count, 0) AS success_count,
+    COALESCE(u.token_sum, 0) AS token_sum,
+    COALESCE(e.error_count, 0) AS error_count,
+    COALESCE(u.success_count, 0) + COALESCE(e.error_count, 0) AS request_total
+  FROM usage_buckets u
+  FULL OUTER JOIN error_buckets e ON u.bucket = e.bucket
+)
+SELECT
+  COALESCE(SUM(success_count), 0) AS success_total,
+  COALESCE(SUM(error_count), 0) AS error_total,
+  COALESCE(SUM(token_sum), 0) AS token_total,
+  COALESCE(MAX(request_total), 0) AS peak_requests_per_min,
+  COALESCE(MAX(token_sum), 0) AS peak_tokens_per_min
+FROM combined`
+
+	// A single row comes back: the three totals followed by the two per-minute peaks.
+	queryArgs := append(usageArgs, errorArgs...)
+	var successTotal, errorTotal, tokenTotal, peakRequests, peakTokens int64
+	row := r.db.QueryRowContext(ctx, query, queryArgs...)
+	if err := row.Scan(&successTotal, &errorTotal, &tokenTotal, &peakRequests, &peakTokens); err != nil {
+		return nil, err
+	}
+
+	// window > 0 was checked above; the guard only protects the divisions below.
+	seconds := window.Seconds()
+	if seconds <= 0 {
+		seconds = 1
+	}
+
+	// Averages are taken over the whole window.
+	requestTotal := successTotal + errorTotal
+	qpsAvg := roundTo1DP(float64(requestTotal) / seconds)
+	tpsAvg := roundTo1DP(float64(tokenTotal) / seconds)
+
+	// Keep "current" consistent with the dashboard overview semantics: last 1 minute.
+	// This remains "within the selected window" since end=start+window.
+	qpsCurrent, tpsCurrent, err := r.queryCurrentRates(ctx, filter, end)
+	if err != nil {
+		return nil, err
+	}
+
+	// Peaks are the busiest one-minute bucket, expressed per second.
+	qpsPeak := roundTo1DP(float64(peakRequests) / 60.0)
+	tpsPeak := roundTo1DP(float64(peakTokens) / 60.0)
+
+	return &service.OpsRealtimeTrafficSummary{
+		StartTime: start,
+		EndTime:   end,
+		Platform:  strings.TrimSpace(filter.Platform),
+		GroupID:   filter.GroupID,
+		QPS: service.OpsRateSummary{
+			Current: qpsCurrent,
+			Peak:    qpsPeak,
+			Avg:     qpsAvg,
+		},
+		TPS: service.OpsRateSummary{
+			Current: tpsCurrent,
+			Peak:    tpsPeak,
+			Avg:     tpsAvg,
+		},
+	}, nil
+}
--- a/backend/internal/repository/ops_repo_trends.go
+++ b/backend/internal/repository/ops_repo_trends.go
@@ -170,6 +170,7 @@ error_totals AS (
  FROM ops_error_logs
  WHERE created_at >= $1 AND created_at < $2
    AND COALESCE(status_code, 0) >= 400
+    AND is_count_tokens = FALSE  -- 排除 count_tokens 请求的错误
  GROUP BY 1
 ),
 combined AS (
@@ -243,6 +244,7 @@ error_totals AS (
    AND platform = $3
    AND group_id IS NOT NULL
    AND COALESCE(status_code, 0) >= 400
+    AND is_count_tokens = FALSE  -- 排除 count_tokens 请求的错误
  GROUP BY 1
 ),
 combined AS (
--- a/backend/internal/repository/scheduler_cache.go
+++ b/backend/internal/repository/scheduler_cache.go
@@ -0,0 +1,276 @@
+package repository
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/redis/go-redis/v9"
+)
+
+// Redis key names and key prefixes used by the scheduler cache in this file:
+//   - schedulerBucketSetKey: set of bucket identifiers that have had a snapshot written.
+//   - schedulerOutboxWatermarkKey: last processed outbox event id, stored as a decimal string.
+//   - schedulerAccountPrefix: prefix of the per-account JSON payload keys.
+//   - schedulerActivePrefix: prefix of the per-bucket key holding the active snapshot version.
+//   - schedulerReadyPrefix: prefix of the per-bucket ready flag ("1" once a snapshot is published).
+//   - schedulerVersionPrefix: prefix of the per-bucket version counter.
+//   - schedulerSnapshotPrefix: prefix of the versioned snapshot sorted sets.
+//   - schedulerLockPrefix: prefix of the per-bucket lock keys.
+const (
+	schedulerBucketSetKey       = "sched:buckets"
+	schedulerOutboxWatermarkKey = "sched:outbox:watermark"
+	schedulerAccountPrefix      = "sched:acc:"
+	schedulerActivePrefix       = "sched:active:"
+	schedulerReadyPrefix        = "sched:ready:"
+	schedulerVersionPrefix      = "sched:ver:"
+	schedulerSnapshotPrefix     = "sched:"
+	schedulerLockPrefix         = "sched:lock:"
+)
+
+// schedulerCache stores scheduler snapshots, account payloads, bucket locks and the
+// outbox watermark in Redis.
+type schedulerCache struct {
+	rdb *redis.Client
+}
+
+// NewSchedulerCache returns a service.SchedulerCache that keeps its state in rdb.
+func NewSchedulerCache(rdb *redis.Client) service.SchedulerCache {
+	cache := &schedulerCache{rdb: rdb}
+	return cache
+}
+
+// GetSnapshot loads the currently published account snapshot for bucket.
+//
+// The boolean result reports a cache hit. A miss (nil, false, nil) is returned when the
+// ready flag is absent or not "1", when the active-version key is absent, or when any
+// account payload referenced by the snapshot is missing. A hit with an empty slice means
+// the snapshot sorted set has no members. Redis and decode failures are returned as errors.
+func (c *schedulerCache) GetSnapshot(ctx context.Context, bucket service.SchedulerBucket) ([]*service.Account, bool, error) {
+	// The ready flag gates every read of the bucket.
+	ready, err := c.rdb.Get(ctx, schedulerBucketKey(schedulerReadyPrefix, bucket)).Result()
+	switch {
+	case err == redis.Nil:
+		return nil, false, nil
+	case err != nil:
+		return nil, false, err
+	case ready != "1":
+		return nil, false, nil
+	}
+
+	// Resolve which snapshot version is currently active.
+	version, err := c.rdb.Get(ctx, schedulerBucketKey(schedulerActivePrefix, bucket)).Result()
+	switch {
+	case err == redis.Nil:
+		return nil, false, nil
+	case err != nil:
+		return nil, false, err
+	}
+
+	// ZRANGE yields members in ascending score order.
+	memberIDs, err := c.rdb.ZRange(ctx, schedulerSnapshotKey(bucket, version), 0, -1).Result()
+	if err != nil {
+		return nil, false, err
+	}
+	if len(memberIDs) == 0 {
+		return []*service.Account{}, true, nil
+	}
+
+	accountKeys := make([]string, len(memberIDs))
+	for i, memberID := range memberIDs {
+		accountKeys[i] = schedulerAccountKey(memberID)
+	}
+	payloads, err := c.rdb.MGet(ctx, accountKeys...).Result()
+	if err != nil {
+		return nil, false, err
+	}
+
+	result := make([]*service.Account, 0, len(payloads))
+	for i := range payloads {
+		// A missing payload makes the snapshot unusable, so report a miss.
+		if payloads[i] == nil {
+			return nil, false, nil
+		}
+		decoded, decodeErr := decodeCachedAccount(payloads[i])
+		if decodeErr != nil {
+			return nil, false, decodeErr
+		}
+		result = append(result, decoded)
+	}
+
+	return result, true, nil
+}
+
+// SetSnapshot publishes accounts as the new snapshot for bucket.
+//
+// It bumps the bucket's version counter, then in one pipeline writes every account
+// payload, fills the versioned sorted set, points the active key at the new version,
+// sets the ready flag and registers the bucket in the bucket set. After the pipeline
+// succeeds the previously active snapshot set is deleted on a best-effort basis.
+// All keys are written without an expiry.
+//
+// NOTE(review): Pipeline() only batches commands; it does not wrap them in MULTI/EXEC
+// (that is TxPipeline) — confirm that a partially applied batch is acceptable.
+// NOTE(review): a reader that resolved the old active version just before the switch
+// may find its sorted set already deleted and observe an empty snapshot — confirm.
+func (c *schedulerCache) SetSnapshot(ctx context.Context, bucket service.SchedulerBucket, accounts []service.Account) error {
+	activeKey := schedulerBucketKey(schedulerActivePrefix, bucket)
+	// The read error is discarded: on any failure oldActive stays empty and the
+	// cleanup of the previous snapshot at the end is skipped.
+	oldActive, _ := c.rdb.Get(ctx, activeKey).Result()
+
+	// Allocate a fresh version number for this snapshot.
+	versionKey := schedulerBucketKey(schedulerVersionPrefix, bucket)
+	version, err := c.rdb.Incr(ctx, versionKey).Result()
+	if err != nil {
+		return err
+	}
+
+	versionStr := strconv.FormatInt(version, 10)
+	snapshotKey := schedulerSnapshotKey(bucket, versionStr)
+
+	pipe := c.rdb.Pipeline()
+	// Queue one payload write per account; a marshal failure aborts before anything is sent.
+	for _, account := range accounts {
+		payload, err := json.Marshal(account)
+		if err != nil {
+			return err
+		}
+		pipe.Set(ctx, schedulerAccountKey(strconv.FormatInt(account.ID, 10)), payload, 0)
+	}
+	if len(accounts) > 0 {
+		// Use the slice index as the score so the ordering returned by the database is preserved.
+		members := make([]redis.Z, 0, len(accounts))
+		for idx, account := range accounts {
+			members = append(members, redis.Z{
+				Score:  float64(idx),
+				Member: strconv.FormatInt(account.ID, 10),
+			})
+		}
+		pipe.ZAdd(ctx, snapshotKey, members...)
+	} else {
+		// An empty snapshot is represented by the absence of the sorted set.
+		pipe.Del(ctx, snapshotKey)
+	}
+	// Publish: switch the active pointer, mark the bucket ready and register it.
+	pipe.Set(ctx, activeKey, versionStr, 0)
+	pipe.Set(ctx, schedulerBucketKey(schedulerReadyPrefix, bucket), "1", 0)
+	pipe.SAdd(ctx, schedulerBucketSetKey, bucket.String())
+	if _, err := pipe.Exec(ctx); err != nil {
+		return err
+	}
+
+	// Best-effort removal of the superseded snapshot; a failure here is ignored.
+	if oldActive != "" && oldActive != versionStr {
+		_ = c.rdb.Del(ctx, schedulerSnapshotKey(bucket, oldActive)).Err()
+	}
+
+	return nil
+}
+
+// GetAccount returns the cached account for accountID.
+// A missing key yields (nil, nil); Redis and decode failures are returned as errors.
+func (c *schedulerCache) GetAccount(ctx context.Context, accountID int64) (*service.Account, error) {
+	raw, err := c.rdb.Get(ctx, schedulerAccountKey(strconv.FormatInt(accountID, 10))).Result()
+	switch {
+	case err == redis.Nil:
+		return nil, nil
+	case err != nil:
+		return nil, err
+	}
+	return decodeCachedAccount(raw)
+}
+
+// SetAccount stores account as JSON under its account key with no expiry.
+// A nil account or a non-positive ID is silently ignored.
+func (c *schedulerCache) SetAccount(ctx context.Context, account *service.Account) error {
+	if account == nil {
+		return nil
+	}
+	if account.ID <= 0 {
+		return nil
+	}
+
+	encoded, err := json.Marshal(account)
+	if err != nil {
+		return err
+	}
+	return c.rdb.Set(ctx, schedulerAccountKey(strconv.FormatInt(account.ID, 10)), encoded, 0).Err()
+}
+
+// DeleteAccount removes the cached payload for accountID.
+// Non-positive IDs are ignored and report success.
+func (c *schedulerCache) DeleteAccount(ctx context.Context, accountID int64) error {
+	if accountID > 0 {
+		return c.rdb.Del(ctx, schedulerAccountKey(strconv.FormatInt(accountID, 10))).Err()
+	}
+	return nil
+}
+
+// UpdateLastUsed rewrites LastUsedAt on the cached payload of every account in updates.
+//
+// Payloads are fetched with one MGET, patched in memory and written back through a
+// pipeline. Accounts without a cached payload are skipped. A decode or encode failure
+// returns immediately, before any queued write is sent.
+func (c *schedulerCache) UpdateLastUsed(ctx context.Context, updates map[int64]time.Time) error {
+	total := len(updates)
+	if total == 0 {
+		return nil
+	}
+
+	// ids and keys are filled in lockstep so index i refers to the same account in both.
+	ids := make([]int64, 0, total)
+	keys := make([]string, 0, total)
+	for accountID := range updates {
+		ids = append(ids, accountID)
+		keys = append(keys, schedulerAccountKey(strconv.FormatInt(accountID, 10)))
+	}
+
+	cached, err := c.rdb.MGet(ctx, keys...).Result()
+	if err != nil {
+		return err
+	}
+
+	writer := c.rdb.Pipeline()
+	for idx := range cached {
+		if cached[idx] == nil {
+			continue
+		}
+		account, decodeErr := decodeCachedAccount(cached[idx])
+		if decodeErr != nil {
+			return decodeErr
+		}
+		account.LastUsedAt = ptrTime(updates[ids[idx]])
+		encoded, encodeErr := json.Marshal(account)
+		if encodeErr != nil {
+			return encodeErr
+		}
+		writer.Set(ctx, keys[idx], encoded, 0)
+	}
+
+	_, err = writer.Exec(ctx)
+	return err
+}
+
+// TryLockBucket attempts to take the lock key of bucket with SET NX and the given ttl.
+// It reports whether the key was set; the stored value is the current Unix time in nanoseconds.
+func (c *schedulerCache) TryLockBucket(ctx context.Context, bucket service.SchedulerBucket, ttl time.Duration) (bool, error) {
+	lockKey := schedulerBucketKey(schedulerLockPrefix, bucket)
+	stamp := time.Now().UnixNano()
+	acquired, err := c.rdb.SetNX(ctx, lockKey, stamp, ttl).Result()
+	return acquired, err
+}
+
+// ListBuckets returns every bucket registered in the bucket set.
+// Entries that service.ParseSchedulerBucket rejects are dropped.
+func (c *schedulerCache) ListBuckets(ctx context.Context) ([]service.SchedulerBucket, error) {
+	members, err := c.rdb.SMembers(ctx, schedulerBucketSetKey).Result()
+	if err != nil {
+		return nil, err
+	}
+
+	buckets := make([]service.SchedulerBucket, 0, len(members))
+	for i := range members {
+		if parsed, ok := service.ParseSchedulerBucket(members[i]); ok {
+			buckets = append(buckets, parsed)
+		}
+	}
+	return buckets, nil
+}
+
+// GetOutboxWatermark reads the stored outbox watermark.
+// A missing key yields 0; a value that is not a base-10 int64 yields an error.
+func (c *schedulerCache) GetOutboxWatermark(ctx context.Context) (int64, error) {
+	raw, err := c.rdb.Get(ctx, schedulerOutboxWatermarkKey).Result()
+	switch {
+	case err == redis.Nil:
+		return 0, nil
+	case err != nil:
+		return 0, err
+	}
+
+	watermark, parseErr := strconv.ParseInt(raw, 10, 64)
+	if parseErr != nil {
+		return 0, parseErr
+	}
+	return watermark, nil
+}
+
+// SetOutboxWatermark stores id as the outbox watermark, encoded in base 10, with no expiry.
+func (c *schedulerCache) SetOutboxWatermark(ctx context.Context, id int64) error {
+	encoded := strconv.FormatInt(id, 10)
+	return c.rdb.Set(ctx, schedulerOutboxWatermarkKey, encoded, 0).Err()
+}
+
+// schedulerBucketKey builds a per-bucket key of the form
+// "<prefix><GroupID>:<Platform>:<Mode>".
+func schedulerBucketKey(prefix string, bucket service.SchedulerBucket) string {
+	return fmt.Sprintf("%s%d:%s:%s", prefix, bucket.GroupID, bucket.Platform, bucket.Mode)
+}
+
+// schedulerSnapshotKey builds the key of one snapshot version of bucket, of the form
+// "<schedulerSnapshotPrefix><GroupID>:<Platform>:<Mode>:v<version>".
+func schedulerSnapshotKey(bucket service.SchedulerBucket, version string) string {
+	return fmt.Sprintf("%s%d:%s:%s:v%s", schedulerSnapshotPrefix, bucket.GroupID, bucket.Platform, bucket.Mode, version)
+}
+
+// schedulerAccountKey returns the payload key for the account whose ID, already
+// formatted as a string, is id.
+func schedulerAccountKey(id string) string {
+	return schedulerAccountPrefix + id
+}
+
+// ptrTime returns a pointer to a copy of t.
+func ptrTime(t time.Time) *time.Time {
+	copied := t
+	return &copied
+}
+
+// decodeCachedAccount unmarshals a cached JSON payload into a service.Account.
+// val must be a string or a []byte; any other dynamic type is reported as an error.
+func decodeCachedAccount(val any) (*service.Account, error) {
+	var raw []byte
+	if text, ok := val.(string); ok {
+		raw = []byte(text)
+	} else if data, ok := val.([]byte); ok {
+		raw = data
+	} else {
+		return nil, fmt.Errorf("unexpected account cache type: %T", val)
+	}
+
+	account := new(service.Account)
+	if err := json.Unmarshal(raw, account); err != nil {
+		return nil, err
+	}
+	return account, nil
+}
--- a/backend/internal/repository/scheduler_outbox_repo.go
+++ b/backend/internal/repository/scheduler_outbox_repo.go
@@ -0,0 +1,96 @@
+package repository
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+)
+
+// schedulerOutboxRepository reads the scheduler_outbox table through database/sql.
+type schedulerOutboxRepository struct {
+	db *sql.DB
+}
+
+// NewSchedulerOutboxRepository returns a service.SchedulerOutboxRepository that queries db.
+func NewSchedulerOutboxRepository(db *sql.DB) service.SchedulerOutboxRepository {
+	repo := &schedulerOutboxRepository{db: db}
+	return repo
+}
+
+// ListAfter returns up to limit outbox events whose id is greater than afterID, in
+// ascending id order. A non-positive limit falls back to 100.
+//
+// NULL account_id / group_id columns leave the corresponding pointer fields nil, and an
+// empty payload column leaves Payload unset. A payload that is not valid JSON for a
+// map aborts the whole call with an error.
+func (r *schedulerOutboxRepository) ListAfter(ctx context.Context, afterID int64, limit int) ([]service.SchedulerOutboxEvent, error) {
+	const defaultLimit = 100
+	if limit < 1 {
+		limit = defaultLimit
+	}
+
+	rows, err := r.db.QueryContext(ctx, `
+		SELECT id, event_type, account_id, group_id, payload, created_at
+		FROM scheduler_outbox
+		WHERE id > $1
+		ORDER BY id ASC
+		LIMIT $2
+	`, afterID, limit)
+	if err != nil {
+		return nil, err
+	}
+	defer func() {
+		_ = rows.Close()
+	}()
+
+	out := make([]service.SchedulerOutboxEvent, 0, limit)
+	for rows.Next() {
+		var evt service.SchedulerOutboxEvent
+		var accountID, groupID sql.NullInt64
+		var rawPayload []byte
+
+		scanErr := rows.Scan(&evt.ID, &evt.EventType, &accountID, &groupID, &rawPayload, &evt.CreatedAt)
+		if scanErr != nil {
+			return nil, scanErr
+		}
+
+		// Copy nullable ids into fresh variables so each event owns its own pointer target.
+		if accountID.Valid {
+			id := accountID.Int64
+			evt.AccountID = &id
+		}
+		if groupID.Valid {
+			id := groupID.Int64
+			evt.GroupID = &id
+		}
+
+		if len(rawPayload) != 0 {
+			var decoded map[string]any
+			if decodeErr := json.Unmarshal(rawPayload, &decoded); decodeErr != nil {
+				return nil, decodeErr
+			}
+			evt.Payload = decoded
+		}
+
+		out = append(out, evt)
+	}
+	if iterErr := rows.Err(); iterErr != nil {
+		return nil, iterErr
+	}
+	return out, nil
+}
+
+// MaxID returns the largest id in scheduler_outbox, or 0 when the table is empty.
+func (r *schedulerOutboxRepository) MaxID(ctx context.Context) (int64, error) {
+	row := r.db.QueryRowContext(ctx, "SELECT COALESCE(MAX(id), 0) FROM scheduler_outbox")
+
+	var highest int64
+	if err := row.Scan(&highest); err != nil {
+		return 0, err
+	}
+	return highest, nil
+}
+
+// enqueueSchedulerOutbox inserts one row into scheduler_outbox using exec.
+//
+// A nil exec makes the call a no-op. A non-nil payload is JSON-encoded before the
+// insert; a nil payload is passed to the driver as nil.
+func enqueueSchedulerOutbox(ctx context.Context, exec sqlExecutor, eventType string, accountID *int64, groupID *int64, payload any) error {
+	if exec == nil {
+		return nil
+	}
+
+	// Left as a nil interface unless there is a payload to encode.
+	var encodedPayload any
+	if payload != nil {
+		raw, marshalErr := json.Marshal(payload)
+		if marshalErr != nil {
+			return marshalErr
+		}
+		encodedPayload = raw
+	}
+
+	if _, err := exec.ExecContext(ctx, `
+		INSERT INTO scheduler_outbox (event_type, account_id, group_id, payload)
+		VALUES ($1, $2, $3, $4)
+	`, eventType, accountID, groupID, encodedPayload); err != nil {
+		return err
+	}
+	return nil
+}
--- a/backend/internal/repository/scheduler_snapshot_outbox_integration_test.go
+++ b/backend/internal/repository/scheduler_snapshot_outbox_integration_test.go
@@ -0,0 +1,68 @@
+//go:build integration
+
+package repository
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/stretchr/testify/require"
+)
+
+// TestSchedulerSnapshotOutboxReplay checks that a LastUsedAt change made through the
+// account repository eventually shows up on the account cached in Redis while a
+// SchedulerSnapshotService is running.
+func TestSchedulerSnapshotOutboxReplay(t *testing.T) {
+	ctx := context.Background()
+	rdb := testRedis(t)
+	client := testEntClient(t)
+
+	// Best-effort reset of the outbox table; the result is intentionally ignored so the
+	// test does not fail on the cleanup step itself.
+	_, _ = integrationDB.ExecContext(ctx, "TRUNCATE scheduler_outbox")
+
+	accountRepo := newAccountRepositoryWithSQL(client, integrationDB)
+	outboxRepo := NewSchedulerOutboxRepository(integrationDB)
+	cache := NewSchedulerCache(rdb)
+
+	cfg := &config.Config{
+		RunMode: config.RunModeStandard,
+		Gateway: config.GatewayConfig{
+			Scheduling: config.GatewaySchedulingConfig{
+				OutboxPollIntervalSeconds:  1,
+				FullRebuildIntervalSeconds: 0,
+				DbFallbackEnabled:          true,
+			},
+		},
+	}
+
+	// A time-based suffix keeps the account name distinct between runs.
+	account := &service.Account{
+		Name:        "outbox-replay-" + time.Now().Format("150405.000000"),
+		Platform:    service.PlatformOpenAI,
+		Type:        service.AccountTypeAPIKey,
+		Status:      service.StatusActive,
+		Schedulable: true,
+		Concurrency: 3,
+		Priority:    1,
+		Credentials: map[string]any{},
+		Extra:       map[string]any{},
+	}
+	require.NoError(t, accountRepo.Create(ctx, account))
+	// Seed the cache so there is an existing payload for the service to update.
+	require.NoError(t, cache.SetAccount(ctx, account))
+
+	svc := service.NewSchedulerSnapshotService(cache, outboxRepo, accountRepo, nil, cfg)
+	svc.Start()
+	t.Cleanup(svc.Stop)
+
+	require.NoError(t, accountRepo.UpdateLastUsed(ctx, account.ID))
+	updated, err := accountRepo.GetByID(ctx, account.ID)
+	require.NoError(t, err)
+	require.NotNil(t, updated.LastUsedAt)
+	expectedUnix := updated.LastUsedAt.Unix()
+
+	// Poll the cache until the cached LastUsedAt matches the database value (second precision).
+	require.Eventually(t, func() bool {
+		cached, err := cache.GetAccount(ctx, account.ID)
+		if err != nil || cached == nil || cached.LastUsedAt == nil {
+			return false
+		}
+		return cached.LastUsedAt.Unix() == expectedUnix
+	}, 5*time.Second, 100*time.Millisecond)
+}
--- a/backend/internal/repository/timeout_counter_cache.go
+++ b/backend/internal/repository/timeout_counter_cache.go
@@ -0,0 +1,80 @@
+package repository
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/redis/go-redis/v9"
+)
+
+// timeoutCounterPrefix is the Redis key prefix of the per-account timeout counters.
+const timeoutCounterPrefix = "timeout_count:account:"
+
+// timeoutCounterIncrScript is a Lua script that atomically increments the counter and
+// returns its new value.
+// The expiry (ARGV[1]) is applied only when the incremented value is 1, i.e. when the
+// key was just created, so later increments do not extend it.
+var timeoutCounterIncrScript = redis.NewScript(`
+	local key = KEYS[1]
+	local ttl = tonumber(ARGV[1])
+
+	local count = redis.call('INCR', key)
+	if count == 1 then
+		redis.call('EXPIRE', key, ttl)
+	end
+
+	return count
+`)
+
+// timeoutCounterCache keeps per-account timeout counters in Redis.
+type timeoutCounterCache struct {
+	rdb *redis.Client
+}
+
+// NewTimeoutCounterCache creates a timeout counter cache backed by rdb.
+func NewTimeoutCounterCache(rdb *redis.Client) service.TimeoutCounterCache {
+	cache := &timeoutCounterCache{rdb: rdb}
+	return cache
+}
+
+// IncrementTimeoutCount bumps the timeout counter of accountID and returns the new count.
+// windowMinutes is the counting window in minutes; it is converted to seconds, raised to
+// at least 60, and handed to the increment script as the key's expiry.
+func (c *timeoutCounterCache) IncrementTimeoutCount(ctx context.Context, accountID int64, windowMinutes int) (int64, error) {
+	const minTTLSeconds = 60 // never expire sooner than one minute
+
+	ttlSeconds := minTTLSeconds
+	if requested := windowMinutes * 60; requested >= minTTLSeconds {
+		ttlSeconds = requested
+	}
+
+	counterKey := fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)
+	count, err := timeoutCounterIncrScript.Run(ctx, c.rdb, []string{counterKey}, ttlSeconds).Int64()
+	if err != nil {
+		return 0, fmt.Errorf("increment timeout count: %w", err)
+	}
+	return count, nil
+}
+
+// GetTimeoutCount returns the current timeout count of accountID, or 0 when no counter exists.
+func (c *timeoutCounterCache) GetTimeoutCount(ctx context.Context, accountID int64) (int64, error) {
+	count, err := c.rdb.Get(ctx, fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)).Int64()
+	switch {
+	case err == redis.Nil:
+		return 0, nil
+	case err != nil:
+		return 0, fmt.Errorf("get timeout count: %w", err)
+	}
+	return count, nil
+}
+
+// ResetTimeoutCount clears the timeout counter of accountID by deleting its key.
+func (c *timeoutCounterCache) ResetTimeoutCount(ctx context.Context, accountID int64) error {
+	return c.rdb.Del(ctx, fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)).Err()
+}
+
+// GetTimeoutCountTTL returns the remaining time-to-live of the counter key of accountID,
+// exactly as reported by the Redis TTL command.
+func (c *timeoutCounterCache) GetTimeoutCountTTL(ctx context.Context, accountID int64) (time.Duration, error) {
+	counterKey := fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)
+	remaining, err := c.rdb.TTL(ctx, counterKey).Result()
+	return remaining, err
+}
--- a/backend/internal/repository/wire.go
+++ b/backend/internal/repository/wire.go
@@ -59,6 +59,7 @@ var ProviderSet = wire.NewSet(
 	NewBillingCache,
 	NewAPIKeyCache,
 	NewTempUnschedCache,
+	NewTimeoutCounterCache,
 	ProvideConcurrencyCache,
 	NewDashboardCache,
 	NewEmailCache,
@@ -66,6 +67,8 @@ var ProviderSet = wire.NewSet(
 	NewRedeemCache,
 	NewUpdateCache,
 	NewGeminiTokenCache,
+	NewSchedulerCache,
+	NewSchedulerOutboxRepository,

 	// HTTP service ports (DI Strategy A: return interface directly)
 	NewTurnstileVerifier,
--- a/backend/internal/server/routes/admin.go
+++ b/backend/internal/server/routes/admin.go
@@ -73,6 +73,7 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		// Realtime ops signals
 		ops.GET("/concurrency", h.Admin.Ops.GetConcurrencyStats)
 		ops.GET("/account-availability", h.Admin.Ops.GetAccountAvailability)
+		ops.GET("/realtime-traffic", h.Admin.Ops.GetRealtimeTrafficSummary)

 		// Alerts (rules + events)
 		ops.GET("/alert-rules", h.Admin.Ops.ListAlertRules)
@@ -96,6 +97,13 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		ops.GET("/advanced-settings", h.Admin.Ops.GetAdvancedSettings)
 		ops.PUT("/advanced-settings", h.Admin.Ops.UpdateAdvancedSettings)

+		// Settings group (DB-backed)
+		settings := ops.Group("/settings")
+		{
+			settings.GET("/metric-thresholds", h.Admin.Ops.GetMetricThresholds)
+			settings.PUT("/metric-thresholds", h.Admin.Ops.UpdateMetricThresholds)
+		}
+
 		// WebSocket realtime (QPS/TPS)
 		ws := ops.Group("/ws")
 		{
@@ -283,6 +291,9 @@ func registerSettingsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		adminSettings.GET("/admin-api-key", h.Admin.Setting.GetAdminAPIKey)
 		adminSettings.POST("/admin-api-key/regenerate", h.Admin.Setting.RegenerateAdminAPIKey)
 		adminSettings.DELETE("/admin-api-key", h.Admin.Setting.DeleteAdminAPIKey)
+		// 流超时处理配置
+		adminSettings.GET("/stream-timeout", h.Admin.Setting.GetStreamTimeoutSettings)
+		adminSettings.PUT("/stream-timeout", h.Admin.Setting.UpdateStreamTimeoutSettings)
 	}
 }

--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -523,6 +523,9 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 		proxyURL = account.Proxy.URL()
 	}

+	// Sanitize thinking blocks (clean cache_control and flatten history thinking)
+	sanitizeThinkingBlocks(&claudeReq)
+
 	// 获取转换选项
 	// Antigravity 上游要求必须包含身份提示词，否则会返回 429
 	transformOpts := s.getClaudeTransformOptions(ctx)
@@ -534,6 +537,9 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 		return nil, fmt.Errorf("transform request: %w", err)
 	}

+	// Safety net: ensure no cache_control leaked into Gemini request
+	geminiBody = cleanCacheControlFromGeminiJSON(geminiBody)
+
 	// Antigravity 上游只支持流式请求，统一使用 streamGenerateContent
 	// 如果客户端请求非流式，在响应处理阶段会收集完整流式响应后转换返回
 	action := "streamGenerateContent"
@@ -903,6 +909,143 @@ func extractAntigravityErrorMessage(body []byte) string {
 	return ""
 }

+// cleanCacheControlFromGeminiJSON is a safety net that strips every cache_control field
+// from an already transformed Gemini request body.
+//
+// The input is returned unchanged when it is not a JSON object, when it contains no
+// cache_control field, or when re-encoding fails. Otherwise the re-encoded document
+// without those fields is returned.
+func cleanCacheControlFromGeminiJSON(body []byte) []byte {
+	var parsed map[string]any
+	parseErr := json.Unmarshal(body, &parsed)
+	if parseErr != nil {
+		log.Printf("[Antigravity] Failed to parse Gemini JSON for cache_control cleaning: %v", parseErr)
+		return body
+	}
+
+	if changed := removeCacheControlFromAny(parsed); !changed {
+		return body
+	}
+
+	encoded, encodeErr := json.Marshal(parsed)
+	if encodeErr != nil {
+		return body
+	}
+	log.Printf("[Antigravity] Successfully cleaned cache_control from Gemini JSON")
+	return encoded
+}
+
+// removeCacheControlFromAny walks a decoded JSON value (maps and slices) in place and
+// deletes every "cache_control" key it finds. It reports whether anything was deleted.
+// Values of any other type are left alone.
+func removeCacheControlFromAny(v any) bool {
+	if obj, ok := v.(map[string]any); ok {
+		removed := false
+		for key, child := range obj {
+			if key == "cache_control" {
+				// Dropping the key discards its value, so there is nothing to descend into.
+				delete(obj, key)
+				removed = true
+				continue
+			}
+			if removeCacheControlFromAny(child) {
+				removed = true
+			}
+		}
+		return removed
+	}
+
+	if list, ok := v.([]any); ok {
+		removed := false
+		for i := range list {
+			// Visit every element; do not stop at the first hit.
+			if removeCacheControlFromAny(list[i]) {
+				removed = true
+			}
+		}
+		return removed
+	}
+
+	return false
+}
+
+// sanitizeThinkingBlocks cleans thinking blocks in req in place.
+//
+// Thinking blocks do not support the cache_control field (Anthropic API / Vertex AI
+// requirement), so it is removed from every thinking block in the system array and in
+// message content. In addition, thinking blocks in history messages (every message
+// except the last) are flattened into text blocks to avoid upstream validation errors.
+//
+// A block counts as a thinking block when its "type" is "thinking" or when it carries a
+// non-nil "thinking" field. System and message content are only inspected when they
+// decode as a JSON array of objects; other shapes are left untouched. Content is
+// re-encoded only when something actually changed. A nil req is a no-op.
+func sanitizeThinkingBlocks(req *antigravity.ClaudeRequest) {
+	if req == nil {
+		return
+	}
+
+	log.Printf("[Antigravity] sanitizeThinkingBlocks: processing request with %d messages", len(req.Messages))
+
+	// Clean system blocks.
+	if len(req.System) > 0 {
+		var systemBlocks []map[string]any
+		if err := json.Unmarshal(req.System, &systemBlocks); err == nil {
+			systemChanged := false
+			for i := range systemBlocks {
+				blockType, _ := systemBlocks[i]["type"].(string)
+				if blockType != "thinking" && systemBlocks[i]["thinking"] == nil {
+					continue
+				}
+				if removeCacheControlFromAny(systemBlocks[i]) {
+					log.Printf("[Antigravity] Deep cleaned cache_control from thinking block in system[%d]", i)
+					systemChanged = true
+				}
+			}
+			// Re-encode only after a removal: a round trip through map[string]any reorders
+			// keys and converts numbers to float64, so untouched prompts keep their bytes.
+			if systemChanged {
+				if cleaned, err := json.Marshal(systemBlocks); err == nil {
+					req.System = cleaned
+				}
+			}
+		}
+	}
+
+	// Clean message content blocks and flatten history.
+	lastMsgIdx := len(req.Messages) - 1
+	for msgIdx := range req.Messages {
+		raw := req.Messages[msgIdx].Content
+		if len(raw) == 0 {
+			continue
+		}
+
+		// Only array-of-object content is inspected.
+		var blocks []map[string]any
+		if err := json.Unmarshal(raw, &blocks); err != nil {
+			continue
+		}
+
+		modified := false
+		for blockIdx := range blocks {
+			block := blocks[blockIdx]
+			blockType, _ := block["type"].(string)
+
+			// Thinking blocks may be typed or untyped.
+			if blockType != "thinking" && block["thinking"] == nil {
+				continue
+			}
+
+			// 1. Clean cache_control.
+			if removeCacheControlFromAny(block) {
+				log.Printf("[Antigravity] Deep cleaned cache_control from thinking block in messages[%d].content[%d]", msgIdx, blockIdx)
+				modified = true
+			}
+
+			// 2. Flatten to text only for history messages (not the last one).
+			if msgIdx >= lastMsgIdx {
+				continue
+			}
+			log.Printf("[Antigravity] Flattening history thinking block to text at messages[%d].content[%d]", msgIdx, blockIdx)
+
+			var textContent string
+			switch thinking := block["thinking"].(type) {
+			case string:
+				textContent = thinking
+			case nil:
+				// Missing or null content: keep the text empty. Marshaling nil would
+				// inject the literal string "null" into the conversation.
+				// NOTE(review): confirm upstream accepts an empty text block here.
+			default:
+				// Non-string content is preserved as its JSON encoding.
+				if encoded, err := json.Marshal(thinking); err == nil {
+					textContent = string(encoded)
+				}
+			}
+
+			// Convert to a text block.
+			block["type"] = "text"
+			block["text"] = textContent
+			delete(block, "thinking")
+			delete(block, "signature")
+			delete(block, "cache_control") // Ensure it's gone
+			modified = true
+		}
+
+		// Marshal back if modified.
+		if modified {
+			if marshaled, err := json.Marshal(blocks); err == nil {
+				req.Messages[msgIdx].Content = marshaled
+			}
+		}
+	}
+}
+
 // stripThinkingFromClaudeRequest converts thinking blocks to text blocks in a Claude Messages request.
 // This preserves the thinking content while avoiding signature validation errors.
 // Note: redacted_thinking blocks are removed because they cannot be converted to text.
@@ -1717,6 +1860,7 @@ func (s *AntigravityGatewayService) handleGeminiStreamingResponse(c *gin.Context
 				continue
 			}
 			log.Printf("Stream data interval timeout (antigravity)")
+			// 注意：此函数没有 account 上下文，无法调用 HandleStreamTimeout
 			sendErrorEvent("stream_timeout")
 			return &antigravityStreamResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
 		}
@@ -2271,6 +2415,7 @@ func (s *AntigravityGatewayService) handleClaudeStreamingResponse(c *gin.Context
 				continue
 			}
 			log.Printf("Stream data interval timeout (antigravity)")
+			// 注意：此函数没有 account 上下文，无法调用 HandleStreamTimeout
 			sendErrorEvent("stream_timeout")
 			return &antigravityStreamResult{usage: convertUsage(nil), firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
 		}
--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -146,6 +146,13 @@ const (

 	// SettingKeyOpsAdvancedSettings stores JSON config for ops advanced settings (data retention, aggregation).
 	SettingKeyOpsAdvancedSettings = "ops_advanced_settings"
+
+	// =========================
+	// Stream Timeout Handling
+	// =========================
+
+	// SettingKeyStreamTimeoutSettings stores JSON config for stream timeout handling.
+	SettingKeyStreamTimeoutSettings = "stream_timeout_settings"
 )

 // AdminAPIKeyPrefix is the prefix for admin API keys (distinct from user "sk-" keys).
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -151,6 +151,7 @@ type GatewayService struct {
 	userSubRepo         UserSubscriptionRepository
 	cache               GatewayCache
 	cfg                 *config.Config
+	schedulerSnapshot   *SchedulerSnapshotService
 	billingService      *BillingService
 	rateLimitService    *RateLimitService
 	billingCacheService *BillingCacheService
@@ -169,6 +170,7 @@ func NewGatewayService(
 	userSubRepo UserSubscriptionRepository,
 	cache GatewayCache,
 	cfg *config.Config,
+	schedulerSnapshot *SchedulerSnapshotService,
 	concurrencyService *ConcurrencyService,
 	billingService *BillingService,
 	rateLimitService *RateLimitService,
@@ -185,6 +187,7 @@ func NewGatewayService(
 		userSubRepo:         userSubRepo,
 		cache:               cache,
 		cfg:                 cfg,
+		schedulerSnapshot:   schedulerSnapshot,
 		concurrencyService:  concurrencyService,
 		billingService:      billingService,
 		rateLimitService:    rateLimitService,
@@ -745,6 +748,9 @@ func (s *GatewayService) resolvePlatform(ctx context.Context, groupID *int64, gr
 }

 func (s *GatewayService) listSchedulableAccounts(ctx context.Context, groupID *int64, platform string, hasForcePlatform bool) ([]Account, bool, error) {
+	if s.schedulerSnapshot != nil {
+		return s.schedulerSnapshot.ListSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
+	}
 	useMixed := (platform == PlatformAnthropic || platform == PlatformGemini) && !hasForcePlatform
 	if useMixed {
 		platforms := []string{platform, PlatformAntigravity}
@@ -821,6 +827,13 @@ func (s *GatewayService) tryAcquireAccountSlot(ctx context.Context, accountID in
 	return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
 }

+// getSchedulableAccount looks up one account by ID, going through the scheduler
+// snapshot service when one is configured and through the account repository otherwise.
+func (s *GatewayService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if s.schedulerSnapshot == nil {
+		return s.accountRepo.GetByID(ctx, accountID)
+	}
+	return s.schedulerSnapshot.GetAccount(ctx, accountID)
+}
+
 func sortAccountsByPriorityAndLastUsed(accounts []*Account, preferOAuth bool) {
 	sort.SliceStable(accounts, func(i, j int) bool {
 		a, b := accounts[i], accounts[j]
@@ -851,7 +864,7 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				// 检查账号分组归属和平台匹配（确保粘性会话不会跨分组或跨平台）
 				if err == nil && s.isAccountInGroup(account, groupID) && account.Platform == platform && account.IsSchedulableForModel(requestedModel) && (requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) {
 					if err := s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), sessionHash, stickySessionTTL); err != nil {
@@ -864,16 +877,11 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 	}

 	// 2. 获取可调度账号列表（单平台）
-	var accounts []Account
-	var err error
-	if s.cfg.RunMode == config.RunModeSimple {
-		// 简易模式：忽略 groupID，查询所有可用账号
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, platform)
-	} else if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, platform)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, platform)
+	forcePlatform, hasForcePlatform := ctx.Value(ctxkey.ForcePlatform).(string)
+	if hasForcePlatform && forcePlatform == "" {
+		hasForcePlatform = false
 	}
+	accounts, _, err := s.listSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
@@ -935,7 +943,6 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 // selectAccountWithMixedScheduling 选择账户（支持混合调度）
 // 查询原生平台账户 + 启用 mixed_scheduling 的 antigravity 账户
 func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, nativePlatform string) (*Account, error) {
-	platforms := []string{nativePlatform, PlatformAntigravity}
 	preferOAuth := nativePlatform == PlatformGemini

 	// 1. 查询粘性会话
@@ -943,7 +950,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				// 检查账号分组归属和有效性：原生平台直接匹配，antigravity 需要启用混合调度
 				if err == nil && s.isAccountInGroup(account, groupID) && account.IsSchedulableForModel(requestedModel) && (requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) {
 					if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
@@ -958,13 +965,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
 	}

 	// 2. 获取可调度账号列表
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, *groupID, platforms)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, platforms)
-	}
+	accounts, _, err := s.listSchedulableAccounts(ctx, groupID, nativePlatform, false)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
@@ -1226,6 +1227,9 @@ func enforceCacheControlLimit(body []byte) []byte {
 		return body
 	}

+	// 清理 thinking 块中的非法 cache_control（thinking 块不支持该字段）
+	removeCacheControlFromThinkingBlocks(data)
+
 	// 计算当前 cache_control 块数量
 	count := countCacheControlBlocks(data)
 	if count <= maxCacheControlBlocks {
@@ -1253,6 +1257,7 @@ func enforceCacheControlLimit(body []byte) []byte {
 }

 // countCacheControlBlocks 统计 system 和 messages 中的 cache_control 块数量
+// 注意：thinking 块不支持 cache_control，统计时跳过
 func countCacheControlBlocks(data map[string]any) int {
 	count := 0

@@ -1260,6 +1265,10 @@ func countCacheControlBlocks(data map[string]any) int {
 	if system, ok := data["system"].([]any); ok {
 		for _, item := range system {
 			if m, ok := item.(map[string]any); ok {
+				// thinking 块不支持 cache_control，跳过
+				if blockType, _ := m["type"].(string); blockType == "thinking" {
+					continue
+				}
 				if _, has := m["cache_control"]; has {
 					count++
 				}
@@ -1274,6 +1283,10 @@ func countCacheControlBlocks(data map[string]any) int {
 				if content, ok := msgMap["content"].([]any); ok {
 					for _, item := range content {
 						if m, ok := item.(map[string]any); ok {
+							// thinking 块不支持 cache_control，跳过
+							if blockType, _ := m["type"].(string); blockType == "thinking" {
+								continue
+							}
 							if _, has := m["cache_control"]; has {
 								count++
 							}
@@ -1289,6 +1302,7 @@ func countCacheControlBlocks(data map[string]any) int {

 // removeCacheControlFromMessages 从 messages 中移除一个 cache_control（从头开始）
 // 返回 true 表示成功移除，false 表示没有可移除的
+// 注意：跳过 thinking 块（它不支持 cache_control）
 func removeCacheControlFromMessages(data map[string]any) bool {
 	messages, ok := data["messages"].([]any)
 	if !ok {
@@ -1306,6 +1320,10 @@ func removeCacheControlFromMessages(data map[string]any) bool {
 		}
 		for _, item := range content {
 			if m, ok := item.(map[string]any); ok {
+				// thinking 块不支持 cache_control，跳过
+				if blockType, _ := m["type"].(string); blockType == "thinking" {
+					continue
+				}
 				if _, has := m["cache_control"]; has {
 					delete(m, "cache_control")
 					return true
@@ -1318,6 +1336,7 @@ func removeCacheControlFromMessages(data map[string]any) bool {

 // removeCacheControlFromSystem 从 system 中移除一个 cache_control（从尾部开始，保护注入的 prompt）
 // 返回 true 表示成功移除，false 表示没有可移除的
+// 注意：跳过 thinking 块（它不支持 cache_control）
 func removeCacheControlFromSystem(data map[string]any) bool {
 	system, ok := data["system"].([]any)
 	if !ok {
@@ -1327,6 +1346,10 @@ func removeCacheControlFromSystem(data map[string]any) bool {
 	// 从尾部开始移除，保护开头注入的 Claude Code prompt
 	for i := len(system) - 1; i >= 0; i-- {
 		if m, ok := system[i].(map[string]any); ok {
+			// thinking 块不支持 cache_control，跳过
+			if blockType, _ := m["type"].(string); blockType == "thinking" {
+				continue
+			}
 			if _, has := m["cache_control"]; has {
 				delete(m, "cache_control")
 				return true
@@ -1336,6 +1359,44 @@ func removeCacheControlFromSystem(data map[string]any) bool {
 	return false
 }

+// removeCacheControlFromThinkingBlocks deletes cache_control from every thinking block in data["system"] and in each data["messages"][i]["content"], mutating the maps in place.
+// Per the original author, thinking blocks do not support cache_control; a warning is logged for every field removed.
+func removeCacheControlFromThinkingBlocks(data map[string]any) {
+	// Clean thinking blocks found in system.
+	if system, ok := data["system"].([]any); ok {
+		for _, item := range system {
+			if m, ok := item.(map[string]any); ok {
+				if blockType, _ := m["type"].(string); blockType == "thinking" {
+					if _, has := m["cache_control"]; has {
+						delete(m, "cache_control")
+						log.Printf("[Warning] Removed illegal cache_control from thinking block in system")
+					}
+				}
+			}
+		}
+	}
+
+	// Clean thinking blocks found in messages; the warning records the message and content indexes.
+	if messages, ok := data["messages"].([]any); ok {
+		for msgIdx, msg := range messages {
+			if msgMap, ok := msg.(map[string]any); ok {
+				if content, ok := msgMap["content"].([]any); ok {
+					for contentIdx, item := range content {
+						if m, ok := item.(map[string]any); ok {
+							if blockType, _ := m["type"].(string); blockType == "thinking" {
+								if _, has := m["cache_control"]; has {
+									delete(m, "cache_control")
+									log.Printf("[Warning] Removed illegal cache_control from thinking block in messages[%d].content[%d]", msgIdx, contentIdx)
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
 // Forward 转发请求到Claude API
 func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *Account, parsed *ParsedRequest) (*ForwardResult, error) {
 	startTime := time.Now()
@@ -2340,6 +2401,10 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 				return &streamingResult{usage: usage, firstTokenMs: firstTokenMs, clientDisconnect: true}, nil
 			}
 			log.Printf("Stream data interval timeout: account=%d model=%s interval=%s", account.ID, originalModel, streamInterval)
+			// 处理流超时，可能标记账户为临时不可调度或错误状态
+			if s.rateLimitService != nil {
+				s.rateLimitService.HandleStreamTimeout(ctx, account, originalModel)
+			}
 			sendErrorEvent("stream_timeout")
 			return &streamingResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
 		}
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -40,6 +40,7 @@ type GeminiMessagesCompatService struct {
 	accountRepo               AccountRepository
 	groupRepo                 GroupRepository
 	cache                     GatewayCache
+	schedulerSnapshot         *SchedulerSnapshotService
 	tokenProvider             *GeminiTokenProvider
 	rateLimitService          *RateLimitService
 	httpUpstream              HTTPUpstream
@@ -51,6 +52,7 @@ func NewGeminiMessagesCompatService(
 	accountRepo AccountRepository,
 	groupRepo GroupRepository,
 	cache GatewayCache,
+	schedulerSnapshot *SchedulerSnapshotService,
 	tokenProvider *GeminiTokenProvider,
 	rateLimitService *RateLimitService,
 	httpUpstream HTTPUpstream,
@@ -61,6 +63,7 @@ func NewGeminiMessagesCompatService(
 		accountRepo:               accountRepo,
 		groupRepo:                 groupRepo,
 		cache:                     cache,
+		schedulerSnapshot:         schedulerSnapshot,
 		tokenProvider:             tokenProvider,
 		rateLimitService:          rateLimitService,
 		httpUpstream:              httpUpstream,
@@ -105,12 +108,6 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 	// gemini 分组支持混合调度（包含启用了 mixed_scheduling 的 antigravity 账户）
 	// 注意：强制平台模式不走混合调度
 	useMixedScheduling := platform == PlatformGemini && !hasForcePlatform
-	var queryPlatforms []string
-	if useMixedScheduling {
-		queryPlatforms = []string{PlatformGemini, PlatformAntigravity}
-	} else {
-		queryPlatforms = []string{platform}
-	}

 	cacheKey := "gemini:" + sessionHash

@@ -118,7 +115,7 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), cacheKey)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				// 检查账号是否有效：原生平台直接匹配，antigravity 需要启用混合调度
 				if err == nil && account.IsSchedulableForModel(requestedModel) && (requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) {
 					valid := false
@@ -149,22 +146,16 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 	}

 	// 查询可调度账户（强制平台模式：优先按分组查找，找不到再查全部）
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, *groupID, queryPlatforms)
+	accounts, err := s.listSchedulableAccountsOnce(ctx, groupID, platform, hasForcePlatform)
+	if err != nil {
+		return nil, fmt.Errorf("query accounts failed: %w", err)
+	}
+	// 强制平台模式下，分组中找不到账户时回退查询全部
+	if len(accounts) == 0 && groupID != nil && hasForcePlatform {
+		accounts, err = s.listSchedulableAccountsOnce(ctx, nil, platform, hasForcePlatform)
 		if err != nil {
 			return nil, fmt.Errorf("query accounts failed: %w", err)
 		}
-		// 强制平台模式下，分组中找不到账户时回退查询全部
-		if len(accounts) == 0 && hasForcePlatform {
-			accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, queryPlatforms)
-		}
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, queryPlatforms)
-	}
-	if err != nil {
-		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}

 	var selected *Account
@@ -245,6 +236,31 @@ func (s *GeminiMessagesCompatService) GetAntigravityGatewayService() *Antigravit
 	return s.antigravityGatewayService
 }

+func (s *GeminiMessagesCompatService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if s.schedulerSnapshot != nil { // a configured scheduler snapshot takes precedence over the repository
+		return s.schedulerSnapshot.GetAccount(ctx, accountID)
+	}
+	return s.accountRepo.GetByID(ctx, accountID) // no snapshot service: look the account up in the repository
+}
+
+func (s *GeminiMessagesCompatService) listSchedulableAccountsOnce(ctx context.Context, groupID *int64, platform string, hasForcePlatform bool) ([]Account, error) {
+	if s.schedulerSnapshot != nil { // a configured scheduler snapshot takes precedence over the repository
+		accounts, _, err := s.schedulerSnapshot.ListSchedulableAccounts(ctx, groupID, platform, hasForcePlatform) // the second result is discarded here
+		return accounts, err
+	}
+
+	useMixedScheduling := platform == PlatformGemini && !hasForcePlatform // mixed scheduling: Gemini platform without a forced platform
+	queryPlatforms := []string{platform}
+	if useMixedScheduling {
+		queryPlatforms = []string{platform, PlatformAntigravity} // also query Antigravity accounts
+	}
+
+	if groupID != nil { // scope the repository query to the group when one is given
+		return s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, *groupID, queryPlatforms)
+	}
+	return s.accountRepo.ListSchedulableByPlatforms(ctx, queryPlatforms)
+}
+
 func (s *GeminiMessagesCompatService) validateUpstreamBaseURL(raw string) (string, error) {
 	if s.cfg != nil && !s.cfg.Security.URLAllowlist.Enabled {
 		normalized, err := urlvalidator.ValidateURLFormat(raw, s.cfg.Security.URLAllowlist.AllowInsecureHTTP)
@@ -266,13 +282,7 @@ func (s *GeminiMessagesCompatService) validateUpstreamBaseURL(raw string) (strin

 // HasAntigravityAccounts 检查是否有可用的 antigravity 账户
 func (s *GeminiMessagesCompatService) HasAntigravityAccounts(ctx context.Context, groupID *int64) (bool, error) {
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformAntigravity)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformAntigravity)
-	}
+	accounts, err := s.listSchedulableAccountsOnce(ctx, groupID, PlatformAntigravity, false)
 	if err != nil {
 		return false, err
 	}
@@ -288,13 +298,7 @@ func (s *GeminiMessagesCompatService) HasAntigravityAccounts(ctx context.Context
 // 3) OAuth accounts explicitly marked as ai_studio
 // 4) Any remaining Gemini accounts (fallback)
 func (s *GeminiMessagesCompatService) SelectAccountForAIStudioEndpoints(ctx context.Context, groupID *int64) (*Account, error) {
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformGemini)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformGemini)
-	}
+	accounts, err := s.listSchedulableAccountsOnce(ctx, groupID, PlatformGemini, true)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -1,6 +1,7 @@
 package service

 import (
+	_ "embed"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -16,6 +17,9 @@ const (
 	codexCacheTTL          = 15 * time.Minute
 )

+//go:embed prompts/codex_cli_instructions.md
+var codexCLIInstructions string
+
 var codexModelMap = map[string]string{
 	"gpt-5.1-codex":             "gpt-5.1-codex",
 	"gpt-5.1-codex-low":         "gpt-5.1-codex",
@@ -70,6 +74,8 @@ type opencodeCacheMetadata struct {

 func applyCodexOAuthTransform(reqBody map[string]any) codexTransformResult {
 	result := codexTransformResult{}
+	// 工具续链需求会影响存储策略与 input 过滤逻辑。
+	needsToolContinuation := NeedsToolContinuation(reqBody)

 	model := ""
 	if v, ok := reqBody["model"].(string); ok {
@@ -84,6 +90,8 @@ func applyCodexOAuthTransform(reqBody map[string]any) codexTransformResult {
 		result.NormalizedModel = normalizedModel
 	}

+	// OAuth 走 ChatGPT internal API 时，store 必须为 false；显式 true 也会强制覆盖。
+	// 避免上游返回 "Store must be set to false"。
 	if v, ok := reqBody["store"].(bool); !ok || v {
 		reqBody["store"] = false
 		result.Modified = true
@@ -119,10 +127,18 @@ func applyCodexOAuthTransform(reqBody map[string]any) codexTransformResult {
 			reqBody["instructions"] = instructions
 			result.Modified = true
 		}
+	} else if existingInstructions == "" {
+		// 未获取到 opencode 指令时，回退使用 Codex CLI 指令。
+		codexInstructions := strings.TrimSpace(getCodexCLIInstructions())
+		if codexInstructions != "" {
+			reqBody["instructions"] = codexInstructions
+			result.Modified = true
+		}
 	}

+	// 续链场景保留 item_reference 与 id，避免 call_id 上下文丢失。
 	if input, ok := reqBody["input"].([]any); ok {
-		input = filterCodexInput(input)
+		input = filterCodexInput(input, needsToolContinuation)
 		reqBody["input"] = input
 		result.Modified = true
 	}
@@ -235,14 +251,75 @@ func getOpenCodeCachedPrompt(url, cacheFileName, metaFileName string) string {
 }

 func getOpenCodeCodexHeader() string {
-	return getOpenCodeCachedPrompt(opencodeCodexHeaderURL, "opencode-codex-header.txt", "opencode-codex-header-meta.json")
+	// 优先从 opencode 仓库缓存获取指令。
+	opencodeInstructions := getOpenCodeCachedPrompt(opencodeCodexHeaderURL, "opencode-codex-header.txt", "opencode-codex-header-meta.json")
+
+	// 若 opencode 指令可用，直接返回。
+	if opencodeInstructions != "" {
+		return opencodeInstructions
+	}
+
+	// 否则回退使用本地 Codex CLI 指令。
+	return getCodexCLIInstructions()
+}
+
+func getCodexCLIInstructions() string {
+	return codexCLIInstructions
 }

 func GetOpenCodeInstructions() string {
 	return getOpenCodeCodexHeader()
 }

-func filterCodexInput(input []any) []any {
+// GetCodexCLIInstructions returns the built-in Codex CLI instructions by delegating to getCodexCLIInstructions.
+func GetCodexCLIInstructions() string {
+	return getCodexCLIInstructions()
+}
+
+// ReplaceWithCodexInstructions overwrites reqBody["instructions"] with the trimmed built-in Codex instructions unless the trimmed existing value already matches; it reports whether reqBody was changed.
+func ReplaceWithCodexInstructions(reqBody map[string]any) bool {
+	codexInstructions := strings.TrimSpace(getCodexCLIInstructions())
+	if codexInstructions == "" { // nothing to substitute when the built-in instructions are blank
+		return false
+	}
+
+	existingInstructions, _ := reqBody["instructions"].(string) // a missing or non-string value is treated as ""
+	if strings.TrimSpace(existingInstructions) != codexInstructions {
+		reqBody["instructions"] = codexInstructions
+		return true
+	}
+
+	return false
+}
+
+// IsInstructionError reports whether errorMessage contains, case-insensitively, a keyword related to instruction format or system prompts; an empty message yields false.
+func IsInstructionError(errorMessage string) bool {
+	if errorMessage == "" {
+		return false
+	}
+
+	lowerMsg := strings.ToLower(errorMessage)
+	instructionKeywords := []string{
+		"instruction",
+		"instructions", // NOTE(review): already covered by the "instruction" substring match above
+		"system prompt",
+		"system message",
+		"invalid prompt",
+		"prompt format",
+	}
+
+	for _, keyword := range instructionKeywords {
+		if strings.Contains(lowerMsg, keyword) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// filterCodexInput 按需过滤 item_reference 与 id。
+// preserveReferences 为 true 时保持引用与 id，以满足续链请求对上下文的依赖。
+func filterCodexInput(input []any, preserveReferences bool) []any {
 	filtered := make([]any, 0, len(input))
 	for _, item := range input {
 		m, ok := item.(map[string]any)
@@ -250,15 +327,62 @@ func filterCodexInput(input []any) []any {
 			filtered = append(filtered, item)
 			continue
 		}
-		if typ, ok := m["type"].(string); ok && typ == "item_reference" {
+		typ, _ := m["type"].(string)
+		if typ == "item_reference" {
+			if !preserveReferences {
+				continue
+			}
+			newItem := make(map[string]any, len(m))
+			for key, value := range m {
+				newItem[key] = value
+			}
+			filtered = append(filtered, newItem)
 			continue
 		}
-		delete(m, "id")
-		filtered = append(filtered, m)
+
+		newItem := m
+		copied := false
+		// 仅在需要修改字段时创建副本，避免直接改写原始输入。
+		ensureCopy := func() {
+			if copied {
+				return
+			}
+			newItem = make(map[string]any, len(m))
+			for key, value := range m {
+				newItem[key] = value
+			}
+			copied = true
+		}
+
+		if isCodexToolCallItemType(typ) {
+			if callID, ok := m["call_id"].(string); !ok || strings.TrimSpace(callID) == "" {
+				if id, ok := m["id"].(string); ok && strings.TrimSpace(id) != "" {
+					ensureCopy()
+					newItem["call_id"] = id
+				}
+			}
+		}
+
+		if !preserveReferences {
+			ensureCopy()
+			delete(newItem, "id")
+			if !isCodexToolCallItemType(typ) {
+				delete(newItem, "call_id")
+			}
+		}
+
+		filtered = append(filtered, newItem)
 	}
 	return filtered
 }

+func isCodexToolCallItemType(typ string) bool { // reports whether typ names a tool-call item or a tool-call output item
+	if typ == "" {
+		return false
+	}
+	return strings.HasSuffix(typ, "_call") || strings.HasSuffix(typ, "_call_output") // suffix match, e.g. "function_call" / "function_call_output"
+}
+
 func normalizeCodexTools(reqBody map[string]any) bool {
 	rawTools, ok := reqBody["tools"]
 	if !ok || rawTools == nil {
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -0,0 +1,167 @@
+package service
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestApplyCodexOAuthTransform_ToolContinuationPreservesInput(t *testing.T) {
+	// Continuation scenario: item_reference and id are kept, but store is no longer forced to true.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model": "gpt-5.2",
+		"input": []any{
+			map[string]any{"type": "item_reference", "id": "ref1", "text": "x"},
+			map[string]any{"type": "function_call_output", "call_id": "call_1", "output": "ok", "id": "o1"},
+		},
+		"tool_choice": "auto",
+	}
+
+	applyCodexOAuthTransform(reqBody)
+
+	// store was not explicitly set to true, so it ends up false.
+	store, ok := reqBody["store"].(bool)
+	require.True(t, ok)
+	require.False(t, store)
+
+	input, ok := reqBody["input"].([]any)
+	require.True(t, ok)
+	require.Len(t, input, 2)
+
+	// Verify input[0] is a map before asserting on its fields.
+	first, ok := input[0].(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "item_reference", first["type"])
+	require.Equal(t, "ref1", first["id"])
+
+	// Verify input[1] is a map before asserting on its fields.
+	second, ok := input[1].(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "o1", second["id"])
+}
+
+func TestApplyCodexOAuthTransform_ExplicitStoreFalsePreserved(t *testing.T) {
+	// Continuation scenario: an explicit store=false is not forced to true and stays false.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model": "gpt-5.1",
+		"store": false,
+		"input": []any{
+			map[string]any{"type": "function_call_output", "call_id": "call_1"},
+		},
+		"tool_choice": "auto",
+	}
+
+	applyCodexOAuthTransform(reqBody)
+
+	store, ok := reqBody["store"].(bool)
+	require.True(t, ok)
+	require.False(t, store)
+}
+
+func TestApplyCodexOAuthTransform_ExplicitStoreTrueForcedFalse(t *testing.T) {
+	// An explicit store=true is still forced to false.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model": "gpt-5.1",
+		"store": true,
+		"input": []any{
+			map[string]any{"type": "function_call_output", "call_id": "call_1"},
+		},
+		"tool_choice": "auto",
+	}
+
+	applyCodexOAuthTransform(reqBody)
+
+	store, ok := reqBody["store"].(bool)
+	require.True(t, ok)
+	require.False(t, store)
+}
+
+func TestApplyCodexOAuthTransform_NonContinuationDefaultsStoreFalseAndStripsIDs(t *testing.T) {
+	// Non-continuation scenario: store defaults to false when unset, and id is removed from input items.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model": "gpt-5.1",
+		"input": []any{
+			map[string]any{"type": "text", "id": "t1", "text": "hi"},
+		},
+	}
+
+	applyCodexOAuthTransform(reqBody)
+
+	store, ok := reqBody["store"].(bool)
+	require.True(t, ok)
+	require.False(t, store)
+
+	input, ok := reqBody["input"].([]any)
+	require.True(t, ok)
+	require.Len(t, input, 1)
+	// Verify input[0] is a map before checking that its id was removed.
+	item, ok := input[0].(map[string]any)
+	require.True(t, ok)
+	_, hasID := item["id"]
+	require.False(t, hasID)
+}
+
+func TestFilterCodexInput_RemovesItemReferenceWhenNotPreserved(t *testing.T) {
+	input := []any{
+		map[string]any{"type": "item_reference", "id": "ref1"},
+		map[string]any{"type": "text", "id": "t1", "text": "hi"},
+	}
+
+	filtered := filterCodexInput(input, false)
+	require.Len(t, filtered, 1)
+	// Verify filtered[0] is a map so the field checks below are reliable.
+	item, ok := filtered[0].(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "text", item["type"])
+	_, hasID := item["id"]
+	require.False(t, hasID)
+}
+
+func TestApplyCodexOAuthTransform_EmptyInput(t *testing.T) {
+	// An empty input must stay empty and must not cause an error.
+	setupCodexCache(t)
+
+	reqBody := map[string]any{
+		"model": "gpt-5.1",
+		"input": []any{},
+	}
+
+	applyCodexOAuthTransform(reqBody)
+
+	input, ok := reqBody["input"].([]any)
+	require.True(t, ok)
+	require.Len(t, input, 0)
+}
+
+func setupCodexCache(t *testing.T) {
+	t.Helper()
+
+	// Point HOME at a temporary directory and pre-seed the cache there, to avoid triggering a network fetch of the header.
+	tempDir := t.TempDir()
+	t.Setenv("HOME", tempDir)
+
+	cacheDir := filepath.Join(tempDir, ".opencode", "cache")
+	require.NoError(t, os.MkdirAll(cacheDir, 0o755))
+	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "opencode-codex-header.txt"), []byte("header"), 0o644))
+
+	meta := map[string]any{
+		"etag":        "",
+		"lastFetch":   time.Now().UTC().Format(time.RFC3339),
+		"lastChecked": time.Now().UnixMilli(),
+	}
+	data, err := json.Marshal(meta)
+	require.NoError(t, err)
+	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "opencode-codex-header-meta.json"), data, 0o644))
+}
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -42,6 +42,7 @@ var openaiSSEDataRe = regexp.MustCompile(`^data:\s*`)
 var openaiAllowedHeaders = map[string]bool{
 	"accept-language": true,
 	"content-type":    true,
+	"conversation_id": true,
 	"user-agent":      true,
 	"originator":      true,
 	"session_id":      true,
@@ -85,6 +86,7 @@ type OpenAIGatewayService struct {
 	userSubRepo         UserSubscriptionRepository
 	cache               GatewayCache
 	cfg                 *config.Config
+	schedulerSnapshot   *SchedulerSnapshotService
 	concurrencyService  *ConcurrencyService
 	billingService      *BillingService
 	rateLimitService    *RateLimitService
@@ -101,6 +103,7 @@ func NewOpenAIGatewayService(
 	userSubRepo UserSubscriptionRepository,
 	cache GatewayCache,
 	cfg *config.Config,
+	schedulerSnapshot *SchedulerSnapshotService,
 	concurrencyService *ConcurrencyService,
 	billingService *BillingService,
 	rateLimitService *RateLimitService,
@@ -115,6 +118,7 @@ func NewOpenAIGatewayService(
 		userSubRepo:         userSubRepo,
 		cache:               cache,
 		cfg:                 cfg,
+		schedulerSnapshot:   schedulerSnapshot,
 		concurrencyService:  concurrencyService,
 		billingService:      billingService,
 		rateLimitService:    rateLimitService,
@@ -159,7 +163,7 @@ func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.C
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), "openai:"+sessionHash)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				if err == nil && account.IsSchedulable() && account.IsOpenAI() && (requestedModel == "" || account.IsModelSupported(requestedModel)) {
 					// Refresh sticky session TTL
 					_ = s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), "openai:"+sessionHash, openaiStickySessionTTL)
@@ -170,16 +174,7 @@ func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.C
 	}

 	// 2. Get schedulable OpenAI accounts
-	var accounts []Account
-	var err error
-	// 简易模式：忽略分组限制，查询所有可用账号
-	if s.cfg.RunMode == config.RunModeSimple {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
-	} else if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformOpenAI)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
-	}
+	accounts, err := s.listSchedulableAccounts(ctx, groupID)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
@@ -301,7 +296,7 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
 	if sessionHash != "" {
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), "openai:"+sessionHash)
 		if err == nil && accountID > 0 && !isExcluded(accountID) {
-			account, err := s.accountRepo.GetByID(ctx, accountID)
+			account, err := s.getSchedulableAccount(ctx, accountID)
 			if err == nil && account.IsSchedulable() && account.IsOpenAI() &&
 				(requestedModel == "" || account.IsModelSupported(requestedModel)) {
 				result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
@@ -446,6 +441,10 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
 }

 func (s *OpenAIGatewayService) listSchedulableAccounts(ctx context.Context, groupID *int64) ([]Account, error) {
+	if s.schedulerSnapshot != nil {
+		accounts, _, err := s.schedulerSnapshot.ListSchedulableAccounts(ctx, groupID, PlatformOpenAI, false)
+		return accounts, err
+	}
 	var accounts []Account
 	var err error
 	if s.cfg != nil && s.cfg.RunMode == config.RunModeSimple {
@@ -468,6 +467,13 @@ func (s *OpenAIGatewayService) tryAcquireAccountSlot(ctx context.Context, accoun
 	return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
 }

+func (s *OpenAIGatewayService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if s.schedulerSnapshot != nil { // a configured scheduler snapshot takes precedence over the repository
+		return s.schedulerSnapshot.GetAccount(ctx, accountID)
+	}
+	return s.accountRepo.GetByID(ctx, accountID) // no snapshot service: look the account up in the repository
+}
+
 func (s *OpenAIGatewayService) schedulingConfig() config.GatewaySchedulingConfig {
 	if s.cfg != nil {
 		return s.cfg.Gateway.Scheduling
@@ -540,16 +546,35 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco

 	isCodexCLI := openai.IsCodexCLIRequest(c.GetHeader("User-Agent"))

-	// Apply model mapping (skip for Codex CLI for transparent forwarding)
-	mappedModel := reqModel
-	if !isCodexCLI {
-		mappedModel = account.GetMappedModel(reqModel)
-		if mappedModel != reqModel {
-			reqBody["model"] = mappedModel
+	// 对所有请求执行模型映射（包含 Codex CLI）。
+	mappedModel := account.GetMappedModel(reqModel)
+	if mappedModel != reqModel {
+		log.Printf("[OpenAI] Model mapping applied: %s -> %s (account: %s, isCodexCLI: %v)", reqModel, mappedModel, account.Name, isCodexCLI)
+		reqBody["model"] = mappedModel
+		bodyModified = true
+	}
+
+	// 针对所有 OpenAI 账号执行 Codex 模型名规范化，确保上游识别一致。
+	if model, ok := reqBody["model"].(string); ok {
+		normalizedModel := normalizeCodexModel(model)
+		if normalizedModel != "" && normalizedModel != model {
+			log.Printf("[OpenAI] Codex model normalization: %s -> %s (account: %s, type: %s, isCodexCLI: %v)",
+				model, normalizedModel, account.Name, account.Type, isCodexCLI)
+			reqBody["model"] = normalizedModel
+			mappedModel = normalizedModel
 			bodyModified = true
 		}
 	}

+	// 规范化 reasoning.effort 参数（minimal -> none），与上游允许值对齐。
+	if reasoning, ok := reqBody["reasoning"].(map[string]any); ok {
+		if effort, ok := reasoning["effort"].(string); ok && effort == "minimal" {
+			reasoning["effort"] = "none"
+			bodyModified = true
+			log.Printf("[OpenAI] Normalized reasoning.effort: minimal -> none (account: %s)", account.Name)
+		}
+	}
+
 	if account.Type == AccountTypeOAuth && !isCodexCLI {
 		codexResult := applyCodexOAuthTransform(reqBody)
 		if codexResult.Modified {
@@ -563,6 +588,44 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 		}
 	}

+	// Handle max_output_tokens based on platform and account type
+	if !isCodexCLI {
+		if maxOutputTokens, hasMaxOutputTokens := reqBody["max_output_tokens"]; hasMaxOutputTokens {
+			switch account.Platform {
+			case PlatformOpenAI:
+				// For OpenAI API Key, remove max_output_tokens (not supported)
+				// For OpenAI OAuth (Responses API), keep it (supported)
+				if account.Type == AccountTypeAPIKey {
+					delete(reqBody, "max_output_tokens")
+					bodyModified = true
+				}
+			case PlatformAnthropic:
+				// For Anthropic (Claude), convert to max_tokens
+				delete(reqBody, "max_output_tokens")
+				if _, hasMaxTokens := reqBody["max_tokens"]; !hasMaxTokens {
+					reqBody["max_tokens"] = maxOutputTokens
+				}
+				bodyModified = true
+			case PlatformGemini:
+				// For Gemini, remove (will be handled by Gemini-specific transform)
+				delete(reqBody, "max_output_tokens")
+				bodyModified = true
+			default:
+				// For unknown platforms, remove to be safe
+				delete(reqBody, "max_output_tokens")
+				bodyModified = true
+			}
+		}
+
+		// Also handle max_completion_tokens (similar logic)
+		if _, hasMaxCompletionTokens := reqBody["max_completion_tokens"]; hasMaxCompletionTokens {
+			if account.Type == AccountTypeAPIKey || account.Platform != PlatformOpenAI {
+				delete(reqBody, "max_completion_tokens")
+				bodyModified = true
+			}
+		}
+	}
+
 	// Re-serialize body only if modified
 	if bodyModified {
 		var err error
@@ -742,9 +805,6 @@ func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.
 		if promptCacheKey != "" {
 			req.Header.Set("conversation_id", promptCacheKey)
 			req.Header.Set("session_id", promptCacheKey)
-		} else {
-			req.Header.Del("conversation_id")
-			req.Header.Del("session_id")
 		}
 	}

@@ -1042,6 +1102,10 @@ func (s *OpenAIGatewayService) handleStreamingResponse(ctx context.Context, resp
 				continue
 			}
 			log.Printf("Stream data interval timeout: account=%d model=%s interval=%s", account.ID, originalModel, streamInterval)
+			// 处理流超时，可能标记账户为临时不可调度或错误状态
+			if s.rateLimitService != nil {
+				s.rateLimitService.HandleStreamTimeout(ctx, account, originalModel)
+			}
 			sendErrorEvent("stream_timeout")
 			return &openaiStreamingResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")

--- a/backend/internal/service/openai_tool_continuation.go
+++ b/backend/internal/service/openai_tool_continuation.go
@@ -0,0 +1,213 @@
+package service
+
+import "strings"
+
+// NeedsToolContinuation reports whether the request needs tool-call continuation handling.
+// Any one of these signals qualifies: a non-blank previous_response_id, an input item of type function_call_output or item_reference,
+// or an explicitly declared tools list or tool_choice. A nil reqBody yields false.
+func NeedsToolContinuation(reqBody map[string]any) bool {
+	if reqBody == nil {
+		return false
+	}
+	if hasNonEmptyString(reqBody["previous_response_id"]) {
+		return true
+	}
+	if hasToolsSignal(reqBody) {
+		return true
+	}
+	if hasToolChoiceSignal(reqBody) {
+		return true
+	}
+	if inputHasType(reqBody, "function_call_output") {
+		return true
+	}
+	if inputHasType(reqBody, "item_reference") {
+		return true
+	}
+	return false
+}
+
+// HasFunctionCallOutput reports whether input contains a function_call_output item (per the original author, used to trigger continuation validation); a nil reqBody yields false.
+func HasFunctionCallOutput(reqBody map[string]any) bool {
+	if reqBody == nil {
+		return false
+	}
+	return inputHasType(reqBody, "function_call_output")
+}
+
+// HasToolCallContext reports whether input contains a tool_call or function_call item carrying a non-blank call_id,
+// i.e. whether a function_call_output has call context it could be associated with.
+func HasToolCallContext(reqBody map[string]any) bool {
+	if reqBody == nil {
+		return false
+	}
+	input, ok := reqBody["input"].([]any)
+	if !ok {
+		return false
+	}
+	for _, item := range input {
+		itemMap, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		itemType, _ := itemMap["type"].(string)
+		if itemType != "tool_call" && itemType != "function_call" {
+			continue
+		}
+		if callID, ok := itemMap["call_id"].(string); ok && strings.TrimSpace(callID) != "" {
+			return true
+		}
+	}
+	return false
+}
+
+// FunctionCallOutputCallIDs collects the distinct non-blank call_id values of the function_call_output items in input.
+// The IDs are meant to be matched against item_reference.id; the result is nil when none exist and its order is unspecified (map iteration).
+func FunctionCallOutputCallIDs(reqBody map[string]any) []string {
+	if reqBody == nil {
+		return nil
+	}
+	input, ok := reqBody["input"].([]any)
+	if !ok {
+		return nil
+	}
+	ids := make(map[string]struct{})
+	for _, item := range input {
+		itemMap, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		itemType, _ := itemMap["type"].(string)
+		if itemType != "function_call_output" {
+			continue
+		}
+		if callID, ok := itemMap["call_id"].(string); ok && strings.TrimSpace(callID) != "" {
+			ids[callID] = struct{}{} // NOTE(review): stored untrimmed, whereas HasItemReferenceForCallIDs trims reference ids before comparing
+		}
+	}
+	if len(ids) == 0 {
+		return nil
+	}
+	result := make([]string, 0, len(ids))
+	for id := range ids {
+		result = append(result, id)
+	}
+	return result
+}
+
+// HasFunctionCallOutputMissingCallID reports whether any function_call_output item in input has a missing, non-string, or blank call_id.
+func HasFunctionCallOutputMissingCallID(reqBody map[string]any) bool {
+	if reqBody == nil {
+		return false
+	}
+	input, ok := reqBody["input"].([]any)
+	if !ok {
+		return false
+	}
+	for _, item := range input {
+		itemMap, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		itemType, _ := itemMap["type"].(string)
+		if itemType != "function_call_output" {
+			continue
+		}
+		callID, _ := itemMap["call_id"].(string)
+		if strings.TrimSpace(callID) == "" {
+			return true
+		}
+	}
+	return false
+}
+
+// HasItemReferenceForCallIDs reports whether every entry of callIDs equals the trimmed id of some item_reference in input.
+// It validates continuations that rely on reference items only; a nil reqBody, empty callIDs, or no usable references yield false.
+func HasItemReferenceForCallIDs(reqBody map[string]any, callIDs []string) bool {
+	if reqBody == nil || len(callIDs) == 0 {
+		return false
+	}
+	input, ok := reqBody["input"].([]any)
+	if !ok {
+		return false
+	}
+	referenceIDs := make(map[string]struct{})
+	for _, item := range input {
+		itemMap, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		itemType, _ := itemMap["type"].(string)
+		if itemType != "item_reference" {
+			continue
+		}
+		idValue, _ := itemMap["id"].(string)
+		idValue = strings.TrimSpace(idValue)
+		if idValue == "" {
+			continue
+		}
+		referenceIDs[idValue] = struct{}{}
+	}
+	if len(referenceIDs) == 0 {
+		return false
+	}
+	for _, callID := range callIDs {
+		if _, ok := referenceIDs[callID]; !ok {
+			return false
+		}
+	}
+	return true
+}
+
+// inputHasType reports whether input contains a map item whose type field equals want; a missing or non-slice input yields false.
+func inputHasType(reqBody map[string]any, want string) bool {
+	input, ok := reqBody["input"].([]any)
+	if !ok {
+		return false
+	}
+	for _, item := range input {
+		itemMap, ok := item.(map[string]any)
+		if !ok {
+			continue
+		}
+		itemType, _ := itemMap["type"].(string)
+		if itemType == want {
+			return true
+		}
+	}
+	return false
+}
+
+// hasNonEmptyString reports whether value is a string that is non-empty after trimming surrounding whitespace.
+func hasNonEmptyString(value any) bool {
+	stringValue, ok := value.(string)
+	return ok && strings.TrimSpace(stringValue) != ""
+}
+
+// hasToolsSignal reports whether tools is explicitly declared, i.e. present as a non-empty []any; any other type yields false.
+func hasToolsSignal(reqBody map[string]any) bool {
+	raw, exists := reqBody["tools"]
+	if !exists || raw == nil {
+		return false
+	}
+	if tools, ok := raw.([]any); ok {
+		return len(tools) > 0
+	}
+	return false
+}
+
+// hasToolChoiceSignal reports whether tool_choice is explicitly declared: a non-blank string or a non-empty object; any other type yields false.
+func hasToolChoiceSignal(reqBody map[string]any) bool {
+	raw, exists := reqBody["tool_choice"]
+	if !exists || raw == nil {
+		return false
+	}
+	switch value := raw.(type) {
+	case string:
+		return strings.TrimSpace(value) != ""
+	case map[string]any:
+		return len(value) > 0
+	default:
+		return false
+	}
+}
--- a/backend/internal/service/openai_tool_continuation_test.go
+++ b/backend/internal/service/openai_tool_continuation_test.go
@@ -0,0 +1,98 @@
+package service
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestNeedsToolContinuationSignals(t *testing.T) {
+	// Covers every signal source that triggers tool continuation so the decision logic is exercised completely.
+	cases := []struct {
+		name string
+		body map[string]any
+		want bool
+	}{
+		{name: "nil", body: nil, want: false},
+		{name: "previous_response_id", body: map[string]any{"previous_response_id": "resp_1"}, want: true},
+		{name: "previous_response_id_blank", body: map[string]any{"previous_response_id": "  "}, want: false},
+		{name: "function_call_output", body: map[string]any{"input": []any{map[string]any{"type": "function_call_output"}}}, want: true},
+		{name: "item_reference", body: map[string]any{"input": []any{map[string]any{"type": "item_reference"}}}, want: true},
+		{name: "tools", body: map[string]any{"tools": []any{map[string]any{"type": "function"}}}, want: true},
+		{name: "tools_empty", body: map[string]any{"tools": []any{}}, want: false},
+		{name: "tools_invalid", body: map[string]any{"tools": "bad"}, want: false},
+		{name: "tool_choice", body: map[string]any{"tool_choice": "auto"}, want: true},
+		{name: "tool_choice_object", body: map[string]any{"tool_choice": map[string]any{"type": "function"}}, want: true},
+		{name: "tool_choice_empty_object", body: map[string]any{"tool_choice": map[string]any{}}, want: false},
+		{name: "none", body: map[string]any{"input": []any{map[string]any{"type": "text", "text": "hi"}}}, want: false},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			require.Equal(t, tt.want, NeedsToolContinuation(tt.body))
+		})
+	}
+}
+
+func TestHasFunctionCallOutput(t *testing.T) {
+	// Only an input array that contains a function_call_output item counts as continuation output.
+	require.False(t, HasFunctionCallOutput(nil))
+	require.True(t, HasFunctionCallOutput(map[string]any{
+		"input": []any{map[string]any{"type": "function_call_output"}},
+	}))
+	require.False(t, HasFunctionCallOutput(map[string]any{
+		"input": "text",
+	}))
+}
+
+func TestHasToolCallContext(t *testing.T) {
+	// A tool_call/function_call item must carry a call_id to count as linkable context.
+	require.False(t, HasToolCallContext(nil))
+	require.True(t, HasToolCallContext(map[string]any{
+		"input": []any{map[string]any{"type": "tool_call", "call_id": "call_1"}},
+	}))
+	require.True(t, HasToolCallContext(map[string]any{
+		"input": []any{map[string]any{"type": "function_call", "call_id": "call_2"}},
+	}))
+	require.False(t, HasToolCallContext(map[string]any{
+		"input": []any{map[string]any{"type": "tool_call"}},
+	}))
+}
+
+func TestFunctionCallOutputCallIDs(t *testing.T) {
+	// Only non-empty call_id values are extracted, and duplicates are removed before returning.
+	require.Empty(t, FunctionCallOutputCallIDs(nil))
+	callIDs := FunctionCallOutputCallIDs(map[string]any{
+		"input": []any{
+			map[string]any{"type": "function_call_output", "call_id": "call_1"},
+			map[string]any{"type": "function_call_output", "call_id": ""},
+			map[string]any{"type": "function_call_output", "call_id": "call_1"},
+		},
+	})
+	require.ElementsMatch(t, []string{"call_1"}, callIDs)
+}
+
+func TestHasFunctionCallOutputMissingCallID(t *testing.T) { // a function_call_output item without call_id must be reported as missing
+	require.False(t, HasFunctionCallOutputMissingCallID(nil))
+	require.True(t, HasFunctionCallOutputMissingCallID(map[string]any{
+		"input": []any{map[string]any{"type": "function_call_output"}},
+	}))
+	require.False(t, HasFunctionCallOutputMissingCallID(map[string]any{
+		"input": []any{map[string]any{"type": "function_call_output", "call_id": "call_1"}},
+	}))
+}
+
+func TestHasItemReferenceForCallIDs(t *testing.T) {
+	// item_reference entries must cover every requested call_id to count as linkable context.
+	require.False(t, HasItemReferenceForCallIDs(nil, []string{"call_1"}))
+	require.False(t, HasItemReferenceForCallIDs(map[string]any{}, []string{"call_1"}))
+	req := map[string]any{
+		"input": []any{
+			map[string]any{"type": "item_reference", "id": "call_1"},
+			map[string]any{"type": "item_reference", "id": "call_2"},
+		},
+	}
+	require.True(t, HasItemReferenceForCallIDs(req, []string{"call_1"}))
+	require.True(t, HasItemReferenceForCallIDs(req, []string{"call_1", "call_2"}))
+	require.False(t, HasItemReferenceForCallIDs(req, []string{"call_1", "call_3"}))
+}
--- a/backend/internal/service/ops_port.go
+++ b/backend/internal/service/ops_port.go
@@ -17,6 +17,8 @@ type OpsRepository interface {

 	// Lightweight window stats (for realtime WS / quick sampling).
 	GetWindowStats(ctx context.Context, filter *OpsDashboardFilter) (*OpsWindowStats, error)
+	// Lightweight realtime traffic summary (for the Ops dashboard header card).
+	GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error)

 	GetDashboardOverview(ctx context.Context, filter *OpsDashboardFilter) (*OpsDashboardOverview, error)
 	GetThroughputTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsThroughputTrendResponse, error)
@@ -71,6 +73,7 @@ type OpsInsertErrorLogInput struct {
 	Severity          string
 	StatusCode        int
 	IsBusinessLimited bool
+	IsCountTokens     bool // 是否为 count_tokens 请求

 	ErrorMessage string
 	ErrorBody    string
--- a/backend/internal/service/ops_realtime_traffic.go
+++ b/backend/internal/service/ops_realtime_traffic.go
@@ -0,0 +1,36 @@
+package service
+
+import (
+	"context"
+	"time"
+
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
+)
+
+// GetRealtimeTrafficSummary returns QPS/TPS current/peak/avg for the provided window; it backs the Ops dashboard "Realtime Traffic" card and is intentionally lightweight.
+// It fails when monitoring is disabled or the repository is missing, and returns a BadRequest error unless filter is non-nil with both bounds set, start <= end, and a span of at most one hour.
+func (s *OpsService) GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error) {
+	if err := s.RequireMonitoringEnabled(ctx); err != nil {
+		return nil, err
+	}
+	if s.opsRepo == nil {
+		return nil, infraerrors.ServiceUnavailable("OPS_REPO_UNAVAILABLE", "Ops repository not available")
+	}
+	if filter == nil {
+		return nil, infraerrors.BadRequest("OPS_FILTER_REQUIRED", "filter is required")
+	}
+	if filter.StartTime.IsZero() || filter.EndTime.IsZero() {
+		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_REQUIRED", "start_time/end_time are required")
+	}
+	if filter.StartTime.After(filter.EndTime) {
+		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_INVALID", "start_time must be <= end_time")
+	}
+	if filter.EndTime.Sub(filter.StartTime) > time.Hour {
+		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_TOO_LARGE", "invalid time range: max window is 1 hour")
+	}
+
+	// Realtime traffic summary always uses raw logs (minute granularity peaks).
+	filter.QueryMode = OpsQueryModeRaw // NOTE: this overwrites QueryMode on the caller's filter, not on a copy
+
+	return s.opsRepo.GetRealtimeTrafficSummary(ctx, filter)
+}
--- a/backend/internal/service/ops_realtime_traffic_models.go
+++ b/backend/internal/service/ops_realtime_traffic_models.go
@@ -0,0 +1,19 @@
+package service
+
+import "time"
+
+// OpsRealtimeTrafficSummary is a lightweight summary used by the Ops dashboard "Realtime Traffic" card.
+// It reports QPS/TPS current/peak/avg for the requested time window and echoes the window bounds and filters it was computed for.
+type OpsRealtimeTrafficSummary struct {
+	// Window is a normalized label (e.g. "1min", "5min", "30min", "1h").
+	Window string `json:"window"`
+
+	StartTime time.Time `json:"start_time"`
+	EndTime   time.Time `json:"end_time"`
+
+	Platform string `json:"platform"`
+	GroupID  *int64 `json:"group_id"` // no omitempty: a nil pointer is serialized as JSON null
+
+	QPS OpsRateSummary `json:"qps"`
+	TPS OpsRateSummary `json:"tps"`
+}
--- a/backend/internal/service/ops_settings.go
+++ b/backend/internal/service/ops_settings.go
@@ -368,6 +368,9 @@ func defaultOpsAdvancedSettings() *OpsAdvancedSettings {
 		Aggregation: OpsAggregationSettings{
 			AggregationEnabled: false,
 		},
+		IgnoreCountTokensErrors: false,
+		AutoRefreshEnabled:      false,
+		AutoRefreshIntervalSec:  30,
 	}
 }

@@ -388,6 +391,10 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
 	if cfg.DataRetention.HourlyMetricsRetentionDays <= 0 {
 		cfg.DataRetention.HourlyMetricsRetentionDays = 30
 	}
+	// Normalize auto refresh interval (default 30 seconds)
+	if cfg.AutoRefreshIntervalSec <= 0 {
+		cfg.AutoRefreshIntervalSec = 30
+	}
 }

 func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
@@ -403,6 +410,9 @@ func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
 	if cfg.DataRetention.HourlyMetricsRetentionDays < 1 || cfg.DataRetention.HourlyMetricsRetentionDays > 365 {
 		return errors.New("hourly_metrics_retention_days must be between 1 and 365")
 	}
+	if cfg.AutoRefreshIntervalSec < 15 || cfg.AutoRefreshIntervalSec > 300 {
+		return errors.New("auto_refresh_interval_seconds must be between 15 and 300")
+	}
 	return nil
 }

@@ -463,3 +473,93 @@ func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdva
 	_ = json.Unmarshal(raw, updated)
 	return updated, nil
 }
+
+// =========================
+// Metric thresholds
+// =========================
+
+const SettingKeyOpsMetricThresholds = "ops_metric_thresholds" // setting key under which the JSON-encoded OpsMetricThresholds is stored
+
+func defaultOpsMetricThresholds() *OpsMetricThresholds { // built-in thresholds; locals exist so their addresses can fill the *float64 fields
+	slaMin := 99.5        // percent
+	latencyMax := 2000.0  // milliseconds, per the LatencyP99MsMax field name
+	ttftMax := 500.0      // milliseconds, per the TTFTp99MsMax field name
+	reqErrMax := 5.0      // percent
+	upstreamErrMax := 5.0 // percent
+	return &OpsMetricThresholds{
+		SLAPercentMin:               &slaMin,
+		LatencyP99MsMax:             &latencyMax,
+		TTFTp99MsMax:                &ttftMax,
+		RequestErrorRatePercentMax:  &reqErrMax,
+		UpstreamErrorRatePercentMax: &upstreamErrMax,
+	}
+}
+
+func (s *OpsService) GetMetricThresholds(ctx context.Context) (*OpsMetricThresholds, error) { // returns the stored thresholds, or the built-in defaults when none are usable
+	defaultCfg := defaultOpsMetricThresholds()
+	if s == nil || s.settingRepo == nil { // no repository available: serve defaults without persisting anything
+		return defaultCfg, nil
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsMetricThresholds)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) { // nothing stored yet: seed the defaults on a best-effort basis
+			if b, mErr := json.Marshal(defaultCfg); mErr == nil {
+				_ = s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(b)) // a failed write is ignored; defaults are still returned
+			}
+			return defaultCfg, nil
+		}
+		return nil, err
+	}
+
+	cfg := &OpsMetricThresholds{}
+	if err := json.Unmarshal([]byte(raw), cfg); err != nil {
+		return defaultCfg, nil // an undecodable stored value falls back to defaults instead of failing the call
+	}
+
+	return cfg, nil
+}
+
+func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricThresholds) (*OpsMetricThresholds, error) { // validates cfg, stores it as JSON, and returns a decoded copy of what was stored
+	if s == nil || s.settingRepo == nil {
+		return nil, errors.New("setting repository not initialized")
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if cfg == nil {
+		return nil, errors.New("invalid config")
+	}
+
+	// Validate thresholds: nil fields are skipped, percentages must lie in [0, 100], and latency/TTFT limits must be >= 0.
+	if cfg.SLAPercentMin != nil && (*cfg.SLAPercentMin < 0 || *cfg.SLAPercentMin > 100) {
+		return nil, errors.New("sla_percent_min must be between 0 and 100")
+	}
+	if cfg.LatencyP99MsMax != nil && *cfg.LatencyP99MsMax < 0 {
+		return nil, errors.New("latency_p99_ms_max must be >= 0")
+	}
+	if cfg.TTFTp99MsMax != nil && *cfg.TTFTp99MsMax < 0 {
+		return nil, errors.New("ttft_p99_ms_max must be >= 0")
+	}
+	if cfg.RequestErrorRatePercentMax != nil && (*cfg.RequestErrorRatePercentMax < 0 || *cfg.RequestErrorRatePercentMax > 100) {
+		return nil, errors.New("request_error_rate_percent_max must be between 0 and 100")
+	}
+	if cfg.UpstreamErrorRatePercentMax != nil && (*cfg.UpstreamErrorRatePercentMax < 0 || *cfg.UpstreamErrorRatePercentMax > 100) {
+		return nil, errors.New("upstream_error_rate_percent_max must be between 0 and 100")
+	}
+
+	raw, err := json.Marshal(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(raw)); err != nil {
+		return nil, err
+	}
+
+	updated := &OpsMetricThresholds{}
+	_ = json.Unmarshal(raw, updated) // raw was produced by json.Marshal just above, so the decode error is deliberately dropped
+	return updated, nil
+}
--- a/backend/internal/service/ops_settings_models.go
+++ b/backend/internal/service/ops_settings_models.go
@@ -61,17 +61,29 @@ type OpsAlertSilencingSettings struct {
 	Entries []OpsAlertSilenceEntry `json:"entries,omitempty"`
 }

+type OpsMetricThresholds struct {
+	SLAPercentMin               *float64 `json:"sla_percent_min,omitempty"`                 // SLA below this value is shown in red
+	LatencyP99MsMax             *float64 `json:"latency_p99_ms_max,omitempty"`              // P99 latency above this value is shown in red
+	TTFTp99MsMax                *float64 `json:"ttft_p99_ms_max,omitempty"`                 // P99 TTFT above this value is shown in red
+	RequestErrorRatePercentMax  *float64 `json:"request_error_rate_percent_max,omitempty"`  // request error rate above this value is shown in red
+	UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // upstream error rate above this value is shown in red
+}
+
 type OpsAlertRuntimeSettings struct {
 	EvaluationIntervalSeconds int `json:"evaluation_interval_seconds"`

 	DistributedLock OpsDistributedLockSettings `json:"distributed_lock"`
 	Silencing       OpsAlertSilencingSettings  `json:"silencing"`
+	Thresholds      OpsMetricThresholds        `json:"thresholds"` // metric threshold configuration
 }

 // OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).
 type OpsAdvancedSettings struct {
-	DataRetention OpsDataRetentionSettings `json:"data_retention"`
-	Aggregation   OpsAggregationSettings   `json:"aggregation"`
+	DataRetention           OpsDataRetentionSettings `json:"data_retention"`
+	Aggregation             OpsAggregationSettings   `json:"aggregation"`
+	IgnoreCountTokensErrors bool                     `json:"ignore_count_tokens_errors"`    // defaults to false; presumably suppresses ops error records for count_tokens requests (confirm at the consumer)
+	AutoRefreshEnabled      bool                     `json:"auto_refresh_enabled"`          // defaults to false
+	AutoRefreshIntervalSec  int                      `json:"auto_refresh_interval_seconds"` // seconds; values <= 0 normalize to 30, and validation accepts 15..300
 }

 type OpsDataRetentionSettings struct {
--- a/backend/internal/service/prompts/codex_cli_instructions.md
+++ b/backend/internal/service/prompts/codex_cli_instructions.md
@@ -0,0 +1,275 @@
+You are a coding agent running in the Codex CLI, a terminal-based coding assistant. Codex CLI is an open source project led by OpenAI. You are expected to be precise, safe, and helpful.
+
+Your capabilities:
+
+- Receive user prompts and other context provided by the harness, such as files in the workspace.
+- Communicate with the user by streaming thinking & responses, and by making & updating plans.
+- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
+
+Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
+
+# How you work
+
+## Personality
+
+Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work.
+
+# AGENTS.md spec
+- Repos often contain AGENTS.md files. These files can appear anywhere within the repository.
+- These files are a way for humans to give you (the agent) instructions or tips for working within the container.
+- Some examples might be: coding conventions, info about how code is organized, or instructions for how to run or test code.
+- Instructions in AGENTS.md files:
+    - The scope of an AGENTS.md file is the entire directory tree rooted at the folder that contains it.
+    - For every file you touch in the final patch, you must obey instructions in any AGENTS.md file whose scope includes that file.
+    - Instructions about code style, structure, naming, etc. apply only to code within the AGENTS.md file's scope, unless the file states otherwise.
+    - More-deeply-nested AGENTS.md files take precedence in the case of conflicting instructions.
+    - Direct system/developer/user instructions (as part of a prompt) take precedence over AGENTS.md instructions.
+- The contents of the AGENTS.md file at the root of the repo and any directories from the CWD up to the root are included with the developer message and don't need to be re-read. When working in a subdirectory of CWD, or a directory outside the CWD, check for any AGENTS.md files that may be applicable.
+
+## Responsiveness
+
+### Preamble messages
+
+Before making tool calls, send a brief preamble to the user explaining what you’re about to do. When sending preamble messages, follow these principles and examples:
+
+- **Logically group related actions**: if you’re about to run several related commands, describe them together in one preamble rather than sending a separate note for each.
+- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words for quick updates).
+- **Build on prior context**: if this is not your first tool call, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.
+- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.
+- **Exception**: Avoid adding a preamble for every trivial read (e.g., `cat` a single file) unless it’s part of a larger grouped action.
+
+**Examples:**
+
+- “I’ve explored the repo; now checking the API route definitions.”
+- “Next, I’ll patch the config and update the related tests.”
+- “I’m about to scaffold the CLI commands and helper functions.”
+- “Ok cool, so I’ve wrapped my head around the repo. Now digging into the API routes.”
+- “Config’s looking tidy. Next up is patching helpers to keep things in sync.”
+- “Finished poking at the DB gateway. I will now chase down error handling.”
+- “Alright, build pipeline order is interesting. Checking how it reports failures.”
+- “Spotted a clever caching util; now hunting where it gets used.”
+
+## Planning
+
+You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go.
+
+Note that plans are not for padding out simple work with filler steps or stating the obvious. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
+
+Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
+
+Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step. It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
+
+Use a plan when:
+
+- The task is non-trivial and will require multiple actions over a long time horizon.
+- There are logical phases or dependencies where sequencing matters.
+- The work has ambiguity that benefits from outlining high-level goals.
+- You want intermediate checkpoints for feedback and validation.
+- When the user asked you to do more than one thing in a single prompt
+- The user has asked you to use the plan tool (aka "TODOs")
+- You generate additional steps while working, and plan to do them before yielding to the user
+
+### Examples
+
+**High-quality plans**
+
+Example 1:
+
+1. Add CLI entry with file args
+2. Parse Markdown via CommonMark library
+3. Apply semantic HTML template
+4. Handle code blocks, images, links
+5. Add error handling for invalid files
+
+Example 2:
+
+1. Define CSS variables for colors
+2. Add toggle with localStorage state
+3. Refactor components to use variables
+4. Verify all views for readability
+5. Add smooth theme-change transition
+
+Example 3:
+
+1. Set up Node.js + WebSocket server
+2. Add join/leave broadcast events
+3. Implement messaging with timestamps
+4. Add usernames + mention highlighting
+5. Persist messages in lightweight DB
+6. Add typing indicators + unread count
+
+**Low-quality plans**
+
+Example 1:
+
+1. Create CLI tool
+2. Add Markdown parser
+3. Convert to HTML
+
+Example 2:
+
+1. Add dark mode toggle
+2. Save preference
+3. Make styles look good
+
+Example 3:
+
+1. Create single-file HTML game
+2. Run quick sanity check
+3. Summarize usage instructions
+
+If you need to write a plan, only write high quality plans, not low quality ones.
+
+## Task execution
+
+You are a coding agent. Please keep going until the query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. Autonomously resolve the query to the best of your ability, using the tools available to you, before coming back to the user. Do NOT guess or make up an answer.
+
+You MUST adhere to the following criteria when solving queries:
+
+- Working on the repo(s) in the current environment is allowed, even if they are proprietary.
+- Analyzing code for vulnerabilities is allowed.
+- Showing user code and tool call details is allowed.
+- Use the `apply_patch` tool to edit files (NEVER try `applypatch` or `apply-patch`, only `apply_patch`): {"command":["apply_patch","*** Begin Patch\\n*** Update File: path/to/file.py\\n@@ def example():\\n- pass\\n+ return 123\\n*** End Patch"]}
+
+If completing the user's task requires writing or modifying files, your code and final answer should follow these coding guidelines, though user instructions (i.e. AGENTS.md) may override these guidelines:
+
+- Fix the problem at the root cause rather than applying surface-level patches, when possible.
+- Avoid unneeded complexity in your solution.
+- Do not attempt to fix unrelated bugs or broken tests. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
+- Update documentation as necessary.
+- Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task.
+- Use `git log` and `git blame` to search the history of the codebase if additional context is required.
+- NEVER add copyright or license headers unless specifically requested.
+- Do not waste tokens by re-reading files after calling `apply_patch` on them. The tool call will fail if it didn't work. The same goes for making folders, deleting folders, etc.
+- Do not `git commit` your changes or create new git branches unless explicitly requested.
+- Do not add inline comments within code unless explicitly requested.
+- Do not use one-letter variable names unless explicitly requested.
+- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor.
+
+## Validating your work
+
+If the codebase has tests or the ability to build or run, consider using them to verify that your work is complete. 
+
+When testing, your philosophy should be to start as specific as possible to the code you changed so that you can catch issues efficiently, then make your way to broader tests as you build confidence. If there's no test for the code you changed, and if the adjacent patterns in the codebases show that there's a logical place for you to add a test, you may do so. However, do not add tests to codebases with no tests.
+
+Similarly, once you're confident in correctness, you can suggest or use formatting commands to ensure that your code is well formatted. If there are issues you can iterate up to 3 times to get formatting right, but if you still can't manage it's better to save the user time and present them a correct solution where you call out the formatting in your final message. If the codebase does not have a formatter configured, do not add one.
+
+For all of testing, running, building, and formatting, do not attempt to fix unrelated bugs. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
+
+Be mindful of whether to run validation commands proactively. In the absence of behavioral guidance:
+
+- When running in non-interactive approval modes like **never** or **on-failure**, proactively run tests, lint and do whatever you need to ensure you've completed the task.
+- When working in interactive approval modes like **untrusted**, or **on-request**, hold off on running tests or lint commands until the user is ready for you to finalize your output, because these commands take time to run and slow down iteration. Instead suggest what you want to do next, and let the user confirm first.
+- When working on test-related tasks, such as adding tests, fixing tests, or reproducing a bug to verify behavior, you may proactively run tests regardless of approval mode. Use your judgement to decide whether this is a test-related task.
+
+## Ambition vs. precision
+
+For tasks that have no prior context (i.e. the user is starting something brand new), you should feel free to be ambitious and demonstrate creativity with your implementation.
+
+If you're operating in an existing codebase, you should make sure you do exactly what the user asks with surgical precision. Treat the surrounding codebase with respect, and don't overstep (i.e. changing filenames or variables unnecessarily). You should balance being sufficiently ambitious and proactive when completing tasks of this nature.
+
+You should use judicious initiative to decide on the right level of detail and complexity to deliver based on the user's needs. This means showing good judgment that you're capable of doing the right extras without gold-plating. This might be demonstrated by high-value, creative touches when scope of the task is vague; while being surgical and targeted when scope is tightly specified.
+
+## Sharing progress updates
+
+For especially longer tasks that you work on (i.e. requiring many tool calls, or a plan with multiple steps), you should provide progress updates back to the user at reasonable intervals. These updates should be structured as a concise sentence or two (no more than 8-10 words long) recapping progress so far in plain language: this update demonstrates your understanding of what needs to be done, progress so far (i.e. files explores, subtasks complete), and where you're going next.
+
+Before doing large chunks of work that may incur latency as experienced by the user (i.e. writing a new file), you should send a concise message to the user with an update indicating what you're about to do to ensure they know what you're spending time on. Don't start editing or writing large files before informing the user what you are doing and why.
+
+The messages you send before tool calls should describe what is immediately about to be done next in very concise language. If there was previous work done, this preamble message should also include a note about the work done so far to bring the user along.
+
+## Presenting your work and final message
+
+Your final message should read naturally, like an update from a concise teammate. For casual conversation, brainstorming tasks, or quick questions from the user, respond in a friendly, conversational tone. You should ask questions, suggest ideas, and adapt to the user’s style. If you've finished a large amount of work, when describing what you've done to the user, you should follow the final answer formatting guidelines to communicate substantive changes. You don't need to add structured formatting for one-word answers, greetings, or purely conversational exchanges.
+
+You can skip heavy formatting for single, simple actions or confirmations. In these cases, respond in plain sentences with any relevant next step or quick option. Reserve multi-section structured responses for results that need grouping or explanation.
+
+The user is working on the same computer as you, and has access to your work. As such there's no need to show the full contents of large files you have already written unless the user explicitly asks for them. Similarly, if you've created or modified files using `apply_patch`, there's no need to tell users to "save the file" or "copy the code into a file"—just reference the file path.
+
+If there's something that you think you could help with as a logical next step, concisely ask the user if they want you to do so. Good examples of this are running tests, committing changes, or building out the next logical component. If there’s something that you couldn't do (even with approval) but that the user might want to do (such as verifying changes by running the app), include those instructions succinctly.
+
+Brevity is very important as a default. You should be very concise (i.e. no more than 10 lines), but can relax this requirement for tasks where additional detail and comprehensiveness is important for the user's understanding.
+
+### Final answer structure and style guidelines
+
+You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
+
+**Section Headers**
+
+- Use only when they improve clarity — they are not mandatory for every answer.
+- Choose descriptive names that fit the content
+- Keep headers short (1–3 words) and in `**Title Case**`. Always start headers with `**` and end with `**`
+- Leave no blank line before the first bullet under a header.
+- Section headers should only be used where they genuinely improve scanability; avoid fragmenting the answer.
+
+**Bullets**
+
+- Use `-` followed by a space for every bullet.
+- Merge related points when possible; avoid a bullet for every trivial detail.
+- Keep bullets to one line unless breaking for clarity is unavoidable.
+- Group into short lists (4–6 bullets) ordered by importance.
+- Use consistent keyword phrasing and formatting across sections.
+
+**Monospace**
+
+- Wrap all commands, file paths, env vars, and code identifiers in backticks (`` `...` ``).
+- Apply to inline examples and to bullet keywords if the keyword itself is a literal file/command.
+- Never mix monospace and bold markers; choose one based on whether it’s a keyword (`**`) or inline code/path (`` ` ``).
+
+**File References**
+When referencing files in your response, make sure to include the relevant start line and always follow the below rules:
+  * Use inline code to make file paths clickable.
+  * Each reference should have a stand alone path. Even if it's the same file.
+  * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix.
+  * Line/column (1‑based, optional): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
+  * Do not use URIs like file://, vscode://, or https://.
+  * Do not provide range of lines
+  * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5
+
+**Structure**
+
+- Place related bullets together; don’t mix unrelated concepts in the same section.
+- Order sections from general → specific → supporting info.
+- For subsections (e.g., “Binaries” under “Rust Workspace”), introduce with a bolded keyword bullet, then list items under it.
+- Match structure to complexity:
+  - Multi-part or detailed results → use clear headers and grouped bullets.
+  - Simple results → minimal headers, possibly just a short list or paragraph.
+
+**Tone**
+
+- Keep the voice collaborative and natural, like a coding partner handing off work.
+- Be concise and factual — no filler or conversational commentary and avoid unnecessary repetition
+- Use present tense and active voice (e.g., “Runs tests” not “This will run tests”).
+- Keep descriptions self-contained; don’t refer to “above” or “below”.
+- Use parallel structure in lists for consistency.
+
+**Don’t**
+
+- Don’t use literal words “bold” or “monospace” in the content.
+- Don’t nest bullets or create deep hierarchies.
+- Don’t output ANSI escape codes directly — the CLI renderer applies them.
+- Don’t cram unrelated keywords into a single bullet; split for clarity.
+- Don’t let keyword lists run long — wrap or reformat for scanability.
+
+Generally, ensure your final answers adapt their shape and depth to the request. For example, answers to code explanations should have a precise, structured explanation with code references that answer the question directly. For tasks with a simple implementation, lead with the outcome and supplement only with what’s needed for clarity. Larger changes can be presented as a logical walkthrough of your approach, grouping related steps, explaining rationale where it adds value, and highlighting next actions to accelerate the user. Your answers should provide the right level of detail while being easily scannable.
+
+For casual greetings, acknowledgements, or other one-off conversational messages that are not delivering substantive information or structured results, respond naturally without section headers or bullet formatting.
+
+# Tool Guidelines
+
+## Shell commands
+
+When using the shell, you must adhere to the following guidelines:
+
+- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
+- Do not use python scripts to attempt to output larger chunks of a file.
+
+## `update_plan`
+
+A tool named `update_plan` is available to you. You can use it to keep an up‑to‑date, step‑by‑step plan for the task.
+
+To create a new plan, call `update_plan` with a short list of 1‑sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
+
+When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
+
+If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
--- a/backend/internal/service/ratelimit_service.go
+++ b/backend/internal/service/ratelimit_service.go
@@ -15,13 +15,15 @@ import (

 // RateLimitService 处理限流和过载状态管理
 type RateLimitService struct {
-	accountRepo        AccountRepository
-	usageRepo          UsageLogRepository
-	cfg                *config.Config
-	geminiQuotaService *GeminiQuotaService
-	tempUnschedCache   TempUnschedCache
-	usageCacheMu       sync.RWMutex
-	usageCache         map[int64]*geminiUsageCacheEntry
+	accountRepo         AccountRepository
+	usageRepo           UsageLogRepository
+	cfg                 *config.Config
+	geminiQuotaService  *GeminiQuotaService
+	tempUnschedCache    TempUnschedCache
+	timeoutCounterCache TimeoutCounterCache
+	settingService      *SettingService
+	usageCacheMu        sync.RWMutex
+	usageCache          map[int64]*geminiUsageCacheEntry
 }

 type geminiUsageCacheEntry struct {
@@ -44,11 +46,22 @@ func NewRateLimitService(accountRepo AccountRepository, usageRepo UsageLogReposi
 	}
 }

+// SetTimeoutCounterCache installs the timeout counter cache (optional dependency).
+// While it is nil, HandleStreamTimeout treats every timeout as a count of 1.
+func (s *RateLimitService) SetTimeoutCounterCache(cache TimeoutCounterCache) {
+	s.timeoutCounterCache = cache
+}
+
+// SetSettingService installs the system setting service (optional dependency).
+// While it is nil, HandleStreamTimeout logs a message and skips timeout handling.
+func (s *RateLimitService) SetSettingService(settingService *SettingService) {
+	s.settingService = settingService
+}
+
 // HandleUpstreamError 处理上游错误响应，标记账号状态
 // 返回是否应该停止该账号的调度
 func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Account, statusCode int, headers http.Header, responseBody []byte) (shouldDisable bool) {
 	// apikey 类型账号：检查自定义错误码配置
 	// 如果启用且错误码不在列表中，则不处理（不停止调度、不标记限流/过载）
+	customErrorCodesEnabled := account.IsCustomErrorCodesEnabled()
 	if !account.ShouldHandleErrorCode(statusCode) {
 		log.Printf("Account %d: error %d skipped (not in custom error codes)", account.ID, statusCode)
 		return false
@@ -93,11 +106,19 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
 		s.handle529(ctx, account)
 		shouldDisable = false
 	default:
-		// 其他5xx错误：记录但不停止调度
-		if statusCode >= 500 {
+		// 自定义错误码启用时：在列表中的错误码都应该停止调度
+		if customErrorCodesEnabled {
+			msg := "Custom error code triggered"
+			if upstreamMsg != "" {
+				msg = upstreamMsg
+			}
+			s.handleCustomErrorCode(ctx, account, statusCode, msg)
+			shouldDisable = true
+		} else if statusCode >= 500 {
+			// 未启用自定义错误码时：仅记录5xx错误
 			log.Printf("Account %d received upstream error %d", account.ID, statusCode)
+			shouldDisable = false
 		}
-		shouldDisable = false
 	}

 	if tempMatched {
@@ -273,6 +294,16 @@ func (s *RateLimitService) handleAuthError(ctx context.Context, account *Account
 	log.Printf("Account %d disabled due to auth error: %s", account.ID, errorMsg)
 }

+// handleCustomErrorCode handles a custom error code by recording an error on
+// the account through accountRepo.SetError, which stops the account from being
+// scheduled. Failures to persist the error are logged and otherwise ignored.
+func (s *RateLimitService) handleCustomErrorCode(ctx context.Context, account *Account, statusCode int, errorMsg string) {
+	reason := "Custom error code " + strconv.Itoa(statusCode) + ": " + errorMsg
+	err := s.accountRepo.SetError(ctx, account.ID, reason)
+	if err == nil {
+		log.Printf("Account %d disabled due to custom error code %d: %s", account.ID, statusCode, errorMsg)
+		return
+	}
+	log.Printf("SetError failed for account %d: %v", account.ID, err)
+}
+
 // handle429 处理429限流错误
 // 解析响应头获取重置时间，标记账号为限流状态
 func (s *RateLimitService) handle429(ctx context.Context, account *Account, headers http.Header) {
@@ -555,3 +586,125 @@ func truncateTempUnschedMessage(body []byte, maxBytes int) string {
 	}
 	return strings.TrimSpace(string(body))
 }
+
+// HandleStreamTimeout 处理流数据超时
+// 根据系统设置决定是否标记账户为临时不可调度或错误状态
+// 返回是否应该停止该账号的调度
+func (s *RateLimitService) HandleStreamTimeout(ctx context.Context, account *Account, model string) bool {
+	if account == nil {
+		return false
+	}
+
+	// 获取系统设置
+	if s.settingService == nil {
+		log.Printf("[StreamTimeout] settingService not configured, skipping timeout handling for account %d", account.ID)
+		return false
+	}
+
+	settings, err := s.settingService.GetStreamTimeoutSettings(ctx)
+	if err != nil {
+		log.Printf("[StreamTimeout] Failed to get settings: %v", err)
+		return false
+	}
+
+	if !settings.Enabled {
+		return false
+	}
+
+	if settings.Action == StreamTimeoutActionNone {
+		return false
+	}
+
+	// 增加超时计数
+	var count int64 = 1
+	if s.timeoutCounterCache != nil {
+		count, err = s.timeoutCounterCache.IncrementTimeoutCount(ctx, account.ID, settings.ThresholdWindowMinutes)
+		if err != nil {
+			log.Printf("[StreamTimeout] Failed to increment timeout count for account %d: %v", account.ID, err)
+			// 继续处理，使用 count=1
+			count = 1
+		}
+	}
+
+	log.Printf("[StreamTimeout] Account %d timeout count: %d/%d (window: %d min, model: %s)",
+		account.ID, count, settings.ThresholdCount, settings.ThresholdWindowMinutes, model)
+
+	// 检查是否达到阈值
+	if count < int64(settings.ThresholdCount) {
+		return false
+	}
+
+	// 达到阈值，执行相应操作
+	switch settings.Action {
+	case StreamTimeoutActionTempUnsched:
+		return s.triggerStreamTimeoutTempUnsched(ctx, account, settings, model)
+	case StreamTimeoutActionError:
+		return s.triggerStreamTimeoutError(ctx, account, model)
+	default:
+		return false
+	}
+}
+
+// triggerStreamTimeoutTempUnsched marks the account as temporarily
+// unschedulable because of repeated stream timeouts.
+// The window lasts settings.TempUnschedMinutes from now. The state is written
+// to the repository first; the cache write and the counter reset that follow
+// are best-effort (failures are only logged).
+// It returns true when the repository update succeeded.
+func (s *RateLimitService) triggerStreamTimeoutTempUnsched(ctx context.Context, account *Account, settings *StreamTimeoutSettings, model string) bool {
+	now := time.Now()
+	until := now.Add(time.Duration(settings.TempUnschedMinutes) * time.Minute)
+
+	state := &TempUnschedState{
+		UntilUnix:       until.Unix(),
+		TriggeredAtUnix: now.Unix(),
+		StatusCode:      0, // a timeout has no status code
+		MatchedKeyword:  "stream_timeout",
+		RuleIndex:       -1, // marks a system-level rule
+		ErrorMessage:    "Stream data interval timeout for model: " + model,
+	}
+
+	// Prefer the JSON-encoded state as the reason; fall back to the plain
+	// error message if encoding fails or yields an empty string.
+	reason := ""
+	if raw, err := json.Marshal(state); err == nil {
+		reason = string(raw)
+	}
+	if reason == "" {
+		reason = state.ErrorMessage
+	}
+
+	if err := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, reason); err != nil {
+		log.Printf("[StreamTimeout] SetTempUnschedulable failed for account %d: %v", account.ID, err)
+		return false
+	}
+
+	if s.tempUnschedCache != nil {
+		if err := s.tempUnschedCache.SetTempUnsched(ctx, account.ID, state); err != nil {
+			log.Printf("[StreamTimeout] SetTempUnsched cache failed for account %d: %v", account.ID, err)
+		}
+	}
+
+	// Reset the timeout counter.
+	if s.timeoutCounterCache != nil {
+		if err := s.timeoutCounterCache.ResetTimeoutCount(ctx, account.ID); err != nil {
+			log.Printf("[StreamTimeout] ResetTimeoutCount failed for account %d: %v", account.ID, err)
+		}
+	}
+
+	log.Printf("[StreamTimeout] Account %d marked as temp unschedulable until %v (model: %s)", account.ID, until, model)
+	return true
+}
+
+// triggerStreamTimeoutError puts the account into the error state after
+// repeated stream timeouts and then resets its timeout counter (best-effort).
+// It returns true when the error state was stored successfully.
+func (s *RateLimitService) triggerStreamTimeoutError(ctx context.Context, account *Account, model string) bool {
+	reason := "Stream data interval timeout (repeated failures) for model: " + model
+	if setErr := s.accountRepo.SetError(ctx, account.ID, reason); setErr != nil {
+		log.Printf("[StreamTimeout] SetError failed for account %d: %v", account.ID, setErr)
+		return false
+	}
+
+	// Start counting from zero again; a failed reset is only logged.
+	if counter := s.timeoutCounterCache; counter != nil {
+		resetErr := counter.ResetTimeoutCount(ctx, account.ID)
+		if resetErr != nil {
+			log.Printf("[StreamTimeout] ResetTimeoutCount failed for account %d: %v", account.ID, resetErr)
+		}
+	}
+
+	log.Printf("[StreamTimeout] Account %d marked as error (model: %s)", account.ID, model)
+	return true
+}
--- a/backend/internal/service/scheduler_cache.go
+++ b/backend/internal/service/scheduler_cache.go
@@ -0,0 +1,68 @@
+package service
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Scheduler bucket modes, stored in SchedulerBucket.Mode and used as the third
+// field of the bucket's string form.
+const (
+	SchedulerModeSingle = "single"
+	SchedulerModeMixed  = "mixed"
+	SchedulerModeForced = "forced"
+)
+
+// SchedulerBucket identifies one scheduling snapshot by group, platform and mode.
+type SchedulerBucket struct {
+	GroupID  int64  // group the bucket belongs to; 0 is used when no group applies
+	Platform string // platform name
+	Mode     string // one of the SchedulerMode* constants
+}
+
+// String renders the bucket as "groupID:platform:mode", the form that
+// ParseSchedulerBucket accepts.
+func (b SchedulerBucket) String() string {
+	return fmt.Sprintf("%d:%s:%s", b.GroupID, b.Platform, b.Mode)
+}
+
+func ParseSchedulerBucket(raw string) (SchedulerBucket, bool) {
+	parts := strings.Split(raw, ":")
+	if len(parts) != 3 {
+		return SchedulerBucket{}, false
+	}
+	groupID, err := strconv.ParseInt(parts[0], 10, 64)
+	if err != nil {
+		return SchedulerBucket{}, false
+	}
+	if parts[1] == "" || parts[2] == "" {
+		return SchedulerBucket{}, false
+	}
+	return SchedulerBucket{
+		GroupID:  groupID,
+		Platform: parts[1],
+		Mode:     parts[2],
+	}, true
+}
+
+// SchedulerCache reads and writes the cached scheduling snapshots and the
+// cached per-account snapshots.
+type SchedulerCache interface {
+	// GetSnapshot reads a snapshot and reports whether it was a hit
+	// (ready + active + data complete).
+	GetSnapshot(ctx context.Context, bucket SchedulerBucket) ([]*Account, bool, error)
+	// SetSnapshot writes a snapshot and switches the active version to it.
+	SetSnapshot(ctx context.Context, bucket SchedulerBucket, accounts []Account) error
+	// GetAccount fetches the snapshot of a single account.
+	GetAccount(ctx context.Context, accountID int64) (*Account, error)
+	// SetAccount writes the snapshot of a single account (including its
+	// unschedulable state).
+	SetAccount(ctx context.Context, account *Account) error
+	// DeleteAccount removes the snapshot of a single account.
+	DeleteAccount(ctx context.Context, accountID int64) error
+	// UpdateLastUsed updates the last-used time of several accounts at once.
+	UpdateLastUsed(ctx context.Context, updates map[int64]time.Time) error
+	// TryLockBucket tries to acquire the rebuild lock of a bucket.
+	TryLockBucket(ctx context.Context, bucket SchedulerBucket, ttl time.Duration) (bool, error)
+	// ListBuckets returns the set of registered buckets.
+	ListBuckets(ctx context.Context) ([]SchedulerBucket, error)
+	// GetOutboxWatermark reads the outbox watermark.
+	GetOutboxWatermark(ctx context.Context) (int64, error)
+	// SetOutboxWatermark stores the outbox watermark.
+	SetOutboxWatermark(ctx context.Context, id int64) error
+}
--- a/backend/internal/service/scheduler_events.go
+++ b/backend/internal/service/scheduler_events.go
@@ -0,0 +1,10 @@
+package service
+
+// Event type names carried in SchedulerOutboxEvent.EventType.
+const (
+	SchedulerOutboxEventAccountChanged       = "account_changed"
+	SchedulerOutboxEventAccountGroupsChanged = "account_groups_changed"
+	SchedulerOutboxEventAccountBulkChanged   = "account_bulk_changed"
+	SchedulerOutboxEventAccountLastUsed      = "account_last_used"
+	SchedulerOutboxEventGroupChanged         = "group_changed"
+	SchedulerOutboxEventFullRebuild          = "full_rebuild"
+)
--- a/backend/internal/service/scheduler_outbox.go
+++ b/backend/internal/service/scheduler_outbox.go
@@ -0,0 +1,21 @@
+package service
+
+import (
+	"context"
+	"time"
+)
+
+// SchedulerOutboxEvent is one entry read from the scheduler outbox.
+type SchedulerOutboxEvent struct {
+	ID        int64          // event identifier
+	EventType string         // one of the SchedulerOutboxEvent* constants
+	AccountID *int64         // optional account the event refers to
+	GroupID   *int64         // optional group the event refers to
+	Payload   map[string]any // optional event-specific data
+	CreatedAt time.Time      // creation time of the event
+}
+
+// SchedulerOutboxRepository provides read access to the scheduler outbox.
+type SchedulerOutboxRepository interface {
+	// ListAfter presumably returns up to limit events whose ID is greater
+	// than afterID — TODO confirm ordering against the implementation.
+	ListAfter(ctx context.Context, afterID int64, limit int) ([]SchedulerOutboxEvent, error)
+	// MaxID presumably returns the largest event ID currently in the outbox
+	// — TODO confirm against the implementation.
+	MaxID(ctx context.Context) (int64, error)
+}
--- a/backend/internal/service/scheduler_snapshot_service.go
+++ b/backend/internal/service/scheduler_snapshot_service.go
@@ -0,0 +1,786 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"log"
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+)
+
+// Errors returned by SchedulerSnapshotService.
+var (
+	// ErrSchedulerCacheNotReady is returned when a required dependency (cache
+	// or account repository) is missing, or when the DB fallback is disabled.
+	ErrSchedulerCacheNotReady = errors.New("scheduler cache not ready")
+	// ErrSchedulerFallbackLimited is returned when the DB fallback rate
+	// limiter rejects a request.
+	ErrSchedulerFallbackLimited = errors.New("scheduler db fallback limited")
+)
+
+// outboxEventTimeout bounds the handling of a single outbox event.
+const outboxEventTimeout = 2 * time.Minute
+
+// SchedulerSnapshotService serves schedulable-account lists and single
+// accounts from the scheduler cache, falls back to the database on a miss, and
+// runs background workers that keep the cached snapshots up to date.
+type SchedulerSnapshotService struct {
+	cache         SchedulerCache            // snapshot cache; nil disables caching and the workers
+	outboxRepo    SchedulerOutboxRepository // outbox source; nil disables outbox polling
+	accountRepo   AccountRepository         // database access for accounts
+	groupRepo     GroupRepository           // used to list active groups for the default buckets
+	cfg           *config.Config            // may be nil; defaults then apply
+	stopCh        chan struct{}             // closed by Stop to end the ticker-driven workers
+	stopOnce      sync.Once                 // ensures stopCh is closed only once
+	wg            sync.WaitGroup            // tracks the goroutines launched by Start
+	fallbackLimit *fallbackLimiter          // DB fallback rate limiter; nil means unlimited
+	lagMu         sync.Mutex                // guards lagFailures
+	lagFailures   int                       // consecutive outbox lag checks over the rebuild threshold
+}
+
+func NewSchedulerSnapshotService(
+	cache SchedulerCache,
+	outboxRepo SchedulerOutboxRepository,
+	accountRepo AccountRepository,
+	groupRepo GroupRepository,
+	cfg *config.Config,
+) *SchedulerSnapshotService {
+	maxQPS := 0
+	if cfg != nil {
+		maxQPS = cfg.Gateway.Scheduling.DbFallbackMaxQPS
+	}
+	return &SchedulerSnapshotService{
+		cache:         cache,
+		outboxRepo:    outboxRepo,
+		accountRepo:   accountRepo,
+		groupRepo:     groupRepo,
+		cfg:           cfg,
+		stopCh:        make(chan struct{}),
+		fallbackLimit: newFallbackLimiter(maxQPS),
+	}
+}
+
+// Start launches the background goroutines, all tracked by s.wg:
+//   - a one-off initial rebuild;
+//   - the outbox worker, when an outbox repository is set and the poll
+//     interval is positive;
+//   - the periodic full-rebuild worker, when its interval is positive.
+//
+// It does nothing when the receiver or the cache is nil.
+// NOTE(review): nothing here prevents a second call from launching a second
+// set of workers — confirm callers invoke Start only once.
+func (s *SchedulerSnapshotService) Start() {
+	if s == nil || s.cache == nil {
+		return
+	}
+
+	s.wg.Add(1)
+	go func() {
+		defer s.wg.Done()
+		s.runInitialRebuild()
+	}()
+
+	interval := s.outboxPollInterval()
+	if s.outboxRepo != nil && interval > 0 {
+		s.wg.Add(1)
+		go func() {
+			defer s.wg.Done()
+			s.runOutboxWorker(interval)
+		}()
+	}
+
+	fullInterval := s.fullRebuildInterval()
+	if fullInterval > 0 {
+		s.wg.Add(1)
+		go func() {
+			defer s.wg.Done()
+			s.runFullRebuildWorker(fullInterval)
+		}()
+	}
+}
+
+// Stop signals the workers to exit by closing stopCh (at most once) and then
+// waits for every goroutine started by Start to return. It is safe to call on
+// a nil receiver and to call more than once.
+func (s *SchedulerSnapshotService) Stop() {
+	if s == nil {
+		return
+	}
+	s.stopOnce.Do(func() {
+		close(s.stopCh)
+	})
+	s.wg.Wait()
+}
+
+// ListSchedulableAccounts returns the schedulable accounts for the bucket
+// derived from groupID, platform and hasForcePlatform.
+//
+// The second result (useMixed) is true when platform is Anthropic or Gemini
+// and no platform is forced; it is returned on every path, including errors.
+//
+// The snapshot cache is consulted first. On a miss or a cache read error the
+// accounts are loaded from the database, subject to guardFallback and the
+// fallback timeout, and then written back to the cache; a failed write-back is
+// only logged.
+func (s *SchedulerSnapshotService) ListSchedulableAccounts(ctx context.Context, groupID *int64, platform string, hasForcePlatform bool) ([]Account, bool, error) {
+	useMixed := (platform == PlatformAnthropic || platform == PlatformGemini) && !hasForcePlatform
+	mode := s.resolveMode(platform, hasForcePlatform)
+	bucket := s.bucketFor(groupID, platform, mode)
+
+	if s.cache != nil {
+		cached, hit, err := s.cache.GetSnapshot(ctx, bucket)
+		if err != nil {
+			// A cache read error is not fatal: fall through to the database.
+			log.Printf("[Scheduler] cache read failed: bucket=%s err=%v", bucket.String(), err)
+		} else if hit {
+			return derefAccounts(cached), useMixed, nil
+		}
+	}
+
+	// Cache miss: the DB fallback may be disabled or rate limited.
+	if err := s.guardFallback(ctx); err != nil {
+		return nil, useMixed, err
+	}
+
+	fallbackCtx, cancel := s.withFallbackTimeout(ctx)
+	defer cancel()
+
+	accounts, err := s.loadAccountsFromDB(fallbackCtx, bucket, useMixed)
+	if err != nil {
+		return nil, useMixed, err
+	}
+
+	if s.cache != nil {
+		if err := s.cache.SetSnapshot(fallbackCtx, bucket, accounts); err != nil {
+			log.Printf("[Scheduler] cache write failed: bucket=%s err=%v", bucket.String(), err)
+		}
+	}
+
+	return accounts, useMixed, nil
+}
+
+// GetAccount returns the account with the given ID, preferring the cached
+// account snapshot and falling back to the database on a miss or a cache read
+// error.
+//
+// It returns (nil, nil) for a non-positive accountID. The database fallback is
+// subject to guardFallback and the fallback timeout. When no account
+// repository is configured it returns ErrSchedulerCacheNotReady, matching
+// loadAccountsFromDB, instead of dereferencing a nil repository.
+func (s *SchedulerSnapshotService) GetAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if accountID <= 0 {
+		return nil, nil
+	}
+	if s.cache != nil {
+		account, err := s.cache.GetAccount(ctx, accountID)
+		if err != nil {
+			// A cache read error is not fatal: fall through to the database.
+			log.Printf("[Scheduler] account cache read failed: id=%d err=%v", accountID, err)
+		} else if account != nil {
+			return account, nil
+		}
+	}
+
+	// Check the repository before guardFallback so that a misconfigured
+	// service does not consume fallback rate-limit budget.
+	if s.accountRepo == nil {
+		return nil, ErrSchedulerCacheNotReady
+	}
+	if err := s.guardFallback(ctx); err != nil {
+		return nil, err
+	}
+	fallbackCtx, cancel := s.withFallbackTimeout(ctx)
+	defer cancel()
+	return s.accountRepo.GetByID(fallbackCtx, accountID)
+}
+
+// runInitialRebuild rebuilds the snapshots once at startup, within a two-minute
+// budget. It rebuilds the buckets registered in the cache; when none are
+// registered (or listing them failed, which is only logged) it uses
+// defaultBuckets instead. Rebuild errors are logged, not returned.
+func (s *SchedulerSnapshotService) runInitialRebuild() {
+	if s.cache == nil {
+		return
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+	buckets, err := s.cache.ListBuckets(ctx)
+	if err != nil {
+		log.Printf("[Scheduler] list buckets failed: %v", err)
+	}
+	if len(buckets) == 0 {
+		buckets, err = s.defaultBuckets(ctx)
+		if err != nil {
+			log.Printf("[Scheduler] default buckets failed: %v", err)
+			return
+		}
+	}
+	if err := s.rebuildBuckets(ctx, buckets, "startup"); err != nil {
+		log.Printf("[Scheduler] rebuild startup failed: %v", err)
+	}
+}
+
+// runOutboxWorker polls the outbox immediately and then once per tick of the
+// given interval, until stopCh is closed.
+func (s *SchedulerSnapshotService) runOutboxWorker(interval time.Duration) {
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+
+	for {
+		s.pollOutbox()
+
+		// Wait for the next tick, or leave when the service is stopped.
+		select {
+		case <-s.stopCh:
+			return
+		case <-tick.C:
+		}
+	}
+}
+
+// runFullRebuildWorker triggers a full rebuild on every tick of the given
+// interval until stopCh is closed. Unlike the outbox worker it does not run
+// once up front. Rebuild errors are logged.
+func (s *SchedulerSnapshotService) runFullRebuildWorker(interval time.Duration) {
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ticker.C:
+			if err := s.triggerFullRebuild("interval"); err != nil {
+				log.Printf("[Scheduler] full rebuild failed: %v", err)
+			}
+		case <-s.stopCh:
+			return
+		}
+	}
+}
+
+// pollOutbox processes one batch of outbox events.
+//
+// It reads the current watermark, lists up to 200 events after it, and handles
+// them in order, each under its own outboxEventTimeout. If any event fails the
+// batch is abandoned without moving the watermark, so the whole batch is
+// retried on the next poll. After a fully successful batch the watermark is
+// advanced to the last event ID and the lag/backlog checks run.
+//
+// Reading and committing use separate 10-second contexts: handling the events
+// may take far longer than 10 seconds, and reusing the read context would make
+// the watermark write fail with an expired deadline, replaying the batch.
+func (s *SchedulerSnapshotService) pollOutbox() {
+	if s.outboxRepo == nil || s.cache == nil {
+		return
+	}
+	readCtx, cancelRead := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancelRead()
+
+	watermark, err := s.cache.GetOutboxWatermark(readCtx)
+	if err != nil {
+		log.Printf("[Scheduler] outbox watermark read failed: %v", err)
+		return
+	}
+
+	events, err := s.outboxRepo.ListAfter(readCtx, watermark, 200)
+	if err != nil {
+		log.Printf("[Scheduler] outbox poll failed: %v", err)
+		return
+	}
+	if len(events) == 0 {
+		return
+	}
+
+	for _, event := range events {
+		eventCtx, cancelEvent := context.WithTimeout(context.Background(), outboxEventTimeout)
+		err := s.handleOutboxEvent(eventCtx, event)
+		cancelEvent()
+		if err != nil {
+			log.Printf("[Scheduler] outbox handle failed: id=%d type=%s err=%v", event.ID, event.EventType, err)
+			return
+		}
+	}
+
+	// Fresh deadline for the commit phase; see the function comment.
+	commitCtx, cancelCommit := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancelCommit()
+
+	// If the watermark cannot be stored, the backlog check below keeps using
+	// the old value.
+	watermarkForCheck := watermark
+	lastID := events[len(events)-1].ID
+	if err := s.cache.SetOutboxWatermark(commitCtx, lastID); err != nil {
+		log.Printf("[Scheduler] outbox watermark write failed: %v", err)
+	} else {
+		watermarkForCheck = lastID
+	}
+
+	s.checkOutboxLag(commitCtx, events[0], watermarkForCheck)
+}
+
+// handleOutboxEvent dispatches one outbox event to its handler according to
+// the event type. Unknown event types are ignored and reported as success.
+func (s *SchedulerSnapshotService) handleOutboxEvent(ctx context.Context, event SchedulerOutboxEvent) error {
+	switch event.EventType {
+	case SchedulerOutboxEventFullRebuild:
+		return s.triggerFullRebuild("outbox")
+	case SchedulerOutboxEventGroupChanged:
+		return s.handleGroupEvent(ctx, event.GroupID)
+	case SchedulerOutboxEventAccountChanged, SchedulerOutboxEventAccountGroupsChanged:
+		// Both kinds of account change are handled the same way.
+		return s.handleAccountEvent(ctx, event.AccountID, event.Payload)
+	case SchedulerOutboxEventAccountBulkChanged:
+		return s.handleBulkAccountEvent(ctx, event.Payload)
+	case SchedulerOutboxEventAccountLastUsed:
+		return s.handleLastUsedEvent(ctx, event.Payload)
+	}
+	return nil
+}
+
+// handleLastUsedEvent applies a batch of last-used timestamps to the cache.
+// It reads payload["last_used"], a map from account ID (decimal string) to a
+// Unix timestamp in seconds. Entries with an unparsable or non-positive ID or
+// timestamp are skipped; if nothing valid remains the cache is not touched.
+func (s *SchedulerSnapshotService) handleLastUsedEvent(ctx context.Context, payload map[string]any) error {
+	if s.cache == nil || payload == nil {
+		return nil
+	}
+	entries, isMap := payload["last_used"].(map[string]any)
+	if !isMap || len(entries) == 0 {
+		return nil
+	}
+
+	updates := make(map[int64]time.Time, len(entries))
+	for rawID, rawSec := range entries {
+		accountID, parseErr := strconv.ParseInt(rawID, 10, 64)
+		if parseErr != nil || accountID <= 0 {
+			continue
+		}
+		if sec, valid := toInt64(rawSec); valid && sec > 0 {
+			updates[accountID] = time.Unix(sec, 0)
+		}
+	}
+
+	if len(updates) == 0 {
+		return nil
+	}
+	return s.cache.UpdateLastUsed(ctx, updates)
+}
+
+// handleBulkAccountEvent runs handleAccountEvent for every account ID listed
+// in payload["account_ids"], passing the same payload to each call. It stops
+// at, and returns, the first error.
+func (s *SchedulerSnapshotService) handleBulkAccountEvent(ctx context.Context, payload map[string]any) error {
+	if payload == nil {
+		return nil
+	}
+	accountIDs := parseInt64Slice(payload["account_ids"])
+	for i := range accountIDs {
+		err := s.handleAccountEvent(ctx, &accountIDs[i], payload)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// handleAccountEvent refreshes the cached state affected by a change to one
+// account.
+//
+// The account is reloaded from the repository. If it no longer exists, its
+// cached snapshot is deleted and the buckets of the groups named in
+// payload["group_ids"] are rebuilt. Otherwise the account snapshot is stored
+// and the buckets for the account's platform are rebuilt for the groups from
+// the payload, or for the account's own groups when the payload names none.
+//
+// A nil or non-positive accountID, or a missing account repository, is a no-op.
+func (s *SchedulerSnapshotService) handleAccountEvent(ctx context.Context, accountID *int64, payload map[string]any) error {
+	if accountID == nil || *accountID <= 0 {
+		return nil
+	}
+	if s.accountRepo == nil {
+		return nil
+	}
+
+	var groupIDs []int64
+	if payload != nil {
+		groupIDs = parseInt64Slice(payload["group_ids"])
+	}
+
+	account, err := s.accountRepo.GetByID(ctx, *accountID)
+	if err != nil {
+		if errors.Is(err, ErrAccountNotFound) {
+			// The account is gone: drop its snapshot and rebuild by group only,
+			// since its platform is no longer known.
+			if s.cache != nil {
+				if err := s.cache.DeleteAccount(ctx, *accountID); err != nil {
+					return err
+				}
+			}
+			return s.rebuildByGroupIDs(ctx, groupIDs, "account_miss")
+		}
+		return err
+	}
+	if s.cache != nil {
+		if err := s.cache.SetAccount(ctx, account); err != nil {
+			return err
+		}
+	}
+	if len(groupIDs) == 0 {
+		groupIDs = account.GroupIDs
+	}
+	return s.rebuildByAccount(ctx, account, groupIDs, "account_change")
+}
+
+// handleGroupEvent rebuilds the buckets of a single changed group.
+// A nil or non-positive group ID is ignored.
+func (s *SchedulerSnapshotService) handleGroupEvent(ctx context.Context, groupID *int64) error {
+	if groupID != nil && *groupID > 0 {
+		return s.rebuildByGroupIDs(ctx, []int64{*groupID}, "group_change")
+	}
+	return nil
+}
+
+func (s *SchedulerSnapshotService) rebuildByAccount(ctx context.Context, account *Account, groupIDs []int64, reason string) error {
+	if account == nil {
+		return nil
+	}
+	groupIDs = s.normalizeGroupIDs(groupIDs)
+	if len(groupIDs) == 0 {
+		return nil
+	}
+
+	var firstErr error
+	if err := s.rebuildBucketsForPlatform(ctx, account.Platform, groupIDs, reason); err != nil && firstErr == nil {
+		firstErr = err
+	}
+	if account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled() {
+		if err := s.rebuildBucketsForPlatform(ctx, PlatformAnthropic, groupIDs, reason); err != nil && firstErr == nil {
+			firstErr = err
+		}
+		if err := s.rebuildBucketsForPlatform(ctx, PlatformGemini, groupIDs, reason); err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+// rebuildByGroupIDs rebuilds the buckets of every known platform (Anthropic,
+// Gemini, OpenAI, Antigravity) for the given groups. Every rebuild is
+// attempted; the first error encountered is returned.
+func (s *SchedulerSnapshotService) rebuildByGroupIDs(ctx context.Context, groupIDs []int64, reason string) error {
+	ids := s.normalizeGroupIDs(groupIDs)
+	if len(ids) == 0 {
+		return nil
+	}
+
+	var firstErr error
+	for _, platform := range []string{PlatformAnthropic, PlatformGemini, PlatformOpenAI, PlatformAntigravity} {
+		err := s.rebuildBucketsForPlatform(ctx, platform, ids, reason)
+		if err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+// rebuildBucketsForPlatform rebuilds, for each group ID, the "single" and
+// "forced" buckets of the platform, plus the "mixed" bucket when the platform
+// is Anthropic or Gemini. An empty platform is a no-op. Every rebuild is
+// attempted; the first error encountered is returned.
+func (s *SchedulerSnapshotService) rebuildBucketsForPlatform(ctx context.Context, platform string, groupIDs []int64, reason string) error {
+	if platform == "" {
+		return nil
+	}
+
+	modes := []string{SchedulerModeSingle, SchedulerModeForced}
+	if platform == PlatformAnthropic || platform == PlatformGemini {
+		modes = append(modes, SchedulerModeMixed)
+	}
+
+	var firstErr error
+	for _, gid := range groupIDs {
+		for _, mode := range modes {
+			target := SchedulerBucket{GroupID: gid, Platform: platform, Mode: mode}
+			if err := s.rebuildBucket(ctx, target, reason); err != nil && firstErr == nil {
+				firstErr = err
+			}
+		}
+	}
+	return firstErr
+}
+
+// rebuildBuckets rebuilds each of the given buckets in order. A failure does
+// not stop the loop; the first error encountered is returned at the end.
+func (s *SchedulerSnapshotService) rebuildBuckets(ctx context.Context, buckets []SchedulerBucket, reason string) error {
+	var firstErr error
+	for i := range buckets {
+		err := s.rebuildBucket(ctx, buckets[i], reason)
+		if err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+// rebuildBucket reloads one bucket's accounts from the database and stores
+// them as the bucket's snapshot.
+//
+// It first tries to take the bucket's rebuild lock with a 30-second TTL. If
+// the lock is not obtained the rebuild is skipped and nil is returned
+// (presumably another rebuild of this bucket is in progress — verify against
+// the cache implementation). The load and the snapshot write share a
+// 30-second timeout derived from ctx.
+// It returns ErrSchedulerCacheNotReady when no cache is configured.
+func (s *SchedulerSnapshotService) rebuildBucket(ctx context.Context, bucket SchedulerBucket, reason string) error {
+	if s.cache == nil {
+		return ErrSchedulerCacheNotReady
+	}
+	ok, err := s.cache.TryLockBucket(ctx, bucket, 30*time.Second)
+	if err != nil {
+		return err
+	}
+	if !ok {
+		return nil
+	}
+
+	rebuildCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+
+	// A bucket in "mixed" mode is loaded with mixed-platform semantics.
+	accounts, err := s.loadAccountsFromDB(rebuildCtx, bucket, bucket.Mode == SchedulerModeMixed)
+	if err != nil {
+		log.Printf("[Scheduler] rebuild failed: bucket=%s reason=%s err=%v", bucket.String(), reason, err)
+		return err
+	}
+	if err := s.cache.SetSnapshot(rebuildCtx, bucket, accounts); err != nil {
+		log.Printf("[Scheduler] rebuild cache failed: bucket=%s reason=%s err=%v", bucket.String(), reason, err)
+		return err
+	}
+	log.Printf("[Scheduler] rebuild ok: bucket=%s reason=%s size=%d", bucket.String(), reason, len(accounts))
+	return nil
+}
+
+// triggerFullRebuild rebuilds every bucket registered in the cache, or the
+// default buckets when none are registered, within a two-minute budget of its
+// own (it does not take a caller context). reason is only used for logging.
+// It returns ErrSchedulerCacheNotReady when no cache is configured.
+func (s *SchedulerSnapshotService) triggerFullRebuild(reason string) error {
+	if s.cache == nil {
+		return ErrSchedulerCacheNotReady
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	buckets, err := s.cache.ListBuckets(ctx)
+	if err != nil {
+		log.Printf("[Scheduler] list buckets failed: %v", err)
+		return err
+	}
+	if len(buckets) == 0 {
+		buckets, err = s.defaultBuckets(ctx)
+		if err != nil {
+			log.Printf("[Scheduler] default buckets failed: %v", err)
+			return err
+		}
+	}
+	return s.rebuildBuckets(ctx, buckets, reason)
+}
+
+// checkOutboxLag inspects how far outbox processing is behind and triggers a
+// full rebuild when configured thresholds are exceeded.
+//
+// oldest is the first event of the batch that was just processed; its age is
+// the lag. watermark is the outbox position used for the backlog calculation.
+// Three independent checks run, each disabled by a non-positive setting:
+//   - a warning log when the lag reaches OutboxLagWarnSeconds;
+//   - a full rebuild after the lag has reached OutboxLagRebuildSeconds on
+//     OutboxLagRebuildFailures consecutive calls (the streak resets on any
+//     call below the threshold and after triggering);
+//   - a full rebuild when MaxID - watermark reaches OutboxBacklogRebuildRows.
+//
+// Nothing is checked when the event has no creation time or cfg is nil.
+func (s *SchedulerSnapshotService) checkOutboxLag(ctx context.Context, oldest SchedulerOutboxEvent, watermark int64) {
+	if oldest.CreatedAt.IsZero() || s.cfg == nil {
+		return
+	}
+
+	lag := time.Since(oldest.CreatedAt)
+	if lagSeconds := int(lag.Seconds()); lagSeconds >= s.cfg.Gateway.Scheduling.OutboxLagWarnSeconds && s.cfg.Gateway.Scheduling.OutboxLagWarnSeconds > 0 {
+		log.Printf("[Scheduler] outbox lag warning: %ds", lagSeconds)
+	}
+
+	if s.cfg.Gateway.Scheduling.OutboxLagRebuildSeconds > 0 && int(lag.Seconds()) >= s.cfg.Gateway.Scheduling.OutboxLagRebuildSeconds {
+		s.lagMu.Lock()
+		s.lagFailures++
+		failures := s.lagFailures
+		s.lagMu.Unlock()
+
+		if failures >= s.cfg.Gateway.Scheduling.OutboxLagRebuildFailures {
+			log.Printf("[Scheduler] outbox lag rebuild triggered: lag=%s failures=%d", lag, failures)
+			// Reset the streak before rebuilding; the lock is not held
+			// during the rebuild itself.
+			s.lagMu.Lock()
+			s.lagFailures = 0
+			s.lagMu.Unlock()
+			if err := s.triggerFullRebuild("outbox_lag"); err != nil {
+				log.Printf("[Scheduler] outbox lag rebuild failed: %v", err)
+			}
+		}
+	} else {
+		s.lagMu.Lock()
+		s.lagFailures = 0
+		s.lagMu.Unlock()
+	}
+
+	threshold := s.cfg.Gateway.Scheduling.OutboxBacklogRebuildRows
+	if threshold <= 0 || s.outboxRepo == nil {
+		return
+	}
+	maxID, err := s.outboxRepo.MaxID(ctx)
+	if err != nil {
+		// Best-effort check: a failed MaxID lookup is ignored.
+		return
+	}
+	if maxID-watermark >= int64(threshold) {
+		log.Printf("[Scheduler] outbox backlog rebuild triggered: backlog=%d", maxID-watermark)
+		if err := s.triggerFullRebuild("outbox_backlog"); err != nil {
+			log.Printf("[Scheduler] outbox backlog rebuild failed: %v", err)
+		}
+	}
+}
+
+// loadAccountsFromDB loads the schedulable accounts of a bucket from the
+// account repository.
+//
+// In simple run mode the group is ignored (treated as 0). A positive group ID
+// restricts the query to that group; otherwise the query is by platform only.
+// With useMixed, accounts of the bucket's platform and of Antigravity are
+// loaded together, and Antigravity accounts without mixed scheduling enabled
+// are dropped from the result.
+// It returns ErrSchedulerCacheNotReady when no account repository is set.
+func (s *SchedulerSnapshotService) loadAccountsFromDB(ctx context.Context, bucket SchedulerBucket, useMixed bool) ([]Account, error) {
+	if s.accountRepo == nil {
+		return nil, ErrSchedulerCacheNotReady
+	}
+	groupID := bucket.GroupID
+	if s.isRunModeSimple() {
+		groupID = 0
+	}
+
+	if useMixed {
+		platforms := []string{bucket.Platform, PlatformAntigravity}
+		var accounts []Account
+		var err error
+		if groupID > 0 {
+			accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, groupID, platforms)
+		} else {
+			accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, platforms)
+		}
+		if err != nil {
+			return nil, err
+		}
+		// Keep only the Antigravity accounts that opted into mixed scheduling.
+		filtered := make([]Account, 0, len(accounts))
+		for _, acc := range accounts {
+			if acc.Platform == PlatformAntigravity && !acc.IsMixedSchedulingEnabled() {
+				continue
+			}
+			filtered = append(filtered, acc)
+		}
+		return filtered, nil
+	}
+
+	if groupID > 0 {
+		return s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, groupID, bucket.Platform)
+	}
+	return s.accountRepo.ListSchedulableByPlatform(ctx, bucket.Platform)
+}
+
+// bucketFor builds the bucket key for a request; the group ID is normalized
+// with normalizeGroupID.
+func (s *SchedulerSnapshotService) bucketFor(groupID *int64, platform string, mode string) SchedulerBucket {
+	return SchedulerBucket{
+		GroupID:  s.normalizeGroupID(groupID),
+		Platform: platform,
+		Mode:     mode,
+	}
+}
+
+// normalizeGroupID maps an optional group ID to the value used in bucket keys:
+// 0 in simple run mode or when the ID is nil or non-positive, otherwise the ID
+// itself.
+func (s *SchedulerSnapshotService) normalizeGroupID(groupID *int64) int64 {
+	if s.isRunModeSimple() || groupID == nil || *groupID <= 0 {
+		return 0
+	}
+	return *groupID
+}
+
+// normalizeGroupIDs returns the distinct positive IDs of groupIDs in their
+// original order. It returns the single-element slice {0} in simple run mode,
+// for empty input, or when no positive ID remains.
+func (s *SchedulerSnapshotService) normalizeGroupIDs(groupIDs []int64) []int64 {
+	if s.isRunModeSimple() || len(groupIDs) == 0 {
+		return []int64{0}
+	}
+
+	unique := make([]int64, 0, len(groupIDs))
+	visited := make(map[int64]struct{}, len(groupIDs))
+	for _, gid := range groupIDs {
+		if gid <= 0 {
+			continue
+		}
+		if _, dup := visited[gid]; !dup {
+			visited[gid] = struct{}{}
+			unique = append(unique, gid)
+		}
+	}
+
+	if len(unique) == 0 {
+		return []int64{0}
+	}
+	return unique
+}
+
+// resolveMode picks the bucket mode for a request: "forced" when a platform is
+// forced, "mixed" for Anthropic or Gemini, and "single" for everything else.
+func (s *SchedulerSnapshotService) resolveMode(platform string, hasForcePlatform bool) string {
+	switch {
+	case hasForcePlatform:
+		return SchedulerModeForced
+	case platform == PlatformAnthropic, platform == PlatformGemini:
+		return SchedulerModeMixed
+	default:
+		return SchedulerModeSingle
+	}
+}
+
+// guardFallback decides whether a database fallback may proceed.
+// It returns ErrSchedulerCacheNotReady when the fallback is disabled in the
+// configuration, ErrSchedulerFallbackLimited when the rate limiter rejects the
+// call, and nil otherwise. A nil cfg counts as enabled; a nil limiter as
+// unlimited.
+func (s *SchedulerSnapshotService) guardFallback(ctx context.Context) error {
+	if s.cfg != nil && !s.cfg.Gateway.Scheduling.DbFallbackEnabled {
+		return ErrSchedulerCacheNotReady
+	}
+	if s.fallbackLimit != nil && !s.fallbackLimit.Allow() {
+		return ErrSchedulerFallbackLimited
+	}
+	return nil
+}
+
+// withFallbackTimeout derives the context used for a database fallback.
+// When cfg is nil or DbFallbackTimeoutSeconds is not positive, it only adds
+// cancellation. Otherwise it applies that timeout, shortened to the time left
+// before ctx's own deadline if that is sooner. If ctx's deadline has already
+// passed, no timeout is added (ctx is already expired).
+// The caller must call the returned cancel function.
+func (s *SchedulerSnapshotService) withFallbackTimeout(ctx context.Context) (context.Context, context.CancelFunc) {
+	if s.cfg == nil || s.cfg.Gateway.Scheduling.DbFallbackTimeoutSeconds <= 0 {
+		return context.WithCancel(ctx)
+	}
+	timeout := time.Duration(s.cfg.Gateway.Scheduling.DbFallbackTimeoutSeconds) * time.Second
+	if deadline, ok := ctx.Deadline(); ok {
+		remaining := time.Until(deadline)
+		if remaining <= 0 {
+			return context.WithCancel(ctx)
+		}
+		if remaining < timeout {
+			timeout = remaining
+		}
+	}
+	return context.WithTimeout(ctx, timeout)
+}
+
+// isRunModeSimple reports whether a configuration is present and its run mode
+// is config.RunModeSimple.
+func (s *SchedulerSnapshotService) isRunModeSimple() bool {
+	return s.cfg != nil && s.cfg.RunMode == config.RunModeSimple
+}
+
+func (s *SchedulerSnapshotService) outboxPollInterval() time.Duration {
+	if s.cfg == nil {
+		return time.Second
+	}
+	sec := s.cfg.Gateway.Scheduling.OutboxPollIntervalSeconds
+	if sec <= 0 {
+		return time.Second
+	}
+	return time.Duration(sec) * time.Second
+}
+
+// fullRebuildInterval returns the configured full-rebuild interval, or 0 when
+// there is no configuration or the configured value is not positive. Start
+// does not launch the full-rebuild worker for a zero interval.
+func (s *SchedulerSnapshotService) fullRebuildInterval() time.Duration {
+	if s.cfg != nil {
+		if sec := s.cfg.Gateway.Scheduling.FullRebuildIntervalSeconds; sec > 0 {
+			return time.Duration(sec) * time.Second
+		}
+	}
+	return 0
+}
+
+// defaultBuckets returns the buckets to rebuild when the cache has none
+// registered.
+//
+// It always contains the group-0 buckets of every known platform ("single" and
+// "forced", plus "mixed" for Anthropic and Gemini). Outside simple run mode,
+// and when a group repository is available, the same set is added for each
+// active group that has a platform. Duplicates are removed.
+//
+// Listing the active groups is best-effort: on failure the error is logged and
+// only the group-0 buckets are returned, with a nil error.
+func (s *SchedulerSnapshotService) defaultBuckets(ctx context.Context) ([]SchedulerBucket, error) {
+	buckets := make([]SchedulerBucket, 0)
+	// addBuckets appends every mode that applies to (groupID, platform).
+	addBuckets := func(groupID int64, platform string) {
+		buckets = append(buckets,
+			SchedulerBucket{GroupID: groupID, Platform: platform, Mode: SchedulerModeSingle},
+			SchedulerBucket{GroupID: groupID, Platform: platform, Mode: SchedulerModeForced},
+		)
+		if platform == PlatformAnthropic || platform == PlatformGemini {
+			buckets = append(buckets, SchedulerBucket{GroupID: groupID, Platform: platform, Mode: SchedulerModeMixed})
+		}
+	}
+
+	for _, platform := range []string{PlatformAnthropic, PlatformGemini, PlatformOpenAI, PlatformAntigravity} {
+		addBuckets(0, platform)
+	}
+
+	if s.isRunModeSimple() || s.groupRepo == nil {
+		return dedupeBuckets(buckets), nil
+	}
+
+	groups, err := s.groupRepo.ListActive(ctx)
+	if err != nil {
+		// Degrade to the group-0 buckets, but leave a trace of why the
+		// per-group buckets are missing.
+		log.Printf("[Scheduler] list active groups failed, using group-0 buckets only: %v", err)
+		return dedupeBuckets(buckets), nil
+	}
+	for _, group := range groups {
+		if group.Platform == "" {
+			continue
+		}
+		addBuckets(group.ID, group.Platform)
+	}
+	return dedupeBuckets(buckets), nil
+}
+
+// dedupeBuckets returns the buckets of in with duplicates removed, keeping the
+// first occurrence of each and the original order. Two buckets are duplicates
+// when their String() forms are equal.
+func dedupeBuckets(in []SchedulerBucket) []SchedulerBucket {
+	seen := make(map[string]struct{}, len(in))
+	out := make([]SchedulerBucket, 0, len(in))
+	for _, bucket := range in {
+		key := bucket.String()
+		if _, ok := seen[key]; ok {
+			continue
+		}
+		seen[key] = struct{}{}
+		out = append(out, bucket)
+	}
+	return out
+}
+
+// derefAccounts copies the accounts behind a slice of pointers into a slice of
+// values, skipping nil pointers. The result is never nil.
+func derefAccounts(accounts []*Account) []Account {
+	out := make([]Account, 0, len(accounts))
+	for i := range accounts {
+		if p := accounts[i]; p != nil {
+			out = append(out, *p)
+		}
+	}
+	return out
+}
+
+// parseInt64Slice extracts the positive integers from a value that is expected
+// to be a []any. Elements that toInt64 cannot convert, and values <= 0, are
+// dropped. It returns nil when value is not a []any.
+func parseInt64Slice(value any) []int64 {
+	items, isSlice := value.([]any)
+	if !isSlice {
+		return nil
+	}
+	result := make([]int64, 0, len(items))
+	for i := range items {
+		n, ok := toInt64(items[i])
+		if !ok || n <= 0 {
+			continue
+		}
+		result = append(result, n)
+	}
+	return result
+}
+
+// toInt64 converts a loosely typed numeric value to int64.
+//
+// Supported inputs are float64 (truncated toward zero), int64, int and
+// json.Number holding a base-10 integer. The boolean is false for any other
+// type, for a json.Number that does not parse as an integer, and for a float64
+// that is NaN, infinite or outside the int64 range — converting such a float
+// would otherwise yield an implementation-dependent value.
+func toInt64(value any) (int64, bool) {
+	// Bounds of int64 as untyped constants; both are exactly representable
+	// as float64. The upper bound is exclusive.
+	const (
+		lowerBound = -(1 << 63)
+		upperBound = 1 << 63
+	)
+	switch v := value.(type) {
+	case float64:
+		// v != v is true only for NaN.
+		if v != v || v < lowerBound || v >= upperBound {
+			return 0, false
+		}
+		return int64(v), true
+	case int64:
+		return v, true
+	case int:
+		return int64(v), true
+	case json.Number:
+		parsed, err := strconv.ParseInt(v.String(), 10, 64)
+		return parsed, err == nil
+	default:
+		return 0, false
+	}
+}
+
+// fallbackLimiter limits how many database fallbacks are allowed per
+// one-second window.
+type fallbackLimiter struct {
+	maxQPS int        // maximum number of allowed calls per window
+	mu     sync.Mutex // guards window and count
+	window time.Time  // start of the current window
+	count  int        // calls allowed so far in the current window
+}
+
+// newFallbackLimiter creates a limiter that allows maxQPS calls per window,
+// with the first window starting now. It returns nil — meaning "no limit" to
+// Allow — when maxQPS is not positive.
+func newFallbackLimiter(maxQPS int) *fallbackLimiter {
+	if maxQPS > 0 {
+		return &fallbackLimiter{maxQPS: maxQPS, window: time.Now()}
+	}
+	return nil
+}
+
+// Allow reports whether another call may proceed in the current window and, if
+// so, counts it. A new window starts, and the count resets, once at least one
+// second has passed since the current window began. A nil limiter or a
+// non-positive maxQPS always allows.
+func (l *fallbackLimiter) Allow() bool {
+	if l == nil || l.maxQPS <= 0 {
+		return true
+	}
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	now := time.Now()
+	if now.Sub(l.window) >= time.Second {
+		l.window = now
+		l.count = 0
+	}
+	if l.count >= l.maxQPS {
+		return false
+	}
+	l.count++
+	return true
+}
--- a/backend/internal/service/setting_service.go
+++ b/backend/internal/service/setting_service.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/hex"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"strconv"
@@ -675,3 +676,84 @@ func (s *SettingService) GetLinuxDoConnectOAuthConfig(ctx context.Context) (conf

 	return effective, nil
 }
+
+// GetStreamTimeoutSettings loads the stream-timeout handling settings, using defaults when nothing usable is stored.
+func (s *SettingService) GetStreamTimeoutSettings(ctx context.Context) (*StreamTimeoutSettings, error) {
+	value, err := s.settingRepo.GetValue(ctx, SettingKeyStreamTimeoutSettings)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) {
+			return DefaultStreamTimeoutSettings(), nil // a missing key is not an error
+		}
+		return nil, fmt.Errorf("get stream timeout settings: %w", err)
+	}
+	if value == "" {
+		return DefaultStreamTimeoutSettings(), nil
+	}
+
+	var settings StreamTimeoutSettings
+	if err := json.Unmarshal([]byte(value), &settings); err != nil {
+		return DefaultStreamTimeoutSettings(), nil // malformed stored JSON is not reported; defaults are returned instead
+	}
+
+	// Validate the stored values and clamp them into their allowed ranges.
+	if settings.TempUnschedMinutes < 1 {
+		settings.TempUnschedMinutes = 1
+	}
+	if settings.TempUnschedMinutes > 60 {
+		settings.TempUnschedMinutes = 60
+	}
+	if settings.ThresholdCount < 1 {
+		settings.ThresholdCount = 1
+	}
+	if settings.ThresholdCount > 10 {
+		settings.ThresholdCount = 10
+	}
+	if settings.ThresholdWindowMinutes < 1 {
+		settings.ThresholdWindowMinutes = 1
+	}
+	if settings.ThresholdWindowMinutes > 60 {
+		settings.ThresholdWindowMinutes = 60
+	}
+
+	// Validate the action; anything unrecognized becomes temp_unsched.
+	switch settings.Action {
+	case StreamTimeoutActionTempUnsched, StreamTimeoutActionError, StreamTimeoutActionNone:
+		// valid
+	default:
+		settings.Action = StreamTimeoutActionTempUnsched
+	}
+
+	return &settings, nil
+}
+
+// SetStreamTimeoutSettings validates the stream-timeout handling settings and stores them as JSON.
+func (s *SettingService) SetStreamTimeoutSettings(ctx context.Context, settings *StreamTimeoutSettings) error {
+	if settings == nil {
+		return fmt.Errorf("settings cannot be nil")
+	}
+
+	// Validate the values; out-of-range input is rejected here rather than clamped.
+	if settings.TempUnschedMinutes < 1 || settings.TempUnschedMinutes > 60 {
+		return fmt.Errorf("temp_unsched_minutes must be between 1-60")
+	}
+	if settings.ThresholdCount < 1 || settings.ThresholdCount > 10 {
+		return fmt.Errorf("threshold_count must be between 1-10")
+	}
+	if settings.ThresholdWindowMinutes < 1 || settings.ThresholdWindowMinutes > 60 {
+		return fmt.Errorf("threshold_window_minutes must be between 1-60")
+	}
+
+	switch settings.Action {
+	case StreamTimeoutActionTempUnsched, StreamTimeoutActionError, StreamTimeoutActionNone:
+		// valid
+	default:
+		return fmt.Errorf("invalid action: %s", settings.Action)
+	}
+
+	data, err := json.Marshal(settings)
+	if err != nil {
+		return fmt.Errorf("marshal stream timeout settings: %w", err)
+	}
+
+	return s.settingRepo.Set(ctx, SettingKeyStreamTimeoutSettings, string(data)) // repository error is returned unwrapped
+}
--- a/backend/internal/service/settings_view.go
+++ b/backend/internal/service/settings_view.go
@@ -69,3 +69,35 @@ type PublicSettings struct {
 	LinuxDoOAuthEnabled bool
 	Version             string
 }
+
+// StreamTimeoutSettings configures stream-timeout handling (only what happens after a timeout; timeout detection itself is controlled by the gateway configuration).
+type StreamTimeoutSettings struct {
+	// Enabled reports whether stream-timeout handling is turned on.
+	Enabled bool `json:"enabled"`
+	// Action is what to do after a timeout: "temp_unsched" | "error" | "none".
+	Action string `json:"action"`
+	// TempUnschedMinutes is how long the temporary-unschedulable state lasts, in minutes.
+	TempUnschedMinutes int `json:"temp_unsched_minutes"`
+	// ThresholdCount is the trigger threshold (how many accumulated timeouts trigger the action).
+	ThresholdCount int `json:"threshold_count"`
+	// ThresholdWindowMinutes is the threshold counting window, in minutes.
+	ThresholdWindowMinutes int `json:"threshold_window_minutes"`
+}
+
+// StreamTimeoutAction* constants are the accepted values of StreamTimeoutSettings.Action.
+const (
+	StreamTimeoutActionTempUnsched = "temp_unsched" // make the account temporarily unschedulable
+	StreamTimeoutActionError       = "error"        // mark the account as being in an error state
+	StreamTimeoutActionNone        = "none"         // take no action
+)
+
+// DefaultStreamTimeoutSettings returns the default stream-timeout settings.
+func DefaultStreamTimeoutSettings() *StreamTimeoutSettings {
+	return &StreamTimeoutSettings{ // a new value on every call, so callers may modify the result
+		Enabled:                false,
+		Action:                 StreamTimeoutActionTempUnsched,
+		TempUnschedMinutes:     5,
+		ThresholdCount:         3,
+		ThresholdWindowMinutes: 10,
+	}
+}
--- a/backend/internal/service/temp_unsched.go
+++ b/backend/internal/service/temp_unsched.go
@@ -2,6 +2,7 @@ package service

 import (
 	"context"
+	"time"
 )

 // TempUnschedState 临时不可调度状态
@@ -20,3 +21,16 @@ type TempUnschedCache interface {
 	GetTempUnsched(ctx context.Context, accountID int64) (*TempUnschedState, error)
 	DeleteTempUnsched(ctx context.Context, accountID int64) error
 }
+
+// TimeoutCounterCache is the cache interface for per-account timeout counters.
+type TimeoutCounterCache interface {
+	// IncrementTimeoutCount increments the account's timeout count and returns the current count.
+	// windowMinutes is the counting window in minutes; once it has elapsed the counter resets automatically.
+	IncrementTimeoutCount(ctx context.Context, accountID int64, windowMinutes int) (int64, error)
+	// GetTimeoutCount returns the account's current timeout count.
+	GetTimeoutCount(ctx context.Context, accountID int64) (int64, error)
+	// ResetTimeoutCount resets the account's timeout count.
+	ResetTimeoutCount(ctx context.Context, accountID int64) error
+	// GetTimeoutCountTTL returns the time remaining before the counter expires.
+	GetTimeoutCountTTL(ctx context.Context, accountID int64) (time.Duration, error)
+}
--- a/backend/internal/service/wire.go
+++ b/backend/internal/service/wire.go
@@ -86,6 +86,35 @@ func ProvideConcurrencyService(cache ConcurrencyCache, accountRepo AccountReposi
 	return svc
 }

+// ProvideSchedulerSnapshotService is the wire provider that constructs SchedulerSnapshotService and starts it before returning.
+func ProvideSchedulerSnapshotService(
+	cache SchedulerCache,
+	outboxRepo SchedulerOutboxRepository,
+	accountRepo AccountRepository,
+	groupRepo GroupRepository,
+	cfg *config.Config,
+) *SchedulerSnapshotService {
+	svc := NewSchedulerSnapshotService(cache, outboxRepo, accountRepo, groupRepo, cfg)
+	svc.Start() // the matching Stop is registered in provideCleanup (cmd/server/wire.go)
+	return svc
+}
+
+// ProvideRateLimitService is the wire provider for RateLimitService; it constructs the service and then attaches its optional dependencies.
+func ProvideRateLimitService(
+	accountRepo AccountRepository,
+	usageRepo UsageLogRepository,
+	cfg *config.Config,
+	geminiQuotaService *GeminiQuotaService,
+	tempUnschedCache TempUnschedCache,
+	timeoutCounterCache TimeoutCounterCache,
+	settingService *SettingService,
+) *RateLimitService {
+	svc := NewRateLimitService(accountRepo, usageRepo, cfg, geminiQuotaService, tempUnschedCache)
+	svc.SetTimeoutCounterCache(timeoutCounterCache) // injected through a setter after construction
+	svc.SetSettingService(settingService)           // injected through a setter after construction
+	return svc
+}
+
 // ProvideOpsMetricsCollector creates and starts OpsMetricsCollector.
 func ProvideOpsMetricsCollector(
 	opsRepo OpsRepository,
@@ -186,7 +215,7 @@ var ProviderSet = wire.NewSet(
 	NewGeminiMessagesCompatService,
 	NewAntigravityTokenProvider,
 	NewAntigravityGatewayService,
-	NewRateLimitService,
+	ProvideRateLimitService,
 	NewAccountUsageService,
 	NewAccountTestService,
 	NewSettingService,
@@ -201,6 +230,7 @@ var ProviderSet = wire.NewSet(
 	NewTurnstileService,
 	NewSubscriptionService,
 	ProvideConcurrencyService,
+	ProvideSchedulerSnapshotService,
 	NewIdentityService,
 	NewCRSSyncService,
 	ProvideUpdateService,
--- a/backend/migrations/036_ops_error_logs_add_is_count_tokens.sql
+++ b/backend/migrations/036_ops_error_logs_add_is_count_tokens.sql
@@ -0,0 +1,16 @@
+-- Migration: add the is_count_tokens column to the ops_error_logs table
+-- Purpose: flag errors from count_tokens requests so statistics and alerts can filter them according to configuration
+-- Author: System
+-- Date: 2026-01-12
+
+-- Add is_count_tokens column to ops_error_logs table (IF NOT EXISTS keeps the migration re-runnable, like the index below)
+ALTER TABLE ops_error_logs
+ADD COLUMN IF NOT EXISTS is_count_tokens BOOLEAN NOT NULL DEFAULT FALSE;
+
+-- Add comment
+COMMENT ON COLUMN ops_error_logs.is_count_tokens IS '是否为 count_tokens 请求的错误（用于统计过滤）';
+
+-- Create a partial index covering only the TRUE rows (optional, improves filter query performance)
+CREATE INDEX IF NOT EXISTS idx_ops_error_logs_is_count_tokens
+ON ops_error_logs(is_count_tokens)
+WHERE is_count_tokens = TRUE;
--- a/backend/migrations/036_scheduler_outbox.sql
+++ b/backend/migrations/036_scheduler_outbox.sql
@@ -0,0 +1,10 @@
+CREATE TABLE IF NOT EXISTS scheduler_outbox ( -- scheduler outbox events; creation is skipped when the table already exists
+    id BIGSERIAL PRIMARY KEY,
+    event_type TEXT NOT NULL,
+    account_id BIGINT NULL, -- nullable
+    group_id BIGINT NULL, -- nullable
+    payload JSONB NULL, -- nullable JSON document
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_scheduler_outbox_created_at ON scheduler_outbox (created_at); -- NOTE(review): presumably serves age-based reads/cleanup of events; confirm against the outbox queries
--- a/deploy/.env.example
+++ b/deploy/.env.example
@@ -69,6 +69,41 @@ JWT_EXPIRE_HOUR=24
 # Leave unset to use default ./config.yaml
 #CONFIG_FILE=./config.yaml

+# -----------------------------------------------------------------------------
+# Gateway Scheduling (Optional)
+# 调度缓存与受控回源配置（缓存就绪且命中时不读 DB）
+# -----------------------------------------------------------------------------
+# 粘性会话最大排队长度
+GATEWAY_SCHEDULING_STICKY_SESSION_MAX_WAITING=3
+# 粘性会话等待超时（时间段，例如 45s）
+GATEWAY_SCHEDULING_STICKY_SESSION_WAIT_TIMEOUT=120s
+# 兜底排队等待超时（时间段，例如 30s）
+GATEWAY_SCHEDULING_FALLBACK_WAIT_TIMEOUT=30s
+# 兜底最大排队长度
+GATEWAY_SCHEDULING_FALLBACK_MAX_WAITING=100
+# 启用调度批量负载计算
+GATEWAY_SCHEDULING_LOAD_BATCH_ENABLED=true
+# 并发槽位清理周期（时间段，例如 30s）
+GATEWAY_SCHEDULING_SLOT_CLEANUP_INTERVAL=30s
+# 是否允许受控回源到 DB（默认 true，保持现有行为）
+GATEWAY_SCHEDULING_DB_FALLBACK_ENABLED=true
+# 受控回源超时（秒），0 表示不额外收紧超时
+GATEWAY_SCHEDULING_DB_FALLBACK_TIMEOUT_SECONDS=0
+# 受控回源限流（实例级 QPS），0 表示不限制
+GATEWAY_SCHEDULING_DB_FALLBACK_MAX_QPS=0
+# outbox 轮询周期（秒）
+GATEWAY_SCHEDULING_OUTBOX_POLL_INTERVAL_SECONDS=1
+# outbox 滞后告警阈值（秒）
+GATEWAY_SCHEDULING_OUTBOX_LAG_WARN_SECONDS=5
+# outbox 触发强制重建阈值（秒）
+GATEWAY_SCHEDULING_OUTBOX_LAG_REBUILD_SECONDS=10
+# outbox 连续滞后触发次数
+GATEWAY_SCHEDULING_OUTBOX_LAG_REBUILD_FAILURES=3
+# outbox 积压触发重建阈值（行数）
+GATEWAY_SCHEDULING_OUTBOX_BACKLOG_REBUILD_ROWS=10000
+# 全量重建周期（秒），0 表示禁用
+GATEWAY_SCHEDULING_FULL_REBUILD_INTERVAL_SECONDS=300
+
 # -----------------------------------------------------------------------------
 # Dashboard Aggregation (Optional)
 # -----------------------------------------------------------------------------
--- a/deploy/config.example.yaml
+++ b/deploy/config.example.yaml
@@ -169,6 +169,45 @@ gateway:
  # Allow failover on selected 400 errors (default: off)
  # 允许在特定 400 错误时进行故障转移（默认：关闭）
  failover_on_400: false
+  # Scheduling configuration
+  # 调度配置
+  scheduling:
+    # Sticky session max waiting queue size
+    # 粘性会话最大排队长度
+    sticky_session_max_waiting: 3
+    # Sticky session wait timeout (duration)
+    # 粘性会话等待超时（时间段）
+    sticky_session_wait_timeout: 120s
+    # Fallback wait timeout (duration)
+    # 兜底排队等待超时（时间段）
+    fallback_wait_timeout: 30s
+    # Fallback max waiting queue size
+    # 兜底最大排队长度
+    fallback_max_waiting: 100
+    # Enable batch load calculation for scheduling
+    # 启用调度批量负载计算
+    load_batch_enabled: true
+    # Slot cleanup interval (duration)
+    # 并发槽位清理周期（时间段）
+    slot_cleanup_interval: 30s
+    # 是否允许受控回源到 DB（默认 true，保持现有行为）
+    db_fallback_enabled: true
+    # 受控回源超时（秒），0 表示不额外收紧超时
+    db_fallback_timeout_seconds: 0
+    # 受控回源限流（实例级 QPS），0 表示不限制
+    db_fallback_max_qps: 0
+    # outbox 轮询周期（秒）
+    outbox_poll_interval_seconds: 1
+    # outbox 滞后告警阈值（秒）
+    outbox_lag_warn_seconds: 5
+    # outbox 触发强制重建阈值（秒）
+    outbox_lag_rebuild_seconds: 10
+    # outbox 连续滞后触发次数
+    outbox_lag_rebuild_failures: 3
+    # outbox 积压触发重建阈值（行数）
+    outbox_backlog_rebuild_rows: 10000
+    # 全量重建周期（秒），0 表示禁用
+    full_rebuild_interval_seconds: 300

 # =============================================================================
 # API Key Auth Cache Configuration
--- a/frontend/.eslintignore
+++ b/frontend/.eslintignore
@@ -0,0 +1,14 @@
+# 忽略编译后的文件
+vite.config.js
+vite.config.d.ts
+
+# 忽略依赖
+node_modules/
+
+# 忽略构建输出
+dist/
+../backend/internal/web/dist/
+
+# 忽略缓存
+.cache/
+.vite/
--- a/frontend/src/api/admin/ops.ts
+++ b/frontend/src/api/admin/ops.ts
@@ -362,6 +362,45 @@ export async function getAccountAvailabilityStats(platform?: string, groupId?: n
  return data
 }

+export interface OpsRateSummary { // current / peak / avg values of one rate metric; used below for both qps and tps
+  current: number
+  peak: number
+  avg: number
+}
+
+export interface OpsRealtimeTrafficSummary { // qps and tps summaries plus the window, time range, platform and optional group they belong to
+  window: string
+  start_time: string
+  end_time: string
+  platform: string
+  group_id?: number | null
+  qps: OpsRateSummary
+  tps: OpsRateSummary
+}
+
+export interface OpsRealtimeTrafficSummaryResponse { // response body of GET /admin/ops/realtime-traffic
+  enabled: boolean
+  summary: OpsRealtimeTrafficSummary | null // may be null; NOTE(review): presumably when enabled is false — confirm against the backend
+  timestamp?: string
+}
+
+export async function getRealtimeTrafficSummary( // GET /admin/ops/realtime-traffic for the given window and optional filters
+  window: string,
+  platform?: string,
+  groupId?: number | null
+): Promise<OpsRealtimeTrafficSummaryResponse> {
+  const params: Record<string, any> = { window } // window is always sent
+  if (platform) {
+    params.platform = platform // omitted when empty or undefined
+  }
+  if (typeof groupId === 'number' && groupId > 0) {
+    params.group_id = groupId // only positive numeric ids are sent; null, undefined, 0 and negatives are omitted
+  }
+
+  const { data } = await apiClient.get<OpsRealtimeTrafficSummaryResponse>('/admin/ops/realtime-traffic', { params })
+  return data
+}
+
 /**
 * Subscribe to realtime QPS updates via WebSocket.
 *
@@ -661,6 +700,14 @@ export interface EmailNotificationConfig {
  }
 }

+export interface OpsMetricThresholds {
+  sla_percent_min?: number | null                // turns red when SLA is below this value
+  latency_p99_ms_max?: number | null             // turns red when latency P99 is above this value
+  ttft_p99_ms_max?: number | null                // turns red when TTFT P99 is above this value
+  request_error_rate_percent_max?: number | null // turns red when the request error rate is above this value
+  upstream_error_rate_percent_max?: number | null // turns red when the upstream error rate is above this value
+}
+
 export interface OpsDistributedLockSettings {
  enabled: boolean
  key: string
@@ -681,11 +728,15 @@ export interface OpsAlertRuntimeSettings {
      reason: string
    }>
  }
+  thresholds: OpsMetricThresholds // 指标阈值配置
 }

 export interface OpsAdvancedSettings {
  data_retention: OpsDataRetentionSettings
  aggregation: OpsAggregationSettings
+  ignore_count_tokens_errors: boolean
+  auto_refresh_enabled: boolean
+  auto_refresh_interval_seconds: number
 }

 export interface OpsDataRetentionSettings {
@@ -929,6 +980,17 @@ export async function updateAdvancedSettings(config: OpsAdvancedSettings): Promi
  return data
 }

+// ==================== Metric Thresholds ====================
+
+async function getMetricThresholds(): Promise<OpsMetricThresholds> { // GET /admin/ops/settings/metric-thresholds; exposed only through opsAPI
+  const { data } = await apiClient.get<OpsMetricThresholds>('/admin/ops/settings/metric-thresholds')
+  return data
+}
+
+async function updateMetricThresholds(thresholds: OpsMetricThresholds): Promise<void> { // PUT /admin/ops/settings/metric-thresholds; the response body is ignored
+  await apiClient.put('/admin/ops/settings/metric-thresholds', thresholds)
+}
+
 export const opsAPI = {
  getDashboardOverview,
  getThroughputTrend,
@@ -937,6 +999,7 @@ export const opsAPI = {
  getErrorDistribution,
  getConcurrencyStats,
  getAccountAvailabilityStats,
+  getRealtimeTrafficSummary,
  subscribeQPS,
  listErrorLogs,
  getErrorLogDetail,
@@ -952,7 +1015,9 @@ export const opsAPI = {
  getAlertRuntimeSettings,
  updateAlertRuntimeSettings,
  getAdvancedSettings,
-  updateAdvancedSettings
+  updateAdvancedSettings,
+  getMetricThresholds,
+  updateMetricThresholds
 }

 export default opsAPI
--- a/frontend/src/api/admin/settings.ts
+++ b/frontend/src/api/admin/settings.ts
@@ -201,6 +201,41 @@ export async function deleteAdminApiKey(): Promise<{ message: string }> {
  return data
 }

+/**
+ * Stream timeout settings; field names match the JSON tags of the backend StreamTimeoutSettings struct
+ */
+export interface StreamTimeoutSettings {
+  enabled: boolean
+  action: 'temp_unsched' | 'error' | 'none'
+  temp_unsched_minutes: number
+  threshold_count: number
+  threshold_window_minutes: number
+}
+
+/**
+ * Get stream timeout settings (GET /admin/settings/stream-timeout)
+ * @returns Stream timeout settings taken from the response body
+ */
+export async function getStreamTimeoutSettings(): Promise<StreamTimeoutSettings> {
+  const { data } = await apiClient.get<StreamTimeoutSettings>('/admin/settings/stream-timeout')
+  return data
+}
+
+/**
+ * Update stream timeout settings (PUT /admin/settings/stream-timeout)
+ * @param settings - Stream timeout settings, sent as the request body
+ * @returns The response body, typed as the updated settings
+ */
+export async function updateStreamTimeoutSettings(
+  settings: StreamTimeoutSettings
+): Promise<StreamTimeoutSettings> {
+  const { data } = await apiClient.put<StreamTimeoutSettings>(
+    '/admin/settings/stream-timeout',
+    settings
+  )
+  return data
+}
+
 export const settingsAPI = {
  getSettings,
  updateSettings,
@@ -208,7 +243,9 @@ export const settingsAPI = {
  sendTestEmail,
  getAdminApiKey,
  regenerateAdminApiKey,
-  deleteAdminApiKey
+  deleteAdminApiKey,
+  getStreamTimeoutSettings,
+  updateStreamTimeoutSettings
 }

 export default settingsAPI
--- a/frontend/src/components/account/AccountGroupsCell.vue
+++ b/frontend/src/components/account/AccountGroupsCell.vue
@@ -0,0 +1,158 @@
+<template>
+  <div v-if="groups && groups.length > 0" class="relative max-w-56">
+    <!-- 分组容器：固定最大宽度，最多显示2行 -->
+    <div class="flex flex-wrap gap-1 max-h-14 overflow-hidden">
+      <GroupBadge
+        v-for="group in displayGroups"
+        :key="group.id"
+        :name="group.name"
+        :platform="group.platform"
+        :subscription-type="group.subscription_type"
+        :rate-multiplier="group.rate_multiplier"
+        :show-rate="false"
+        class="max-w-24"
+      />
+      <!-- 更多数量徽章 -->
+      <button
+        v-if="hiddenCount > 0"
+        ref="moreButtonRef"
+        @click.stop="showPopover = !showPopover"
+        class="inline-flex items-center gap-0.5 rounded-md px-1.5 py-0.5 text-xs font-medium bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-300 dark:hover:bg-dark-500 transition-colors cursor-pointer whitespace-nowrap"
+      >
+        <span>+{{ hiddenCount }}</span>
+      </button>
+    </div>
+
+    <!-- Popover 显示完整列表 -->
+    <Teleport to="body">
+      <Transition
+        enter-active-class="transition duration-150 ease-out"
+        enter-from-class="opacity-0 scale-95"
+        enter-to-class="opacity-100 scale-100"
+        leave-active-class="transition duration-100 ease-in"
+        leave-from-class="opacity-100 scale-100"
+        leave-to-class="opacity-0 scale-95"
+      >
+        <div
+          v-if="showPopover"
+          ref="popoverRef"
+          class="fixed z-50 min-w-48 max-w-96 rounded-lg border border-gray-200 bg-white p-3 shadow-lg dark:border-dark-600 dark:bg-dark-800"
+          :style="popoverStyle"
+        >
+          <div class="mb-2 flex items-center justify-between">
+            <span class="text-xs font-medium text-gray-500 dark:text-gray-400">
+              {{ t('admin.accounts.allGroups', { count: groups.length }) }}
+            </span>
+            <button
+              @click="showPopover = false"
+              class="rounded p-0.5 text-gray-400 hover:bg-gray-100 hover:text-gray-600 dark:hover:bg-dark-700 dark:hover:text-gray-300"
+            >
+              <svg class="h-3.5 w-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
+                <path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
+              </svg>
+            </button>
+          </div>
+          <div class="flex flex-wrap gap-1.5 max-h-64 overflow-y-auto">
+            <GroupBadge
+              v-for="group in groups"
+              :key="group.id"
+              :name="group.name"
+              :platform="group.platform"
+              :subscription-type="group.subscription_type"
+              :rate-multiplier="group.rate_multiplier"
+              :show-rate="false"
+            />
+          </div>
+        </div>
+      </Transition>
+    </Teleport>
+
+    <!-- 点击外部关闭 popover -->
+    <div
+      v-if="showPopover"
+      class="fixed inset-0 z-40"
+      @click="showPopover = false"
+    />
+  </div>
+  <span v-else class="text-sm text-gray-400 dark:text-dark-500">-</span>
+</template>
+
+<script setup lang="ts">
+import { ref, computed, onMounted, onUnmounted } from 'vue'
+import { useI18n } from 'vue-i18n'
+import GroupBadge from '@/components/common/GroupBadge.vue'
+import type { Group } from '@/types'
+
+interface Props {
+  groups: Group[] | null | undefined // null, undefined or empty renders the "-" placeholder
+  maxDisplay?: number // maximum number of chips in the cell, counting the "+N" button
+}
+
+const props = withDefaults(defineProps<Props>(), {
+  maxDisplay: 4 // default when the parent does not pass max-display
+})
+
+const { t } = useI18n()
+
+const moreButtonRef = ref<HTMLElement | null>(null)
+const popoverRef = ref<HTMLElement | null>(null)
+const showPopover = ref(false)
+
+// Groups rendered inline in the cell (at most maxDisplay of them).
+const displayGroups = computed(() => {
+  if (!props.groups) return []
+  if (props.groups.length <= props.maxDisplay) {
+    return props.groups
+  }
+  // Leave one slot free for the "+N" button.
+  return props.groups.slice(0, props.maxDisplay - 1)
+})
+
+// Number of groups not shown inline (the N in the "+N" button); 0 when everything fits.
+const hiddenCount = computed(() => {
+  if (!props.groups) return 0
+  if (props.groups.length <= props.maxDisplay) return 0
+  return props.groups.length - (props.maxDisplay - 1)
+})
+
+// Popover position in viewport coordinates (the popover is position: fixed), anchored to the "+N" button.
+const popoverStyle = computed(() => {
+  if (!showPopover.value || !moreButtonRef.value) return {} // depending on showPopover forces a fresh measurement on every open instead of a cached rect
+  const rect = moreButtonRef.value.getBoundingClientRect()
+  const viewportHeight = window.innerHeight
+  const viewportWidth = window.innerWidth
+
+  let top = rect.bottom + 8
+  let left = rect.left
+
+  // Not enough room below the button: place the popover above it (280px is the assumed popover height).
+  if (top + 280 > viewportHeight) {
+    top = Math.max(8, rect.top - 280)
+  }
+
+  // Not enough room on the right: shift left (384px assumes max-w-96 on the default Tailwind scale; 8px margin).
+  if (left + 384 > viewportWidth) {
+    left = Math.max(8, viewportWidth - 392)
+  }
+
+  return {
+    top: `${top}px`,
+    left: `${left}px`
+  }
+})
+
+// Keyboard handler that closes the popover on Escape.
+const handleKeydown = (e: KeyboardEvent) => {
+  if (e.key === 'Escape') {
+    showPopover.value = false
+  }
+}
+
+onMounted(() => {
+  window.addEventListener('keydown', handleKeydown) // one window-level listener per mounted cell
+})
+
+onUnmounted(() => {
+  window.removeEventListener('keydown', handleKeydown) // same function reference, so the listener is actually removed
+})
+</script>
--- a/frontend/src/components/account/BulkEditAccountModal.vue
+++ b/frontend/src/components/account/BulkEditAccountModal.vue
@@ -778,6 +778,16 @@ const addPresetMapping = (from: string, to: string) => {
 const toggleErrorCode = (code: number) => {
  const index = selectedErrorCodes.value.indexOf(code)
  if (index === -1) {
+    // Adding code - check for 429/529 warning
+    if (code === 429) {
+      if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+        return
+      }
+    } else if (code === 529) {
+      if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+        return
+      }
+    }
    selectedErrorCodes.value.push(code)
  } else {
    selectedErrorCodes.value.splice(index, 1)
@@ -794,6 +804,16 @@ const addCustomErrorCode = () => {
    appStore.showInfo(t('admin.accounts.errorCodeExists'))
    return
  }
+  // Check for 429/529 warning
+  if (code === 429) {
+    if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+      return
+    }
+  } else if (code === 529) {
+    if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+      return
+    }
+  }
  selectedErrorCodes.value.push(code)
  customErrorCodeInput.value = null
 }
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -1976,6 +1976,16 @@ const addPresetMapping = (from: string, to: string) => {
 const toggleErrorCode = (code: number) => {
  const index = selectedErrorCodes.value.indexOf(code)
  if (index === -1) {
+    // Adding code - check for 429/529 warning
+    if (code === 429) {
+      if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+        return
+      }
+    } else if (code === 529) {
+      if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+        return
+      }
+    }
    selectedErrorCodes.value.push(code)
  } else {
    selectedErrorCodes.value.splice(index, 1)
@@ -1993,6 +2003,16 @@ const addCustomErrorCode = () => {
    appStore.showInfo(t('admin.accounts.errorCodeExists'))
    return
  }
+  // Check for 429/529 warning
+  if (code === 429) {
+    if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+      return
+    }
+  } else if (code === 529) {
+    if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+      return
+    }
+  }
  selectedErrorCodes.value.push(code)
  customErrorCodeInput.value = null
 }
@@ -2462,6 +2482,7 @@ const handleCookieAuth = async (sessionKey: string) => {

        await adminAPI.accounts.create({
          name: accountName,
+          notes: form.notes,
          platform: form.platform,
          type: addMethod.value, // Use addMethod as type: 'oauth' or 'setup-token'
          credentials,
@@ -2469,6 +2490,8 @@ const handleCookieAuth = async (sessionKey: string) => {
          proxy_id: form.proxy_id,
          concurrency: form.concurrency,
          priority: form.priority,
+          group_ids: form.group_ids,
+          expires_at: form.expires_at,
          auto_pause_on_expired: autoPauseOnExpired.value
        })

--- a/frontend/src/components/account/EditAccountModal.vue
+++ b/frontend/src/components/account/EditAccountModal.vue
@@ -936,6 +936,16 @@ const addPresetMapping = (from: string, to: string) => {
 const toggleErrorCode = (code: number) => {
  const index = selectedErrorCodes.value.indexOf(code)
  if (index === -1) {
+    // Adding code - check for 429/529 warning
+    if (code === 429) {
+      if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+        return
+      }
+    } else if (code === 529) {
+      if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+        return
+      }
+    }
    selectedErrorCodes.value.push(code)
  } else {
    selectedErrorCodes.value.splice(index, 1)
@@ -953,6 +963,16 @@ const addCustomErrorCode = () => {
    appStore.showInfo(t('admin.accounts.errorCodeExists'))
    return
  }
+  // Check for 429/529 warning
+  if (code === 429) {
+    if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+      return
+    }
+  } else if (code === 529) {
+    if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+      return
+    }
+  }
  selectedErrorCodes.value.push(code)
  customErrorCodeInput.value = null
 }
--- a/frontend/src/components/admin/user/UserBalanceModal.vue
+++ b/frontend/src/components/admin/user/UserBalanceModal.vue
@@ -3,14 +3,17 @@
    <form v-if="user" id="balance-form" @submit.prevent="handleBalanceSubmit" class="space-y-5">
      <div class="flex items-center gap-3 rounded-xl bg-gray-50 p-4 dark:bg-dark-700">
        <div class="flex h-10 w-10 items-center justify-center rounded-full bg-primary-100"><span class="text-lg font-medium text-primary-700">{{ user.email.charAt(0).toUpperCase() }}</span></div>
-        <div class="flex-1"><p class="font-medium text-gray-900">{{ user.email }}</p><p class="text-sm text-gray-500">{{ t('admin.users.currentBalance') }}: ${{ user.balance.toFixed(2) }}</p></div>
+        <div class="flex-1"><p class="font-medium text-gray-900">{{ user.email }}</p><p class="text-sm text-gray-500">{{ t('admin.users.currentBalance') }}: ${{ formatBalance(user.balance) }}</p></div>
      </div>
      <div>
        <label class="input-label">{{ operation === 'add' ? t('admin.users.depositAmount') : t('admin.users.withdrawAmount') }}</label>
-        <div class="relative"><div class="absolute left-3 top-1/2 -translate-y-1/2 font-medium text-gray-500">$</div><input v-model.number="form.amount" type="number" step="0.01" min="0.01" required class="input pl-8" /></div>
+        <div class="relative flex gap-2">
+          <div class="relative flex-1"><div class="absolute left-3 top-1/2 -translate-y-1/2 font-medium text-gray-500">$</div><input v-model.number="form.amount" type="number" step="any" min="0" required class="input pl-8" /></div>
+          <button v-if="operation === 'subtract'" type="button" @click="fillAllBalance" class="btn btn-secondary whitespace-nowrap">{{ t('admin.users.withdrawAll') }}</button>
+        </div>
      </div>
      <div><label class="input-label">{{ t('admin.users.notes') }}</label><textarea v-model="form.notes" rows="3" class="input"></textarea></div>
-      <div v-if="form.amount > 0" class="rounded-xl border border-blue-200 bg-blue-50 p-4"><div class="flex items-center justify-between text-sm"><span>{{ t('admin.users.newBalance') }}:</span><span class="font-bold">${{ calculateNewBalance().toFixed(2) }}</span></div></div>
+      <div v-if="form.amount > 0" class="rounded-xl border border-blue-200 bg-blue-50 p-4"><div class="flex items-center justify-between text-sm"><span>{{ t('admin.users.newBalance') }}:</span><span class="font-bold">${{ formatBalance(calculateNewBalance()) }}</span></div></div>
    </form>
    <template #footer>
      <div class="flex justify-end gap-3">
@@ -35,13 +38,38 @@ const emit = defineEmits(['close', 'success']); const { t } = useI18n(); const a
 const submitting = ref(false); const form = reactive({ amount: 0, notes: '' })
 watch(() => props.show, (v) => { if(v) { form.amount = 0; form.notes = '' } })

-const calculateNewBalance = () => (props.user ? (props.operation === 'add' ? props.user.balance + form.amount : props.user.balance - form.amount) : 0)
+// Format a balance: show full precision, dropping redundant trailing zeros.
+const formatBalance = (value: number) => {
+  if (value === 0) return '0.00'
+  // Keep at most 8 decimal places, then strip trailing zeros (and the dot if nothing is left after it).
+  const formatted = value.toFixed(8).replace(/\.?0+$/, '')
+  // Make sure at least 2 decimal places remain.
+  const parts = formatted.split('.')
+  if (parts.length === 1) return formatted + '.00'
+  if (parts[1].length === 1) return formatted + '0'
+  return formatted
+}
+
+// Fill the amount field with the user's entire balance (unrounded).
+const fillAllBalance = () => {
+  if (props.user) {
+    form.amount = props.user.balance
+  }
+}
+
+const calculateNewBalance = () => { // balance after applying form.amount: added for 'add', subtracted otherwise; 0 when there is no user
+  if (!props.user) return 0
+  const result = props.operation === 'add' ? props.user.balance + form.amount : props.user.balance - form.amount
+  // Avoid a "-0.00" display caused by floating-point error: results within 1e-10 of zero become exactly 0.
+  return Math.abs(result) < 1e-10 ? 0 : result
+}
 const handleBalanceSubmit = async () => {
  if (!props.user) return
  if (!form.amount || form.amount <= 0) {
    appStore.showError(t('admin.users.amountRequired'))
    return
  }
+  // 退款时验证金额不超过实际余额
  if (props.operation === 'subtract' && form.amount > props.user.balance) {
    appStore.showError(t('admin.users.insufficientBalance'))
    return
--- a/frontend/src/components/icons/Icon.vue
+++ b/frontend/src/components/icons/Icon.vue
@@ -124,7 +124,8 @@ const icons = {
  chatBubble: 'M8 10h.01M12 10h.01M16 10h.01M9 16H5a2 2 0 01-2-2V6a2 2 0 012-2h14a2 2 0 012 2v8a2 2 0 01-2 2h-5l-5 5v-5z',
  calculator: 'M9 7h6m0 10v-3m-3 3h.01M9 17h.01M9 14h.01M12 14h.01M15 11h.01M12 11h.01M9 11h.01M7 21h10a2 2 0 002-2V5a2 2 0 00-2-2H7a2 2 0 00-2 2v14a2 2 0 002 2z',
  fire: 'M17.657 18.657A8 8 0 016.343 7.343S7 9 9 10c0-2 .5-5 2.986-7C14 5 16.09 5.777 17.656 7.343A7.975 7.975 0 0120 13a7.975 7.975 0 01-2.343 5.657z',
-  badge: 'M9 12.75L11.25 15 15 9.75M21 12c0 1.268-.63 2.39-1.593 3.068a3.745 3.745 0 01-1.043 3.296 3.745 3.745 0 01-3.296 1.043A3.745 3.745 0 0112 21c-1.268 0-2.39-.63-3.068-1.593a3.746 3.746 0 01-3.296-1.043 3.745 3.745 0 01-1.043-3.296A3.745 3.745 0 013 12c0-1.268.63-2.39 1.593-3.068a3.745 3.745 0 011.043-3.296 3.746 3.746 0 013.296-1.043A3.746 3.746 0 0112 3c1.268 0 2.39.63 3.068 1.593a3.746 3.746 0 013.296 1.043 3.746 3.746 0 011.043 3.296A3.745 3.745 0 0121 12z'
+  badge: 'M9 12.75L11.25 15 15 9.75M21 12c0 1.268-.63 2.39-1.593 3.068a3.745 3.745 0 01-1.043 3.296 3.745 3.745 0 01-3.296 1.043A3.745 3.745 0 0112 21c-1.268 0-2.39-.63-3.068-1.593a3.746 3.746 0 01-3.296-1.043 3.745 3.745 0 01-1.043-3.296A3.745 3.745 0 013 12c0-1.268.63-2.39 1.593-3.068a3.745 3.745 0 011.043-3.296 3.746 3.746 0 013.296-1.043A3.746 3.746 0 0112 3c1.268 0 2.39.63 3.068 1.593a3.746 3.746 0 013.296 1.043 3.746 3.746 0 011.043 3.296A3.745 3.745 0 0121 12z',
+  brain: 'M9.75 3.104v5.714a2.25 2.25 0 01-.659 1.591L5 14.5M9.75 3.104c-.251.023-.501.05-.75.082m.75-.082a24.301 24.301 0 014.5 0m0 0v5.714c0 .597.237 1.17.659 1.591L19.8 15.3M14.25 3.104c.251.023.501.05.75.082M19.8 15.3l-1.57.393A9.065 9.065 0 0112 15a9.065 9.065 0 00-6.23.693L5 14.5m0 0l-2.69 2.689c-1.232 1.232-.65 3.318 1.067 3.611A48.309 48.309 0 0012 21c2.773 0 5.491-.235 8.135-.687 1.718-.293 2.3-2.379 1.067-3.61L19.8 15.3M12 8.25a1.5 1.5 0 100-3 1.5 1.5 0 000 3zm0 0v3m-3-1.5a1.5 1.5 0 100-3 1.5 1.5 0 000 3zm0 0h6m-3 4.5a1.5 1.5 0 100-3 1.5 1.5 0 000 3z'
 } as const

 const iconPath = computed(() => icons[props.name])
--- a/frontend/src/components/keys/UseKeyModal.vue
+++ b/frontend/src/components/keys/UseKeyModal.vue
@@ -376,6 +376,10 @@ const currentFiles = computed((): FileConfig[] => {
    const trimmed = `${baseRoot}/antigravity`.replace(/\/+$/, '')
    return trimmed.endsWith('/v1beta') ? trimmed : `${trimmed}/v1beta`
  })()
+  const geminiBase = (() => {
+    const trimmed = baseRoot.replace(/\/+$/, '')
+    return trimmed.endsWith('/v1beta') ? trimmed : `${trimmed}/v1beta`
+  })()

  if (activeClientTab.value === 'opencode') {
    switch (props.platform) {
@@ -384,7 +388,7 @@ const currentFiles = computed((): FileConfig[] => {
      case 'openai':
        return [generateOpenCodeConfig('openai', apiBase, apiKey)]
      case 'gemini':
-        return [generateOpenCodeConfig('gemini', apiBase, apiKey)]
+        return [generateOpenCodeConfig('gemini', geminiBase, apiKey)]
      case 'antigravity':
        return [
          generateOpenCodeConfig('antigravity-claude', antigravityBase, apiKey, 'opencode.json (Claude)'),
@@ -525,14 +529,16 @@ function generateOpenCodeConfig(platform: string, baseUrl: string, apiKey: strin
    [platform]: {
      options: {
        baseURL: baseUrl,
-        apiKey,
-        ...(platform === 'openai' ? { store: false } : {})
+        apiKey
      }
    }
  }
  const openaiModels = {
    'gpt-5.2-codex': {
      name: 'GPT-5.2 Codex',
+      options: {
+        store: false
+      },
      variants: {
        low: {},
        medium: {},
@@ -574,9 +580,26 @@ function generateOpenCodeConfig(platform: string, baseUrl: string, apiKey: strin
    provider[platform].models = openaiModels
  }

+  const agent = // OpenAI only: the build and plan agents both get options.store = false
+    platform === 'openai'
+      ? {
+          build: {
+            options: {
+              store: false
+            }
+          },
+          plan: {
+            options: {
+              store: false
+            }
+          }
+        }
+      : undefined // any other platform gets no agent overrides
+
  const content = JSON.stringify(
    {
      provider,
+      ...(agent ? { agent } : {}),
      $schema: 'https://opencode.ai/config.json'
    },
    null,
--- a/frontend/src/composables/useModelWhitelist.ts
+++ b/frontend/src/composables/useModelWhitelist.ts
@@ -13,7 +13,17 @@ const openaiModels = [
  'o1', 'o1-preview', 'o1-mini', 'o1-pro',
  'o3', 'o3-mini', 'o3-pro',
  'o4-mini',
-  'gpt-5', 'gpt-5-mini', 'gpt-5-nano',
+  // GPT-5 series (kept in sync with the backend pricing file)
+  'gpt-5', 'gpt-5-2025-08-07', 'gpt-5-chat', 'gpt-5-chat-latest',
+  'gpt-5-codex', 'gpt-5-pro', 'gpt-5-pro-2025-10-06',
+  'gpt-5-mini', 'gpt-5-mini-2025-08-07',
+  'gpt-5-nano', 'gpt-5-nano-2025-08-07',
+  // GPT-5.1 series
+  'gpt-5.1', 'gpt-5.1-2025-11-13', 'gpt-5.1-chat-latest',
+  'gpt-5.1-codex', 'gpt-5.1-codex-max', 'gpt-5.1-codex-mini',
+  // GPT-5.2 series
+  'gpt-5.2', 'gpt-5.2-2025-12-11', 'gpt-5.2-chat-latest',
+  'gpt-5.2-codex', 'gpt-5.2-pro', 'gpt-5.2-pro-2025-12-11',
  'chatgpt-4o-latest',
  'gpt-4o-audio-preview', 'gpt-4o-realtime-preview'
 ]
@@ -211,7 +221,10 @@ const openaiPresetMappings = [
  { label: 'GPT-4.1', from: 'gpt-4.1', to: 'gpt-4.1', color: 'bg-indigo-100 text-indigo-700 hover:bg-indigo-200 dark:bg-indigo-900/30 dark:text-indigo-400' },
  { label: 'o1', from: 'o1', to: 'o1', color: 'bg-purple-100 text-purple-700 hover:bg-purple-200 dark:bg-purple-900/30 dark:text-purple-400' },
  { label: 'o3', from: 'o3', to: 'o3', color: 'bg-emerald-100 text-emerald-700 hover:bg-emerald-200 dark:bg-emerald-900/30 dark:text-emerald-400' },
-  { label: 'GPT-5', from: 'gpt-5', to: 'gpt-5', color: 'bg-amber-100 text-amber-700 hover:bg-amber-200 dark:bg-amber-900/30 dark:text-amber-400' }
+  { label: 'GPT-5', from: 'gpt-5', to: 'gpt-5', color: 'bg-amber-100 text-amber-700 hover:bg-amber-200 dark:bg-amber-900/30 dark:text-amber-400' },
+  { label: 'GPT-5.1', from: 'gpt-5.1', to: 'gpt-5.1', color: 'bg-orange-100 text-orange-700 hover:bg-orange-200 dark:bg-orange-900/30 dark:text-orange-400' },
+  { label: 'GPT-5.2', from: 'gpt-5.2', to: 'gpt-5.2', color: 'bg-red-100 text-red-700 hover:bg-red-200 dark:bg-red-900/30 dark:text-red-400' },
+  { label: 'GPT-5.1 Codex', from: 'gpt-5.1-codex', to: 'gpt-5.1-codex', color: 'bg-cyan-100 text-cyan-700 hover:bg-cyan-200 dark:bg-cyan-900/30 dark:text-cyan-400' }
 ]

 const geminiPresetMappings = [
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -156,6 +156,7 @@ export default {
        unknownError: 'Unknown error occurred',
        saving: 'Saving...', 
        selectedCount: '({count} selected)',    refresh: 'Refresh',
+    settings: 'Settings',
    notAvailable: 'N/A',
    now: 'Now',
    unknown: 'Unknown',
@@ -389,7 +390,7 @@ export default {
      opencode: {
        title: 'OpenCode Example',
        subtitle: 'opencode.json',
-        hint: 'This is a group configuration example. Adjust model and options as needed.',
+        hint: 'Config path: ~/.config/opencode/opencode.json (or opencode.jsonc), create if not exists. Use default providers (openai/anthropic/google) or custom provider_id. API Key can be configured directly or via /connect command. This is an example, adjust models and options as needed.',
      },
    },
    customKeyLabel: 'Custom Key',
@@ -723,6 +724,7 @@ export default {
      withdraw: 'Withdraw',
      depositAmount: 'Deposit Amount',
      withdrawAmount: 'Withdraw Amount',
+      withdrawAll: 'All',
      currentBalance: 'Current Balance',
      depositNotesPlaceholder:
        'e.g., New user registration bonus, promotional credit, compensation, etc.',
@@ -1021,6 +1023,7 @@ export default {
      schedulableEnabled: 'Scheduling enabled',
      schedulableDisabled: 'Scheduling disabled',
      failedToToggleSchedulable: 'Failed to toggle scheduling status',
+      allGroups: '{count} groups total',
      platforms: {
        anthropic: 'Anthropic',
        claude: 'Claude',
@@ -1203,6 +1206,10 @@ export default {
      customErrorCodesHint: 'Only stop scheduling for selected error codes',
      customErrorCodesWarning:
        'Only selected error codes will stop scheduling. Other errors will return 500.',
+      customErrorCodes429Warning:
+        '429 already has built-in rate limit handling. Adding it to custom error codes will disable the account instead of temporary rate limiting. Are you sure?',
+      customErrorCodes529Warning:
+        '529 already has built-in overload handling. Adding it to custom error codes will disable the account instead of temporary overload marking. Are you sure?',
      selectedErrorCodes: 'Selected',
      noneSelectedUsesDefault: 'None selected (uses default policy)',
      enterErrorCode: 'Enter error code (100-599)',
@@ -1902,6 +1909,7 @@ export default {
      max: 'max:',
      qps: 'QPS',
      requests: 'Requests',
+      requestsTitle: 'Requests',
      upstream: 'Upstream',
      client: 'Client',
      system: 'System',
@@ -1936,6 +1944,9 @@ export default {
        '6h': 'Last 6 hours',
        '24h': 'Last 24 hours'
      },
+      fullscreen: {
+        enter: 'Enter Fullscreen'
+      },
      diagnosis: {
        title: 'Smart Diagnosis',
        footer: 'Automated diagnostic suggestions based on current metrics',
@@ -2114,7 +2125,10 @@ export default {
        empty: 'No alert rules',
        loadFailed: 'Failed to load alert rules',
        saveFailed: 'Failed to save alert rule',
+        saveSuccess: 'Alert rule saved successfully',
        deleteFailed: 'Failed to delete alert rule',
+        deleteSuccess: 'Alert rule deleted successfully',
+        manage: 'Manage Alert Rules',
        create: 'Create Rule',
        createTitle: 'Create Alert Rule',
        editTitle: 'Edit Alert Rule',
@@ -2297,6 +2311,54 @@ export default {
          accountHealthThresholdRange: 'Account health threshold must be between 0 and 100'
        }
      },
+      settings: {
+        title: 'Ops Monitoring Settings',
+        loadFailed: 'Failed to load settings',
+        saveSuccess: 'Ops monitoring settings saved successfully',
+        saveFailed: 'Failed to save settings',
+        dataCollection: 'Data Collection',
+        evaluationInterval: 'Evaluation Interval (seconds)',
+        evaluationIntervalHint: 'Frequency of detection tasks, recommended to keep default',
+        alertConfig: 'Alert Configuration',
+        enableAlert: 'Enable Alerts',
+        alertRecipients: 'Alert Recipient Emails',
+        emailPlaceholder: 'Enter email address',
+        recipientsHint: 'If empty, the system will use the first admin email as default recipient',
+        minSeverity: 'Minimum Severity',
+        reportConfig: 'Report Configuration',
+        enableReport: 'Enable Reports',
+        reportRecipients: 'Report Recipient Emails',
+        dailySummary: 'Daily Summary',
+        weeklySummary: 'Weekly Summary',
+        metricThresholds: 'Metric Thresholds',
+        metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
+        slaMinPercent: 'SLA Minimum Percentage',
+        slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
+        latencyP99MaxMs: 'Latency P99 Maximum (ms)',
+        latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
+        ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
+        ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
+        requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
+        requestErrorRateMaxPercentHint: 'Request error rate above this value will be displayed in red (default: 5%)',
+        upstreamErrorRateMaxPercent: 'Upstream Error Rate Maximum (%)',
+        upstreamErrorRateMaxPercentHint: 'Upstream error rate above this value will be displayed in red (default: 5%)',
+        advancedSettings: 'Advanced Settings',
+        dataRetention: 'Data Retention Policy',
+        enableCleanup: 'Enable Data Cleanup',
+        cleanupSchedule: 'Cleanup Schedule (Cron)',
+        cleanupScheduleHint: 'Example: 0 2 * * * means 2 AM daily',
+        errorLogRetentionDays: 'Error Log Retention Days',
+        minuteMetricsRetentionDays: 'Minute Metrics Retention Days',
+        hourlyMetricsRetentionDays: 'Hourly Metrics Retention Days',
+        retentionDaysHint: 'Recommended 7-90 days, longer periods will consume more storage',
+        aggregation: 'Pre-aggregation Tasks',
+        enableAggregation: 'Enable Pre-aggregation',
+        aggregationHint: 'Pre-aggregation improves query performance for long time windows',
+        validation: {
+          title: 'Please fix the following issues',
+          retentionDaysRange: 'Retention days must be between 1-365 days'
+        }
+      },
      concurrency: {
        title: 'Concurrency / Queue',
        byPlatform: 'By Platform',
@@ -2330,12 +2392,13 @@ export default {
        accountError: 'Error'
      },
      tooltips: {
+        totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
        throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
        latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
        errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
        errorDistribution: 'Error distribution by status code.',
        goroutines:
-          'Number of Go runtime goroutines (lightweight threads). There is no absolute “safe” number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
+          'Number of Go runtime goroutines (lightweight threads). There is no absolute "safe" number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
        cpu: 'CPU usage percentage, showing system processor load.',
        memory: 'Memory usage, including used and total available memory.',
        db: 'Database connection pool status, including active, idle, and waiting connections.',
@@ -2345,6 +2408,7 @@ export default {
        tokens: 'Total number of tokens processed in the current time window.',
        sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
        errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
+        upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
        latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
        ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
        health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
@@ -2512,6 +2576,27 @@ export default {
        securityWarning: 'Warning: This key provides full admin access. Keep it secure.',
        usage: 'Usage: Add to request header - x-api-key: <your-admin-api-key>'
      },
+      streamTimeout: {
+        title: 'Stream Timeout Handling',
+        description: 'Configure account handling strategy when upstream response times out',
+        enabled: 'Enable Stream Timeout Handling',
+        enabledHint: 'Automatically handle problematic accounts when upstream times out',
+        timeoutSeconds: 'Timeout Threshold (seconds)',
+        timeoutSecondsHint: 'Stream data interval exceeding this time is considered timeout (30-300s)',
+        action: 'Action',
+        actionTempUnsched: 'Temporarily Unschedulable',
+        actionError: 'Mark as Error',
+        actionNone: 'No Action',
+        actionHint: 'Action to take on the account after timeout',
+        tempUnschedMinutes: 'Pause Duration (minutes)',
+        tempUnschedMinutesHint: 'Duration of temporary unschedulable state (1-60 minutes)',
+        thresholdCount: 'Trigger Threshold (count)',
+        thresholdCountHint: 'Number of timeouts before triggering action (1-10)',
+        thresholdWindowMinutes: 'Threshold Window (minutes)',
+        thresholdWindowMinutesHint: 'Time window for counting timeouts (1-60 minutes)',
+        saved: 'Stream timeout settings saved',
+        saveFailed: 'Failed to save stream timeout settings'
+      },
      saveSettings: 'Save Settings',
      saving: 'Saving...',
      settingsSaved: 'Settings saved successfully',
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -387,7 +387,7 @@ export default {
      opencode: {
        title: 'OpenCode 配置示例',
        subtitle: 'opencode.json',
-        hint: '示例仅用于演示分组配置，模型与选项可按需调整。',
+        hint: '配置文件路径：~/.config/opencode/opencode.json（或 opencode.jsonc），不存在需手动创建。可使用默认 provider（openai/anthropic/google）或自定义 provider_id。API Key 支持直接配置或通过客户端 /connect 命令配置。示例仅供参考，模型与选项可按需调整。',
      },
    },
    customKeyLabel: '自定义密钥',
@@ -780,6 +780,7 @@ export default {
      withdraw: '退款',
      depositAmount: '充值金额',
      withdrawAmount: '退款金额',
+      withdrawAll: '全部',
      depositNotesPlaceholder: '例如：新用户注册奖励、活动充值、补偿充值等',
      withdrawNotesPlaceholder: '例如：服务问题退款、错误充值退回、账户注销退款等',
      notesOptional: '备注为可选项，有助于未来查账',
@@ -1099,6 +1100,7 @@ export default {
      schedulableEnabled: '调度已开启',
      schedulableDisabled: '调度已关闭',
      failedToToggleSchedulable: '切换调度状态失败',
+      allGroups: '共 {count} 个分组',
      columns: {
        name: '名称',
        platformType: '平台/类型',
@@ -1339,6 +1341,10 @@ export default {
      customErrorCodes: '自定义错误码',
      customErrorCodesHint: '仅对选中的错误码停止调度',
      customErrorCodesWarning: '仅选中的错误码会停止调度，其他错误将返回 500。',
+      customErrorCodes429Warning:
+        '429 已有内置的限流处理机制。添加到自定义错误码后，将直接停止调度而非临时限流。确定要添加吗？',
+      customErrorCodes529Warning:
+        '529 已有内置的过载处理机制。添加到自定义错误码后，将直接停止调度而非临时标记过载。确定要添加吗？',
      selectedErrorCodes: '已选择',
      noneSelectedUsesDefault: '未选择（使用默认策略）',
      enterErrorCode: '输入错误码 (100-599)',
@@ -2018,7 +2024,7 @@ export default {
      ready: '就绪',
      requestsTotal: '请求（总计）',
      slaScope: 'SLA 范围：',
-      tokens: 'Token',
+      tokens: 'Token数',
      tps: 'TPS',
      current: '当前',
      peak: '峰值',
@@ -2047,7 +2053,8 @@ export default {
      avg: 'avg',
      max: 'max',
      qps: 'QPS',
-      requests: '请求',
+      requests: '请求数',
+      requestsTitle: '请求',
      upstream: '上游',
      client: '客户端',
      system: '系统',
@@ -2082,6 +2089,9 @@ export default {
        '6h': '近6小时',
        '24h': '近24小时'
      },
+      fullscreen: {
+        enter: '进入全屏'
+      },
      diagnosis: {
        title: '智能诊断',
        footer: '基于当前指标的自动诊断建议',
@@ -2465,6 +2475,18 @@ export default {
        reportRecipients: '评估报告接收邮箱',
        dailySummary: '每日摘要',
        weeklySummary: '每周摘要',
+        metricThresholds: '指标阈值配置',
+        metricThresholdsHint: '配置各项指标的告警阈值，超出阈值时将以红色显示',
+        slaMinPercent: 'SLA最低百分比',
+        slaMinPercentHint: 'SLA低于此值时显示为红色（默认：99.5%）',
+        latencyP99MaxMs: '延迟P99最大值（毫秒）',
+        latencyP99MaxMsHint: '延迟P99高于此值时显示为红色（默认：2000ms）',
+        ttftP99MaxMs: 'TTFT P99最大值（毫秒）',
+        ttftP99MaxMsHint: 'TTFT P99高于此值时显示为红色（默认：500ms）',
+        requestErrorRateMaxPercent: '请求错误率最大值（%）',
+        requestErrorRateMaxPercentHint: '请求错误率高于此值时显示为红色（默认：5%）',
+        upstreamErrorRateMaxPercent: '上游错误率最大值（%）',
+        upstreamErrorRateMaxPercentHint: '上游错误率高于此值时显示为红色（默认：5%）',
        advancedSettings: '高级设置',
        dataRetention: '数据保留策略',
        enableCleanup: '启用数据清理',
@@ -2696,6 +2718,27 @@ export default {
        securityWarning: '警告：此密钥拥有完整的管理员权限，请妥善保管。',
        usage: '使用方法：在请求头中添加 x-api-key: <your-admin-api-key>'
      },
+      streamTimeout: {
+        title: '流超时处理',
+        description: '配置上游响应超时时的账户处理策略，避免问题账户持续被选中',
+        enabled: '启用流超时处理',
+        enabledHint: '当上游响应超时时，自动处理问题账户',
+        timeoutSeconds: '超时阈值（秒）',
+        timeoutSecondsHint: '流数据间隔超过此时间视为超时（30-300秒）',
+        action: '处理方式',
+        actionTempUnsched: '临时不可调度',
+        actionError: '标记为错误状态',
+        actionNone: '不处理',
+        actionHint: '超时后对账户执行的操作',
+        tempUnschedMinutes: '暂停时长（分钟）',
+        tempUnschedMinutesHint: '临时不可调度的持续时间（1-60分钟）',
+        thresholdCount: '触发阈值（次数）',
+        thresholdCountHint: '累计超时多少次后触发处理（1-10次）',
+        thresholdWindowMinutes: '阈值窗口（分钟）',
+        thresholdWindowMinutesHint: '超时计数的时间窗口（1-60分钟）',
+        saved: '流超时设置保存成功',
+        saveFailed: '保存流超时设置失败'
+      },
      saveSettings: '保存设置',
      saving: '保存中...',
      settingsSaved: '设置保存成功',
--- a/frontend/src/style.css
+++ b/frontend/src/style.css
@@ -19,7 +19,22 @@
    @apply min-h-screen;
  }

-  /* 自定义滚动条 */
+  /* Custom scrollbar - transparent by default, made visible on :hover or :focus-within */
+  * {
+    scrollbar-width: thin;
+    scrollbar-color: transparent transparent;
+  }
+
+  *:hover,
+  *:focus-within {
+    scrollbar-color: rgba(156, 163, 175, 0.5) transparent;
+  }
+
+  .dark *:hover,
+  .dark *:focus-within {
+    scrollbar-color: rgba(75, 85, 99, 0.5) transparent;
+  }
+
  ::-webkit-scrollbar {
    @apply h-2 w-2;
  }
@@ -29,10 +44,15 @@
  }

  ::-webkit-scrollbar-thumb {
-    @apply rounded-full bg-gray-300 dark:bg-dark-600;
+    @apply rounded-full bg-transparent;
+    transition: background-color 0.2s ease;
  }

-  ::-webkit-scrollbar-thumb:hover {
+  *:hover::-webkit-scrollbar-thumb {
+    @apply bg-gray-300/50 dark:bg-dark-600/50;
+  }
+
+  *:hover::-webkit-scrollbar-thumb:hover {
    @apply bg-gray-400 dark:bg-dark-500;
  }

--- a/frontend/src/views/admin/AccountsView.vue
+++ b/frontend/src/views/admin/AccountsView.vue
@@ -56,10 +56,7 @@
            <AccountTodayStatsCell :account="row" />
          </template>
          <template #cell-groups="{ row }">
-            <div v-if="row.groups && row.groups.length > 0" class="flex flex-wrap gap-1.5">
-              <GroupBadge v-for="group in row.groups" :key="group.id" :name="group.name" :platform="group.platform" :subscription-type="group.subscription_type" :rate-multiplier="group.rate_multiplier" :show-rate="false" />
-            </div>
-            <span v-else class="text-sm text-gray-400 dark:text-dark-500">-</span>
+            <AccountGroupsCell :groups="row.groups" :max-display="4" />
          </template>
          <template #cell-usage="{ row }">
            <AccountUsageCell :account="row" />
@@ -145,7 +142,7 @@ import AccountStatsModal from '@/components/admin/account/AccountStatsModal.vue'
 import AccountStatusIndicator from '@/components/account/AccountStatusIndicator.vue'
 import AccountUsageCell from '@/components/account/AccountUsageCell.vue'
 import AccountTodayStatsCell from '@/components/account/AccountTodayStatsCell.vue'
-import GroupBadge from '@/components/common/GroupBadge.vue'
+import AccountGroupsCell from '@/components/account/AccountGroupsCell.vue'
 import PlatformTypeBadge from '@/components/common/PlatformTypeBadge.vue'
 import { formatDateTime, formatRelativeTime } from '@/utils/format'
 import type { Account, Proxy, Group } from '@/types'
--- a/frontend/src/views/admin/SettingsView.vue
+++ b/frontend/src/views/admin/SettingsView.vue
@@ -147,6 +147,144 @@
          </div>
        </div>

+        <!-- Stream Timeout Settings -->
+        <div class="card">
+          <div class="border-b border-gray-100 px-6 py-4 dark:border-dark-700">
+            <h2 class="text-lg font-semibold text-gray-900 dark:text-white">
+              {{ t('admin.settings.streamTimeout.title') }}
+            </h2>
+            <p class="mt-1 text-sm text-gray-500 dark:text-gray-400">
+              {{ t('admin.settings.streamTimeout.description') }}
+            </p>
+          </div>
+          <div class="space-y-5 p-6">
+            <!-- Loading State -->
+            <div v-if="streamTimeoutLoading" class="flex items-center gap-2 text-gray-500">
+              <div class="h-4 w-4 animate-spin rounded-full border-b-2 border-primary-600"></div>
+              {{ t('common.loading') }}
+            </div>
+
+            <template v-else>
+              <!-- Enable Stream Timeout -->
+              <div class="flex items-center justify-between">
+                <div>
+                  <label class="font-medium text-gray-900 dark:text-white">{{
+                    t('admin.settings.streamTimeout.enabled')
+                  }}</label>
+                  <p class="text-sm text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.enabledHint') }}
+                  </p>
+                </div>
+                <Toggle v-model="streamTimeoutForm.enabled" />
+              </div>
+
+              <!-- Settings - Only show when enabled -->
+              <div
+                v-if="streamTimeoutForm.enabled"
+                class="space-y-4 border-t border-gray-100 pt-4 dark:border-dark-700"
+              >
+                <!-- Action -->
+                <div>
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.action') }}
+                  </label>
+                  <select v-model="streamTimeoutForm.action" class="input w-64">
+                    <option value="temp_unsched">{{ t('admin.settings.streamTimeout.actionTempUnsched') }}</option>
+                    <option value="error">{{ t('admin.settings.streamTimeout.actionError') }}</option>
+                    <option value="none">{{ t('admin.settings.streamTimeout.actionNone') }}</option>
+                  </select>
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.actionHint') }}
+                  </p>
+                </div>
+
+                <!-- Temp Unsched Minutes (only show when action is temp_unsched) -->
+                <div v-if="streamTimeoutForm.action === 'temp_unsched'">
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.tempUnschedMinutes') }}
+                  </label>
+                  <input
+                    v-model.number="streamTimeoutForm.temp_unsched_minutes"
+                    type="number"
+                    min="1"
+                    max="60"
+                    class="input w-32"
+                  />
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.tempUnschedMinutesHint') }}
+                  </p>
+                </div>
+
+                <!-- Threshold Count -->
+                <div>
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.thresholdCount') }}
+                  </label>
+                  <input
+                    v-model.number="streamTimeoutForm.threshold_count"
+                    type="number"
+                    min="1"
+                    max="10"
+                    class="input w-32"
+                  />
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.thresholdCountHint') }}
+                  </p>
+                </div>
+
+                <!-- Threshold Window Minutes -->
+                <div>
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.thresholdWindowMinutes') }}
+                  </label>
+                  <input
+                    v-model.number="streamTimeoutForm.threshold_window_minutes"
+                    type="number"
+                    min="1"
+                    max="60"
+                    class="input w-32"
+                  />
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.thresholdWindowMinutesHint') }}
+                  </p>
+                </div>
+              </div>
+
+              <!-- Save Button -->
+              <div class="flex justify-end border-t border-gray-100 pt-4 dark:border-dark-700">
+                <button
+                  type="button"
+                  @click="saveStreamTimeoutSettings"
+                  :disabled="streamTimeoutSaving"
+                  class="btn btn-primary btn-sm"
+                >
+                  <svg
+                    v-if="streamTimeoutSaving"
+                    class="mr-1 h-4 w-4 animate-spin"
+                    fill="none"
+                    viewBox="0 0 24 24"
+                  >
+                    <circle
+                      class="opacity-25"
+                      cx="12"
+                      cy="12"
+                      r="10"
+                      stroke="currentColor"
+                      stroke-width="4"
+                    ></circle>
+                    <path
+                      class="opacity-75"
+                      fill="currentColor"
+                      d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
+                    ></path>
+                  </svg>
+                  {{ streamTimeoutSaving ? t('common.saving') : t('common.save') }}
+                </button>
+              </div>
+            </template>
+          </div>
+        </div>
+
        <!-- Registration Settings -->
        <div class="card">
          <div class="border-b border-gray-100 px-6 py-4 dark:border-dark-700">
@@ -840,6 +978,17 @@ const adminApiKeyMasked = ref('')
 const adminApiKeyOperating = ref(false)
 const newAdminApiKey = ref('')

+// Stream timeout state: loading/saving flags and the reactive form bound to the settings card
+const streamTimeoutLoading = ref(true) // starts true; cleared when loadStreamTimeoutSettings finishes
+const streamTimeoutSaving = ref(false) // true while a save request is in flight (disables the save button)
+const streamTimeoutForm = reactive({ // initial values shown until the stored settings are merged in
+  enabled: true,
+  action: 'temp_unsched' as 'temp_unsched' | 'error' | 'none',
+  temp_unsched_minutes: 5,
+  threshold_count: 3,
+  threshold_window_minutes: 10
+})
+
 type SettingsForm = SystemSettings & {
  smtp_password: string
  turnstile_secret_key: string
@@ -1129,8 +1278,43 @@ function copyNewKey() {
    })
 }

+// Stream timeout methods
+async function loadStreamTimeoutSettings() { // fetch the stored settings and merge them into streamTimeoutForm
+  streamTimeoutLoading.value = true
+  try {
+    const settings = await adminAPI.settings.getStreamTimeoutSettings()
+    Object.assign(streamTimeoutForm, settings)
+  } catch (error: any) {
+    console.error('Failed to load stream timeout settings:', error) // only logged; the form keeps its current values
+  } finally {
+    streamTimeoutLoading.value = false
+  }
+}
+
+async function saveStreamTimeoutSettings() { // send the form to the API and report the outcome via appStore.showSuccess/showError
+  streamTimeoutSaving.value = true
+  try {
+    const updated = await adminAPI.settings.updateStreamTimeoutSettings({
+      enabled: streamTimeoutForm.enabled,
+      action: streamTimeoutForm.action,
+      temp_unsched_minutes: streamTimeoutForm.temp_unsched_minutes,
+      threshold_count: streamTimeoutForm.threshold_count,
+      threshold_window_minutes: streamTimeoutForm.threshold_window_minutes
+    })
+    Object.assign(streamTimeoutForm, updated) // mirror whatever the API returned back into the form
+    appStore.showSuccess(t('admin.settings.streamTimeout.saved'))
+  } catch (error: any) {
+    appStore.showError(
+      t('admin.settings.streamTimeout.saveFailed') + ': ' + (error.message || t('common.unknownError'))
+    )
+  } finally {
+    streamTimeoutSaving.value = false
+  }
+}
+
 onMounted(() => {
  loadSettings()
  loadAdminApiKey()
+  loadStreamTimeoutSettings()
 })
 </script>
--- a/frontend/src/views/admin/ops/OpsDashboard.vue
+++ b/frontend/src/views/admin/ops/OpsDashboard.vue
@@ -1,6 +1,6 @@
 <template>
-  <AppLayout>
-    <div class="space-y-6 pb-12">
+  <component :is="isFullscreen ? 'div' : AppLayout" :class="isFullscreen ? 'flex min-h-screen flex-col justify-center bg-gray-50 dark:bg-dark-950' : ''">
+    <div :class="[isFullscreen ? 'p-4 md:p-6' : '', 'space-y-6 pb-12']">
      <div
        v-if="errorMessage"
        class="rounded-2xl bg-red-50 p-4 text-sm text-red-600 dark:bg-red-900/20 dark:text-red-400"
@@ -13,17 +13,16 @@
      <OpsDashboardHeader
        v-else-if="opsEnabled"
        :overview="overview"
-        :ws-status="wsStatus"
-        :ws-reconnect-in-ms="wsReconnectInMs"
-        :ws-has-data="wsHasData"
-        :real-time-qps="realTimeQPS"
-        :real-time-tps="realTimeTPS"
        :platform="platform"
        :group-id="groupId"
        :time-range="timeRange"
        :query-mode="queryMode"
        :loading="loading"
        :last-updated="lastUpdated"
+        :thresholds="metricThresholds"
+        :auto-refresh-enabled="autoRefreshEnabled"
+        :auto-refresh-countdown="autoRefreshCountdown"
+        :fullscreen="isFullscreen"
        @update:time-range="onTimeRangeChange"
        @update:platform="onPlatformChange"
        @update:group="onGroupChange"
@@ -33,6 +32,8 @@
        @open-error-details="openErrorDetails"
        @open-settings="showSettingsDialog = true"
        @open-alert-rules="showAlertRulesCard = true"
+        @enter-fullscreen="enterFullscreen"
+        @exit-fullscreen="exitFullscreen"
      />

      <!-- Row: Concurrency + Throughput -->
@@ -47,6 +48,7 @@
            :top-groups="throughputTrend?.top_groups ?? []"
            :loading="loadingTrend"
            :time-range="timeRange"
+            :fullscreen="isFullscreen"
            @select-platform="handleThroughputSelectPlatform"
            @select-group="handleThroughputSelectGroup"
            @open-details="handleOpenRequestDetails"
@@ -74,54 +76,54 @@
      <!-- Alert Events -->
      <OpsAlertEventsCard v-if="opsEnabled && !(loading && !hasLoadedOnce)" />

-      <!-- Settings Dialog -->
-      <OpsSettingsDialog :show="showSettingsDialog" @close="showSettingsDialog = false" @saved="fetchData" />
+      <!-- Settings Dialog (hidden in fullscreen mode) -->
+      <template v-if="!isFullscreen">
+        <OpsSettingsDialog :show="showSettingsDialog" @close="showSettingsDialog = false" @saved="onSettingsSaved" />

-      <!-- Alert Rules Dialog -->
-      <BaseDialog :show="showAlertRulesCard" :title="t('admin.ops.alertRules.title')" width="extra-wide" @close="showAlertRulesCard = false">
-        <OpsAlertRulesCard />
-      </BaseDialog>
+        <BaseDialog :show="showAlertRulesCard" :title="t('admin.ops.alertRules.title')" width="extra-wide" @close="showAlertRulesCard = false">
+          <OpsAlertRulesCard />
+        </BaseDialog>

-      <OpsErrorDetailsModal
-        :show="showErrorDetails"
-        :time-range="timeRange"
-        :platform="platform"
-        :group-id="groupId"
-        :error-type="errorDetailsType"
-        @update:show="showErrorDetails = $event"
-        @openErrorDetail="openError"
-      />
+        <OpsErrorDetailsModal
+          :show="showErrorDetails"
+          :time-range="timeRange"
+          :platform="platform"
+          :group-id="groupId"
+          :error-type="errorDetailsType"
+          @update:show="showErrorDetails = $event"
+          @openErrorDetail="openError"
+        />

-      <OpsErrorDetailModal v-model:show="showErrorModal" :error-id="selectedErrorId" />
+        <OpsErrorDetailModal v-model:show="showErrorModal" :error-id="selectedErrorId" />

-      <OpsRequestDetailsModal
-        v-model="showRequestDetails"
-        :time-range="timeRange"
-        :preset="requestDetailsPreset"
-        :platform="platform"
-        :group-id="groupId"
-        @openErrorDetail="openError"
-      />
+        <OpsRequestDetailsModal
+          v-model="showRequestDetails"
+          :time-range="timeRange"
+          :preset="requestDetailsPreset"
+          :platform="platform"
+          :group-id="groupId"
+          @openErrorDetail="openError"
+        />
+      </template>
    </div>
-  </AppLayout>
+  </component>
 </template>

 <script setup lang="ts">
 import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
-import { useDebounceFn } from '@vueuse/core'
+import { useDebounceFn, useIntervalFn } from '@vueuse/core'
 import { useI18n } from 'vue-i18n'
 import { useRoute, useRouter } from 'vue-router'
 import AppLayout from '@/components/layout/AppLayout.vue'
 import BaseDialog from '@/components/common/BaseDialog.vue'
 import {
  opsAPI,
-  OPS_WS_CLOSE_CODES,
-  type OpsWSStatus,
  type OpsDashboardOverview,
  type OpsErrorDistributionResponse,
  type OpsErrorTrendResponse,
  type OpsLatencyHistogramResponse,
-  type OpsThroughputTrendResponse
+  type OpsThroughputTrendResponse,
+  type OpsMetricThresholds
 } from '@/api/admin/ops'
 import { useAdminSettingsStore, useAppStore } from '@/stores'
 import OpsDashboardHeader from './components/OpsDashboardHeader.vue'
@@ -166,19 +168,35 @@ const QUERY_KEYS = {
  timeRange: 'tr',
  platform: 'platform',
  groupId: 'group_id',
-  queryMode: 'mode'
+  queryMode: 'mode',
+  fullscreen: 'fullscreen'
 } as const

 const isApplyingRouteQuery = ref(false)
 const isSyncingRouteQuery = ref(false)

-// WebSocket for realtime QPS/TPS
-const realTimeQPS = ref(0)
-const realTimeTPS = ref(0)
-const wsStatus = ref<OpsWSStatus>('closed')
-const wsReconnectInMs = ref<number | null>(null)
-const wsHasData = ref(false)
-let unsubscribeQPS: (() => void) | null = null
+// Fullscreen mode: active when the `fullscreen` route query param is '1' or 'true'.
+const isFullscreen = computed(() => {
+  const val = route.query[QUERY_KEYS.fullscreen]
+  return val === '1' || val === 'true'
+})
+
+function exitFullscreen() { // Leave fullscreen by removing the fullscreen flag from the route query.
+  const nextQuery = { ...route.query } // shallow copy so route.query itself is not mutated
+  delete nextQuery[QUERY_KEYS.fullscreen]
+  router.replace({ query: nextQuery }) // all other query params are carried over unchanged
+}
+
+function enterFullscreen() { // Enter fullscreen by setting the fullscreen flag to '1' in the route query.
+  const nextQuery = { ...route.query, [QUERY_KEYS.fullscreen]: '1' } // existing query params are preserved
+  router.replace({ query: nextQuery })
+}
+
+function handleKeydown(e: KeyboardEvent) { // Window keydown handler: Escape exits fullscreen; other keys and non-fullscreen state are ignored.
+  if (e.key === 'Escape' && isFullscreen.value) {
+    exitFullscreen()
+  }
+}

 let dashboardFetchController: AbortController | null = null
 let dashboardFetchSeq = 0
@@ -199,50 +217,6 @@ function abortDashboardFetch() {
  }
 }

-function stopQPSSubscription(options?: { resetMetrics?: boolean }) {
-  wsStatus.value = 'closed'
-  wsReconnectInMs.value = null
-  if (unsubscribeQPS) unsubscribeQPS()
-  unsubscribeQPS = null
-
-  if (options?.resetMetrics) {
-    realTimeQPS.value = 0
-    realTimeTPS.value = 0
-    wsHasData.value = false
-  }
-}
-
-function startQPSSubscription() {
-  stopQPSSubscription()
-  unsubscribeQPS = opsAPI.subscribeQPS(
-    (payload) => {
-      if (payload && typeof payload === 'object' && payload.type === 'qps_update' && payload.data) {
-        realTimeQPS.value = payload.data.qps || 0
-        realTimeTPS.value = payload.data.tps || 0
-        wsHasData.value = true
-      }
-    },
-    {
-      onStatusChange: (status) => {
-        wsStatus.value = status
-        if (status === 'connected') wsReconnectInMs.value = null
-      },
-      onReconnectScheduled: ({ delayMs }) => {
-        wsReconnectInMs.value = delayMs
-      },
-      onFatalClose: (event) => {
-        // Server-side feature flag says realtime is disabled; keep UI consistent and avoid reconnect loops.
-        if (event && event.code === OPS_WS_CLOSE_CODES.REALTIME_DISABLED) {
-          adminSettingsStore.setOpsRealtimeMonitoringEnabledLocal(false)
-          stopQPSSubscription({ resetMetrics: true })
-        }
-      },
-      // QPS updates may be sparse in idle periods; keep the timeout conservative.
-      staleTimeoutMs: 180_000
-    }
-  )
-}
-
 const readQueryString = (key: string): string => {
  const value = route.query[key]
  if (typeof value === 'string') return value
@@ -314,6 +288,7 @@ const syncQueryToRoute = useDebounceFn(async () => {
 }, 250)

 const overview = ref<OpsDashboardOverview | null>(null)
+const metricThresholds = ref<OpsMetricThresholds | null>(null)

 const throughputTrend = ref<OpsThroughputTrendResponse | null>(null)
 const loadingTrend = ref(false)
@@ -343,6 +318,45 @@ const requestDetailsPreset = ref<OpsRequestDetailsPreset>({
 const showSettingsDialog = ref(false)
 const showAlertRulesCard = ref(false)

+// Auto refresh state. The defaults below are overwritten by loadAutoRefreshSettings().
+const autoRefreshEnabled = ref(false)
+const autoRefreshIntervalMs = ref(30000) // refresh period in ms; default 30 seconds
+const autoRefreshCountdown = ref(0) // seconds remaining; passed to the header as auto-refresh-countdown
+
+// Auto refresh timer (not started on creation): each tick calls fetchData() unless auto refresh is off, ops is disabled, or a load is in progress.
+const { pause: pauseAutoRefresh, resume: resumeAutoRefresh } = useIntervalFn(
+  () => {
+    if (autoRefreshEnabled.value && opsEnabled.value && !loading.value) {
+      fetchData()
+    }
+  },
+  autoRefreshIntervalMs,
+  { immediate: false }
+)
+
+// Countdown timer (ticks every second, not started on creation): decrements the countdown while auto refresh is on, stopping at 0.
+const { pause: pauseCountdown, resume: resumeCountdown } = useIntervalFn(
+  () => {
+    if (autoRefreshEnabled.value && autoRefreshCountdown.value > 0) {
+      autoRefreshCountdown.value--
+    }
+  },
+  1000,
+  { immediate: false }
+)
+
+// Load auto refresh settings via opsAPI.getAdvancedSettings(); on failure the error is logged and the current values are kept.
+async function loadAutoRefreshSettings() {
+  try {
+    const settings = await opsAPI.getAdvancedSettings()
+    autoRefreshEnabled.value = settings.auto_refresh_enabled
+    autoRefreshIntervalMs.value = settings.auto_refresh_interval_seconds * 1000 // seconds -> ms; NOTE(review): value is not validated here, confirm the backend guarantees a positive number
+    autoRefreshCountdown.value = settings.auto_refresh_interval_seconds // countdown is tracked in seconds
+  } catch (err) {
+    console.error('[OpsDashboard] Failed to load auto refresh settings', err)
+  }
+}
+
 function handleThroughputSelectPlatform(nextPlatform: string) {
  platform.value = nextPlatform || ''
  groupId.value = null
@@ -376,6 +390,11 @@ function onTimeRangeChange(v: string | number | boolean | null) {
  timeRange.value = v as TimeRange
 }

+function onSettingsSaved() { // Handler for the settings dialog's `saved` event.
+  loadThresholds() // async, not awaited: thresholds reload in the background
+  fetchData()
+}
+
 function onPlatformChange(v: string | number | boolean | null) {
  platform.value = typeof v === 'string' ? v : ''
 }
@@ -561,6 +580,10 @@ async function fetchData() {
    ])
    if (fetchSeq !== dashboardFetchSeq) return
    lastUpdated.value = new Date()
+    // Reset auto refresh countdown after successful fetch
+    if (autoRefreshEnabled.value) {
+      autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
+    }
  } catch (err) {
    if (!isOpsDisabledError(err)) {
      console.error('[ops] failed to fetch dashboard data', err)
@@ -609,37 +632,66 @@ watch(
 )

 onMounted(async () => {
+  // Fullscreen mode: listen for the ESC key (listener is removed again in onUnmounted)
+  window.addEventListener('keydown', handleKeydown)
+
   await adminSettingsStore.fetch()
   if (!adminSettingsStore.opsMonitoringEnabled) {
     await router.replace('/admin/settings')
     return
   }

-  if (adminSettingsStore.opsRealtimeMonitoringEnabled) {
-    startQPSSubscription()
-  } else {
-    stopQPSSubscription({ resetMetrics: true })
-  }
+  // Load thresholds configuration (not awaited; resolves in the background)
+  loadThresholds()
+
+  // Load auto refresh settings (awaited, so the values are in place before the first fetch and the timer start below)
+  await loadAutoRefreshSettings()

   if (opsEnabled.value) {
     await fetchData()
   }
+
+  // Start the auto refresh and countdown timers if auto refresh is enabled
+  if (autoRefreshEnabled.value) {
+    resumeAutoRefresh()
+    resumeCountdown()
+  }
 })

+async function loadThresholds() { // Load metric thresholds from the alert runtime settings into metricThresholds.
+  try {
+    const settings = await opsAPI.getAlertRuntimeSettings()
+    metricThresholds.value = settings.thresholds || null // a missing/falsy thresholds value is stored as null
+  } catch (err) {
+    console.warn('[OpsDashboard] Failed to load thresholds', err)
+    metricThresholds.value = null // on failure, clear any previously loaded thresholds
+  }
+}
+
 onUnmounted(() => {
-  stopQPSSubscription()
+  window.removeEventListener('keydown', handleKeydown) // undo the listener registered in onMounted
   abortDashboardFetch()
+  pauseAutoRefresh() // stop the auto refresh timer
+  pauseCountdown() // stop the one-second countdown timer
 })

-watch(
-  () => adminSettingsStore.opsRealtimeMonitoringEnabled,
-  (enabled) => {
-    if (!opsEnabled.value) return
-    if (enabled) {
-      startQPSSubscription()
-    } else {
-      stopQPSSubscription({ resetMetrics: true })
-    }
+// React to auto refresh being toggled: when enabled, reset the countdown and start both timers; when disabled, stop them and clear the countdown.
+watch(autoRefreshEnabled, (enabled) => {
+  if (enabled) {
+    autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
+    resumeAutoRefresh()
+    resumeCountdown()
+  } else {
+    pauseAutoRefresh()
+    pauseCountdown()
+    autoRefreshCountdown.value = 0
   }
-)
+})
+
+// Reload auto refresh settings whenever showSettingsDialog changes to false (i.e. the dialog was closed, whether or not anything was saved)
+watch(showSettingsDialog, async (show) => {
+  if (!show) {
+    await loadAutoRefreshSettings()
+  }
+})
 </script>
--- a/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue
+++ b/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue
@@ -1,29 +1,31 @@
 <script setup lang="ts">
-import { computed, onMounted, ref, watch } from 'vue'
+import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
+import { useIntervalFn } from '@vueuse/core'
 import { useI18n } from 'vue-i18n'
 import Select from '@/components/common/Select.vue'
 import HelpTooltip from '@/components/common/HelpTooltip.vue'
 import BaseDialog from '@/components/common/BaseDialog.vue'
+import Icon from '@/components/icons/Icon.vue'
 import { adminAPI } from '@/api'
-import type { OpsDashboardOverview, OpsWSStatus } from '@/api/admin/ops'
+import { opsAPI, type OpsDashboardOverview, type OpsMetricThresholds, type OpsRealtimeTrafficSummary } from '@/api/admin/ops'
 import type { OpsRequestDetailsPreset } from './OpsRequestDetailsModal.vue'
+import { useAdminSettingsStore } from '@/stores'
 import { formatNumber } from '@/utils/format'

 type RealtimeWindow = '1min' | '5min' | '30min' | '1h'

 interface Props {
   overview?: OpsDashboardOverview | null
-  wsStatus: OpsWSStatus
-  wsReconnectInMs?: number | null
-  wsHasData?: boolean
-  realTimeQps: number
-  realTimeTps: number
   platform: string
   groupId: number | null
   timeRange: string
   queryMode: string
   loading: boolean
   lastUpdated: Date | null
+  thresholds?: OpsMetricThresholds | null // threshold configuration
+  autoRefreshEnabled?: boolean
+  autoRefreshCountdown?: number // rendered in the toolbar followed by an "s" suffix
+  fullscreen?: boolean // when true, toolbar controls guarded by v-if="!props.fullscreen" are hidden
 }

 interface Emits {
@@ -36,18 +38,51 @@ interface Emits {
  (e: 'openErrorDetails', kind: 'request' | 'upstream'): void
  (e: 'openSettings'): void
  (e: 'openAlertRules'): void
+  (e: 'enterFullscreen'): void
+  (e: 'exitFullscreen'): void
 }

 const props = defineProps<Props>()
 const emit = defineEmits<Emits>()

 const { t } = useI18n()
+const adminSettingsStore = useAdminSettingsStore()

 const realtimeWindow = ref<RealtimeWindow>('1min')

 const overview = computed(() => props.overview ?? null)
 const systemMetrics = computed(() => overview.value?.system_metrics ?? null)

+const REALTIME_WINDOW_MINUTES: Record<RealtimeWindow, number> = { // realtime window key -> window length in minutes
+  '1min': 1,
+  '5min': 5,
+  '30min': 30,
+  '1h': 60
+}
+
+const TOOLBAR_RANGE_MINUTES: Record<string, number> = { // toolbar time-range key -> range length in minutes
+  '5m': 5,
+  '30m': 30,
+  '1h': 60,
+  '6h': 6 * 60,
+  '24h': 24 * 60
+}
+
+const availableRealtimeWindows = computed(() => { // realtime windows no longer than the toolbar time range
+  const toolbarMinutes = TOOLBAR_RANGE_MINUTES[props.timeRange] ?? 60 // unknown range keys fall back to 60 minutes
+  return (['1min', '5min', '30min', '1h'] as const).filter((w) => REALTIME_WINDOW_MINUTES[w] <= toolbarMinutes)
+})
+
+watch(
+  () => props.timeRange,
+  () => {
+    // The realtime window must be inside the toolbar window; reset to keep UX predictable.
+    realtimeWindow.value = '1min'
+    // Reload explicitly: if the window was already '1min' the assignment above changes nothing, so a watcher on realtimeWindow would not fire.
+    loadRealtimeTrafficSummary()
+  }
+)
+
 // --- Filters ---

 const groups = ref<Array<{ id: number; name: string; platform: string }>>([])
@@ -143,56 +178,143 @@ function getLatencyColor(ms: number | null | undefined): string {
  return 'text-red-600 dark:text-red-400'
 }

+// --- Threshold checking helpers (each returns false when the value or its configured threshold is null/undefined) ---
+function isSLABelowThreshold(slaPercent: number | null): boolean {
+  if (slaPercent == null) return false
+  const threshold = props.thresholds?.sla_percent_min
+  if (threshold == null) return false
+  return slaPercent < threshold // strictly below the configured minimum
+}
+
+function isLatencyAboveThreshold(latencyP99Ms: number | null): boolean { // true when the value strictly exceeds thresholds.latency_p99_ms_max
+  if (latencyP99Ms == null) return false
+  const threshold = props.thresholds?.latency_p99_ms_max
+  if (threshold == null) return false // no threshold configured: never flag
+  return latencyP99Ms > threshold
+}
+
+function isTTFTAboveThreshold(ttftP99Ms: number | null): boolean { // true when the value strictly exceeds thresholds.ttft_p99_ms_max
+  if (ttftP99Ms == null) return false
+  const threshold = props.thresholds?.ttft_p99_ms_max
+  if (threshold == null) return false // no threshold configured: never flag
+  return ttftP99Ms > threshold
+}
+
+function isRequestErrorRateAboveThreshold(errorRatePercent: number | null): boolean { // true when the value strictly exceeds thresholds.request_error_rate_percent_max
+  if (errorRatePercent == null) return false
+  const threshold = props.thresholds?.request_error_rate_percent_max
+  if (threshold == null) return false // no threshold configured: never flag
+  return errorRatePercent > threshold
+}
+
+function isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent: number | null): boolean { // true when the value strictly exceeds thresholds.upstream_error_rate_percent_max
+  if (upstreamErrorRatePercent == null) return false
+  const threshold = props.thresholds?.upstream_error_rate_percent_max
+  if (threshold == null) return false // no threshold configured: never flag
+  return upstreamErrorRatePercent > threshold
+}
+
 // --- Realtime / Overview labels ---

 const totalRequestsLabel = computed(() => formatNumber(overview.value?.request_count_total ?? 0))
 const totalTokensLabel = computed(() => formatNumber(overview.value?.token_consumed ?? 0))

+const realtimeTrafficSummary = ref<OpsRealtimeTrafficSummary | null>(null)
+const realtimeTrafficLoading = ref(false)
+
+function makeZeroRealtimeTrafficSummary(): OpsRealtimeTrafficSummary { // Build a placeholder summary with every QPS/TPS figure set to 0 for the current window and filters.
+  const now = new Date().toISOString() // start_time and end_time are both set to this same instant
+  return {
+    window: realtimeWindow.value,
+    start_time: now,
+    end_time: now,
+    platform: props.platform,
+    group_id: props.groupId,
+    qps: { current: 0, peak: 0, avg: 0 },
+    tps: { current: 0, peak: 0, avg: 0 }
+  }
+}
+
+async function loadRealtimeTrafficSummary() { // Fetch the realtime QPS/TPS summary for the selected window, platform and group into realtimeTrafficSummary.
+  if (realtimeTrafficLoading.value) return // a request is already in flight: this call is dropped, not queued
+  if (!adminSettingsStore.opsRealtimeMonitoringEnabled) {
+    realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary() // realtime monitoring off: show zeros without calling the API
+    return
+  }
+  realtimeTrafficLoading.value = true
+  try {
+    const res = await opsAPI.getRealtimeTrafficSummary(realtimeWindow.value, props.platform, props.groupId)
+    if (res && res.enabled === false) {
+      adminSettingsStore.setOpsRealtimeMonitoringEnabledLocal(false) // response says realtime is disabled: mirror that into the local store
+    }
+    realtimeTrafficSummary.value = res?.summary ?? null
+  } catch (err) {
+    console.error('[OpsDashboardHeader] Failed to load realtime traffic summary', err)
+    realtimeTrafficSummary.value = null // on failure the previous summary is discarded
+  } finally {
+    realtimeTrafficLoading.value = false // always release the in-flight guard
+  }
+}
+
+watch( // Reload the realtime summary whenever the realtime window, platform, or group filter changes.
+  () => [realtimeWindow.value, props.platform, props.groupId] as const,
+  () => {
+    loadRealtimeTrafficSummary()
+  },
+  { immediate: true } // also runs once right away, which performs the initial load
+)
+
+const { pause: pauseRealtimeTrafficRefresh, resume: resumeRealtimeTrafficRefresh } = useIntervalFn( // periodic reload of the realtime summary
+  () => {
+    loadRealtimeTrafficSummary()
+  },
+  5000, // refresh period in ms (5 seconds)
+  { immediate: false } // not started here; resumed/paused by the opsRealtimeMonitoringEnabled watcher
+)
+
+watch( // Start or stop the periodic realtime refresh to follow the store's realtime-monitoring flag.
+  () => adminSettingsStore.opsRealtimeMonitoringEnabled,
+  (enabled) => {
+    if (enabled) {
+      resumeRealtimeTrafficRefresh()
+    } else {
+      pauseRealtimeTrafficRefresh()
+      // Keep the UI stable when realtime monitoring is turned off: show an all-zero summary instead of stale data.
+      realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary()
+    }
+  },
+  { immediate: true } // apply the flag's current value during setup as well
+)
+
+onUnmounted(() => { // stop the periodic realtime refresh when the header is unmounted
+  pauseRealtimeTrafficRefresh()
+})
+
 const displayRealTimeQps = computed(() => {
-  const ov = overview.value
-  if (!ov) return 0
-  const useRealtime = props.wsStatus === 'connected' && !!props.wsHasData
-  const v = useRealtime ? props.realTimeQps : ov.qps?.current
+  const v = realtimeTrafficSummary.value?.qps?.current // current QPS from the realtime summary; missing or non-finite values become 0 below
   return typeof v === 'number' && Number.isFinite(v) ? v : 0
 })

 const displayRealTimeTps = computed(() => {
-  const ov = overview.value
-  if (!ov) return 0
-  const useRealtime = props.wsStatus === 'connected' && !!props.wsHasData
-  const v = useRealtime ? props.realTimeTps : ov.tps?.current
+  const v = realtimeTrafficSummary.value?.tps?.current // current TPS from the realtime summary; missing or non-finite values become 0 below
   return typeof v === 'number' && Number.isFinite(v) ? v : 0
 })

-// Sparkline history (keep last 60 data points)
-const qpsHistory = ref<number[]>([])
-const tpsHistory = ref<number[]>([])
-const MAX_HISTORY_POINTS = 60
-
-watch([displayRealTimeQps, displayRealTimeTps], ([newQps, newTps]) => {
-  // Add new data points
-  qpsHistory.value.push(newQps)
-  tpsHistory.value.push(newTps)
-
-  // Keep only last N points
-  if (qpsHistory.value.length > MAX_HISTORY_POINTS) {
-    qpsHistory.value.shift()
-  }
-  if (tpsHistory.value.length > MAX_HISTORY_POINTS) {
-    tpsHistory.value.shift()
-  }
+const realtimeQpsPeakLabel = computed(() => { // peak/avg labels below: realtime summary value with one decimal place, or '-' when missing or non-finite
+  const v = realtimeTrafficSummary.value?.qps?.peak
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
 })
-
-const qpsPeakLabel = computed(() => {
-  const v = overview.value?.qps?.peak
-  if (typeof v !== 'number') return '-'
-  return v.toFixed(1)
+const realtimeTpsPeakLabel = computed(() => { // peak TPS label
+  const v = realtimeTrafficSummary.value?.tps?.peak
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
 })
-
-const tpsPeakLabel = computed(() => {
-  const v = overview.value?.tps?.peak
-  if (typeof v !== 'number') return '-'
-  return v.toFixed(1)
+const realtimeQpsAvgLabel = computed(() => { // average QPS label
+  const v = realtimeTrafficSummary.value?.qps?.avg
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
+})
+const realtimeTpsAvgLabel = computed(() => { // average TPS label
+  const v = realtimeTrafficSummary.value?.tps?.avg
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
 })

 const qpsAvgLabel = computed(() => {
@@ -244,7 +366,7 @@ const ttftMaxMs = computed(() => overview.value?.ttft?.max_ms ?? null)
 const isSystemIdle = computed(() => {
   const ov = overview.value
   if (!ov) return true
-  const qps = props.wsStatus === 'connected' && props.wsHasData ? props.realTimeQps : ov.qps?.current
+  const qps = ov.qps?.current // idle detection reads the overview's current QPS, not the polled realtime summary
   const errorRate = ov.error_rate ?? 0
   return (qps ?? 0) === 0 && errorRate === 0
 })
@@ -272,15 +394,15 @@ const healthScoreClass = computed(() => {
  return 'text-red-500'
 })

-const circleSize = 100
-const strokeWidth = 8
-const radius = (circleSize - strokeWidth) / 2
-const circumference = 2 * Math.PI * radius
+const circleSize = computed(() => props.fullscreen ? 140 : 100) // larger circle in fullscreen
+const strokeWidth = computed(() => props.fullscreen ? 10 : 8) // thicker stroke in fullscreen
+const radius = computed(() => (circleSize.value - strokeWidth.value) / 2) // derived, so it follows the fullscreen toggle
+const circumference = computed(() => 2 * Math.PI * radius.value)
 const dashOffset = computed(() => {
   if (isSystemIdle.value) return 0
   if (healthScoreValue.value == null) return 0
   const score = Math.max(0, Math.min(100, healthScoreValue.value))
-  return circumference - (score / 100) * circumference
+  return circumference.value - (score / 100) * circumference.value // score is clamped to 0..100: 100 -> offset 0, 0 -> full circumference
 })

 interface DiagnosisItem {
@@ -687,10 +809,15 @@ const showJobsDetails = ref(false)
 function openJobsDetails() {
  showJobsDetails.value = true
 }
+
+function handleToolbarRefresh() { // Toolbar refresh click handler.
+  loadRealtimeTrafficSummary() // reload this header's own realtime summary
+  emit('refresh') // then notify the parent via the `refresh` event
+}
 </script>

 <template>
-  <div class="flex flex-col gap-4 rounded-3xl bg-white p-6 shadow-sm ring-1 ring-gray-900/5 dark:bg-dark-800 dark:ring-dark-700">
+  <div :class="['flex flex-col gap-4 rounded-3xl bg-white shadow-sm ring-1 ring-gray-900/5 dark:bg-dark-800 dark:ring-dark-700', props.fullscreen ? 'p-8' : 'p-6']">
    <!-- Top Toolbar -->
    <div class="flex flex-wrap items-center justify-between gap-4 border-b border-gray-100 pb-4 dark:border-dark-700">
      <div>
@@ -706,7 +833,7 @@ function openJobsDetails() {
          {{ t('admin.ops.title') }}
        </h1>

-        <div class="mt-1 flex items-center gap-3 text-xs text-gray-500 dark:text-gray-400">
+        <div v-if="!props.fullscreen" class="mt-1 flex items-center gap-3 text-xs text-gray-500 dark:text-gray-400">
          <span class="flex items-center gap-1.5" :title="props.loading ? t('admin.ops.loadingText') : t('admin.ops.ready')">
            <span class="relative flex h-2 w-2">
              <span class="relative inline-flex h-2 w-2 rounded-full" :class="props.loading ? 'bg-gray-400' : 'bg-green-500'"></span>
@@ -717,6 +844,17 @@ function openJobsDetails() {
          <span>·</span>
          <span>{{ t('common.refresh') }}: {{ updatedAtLabel }}</span>

+          <template v-if="props.autoRefreshEnabled && props.autoRefreshCountdown !== undefined"> <!-- Auto refresh countdown indicator -->
+            <span>·</span>
+            <span class="flex items-center gap-1">
+              <svg class="h-3 w-3 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
+                <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
+                <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
+              </svg>
+              <span>自动刷新: {{ props.autoRefreshCountdown }}s</span> <!-- NOTE(review): hard-coded zh-CN label; neighbouring labels go through t(), consider an i18n key -->
+            </span>
+          </template>
+
          <template v-if="systemMetrics">
            <span>·</span>
            <span>
@@ -728,28 +866,30 @@ function openJobsDetails() {
      </div>

      <div class="flex flex-wrap items-center gap-3">
-        <Select
-          :model-value="platform"
-          :options="platformOptions"
-          class="w-full sm:w-[140px]"
-          @update:model-value="handlePlatformChange"
-        />
+        <template v-if="!props.fullscreen">
+          <Select
+            :model-value="platform"
+            :options="platformOptions"
+            class="w-full sm:w-[140px]"
+            @update:model-value="handlePlatformChange"
+          />

-        <Select
-          :model-value="groupId"
-          :options="groupOptions"
-          class="w-full sm:w-[160px]"
-          @update:model-value="handleGroupChange"
-        />
+          <Select
+            :model-value="groupId"
+            :options="groupOptions"
+            class="w-full sm:w-[160px]"
+            @update:model-value="handleGroupChange"
+          />

-        <div class="mx-1 hidden h-4 w-[1px] bg-gray-200 dark:bg-dark-700 sm:block"></div>
+          <div class="mx-1 hidden h-4 w-[1px] bg-gray-200 dark:bg-dark-700 sm:block"></div>

-        <Select
-          :model-value="timeRange"
-          :options="timeRangeOptions"
-          class="relative w-full sm:w-[150px]"
-          @update:model-value="handleTimeRangeChange"
-        />
+          <Select
+            :model-value="timeRange"
+            :options="timeRangeOptions"
+            class="relative w-full sm:w-[150px]"
+            @update:model-value="handleTimeRangeChange"
+          />
+        </template>

        <Select
          v-if="false"
@@ -760,11 +900,12 @@ function openJobsDetails() {
        />

        <button
+          v-if="!props.fullscreen"
          type="button"
          class="flex h-8 w-8 items-center justify-center rounded-lg bg-gray-100 text-gray-500 transition-colors hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-400 dark:hover:bg-dark-600"
          :disabled="loading"
          :title="t('common.refresh')"
-          @click="emit('refresh')"
+          @click="handleToolbarRefresh"
        >
          <svg class="h-4 w-4" :class="{ 'animate-spin': loading }" fill="none" viewBox="0 0 24 24" stroke="currentColor">
            <path
@@ -776,9 +917,11 @@ function openJobsDetails() {
          </svg>
        </button>

-        <div class="mx-1 hidden h-4 w-[1px] bg-gray-200 dark:bg-dark-700 sm:block"></div>
+        <div v-if="!props.fullscreen" class="mx-1 hidden h-4 w-[1px] bg-gray-200 dark:bg-dark-700 sm:block"></div>

+        <!-- Alert Rules Button (hidden in fullscreen) -->
        <button
+          v-if="!props.fullscreen"
          type="button"
          class="flex h-8 items-center gap-1.5 rounded-lg bg-blue-100 px-3 text-xs font-bold text-blue-700 transition-colors hover:bg-blue-200 dark:bg-blue-900/30 dark:text-blue-400 dark:hover:bg-blue-900/50"
          :title="t('admin.ops.alertRules.title')"
@@ -790,7 +933,9 @@ function openJobsDetails() {
          <span class="hidden sm:inline">{{ t('admin.ops.alertRules.manage') }}</span>
        </button>

+        <!-- Settings Button (hidden in fullscreen) -->
        <button
+          v-if="!props.fullscreen"
          type="button"
          class="flex h-8 items-center gap-1.5 rounded-lg bg-gray-100 px-3 text-xs font-bold text-gray-700 transition-colors hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-300 dark:hover:bg-dark-600"
          :title="t('admin.ops.settings.title')"
@@ -802,13 +947,26 @@ function openJobsDetails() {
          </svg>
          <span class="hidden sm:inline">{{ t('common.settings') }}</span>
        </button>
+
+        <!-- Enter Fullscreen Button (hidden in fullscreen mode) -->
+        <button
+          v-if="!props.fullscreen"
+          type="button"
+          class="flex h-8 w-8 items-center justify-center rounded-lg bg-gray-100 text-gray-700 transition-colors hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-300 dark:hover:bg-dark-600"
+          :title="t('admin.ops.fullscreen.enter')"
+          @click="emit('enterFullscreen')"
+        >
+          <svg class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 8V4m0 0h4M4 4l5 5m11-1V4m0 0h-4m4 0l-5 5M4 16v4m0 0h4m-4 0l5-5m11 5l-5-5m5 5v-4m0 4h-4" />
+          </svg>
+        </button>
      </div>
    </div>

    <div v-if="overview" class="grid grid-cols-1 gap-6 lg:grid-cols-12">
      <!-- Left: Health + Realtime -->
-      <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900 lg:col-span-5">
-        <div class="grid grid-cols-1 gap-6 md:grid-cols-[200px_1fr] md:items-center">
+      <div :class="['rounded-2xl bg-gray-50 dark:bg-dark-900 lg:col-span-5', props.fullscreen ? 'p-6' : 'p-4']">
+        <div class="grid h-full grid-cols-1 gap-6 md:grid-cols-[200px_1fr] md:items-center">
          <!-- 1) Health Score -->
          <div
            class="group relative flex cursor-pointer flex-col items-center justify-center rounded-xl py-2 transition-all hover:bg-white/60 dark:hover:bg-dark-800/60 md:border-r md:border-gray-200 md:pr-6 dark:md:border-dark-700"
@@ -818,8 +976,9 @@ function openJobsDetails() {
              class="pointer-events-none absolute left-1/2 top-full z-50 mt-2 w-72 -translate-x-1/2 opacity-0 transition-opacity duration-200 group-hover:pointer-events-auto group-hover:opacity-100 md:left-full md:top-0 md:ml-2 md:mt-0 md:translate-x-0"
            >
              <div class="rounded-xl bg-white p-4 shadow-xl ring-1 ring-black/5 dark:bg-gray-800 dark:ring-white/10">
-                <h4 class="mb-3 border-b border-gray-100 pb-2 text-sm font-bold text-gray-900 dark:border-gray-700 dark:text-white">
-                  🧠 {{ t('admin.ops.diagnosis.title') }}
+                <h4 class="mb-3 border-b border-gray-100 pb-2 text-sm font-bold text-gray-900 dark:border-gray-700 dark:text-white flex items-center gap-2">
+                  <Icon name="brain" size="sm" class="text-blue-500" />
+                  {{ t('admin.ops.diagnosis.title') }}
                </h4>

                <div class="space-y-3">
@@ -850,8 +1009,9 @@ function openJobsDetails() {
                    <div class="flex-1">
                      <div class="text-xs font-semibold text-gray-900 dark:text-white">{{ item.message }}</div>
                      <div class="mt-0.5 text-[11px] text-gray-500 dark:text-gray-400">{{ item.impact }}</div>
-                      <div v-if="item.action" class="mt-1 text-[11px] text-blue-600 dark:text-blue-400">
-                        💡 {{ item.action }}
+                      <div v-if="item.action" class="mt-1 text-[11px] text-blue-600 dark:text-blue-400 flex items-center gap-1">
+                        <Icon name="lightbulb" size="xs" />
+                        {{ item.action }}
                      </div>
                    </div>
                  </div>
@@ -889,14 +1049,14 @@ function openJobsDetails() {
              </svg>

              <div class="absolute flex flex-col items-center">
-                <span class="text-3xl font-black" :class="healthScoreClass">
+                <span :class="[props.fullscreen ? 'text-5xl' : 'text-3xl', 'font-black', healthScoreClass]">
                  {{ isSystemIdle ? t('admin.ops.idleStatus') : (overview.health_score ?? '--') }}
                </span>
-                <span class="text-[10px] font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.health') }}</span>
+                <span :class="[props.fullscreen ? 'text-xs' : 'text-[10px]', 'font-bold uppercase tracking-wider text-gray-400']">{{ t('admin.ops.health') }}</span>
              </div>
            </div>

-            <div class="mt-4 text-center">
+            <div class="mt-4 text-center" v-if="!props.fullscreen">
              <div class="flex items-center justify-center gap-1 text-xs font-medium text-gray-500">
                {{ t('admin.ops.healthCondition') }}
                <HelpTooltip :content="t('admin.ops.healthHelp')" />
@@ -914,7 +1074,7 @@ function openJobsDetails() {
          </div>

          <!-- 2) Realtime Traffic -->
-          <div class="flex flex-col justify-center py-2">
+          <div class="flex h-full flex-col justify-center py-2">
            <div class="mb-3 flex flex-wrap items-center justify-between gap-2">
              <div class="flex items-center gap-2">
                <div class="relative flex h-3 w-3 shrink-0">
@@ -922,13 +1082,13 @@ function openJobsDetails() {
                  <span class="relative inline-flex h-3 w-3 rounded-full bg-blue-500"></span>
                </div>
                <h3 class="text-xs font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.realtime.title') }}</h3>
-                <HelpTooltip :content="t('admin.ops.tooltips.qps')" />
+                <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.qps')" />
              </div>

              <!-- Time Window Selector -->
              <div class="flex flex-wrap gap-1">
                <button
-                  v-for="window in (['1min', '5min', '30min', '1h'] as RealtimeWindow[])"
+                  v-for="window in availableRealtimeWindows"
                  :key="window"
                  type="button"
                  class="rounded px-1.5 py-0.5 text-[9px] font-bold transition-colors sm:px-2 sm:text-[10px]"
@@ -942,18 +1102,18 @@ function openJobsDetails() {
              </div>
            </div>

-            <div class="space-y-3">
+            <div :class="props.fullscreen ? 'space-y-4' : 'space-y-3'">
              <!-- Row 1: Current -->
              <div>
-                <div class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.current') }}</div>
+                <div :class="[props.fullscreen ? 'text-xs' : 'text-[10px]', 'font-bold uppercase text-gray-400']">{{ t('admin.ops.current') }}</div>
                <div class="mt-1 flex flex-wrap items-baseline gap-x-4 gap-y-2">
                  <div class="flex items-baseline gap-1.5">
-                    <span class="text-xl font-black text-gray-900 dark:text-white sm:text-2xl">{{ displayRealTimeQps.toFixed(1) }}</span>
-                    <span class="text-xs font-bold text-gray-500">QPS</span>
+                    <span :class="[props.fullscreen ? 'text-4xl' : 'text-xl sm:text-2xl', 'font-black text-gray-900 dark:text-white']">{{ displayRealTimeQps.toFixed(1) }}</span>
+                    <span :class="[props.fullscreen ? 'text-sm' : 'text-xs', 'font-bold text-gray-500']">QPS</span>
                  </div>
                  <div class="flex items-baseline gap-1.5">
-                    <span class="text-xl font-black text-gray-900 dark:text-white sm:text-2xl">{{ displayRealTimeTps.toFixed(1) }}</span>
-                    <span class="text-xs font-bold text-gray-500">TPS</span>
+                    <span :class="[props.fullscreen ? 'text-4xl' : 'text-xl sm:text-2xl', 'font-black text-gray-900 dark:text-white']">{{ displayRealTimeTps.toFixed(1) }}</span>
+                    <span :class="[props.fullscreen ? 'text-sm' : 'text-xs', 'font-bold text-gray-500']">TPS</span>
                  </div>
                </div>
              </div>
@@ -962,14 +1122,14 @@ function openJobsDetails() {
              <div class="grid grid-cols-2 gap-3">
                <!-- Peak -->
                <div>
-                  <div class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.peak') }}</div>
-                  <div class="mt-1 space-y-0.5 text-sm font-medium text-gray-600 dark:text-gray-400">
+                  <div :class="[props.fullscreen ? 'text-xs' : 'text-[10px]', 'font-bold uppercase text-gray-400']">{{ t('admin.ops.peak') }}</div>
+                  <div :class="[props.fullscreen ? 'text-base' : 'text-sm', 'mt-1 space-y-0.5 font-medium text-gray-600 dark:text-gray-400']">
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ qpsPeakLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeQpsPeakLabel }}</span>
                      <span class="text-xs">QPS</span>
                    </div>
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ tpsPeakLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeTpsPeakLabel }}</span>
                      <span class="text-xs">TPS</span>
                    </div>
                  </div>
@@ -977,14 +1137,14 @@ function openJobsDetails() {

                <!-- Average -->
                <div>
-                  <div class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.average') }}</div>
-                  <div class="mt-1 space-y-0.5 text-sm font-medium text-gray-600 dark:text-gray-400">
+                  <div :class="[props.fullscreen ? 'text-xs' : 'text-[10px]', 'font-bold uppercase text-gray-400']">{{ t('admin.ops.average') }}</div>
+                  <div :class="[props.fullscreen ? 'text-base' : 'text-sm', 'mt-1 space-y-0.5 font-medium text-gray-600 dark:text-gray-400']">
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ qpsAvgLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeQpsAvgLabel }}</span>
                      <span class="text-xs">QPS</span>
                    </div>
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ tpsAvgLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeTpsAvgLabel }}</span>
                      <span class="text-xs">TPS</span>
                    </div>
                  </div>
@@ -1019,15 +1179,16 @@ function openJobsDetails() {
      </div>

      <!-- Right: 6 cards (3 cols x 2 rows) -->
-      <div class="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:col-span-7 lg:grid-cols-3">
+      <div class="grid h-full grid-cols-1 content-center gap-4 sm:grid-cols-2 lg:col-span-7 lg:grid-cols-3">
        <!-- Card 1: Requests -->
        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-1">
-              <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requests') }}</span>
-              <HelpTooltip :content="t('admin.ops.tooltips.totalRequests')" />
+              <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestsTitle') }}</span>
+              <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.totalRequests')" />
            </div>
            <button
+              v-if="!props.fullscreen"
              class="text-[10px] font-bold text-blue-500 hover:underline"
              type="button"
              @click="openDetails({ title: t('admin.ops.requestDetails.title') })"
@@ -1060,22 +1221,23 @@ function openJobsDetails() {
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-2">
              <span class="text-[10px] font-bold uppercase text-gray-400">SLA</span>
-              <HelpTooltip :content="t('admin.ops.tooltips.sla')" />
-              <span class="h-1.5 w-1.5 rounded-full" :class="(slaPercent ?? 0) >= 99.5 ? 'bg-green-500' : 'bg-yellow-500'"></span>
+              <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.sla')" />
+              <span class="h-1.5 w-1.5 rounded-full" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : (slaPercent ?? 0) >= 99.5 ? 'bg-green-500' : 'bg-yellow-500'"></span>
            </div>
            <button
+              v-if="!props.fullscreen"
              class="text-[10px] font-bold text-blue-500 hover:underline"
              type="button"
-              @click="openDetails({ title: t('admin.ops.requestDetails.title') })"
+              @click="openDetails({ title: t('admin.ops.requestDetails.title'), kind: 'error' })"
            >
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
-          <div class="mt-2 text-3xl font-black text-gray-900 dark:text-white">
+          <div class="mt-2 text-3xl font-black" :class="isSLABelowThreshold(slaPercent) ? 'text-red-600 dark:text-red-400' : 'text-gray-900 dark:text-white'">
            {{ slaPercent == null ? '-' : `${slaPercent.toFixed(3)}%` }}
          </div>
          <div class="mt-3 h-2 w-full overflow-hidden rounded-full bg-gray-200 dark:bg-dark-700">
-            <div class="h-full bg-green-500 transition-all" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
+            <div class="h-full transition-all" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : 'bg-green-500'" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
          </div>
          <div class="mt-3 text-xs">
            <div class="flex justify-between">
@@ -1090,9 +1252,10 @@ function openJobsDetails() {
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-1">
              <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.latencyDuration') }}</span>
-              <HelpTooltip :content="t('admin.ops.tooltips.latency')" />
+              <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.latency')" />
            </div>
            <button
+              v-if="!props.fullscreen"
              class="text-[10px] font-bold text-blue-500 hover:underline"
              type="button"
              @click="openDetails({ title: t('admin.ops.latencyDuration'), sort: 'duration_desc', min_duration_ms: Math.max(Number(durationP99Ms ?? 0), 0) })"
@@ -1101,7 +1264,7 @@ function openJobsDetails() {
            </button>
          </div>
          <div class="mt-2 flex items-baseline gap-2">
-            <div class="text-3xl font-black" :class="getLatencyColor(durationP99Ms)">
+            <div class="text-3xl font-black" :class="isLatencyAboveThreshold(durationP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(durationP99Ms)">
              {{ durationP99Ms ?? '-' }}
            </div>
            <span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1140,18 +1303,19 @@ function openJobsDetails() {
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-1">
              <span class="text-[10px] font-bold uppercase text-gray-400">TTFT</span>
-              <HelpTooltip :content="t('admin.ops.tooltips.ttft')" />
+              <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.ttft')" />
            </div>
            <button
+              v-if="!props.fullscreen"
              class="text-[10px] font-bold text-blue-500 hover:underline"
              type="button"
-              @click="openDetails({ title: 'TTFT' })"
+              @click="openDetails({ title: 'TTFT', sort: 'duration_desc' })"
            >
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
          <div class="mt-2 flex items-baseline gap-2">
-            <div class="text-3xl font-black" :class="getLatencyColor(ttftP99Ms)">
+            <div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(ttftP99Ms)">
              {{ ttftP99Ms ?? '-' }}
            </div>
            <span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1190,13 +1354,13 @@ function openJobsDetails() {
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-1">
              <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestErrors') }}</span>
-              <HelpTooltip :content="t('admin.ops.tooltips.errors')" />
+              <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.errors')" />
            </div>
-            <button class="text-[10px] font-bold text-blue-500 hover:underline" type="button" @click="openErrorDetails('request')">
+            <button v-if="!props.fullscreen" class="text-[10px] font-bold text-blue-500 hover:underline" type="button" @click="openErrorDetails('request')">
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
-          <div class="mt-2 text-3xl font-black" :class="(errorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
+          <div class="mt-2 text-3xl font-black" :class="isRequestErrorRateAboveThreshold(errorRatePercent) ? 'text-red-600 dark:text-red-400' : (errorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
            {{ errorRatePercent == null ? '-' : `${errorRatePercent.toFixed(2)}%` }}
          </div>
          <div class="mt-3 space-y-1 text-xs">
@@ -1216,13 +1380,13 @@ function openJobsDetails() {
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-1">
              <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.upstreamErrors') }}</span>
-              <HelpTooltip :content="t('admin.ops.tooltips.upstreamErrors')" />
+              <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.upstreamErrors')" />
            </div>
-            <button class="text-[10px] font-bold text-blue-500 hover:underline" type="button" @click="openErrorDetails('upstream')">
+            <button v-if="!props.fullscreen" class="text-[10px] font-bold text-blue-500 hover:underline" type="button" @click="openErrorDetails('upstream')">
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
-          <div class="mt-2 text-3xl font-black" :class="(upstreamErrorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
+          <div class="mt-2 text-3xl font-black" :class="isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent) ? 'text-red-600 dark:text-red-400' : (upstreamErrorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
            {{ upstreamErrorRatePercent == null ? '-' : `${upstreamErrorRatePercent.toFixed(2)}%` }}
          </div>
          <div class="mt-3 space-y-1 text-xs">
@@ -1246,12 +1410,12 @@ function openJobsDetails() {
        <div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
          <div class="flex items-center gap-1">
            <div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">CPU</div>
-            <HelpTooltip :content="t('admin.ops.tooltips.cpu')" />
+            <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.cpu')" />
          </div>
          <div class="mt-1 text-lg font-black" :class="cpuPercentClass">
            {{ cpuPercentValue == null ? '-' : `${cpuPercentValue.toFixed(1)}%` }}
          </div>
-          <div class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
+          <div v-if="!props.fullscreen" class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
            {{ t('common.warning') }} 80% · {{ t('common.critical') }} 95%
          </div>
        </div>
@@ -1260,12 +1424,12 @@ function openJobsDetails() {
        <div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
          <div class="flex items-center gap-1">
            <div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">MEM</div>
-            <HelpTooltip :content="t('admin.ops.tooltips.memory')" />
+            <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.memory')" />
          </div>
          <div class="mt-1 text-lg font-black" :class="memPercentClass">
            {{ memPercentValue == null ? '-' : `${memPercentValue.toFixed(1)}%` }}
          </div>
-          <div class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
+          <div v-if="!props.fullscreen" class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
            {{
              systemMetrics?.memory_used_mb == null || systemMetrics?.memory_total_mb == null
                ? '-'
@@ -1278,12 +1442,12 @@ function openJobsDetails() {
        <div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
          <div class="flex items-center gap-1">
            <div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">DB</div>
-            <HelpTooltip :content="t('admin.ops.tooltips.db')" />
+            <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.db')" />
          </div>
          <div class="mt-1 text-lg font-black" :class="dbMiddleClass">
            {{ dbMiddleLabel }}
          </div>
-          <div class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
+          <div v-if="!props.fullscreen" class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
            {{ t('admin.ops.conns') }} {{ dbConnOpenValue ?? '-' }} / {{ dbMaxOpenConnsValue ?? '-' }}
            · {{ t('admin.ops.active') }} {{ dbConnActiveValue ?? '-' }}
            · {{ t('admin.ops.idle') }} {{ dbConnIdleValue ?? '-' }}
@@ -1295,12 +1459,12 @@ function openJobsDetails() {
        <div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
          <div class="flex items-center gap-1">
            <div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">Redis</div>
-            <HelpTooltip :content="t('admin.ops.tooltips.redis')" />
+            <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.redis')" />
          </div>
          <div class="mt-1 text-lg font-black" :class="redisMiddleClass">
            {{ redisMiddleLabel }}
          </div>
-          <div class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
+          <div v-if="!props.fullscreen" class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
            {{ t('admin.ops.conns') }} {{ redisConnTotalValue ?? '-' }} / {{ redisPoolSizeValue ?? '-' }}
            <span v-if="redisConnActiveValue != null"> · {{ t('admin.ops.active') }} {{ redisConnActiveValue }} </span>
            <span v-if="redisConnIdleValue != null"> · {{ t('admin.ops.idle') }} {{ redisConnIdleValue }} </span>
@@ -1311,12 +1475,12 @@ function openJobsDetails() {
        <div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
          <div class="flex items-center gap-1">
            <div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.goroutines') }}</div>
-            <HelpTooltip :content="t('admin.ops.tooltips.goroutines')" />
+            <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.goroutines')" />
          </div>
          <div class="mt-1 text-lg font-black" :class="goroutineStatusClass">
            {{ goroutineStatusLabel }}
          </div>
-          <div class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
+          <div v-if="!props.fullscreen" class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
            {{ t('admin.ops.current') }} <span class="font-mono">{{ goroutineCountValue ?? '-' }}</span>
            · {{ t('common.warning') }} <span class="font-mono">{{ goroutinesWarnThreshold }}</span>
            · {{ t('common.critical') }} <span class="font-mono">{{ goroutinesCriticalThreshold }}</span>
@@ -1331,9 +1495,9 @@ function openJobsDetails() {
          <div class="flex items-center justify-between gap-2">
            <div class="flex items-center gap-1">
              <div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.jobs') }}</div>
-              <HelpTooltip :content="t('admin.ops.tooltips.jobs')" />
+              <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.jobs')" />
            </div>
-            <button class="text-[10px] font-bold text-blue-500 hover:underline" type="button" @click="openJobsDetails">
+            <button v-if="!props.fullscreen" class="text-[10px] font-bold text-blue-500 hover:underline" type="button" @click="openJobsDetails">
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
@@ -1342,7 +1506,7 @@ function openJobsDetails() {
            {{ jobsStatusLabel }}
          </div>

-          <div class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
+          <div v-if="!props.fullscreen" class="mt-1 text-[10px] text-gray-500 dark:text-gray-400">
            {{ t('common.total') }} <span class="font-mono">{{ jobHeartbeats.length }}</span>
            · {{ t('common.warning') }} <span class="font-mono">{{ jobsWarnCount }}</span>
          </div>
--- a/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue
+++ b/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue
@@ -174,69 +174,75 @@ watch(

 <template>
  <BaseDialog :show="show" :title="modalTitle" width="full" @close="close">
-    <!-- Filters -->
-    <div class="border-b border-gray-200 pb-4 mb-4 dark:border-dark-700">
-      <div class="grid grid-cols-1 gap-4 lg:grid-cols-12">
-        <div class="lg:col-span-5">
-          <div class="relative group">
-            <div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
-              <svg
-                class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
-                fill="none"
-                viewBox="0 0 24 24"
-                stroke="currentColor"
-              >
-                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
-              </svg>
+    <div class="flex h-full min-h-0 flex-col">
+      <!-- Filters -->
+      <div class="mb-4 flex-shrink-0 border-b border-gray-200 pb-4 dark:border-dark-700">
+        <div class="grid grid-cols-1 gap-4 lg:grid-cols-12">
+          <div class="lg:col-span-5">
+            <div class="relative group">
+              <div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
+                <svg
+                  class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
+                  fill="none"
+                  viewBox="0 0 24 24"
+                  stroke="currentColor"
+                >
+                  <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
+                </svg>
+              </div>
+              <input
+                v-model="q"
+                type="text"
+                class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
+                :placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
+              />
            </div>
+          </div>
+
+          <div class="lg:col-span-2">
+            <Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
+          </div>
+
+          <div class="lg:col-span-2">
+            <Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
+          </div>
+
+          <div class="lg:col-span-2">
            <input
-              v-model="q"
+              v-model="accountIdInput"
              type="text"
-              class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
-              :placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
+              inputmode="numeric"
+              class="input w-full text-sm"
+              :placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
            />
          </div>
-        </div>

-        <div class="lg:col-span-2">
-          <Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
-        </div>
-
-        <div class="lg:col-span-2">
-          <Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
-        </div>
-
-        <div class="lg:col-span-2">
-          <input
-            v-model="accountIdInput"
-            type="text"
-            inputmode="numeric"
-            class="input w-full text-sm"
-            :placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
-          />
-        </div>
-
-        <div class="lg:col-span-1 flex items-center justify-end">
-          <button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
-            {{ t('common.reset') }}
-          </button>
+          <div class="lg:col-span-1 flex items-center justify-end">
+            <button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
+              {{ t('common.reset') }}
+            </button>
+          </div>
        </div>
      </div>
-    </div>

-    <!-- Body -->
-    <div class="text-xs text-gray-500 dark:text-gray-400 mb-2">
-      {{ t('admin.ops.errorDetails.total') }} {{ total }}
+      <!-- Body -->
+      <div class="flex min-h-0 flex-1 flex-col">
+        <div class="mb-2 flex-shrink-0 text-xs text-gray-500 dark:text-gray-400">
+          {{ t('admin.ops.errorDetails.total') }} {{ total }}
+        </div>
+
+        <OpsErrorLogTable
+          class="min-h-0 flex-1"
+          :rows="rows"
+          :total="total"
+          :loading="loading"
+          :page="page"
+          :page-size="pageSize"
+          @openErrorDetail="emit('openErrorDetail', $event)"
+          @update:page="page = $event"
+          @update:pageSize="pageSize = $event"
+        />
+      </div>
    </div>
-    <OpsErrorLogTable
-      :rows="rows"
-      :total="total"
-      :loading="loading"
-      :page="page"
-      :page-size="pageSize"
-      @openErrorDetail="emit('openErrorDetail', $event)"
-      @update:page="page = $event"
-      @update:pageSize="pageSize = $event"
-    />
  </BaseDialog>
 </template>
--- a/frontend/src/views/admin/ops/components/OpsErrorLogTable.vue
+++ b/frontend/src/views/admin/ops/components/OpsErrorLogTable.vue
@@ -1,176 +1,178 @@
 <template>
-  <div>
-    <div v-if="loading" class="flex items-center justify-center py-10">
+  <div class="flex h-full min-h-0 flex-col">
+    <div v-if="loading" class="flex flex-1 items-center justify-center py-10">
      <div class="h-8 w-8 animate-spin rounded-full border-b-2 border-primary-600"></div>
    </div>

-    <div v-else class="overflow-x-auto">
-      <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
-        <thead class="sticky top-0 z-10 bg-gray-50/50 dark:bg-dark-800/50">
-          <tr>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.timeId') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.context') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.status') }}
-            </th>
-            <th
-              scope="col"
-              class="px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.message') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.latency') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.action') }}
-            </th>
-          </tr>
-        </thead>
-        <tbody class="divide-y divide-gray-100 dark:divide-dark-700">
-          <tr v-if="rows.length === 0" class="bg-white dark:bg-dark-900">
-            <td colspan="6" class="py-16 text-center text-sm text-gray-400 dark:text-dark-500">
-              {{ t('admin.ops.errorLog.noErrors') }}
-            </td>
-          </tr>
+    <div v-else class="flex min-h-0 flex-1 flex-col">
+      <div class="min-h-0 flex-1 overflow-auto">
+        <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
+          <thead class="sticky top-0 z-10 bg-gray-50 dark:bg-dark-800"><!-- opaque background: body rows now scroll underneath this sticky header -->
+            <tr>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.timeId') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.context') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.status') }}
+              </th>
+              <th
+                scope="col"
+                class="px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.message') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.latency') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.action') }}
+              </th>
+            </tr>
+          </thead>
+          <tbody class="divide-y divide-gray-100 dark:divide-dark-700">
+            <tr v-if="rows.length === 0" class="bg-white dark:bg-dark-900">
+              <td colspan="6" class="py-16 text-center text-sm text-gray-400 dark:text-dark-500">
+                {{ t('admin.ops.errorLog.noErrors') }}
+              </td>
+            </tr>

-          <tr
-            v-for="log in rows"
-            :key="log.id"
-            class="group cursor-pointer transition-all duration-200 hover:bg-gray-50/80 focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2 dark:hover:bg-dark-800/50 dark:focus:ring-offset-dark-900"
-            tabindex="0"
-            role="button"
-            @click="emit('openErrorDetail', log.id)"
-            @keydown.enter.prevent="emit('openErrorDetail', log.id)"
-            @keydown.space.prevent="emit('openErrorDetail', log.id)"
-          >
-            <!-- Time & ID -->
-            <td class="px-6 py-4">
-              <div class="flex flex-col gap-0.5">
-                <span class="font-mono text-xs font-bold text-gray-900 dark:text-gray-200">
-                  {{ formatDateTime(log.created_at).split(' ')[1] }}
-                </span>
-                <span
-                  class="font-mono text-[10px] text-gray-400 transition-colors group-hover:text-primary-600 dark:group-hover:text-primary-400"
-                  :title="log.request_id || log.client_request_id"
-                >
-                  {{ (log.request_id || log.client_request_id || '').substring(0, 12) }}
-                </span>
-              </div>
-            </td>
+            <tr
+              v-for="log in rows"
+              :key="log.id"
+              class="group cursor-pointer transition-all duration-200 hover:bg-gray-50/80 focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2 dark:hover:bg-dark-800/50 dark:focus:ring-offset-dark-900"
+              tabindex="0"
+              role="button"
+              @click="emit('openErrorDetail', log.id)"
+              @keydown.enter.prevent="emit('openErrorDetail', log.id)"
+              @keydown.space.prevent="emit('openErrorDetail', log.id)"
+            >
+              <!-- Time & ID -->
+              <td class="px-6 py-4">
+                <div class="flex flex-col gap-0.5">
+                  <span class="font-mono text-xs font-bold text-gray-900 dark:text-gray-200">
+                    {{ formatDateTime(log.created_at).split(' ')[1] }}
+                  </span>
+                  <span
+                    class="font-mono text-[10px] text-gray-400 transition-colors group-hover:text-primary-600 dark:group-hover:text-primary-400"
+                    :title="log.request_id || log.client_request_id"
+                  >
+                    {{ (log.request_id || log.client_request_id || '').substring(0, 12) }}
+                  </span>
+                </div>
+              </td>

-	            <!-- Context (Platform/Model) -->
-	            <td class="px-6 py-4">
-	              <div class="flex flex-col items-start gap-1.5">
-	                <span
-	                  class="inline-flex items-center rounded-md bg-gray-100 px-2 py-0.5 text-[10px] font-bold uppercase tracking-tight text-gray-600 dark:bg-dark-700 dark:text-gray-300"
-	                >
-	                  {{ log.platform || '-' }}
-	                </span>
-	                <span
-	                  v-if="log.model"
-	                  class="max-w-[160px] truncate font-mono text-[10px] text-gray-500 dark:text-dark-400"
-	                  :title="log.model"
-	                >
-	                  {{ log.model }}
-	                </span>
-	                <div
-	                  v-if="log.group_id || log.account_id"
-	                  class="flex flex-wrap items-center gap-2 font-mono text-[10px] font-semibold text-gray-400 dark:text-dark-500"
-	                >
-	                  <span v-if="log.group_id">{{ t('admin.ops.errorLog.grp') }} {{ log.group_id }}</span>
-	                  <span v-if="log.account_id">{{ t('admin.ops.errorLog.acc') }} {{ log.account_id }}</span>
-	                </div>
-	              </div>
-	            </td>
-
-            <!-- Status & Severity -->
-            <td class="px-6 py-4">
-              <div class="flex flex-wrap items-center gap-2">
-                <span
-                  :class="[
-                    'inline-flex items-center rounded-lg px-2 py-1 text-xs font-black ring-1 ring-inset shadow-sm',
-                    getStatusClass(log.status_code)
-                  ]"
-                >
-                  {{ log.status_code }}
-                </span>
-                <span
-                  v-if="log.severity"
-                  :class="['rounded-md px-2 py-0.5 text-[10px] font-black shadow-sm', getSeverityClass(log.severity)]"
-                >
-                  {{ log.severity }}
-                </span>
-              </div>
-            </td>
-
-            <!-- Message -->
-            <td class="px-6 py-4">
-              <div class="max-w-md lg:max-w-2xl">
-                <p class="truncate text-xs font-semibold text-gray-700 dark:text-gray-300" :title="log.message">
-                  {{ formatSmartMessage(log.message) || '-' }}
-                </p>
-                <div class="mt-1.5 flex flex-wrap gap-x-3 gap-y-1">
-                  <div v-if="log.phase" class="flex items-center gap-1">
-                    <span class="h-1 w-1 rounded-full bg-gray-300"></span>
-                    <span class="text-[9px] font-black uppercase tracking-tighter text-gray-400">{{ log.phase }}</span>
-                  </div>
-                  <div v-if="log.client_ip" class="flex items-center gap-1">
-                    <span class="h-1 w-1 rounded-full bg-gray-300"></span>
-                    <span class="text-[9px] font-mono font-bold text-gray-400">{{ log.client_ip }}</span>
+              <!-- Context (Platform/Model) -->
+              <td class="px-6 py-4">
+                <div class="flex flex-col items-start gap-1.5">
+                  <span
+                    class="inline-flex items-center rounded-md bg-gray-100 px-2 py-0.5 text-[10px] font-bold uppercase tracking-tight text-gray-600 dark:bg-dark-700 dark:text-gray-300"
+                  >
+                    {{ log.platform || '-' }}
+                  </span>
+                  <span
+                    v-if="log.model"
+                    class="max-w-[160px] truncate font-mono text-[10px] text-gray-500 dark:text-dark-400"
+                    :title="log.model"
+                  >
+                    {{ log.model }}
+                  </span>
+                  <div
+                    v-if="log.group_id || log.account_id"
+                    class="flex flex-wrap items-center gap-2 font-mono text-[10px] font-semibold text-gray-400 dark:text-dark-500"
+                  >
+                    <span v-if="log.group_id">{{ t('admin.ops.errorLog.grp') }} {{ log.group_id }}</span>
+                    <span v-if="log.account_id">{{ t('admin.ops.errorLog.acc') }} {{ log.account_id }}</span>
                  </div>
                </div>
-              </div>
-            </td>
+              </td>

-            <!-- Latency -->
-            <td class="px-6 py-4 text-right">
-              <div class="flex flex-col items-end">
-                <span class="font-mono text-xs font-black" :class="getLatencyClass(log.latency_ms ?? null)">
-                  {{ log.latency_ms != null ? Math.round(log.latency_ms) + 'ms' : '--' }}
-                </span>
-              </div>
-            </td>
+              <!-- Status & Severity -->
+              <td class="px-6 py-4">
+                <div class="flex flex-wrap items-center gap-2">
+                  <span
+                    :class="[
+                      'inline-flex items-center rounded-lg px-2 py-1 text-xs font-black ring-1 ring-inset shadow-sm',
+                      getStatusClass(log.status_code)
+                    ]"
+                  >
+                    {{ log.status_code }}
+                  </span>
+                  <span
+                    v-if="log.severity"
+                    :class="['rounded-md px-2 py-0.5 text-[10px] font-black shadow-sm', getSeverityClass(log.severity)]"
+                  >
+                    {{ log.severity }}
+                  </span>
+                </div>
+              </td>

-            <!-- Actions -->
-            <td class="px-6 py-4 text-right" @click.stop>
-              <button type="button" class="btn btn-secondary btn-sm" @click="emit('openErrorDetail', log.id)">
-                {{ t('admin.ops.errorLog.details') }}
-              </button>
-            </td>
-          </tr>
-        </tbody>
-      </table>
+              <!-- Message -->
+              <td class="px-6 py-4">
+                <div class="max-w-md lg:max-w-2xl">
+                  <p class="truncate text-xs font-semibold text-gray-700 dark:text-gray-300" :title="log.message">
+                    {{ formatSmartMessage(log.message) || '-' }}
+                  </p>
+                  <div class="mt-1.5 flex flex-wrap gap-x-3 gap-y-1">
+                    <div v-if="log.phase" class="flex items-center gap-1">
+                      <span class="h-1 w-1 rounded-full bg-gray-300"></span>
+                      <span class="text-[9px] font-black uppercase tracking-tighter text-gray-400">{{ log.phase }}</span>
+                    </div>
+                    <div v-if="log.client_ip" class="flex items-center gap-1">
+                      <span class="h-1 w-1 rounded-full bg-gray-300"></span>
+                      <span class="text-[9px] font-mono font-bold text-gray-400">{{ log.client_ip }}</span>
+                    </div>
+                  </div>
+                </div>
+              </td>
+
+              <!-- Latency -->
+              <td class="px-6 py-4 text-right">
+                <div class="flex flex-col items-end">
+                  <span class="font-mono text-xs font-black" :class="getLatencyClass(log.latency_ms ?? null)">
+                    {{ log.latency_ms != null ? Math.round(log.latency_ms) + 'ms' : '--' }}
+                  </span>
+                </div>
+              </td>
+
+              <!-- Actions -->
+              <td class="px-6 py-4 text-right" @click.stop>
+                <button type="button" class="btn btn-secondary btn-sm" @click="emit('openErrorDetail', log.id)">
+                  {{ t('admin.ops.errorLog.details') }}
+                </button>
+              </td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+
+      <Pagination
+        v-if="total > 0"
+        :total="total"
+        :page="page"
+        :page-size="pageSize"
+        :page-size-options="[10, 20, 50, 100, 200, 500]"
+        @update:page="emit('update:page', $event)"
+        @update:pageSize="emit('update:pageSize', $event)"
+      />
    </div>
-
-    <Pagination
-      v-if="total > 0"
-      :total="total"
-      :page="page"
-      :page-size="pageSize"
-      :page-size-options="[10, 20, 50, 100, 200, 500]"
-      @update:page="emit('update:page', $event)"
-      @update:pageSize="emit('update:pageSize', $event)"
-    />
  </div>
 </template>

--- a/frontend/src/views/admin/ops/components/OpsRequestDetailsModal.vue
+++ b/frontend/src/views/admin/ops/components/OpsRequestDetailsModal.vue
@@ -95,6 +95,7 @@ watch(
  (open) => {
    if (open) {
      page.value = 1
+      pageSize.value = 20
      fetchData()
    }
  }
@@ -150,45 +151,46 @@ const kindBadgeClass = (kind: string) => {
 <template>
  <BaseDialog :show="modelValue" :title="props.preset.title || t('admin.ops.requestDetails.title')" width="full" @close="close">
    <template #default>
-      <div class="flex items-center justify-between mb-4">
-        <div class="text-xs text-gray-500 dark:text-gray-400">
-          {{ t('admin.ops.requestDetails.rangeLabel', { range: rangeLabel }) }}
-        </div>
-        <button
-          type="button"
-          class="btn btn-secondary btn-sm"
-          @click="fetchData"
-        >
-          {{ t('common.refresh') }}
-        </button>
-      </div>
-
-      <!-- Loading -->
-      <div v-if="loading" class="flex items-center justify-center py-16">
-        <div class="flex flex-col items-center gap-3">
-          <svg class="h-8 w-8 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
-            <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
-            <path
-              class="opacity-75"
-              fill="currentColor"
-              d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
-            ></path>
-          </svg>
-          <span class="text-sm font-medium text-gray-500 dark:text-gray-400">{{ t('common.loading') }}</span>
-        </div>
-      </div>
-
-      <!-- Table -->
-      <div v-else>
-        <div v-if="items.length === 0" class="rounded-xl border border-dashed border-gray-200 p-10 text-center dark:border-dark-700">
-          <div class="text-sm font-medium text-gray-600 dark:text-gray-300">{{ t('admin.ops.requestDetails.empty') }}</div>
-          <div class="mt-1 text-xs text-gray-400">{{ t('admin.ops.requestDetails.emptyHint') }}</div>
+      <div class="flex h-full min-h-0 flex-col">
+        <div class="mb-4 flex flex-shrink-0 items-center justify-between">
+          <div class="text-xs text-gray-500 dark:text-gray-400">
+            {{ t('admin.ops.requestDetails.rangeLabel', { range: rangeLabel }) }}
+          </div>
+          <button
+            type="button"
+            class="btn btn-secondary btn-sm"
+            @click="fetchData"
+          >
+            {{ t('common.refresh') }}
+          </button>
        </div>

-        <div v-else class="overflow-hidden rounded-xl border border-gray-200 dark:border-dark-700">
-          <div class="overflow-x-auto">
-            <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
-              <thead class="bg-gray-50 dark:bg-dark-900">
+        <!-- Loading -->
+        <div v-if="loading" class="flex flex-1 items-center justify-center py-16">
+          <div class="flex flex-col items-center gap-3">
+            <svg class="h-8 w-8 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
+              <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
+              <path
+                class="opacity-75"
+                fill="currentColor"
+                d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
+              ></path>
+            </svg>
+            <span class="text-sm font-medium text-gray-500 dark:text-gray-400">{{ t('common.loading') }}</span>
+          </div>
+        </div>
+
+        <!-- Table -->
+        <div v-else class="flex min-h-0 flex-1 flex-col">
+          <div v-if="items.length === 0" class="rounded-xl border border-dashed border-gray-200 p-10 text-center dark:border-dark-700">
+            <div class="text-sm font-medium text-gray-600 dark:text-gray-300">{{ t('admin.ops.requestDetails.empty') }}</div>
+            <div class="mt-1 text-xs text-gray-400">{{ t('admin.ops.requestDetails.emptyHint') }}</div>
+          </div>
+
+          <div v-else class="flex min-h-0 flex-1 flex-col overflow-hidden rounded-xl border border-gray-200 dark:border-dark-700">
+            <div class="min-h-0 flex-1 overflow-auto">
+              <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
+                <thead class="sticky top-0 z-10 bg-gray-50 dark:bg-dark-900">
                <tr>
                  <th class="px-4 py-3 text-left text-[11px] font-bold uppercase tracking-wider text-gray-500 dark:text-gray-400">
                    {{ t('admin.ops.requestDetails.table.time') }}
@@ -265,15 +267,16 @@ const kindBadgeClass = (kind: string) => {
                </tr>
              </tbody>
            </table>
-          </div>
+            </div>

-          <Pagination
-            :total="total"
-            :page="page"
-            :page-size="pageSize"
-            @update:page="handlePageChange"
-            @update:pageSize="handlePageSizeChange"
-          />
+            <Pagination
+              :total="total"
+              :page="page"
+              :page-size="pageSize"
+              @update:page="handlePageChange"
+              @update:pageSize="handlePageSizeChange"
+            />
+          </div>
        </div>
      </div>
    </template>
--- a/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue
+++ b/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue
@@ -45,6 +45,36 @@ function validateRuntimeSettings(settings: OpsAlertRuntimeSettings): ValidationR
    errors.push(t('admin.ops.runtime.validation.evalIntervalRange'))
  }

+  // Thresholds validation
+  const thresholds = settings.thresholds
+  if (thresholds) {
+    if (thresholds.sla_percent_min != null) {
+      if (!Number.isFinite(thresholds.sla_percent_min) || thresholds.sla_percent_min < 0 || thresholds.sla_percent_min > 100) {
+        errors.push('SLA 最低值必须在 0-100 之间')
+      }
+    }
+    if (thresholds.latency_p99_ms_max != null) {
+      if (!Number.isFinite(thresholds.latency_p99_ms_max) || thresholds.latency_p99_ms_max < 0) {
+        errors.push('延迟 P99 最大值必须大于或等于 0')
+      }
+    }
+    if (thresholds.ttft_p99_ms_max != null) {
+      if (!Number.isFinite(thresholds.ttft_p99_ms_max) || thresholds.ttft_p99_ms_max < 0) {
+        errors.push('TTFT P99 最大值必须大于或等于 0')
+      }
+    }
+    if (thresholds.request_error_rate_percent_max != null) {
+      if (!Number.isFinite(thresholds.request_error_rate_percent_max) || thresholds.request_error_rate_percent_max < 0 || thresholds.request_error_rate_percent_max > 100) {
+        errors.push('请求错误率最大值必须在 0-100 之间')
+      }
+    }
+    if (thresholds.upstream_error_rate_percent_max != null) {
+      if (!Number.isFinite(thresholds.upstream_error_rate_percent_max) || thresholds.upstream_error_rate_percent_max < 0 || thresholds.upstream_error_rate_percent_max > 100) {
+        errors.push('上游错误率最大值必须在 0-100 之间')
+      }
+    }
+  }
+
  const lock = settings.distributed_lock
  if (lock?.enabled) {
    if (!lock.key || lock.key.trim().length < 3) {
@@ -130,6 +160,15 @@ function openAlertEditor() {
    if (!Array.isArray(draftAlert.value.silencing.entries)) {
      draftAlert.value.silencing.entries = []
    }
+    if (!draftAlert.value.thresholds) {
+      draftAlert.value.thresholds = {
+        sla_percent_min: 99.5,
+        latency_p99_ms_max: 2000,
+        ttft_p99_ms_max: 500,
+        request_error_rate_percent_max: 5,
+        upstream_error_rate_percent_max: 5
+      }
+    }
  }

  showAlertEditor.value = true
@@ -295,6 +334,81 @@ onMounted(() => {
        <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.runtime.evalIntervalHint') }}</p>
      </div>

+      <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
+        <div class="mb-2 text-sm font-semibold text-gray-900 dark:text-white">指标阈值配置</div>
+        <p class="mb-4 text-xs text-gray-500 dark:text-gray-400">配置各项指标的告警阈值。超出阈值的指标将在看板上以红色显示。</p>
+
+        <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">SLA 最低值 (%)</div>
+            <input
+              v-model.number="draftAlert.thresholds.sla_percent_min"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+              placeholder="99.5"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">SLA 低于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">延迟 P99 最大值 (ms)</div>
+            <input
+              v-model.number="draftAlert.thresholds.latency_p99_ms_max"
+              type="number"
+              min="0"
+              step="100"
+              class="input"
+              placeholder="2000"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">延迟 P99 高于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">TTFT P99 最大值 (ms)</div>
+            <input
+              v-model.number="draftAlert.thresholds.ttft_p99_ms_max"
+              type="number"
+              min="0"
+              step="100"
+              class="input"
+              placeholder="500"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">TTFT P99 高于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">请求错误率最大值 (%)</div>
+            <input
+              v-model.number="draftAlert.thresholds.request_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+              placeholder="5"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">请求错误率高于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">上游错误率最大值 (%)</div>
+            <input
+              v-model.number="draftAlert.thresholds.upstream_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+              placeholder="5"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">上游错误率高于此值时将显示为红色</p>
+          </div>
+        </div>
+      </div>
+
      <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
        <div class="mb-2 text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.runtime.silencing.title') }}</div>

--- a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
+++ b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
@@ -6,7 +6,7 @@ import { opsAPI } from '@/api/admin/ops'
 import BaseDialog from '@/components/common/BaseDialog.vue'
 import Select from '@/components/common/Select.vue'
 import Toggle from '@/components/common/Toggle.vue'
-import type { OpsAlertRuntimeSettings, EmailNotificationConfig, AlertSeverity, OpsAdvancedSettings } from '../types'
+import type { OpsAlertRuntimeSettings, EmailNotificationConfig, AlertSeverity, OpsAdvancedSettings, OpsMetricThresholds } from '../types'

 const { t } = useI18n()
 const appStore = useAppStore()
@@ -29,19 +29,38 @@ const runtimeSettings = ref<OpsAlertRuntimeSettings | null>(null)
 const emailConfig = ref<EmailNotificationConfig | null>(null)
 // 高级设置
 const advancedSettings = ref<OpsAdvancedSettings | null>(null)
+// Metric threshold configuration; these literals are the initial values held before any settings are loaded
+const metricThresholds = ref<OpsMetricThresholds>({
+  sla_percent_min: 99.5,
+  latency_p99_ms_max: 2000,
+  ttft_p99_ms_max: 500,
+  request_error_rate_percent_max: 5,
+  upstream_error_rate_percent_max: 5
+})

 // 加载所有配置
 async function loadAllSettings() {
  loading.value = true
  try {
-    const [runtime, email, advanced] = await Promise.all([
+    const [runtime, email, advanced, thresholds] = await Promise.all([
      opsAPI.getAlertRuntimeSettings(),
      opsAPI.getEmailNotificationConfig(),
-      opsAPI.getAdvancedSettings()
+      opsAPI.getAdvancedSettings(),
+      opsAPI.getMetricThresholds()
    ])
    runtimeSettings.value = runtime
    emailConfig.value = email
    advancedSettings.value = advanced
+    // 如果后端返回了阈值，使用后端的值；否则保持默认值
+    if (thresholds && Object.keys(thresholds).length > 0) {
+      metricThresholds.value = {
+        sla_percent_min: thresholds.sla_percent_min ?? 99.5,
+        latency_p99_ms_max: thresholds.latency_p99_ms_max ?? 2000,
+        ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500,
+        request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5,
+        upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5
+      }
+    }
  } catch (err: any) {
    console.error('[OpsSettingsDialog] Failed to load settings', err)
    appStore.showError(err?.response?.data?.detail || t('admin.ops.settings.loadFailed'))
@@ -138,6 +157,23 @@ const validation = computed(() => {
    }
  }

+  // Validate metric thresholds. Number.isFinite also rejects NaN and '' (the value v-model.number leaves behind for an emptied input).
+  if (metricThresholds.value.sla_percent_min != null && (!Number.isFinite(metricThresholds.value.sla_percent_min) || metricThresholds.value.sla_percent_min < 0 || metricThresholds.value.sla_percent_min > 100)) {
+    errors.push('SLA最低百分比必须在0-100之间')
+  }
+  if (metricThresholds.value.latency_p99_ms_max != null && (!Number.isFinite(metricThresholds.value.latency_p99_ms_max) || metricThresholds.value.latency_p99_ms_max < 0)) {
+    errors.push('延迟P99最大值必须大于等于0')
+  }
+  if (metricThresholds.value.ttft_p99_ms_max != null && (!Number.isFinite(metricThresholds.value.ttft_p99_ms_max) || metricThresholds.value.ttft_p99_ms_max < 0)) {
+    errors.push('TTFT P99最大值必须大于等于0')
+  }
+  if (metricThresholds.value.request_error_rate_percent_max != null && (!Number.isFinite(metricThresholds.value.request_error_rate_percent_max) || metricThresholds.value.request_error_rate_percent_max < 0 || metricThresholds.value.request_error_rate_percent_max > 100)) {
+    errors.push('请求错误率最大值必须在0-100之间')
+  }
+  if (metricThresholds.value.upstream_error_rate_percent_max != null && (!Number.isFinite(metricThresholds.value.upstream_error_rate_percent_max) || metricThresholds.value.upstream_error_rate_percent_max < 0 || metricThresholds.value.upstream_error_rate_percent_max > 100)) {
+    errors.push('上游错误率最大值必须在0-100之间')
+  }
+
  return { valid: errors.length === 0, errors }
 })

@@ -153,14 +189,15 @@ async function saveAllSettings() {
    await Promise.all([
      runtimeSettings.value ? opsAPI.updateAlertRuntimeSettings(runtimeSettings.value) : Promise.resolve(),
      emailConfig.value ? opsAPI.updateEmailNotificationConfig(emailConfig.value) : Promise.resolve(),
-      advancedSettings.value ? opsAPI.updateAdvancedSettings(advancedSettings.value) : Promise.resolve()
+      advancedSettings.value ? opsAPI.updateAdvancedSettings(advancedSettings.value) : Promise.resolve(),
+      opsAPI.updateMetricThresholds(metricThresholds.value)
    ])
    appStore.showSuccess(t('admin.ops.settings.saveSuccess'))
    emit('saved')
    emit('close')
  } catch (err: any) {
    console.error('[OpsSettingsDialog] Failed to save settings', err)
-    appStore.showError(err?.response?.data?.detail || t('admin.ops.settings.saveFailed'))
+    appStore.showError(err?.response?.data?.message || err?.response?.data?.detail || t('admin.ops.settings.saveFailed'))
  } finally {
    saving.value = false
  }
@@ -306,6 +343,77 @@ async function saveAllSettings() {
        </div>
      </div>

+      <!-- Metric thresholds: numeric inputs bound to metricThresholds. NOTE(review): v-model.number yields '' (not null) when a field is cleared; confirm validation and the API handle that -->
+      <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
+        <h4 class="mb-3 text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.settings.metricThresholds') }}</h4>
+        <p class="mb-4 text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.settings.metricThresholdsHint') }}</p>
+
+        <div class="space-y-4">
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.slaMinPercent') }}</label>
+            <input
+              v-model.number="metricThresholds.sla_percent_min"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.slaMinPercentHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.latencyP99MaxMs') }}</label>
+            <input
+              v-model.number="metricThresholds.latency_p99_ms_max"
+              type="number"
+              min="0"
+              step="100"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.latencyP99MaxMsHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.ttftP99MaxMs') }}</label>
+            <input
+              v-model.number="metricThresholds.ttft_p99_ms_max"
+              type="number"
+              min="0"
+              step="50"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.ttftP99MaxMsHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.requestErrorRateMaxPercent') }}</label>
+            <input
+              v-model.number="metricThresholds.request_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.requestErrorRateMaxPercentHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.upstreamErrorRateMaxPercent') }}</label>
+            <input
+              v-model.number="metricThresholds.upstream_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.upstreamErrorRateMaxPercentHint') }}</p>
+          </div>
+        </div>
+      </div>
+
      <!-- 高级设置 -->
      <details class="rounded-2xl bg-gray-50 dark:bg-dark-700/50">
        <summary class="cursor-pointer p-4 text-sm font-semibold text-gray-900 dark:text-white">
@@ -379,6 +487,48 @@ async function saveAllSettings() {
              <Toggle v-model="advancedSettings.aggregation.aggregation_enabled" />
            </div>
          </div>
+
+          <!-- Error filtering: toggle bound to advancedSettings.ignore_count_tokens_errors. NOTE(review): strings below are hardcoded rather than going through t(); confirm that is intended -->
+          <div class="space-y-3">
+            <h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">错误过滤</h5>
+
+            <div class="flex items-center justify-between">
+              <div>
+                <label class="text-sm font-medium text-gray-700 dark:text-gray-300">忽略 count_tokens 错误</label>
+                <p class="mt-1 text-xs text-gray-500">
+                  启用后，count_tokens 请求的错误将不计入运维监控的统计和告警中（但仍会存储在数据库中）
+                </p>
+              </div>
+              <Toggle v-model="advancedSettings.ignore_count_tokens_errors" />
+            </div>
+          </div>
+
+          <!-- Auto refresh: enable toggle plus an interval select (15/30/60 seconds) rendered only while auto_refresh_enabled is truthy. NOTE(review): strings below are hardcoded rather than going through t() -->
+          <div class="space-y-3">
+            <h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">自动刷新</h5>
+
+            <div class="flex items-center justify-between">
+              <div>
+                <label class="text-sm font-medium text-gray-700 dark:text-gray-300">启用自动刷新</label>
+                <p class="mt-1 text-xs text-gray-500">
+                  自动刷新仪表板数据，启用后会定期拉取最新数据
+                </p>
+              </div>
+              <Toggle v-model="advancedSettings.auto_refresh_enabled" />
+            </div>
+
+            <div v-if="advancedSettings.auto_refresh_enabled">
+              <label class="input-label">刷新间隔</label>
+              <Select
+                v-model="advancedSettings.auto_refresh_interval_seconds"
+                :options="[
+                  { value: 15, label: '15 秒' },
+                  { value: 30, label: '30 秒' },
+                  { value: 60, label: '60 秒' }
+                ]"
+              />
+            </div>
+          </div>
        </div>
      </details>
    </div>
--- a/frontend/src/views/admin/ops/components/OpsThroughputTrendChart.vue
+++ b/frontend/src/views/admin/ops/components/OpsThroughputTrendChart.vue
@@ -19,6 +19,7 @@ interface Props {
  timeRange: string
  byPlatform?: OpsThroughputPlatformBreakdownItem[]
  topGroups?: OpsThroughputGroupBreakdownItem[]
+  fullscreen?: boolean
 }

 const props = defineProps<Props>()
@@ -179,38 +180,40 @@ function downloadChart() {
          <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6" />
        </svg>
        {{ t('admin.ops.throughputTrend') }}
-        <HelpTooltip :content="t('admin.ops.tooltips.throughputTrend')" />
+        <HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.throughputTrend')" />
      </h3>
      <div class="flex items-center gap-2 text-xs text-gray-500 dark:text-gray-400">
        <span class="flex items-center gap-1"><span class="h-2 w-2 rounded-full bg-blue-500"></span>{{ t('admin.ops.qps') }}</span>
        <span class="flex items-center gap-1"><span class="h-2 w-2 rounded-full bg-green-500"></span>{{ t('admin.ops.tpsK') }}</span>
-        <button
-          type="button"
-          class="ml-2 inline-flex items-center rounded-lg border border-gray-200 bg-white px-2 py-1 text-[11px] font-semibold text-gray-600 hover:bg-gray-50 disabled:opacity-50 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:hover:bg-dark-800"
-          :disabled="state !== 'ready'"
-          :title="t('admin.ops.requestDetails.title')"
-          @click="emit('openDetails')"
-        >
-          {{ t('admin.ops.requestDetails.details') }}
-        </button>
-        <button
-          type="button"
-          class="ml-2 inline-flex items-center rounded-lg border border-gray-200 bg-white px-2 py-1 text-[11px] font-semibold text-gray-600 hover:bg-gray-50 disabled:opacity-50 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:hover:bg-dark-800"
-          :disabled="state !== 'ready'"
-          :title="t('admin.ops.charts.resetZoomHint')"
-          @click="resetZoom"
-        >
-          {{ t('admin.ops.charts.resetZoom') }}
-        </button>
-        <button
-          type="button"
-          class="inline-flex items-center rounded-lg border border-gray-200 bg-white px-2 py-1 text-[11px] font-semibold text-gray-600 hover:bg-gray-50 disabled:opacity-50 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:hover:bg-dark-800"
-          :disabled="state !== 'ready'"
-          :title="t('admin.ops.charts.downloadChartHint')"
-          @click="downloadChart"
-        >
-          {{ t('admin.ops.charts.downloadChart') }}
-        </button>
+        <template v-if="!props.fullscreen">
+          <button
+            type="button"
+            class="ml-2 inline-flex items-center rounded-lg border border-gray-200 bg-white px-2 py-1 text-[11px] font-semibold text-gray-600 hover:bg-gray-50 disabled:opacity-50 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:hover:bg-dark-800"
+            :disabled="state !== 'ready'"
+            :title="t('admin.ops.requestDetails.title')"
+            @click="emit('openDetails')"
+          >
+            {{ t('admin.ops.requestDetails.details') }}
+          </button>
+          <button
+            type="button"
+            class="ml-2 inline-flex items-center rounded-lg border border-gray-200 bg-white px-2 py-1 text-[11px] font-semibold text-gray-600 hover:bg-gray-50 disabled:opacity-50 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:hover:bg-dark-800"
+            :disabled="state !== 'ready'"
+            :title="t('admin.ops.charts.resetZoomHint')"
+            @click="resetZoom"
+          >
+            {{ t('admin.ops.charts.resetZoom') }}
+          </button>
+          <button
+            type="button"
+            class="inline-flex items-center rounded-lg border border-gray-200 bg-white px-2 py-1 text-[11px] font-semibold text-gray-600 hover:bg-gray-50 disabled:opacity-50 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:hover:bg-dark-800"
+            :disabled="state !== 'ready'"
+            :title="t('admin.ops.charts.downloadChartHint')"
+            @click="downloadChart"
+          >
+            {{ t('admin.ops.charts.downloadChart') }}
+          </button>
+        </template>
      </div>
    </div>

--- a/frontend/src/views/admin/ops/types.ts
+++ b/frontend/src/views/admin/ops/types.ts
@@ -14,6 +14,7 @@ export type {
  EmailNotificationConfig,
  OpsDistributedLockSettings,
  OpsAlertRuntimeSettings,
+  OpsMetricThresholds,
  OpsAdvancedSettings,
  OpsDataRetentionSettings,
  OpsAggregationSettings