feat: OpenCode 配置提示添加配置文件路径说明

fix: 修复反向代理下客户端 IP 获取错误
Merge pull request #254 from IanShaw027/feat/ops-count-tokens-filter-and-auto-refresh
2026-04-04 07:22:13 +08:00 · 2026-01-12 20:49:54 +08:00 · 2026-01-12 20:44:38 +08:00 · 2026-01-12 17:31:54 +08:00 · 2026-01-12 17:28:25 +08:00 · 2026-01-12 17:18:49 +08:00
71 changed files with 4402 additions and 538 deletions
--- a/backend/cmd/server/wire.go
+++ b/backend/cmd/server/wire.go
@@ -67,6 +67,7 @@ func provideCleanup(
 	opsAlertEvaluator *service.OpsAlertEvaluatorService,
 	opsCleanup *service.OpsCleanupService,
 	opsScheduledReport *service.OpsScheduledReportService,
+	schedulerSnapshot *service.SchedulerSnapshotService,
 	tokenRefresh *service.TokenRefreshService,
 	accountExpiry *service.AccountExpiryService,
 	pricing *service.PricingService,
@@ -116,6 +117,12 @@ func provideCleanup(
 				}
 				return nil
 			}},
+			{"SchedulerSnapshotService", func() error {
+				if schedulerSnapshot != nil {
+					schedulerSnapshot.Stop()
+				}
+				return nil
+			}},
 			{"TokenRefreshService", func() error {
 				tokenRefresh.Stop()
 				return nil
--- a/backend/cmd/server/wire_gen.go
+++ b/backend/cmd/server/wire_gen.go
@@ -97,7 +97,8 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	antigravityOAuthService := service.NewAntigravityOAuthService(proxyRepository)
 	geminiQuotaService := service.NewGeminiQuotaService(configConfig, settingRepository)
 	tempUnschedCache := repository.NewTempUnschedCache(redisClient)
-	rateLimitService := service.NewRateLimitService(accountRepository, usageLogRepository, configConfig, geminiQuotaService, tempUnschedCache)
+	timeoutCounterCache := repository.NewTimeoutCounterCache(redisClient)
+	rateLimitService := service.ProvideRateLimitService(accountRepository, usageLogRepository, configConfig, geminiQuotaService, tempUnschedCache, timeoutCounterCache, settingService)
 	claudeUsageFetcher := repository.NewClaudeUsageFetcher()
 	antigravityQuotaFetcher := service.NewAntigravityQuotaFetcher(proxyRepository)
 	usageCache := service.NewUsageCache()
@@ -111,6 +112,9 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	accountTestService := service.NewAccountTestService(accountRepository, geminiTokenProvider, antigravityGatewayService, httpUpstream, configConfig)
 	concurrencyCache := repository.ProvideConcurrencyCache(redisClient, configConfig)
 	concurrencyService := service.ProvideConcurrencyService(concurrencyCache, accountRepository, configConfig)
+	schedulerCache := repository.NewSchedulerCache(redisClient)
+	schedulerOutboxRepository := repository.NewSchedulerOutboxRepository(db)
+	schedulerSnapshotService := service.ProvideSchedulerSnapshotService(schedulerCache, schedulerOutboxRepository, accountRepository, groupRepository, configConfig)
 	crsSyncService := service.NewCRSSyncService(accountRepository, proxyRepository, oAuthService, openAIOAuthService, geminiOAuthService, configConfig)
 	accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService)
 	oAuthHandler := admin.NewOAuthHandler(oAuthService)
@@ -130,9 +134,9 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	identityCache := repository.NewIdentityCache(redisClient)
 	identityService := service.NewIdentityService(identityCache)
 	deferredService := service.ProvideDeferredService(accountRepository, timingWheelService)
-	gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService)
-	openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService)
-	geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
+	gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService)
+	openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService)
+	geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)
 	opsService := service.NewOpsService(opsRepository, settingRepository, configConfig, accountRepository, concurrencyService, gatewayService, openAIGatewayService, geminiMessagesCompatService, antigravityGatewayService)
 	settingHandler := admin.NewSettingHandler(settingService, emailService, turnstileService, opsService)
 	opsHandler := admin.NewOpsHandler(opsService)
@@ -164,7 +168,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	opsScheduledReportService := service.ProvideOpsScheduledReportService(opsService, userService, emailService, redisClient, configConfig)
 	tokenRefreshService := service.ProvideTokenRefreshService(accountRepository, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, configConfig)
 	accountExpiryService := service.ProvideAccountExpiryService(accountRepository)
-	v := provideCleanup(client, redisClient, opsMetricsCollector, opsAggregationService, opsAlertEvaluatorService, opsCleanupService, opsScheduledReportService, tokenRefreshService, accountExpiryService, pricingService, emailQueueService, billingCacheService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService)
+	v := provideCleanup(client, redisClient, opsMetricsCollector, opsAggregationService, opsAlertEvaluatorService, opsCleanupService, opsScheduledReportService, schedulerSnapshotService, tokenRefreshService, accountExpiryService, pricingService, emailQueueService, billingCacheService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService)
 	application := &Application{
 		Server:  httpServer,
 		Cleanup: v,
@@ -194,6 +198,7 @@ func provideCleanup(
 	opsAlertEvaluator *service.OpsAlertEvaluatorService,
 	opsCleanup *service.OpsCleanupService,
 	opsScheduledReport *service.OpsScheduledReportService,
+	schedulerSnapshot *service.SchedulerSnapshotService,
 	tokenRefresh *service.TokenRefreshService,
 	accountExpiry *service.AccountExpiryService,
 	pricing *service.PricingService,
@@ -242,6 +247,12 @@ func provideCleanup(
 				}
 				return nil
 			}},
+			{"SchedulerSnapshotService", func() error {
+				if schedulerSnapshot != nil {
+					schedulerSnapshot.Stop()
+				}
+				return nil
+			}},
 			{"TokenRefreshService", func() error {
 				tokenRefresh.Stop()
 				return nil
--- a/backend/internal/config/config.go
+++ b/backend/internal/config/config.go
@@ -270,6 +270,29 @@ type GatewaySchedulingConfig struct {

 	// 过期槽位清理周期（0 表示禁用）
 	SlotCleanupInterval time.Duration `mapstructure:"slot_cleanup_interval"`
+
+	// 受控回源配置
+	DbFallbackEnabled bool `mapstructure:"db_fallback_enabled"`
+	// 受控回源超时（秒），0 表示不额外收紧超时
+	DbFallbackTimeoutSeconds int `mapstructure:"db_fallback_timeout_seconds"`
+	// 受控回源限流（实例级 QPS），0 表示不限制
+	DbFallbackMaxQPS int `mapstructure:"db_fallback_max_qps"`
+
+	// Outbox 轮询与滞后阈值配置
+	// Outbox 轮询周期（秒）
+	OutboxPollIntervalSeconds int `mapstructure:"outbox_poll_interval_seconds"`
+	// Outbox 滞后告警阈值（秒）
+	OutboxLagWarnSeconds int `mapstructure:"outbox_lag_warn_seconds"`
+	// Outbox 触发强制重建阈值（秒）
+	OutboxLagRebuildSeconds int `mapstructure:"outbox_lag_rebuild_seconds"`
+	// Outbox 连续滞后触发次数
+	OutboxLagRebuildFailures int `mapstructure:"outbox_lag_rebuild_failures"`
+	// Outbox 积压触发重建阈值（行数）
+	OutboxBacklogRebuildRows int `mapstructure:"outbox_backlog_rebuild_rows"`
+
+	// 全量重建周期配置
+	// 全量重建周期（秒），0 表示禁用
+	FullRebuildIntervalSeconds int `mapstructure:"full_rebuild_interval_seconds"`
 }

 func (s *ServerConfig) Address() string {
@@ -744,11 +767,20 @@ func setDefaults() {
 	viper.SetDefault("gateway.stream_keepalive_interval", 10)
 	viper.SetDefault("gateway.max_line_size", 10*1024*1024)
 	viper.SetDefault("gateway.scheduling.sticky_session_max_waiting", 3)
-	viper.SetDefault("gateway.scheduling.sticky_session_wait_timeout", 45*time.Second)
+	viper.SetDefault("gateway.scheduling.sticky_session_wait_timeout", 120*time.Second)
 	viper.SetDefault("gateway.scheduling.fallback_wait_timeout", 30*time.Second)
 	viper.SetDefault("gateway.scheduling.fallback_max_waiting", 100)
 	viper.SetDefault("gateway.scheduling.load_batch_enabled", true)
 	viper.SetDefault("gateway.scheduling.slot_cleanup_interval", 30*time.Second)
+	viper.SetDefault("gateway.scheduling.db_fallback_enabled", true)
+	viper.SetDefault("gateway.scheduling.db_fallback_timeout_seconds", 0)
+	viper.SetDefault("gateway.scheduling.db_fallback_max_qps", 0)
+	viper.SetDefault("gateway.scheduling.outbox_poll_interval_seconds", 1)
+	viper.SetDefault("gateway.scheduling.outbox_lag_warn_seconds", 5)
+	viper.SetDefault("gateway.scheduling.outbox_lag_rebuild_seconds", 10)
+	viper.SetDefault("gateway.scheduling.outbox_lag_rebuild_failures", 3)
+	viper.SetDefault("gateway.scheduling.outbox_backlog_rebuild_rows", 10000)
+	viper.SetDefault("gateway.scheduling.full_rebuild_interval_seconds", 300)
 	viper.SetDefault("concurrency.ping_interval", 10)

 	// TokenRefresh
@@ -1021,6 +1053,35 @@ func (c *Config) Validate() error {
 	if c.Gateway.Scheduling.SlotCleanupInterval < 0 {
 		return fmt.Errorf("gateway.scheduling.slot_cleanup_interval must be non-negative")
 	}
+	if c.Gateway.Scheduling.DbFallbackTimeoutSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.db_fallback_timeout_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.DbFallbackMaxQPS < 0 {
+		return fmt.Errorf("gateway.scheduling.db_fallback_max_qps must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxPollIntervalSeconds <= 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_poll_interval_seconds must be positive")
+	}
+	if c.Gateway.Scheduling.OutboxLagWarnSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_warn_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxLagRebuildSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_rebuild_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxLagRebuildFailures <= 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_rebuild_failures must be positive")
+	}
+	if c.Gateway.Scheduling.OutboxBacklogRebuildRows < 0 {
+		return fmt.Errorf("gateway.scheduling.outbox_backlog_rebuild_rows must be non-negative")
+	}
+	if c.Gateway.Scheduling.FullRebuildIntervalSeconds < 0 {
+		return fmt.Errorf("gateway.scheduling.full_rebuild_interval_seconds must be non-negative")
+	}
+	if c.Gateway.Scheduling.OutboxLagWarnSeconds > 0 &&
+		c.Gateway.Scheduling.OutboxLagRebuildSeconds > 0 &&
+		c.Gateway.Scheduling.OutboxLagRebuildSeconds < c.Gateway.Scheduling.OutboxLagWarnSeconds {
+		return fmt.Errorf("gateway.scheduling.outbox_lag_rebuild_seconds must be >= outbox_lag_warn_seconds")
+	}
 	if c.Ops.MetricsCollectorCache.TTL < 0 {
 		return fmt.Errorf("ops.metrics_collector_cache.ttl must be non-negative")
 	}
--- a/backend/internal/config/config_test.go
+++ b/backend/internal/config/config_test.go
@@ -39,8 +39,8 @@ func TestLoadDefaultSchedulingConfig(t *testing.T) {
 	if cfg.Gateway.Scheduling.StickySessionMaxWaiting != 3 {
 		t.Fatalf("StickySessionMaxWaiting = %d, want 3", cfg.Gateway.Scheduling.StickySessionMaxWaiting)
 	}
-	if cfg.Gateway.Scheduling.StickySessionWaitTimeout != 45*time.Second {
-		t.Fatalf("StickySessionWaitTimeout = %v, want 45s", cfg.Gateway.Scheduling.StickySessionWaitTimeout)
+	if cfg.Gateway.Scheduling.StickySessionWaitTimeout != 120*time.Second {
+		t.Fatalf("StickySessionWaitTimeout = %v, want 120s", cfg.Gateway.Scheduling.StickySessionWaitTimeout)
 	}
 	if cfg.Gateway.Scheduling.FallbackWaitTimeout != 30*time.Second {
 		t.Fatalf("FallbackWaitTimeout = %v, want 30s", cfg.Gateway.Scheduling.FallbackWaitTimeout)
--- a/backend/internal/handler/admin/ops_realtime_handler.go
+++ b/backend/internal/handler/admin/ops_realtime_handler.go
@@ -118,3 +118,96 @@ func (h *OpsHandler) GetAccountAvailability(c *gin.Context) {
 	}
 	response.Success(c, payload)
 }
+
+func parseOpsRealtimeWindow(v string) (time.Duration, string, bool) { // results: window duration, canonical label, ok
+	switch strings.ToLower(strings.TrimSpace(v)) { // matching ignores case and surrounding whitespace
+	case "", "1min", "1m": // an empty value selects the 1-minute window
+		return 1 * time.Minute, "1min", true
+	case "5min", "5m":
+		return 5 * time.Minute, "5min", true
+	case "30min", "30m":
+		return 30 * time.Minute, "30min", true
+	case "1h", "60m", "60min": // every 60-minute spelling maps to the "1h" label
+		return 1 * time.Hour, "1h", true
+	default: // unrecognized value: zero duration, empty label, ok=false
+		return 0, "", false
+	}
+}
+
+// GetRealtimeTrafficSummary returns QPS/TPS current/peak/avg for the selected window.
+// GET /api/v1/admin/ops/realtime-traffic
+//
+// Query params:
+// - window: 1min|5min|30min|1h, aliases 1m|5m|30m|60m|60min also accepted (default: 1min)
+// - platform: optional
+// - group_id: optional positive integer
+func (h *OpsHandler) GetRealtimeTrafficSummary(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	windowDur, windowLabel, ok := parseOpsRealtimeWindow(c.Query("window"))
+	if !ok {
+		response.BadRequest(c, "Invalid window")
+		return
+	}
+
+	platform := strings.TrimSpace(c.Query("platform"))
+	var groupID *int64 // stays nil when group_id is absent or blank
+	if v := strings.TrimSpace(c.Query("group_id")); v != "" {
+		id, err := strconv.ParseInt(v, 10, 64)
+		if err != nil || id <= 0 {
+			response.BadRequest(c, "Invalid group_id")
+			return
+		}
+		groupID = &id
+	}
+
+	endTime := time.Now().UTC() // the window is [now-windowDur, now], expressed in UTC
+	startTime := endTime.Add(-windowDur)
+
+	if !h.opsService.IsRealtimeMonitoringEnabled(c.Request.Context()) { // disabled: answer with a zero-valued summary and enabled=false
+		disabledSummary := &service.OpsRealtimeTrafficSummary{
+			Window:    windowLabel,
+			StartTime: startTime,
+			EndTime:   endTime,
+			Platform:  platform,
+			GroupID:   groupID,
+			QPS:       service.OpsRateSummary{},
+			TPS:       service.OpsRateSummary{},
+		}
+		response.Success(c, gin.H{
+			"enabled":   false,
+			"summary":   disabledSummary,
+			"timestamp": endTime,
+		})
+		return
+	}
+
+	filter := &service.OpsDashboardFilter{
+		StartTime: startTime,
+		EndTime:   endTime,
+		Platform:  platform,
+		GroupID:   groupID,
+		QueryMode: service.OpsQueryModeRaw,
+	}
+
+	summary, err := h.opsService.GetRealtimeTrafficSummary(c.Request.Context(), filter)
+	if err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+	if summary != nil { // stamp the canonical window label onto the service result
+		summary.Window = windowLabel
+	}
+	response.Success(c, gin.H{
+		"enabled":   true,
+		"summary":   summary,
+		"timestamp": endTime,
+	})
+}
--- a/backend/internal/handler/admin/ops_settings_handler.go
+++ b/backend/internal/handler/admin/ops_settings_handler.go
@@ -146,3 +146,49 @@ func (h *OpsHandler) UpdateAdvancedSettings(c *gin.Context) {
 	}
 	response.Success(c, updated)
 }
+
+// GetMetricThresholds returns Ops metric thresholds (DB-backed).
+// GET /api/v1/admin/ops/settings/metric-thresholds
+func (h *OpsHandler) GetMetricThresholds(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	cfg, err := h.opsService.GetMetricThresholds(c.Request.Context())
+	if err != nil {
+		response.Error(c, http.StatusInternalServerError, "Failed to get metric thresholds") // fixed message; the underlying error is not sent to the client
+		return
+	}
+	response.Success(c, cfg)
+}
+
+// UpdateMetricThresholds updates Ops metric thresholds (DB-backed).
+// PUT /api/v1/admin/ops/settings/metric-thresholds
+func (h *OpsHandler) UpdateMetricThresholds(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	var req service.OpsMetricThresholds
+	if err := c.ShouldBindJSON(&req); err != nil {
+		response.BadRequest(c, "Invalid request body")
+		return
+	}
+
+	updated, err := h.opsService.UpdateMetricThresholds(c.Request.Context(), &req)
+	if err != nil {
+		response.Error(c, http.StatusBadRequest, err.Error()) // NOTE(review): every service error becomes a 400 carrying its raw message — confirm storage errors cannot reach here
+		return
+	}
+	response.Success(c, updated)
+}
--- a/backend/internal/handler/admin/setting_handler.go
+++ b/backend/internal/handler/admin/setting_handler.go
@@ -654,3 +654,68 @@ func (h *SettingHandler) DeleteAdminAPIKey(c *gin.Context) {

 	response.Success(c, gin.H{"message": "Admin API key deleted"})
 }
+
+// GetStreamTimeoutSettings returns the stream-timeout handling settings.
+// GET /api/v1/admin/settings/stream-timeout
+func (h *SettingHandler) GetStreamTimeoutSettings(c *gin.Context) {
+	settings, err := h.settingService.GetStreamTimeoutSettings(c.Request.Context())
+	if err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	response.Success(c, dto.StreamTimeoutSettings{ // copy the service value into the response DTO field by field
+		Enabled:                settings.Enabled,
+		Action:                 settings.Action,
+		TempUnschedMinutes:     settings.TempUnschedMinutes,
+		ThresholdCount:         settings.ThresholdCount,
+		ThresholdWindowMinutes: settings.ThresholdWindowMinutes,
+	})
+}
+
+// UpdateStreamTimeoutSettingsRequest is the JSON request body for updating the stream-timeout settings.
+type UpdateStreamTimeoutSettingsRequest struct {
+	Enabled                bool   `json:"enabled"`
+	Action                 string `json:"action"`
+	TempUnschedMinutes     int    `json:"temp_unsched_minutes"`
+	ThresholdCount         int    `json:"threshold_count"`
+	ThresholdWindowMinutes int    `json:"threshold_window_minutes"`
+}
+
+// UpdateStreamTimeoutSettings updates the stream-timeout handling settings.
+// PUT /api/v1/admin/settings/stream-timeout
+func (h *SettingHandler) UpdateStreamTimeoutSettings(c *gin.Context) {
+	var req UpdateStreamTimeoutSettingsRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		response.BadRequest(c, "Invalid request: "+err.Error())
+		return
+	}
+
+	settings := &service.StreamTimeoutSettings{
+		Enabled:                req.Enabled,
+		Action:                 req.Action,
+		TempUnschedMinutes:     req.TempUnschedMinutes,
+		ThresholdCount:         req.ThresholdCount,
+		ThresholdWindowMinutes: req.ThresholdWindowMinutes,
+	}
+
+	if err := h.settingService.SetStreamTimeoutSettings(c.Request.Context(), settings); err != nil {
+		response.BadRequest(c, err.Error()) // NOTE(review): every service error is reported as a bad request with its raw message — confirm that is intended
+		return
+	}
+
+	// Fetch the settings again and return them in the response.
+	updatedSettings, err := h.settingService.GetStreamTimeoutSettings(c.Request.Context())
+	if err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	response.Success(c, dto.StreamTimeoutSettings{
+		Enabled:                updatedSettings.Enabled,
+		Action:                 updatedSettings.Action,
+		TempUnschedMinutes:     updatedSettings.TempUnschedMinutes,
+		ThresholdCount:         updatedSettings.ThresholdCount,
+		ThresholdWindowMinutes: updatedSettings.ThresholdWindowMinutes,
+	})
+}
--- a/backend/internal/handler/auth_handler.go
+++ b/backend/internal/handler/auth_handler.go
@@ -3,6 +3,7 @@ package handler
 import (
 	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/Wei-Shaw/sub2api/internal/handler/dto"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/response"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -76,7 +77,7 @@ func (h *AuthHandler) Register(c *gin.Context) {

 	// Turnstile 验证（当提供了邮箱验证码时跳过，因为发送验证码时已验证过）
 	if req.VerifyCode == "" {
-		if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, c.ClientIP()); err != nil {
+		if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, ip.GetClientIP(c)); err != nil {
 			response.ErrorFrom(c, err)
 			return
 		}
@@ -105,7 +106,7 @@ func (h *AuthHandler) SendVerifyCode(c *gin.Context) {
 	}

 	// Turnstile 验证
-	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, c.ClientIP()); err != nil {
+	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, ip.GetClientIP(c)); err != nil {
 		response.ErrorFrom(c, err)
 		return
 	}
@@ -132,7 +133,7 @@ func (h *AuthHandler) Login(c *gin.Context) {
 	}

 	// Turnstile 验证
-	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, c.ClientIP()); err != nil {
+	if err := h.authService.VerifyTurnstile(c.Request.Context(), req.TurnstileToken, ip.GetClientIP(c)); err != nil {
 		response.ErrorFrom(c, err)
 		return
 	}
--- a/backend/internal/handler/dto/settings.go
+++ b/backend/internal/handler/dto/settings.go
@@ -66,3 +66,12 @@ type PublicSettings struct {
 	LinuxDoOAuthEnabled bool   `json:"linuxdo_oauth_enabled"`
 	Version             string `json:"version"`
 }
+
+// StreamTimeoutSettings is the DTO for the stream-timeout handling settings.
+type StreamTimeoutSettings struct {
+	Enabled                bool   `json:"enabled"`
+	Action                 string `json:"action"`
+	TempUnschedMinutes     int    `json:"temp_unsched_minutes"`
+	ThresholdCount         int    `json:"threshold_count"`
+	ThresholdWindowMinutes int    `json:"threshold_window_minutes"`
+}
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -15,6 +15,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/claude"
 	pkgerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -88,6 +89,9 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		return
 	}

+	// 检查是否为 Claude Code 客户端，设置到 context 中
+	SetClaudeCodeClientContext(c, body)
+
 	setOpsRequestContext(c, "", false, body)

 	parsedReq, err := service.ParseGatewayRequest(body)
@@ -286,8 +290,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				return
 			}

+			// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+			userAgent := c.GetHeader("User-Agent")
+			clientIP := ip.GetClientIP(c)
+
 			// 异步记录使用量（subscription已在函数开头获取）
-			go func(result *service.ForwardResult, usedAccount *service.Account) {
+			go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string) {
 				ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 				defer cancel()
 				if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
@@ -296,10 +304,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					User:         apiKey.User,
 					Account:      usedAccount,
 					Subscription: subscription,
+					UserAgent:    ua,
+					IPAddress:    clientIP,
 				}); err != nil {
 					log.Printf("Record usage failed: %v", err)
 				}
-			}(result, account)
+			}(result, account, userAgent, clientIP)
 			return
 		}
 	}
@@ -414,8 +424,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			return
 		}

+		// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+		userAgent := c.GetHeader("User-Agent")
+		clientIP := ip.GetClientIP(c)
+
 		// 异步记录使用量（subscription已在函数开头获取）
-		go func(result *service.ForwardResult, usedAccount *service.Account) {
+		go func(result *service.ForwardResult, usedAccount *service.Account, ua, clientIP string) {
 			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 			defer cancel()
 			if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
@@ -424,10 +438,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				User:         apiKey.User,
 				Account:      usedAccount,
 				Subscription: subscription,
+				UserAgent:    ua,
+				IPAddress:    clientIP,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
-		}(result, account)
+		}(result, account, userAgent, clientIP)
 		return
 	}
 }
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -12,6 +12,7 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/gemini"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/googleapi"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"

@@ -314,8 +315,12 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 			return
 		}

+		// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+		userAgent := c.GetHeader("User-Agent")
+		clientIP := ip.GetClientIP(c)
+
 		// 6) record usage async
-		go func(result *service.ForwardResult, usedAccount *service.Account) {
+		go func(result *service.ForwardResult, usedAccount *service.Account, ua, ip string) {
 			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 			defer cancel()
 			if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{
@@ -324,10 +329,12 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 				User:         apiKey.User,
 				Account:      usedAccount,
 				Subscription: subscription,
+				UserAgent:    ua,
+				IPAddress:    ip,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
-		}(result, account)
+		}(result, account, userAgent, clientIP)
 		return
 	}
 }
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -12,6 +12,7 @@ import (
 	"time"

 	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
@@ -263,8 +264,12 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 			return
 		}

+		// 捕获请求信息（用于异步记录，避免在 goroutine 中访问 gin.Context）
+		userAgent := c.GetHeader("User-Agent")
+		clientIP := ip.GetClientIP(c)
+
 		// Async record usage
-		go func(result *service.OpenAIForwardResult, usedAccount *service.Account) {
+		go func(result *service.OpenAIForwardResult, usedAccount *service.Account, ua, ip string) {
 			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 			defer cancel()
 			if err := h.gatewayService.RecordUsage(ctx, &service.OpenAIRecordUsageInput{
@@ -273,10 +278,12 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 				User:         apiKey.User,
 				Account:      usedAccount,
 				Subscription: subscription,
+				UserAgent:    ua,
+				IPAddress:    ip,
 			}); err != nil {
 				log.Printf("Record usage failed: %v", err)
 			}
-		}(result, account)
+		}(result, account, userAgent, clientIP)
 		return
 	}
 }
--- a/backend/internal/handler/ops_error_logger.go
+++ b/backend/internal/handler/ops_error_logger.go
@@ -15,6 +15,7 @@ import (
 	"unicode/utf8"

 	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ip"
 	middleware2 "github.com/Wei-Shaw/sub2api/internal/server/middleware"
 	"github.com/Wei-Shaw/sub2api/internal/service"
 	"github.com/gin-gonic/gin"
@@ -489,6 +490,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 				Severity:          classifyOpsSeverity("upstream_error", effectiveUpstreamStatus),
 				StatusCode:        status,
 				IsBusinessLimited: false,
+				IsCountTokens:     isCountTokensRequest(c),

 				ErrorMessage: recoveredMsg,
 				ErrorBody:    "",
@@ -521,7 +523,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 			}

 			var clientIP string
-			if ip := strings.TrimSpace(c.ClientIP()); ip != "" {
+			if ip := strings.TrimSpace(ip.GetClientIP(c)); ip != "" {
 				clientIP = ip
 				entry.ClientIP = &clientIP
 			}
@@ -598,6 +600,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 			Severity:          classifyOpsSeverity(parsed.ErrorType, status),
 			StatusCode:        status,
 			IsBusinessLimited: isBusinessLimited,
+			IsCountTokens:     isCountTokensRequest(c),

 			ErrorMessage: parsed.Message,
 			// Keep the full captured error body (capture is already capped at 64KB) so the
@@ -680,7 +683,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
 		}

 		var clientIP string
-		if ip := strings.TrimSpace(c.ClientIP()); ip != "" {
+		if ip := strings.TrimSpace(ip.GetClientIP(c)); ip != "" {
 			clientIP = ip
 			entry.ClientIP = &clientIP
 		}
@@ -704,6 +707,14 @@ var opsRetryRequestHeaderAllowlist = []string{
 	"anthropic-version",
 }

+// isCountTokensRequest reports whether the request URL path contains "/count_tokens"; it is false for a nil context, request, or URL.
+func isCountTokensRequest(c *gin.Context) bool {
+	if c == nil || c.Request == nil || c.Request.URL == nil {
+		return false
+	}
+	return strings.Contains(c.Request.URL.Path, "/count_tokens") // substring match, so the segment may appear anywhere in the path
+}
+
 func extractOpsRetryRequestHeaders(c *gin.Context) *string {
 	if c == nil || c.Request == nil {
 		return nil
--- a/backend/internal/repository/account_repo.go
+++ b/backend/internal/repository/account_repo.go
@@ -15,6 +15,7 @@ import (
 	"database/sql"
 	"encoding/json"
 	"errors"
+	"log"
 	"strconv"
 	"time"

@@ -115,6 +116,9 @@ func (r *accountRepository) Create(ctx context.Context, account *service.Account
 	account.ID = created.ID
 	account.CreatedAt = created.CreatedAt
 	account.UpdatedAt = created.UpdatedAt
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &account.ID, nil, buildSchedulerGroupPayload(account.GroupIDs)); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue account create failed: account=%d err=%v", account.ID, err)
+	}
 	return nil
 }

@@ -341,10 +345,17 @@ func (r *accountRepository) Update(ctx context.Context, account *service.Account
 		return translatePersistenceError(err, service.ErrAccountNotFound, nil)
 	}
 	account.UpdatedAt = updated.UpdatedAt
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &account.ID, nil, buildSchedulerGroupPayload(account.GroupIDs)); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue account update failed: account=%d err=%v", account.ID, err)
+	}
 	return nil
 }

 func (r *accountRepository) Delete(ctx context.Context, id int64) error {
+	groupIDs, err := r.loadAccountGroupIDs(ctx, id)
+	if err != nil {
+		return err
+	}
 	// 使用事务保证账号与关联分组的删除原子性
 	tx, err := r.client.Tx(ctx)
 	if err != nil && !errors.Is(err, dbent.ErrTxStarted) {
@@ -368,7 +379,12 @@ func (r *accountRepository) Delete(ctx context.Context, id int64) error {
 	}

 	if tx != nil {
-		return tx.Commit()
+		if err := tx.Commit(); err != nil {
+			return err
+		}
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, buildSchedulerGroupPayload(groupIDs)); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue account delete failed: account=%d err=%v", id, err)
 	}
 	return nil
 }
@@ -455,7 +471,18 @@ func (r *accountRepository) UpdateLastUsed(ctx context.Context, id int64) error
 		Where(dbaccount.IDEQ(id)).
 		SetLastUsedAt(now).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	payload := map[string]any{
+		"last_used": map[string]int64{
+			strconv.FormatInt(id, 10): now.Unix(),
+		},
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountLastUsed, &id, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue last used failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) BatchUpdateLastUsed(ctx context.Context, updates map[int64]time.Time) error {
@@ -479,7 +506,18 @@ func (r *accountRepository) BatchUpdateLastUsed(ctx context.Context, updates map
 	args = append(args, pq.Array(ids))

 	_, err := r.sql.ExecContext(ctx, caseSQL, args...)
-	return err
+	if err != nil {
+		return err
+	}
+	lastUsedPayload := make(map[string]int64, len(updates))
+	for id, ts := range updates {
+		lastUsedPayload[strconv.FormatInt(id, 10)] = ts.Unix()
+	}
+	payload := map[string]any{"last_used": lastUsedPayload}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountLastUsed, nil, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue batch last used failed: err=%v", err)
+	}
+	return nil
 }

 func (r *accountRepository) SetError(ctx context.Context, id int64, errorMsg string) error {
@@ -488,7 +526,13 @@ func (r *accountRepository) SetError(ctx context.Context, id int64, errorMsg str
 		SetStatus(service.StatusError).
 		SetErrorMessage(errorMsg).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue set error failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) AddToGroup(ctx context.Context, accountID, groupID int64, priority int) error {
@@ -497,7 +541,14 @@ func (r *accountRepository) AddToGroup(ctx context.Context, accountID, groupID i
 		SetGroupID(groupID).
 		SetPriority(priority).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	payload := buildSchedulerGroupPayload([]int64{groupID})
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountGroupsChanged, &accountID, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue add to group failed: account=%d group=%d err=%v", accountID, groupID, err)
+	}
+	return nil
 }

 func (r *accountRepository) RemoveFromGroup(ctx context.Context, accountID, groupID int64) error {
@@ -507,7 +558,14 @@ func (r *accountRepository) RemoveFromGroup(ctx context.Context, accountID, grou
 			dbaccountgroup.GroupIDEQ(groupID),
 		).
 		Exec(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	payload := buildSchedulerGroupPayload([]int64{groupID})
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountGroupsChanged, &accountID, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue remove from group failed: account=%d group=%d err=%v", accountID, groupID, err)
+	}
+	return nil
 }

 func (r *accountRepository) GetGroups(ctx context.Context, accountID int64) ([]service.Group, error) {
@@ -528,6 +586,10 @@ func (r *accountRepository) GetGroups(ctx context.Context, accountID int64) ([]s
 }

 func (r *accountRepository) BindGroups(ctx context.Context, accountID int64, groupIDs []int64) error {
+	existingGroupIDs, err := r.loadAccountGroupIDs(ctx, accountID)
+	if err != nil {
+		return err
+	}
 	// 使用事务保证删除旧绑定与创建新绑定的原子性
 	tx, err := r.client.Tx(ctx)
 	if err != nil && !errors.Is(err, dbent.ErrTxStarted) {
@@ -568,7 +630,13 @@ func (r *accountRepository) BindGroups(ctx context.Context, accountID int64, gro
 	}

 	if tx != nil {
-		return tx.Commit()
+		if err := tx.Commit(); err != nil {
+			return err
+		}
+	}
+	payload := buildSchedulerGroupPayload(mergeGroupIDs(existingGroupIDs, groupIDs))
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountGroupsChanged, &accountID, nil, payload); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue bind groups failed: account=%d err=%v", accountID, err)
 	}
 	return nil
 }
@@ -672,7 +740,13 @@ func (r *accountRepository) SetRateLimited(ctx context.Context, id int64, resetA
 		SetRateLimitedAt(now).
 		SetRateLimitResetAt(resetAt).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue rate limit failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope service.AntigravityQuotaScope, resetAt time.Time) error {
@@ -706,6 +780,9 @@ func (r *accountRepository) SetAntigravityQuotaScopeLimit(ctx context.Context, i
 	if affected == 0 {
 		return service.ErrAccountNotFound
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue quota scope failed: account=%d err=%v", id, err)
+	}
 	return nil
 }

@@ -714,7 +791,13 @@ func (r *accountRepository) SetOverloaded(ctx context.Context, id int64, until t
 		Where(dbaccount.IDEQ(id)).
 		SetOverloadUntil(until).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue overload failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) SetTempUnschedulable(ctx context.Context, id int64, until time.Time, reason string) error {
@@ -727,7 +810,13 @@ func (r *accountRepository) SetTempUnschedulable(ctx context.Context, id int64,
 			AND deleted_at IS NULL
 			AND (temp_unschedulable_until IS NULL OR temp_unschedulable_until < $1)
 	`, until, reason, id)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue temp unschedulable failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) ClearTempUnschedulable(ctx context.Context, id int64) error {
@@ -739,7 +828,13 @@ func (r *accountRepository) ClearTempUnschedulable(ctx context.Context, id int64
 		WHERE id = $1
 			AND deleted_at IS NULL
 	`, id)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue clear temp unschedulable failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) ClearRateLimit(ctx context.Context, id int64) error {
@@ -749,7 +844,13 @@ func (r *accountRepository) ClearRateLimit(ctx context.Context, id int64) error
 		ClearRateLimitResetAt().
 		ClearOverloadUntil().
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue clear rate limit failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) ClearAntigravityQuotaScopes(ctx context.Context, id int64) error {
@@ -770,6 +871,9 @@ func (r *accountRepository) ClearAntigravityQuotaScopes(ctx context.Context, id
 	if affected == 0 {
 		return service.ErrAccountNotFound
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue clear quota scopes failed: account=%d err=%v", id, err)
+	}
 	return nil
 }

@@ -792,7 +896,13 @@ func (r *accountRepository) SetSchedulable(ctx context.Context, id int64, schedu
 		Where(dbaccount.IDEQ(id)).
 		SetSchedulable(schedulable).
 		Save(ctx)
-	return err
+	if err != nil {
+		return err
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue schedulable change failed: account=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *accountRepository) AutoPauseExpiredAccounts(ctx context.Context, now time.Time) (int64, error) {
@@ -813,6 +923,11 @@ func (r *accountRepository) AutoPauseExpiredAccounts(ctx context.Context, now ti
 	if err != nil {
 		return 0, err
 	}
+	if rows > 0 {
+		if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventFullRebuild, nil, nil, nil); err != nil {
+			log.Printf("[SchedulerOutbox] enqueue auto pause rebuild failed: err=%v", err)
+		}
+	}
 	return rows, nil
 }

@@ -844,6 +959,9 @@ func (r *accountRepository) UpdateExtra(ctx context.Context, id int64, updates m
 	if affected == 0 {
 		return service.ErrAccountNotFound
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountChanged, &id, nil, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue extra update failed: account=%d err=%v", id, err)
+	}
 	return nil
 }

@@ -928,6 +1046,12 @@ func (r *accountRepository) BulkUpdate(ctx context.Context, ids []int64, updates
 	if err != nil {
 		return 0, err
 	}
+	if rows > 0 {
+		payload := map[string]any{"account_ids": ids}
+		if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventAccountBulkChanged, nil, nil, payload); err != nil {
+			log.Printf("[SchedulerOutbox] enqueue bulk update failed: err=%v", err)
+		}
+	}
 	return rows, nil
 }

@@ -1170,6 +1294,54 @@ func (r *accountRepository) loadAccountGroups(ctx context.Context, accountIDs []
 	return groupsByAccount, groupIDsByAccount, accountGroupsByAccount, nil
 }

+// loadAccountGroupIDs returns the group ID of every account-group binding stored for accountID.
+func (r *accountRepository) loadAccountGroupIDs(ctx context.Context, accountID int64) ([]int64, error) {
+	bindings, err := r.client.AccountGroup.Query().
+		Where(dbaccountgroup.AccountIDEQ(accountID)).
+		All(ctx)
+	if err != nil {
+		return nil, err
+	}
+	groupIDs := make([]int64, len(bindings))
+	for i, binding := range bindings {
+		groupIDs[i] = binding.GroupID
+	}
+	return groupIDs, nil
+}
+
+// mergeGroupIDs returns the de-duplicated union of a and b.
+//
+// IDs keep their first-seen order (all of a, then the IDs that are new in b), and
+// non-positive IDs are dropped. The result is always non-nil.
+func mergeGroupIDs(a []int64, b []int64) []int64 {
+	total := len(a) + len(b)
+	visited := make(map[int64]struct{}, total)
+	merged := make([]int64, 0, total)
+	// Both inputs go through the same filter, so walk them as one sequence.
+	for _, ids := range [][]int64{a, b} {
+		for _, id := range ids {
+			if id <= 0 {
+				// Non-positive IDs are never emitted.
+				continue
+			}
+			// Keep only the first occurrence of each ID.
+			if _, dup := visited[id]; dup {
+				continue
+			}
+			visited[id] = struct{}{}
+			merged = append(merged, id)
+		}
+	}
+	return merged
+}
+
+// buildSchedulerGroupPayload wraps groupIDs under the "group_ids" key; it yields nil when there are none.
+func buildSchedulerGroupPayload(groupIDs []int64) map[string]any {
+	if len(groupIDs) > 0 {
+		return map[string]any{"group_ids": groupIDs}
+	}
+	return nil
+}
+
 func accountEntityToService(m *dbent.Account) *service.Account {
 	if m == nil {
 		return nil
--- a/backend/internal/repository/group_repo.go
+++ b/backend/internal/repository/group_repo.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"database/sql"
 	"errors"
+	"log"

 	dbent "github.com/Wei-Shaw/sub2api/ent"
 	"github.com/Wei-Shaw/sub2api/ent/apikey"
@@ -55,6 +56,9 @@ func (r *groupRepository) Create(ctx context.Context, groupIn *service.Group) er
 		groupIn.ID = created.ID
 		groupIn.CreatedAt = created.CreatedAt
 		groupIn.UpdatedAt = created.UpdatedAt
+		if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &groupIn.ID, nil); err != nil {
+			log.Printf("[SchedulerOutbox] enqueue group create failed: group=%d err=%v", groupIn.ID, err)
+		}
 	}
 	return translatePersistenceError(err, nil, service.ErrGroupExists)
 }
@@ -111,12 +115,21 @@ func (r *groupRepository) Update(ctx context.Context, groupIn *service.Group) er
 		return translatePersistenceError(err, service.ErrGroupNotFound, service.ErrGroupExists)
 	}
 	groupIn.UpdatedAt = updated.UpdatedAt
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &groupIn.ID, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group update failed: group=%d err=%v", groupIn.ID, err)
+	}
 	return nil
 }

 func (r *groupRepository) Delete(ctx context.Context, id int64) error {
 	_, err := r.client.Group.Delete().Where(group.IDEQ(id)).Exec(ctx)
-	return translatePersistenceError(err, service.ErrGroupNotFound, nil)
+	if err != nil {
+		return translatePersistenceError(err, service.ErrGroupNotFound, nil)
+	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &id, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group delete failed: group=%d err=%v", id, err)
+	}
+	return nil
 }

 func (r *groupRepository) List(ctx context.Context, params pagination.PaginationParams) ([]service.Group, *pagination.PaginationResult, error) {
@@ -246,6 +259,9 @@ func (r *groupRepository) DeleteAccountGroupsByGroupID(ctx context.Context, grou
 		return 0, err
 	}
 	affected, _ := res.RowsAffected()
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &groupID, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group account clear failed: group=%d err=%v", groupID, err)
+	}
 	return affected, nil
 }

@@ -353,6 +369,9 @@ func (r *groupRepository) DeleteCascade(ctx context.Context, id int64) ([]int64,
 			return nil, err
 		}
 	}
+	if err := enqueueSchedulerOutbox(ctx, r.sql, service.SchedulerOutboxEventGroupChanged, nil, &id, nil); err != nil {
+		log.Printf("[SchedulerOutbox] enqueue group cascade delete failed: group=%d err=%v", id, err)
+	}

 	return affectedUserIDs, nil
 }
--- a/backend/internal/repository/migrations_runner.go
+++ b/backend/internal/repository/migrations_runner.go
@@ -28,6 +28,23 @@ CREATE TABLE IF NOT EXISTS schema_migrations (
 );
 `

+const atlasSchemaRevisionsTableDDL = `
+CREATE TABLE IF NOT EXISTS atlas_schema_revisions (
+	version TEXT PRIMARY KEY,
+	description TEXT NOT NULL,
+	type INTEGER NOT NULL,
+	applied INTEGER NOT NULL DEFAULT 0,
+	total INTEGER NOT NULL DEFAULT 0,
+	executed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+	execution_time BIGINT NOT NULL DEFAULT 0,
+	error TEXT NULL,
+	error_stmt TEXT NULL,
+	hash TEXT NOT NULL DEFAULT '',
+	partial_hashes TEXT[] NULL,
+	operator_version TEXT NULL
+);
+`
+
 // migrationsAdvisoryLockID 是用于序列化迁移操作的 PostgreSQL Advisory Lock ID。
 // 在多实例部署场景下，该锁确保同一时间只有一个实例执行迁移。
 // 任何稳定的 int64 值都可以，只要不与同一数据库中的其他锁冲突即可。
@@ -94,6 +111,11 @@ func applyMigrationsFS(ctx context.Context, db *sql.DB, fsys fs.FS) error {
 		return fmt.Errorf("create schema_migrations: %w", err)
 	}

+	// 自动对齐 Atlas 基线（如果检测到 legacy schema_migrations 且缺失 atlas_schema_revisions）。
+	if err := ensureAtlasBaselineAligned(ctx, db, fsys); err != nil {
+		return err
+	}
+
 	// 获取所有 .sql 迁移文件并按文件名排序。
 	// 命名规范：使用零填充数字前缀（如 001_init.sql, 002_add_users.sql）。
 	files, err := fs.Glob(fsys, "*.sql")
@@ -172,6 +194,80 @@ func applyMigrationsFS(ctx context.Context, db *sql.DB, fsys fs.FS) error {
 	return nil
 }

+// ensureAtlasBaselineAligned records a baseline row in atlas_schema_revisions when the legacy
+// schema_migrations table exists and atlas_schema_revisions (created here if absent) has no rows.
+func ensureAtlasBaselineAligned(ctx context.Context, db *sql.DB, fsys fs.FS) error {
+	legacyPresent, err := tableExists(ctx, db, "schema_migrations")
+	if err != nil {
+		return fmt.Errorf("check schema_migrations: %w", err)
+	}
+	if !legacyPresent {
+		return nil
+	}
+
+	atlasPresent, err := tableExists(ctx, db, "atlas_schema_revisions")
+	if err != nil {
+		return fmt.Errorf("check atlas_schema_revisions: %w", err)
+	}
+	if !atlasPresent {
+		if _, err := db.ExecContext(ctx, atlasSchemaRevisionsTableDDL); err != nil {
+			return fmt.Errorf("create atlas_schema_revisions: %w", err)
+		}
+	}
+	// Any existing revision row means a baseline (or later revision) is already recorded.
+	var revisions int
+	if err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM atlas_schema_revisions").Scan(&revisions); err != nil {
+		return fmt.Errorf("count atlas_schema_revisions: %w", err)
+	}
+	if revisions > 0 {
+		return nil
+	}
+	version, description, hash, err := latestMigrationBaseline(fsys)
+	if err != nil {
+		return fmt.Errorf("atlas baseline version: %w", err)
+	}
+	if _, err := db.ExecContext(ctx, `
+		INSERT INTO atlas_schema_revisions (version, description, type, applied, total, executed_at, execution_time, hash)
+		VALUES ($1, $2, $3, 0, 0, NOW(), 0, $4)
+	`, version, description, 1, hash); err != nil {
+		return fmt.Errorf("insert atlas baseline: %w", err)
+	}
+	return nil
+}
+
+// tableExists reports whether a table named tableName is listed in the "public" schema.
+func tableExists(ctx context.Context, db *sql.DB, tableName string) (found bool, err error) {
+	err = db.QueryRowContext(ctx, `
+		SELECT EXISTS (
+			SELECT 1
+			FROM information_schema.tables
+			WHERE table_schema = 'public' AND table_name = $1
+		)
+	`, tableName).Scan(&found)
+	return found, err
+}
+
+// latestMigrationBaseline derives (version, description, hash) from the lexically last *.sql file in
+// fsys: both names are the file name minus ".sql"; hash is the hex SHA-256 of its trimmed content.
+func latestMigrationBaseline(fsys fs.FS) (string, string, string, error) {
+	names, err := fs.Glob(fsys, "*.sql")
+	if err != nil {
+		return "", "", "", err
+	}
+	if len(names) == 0 {
+		return "baseline", "baseline", "", nil
+	}
+	sort.Strings(names)
+	latest := names[len(names)-1]
+	raw, err := fs.ReadFile(fsys, latest)
+	if err != nil {
+		return "", "", "", err
+	}
+	digest := sha256.Sum256([]byte(strings.TrimSpace(string(raw))))
+	version := strings.TrimSuffix(latest, ".sql")
+	return version, version, hex.EncodeToString(digest[:]), nil
+}
+
 // pgAdvisoryLock 获取 PostgreSQL Advisory Lock。
 // Advisory Lock 是一种轻量级的锁机制，不与任何特定的数据库对象关联。
 // 它非常适合用于应用层面的分布式锁场景，如迁移序列化。
--- a/backend/internal/repository/ops_repo.go
+++ b/backend/internal/repository/ops_repo.go
@@ -46,6 +46,7 @@ INSERT INTO ops_error_logs (
  severity,
  status_code,
  is_business_limited,
+  is_count_tokens,
  error_message,
  error_body,
  error_source,
@@ -64,7 +65,7 @@ INSERT INTO ops_error_logs (
  retry_count,
  created_at
 ) VALUES (
-  $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34
+  $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34,$35
 ) RETURNING id`

 	var id int64
@@ -88,6 +89,7 @@ INSERT INTO ops_error_logs (
 		opsNullString(input.Severity),
 		opsNullInt(input.StatusCode),
 		input.IsBusinessLimited,
+		input.IsCountTokens,
 		opsNullString(input.ErrorMessage),
 		opsNullString(input.ErrorBody),
 		opsNullString(input.ErrorSource),
--- a/backend/internal/repository/ops_repo_dashboard.go
+++ b/backend/internal/repository/ops_repo_dashboard.go
@@ -964,8 +964,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
 	}

 	idx := startIndex
-	clauses := make([]string, 0, 4)
-	args = make([]any, 0, 4)
+	clauses := make([]string, 0, 5)
+	args = make([]any, 0, 5)

 	args = append(args, start)
 	clauses = append(clauses, fmt.Sprintf("created_at >= $%d", idx))
@@ -974,6 +974,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
 	clauses = append(clauses, fmt.Sprintf("created_at < $%d", idx))
 	idx++

+	clauses = append(clauses, "is_count_tokens = FALSE")
+
 	if groupID != nil && *groupID > 0 {
 		args = append(args, *groupID)
 		clauses = append(clauses, fmt.Sprintf("group_id = $%d", idx))
--- a/backend/internal/repository/ops_repo_preagg.go
+++ b/backend/internal/repository/ops_repo_preagg.go
@@ -78,7 +78,9 @@ error_base AS (
    status_code AS client_status_code,
    COALESCE(upstream_status_code, status_code, 0) AS effective_status_code
  FROM ops_error_logs
+  -- Exclude count_tokens requests from error metrics as they are informational probes
  WHERE created_at >= $1 AND created_at < $2
+    AND is_count_tokens = FALSE
 ),
 error_agg AS (
  SELECT
--- a/backend/internal/repository/ops_repo_realtime_traffic.go
+++ b/backend/internal/repository/ops_repo_realtime_traffic.go
@@ -0,0 +1,129 @@
+package repository
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+)
+
+// GetRealtimeTrafficSummary reports current, peak and average QPS/TPS for the filter's time window,
+// which must be positive and at most one hour. Peaks are the busiest one-minute bucket divided by 60.
+func (r *opsRepository) GetRealtimeTrafficSummary(ctx context.Context, filter *service.OpsDashboardFilter) (*service.OpsRealtimeTrafficSummary, error) {
+	if r == nil || r.db == nil {
+		return nil, fmt.Errorf("nil ops repository")
+	}
+	if filter == nil {
+		return nil, fmt.Errorf("nil filter")
+	}
+	if filter.StartTime.IsZero() || filter.EndTime.IsZero() {
+		return nil, fmt.Errorf("start_time/end_time required")
+	}
+
+	start := filter.StartTime.UTC()
+	end := filter.EndTime.UTC()
+	if start.After(end) {
+		return nil, fmt.Errorf("start_time must be <= end_time")
+	}
+
+	window := end.Sub(start)
+	if window <= 0 {
+		return nil, fmt.Errorf("invalid time window")
+	}
+	if window > time.Hour {
+		return nil, fmt.Errorf("window too large")
+	}
+
+	usageJoin, usageWhere, usageArgs, next := buildUsageWhere(filter, start, end, 1)
+	errorWhere, errorArgs, _ := buildErrorWhere(filter, start, end, next)
+
+	q := `
+WITH usage_buckets AS (
+  SELECT
+    date_trunc('minute', ul.created_at) AS bucket,
+    COALESCE(COUNT(*), 0) AS success_count,
+    COALESCE(SUM(input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens), 0) AS token_sum
+  FROM usage_logs ul
+  ` + usageJoin + `
+  ` + usageWhere + `
+  GROUP BY 1
+),
+error_buckets AS (
+  SELECT
+    date_trunc('minute', created_at) AS bucket,
+    COALESCE(COUNT(*), 0) AS error_count
+  FROM ops_error_logs
+  ` + errorWhere + `
+    AND COALESCE(status_code, 0) >= 400
+  GROUP BY 1
+),
+combined AS (
+  SELECT
+    COALESCE(u.bucket, e.bucket) AS bucket,
+    COALESCE(u.success_count, 0) AS success_count,
+    COALESCE(u.token_sum, 0) AS token_sum,
+    COALESCE(e.error_count, 0) AS error_count,
+    COALESCE(u.success_count, 0) + COALESCE(e.error_count, 0) AS request_total
+  FROM usage_buckets u
+  FULL OUTER JOIN error_buckets e ON u.bucket = e.bucket
+)
+SELECT
+  COALESCE(SUM(success_count), 0) AS success_total,
+  COALESCE(SUM(error_count), 0) AS error_total,
+  COALESCE(SUM(token_sum), 0) AS token_total,
+  COALESCE(MAX(request_total), 0) AS peak_requests_per_min,
+  COALESCE(MAX(token_sum), 0) AS peak_tokens_per_min
+FROM combined`
+
+	args := append(append([]any(nil), usageArgs...), errorArgs...)
+	var successCount int64
+	var errorTotal int64
+	var tokenConsumed int64
+	var peakRequestsPerMin int64
+	var peakTokensPerMin int64
+	if err := r.db.QueryRowContext(ctx, q, args...).Scan(
+		&successCount,
+		&errorTotal,
+		&tokenConsumed,
+		&peakRequestsPerMin,
+		&peakTokensPerMin,
+	); err != nil {
+		return nil, err
+	}
+
+	// window > 0 was validated above, so the divisor below is strictly positive.
+	windowSeconds := window.Seconds()
+
+	requestCountTotal := successCount + errorTotal
+	qpsAvg := roundTo1DP(float64(requestCountTotal) / windowSeconds)
+	tpsAvg := roundTo1DP(float64(tokenConsumed) / windowSeconds)
+
+	// Keep "current" consistent with the dashboard overview semantics: last 1 minute.
+	// This remains "within the selected window" since end=start+window.
+	qpsCurrent, tpsCurrent, err := r.queryCurrentRates(ctx, filter, end)
+	if err != nil {
+		return nil, err
+	}
+
+	qpsPeak := roundTo1DP(float64(peakRequestsPerMin) / 60.0)
+	tpsPeak := roundTo1DP(float64(peakTokensPerMin) / 60.0)
+
+	return &service.OpsRealtimeTrafficSummary{
+		StartTime: start,
+		EndTime:   end,
+		Platform:  strings.TrimSpace(filter.Platform),
+		GroupID:   filter.GroupID,
+		QPS: service.OpsRateSummary{
+			Current: qpsCurrent,
+			Peak:    qpsPeak,
+			Avg:     qpsAvg,
+		},
+		TPS: service.OpsRateSummary{
+			Current: tpsCurrent,
+			Peak:    tpsPeak,
+			Avg:     tpsAvg,
+		},
+	}, nil
+}
--- a/backend/internal/repository/ops_repo_trends.go
+++ b/backend/internal/repository/ops_repo_trends.go
@@ -170,6 +170,7 @@ error_totals AS (
  FROM ops_error_logs
  WHERE created_at >= $1 AND created_at < $2
    AND COALESCE(status_code, 0) >= 400
+    AND is_count_tokens = FALSE  -- 排除 count_tokens 请求的错误
  GROUP BY 1
 ),
 combined AS (
@@ -243,6 +244,7 @@ error_totals AS (
    AND platform = $3
    AND group_id IS NOT NULL
    AND COALESCE(status_code, 0) >= 400
+    AND is_count_tokens = FALSE  -- 排除 count_tokens 请求的错误
  GROUP BY 1
 ),
 combined AS (
--- a/backend/internal/repository/scheduler_cache.go
+++ b/backend/internal/repository/scheduler_cache.go
@@ -0,0 +1,276 @@
+package repository
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/redis/go-redis/v9"
+)
+
+const (
+	schedulerBucketSetKey       = "sched:buckets"
+	schedulerOutboxWatermarkKey = "sched:outbox:watermark"
+	schedulerAccountPrefix      = "sched:acc:"
+	schedulerActivePrefix       = "sched:active:"
+	schedulerReadyPrefix        = "sched:ready:"
+	schedulerVersionPrefix      = "sched:ver:"
+	schedulerSnapshotPrefix     = "sched:"
+	schedulerLockPrefix         = "sched:lock:"
+)
+
+type schedulerCache struct {
+	rdb *redis.Client
+}
+
+func NewSchedulerCache(rdb *redis.Client) service.SchedulerCache {
+	return &schedulerCache{rdb: rdb}
+}
+
+// GetSnapshot loads the active account snapshot of bucket from Redis.
+// The bool result is false (with nil accounts) when the bucket is not marked ready, has no
+// active version, or any listed account entry is missing from the cache.
+func (c *schedulerCache) GetSnapshot(ctx context.Context, bucket service.SchedulerBucket) ([]*service.Account, bool, error) {
+	ready, err := c.rdb.Get(ctx, schedulerBucketKey(schedulerReadyPrefix, bucket)).Result()
+	switch {
+	case err == redis.Nil:
+		return nil, false, nil
+	case err != nil:
+		return nil, false, err
+	case ready != "1":
+		return nil, false, nil
+	}
+
+	version, err := c.rdb.Get(ctx, schedulerBucketKey(schedulerActivePrefix, bucket)).Result()
+	switch {
+	case err == redis.Nil:
+		return nil, false, nil
+	case err != nil:
+		return nil, false, err
+	}
+
+	memberIDs, err := c.rdb.ZRange(ctx, schedulerSnapshotKey(bucket, version), 0, -1).Result()
+	if err != nil {
+		return nil, false, err
+	}
+	if len(memberIDs) == 0 {
+		return []*service.Account{}, true, nil
+	}
+
+	accountKeys := make([]string, len(memberIDs))
+	for i, id := range memberIDs {
+		accountKeys[i] = schedulerAccountKey(id)
+	}
+	cached, err := c.rdb.MGet(ctx, accountKeys...).Result()
+	if err != nil {
+		return nil, false, err
+	}
+
+	snapshot := make([]*service.Account, 0, len(cached))
+	for _, entry := range cached {
+		if entry == nil {
+			// A member without its account payload makes the snapshot unusable: report a miss.
+			return nil, false, nil
+		}
+		account, err := decodeCachedAccount(entry)
+		if err != nil {
+			return nil, false, err
+		}
+		snapshot = append(snapshot, account)
+	}
+
+	return snapshot, true, nil
+}
+
+// SetSnapshot publishes accounts as a new versioned snapshot for bucket and makes it the active one.
+// The previous snapshot is given a short TTL instead of being deleted outright, so a concurrent
+// GetSnapshot that already read the old version does not see an empty (but "hit") account list.
+func (c *schedulerCache) SetSnapshot(ctx context.Context, bucket service.SchedulerBucket, accounts []service.Account) error {
+	// retiredSnapshotTTL is how long a superseded snapshot stays readable after the switch.
+	const retiredSnapshotTTL = time.Minute
+	activeKey := schedulerBucketKey(schedulerActivePrefix, bucket)
+	// Best effort: on a miss or error there is simply no previous snapshot to retire.
+	oldActive, _ := c.rdb.Get(ctx, activeKey).Result()
+
+	version, err := c.rdb.Incr(ctx, schedulerBucketKey(schedulerVersionPrefix, bucket)).Result()
+	if err != nil {
+		return err
+	}
+	versionStr := strconv.FormatInt(version, 10)
+	snapshotKey := schedulerSnapshotKey(bucket, versionStr)
+
+	pipe := c.rdb.Pipeline()
+	// Use the slice index as the score to preserve the ordering returned by the database.
+	members := make([]redis.Z, 0, len(accounts))
+	for idx := range accounts {
+		id := strconv.FormatInt(accounts[idx].ID, 10)
+		payload, err := json.Marshal(accounts[idx])
+		if err != nil {
+			return err
+		}
+		pipe.Set(ctx, schedulerAccountKey(id), payload, 0)
+		members = append(members, redis.Z{Score: float64(idx), Member: id})
+	}
+	if len(members) > 0 {
+		pipe.ZAdd(ctx, snapshotKey, members...)
+	} else {
+		pipe.Del(ctx, snapshotKey)
+	}
+	pipe.Set(ctx, activeKey, versionStr, 0)
+	pipe.Set(ctx, schedulerBucketKey(schedulerReadyPrefix, bucket), "1", 0)
+	pipe.SAdd(ctx, schedulerBucketSetKey, bucket.String())
+	if _, err := pipe.Exec(ctx); err != nil {
+		return err
+	}
+
+	if oldActive != "" && oldActive != versionStr {
+		// Retire (rather than delete) the old snapshot; a failure here only delays cleanup.
+		_ = c.rdb.Expire(ctx, schedulerSnapshotKey(bucket, oldActive), retiredSnapshotTTL).Err()
+	}
+	return nil
+}
+
+// GetAccount returns the cached account for accountID, or (nil, nil) when it is not cached.
+func (c *schedulerCache) GetAccount(ctx context.Context, accountID int64) (*service.Account, error) {
+	raw, err := c.rdb.Get(ctx, schedulerAccountKey(strconv.FormatInt(accountID, 10))).Result()
+	switch {
+	case err == redis.Nil:
+		return nil, nil
+	case err != nil:
+		return nil, err
+	}
+	return decodeCachedAccount(raw)
+}
+
+// SetAccount caches the JSON form of account without expiry; nil or non-positive-ID accounts are ignored.
+func (c *schedulerCache) SetAccount(ctx context.Context, account *service.Account) error {
+	if account == nil || account.ID <= 0 {
+		return nil
+	}
+	encoded, err := json.Marshal(account)
+	if err != nil {
+		return err
+	}
+	return c.rdb.Set(ctx, schedulerAccountKey(strconv.FormatInt(account.ID, 10)), encoded, 0).Err()
+}
+
+// DeleteAccount removes the cached entry for accountID; non-positive IDs are a no-op.
+func (c *schedulerCache) DeleteAccount(ctx context.Context, accountID int64) error {
+	if accountID <= 0 {
+		return nil
+	}
+	return c.rdb.Del(ctx, schedulerAccountKey(strconv.FormatInt(accountID, 10))).Err()
+}
+
+// UpdateLastUsed rewrites LastUsedAt on the cached copy of each account in updates; uncached accounts are skipped.
+func (c *schedulerCache) UpdateLastUsed(ctx context.Context, updates map[int64]time.Time) error {
+	if len(updates) == 0 {
+		return nil
+	}
+	// Keep IDs and keys index-aligned so each MGet result can be mapped back to its timestamp.
+	accountIDs := make([]int64, 0, len(updates))
+	cacheKeys := make([]string, 0, len(updates))
+	for accountID := range updates {
+		accountIDs = append(accountIDs, accountID)
+		cacheKeys = append(cacheKeys, schedulerAccountKey(strconv.FormatInt(accountID, 10)))
+	}
+	cached, err := c.rdb.MGet(ctx, cacheKeys...).Result()
+	if err != nil {
+		return err
+	}
+
+	pipe := c.rdb.Pipeline()
+	for i, entry := range cached {
+		if entry == nil {
+			continue
+		}
+		account, err := decodeCachedAccount(entry)
+		if err != nil {
+			return err
+		}
+		account.LastUsedAt = ptrTime(updates[accountIDs[i]])
+		encoded, err := json.Marshal(account)
+		if err != nil {
+			return err
+		}
+		pipe.Set(ctx, cacheKeys[i], encoded, 0)
+	}
+	_, err = pipe.Exec(ctx)
+	return err
+}
+
+// TryLockBucket issues SETNX on the bucket's lock key with the given ttl and reports whether the key was set.
+func (c *schedulerCache) TryLockBucket(ctx context.Context, bucket service.SchedulerBucket, ttl time.Duration) (bool, error) {
+	return c.rdb.SetNX(ctx, schedulerBucketKey(schedulerLockPrefix, bucket), time.Now().UnixNano(), ttl).Result()
+}
+
+// ListBuckets returns every parsable bucket recorded in the bucket set; unparsable entries are skipped.
+func (c *schedulerCache) ListBuckets(ctx context.Context) ([]service.SchedulerBucket, error) {
+	entries, err := c.rdb.SMembers(ctx, schedulerBucketSetKey).Result()
+	if err != nil {
+		return nil, err
+	}
+
+	buckets := make([]service.SchedulerBucket, 0, len(entries))
+	for _, entry := range entries {
+		if bucket, ok := service.ParseSchedulerBucket(entry); ok {
+			buckets = append(buckets, bucket)
+		}
+	}
+	return buckets, nil
+}
+
+// GetOutboxWatermark returns the stored outbox watermark, or 0 when the key does not exist.
+func (c *schedulerCache) GetOutboxWatermark(ctx context.Context) (int64, error) {
+	raw, err := c.rdb.Get(ctx, schedulerOutboxWatermarkKey).Result()
+	if err == redis.Nil {
+		return 0, nil
+	} else if err != nil {
+		return 0, err
+	}
+	watermark, err := strconv.ParseInt(raw, 10, 64)
+	if err != nil {
+		return 0, err
+	}
+	return watermark, nil
+}
+
+func (c *schedulerCache) SetOutboxWatermark(ctx context.Context, id int64) error {
+	return c.rdb.Set(ctx, schedulerOutboxWatermarkKey, strconv.FormatInt(id, 10), 0).Err()
+}
+
+func schedulerBucketKey(prefix string, bucket service.SchedulerBucket) string {
+	return fmt.Sprintf("%s%d:%s:%s", prefix, bucket.GroupID, bucket.Platform, bucket.Mode)
+}
+
+func schedulerSnapshotKey(bucket service.SchedulerBucket, version string) string {
+	return fmt.Sprintf("%s%d:%s:%s:v%s", schedulerSnapshotPrefix, bucket.GroupID, bucket.Platform, bucket.Mode, version)
+}
+
+func schedulerAccountKey(id string) string {
+	return schedulerAccountPrefix + id
+}
+
+func ptrTime(t time.Time) *time.Time {
+	return &t
+}
+
+// decodeCachedAccount unmarshals account JSON from a cached value that is either a string or a []byte.
+func decodeCachedAccount(val any) (*service.Account, error) {
+	var raw []byte
+	if s, ok := val.(string); ok {
+		raw = []byte(s)
+	} else if b, ok := val.([]byte); ok {
+		raw = b
+	} else {
+		return nil, fmt.Errorf("unexpected account cache type: %T", val)
+	}
+	account := new(service.Account)
+	if err := json.Unmarshal(raw, account); err != nil {
+		return nil, err
+	}
+	return account, nil
+}
--- a/backend/internal/repository/scheduler_outbox_repo.go
+++ b/backend/internal/repository/scheduler_outbox_repo.go
@@ -0,0 +1,96 @@
+package repository
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+)
+
+type schedulerOutboxRepository struct {
+	db *sql.DB // used for the scheduler_outbox reads in this type's methods
+}
+
+func NewSchedulerOutboxRepository(db *sql.DB) service.SchedulerOutboxRepository {
+	return &schedulerOutboxRepository{db: db} // keeps db as given; nothing is queried here
+}
+
+// ListAfter returns up to limit events with id > afterID, lowest id first; limit <= 0 means 100.
+func (r *schedulerOutboxRepository) ListAfter(ctx context.Context, afterID int64, limit int) ([]service.SchedulerOutboxEvent, error) {
+	if limit <= 0 {
+		limit = 100
+	}
+	const query = `
+		SELECT id, event_type, account_id, group_id, payload, created_at
+		FROM scheduler_outbox
+		WHERE id > $1
+		ORDER BY id ASC
+		LIMIT $2
+	`
+	rows, err := r.db.QueryContext(ctx, query, afterID, limit)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = rows.Close() }()
+
+	result := make([]service.SchedulerOutboxEvent, 0, limit)
+	for rows.Next() {
+		var (
+			evt          service.SchedulerOutboxEvent
+			nullAccount  sql.NullInt64
+			nullGroup    sql.NullInt64
+			payloadBytes []byte
+		)
+		if err := rows.Scan(&evt.ID, &evt.EventType, &nullAccount, &nullGroup, &payloadBytes, &evt.CreatedAt); err != nil {
+			return nil, err
+		}
+		// NULL id columns leave the corresponding pointer on the event nil.
+		if nullAccount.Valid {
+			evt.AccountID = &nullAccount.Int64
+		}
+		if nullGroup.Valid {
+			evt.GroupID = &nullGroup.Int64
+		}
+		// An empty payload column leaves evt.Payload unset.
+		if len(payloadBytes) != 0 {
+			var decoded map[string]any
+			if err := json.Unmarshal(payloadBytes, &decoded); err != nil {
+				return nil, err
+			}
+			evt.Payload = decoded
+		}
+		result = append(result, evt)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+func (r *schedulerOutboxRepository) MaxID(ctx context.Context) (int64, error) {
+	var maxID int64
+	if err := r.db.QueryRowContext(ctx, "SELECT COALESCE(MAX(id), 0) FROM scheduler_outbox").Scan(&maxID); err != nil { // COALESCE: an empty table scans as 0 instead of NULL
+		return 0, err
+	}
+	return maxID, nil
+}
+
+func enqueueSchedulerOutbox(ctx context.Context, exec sqlExecutor, eventType string, accountID *int64, groupID *int64, payload any) error {
+	if exec == nil { // without an executor this is a silent no-op
+		return nil
+	}
+	args := []any{eventType, accountID, groupID, nil} // last slot: payload JSON, nil when absent
+	if payload != nil {
+		encoded, err := json.Marshal(payload)
+		if err != nil {
+			return err
+		}
+		args[3] = encoded
+	}
+	_, err := exec.ExecContext(ctx, `
+		INSERT INTO scheduler_outbox (event_type, account_id, group_id, payload)
+		VALUES ($1, $2, $3, $4)
+	`, args...)
+	return err
+}
--- a/backend/internal/repository/scheduler_snapshot_outbox_integration_test.go
+++ b/backend/internal/repository/scheduler_snapshot_outbox_integration_test.go
@@ -0,0 +1,68 @@
+//go:build integration
+
+package repository
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSchedulerSnapshotOutboxReplay(t *testing.T) {
+	ctx := context.Background()
+	rdb := testRedis(t)
+	client := testEntClient(t)
+	// Empty the outbox table first; the TRUNCATE result and error are discarded.
+	_, _ = integrationDB.ExecContext(ctx, "TRUNCATE scheduler_outbox")
+
+	accountRepo := newAccountRepositoryWithSQL(client, integrationDB)
+	outboxRepo := NewSchedulerOutboxRepository(integrationDB)
+	cache := NewSchedulerCache(rdb)
+
+	cfg := &config.Config{
+		RunMode: config.RunModeStandard,
+		Gateway: config.GatewayConfig{
+			Scheduling: config.GatewaySchedulingConfig{
+				OutboxPollIntervalSeconds:  1,
+				FullRebuildIntervalSeconds: 0,
+				DbFallbackEnabled:          true,
+			},
+		},
+	}
+
+	account := &service.Account{
+		Name:        "outbox-replay-" + time.Now().Format("150405.000000"),
+		Platform:    service.PlatformOpenAI,
+		Type:        service.AccountTypeAPIKey,
+		Status:      service.StatusActive,
+		Schedulable: true,
+		Concurrency: 3,
+		Priority:    1,
+		Credentials: map[string]any{},
+		Extra:       map[string]any{},
+	}
+	require.NoError(t, accountRepo.Create(ctx, account))
+	require.NoError(t, cache.SetAccount(ctx, account))
+	// Start the snapshot service; t.Cleanup stops it when the test finishes.
+	svc := service.NewSchedulerSnapshotService(cache, outboxRepo, accountRepo, nil, cfg)
+	svc.Start()
+	t.Cleanup(svc.Stop)
+
+	require.NoError(t, accountRepo.UpdateLastUsed(ctx, account.ID))
+	updated, err := accountRepo.GetByID(ctx, account.ID)
+	require.NoError(t, err)
+	require.NotNil(t, updated.LastUsedAt)
+	expectedUnix := updated.LastUsedAt.Unix()
+	// Poll for up to 5s (every 100ms) until the cached account reports the same Unix second.
+	require.Eventually(t, func() bool {
+		cached, err := cache.GetAccount(ctx, account.ID)
+		if err != nil || cached == nil || cached.LastUsedAt == nil {
+			return false
+		}
+		return cached.LastUsedAt.Unix() == expectedUnix
+	}, 5*time.Second, 100*time.Millisecond)
+}
--- a/backend/internal/repository/timeout_counter_cache.go
+++ b/backend/internal/repository/timeout_counter_cache.go
@@ -0,0 +1,80 @@
+package repository
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/redis/go-redis/v9"
+)
+
+const timeoutCounterPrefix = "timeout_count:account:" // key prefix; the decimal account ID is appended
+
+// timeoutCounterIncrScript is a Lua script that increments the counter and returns the new value in one atomic step.
+// When the increment creates the key (count == 1), it also sets the key's expiry to the given TTL.
+var timeoutCounterIncrScript = redis.NewScript(`
+	local key = KEYS[1]
+	local ttl = tonumber(ARGV[1])
+
+	local count = redis.call('INCR', key)
+	if count == 1 then
+		redis.call('EXPIRE', key, ttl)
+	end
+
+	return count
+`)
+
+type timeoutCounterCache struct {
+	rdb *redis.Client // Redis client holding the per-account counter keys
+}
+
+// NewTimeoutCounterCache creates a timeout counter cache backed by rdb.
+func NewTimeoutCounterCache(rdb *redis.Client) service.TimeoutCounterCache {
+	return &timeoutCounterCache{rdb: rdb}
+}
+
+// IncrementTimeoutCount increments the timeout counter for accountID and returns the new count.
+// windowMinutes becomes the key's TTL (never less than one minute) when the counter is created;
+// the counter is gone once that TTL elapses.
+func (c *timeoutCounterCache) IncrementTimeoutCount(ctx context.Context, accountID int64, windowMinutes int) (int64, error) {
+	ttlSeconds := 60 // floor: one minute
+	if requested := windowMinutes * 60; requested > ttlSeconds {
+		ttlSeconds = requested
+	}
+	keys := []string{fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)}
+
+	count, err := timeoutCounterIncrScript.Run(ctx, c.rdb, keys, ttlSeconds).Int64()
+	if err != nil {
+		return 0, fmt.Errorf("increment timeout count: %w", err)
+	}
+
+	return count, nil
+}
+
+// GetTimeoutCount returns the current timeout count for accountID, or 0 when no counter key exists.
+func (c *timeoutCounterCache) GetTimeoutCount(ctx context.Context, accountID int64) (int64, error) {
+	key := fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)
+	count, err := c.rdb.Get(ctx, key).Int64()
+	switch {
+	case err == redis.Nil:
+		// redis.Nil signals that the key does not exist.
+		return 0, nil
+	case err != nil:
+		return 0, fmt.Errorf("get timeout count: %w", err)
+	default:
+		return count, nil
+	}
+}
+
+// ResetTimeoutCount resets the timeout count for accountID by deleting its counter key.
+func (c *timeoutCounterCache) ResetTimeoutCount(ctx context.Context, accountID int64) error {
+	key := fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)
+	return c.rdb.Del(ctx, key).Err()
+}
+
+// GetTimeoutCountTTL returns the remaining time to live of the counter key, as reported by the Redis TTL command.
+func (c *timeoutCounterCache) GetTimeoutCountTTL(ctx context.Context, accountID int64) (time.Duration, error) {
+	key := fmt.Sprintf("%s%d", timeoutCounterPrefix, accountID)
+	return c.rdb.TTL(ctx, key).Result()
+}
--- a/backend/internal/repository/wire.go
+++ b/backend/internal/repository/wire.go
@@ -59,6 +59,7 @@ var ProviderSet = wire.NewSet(
 	NewBillingCache,
 	NewAPIKeyCache,
 	NewTempUnschedCache,
+	NewTimeoutCounterCache,
 	ProvideConcurrencyCache,
 	NewDashboardCache,
 	NewEmailCache,
@@ -66,6 +67,8 @@ var ProviderSet = wire.NewSet(
 	NewRedeemCache,
 	NewUpdateCache,
 	NewGeminiTokenCache,
+	NewSchedulerCache,
+	NewSchedulerOutboxRepository,

 	// HTTP service ports (DI Strategy A: return interface directly)
 	NewTurnstileVerifier,
--- a/backend/internal/server/routes/admin.go
+++ b/backend/internal/server/routes/admin.go
@@ -73,6 +73,7 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		// Realtime ops signals
 		ops.GET("/concurrency", h.Admin.Ops.GetConcurrencyStats)
 		ops.GET("/account-availability", h.Admin.Ops.GetAccountAvailability)
+		ops.GET("/realtime-traffic", h.Admin.Ops.GetRealtimeTrafficSummary)

 		// Alerts (rules + events)
 		ops.GET("/alert-rules", h.Admin.Ops.ListAlertRules)
@@ -96,6 +97,13 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		ops.GET("/advanced-settings", h.Admin.Ops.GetAdvancedSettings)
 		ops.PUT("/advanced-settings", h.Admin.Ops.UpdateAdvancedSettings)

+		// Settings group (DB-backed)
+		settings := ops.Group("/settings")
+		{
+			settings.GET("/metric-thresholds", h.Admin.Ops.GetMetricThresholds)
+			settings.PUT("/metric-thresholds", h.Admin.Ops.UpdateMetricThresholds)
+		}
+
 		// WebSocket realtime (QPS/TPS)
 		ws := ops.Group("/ws")
 		{
@@ -283,6 +291,9 @@ func registerSettingsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		adminSettings.GET("/admin-api-key", h.Admin.Setting.GetAdminAPIKey)
 		adminSettings.POST("/admin-api-key/regenerate", h.Admin.Setting.RegenerateAdminAPIKey)
 		adminSettings.DELETE("/admin-api-key", h.Admin.Setting.DeleteAdminAPIKey)
+		// 流超时处理配置
+		adminSettings.GET("/stream-timeout", h.Admin.Setting.GetStreamTimeoutSettings)
+		adminSettings.PUT("/stream-timeout", h.Admin.Setting.UpdateStreamTimeoutSettings)
 	}
 }

--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -1717,6 +1717,7 @@ func (s *AntigravityGatewayService) handleGeminiStreamingResponse(c *gin.Context
 				continue
 			}
 			log.Printf("Stream data interval timeout (antigravity)")
+			// 注意：此函数没有 account 上下文，无法调用 HandleStreamTimeout
 			sendErrorEvent("stream_timeout")
 			return &antigravityStreamResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
 		}
@@ -2271,6 +2272,7 @@ func (s *AntigravityGatewayService) handleClaudeStreamingResponse(c *gin.Context
 				continue
 			}
 			log.Printf("Stream data interval timeout (antigravity)")
+			// 注意：此函数没有 account 上下文，无法调用 HandleStreamTimeout
 			sendErrorEvent("stream_timeout")
 			return &antigravityStreamResult{usage: convertUsage(nil), firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
 		}
--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -146,6 +146,13 @@ const (

 	// SettingKeyOpsAdvancedSettings stores JSON config for ops advanced settings (data retention, aggregation).
 	SettingKeyOpsAdvancedSettings = "ops_advanced_settings"
+
+	// =========================
+	// Stream Timeout Handling
+	// =========================
+
+	// SettingKeyStreamTimeoutSettings stores JSON config for stream timeout handling.
+	SettingKeyStreamTimeoutSettings = "stream_timeout_settings"
 )

 // AdminAPIKeyPrefix is the prefix for admin API keys (distinct from user "sk-" keys).
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -151,6 +151,7 @@ type GatewayService struct {
 	userSubRepo         UserSubscriptionRepository
 	cache               GatewayCache
 	cfg                 *config.Config
+	schedulerSnapshot   *SchedulerSnapshotService
 	billingService      *BillingService
 	rateLimitService    *RateLimitService
 	billingCacheService *BillingCacheService
@@ -169,6 +170,7 @@ func NewGatewayService(
 	userSubRepo UserSubscriptionRepository,
 	cache GatewayCache,
 	cfg *config.Config,
+	schedulerSnapshot *SchedulerSnapshotService,
 	concurrencyService *ConcurrencyService,
 	billingService *BillingService,
 	rateLimitService *RateLimitService,
@@ -185,6 +187,7 @@ func NewGatewayService(
 		userSubRepo:         userSubRepo,
 		cache:               cache,
 		cfg:                 cfg,
+		schedulerSnapshot:   schedulerSnapshot,
 		concurrencyService:  concurrencyService,
 		billingService:      billingService,
 		rateLimitService:    rateLimitService,
@@ -745,6 +748,9 @@ func (s *GatewayService) resolvePlatform(ctx context.Context, groupID *int64, gr
 }

 func (s *GatewayService) listSchedulableAccounts(ctx context.Context, groupID *int64, platform string, hasForcePlatform bool) ([]Account, bool, error) {
+	if s.schedulerSnapshot != nil {
+		return s.schedulerSnapshot.ListSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
+	}
 	useMixed := (platform == PlatformAnthropic || platform == PlatformGemini) && !hasForcePlatform
 	if useMixed {
 		platforms := []string{platform, PlatformAntigravity}
@@ -821,6 +827,13 @@ func (s *GatewayService) tryAcquireAccountSlot(ctx context.Context, accountID in
 	return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
 }

+func (s *GatewayService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if s.schedulerSnapshot == nil { // no snapshot service set: read straight from the repository
+		return s.accountRepo.GetByID(ctx, accountID)
+	}
+	return s.schedulerSnapshot.GetAccount(ctx, accountID)
+}
+
 func sortAccountsByPriorityAndLastUsed(accounts []*Account, preferOAuth bool) {
 	sort.SliceStable(accounts, func(i, j int) bool {
 		a, b := accounts[i], accounts[j]
@@ -851,7 +864,7 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				// 检查账号分组归属和平台匹配（确保粘性会话不会跨分组或跨平台）
 				if err == nil && s.isAccountInGroup(account, groupID) && account.Platform == platform && account.IsSchedulableForModel(requestedModel) && (requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) {
 					if err := s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), sessionHash, stickySessionTTL); err != nil {
@@ -864,16 +877,11 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 	}

 	// 2. 获取可调度账号列表（单平台）
-	var accounts []Account
-	var err error
-	if s.cfg.RunMode == config.RunModeSimple {
-		// 简易模式：忽略 groupID，查询所有可用账号
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, platform)
-	} else if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, platform)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, platform)
+	forcePlatform, hasForcePlatform := ctx.Value(ctxkey.ForcePlatform).(string)
+	if hasForcePlatform && forcePlatform == "" {
+		hasForcePlatform = false
 	}
+	accounts, _, err := s.listSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
@@ -935,7 +943,6 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 // selectAccountWithMixedScheduling 选择账户（支持混合调度）
 // 查询原生平台账户 + 启用 mixed_scheduling 的 antigravity 账户
 func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, nativePlatform string) (*Account, error) {
-	platforms := []string{nativePlatform, PlatformAntigravity}
 	preferOAuth := nativePlatform == PlatformGemini

 	// 1. 查询粘性会话
@@ -943,7 +950,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				// 检查账号分组归属和有效性：原生平台直接匹配，antigravity 需要启用混合调度
 				if err == nil && s.isAccountInGroup(account, groupID) && account.IsSchedulableForModel(requestedModel) && (requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) {
 					if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
@@ -958,13 +965,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
 	}

 	// 2. 获取可调度账号列表
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, *groupID, platforms)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, platforms)
-	}
+	accounts, _, err := s.listSchedulableAccounts(ctx, groupID, nativePlatform, false)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
@@ -2340,6 +2341,10 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
 				return &streamingResult{usage: usage, firstTokenMs: firstTokenMs, clientDisconnect: true}, nil
 			}
 			log.Printf("Stream data interval timeout: account=%d model=%s interval=%s", account.ID, originalModel, streamInterval)
+			// 处理流超时，可能标记账户为临时不可调度或错误状态
+			if s.rateLimitService != nil {
+				s.rateLimitService.HandleStreamTimeout(ctx, account, originalModel)
+			}
 			sendErrorEvent("stream_timeout")
 			return &streamingResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")
 		}
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -40,6 +40,7 @@ type GeminiMessagesCompatService struct {
 	accountRepo               AccountRepository
 	groupRepo                 GroupRepository
 	cache                     GatewayCache
+	schedulerSnapshot         *SchedulerSnapshotService
 	tokenProvider             *GeminiTokenProvider
 	rateLimitService          *RateLimitService
 	httpUpstream              HTTPUpstream
@@ -51,6 +52,7 @@ func NewGeminiMessagesCompatService(
 	accountRepo AccountRepository,
 	groupRepo GroupRepository,
 	cache GatewayCache,
+	schedulerSnapshot *SchedulerSnapshotService,
 	tokenProvider *GeminiTokenProvider,
 	rateLimitService *RateLimitService,
 	httpUpstream HTTPUpstream,
@@ -61,6 +63,7 @@ func NewGeminiMessagesCompatService(
 		accountRepo:               accountRepo,
 		groupRepo:                 groupRepo,
 		cache:                     cache,
+		schedulerSnapshot:         schedulerSnapshot,
 		tokenProvider:             tokenProvider,
 		rateLimitService:          rateLimitService,
 		httpUpstream:              httpUpstream,
@@ -105,12 +108,6 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 	// gemini 分组支持混合调度（包含启用了 mixed_scheduling 的 antigravity 账户）
 	// 注意：强制平台模式不走混合调度
 	useMixedScheduling := platform == PlatformGemini && !hasForcePlatform
-	var queryPlatforms []string
-	if useMixedScheduling {
-		queryPlatforms = []string{PlatformGemini, PlatformAntigravity}
-	} else {
-		queryPlatforms = []string{platform}
-	}

 	cacheKey := "gemini:" + sessionHash

@@ -118,7 +115,7 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), cacheKey)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				// 检查账号是否有效：原生平台直接匹配，antigravity 需要启用混合调度
 				if err == nil && account.IsSchedulableForModel(requestedModel) && (requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) {
 					valid := false
@@ -149,22 +146,16 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 	}

 	// 查询可调度账户（强制平台模式：优先按分组查找，找不到再查全部）
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, *groupID, queryPlatforms)
+	accounts, err := s.listSchedulableAccountsOnce(ctx, groupID, platform, hasForcePlatform)
+	if err != nil {
+		return nil, fmt.Errorf("query accounts failed: %w", err)
+	}
+	// 强制平台模式下，分组中找不到账户时回退查询全部
+	if len(accounts) == 0 && groupID != nil && hasForcePlatform {
+		accounts, err = s.listSchedulableAccountsOnce(ctx, nil, platform, hasForcePlatform)
 		if err != nil {
 			return nil, fmt.Errorf("query accounts failed: %w", err)
 		}
-		// 强制平台模式下，分组中找不到账户时回退查询全部
-		if len(accounts) == 0 && hasForcePlatform {
-			accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, queryPlatforms)
-		}
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, queryPlatforms)
-	}
-	if err != nil {
-		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}

 	var selected *Account
@@ -245,6 +236,31 @@ func (s *GeminiMessagesCompatService) GetAntigravityGatewayService() *Antigravit
 	return s.antigravityGatewayService
 }

+func (s *GeminiMessagesCompatService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if s.schedulerSnapshot == nil { // no snapshot service set: read straight from the repository
+		return s.accountRepo.GetByID(ctx, accountID)
+	}
+	return s.schedulerSnapshot.GetAccount(ctx, accountID)
+}
+
+// listSchedulableAccountsOnce lists schedulable accounts via the snapshot service when set, else via accountRepo.
+func (s *GeminiMessagesCompatService) listSchedulableAccountsOnce(ctx context.Context, groupID *int64, platform string, hasForcePlatform bool) ([]Account, error) {
+	if snapshot := s.schedulerSnapshot; snapshot != nil {
+		accounts, _, err := snapshot.ListSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
+		return accounts, err
+	}
+
+	// Gemini without a forced platform also queries antigravity accounts (mixed scheduling).
+	platforms := []string{platform}
+	if platform == PlatformGemini && !hasForcePlatform {
+		platforms = append(platforms, PlatformAntigravity)
+	}
+	if groupID == nil {
+		return s.accountRepo.ListSchedulableByPlatforms(ctx, platforms)
+	}
+	return s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, *groupID, platforms)
+}
+
 func (s *GeminiMessagesCompatService) validateUpstreamBaseURL(raw string) (string, error) {
 	if s.cfg != nil && !s.cfg.Security.URLAllowlist.Enabled {
 		normalized, err := urlvalidator.ValidateURLFormat(raw, s.cfg.Security.URLAllowlist.AllowInsecureHTTP)
@@ -266,13 +282,7 @@ func (s *GeminiMessagesCompatService) validateUpstreamBaseURL(raw string) (strin

 // HasAntigravityAccounts 检查是否有可用的 antigravity 账户
 func (s *GeminiMessagesCompatService) HasAntigravityAccounts(ctx context.Context, groupID *int64) (bool, error) {
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformAntigravity)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformAntigravity)
-	}
+	accounts, err := s.listSchedulableAccountsOnce(ctx, groupID, PlatformAntigravity, false)
 	if err != nil {
 		return false, err
 	}
@@ -288,13 +298,7 @@ func (s *GeminiMessagesCompatService) HasAntigravityAccounts(ctx context.Context
 // 3) OAuth accounts explicitly marked as ai_studio
 // 4) Any remaining Gemini accounts (fallback)
 func (s *GeminiMessagesCompatService) SelectAccountForAIStudioEndpoints(ctx context.Context, groupID *int64) (*Account, error) {
-	var accounts []Account
-	var err error
-	if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformGemini)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformGemini)
-	}
+	accounts, err := s.listSchedulableAccountsOnce(ctx, groupID, PlatformGemini, true)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -85,6 +85,7 @@ type OpenAIGatewayService struct {
 	userSubRepo         UserSubscriptionRepository
 	cache               GatewayCache
 	cfg                 *config.Config
+	schedulerSnapshot   *SchedulerSnapshotService
 	concurrencyService  *ConcurrencyService
 	billingService      *BillingService
 	rateLimitService    *RateLimitService
@@ -101,6 +102,7 @@ func NewOpenAIGatewayService(
 	userSubRepo UserSubscriptionRepository,
 	cache GatewayCache,
 	cfg *config.Config,
+	schedulerSnapshot *SchedulerSnapshotService,
 	concurrencyService *ConcurrencyService,
 	billingService *BillingService,
 	rateLimitService *RateLimitService,
@@ -115,6 +117,7 @@ func NewOpenAIGatewayService(
 		userSubRepo:         userSubRepo,
 		cache:               cache,
 		cfg:                 cfg,
+		schedulerSnapshot:   schedulerSnapshot,
 		concurrencyService:  concurrencyService,
 		billingService:      billingService,
 		rateLimitService:    rateLimitService,
@@ -159,7 +162,7 @@ func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.C
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), "openai:"+sessionHash)
 		if err == nil && accountID > 0 {
 			if _, excluded := excludedIDs[accountID]; !excluded {
-				account, err := s.accountRepo.GetByID(ctx, accountID)
+				account, err := s.getSchedulableAccount(ctx, accountID)
 				if err == nil && account.IsSchedulable() && account.IsOpenAI() && (requestedModel == "" || account.IsModelSupported(requestedModel)) {
 					// Refresh sticky session TTL
 					_ = s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), "openai:"+sessionHash, openaiStickySessionTTL)
@@ -170,16 +173,7 @@ func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.C
 	}

 	// 2. Get schedulable OpenAI accounts
-	var accounts []Account
-	var err error
-	// 简易模式：忽略分组限制，查询所有可用账号
-	if s.cfg.RunMode == config.RunModeSimple {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
-	} else if groupID != nil {
-		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformOpenAI)
-	} else {
-		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
-	}
+	accounts, err := s.listSchedulableAccounts(ctx, groupID)
 	if err != nil {
 		return nil, fmt.Errorf("query accounts failed: %w", err)
 	}
@@ -301,7 +295,7 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
 	if sessionHash != "" {
 		accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), "openai:"+sessionHash)
 		if err == nil && accountID > 0 && !isExcluded(accountID) {
-			account, err := s.accountRepo.GetByID(ctx, accountID)
+			account, err := s.getSchedulableAccount(ctx, accountID)
 			if err == nil && account.IsSchedulable() && account.IsOpenAI() &&
 				(requestedModel == "" || account.IsModelSupported(requestedModel)) {
 				result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
@@ -446,6 +440,10 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
 }

 func (s *OpenAIGatewayService) listSchedulableAccounts(ctx context.Context, groupID *int64) ([]Account, error) {
+	if s.schedulerSnapshot != nil {
+		accounts, _, err := s.schedulerSnapshot.ListSchedulableAccounts(ctx, groupID, PlatformOpenAI, false)
+		return accounts, err
+	}
 	var accounts []Account
 	var err error
 	if s.cfg != nil && s.cfg.RunMode == config.RunModeSimple {
@@ -468,6 +466,13 @@ func (s *OpenAIGatewayService) tryAcquireAccountSlot(ctx context.Context, accoun
 	return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
 }

+func (s *OpenAIGatewayService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if s.schedulerSnapshot == nil { // no snapshot service set: read straight from the repository
+		return s.accountRepo.GetByID(ctx, accountID)
+	}
+	return s.schedulerSnapshot.GetAccount(ctx, accountID)
+}
+
 func (s *OpenAIGatewayService) schedulingConfig() config.GatewaySchedulingConfig {
 	if s.cfg != nil {
 		return s.cfg.Gateway.Scheduling
@@ -1042,6 +1047,10 @@ func (s *OpenAIGatewayService) handleStreamingResponse(ctx context.Context, resp
 				continue
 			}
 			log.Printf("Stream data interval timeout: account=%d model=%s interval=%s", account.ID, originalModel, streamInterval)
+			// 处理流超时，可能标记账户为临时不可调度或错误状态
+			if s.rateLimitService != nil {
+				s.rateLimitService.HandleStreamTimeout(ctx, account, originalModel)
+			}
 			sendErrorEvent("stream_timeout")
 			return &openaiStreamingResult{usage: usage, firstTokenMs: firstTokenMs}, fmt.Errorf("stream data interval timeout")

--- a/backend/internal/service/ops_port.go
+++ b/backend/internal/service/ops_port.go
@@ -17,6 +17,8 @@ type OpsRepository interface {

 	// Lightweight window stats (for realtime WS / quick sampling).
 	GetWindowStats(ctx context.Context, filter *OpsDashboardFilter) (*OpsWindowStats, error)
+	// Lightweight realtime traffic summary (for the Ops dashboard header card).
+	GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error)

 	GetDashboardOverview(ctx context.Context, filter *OpsDashboardFilter) (*OpsDashboardOverview, error)
 	GetThroughputTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsThroughputTrendResponse, error)
@@ -71,6 +73,7 @@ type OpsInsertErrorLogInput struct {
 	Severity          string
 	StatusCode        int
 	IsBusinessLimited bool
+	IsCountTokens     bool // 是否为 count_tokens 请求

 	ErrorMessage string
 	ErrorBody    string
--- a/backend/internal/service/ops_realtime_traffic.go
+++ b/backend/internal/service/ops_realtime_traffic.go
@@ -0,0 +1,36 @@
+package service
+
+import (
+	"context"
+	"time"
+
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
+)
+
+// GetRealtimeTrafficSummary validates the requested window (both bounds set, ordered, at most one
+// hour) and returns the repository's QPS/TPS summary for it, always queried in raw mode.
+func (s *OpsService) GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error) {
+	if err := s.RequireMonitoringEnabled(ctx); err != nil {
+		return nil, err
+	}
+	if s.opsRepo == nil {
+		return nil, infraerrors.ServiceUnavailable("OPS_REPO_UNAVAILABLE", "Ops repository not available")
+	}
+	if filter == nil {
+		return nil, infraerrors.BadRequest("OPS_FILTER_REQUIRED", "filter is required")
+	}
+
+	start, end := filter.StartTime, filter.EndTime
+	switch {
+	case start.IsZero() || end.IsZero():
+		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_REQUIRED", "start_time/end_time are required")
+	case start.After(end):
+		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_INVALID", "start_time must be <= end_time")
+	case end.Sub(start) > time.Hour:
+		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_TOO_LARGE", "invalid time range: max window is 1 hour")
+	}
+
+	// Raw mode is forced on the caller's filter (it is modified in place) before delegating.
+	filter.QueryMode = OpsQueryModeRaw
+	return s.opsRepo.GetRealtimeTrafficSummary(ctx, filter)
+}
--- a/backend/internal/service/ops_realtime_traffic_models.go
+++ b/backend/internal/service/ops_realtime_traffic_models.go
@@ -0,0 +1,19 @@
+package service
+
+import "time"
+
+// OpsRealtimeTrafficSummary is a lightweight summary used by the Ops dashboard "Realtime Traffic" card.
+// It reports QPS/TPS current/peak/avg for the requested time window.
+type OpsRealtimeTrafficSummary struct {
+	// Window is a normalized label (e.g. "1min", "5min", "30min", "1h").
+	Window string `json:"window"`
+
+	StartTime time.Time `json:"start_time"`
+	EndTime   time.Time `json:"end_time"`
+
+	Platform string `json:"platform"`
+	GroupID  *int64 `json:"group_id"` // no omitempty: a nil GroupID is serialized as null
+
+	QPS OpsRateSummary `json:"qps"`
+	TPS OpsRateSummary `json:"tps"`
+}
--- a/backend/internal/service/ops_settings.go
+++ b/backend/internal/service/ops_settings.go
@@ -368,6 +368,9 @@ func defaultOpsAdvancedSettings() *OpsAdvancedSettings {
 		Aggregation: OpsAggregationSettings{
 			AggregationEnabled: false,
 		},
+		IgnoreCountTokensErrors: false,
+		AutoRefreshEnabled:      false,
+		AutoRefreshIntervalSec:  30,
 	}
 }

@@ -388,6 +391,10 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
 	if cfg.DataRetention.HourlyMetricsRetentionDays <= 0 {
 		cfg.DataRetention.HourlyMetricsRetentionDays = 30
 	}
+	// Normalize auto refresh interval (default 30 seconds)
+	if cfg.AutoRefreshIntervalSec <= 0 {
+		cfg.AutoRefreshIntervalSec = 30
+	}
 }

 func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
@@ -403,6 +410,9 @@ func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
 	if cfg.DataRetention.HourlyMetricsRetentionDays < 1 || cfg.DataRetention.HourlyMetricsRetentionDays > 365 {
 		return errors.New("hourly_metrics_retention_days must be between 1 and 365")
 	}
+	if cfg.AutoRefreshIntervalSec < 15 || cfg.AutoRefreshIntervalSec > 300 {
+		return errors.New("auto_refresh_interval_seconds must be between 15 and 300")
+	}
 	return nil
 }

@@ -463,3 +473,93 @@ func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdva
 	_ = json.Unmarshal(raw, updated)
 	return updated, nil
 }
+
+// =========================
+// Metric thresholds
+// =========================
+
+const SettingKeyOpsMetricThresholds = "ops_metric_thresholds"
+
+// defaultOpsMetricThresholds builds the built-in metric thresholds.
+func defaultOpsMetricThresholds() *OpsMetricThresholds {
+	// Every field is a *float64, so take the address of a fresh copy per
+	// value; each call therefore returns independently mutable pointers.
+	ref := func(v float64) *float64 { return &v }
+	defaults := OpsMetricThresholds{
+		SLAPercentMin:               ref(99.5),
+		LatencyP99MsMax:             ref(2000.0),
+		TTFTp99MsMax:                ref(500.0),
+		RequestErrorRatePercentMax:  ref(5.0),
+		UpstreamErrorRatePercentMax: ref(5.0),
+	}
+	return &defaults
+}
+
+// GetMetricThresholds returns the stored metric thresholds. The built-in defaults are
+// returned when the service or repository is nil, the key is missing, or the JSON is invalid.
+func (s *OpsService) GetMetricThresholds(ctx context.Context) (*OpsMetricThresholds, error) {
+	defaultCfg := defaultOpsMetricThresholds()
+	if s == nil || s.settingRepo == nil {
+		return defaultCfg, nil
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsMetricThresholds)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) {
+			// Key not stored yet: seed the defaults best-effort; a failed write is ignored.
+			if b, mErr := json.Marshal(defaultCfg); mErr == nil {
+				_ = s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(b))
+			}
+			return defaultCfg, nil
+		}
+		return nil, err
+	}
+	cfg := &OpsMetricThresholds{}
+	if err := json.Unmarshal([]byte(raw), cfg); err != nil {
+		return defaultCfg, nil
+	}
+	return cfg, nil
+}
+
+// UpdateMetricThresholds validates cfg, stores it as JSON under SettingKeyOpsMetricThresholds
+// and returns a copy decoded from the stored bytes. It fails on a nil receiver, repository or cfg.
+func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricThresholds) (*OpsMetricThresholds, error) {
+	if s == nil || s.settingRepo == nil {
+		return nil, errors.New("setting repository not initialized")
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if cfg == nil {
+		return nil, errors.New("invalid config")
+	}
+	// Validate thresholds; nil fields are skipped.
+	if cfg.SLAPercentMin != nil && (*cfg.SLAPercentMin < 0 || *cfg.SLAPercentMin > 100) {
+		return nil, errors.New("sla_percent_min must be between 0 and 100")
+	}
+	if cfg.LatencyP99MsMax != nil && *cfg.LatencyP99MsMax < 0 {
+		return nil, errors.New("latency_p99_ms_max must be >= 0")
+	}
+	if cfg.TTFTp99MsMax != nil && *cfg.TTFTp99MsMax < 0 {
+		return nil, errors.New("ttft_p99_ms_max must be >= 0")
+	}
+	if cfg.RequestErrorRatePercentMax != nil && (*cfg.RequestErrorRatePercentMax < 0 || *cfg.RequestErrorRatePercentMax > 100) {
+		return nil, errors.New("request_error_rate_percent_max must be between 0 and 100")
+	}
+	if cfg.UpstreamErrorRatePercentMax != nil && (*cfg.UpstreamErrorRatePercentMax < 0 || *cfg.UpstreamErrorRatePercentMax > 100) {
+		return nil, errors.New("upstream_error_rate_percent_max must be between 0 and 100")
+	}
+	raw, err := json.Marshal(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(raw)); err != nil {
+		return nil, err
+	}
+	// Decode the bytes that were just stored so the caller gets a value detached from cfg.
+	updated := &OpsMetricThresholds{}
+	_ = json.Unmarshal(raw, updated)
+	return updated, nil
+}
--- a/backend/internal/service/ops_settings_models.go
+++ b/backend/internal/service/ops_settings_models.go
@@ -61,17 +61,29 @@ type OpsAlertSilencingSettings struct {
 	Entries []OpsAlertSilenceEntry `json:"entries,omitempty"`
 }

+type OpsMetricThresholds struct {
+	SLAPercentMin               *float64 `json:"sla_percent_min,omitempty"`                 // turns red when SLA falls below this value
+	LatencyP99MsMax             *float64 `json:"latency_p99_ms_max,omitempty"`              // turns red when P99 latency exceeds this value
+	TTFTp99MsMax                *float64 `json:"ttft_p99_ms_max,omitempty"`                 // turns red when TTFT P99 exceeds this value
+	RequestErrorRatePercentMax  *float64 `json:"request_error_rate_percent_max,omitempty"`  // turns red when the request error rate exceeds this value
+	UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // turns red when the upstream error rate exceeds this value
+}
+
 type OpsAlertRuntimeSettings struct {
 	EvaluationIntervalSeconds int `json:"evaluation_interval_seconds"`

 	DistributedLock OpsDistributedLockSettings `json:"distributed_lock"`
 	Silencing       OpsAlertSilencingSettings  `json:"silencing"`
+	Thresholds      OpsMetricThresholds        `json:"thresholds"` // metric threshold configuration
 }

 // OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).
 type OpsAdvancedSettings struct {
-	DataRetention OpsDataRetentionSettings `json:"data_retention"`
-	Aggregation   OpsAggregationSettings   `json:"aggregation"`
+	DataRetention           OpsDataRetentionSettings `json:"data_retention"`
+	Aggregation             OpsAggregationSettings   `json:"aggregation"`
+	IgnoreCountTokensErrors bool                     `json:"ignore_count_tokens_errors"`
+	AutoRefreshEnabled      bool                     `json:"auto_refresh_enabled"`
+	AutoRefreshIntervalSec  int                      `json:"auto_refresh_interval_seconds"`
 }

 type OpsDataRetentionSettings struct {
--- a/backend/internal/service/ratelimit_service.go
+++ b/backend/internal/service/ratelimit_service.go
@@ -15,13 +15,15 @@ import (

 // RateLimitService 处理限流和过载状态管理
 type RateLimitService struct {
-	accountRepo        AccountRepository
-	usageRepo          UsageLogRepository
-	cfg                *config.Config
-	geminiQuotaService *GeminiQuotaService
-	tempUnschedCache   TempUnschedCache
-	usageCacheMu       sync.RWMutex
-	usageCache         map[int64]*geminiUsageCacheEntry
+	accountRepo         AccountRepository
+	usageRepo           UsageLogRepository
+	cfg                 *config.Config
+	geminiQuotaService  *GeminiQuotaService
+	tempUnschedCache    TempUnschedCache
+	timeoutCounterCache TimeoutCounterCache
+	settingService      *SettingService
+	usageCacheMu        sync.RWMutex
+	usageCache          map[int64]*geminiUsageCacheEntry
 }

 type geminiUsageCacheEntry struct {
@@ -44,11 +46,22 @@ func NewRateLimitService(accountRepo AccountRepository, usageRepo UsageLogReposi
 	}
 }

+// SetTimeoutCounterCache sets the timeout counter cache (optional dependency).
+func (s *RateLimitService) SetTimeoutCounterCache(cache TimeoutCounterCache) {
+	s.timeoutCounterCache = cache
+}
+
+// SetSettingService sets the system settings service (optional dependency).
+func (s *RateLimitService) SetSettingService(settingService *SettingService) {
+	s.settingService = settingService
+}
+
 // HandleUpstreamError 处理上游错误响应，标记账号状态
 // 返回是否应该停止该账号的调度
 func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Account, statusCode int, headers http.Header, responseBody []byte) (shouldDisable bool) {
 	// apikey 类型账号：检查自定义错误码配置
 	// 如果启用且错误码不在列表中，则不处理（不停止调度、不标记限流/过载）
+	customErrorCodesEnabled := account.IsCustomErrorCodesEnabled()
 	if !account.ShouldHandleErrorCode(statusCode) {
 		log.Printf("Account %d: error %d skipped (not in custom error codes)", account.ID, statusCode)
 		return false
@@ -93,11 +106,19 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
 		s.handle529(ctx, account)
 		shouldDisable = false
 	default:
-		// 其他5xx错误：记录但不停止调度
-		if statusCode >= 500 {
+		// 自定义错误码启用时：在列表中的错误码都应该停止调度
+		if customErrorCodesEnabled {
+			msg := "Custom error code triggered"
+			if upstreamMsg != "" {
+				msg = upstreamMsg
+			}
+			s.handleCustomErrorCode(ctx, account, statusCode, msg)
+			shouldDisable = true
+		} else if statusCode >= 500 {
+			// 未启用自定义错误码时：仅记录5xx错误
 			log.Printf("Account %d received upstream error %d", account.ID, statusCode)
+			shouldDisable = false
 		}
-		shouldDisable = false
 	}

 	if tempMatched {
@@ -273,6 +294,16 @@ func (s *RateLimitService) handleAuthError(ctx context.Context, account *Account
 	log.Printf("Account %d disabled due to auth error: %s", account.ID, errorMsg)
 }

+// handleCustomErrorCode handles a custom error code and stops scheduling for the account.
+func (s *RateLimitService) handleCustomErrorCode(ctx context.Context, account *Account, statusCode int, errorMsg string) {
+	msg := "Custom error code " + strconv.Itoa(statusCode) + ": " + errorMsg
+	if err := s.accountRepo.SetError(ctx, account.ID, msg); err != nil {
+		log.Printf("SetError failed for account %d: %v", account.ID, err)
+		return
+	}
+	log.Printf("Account %d disabled due to custom error code %d: %s", account.ID, statusCode, errorMsg)
+}
+
 // handle429 处理429限流错误
 // 解析响应头获取重置时间，标记账号为限流状态
 func (s *RateLimitService) handle429(ctx context.Context, account *Account, headers http.Header) {
@@ -555,3 +586,125 @@ func truncateTempUnschedMessage(body []byte, maxBytes int) string {
 	}
 	return strings.TrimSpace(string(body))
 }
+
+// HandleStreamTimeout handles a stream data timeout.
+// Based on the system settings it decides whether to mark the account as temporarily unschedulable or as errored.
+// It returns whether scheduling for this account should stop.
+func (s *RateLimitService) HandleStreamTimeout(ctx context.Context, account *Account, model string) bool {
+	if account == nil {
+		return false
+	}
+
+	// Load the system settings.
+	if s.settingService == nil {
+		log.Printf("[StreamTimeout] settingService not configured, skipping timeout handling for account %d", account.ID)
+		return false
+	}
+
+	settings, err := s.settingService.GetStreamTimeoutSettings(ctx)
+	if err != nil {
+		log.Printf("[StreamTimeout] Failed to get settings: %v", err)
+		return false
+	}
+
+	if !settings.Enabled {
+		return false
+	}
+
+	if settings.Action == StreamTimeoutActionNone {
+		return false
+	}
+
+	// Increment the timeout count.
+	var count int64 = 1
+	if s.timeoutCounterCache != nil {
+		count, err = s.timeoutCounterCache.IncrementTimeoutCount(ctx, account.ID, settings.ThresholdWindowMinutes)
+		if err != nil {
+			log.Printf("[StreamTimeout] Failed to increment timeout count for account %d: %v", account.ID, err)
+			// Keep going and treat this as count=1.
+			count = 1
+		}
+	}
+
+	log.Printf("[StreamTimeout] Account %d timeout count: %d/%d (window: %d min, model: %s)",
+		account.ID, count, settings.ThresholdCount, settings.ThresholdWindowMinutes, model)
+
+	// Check whether the threshold has been reached.
+	if count < int64(settings.ThresholdCount) {
+		return false
+	}
+
+	// Threshold reached: perform the configured action.
+	switch settings.Action {
+	case StreamTimeoutActionTempUnsched:
+		return s.triggerStreamTimeoutTempUnsched(ctx, account, settings, model)
+	case StreamTimeoutActionError:
+		return s.triggerStreamTimeoutError(ctx, account, model)
+	default:
+		return false
+	}
+}
+
+// triggerStreamTimeoutTempUnsched marks the account temporarily unschedulable because of stream timeouts.
+func (s *RateLimitService) triggerStreamTimeoutTempUnsched(ctx context.Context, account *Account, settings *StreamTimeoutSettings, model string) bool {
+	now := time.Now()
+	until := now.Add(time.Duration(settings.TempUnschedMinutes) * time.Minute)
+
+	state := &TempUnschedState{
+		UntilUnix:       until.Unix(),
+		TriggeredAtUnix: now.Unix(),
+		StatusCode:      0, // a timeout has no status code
+		MatchedKeyword:  "stream_timeout",
+		RuleIndex:       -1, // denotes a system-level rule
+		ErrorMessage:    "Stream data interval timeout for model: " + model,
+	}
+
+	reason := ""
+	if raw, err := json.Marshal(state); err == nil {
+		reason = string(raw)
+	}
+	if reason == "" {
+		reason = state.ErrorMessage
+	}
+
+	if err := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, reason); err != nil {
+		log.Printf("[StreamTimeout] SetTempUnschedulable failed for account %d: %v", account.ID, err)
+		return false
+	}
+
+	if s.tempUnschedCache != nil {
+		if err := s.tempUnschedCache.SetTempUnsched(ctx, account.ID, state); err != nil {
+			log.Printf("[StreamTimeout] SetTempUnsched cache failed for account %d: %v", account.ID, err)
+		}
+	}
+
+	// Reset the timeout count.
+	if s.timeoutCounterCache != nil {
+		if err := s.timeoutCounterCache.ResetTimeoutCount(ctx, account.ID); err != nil {
+			log.Printf("[StreamTimeout] ResetTimeoutCount failed for account %d: %v", account.ID, err)
+		}
+	}
+
+	log.Printf("[StreamTimeout] Account %d marked as temp unschedulable until %v (model: %s)", account.ID, until, model)
+	return true
+}
+
+// triggerStreamTimeoutError puts the account into the error state because of stream timeouts.
+func (s *RateLimitService) triggerStreamTimeoutError(ctx context.Context, account *Account, model string) bool {
+	errorMsg := "Stream data interval timeout (repeated failures) for model: " + model
+
+	if err := s.accountRepo.SetError(ctx, account.ID, errorMsg); err != nil {
+		log.Printf("[StreamTimeout] SetError failed for account %d: %v", account.ID, err)
+		return false
+	}
+
+	// Reset the timeout count.
+	if s.timeoutCounterCache != nil {
+		if err := s.timeoutCounterCache.ResetTimeoutCount(ctx, account.ID); err != nil {
+			log.Printf("[StreamTimeout] ResetTimeoutCount failed for account %d: %v", account.ID, err)
+		}
+	}
+
+	log.Printf("[StreamTimeout] Account %d marked as error (model: %s)", account.ID, model)
+	return true
+}
--- a/backend/internal/service/scheduler_cache.go
+++ b/backend/internal/service/scheduler_cache.go
@@ -0,0 +1,68 @@
+package service
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+)
+
+const (
+	SchedulerModeSingle = "single" // resolveMode's default when neither rule below applies
+	SchedulerModeMixed  = "mixed"  // Anthropic/Gemini requests without a forced platform
+	SchedulerModeForced = "forced" // requests that carry a forced platform
+)
+
+type SchedulerBucket struct {
+	GroupID  int64  // 0 when no positive group applies (see normalizeGroupID)
+	Platform string // platform whose accounts are listed for this bucket
+	Mode     string // one of the SchedulerMode* constants
+}
+
+func (b SchedulerBucket) String() string {
+	return fmt.Sprint(b.GroupID) + ":" + b.Platform + ":" + b.Mode
+}
+
+func ParseSchedulerBucket(raw string) (SchedulerBucket, bool) {
+	parts := strings.Split(raw, ":")
+	if len(parts) != 3 {
+		return SchedulerBucket{}, false
+	}
+	groupID, err := strconv.ParseInt(parts[0], 10, 64)
+	if err != nil {
+		return SchedulerBucket{}, false
+	}
+	if parts[1] == "" || parts[2] == "" {
+		return SchedulerBucket{}, false
+	}
+	return SchedulerBucket{
+		GroupID:  groupID,
+		Platform: parts[1],
+		Mode:     parts[2],
+	}, true
+}
+
+// SchedulerCache handles cache reads and writes for scheduler snapshots and account snapshots.
+type SchedulerCache interface {
+	// GetSnapshot reads a snapshot and reports whether it was a hit (ready + active + data complete).
+	GetSnapshot(ctx context.Context, bucket SchedulerBucket) ([]*Account, bool, error)
+	// SetSnapshot writes a snapshot and switches the active version.
+	SetSnapshot(ctx context.Context, bucket SchedulerBucket, accounts []Account) error
+	// GetAccount fetches a single-account snapshot.
+	GetAccount(ctx context.Context, accountID int64) (*Account, error)
+	// SetAccount writes a single-account snapshot (including unschedulable state).
+	SetAccount(ctx context.Context, account *Account) error
+	// DeleteAccount deletes a single-account snapshot.
+	DeleteAccount(ctx context.Context, accountID int64) error
+	// UpdateLastUsed batch-updates the last-used time of accounts.
+	UpdateLastUsed(ctx context.Context, updates map[int64]time.Time) error
+	// TryLockBucket tries to acquire the rebuild lock of a bucket.
+	TryLockBucket(ctx context.Context, bucket SchedulerBucket, ttl time.Duration) (bool, error)
+	// ListBuckets returns the set of registered buckets.
+	ListBuckets(ctx context.Context) ([]SchedulerBucket, error)
+	// GetOutboxWatermark reads the outbox watermark.
+	GetOutboxWatermark(ctx context.Context) (int64, error)
+	// SetOutboxWatermark saves the outbox watermark.
+	SetOutboxWatermark(ctx context.Context, id int64) error
+}
--- a/backend/internal/service/scheduler_events.go
+++ b/backend/internal/service/scheduler_events.go
@@ -0,0 +1,10 @@
+package service
+
+const (
+	SchedulerOutboxEventAccountChanged       = "account_changed"        // dispatched to handleAccountEvent
+	SchedulerOutboxEventAccountGroupsChanged = "account_groups_changed" // dispatched to handleAccountEvent
+	SchedulerOutboxEventAccountBulkChanged   = "account_bulk_changed"   // dispatched to handleBulkAccountEvent
+	SchedulerOutboxEventAccountLastUsed      = "account_last_used"      // dispatched to handleLastUsedEvent
+	SchedulerOutboxEventGroupChanged         = "group_changed"          // dispatched to handleGroupEvent
+	SchedulerOutboxEventFullRebuild          = "full_rebuild"           // triggers a full rebuild of all buckets
+)
--- a/backend/internal/service/scheduler_outbox.go
+++ b/backend/internal/service/scheduler_outbox.go
@@ -0,0 +1,21 @@
+package service
+
+import (
+	"context"
+	"time"
+)
+
+type SchedulerOutboxEvent struct {
+	ID        int64          // the last ID of a handled batch becomes the outbox watermark
+	EventType string         // one of the SchedulerOutboxEvent* constants
+	AccountID *int64         // may be nil; the account handler ignores nil or non-positive IDs
+	GroupID   *int64         // may be nil; the group handler ignores nil or non-positive IDs
+	Payload   map[string]any // event data; handlers read "group_ids", "account_ids" and "last_used"
+	CreatedAt time.Time      // used by checkOutboxLag to measure processing lag
+}
+
+// SchedulerOutboxRepository provides the read interface for the scheduler outbox.
+type SchedulerOutboxRepository interface {
+	ListAfter(ctx context.Context, afterID int64, limit int) ([]SchedulerOutboxEvent, error)
+	MaxID(ctx context.Context) (int64, error)
+}
--- a/backend/internal/service/scheduler_snapshot_service.go
+++ b/backend/internal/service/scheduler_snapshot_service.go
@@ -0,0 +1,786 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"log"
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+)
+
+var (
+	ErrSchedulerCacheNotReady   = errors.New("scheduler cache not ready")
+	ErrSchedulerFallbackLimited = errors.New("scheduler db fallback limited")
+)
+
+const outboxEventTimeout = 2 * time.Minute
+
+type SchedulerSnapshotService struct {
+	cache         SchedulerCache
+	outboxRepo    SchedulerOutboxRepository
+	accountRepo   AccountRepository
+	groupRepo     GroupRepository
+	cfg           *config.Config
+	stopCh        chan struct{}    // closed by Stop; ends the worker loops
+	stopOnce      sync.Once        // makes closing stopCh safe to repeat
+	wg            sync.WaitGroup   // tracks the goroutines launched by Start
+	fallbackLimit *fallbackLimiter // consulted by guardFallback before a DB fallback
+	lagMu         sync.Mutex       // guards lagFailures
+	lagFailures   int              // consecutive lagging polls counted by checkOutboxLag
+}
+
+func NewSchedulerSnapshotService(
+	cache SchedulerCache,
+	outboxRepo SchedulerOutboxRepository,
+	accountRepo AccountRepository,
+	groupRepo GroupRepository,
+	cfg *config.Config,
+) *SchedulerSnapshotService {
+	maxQPS := 0 // stays 0 when cfg is nil; what 0 means is decided by newFallbackLimiter
+	if cfg != nil {
+		maxQPS = cfg.Gateway.Scheduling.DbFallbackMaxQPS
+	}
+	return &SchedulerSnapshotService{
+		cache:         cache,
+		outboxRepo:    outboxRepo,
+		accountRepo:   accountRepo,
+		groupRepo:     groupRepo,
+		cfg:           cfg,
+		stopCh:        make(chan struct{}), // never sent on; only closed by Stop
+		fallbackLimit: newFallbackLimiter(maxQPS),
+	}
+}
+
+// Start launches the background goroutines: a one-shot startup rebuild, the outbox
+// poller (only with an outbox repository and a positive poll interval) and the periodic
+// full rebuild (only with a positive interval). It does nothing without a cache.
+func (s *SchedulerSnapshotService) Start() {
+	if s == nil || s.cache == nil {
+		return
+	}
+	s.wg.Add(1)
+	go func() {
+		defer s.wg.Done()
+		s.runInitialRebuild()
+	}()
+	interval := s.outboxPollInterval()
+	if s.outboxRepo != nil && interval > 0 {
+		s.wg.Add(1)
+		go func() {
+			defer s.wg.Done()
+			s.runOutboxWorker(interval)
+		}()
+	}
+	fullInterval := s.fullRebuildInterval()
+	if fullInterval > 0 {
+		s.wg.Add(1)
+		go func() {
+			defer s.wg.Done()
+			s.runFullRebuildWorker(fullInterval)
+		}()
+	}
+}
+
+func (s *SchedulerSnapshotService) Stop() {
+	if s == nil {
+		return
+	}
+	s.stopOnce.Do(func() {
+		close(s.stopCh) // makes runOutboxWorker and runFullRebuildWorker return
+	})
+	s.wg.Wait() // block until every goroutine launched by Start has exited
+}
+
+// ListSchedulableAccounts returns the schedulable accounts of the bucket derived from
+// groupID, platform and hasForcePlatform, preferring the cached snapshot over the database.
+// The bool result is true for Anthropic/Gemini requests without a forced platform.
+func (s *SchedulerSnapshotService) ListSchedulableAccounts(ctx context.Context, groupID *int64, platform string, hasForcePlatform bool) ([]Account, bool, error) {
+	useMixed := (platform == PlatformAnthropic || platform == PlatformGemini) && !hasForcePlatform
+	mode := s.resolveMode(platform, hasForcePlatform)
+	bucket := s.bucketFor(groupID, platform, mode)
+	if s.cache != nil {
+		cached, hit, err := s.cache.GetSnapshot(ctx, bucket)
+		if err != nil {
+			log.Printf("[Scheduler] cache read failed: bucket=%s err=%v", bucket.String(), err)
+		} else if hit {
+			return derefAccounts(cached), useMixed, nil
+		}
+	}
+
+	// Cache miss or read error: fall back to the database if guardFallback allows it.
+	if err := s.guardFallback(ctx); err != nil {
+		return nil, useMixed, err
+	}
+	fallbackCtx, cancel := s.withFallbackTimeout(ctx)
+	defer cancel()
+	accounts, err := s.loadAccountsFromDB(fallbackCtx, bucket, useMixed)
+	if err != nil {
+		return nil, useMixed, err
+	}
+
+	if s.cache != nil {
+		if err := s.cache.SetSnapshot(fallbackCtx, bucket, accounts); err != nil {
+			log.Printf("[Scheduler] cache write failed: bucket=%s err=%v", bucket.String(), err)
+		}
+	}
+	return accounts, useMixed, nil
+}
+
+// GetAccount returns the cached account snapshot, falling back to the database on a cache miss or read error.
+func (s *SchedulerSnapshotService) GetAccount(ctx context.Context, accountID int64) (*Account, error) {
+	if accountID <= 0 {
+		return nil, nil // non-positive IDs yield no account and no error
+	}
+	if s.cache != nil {
+		account, err := s.cache.GetAccount(ctx, accountID)
+		if err != nil {
+			log.Printf("[Scheduler] account cache read failed: id=%d err=%v", accountID, err)
+		} else if account != nil {
+			return account, nil
+		}
+	}
+	if err := s.guardFallback(ctx); err != nil {
+		return nil, err
+	}
+	fallbackCtx, cancel := s.withFallbackTimeout(ctx)
+	defer cancel()
+	return s.accountRepo.GetByID(fallbackCtx, accountID)
+}
+
+func (s *SchedulerSnapshotService) runInitialRebuild() {
+	if s.cache == nil {
+		return
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) // one deadline for the whole startup rebuild
+	defer cancel()
+	buckets, err := s.cache.ListBuckets(ctx)
+	if err != nil {
+		log.Printf("[Scheduler] list buckets failed: %v", err) // not fatal here: an empty result falls through to defaultBuckets
+	}
+	if len(buckets) == 0 {
+		buckets, err = s.defaultBuckets(ctx)
+		if err != nil {
+			log.Printf("[Scheduler] default buckets failed: %v", err)
+			return
+		}
+	}
+	if err := s.rebuildBuckets(ctx, buckets, "startup"); err != nil {
+		log.Printf("[Scheduler] rebuild startup failed: %v", err)
+	}
+}
+
+// runOutboxWorker polls the outbox once immediately, then on every tick until stopCh is closed.
+func (s *SchedulerSnapshotService) runOutboxWorker(interval time.Duration) {
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+	s.pollOutbox()
+	for {
+		select {
+		case <-ticker.C:
+			s.pollOutbox()
+		case <-s.stopCh:
+			return
+		}
+	}
+}
+
+// runFullRebuildWorker triggers a full rebuild on every tick until stopCh is closed; failures are only logged.
+func (s *SchedulerSnapshotService) runFullRebuildWorker(interval time.Duration) {
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			if err := s.triggerFullRebuild("interval"); err != nil {
+				log.Printf("[Scheduler] full rebuild failed: %v", err)
+			}
+		case <-s.stopCh:
+			return
+		}
+	}
+}
+
+// pollOutbox handles one batch (up to 200 events) after the stored watermark;
+// the watermark advances only when every event in the batch was handled.
+func (s *SchedulerSnapshotService) pollOutbox() {
+	if s.outboxRepo == nil || s.cache == nil {
+		return
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	watermark, err := s.cache.GetOutboxWatermark(ctx)
+	if err != nil {
+		log.Printf("[Scheduler] outbox watermark read failed: %v", err)
+		return
+	}
+	events, err := s.outboxRepo.ListAfter(ctx, watermark, 200)
+	if err != nil {
+		log.Printf("[Scheduler] outbox poll failed: %v", err)
+		return
+	}
+	if len(events) == 0 {
+		return
+	}
+
+	for _, event := range events {
+		eventCtx, cancelEvent := context.WithTimeout(context.Background(), outboxEventTimeout)
+		err := s.handleOutboxEvent(eventCtx, event)
+		cancelEvent()
+		if err != nil {
+			log.Printf("[Scheduler] outbox handle failed: id=%d type=%s err=%v", event.ID, event.EventType, err)
+			return
+		}
+	}
+
+	// Handlers may outlive the 10s read deadline above, so commit on a fresh one.
+	commitCtx, cancelCommit := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancelCommit()
+	watermarkForCheck := events[len(events)-1].ID
+	if err := s.cache.SetOutboxWatermark(commitCtx, watermarkForCheck); err != nil {
+		log.Printf("[Scheduler] outbox watermark write failed: %v", err)
+		watermarkForCheck = watermark
+	}
+	s.checkOutboxLag(commitCtx, events[0], watermarkForCheck)
+}
+
+func (s *SchedulerSnapshotService) handleOutboxEvent(ctx context.Context, event SchedulerOutboxEvent) error {
+	// Unknown event types fall out of the switch and are reported as handled.
+	switch event.EventType {
+	case SchedulerOutboxEventAccountLastUsed:
+		return s.handleLastUsedEvent(ctx, event.Payload)
+	case SchedulerOutboxEventAccountBulkChanged:
+		return s.handleBulkAccountEvent(ctx, event.Payload)
+	case SchedulerOutboxEventAccountGroupsChanged, SchedulerOutboxEventAccountChanged:
+		// Group-membership and account changes share the single-account path.
+		return s.handleAccountEvent(ctx, event.AccountID, event.Payload)
+	case SchedulerOutboxEventGroupChanged:
+		return s.handleGroupEvent(ctx, event.GroupID)
+	case SchedulerOutboxEventFullRebuild:
+		// The full rebuild creates its own context; ctx is not forwarded.
+		return s.triggerFullRebuild("outbox")
+	}
+	return nil
+}
+
+func (s *SchedulerSnapshotService) handleLastUsedEvent(ctx context.Context, payload map[string]any) error {
+	if s.cache == nil || payload == nil {
+		return nil
+	}
+	raw, ok := payload["last_used"].(map[string]any) // account ID (decimal string) -> Unix seconds
+	if !ok || len(raw) == 0 {
+		return nil
+	}
+	updates := make(map[int64]time.Time, len(raw))
+	for key, value := range raw {
+		id, err := strconv.ParseInt(key, 10, 64)
+		if err != nil || id <= 0 {
+			continue // skip keys that are not positive integers
+		}
+		sec, ok := toInt64(value)
+		if !ok || sec <= 0 {
+			continue // skip values toInt64 rejects or that are not positive
+		}
+		updates[id] = time.Unix(sec, 0)
+	}
+	if len(updates) == 0 {
+		return nil // nothing valid to apply; malformed entries are dropped silently
+	}
+	return s.cache.UpdateLastUsed(ctx, updates)
+}
+
+func (s *SchedulerSnapshotService) handleBulkAccountEvent(ctx context.Context, payload map[string]any) error {
+	if payload == nil {
+		return nil
+	}
+	ids := parseInt64Slice(payload["account_ids"]) // each listed account is handled like a single-account event
+	for _, id := range ids {
+		if err := s.handleAccountEvent(ctx, &id, payload); err != nil {
+			return err // abort on the first failure; remaining accounts are not processed
+		}
+	}
+	return nil
+}
+
+// handleAccountEvent refreshes the cached account and rebuilds the buckets of its groups;
+// when the account no longer exists, its cache entry is deleted and the payload's groups are rebuilt.
+func (s *SchedulerSnapshotService) handleAccountEvent(ctx context.Context, accountID *int64, payload map[string]any) error {
+	if accountID == nil || *accountID <= 0 {
+		return nil
+	}
+	if s.accountRepo == nil {
+		return nil
+	}
+	var groupIDs []int64
+	if payload != nil {
+		groupIDs = parseInt64Slice(payload["group_ids"])
+	}
+	account, err := s.accountRepo.GetByID(ctx, *accountID)
+	if err != nil {
+		if errors.Is(err, ErrAccountNotFound) {
+			if s.cache != nil {
+				if err := s.cache.DeleteAccount(ctx, *accountID); err != nil {
+					return err
+				}
+			}
+			return s.rebuildByGroupIDs(ctx, groupIDs, "account_miss")
+		}
+		return err
+	}
+	if s.cache != nil {
+		if err := s.cache.SetAccount(ctx, account); err != nil {
+			return err
+		}
+	}
+	if len(groupIDs) == 0 {
+		groupIDs = account.GroupIDs // payload named no groups: use the account's current ones
+	}
+	return s.rebuildByAccount(ctx, account, groupIDs, "account_change")
+}
+
+func (s *SchedulerSnapshotService) handleGroupEvent(ctx context.Context, groupID *int64) error {
+	// A missing or non-positive group ID is a no-op.
+	if groupID != nil && *groupID > 0 {
+		return s.rebuildByGroupIDs(ctx, []int64{*groupID}, "group_change")
+	}
+	return nil
+}
+
+// rebuildByAccount rebuilds the account's platform buckets, plus Anthropic and Gemini for mixed-scheduling Antigravity accounts.
+func (s *SchedulerSnapshotService) rebuildByAccount(ctx context.Context, account *Account, groupIDs []int64, reason string) error {
+	if account == nil {
+		return nil
+	}
+	groupIDs = s.normalizeGroupIDs(groupIDs)
+	if len(groupIDs) == 0 { // NOTE(review): normalizeGroupIDs never returns an empty slice, so this looks unreachable
+		return nil
+	}
+	var firstErr error
+	if err := s.rebuildBucketsForPlatform(ctx, account.Platform, groupIDs, reason); err != nil && firstErr == nil {
+		firstErr = err
+	}
+	if account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled() {
+		if err := s.rebuildBucketsForPlatform(ctx, PlatformAnthropic, groupIDs, reason); err != nil && firstErr == nil {
+			firstErr = err
+		}
+		if err := s.rebuildBucketsForPlatform(ctx, PlatformGemini, groupIDs, reason); err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+func (s *SchedulerSnapshotService) rebuildByGroupIDs(ctx context.Context, groupIDs []int64, reason string) error {
+	groupIDs = s.normalizeGroupIDs(groupIDs)
+	if len(groupIDs) == 0 {
+		return nil
+	}
+	platforms := []string{PlatformAnthropic, PlatformGemini, PlatformOpenAI, PlatformAntigravity}
+	var firstErr error // later platforms still run after a failure; only the first error is returned
+	for _, platform := range platforms {
+		if err := s.rebuildBucketsForPlatform(ctx, platform, groupIDs, reason); err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+func (s *SchedulerSnapshotService) rebuildBucketsForPlatform(ctx context.Context, platform string, groupIDs []int64, reason string) error {
+	if platform == "" {
+		return nil
+	}
+	// Single and forced buckets always exist; mixed only for Anthropic and Gemini.
+	modes := []string{SchedulerModeSingle, SchedulerModeForced}
+	if platform == PlatformAnthropic || platform == PlatformGemini {
+		modes = append(modes, SchedulerModeMixed)
+	}
+	var firstErr error
+	for _, gid := range groupIDs {
+		for _, mode := range modes {
+			target := SchedulerBucket{GroupID: gid, Platform: platform, Mode: mode}
+			if err := s.rebuildBucket(ctx, target, reason); err != nil && firstErr == nil {
+				firstErr = err
+			}
+		}
+	}
+	return firstErr
+}
+
+func (s *SchedulerSnapshotService) rebuildBuckets(ctx context.Context, buckets []SchedulerBucket, reason string) error {
+	var firstErr error // every bucket is attempted; only the first error is returned
+	for _, bucket := range buckets {
+		if err := s.rebuildBucket(ctx, bucket, reason); err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+// rebuildBucket reloads one bucket from the database and stores it as the cache snapshot.
+// It returns nil without doing any work when the bucket lock is not acquired.
+func (s *SchedulerSnapshotService) rebuildBucket(ctx context.Context, bucket SchedulerBucket, reason string) error {
+	if s.cache == nil {
+		return ErrSchedulerCacheNotReady
+	}
+	ok, err := s.cache.TryLockBucket(ctx, bucket, 30*time.Second) // no explicit unlock in this function; the lock is given a 30s TTL
+	if err != nil {
+		return err
+	}
+	if !ok {
+		return nil
+	}
+	rebuildCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+	accounts, err := s.loadAccountsFromDB(rebuildCtx, bucket, bucket.Mode == SchedulerModeMixed)
+	if err != nil {
+		log.Printf("[Scheduler] rebuild failed: bucket=%s reason=%s err=%v", bucket.String(), reason, err)
+		return err
+	}
+	if err := s.cache.SetSnapshot(rebuildCtx, bucket, accounts); err != nil {
+		log.Printf("[Scheduler] rebuild cache failed: bucket=%s reason=%s err=%v", bucket.String(), reason, err)
+		return err
+	}
+	log.Printf("[Scheduler] rebuild ok: bucket=%s reason=%s size=%d", bucket.String(), reason, len(accounts))
+	return nil
+}
+
+// triggerFullRebuild rebuilds every registered bucket (or the default buckets when none are registered) under its own 2-minute deadline.
+func (s *SchedulerSnapshotService) triggerFullRebuild(reason string) error {
+	if s.cache == nil {
+		return ErrSchedulerCacheNotReady
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+	buckets, err := s.cache.ListBuckets(ctx)
+	if err != nil {
+		log.Printf("[Scheduler] list buckets failed: %v", err)
+		return err
+	}
+	if len(buckets) == 0 {
+		buckets, err = s.defaultBuckets(ctx)
+		if err != nil {
+			log.Printf("[Scheduler] default buckets failed: %v", err)
+			return err
+		}
+	}
+	return s.rebuildBuckets(ctx, buckets, reason)
+}
+
+// checkOutboxLag triggers a full rebuild when the outbox falls behind. Two independent
+// signals are used: the age of the oldest event in the batch (after enough consecutive
+// lagging polls) and the row backlog between the watermark and the repository's max ID.
+func (s *SchedulerSnapshotService) checkOutboxLag(ctx context.Context, oldest SchedulerOutboxEvent, watermark int64) {
+	if oldest.CreatedAt.IsZero() || s.cfg == nil {
+		return
+	}
+	lag := time.Since(oldest.CreatedAt)
+	if lagSeconds := int(lag.Seconds()); lagSeconds >= s.cfg.Gateway.Scheduling.OutboxLagWarnSeconds && s.cfg.Gateway.Scheduling.OutboxLagWarnSeconds > 0 {
+		log.Printf("[Scheduler] outbox lag warning: %ds", lagSeconds)
+	}
+	if s.cfg.Gateway.Scheduling.OutboxLagRebuildSeconds > 0 && int(lag.Seconds()) >= s.cfg.Gateway.Scheduling.OutboxLagRebuildSeconds {
+		s.lagMu.Lock()
+		s.lagFailures++
+		failures := s.lagFailures
+		s.lagMu.Unlock()
+		if failures >= s.cfg.Gateway.Scheduling.OutboxLagRebuildFailures {
+			log.Printf("[Scheduler] outbox lag rebuild triggered: lag=%s failures=%d", lag, failures)
+			s.lagMu.Lock()
+			s.lagFailures = 0
+			s.lagMu.Unlock()
+			if err := s.triggerFullRebuild("outbox_lag"); err != nil {
+				log.Printf("[Scheduler] outbox lag rebuild failed: %v", err)
+			}
+		}
+	} else {
+		s.lagMu.Lock()
+		s.lagFailures = 0
+		s.lagMu.Unlock()
+	}
+	// Independent of lag: rebuild when too many rows are pending behind the watermark.
+	threshold := s.cfg.Gateway.Scheduling.OutboxBacklogRebuildRows
+	if threshold <= 0 || s.outboxRepo == nil {
+		return
+	}
+	maxID, err := s.outboxRepo.MaxID(ctx)
+	if err != nil {
+		return // a MaxID failure is ignored without logging
+	}
+	if maxID-watermark >= int64(threshold) {
+		log.Printf("[Scheduler] outbox backlog rebuild triggered: backlog=%d", maxID-watermark)
+		if err := s.triggerFullRebuild("outbox_backlog"); err != nil {
+			log.Printf("[Scheduler] outbox backlog rebuild failed: %v", err)
+		}
+	}
+}
+
+// loadAccountsFromDB lists the schedulable accounts for bucket. In mixed mode Antigravity
+// accounts are listed as well, but only those with mixed scheduling enabled are kept.
+func (s *SchedulerSnapshotService) loadAccountsFromDB(ctx context.Context, bucket SchedulerBucket, useMixed bool) ([]Account, error) {
+	if s.accountRepo == nil {
+		return nil, ErrSchedulerCacheNotReady
+	}
+	groupID := bucket.GroupID
+	if s.isRunModeSimple() {
+		groupID = 0 // simple run mode ignores the bucket's group
+	}
+	if useMixed {
+		platforms := []string{bucket.Platform, PlatformAntigravity}
+		var accounts []Account
+		var err error
+		if groupID > 0 {
+			accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, groupID, platforms)
+		} else {
+			accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, platforms)
+		}
+		if err != nil {
+			return nil, err
+		}
+		filtered := make([]Account, 0, len(accounts))
+		for _, acc := range accounts {
+			if acc.Platform == PlatformAntigravity && !acc.IsMixedSchedulingEnabled() {
+				continue
+			}
+			filtered = append(filtered, acc)
+		}
+		return filtered, nil
+	}
+	if groupID > 0 {
+		return s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, groupID, bucket.Platform)
+	}
+	return s.accountRepo.ListSchedulableByPlatform(ctx, bucket.Platform)
+}
+
+func (s *SchedulerSnapshotService) bucketFor(groupID *int64, platform string, mode string) SchedulerBucket {
+	// The group ID goes through normalizeGroupID so that every caller agrees
+	// on the same bucket key for nil or non-positive IDs.
+	bucket := SchedulerBucket{Platform: platform, Mode: mode}
+	bucket.GroupID = s.normalizeGroupID(groupID)
+	return bucket
+}
+
+// normalizeGroupID maps an optional group ID to the value used in bucket keys:
+// 0 in simple run mode, for a nil pointer, or for a non-positive ID; otherwise
+// the pointed-to ID.
+func (s *SchedulerSnapshotService) normalizeGroupID(groupID *int64) int64 {
+	// The nil check must precede the dereference; || short-circuits left to right.
+	if s.isRunModeSimple() || groupID == nil || *groupID <= 0 {
+		return 0
+	}
+	return *groupID
+}
+
+// normalizeGroupIDs returns the distinct positive IDs from groupIDs in their
+// original order. It returns []int64{0} in simple run mode, for empty input,
+// or when no positive ID remains after filtering.
+func (s *SchedulerSnapshotService) normalizeGroupIDs(groupIDs []int64) []int64 {
+	if s.isRunModeSimple() || len(groupIDs) == 0 {
+		return []int64{0}
+	}
+
+	unique := make([]int64, 0, len(groupIDs))
+	visited := make(map[int64]bool, len(groupIDs))
+	for i := range groupIDs {
+		gid := groupIDs[i]
+		if gid <= 0 || visited[gid] {
+			continue
+		}
+		visited[gid] = true
+		unique = append(unique, gid)
+	}
+
+	if len(unique) > 0 {
+		return unique
+	}
+	return []int64{0}
+}
+
+// resolveMode picks the scheduler mode for a request: SchedulerModeForced when
+// a platform is forced, SchedulerModeMixed for the Anthropic and Gemini
+// platforms, and SchedulerModeSingle for everything else.
+func (s *SchedulerSnapshotService) resolveMode(platform string, hasForcePlatform bool) string {
+	switch {
+	case hasForcePlatform:
+		return SchedulerModeForced
+	case platform == PlatformAnthropic, platform == PlatformGemini:
+		return SchedulerModeMixed
+	default:
+		return SchedulerModeSingle
+	}
+}
+
+// guardFallback decides whether a DB fallback may proceed.
+//
+// It returns ErrSchedulerCacheNotReady when a config is present and DB
+// fallback is disabled, ErrSchedulerFallbackLimited when the fallback limiter
+// rejects the call, and nil otherwise. ctx is currently unused.
+func (s *SchedulerSnapshotService) guardFallback(ctx context.Context) error {
+	if s.cfg != nil && !s.cfg.Gateway.Scheduling.DbFallbackEnabled {
+		return ErrSchedulerCacheNotReady
+	}
+	if s.fallbackLimit != nil && !s.fallbackLimit.Allow() {
+		return ErrSchedulerFallbackLimited
+	}
+	return nil
+}
+
+// withFallbackTimeout derives the context used for a DB fallback query.
+//
+// Without a config, or with a non-positive DbFallbackTimeoutSeconds, it returns
+// a plain cancellable child of ctx. Otherwise it applies the configured
+// timeout, shortened to the time remaining on ctx's own deadline when that is
+// sooner. If ctx's deadline has already passed, a plain cancellable child is
+// returned. The caller must invoke the returned CancelFunc.
+func (s *SchedulerSnapshotService) withFallbackTimeout(ctx context.Context) (context.Context, context.CancelFunc) {
+	if s.cfg == nil {
+		return context.WithCancel(ctx)
+	}
+	seconds := s.cfg.Gateway.Scheduling.DbFallbackTimeoutSeconds
+	if seconds <= 0 {
+		return context.WithCancel(ctx)
+	}
+
+	limit := time.Duration(seconds) * time.Second
+	if deadline, hasDeadline := ctx.Deadline(); hasDeadline {
+		left := time.Until(deadline)
+		switch {
+		case left <= 0:
+			return context.WithCancel(ctx)
+		case left < limit:
+			limit = left
+		}
+	}
+	return context.WithTimeout(ctx, limit)
+}
+
+// isRunModeSimple reports whether a config is present and its RunMode is
+// config.RunModeSimple.
+func (s *SchedulerSnapshotService) isRunModeSimple() bool {
+	if s.cfg == nil {
+		return false
+	}
+	return s.cfg.RunMode == config.RunModeSimple
+}
+
+// outboxPollInterval returns the configured outbox poll interval, falling back
+// to one second when there is no config or the configured value is not positive.
+func (s *SchedulerSnapshotService) outboxPollInterval() time.Duration {
+	interval := time.Second
+	if s.cfg != nil {
+		if sec := s.cfg.Gateway.Scheduling.OutboxPollIntervalSeconds; sec > 0 {
+			interval = time.Duration(sec) * time.Second
+		}
+	}
+	return interval
+}
+
+// fullRebuildInterval returns the configured full-rebuild interval, or 0 when
+// there is no config or the configured value is not positive.
+func (s *SchedulerSnapshotService) fullRebuildInterval() time.Duration {
+	var interval time.Duration
+	if s.cfg != nil {
+		if sec := s.cfg.Gateway.Scheduling.FullRebuildIntervalSeconds; sec > 0 {
+			interval = time.Duration(sec) * time.Second
+		}
+	}
+	return interval
+}
+
+// defaultBuckets returns the de-duplicated set of buckets built by default.
+//
+// It always contains the global (GroupID 0) buckets for the Anthropic, Gemini,
+// OpenAI and Antigravity platforms: single and forced mode for each, plus
+// mixed mode for Anthropic and Gemini. Unless the service is in simple run
+// mode or has no group repository, the same set is added for every active
+// group that has a platform.
+//
+// Listing groups is best-effort: when it fails the error is logged and only
+// the global buckets are returned, so the returned error is always nil.
+func (s *SchedulerSnapshotService) defaultBuckets(ctx context.Context) ([]SchedulerBucket, error) {
+	buckets := make([]SchedulerBucket, 0)
+
+	// addPlatformBuckets appends the single/forced (and, where applicable,
+	// mixed) buckets for one group/platform pair.
+	addPlatformBuckets := func(groupID int64, platform string) {
+		buckets = append(buckets,
+			SchedulerBucket{GroupID: groupID, Platform: platform, Mode: SchedulerModeSingle},
+			SchedulerBucket{GroupID: groupID, Platform: platform, Mode: SchedulerModeForced},
+		)
+		if platform == PlatformAnthropic || platform == PlatformGemini {
+			buckets = append(buckets, SchedulerBucket{GroupID: groupID, Platform: platform, Mode: SchedulerModeMixed})
+		}
+	}
+
+	for _, platform := range []string{PlatformAnthropic, PlatformGemini, PlatformOpenAI, PlatformAntigravity} {
+		addPlatformBuckets(0, platform)
+	}
+
+	if s.isRunModeSimple() || s.groupRepo == nil {
+		return dedupeBuckets(buckets), nil
+	}
+
+	groups, err := s.groupRepo.ListActive(ctx)
+	if err != nil {
+		// Stay best-effort, but leave a trace instead of dropping the error silently.
+		log.Printf("[Scheduler] list active groups failed, using global buckets only: %v", err)
+		return dedupeBuckets(buckets), nil
+	}
+	for _, group := range groups {
+		if group.Platform == "" {
+			continue
+		}
+		addPlatformBuckets(group.ID, group.Platform)
+	}
+	return dedupeBuckets(buckets), nil
+}
+
+// dedupeBuckets returns the buckets of in with duplicates removed, keeping the
+// first occurrence of each. Two buckets are duplicates when their String()
+// values are equal.
+func dedupeBuckets(in []SchedulerBucket) []SchedulerBucket {
+	unique := make([]SchedulerBucket, 0, len(in))
+	visited := make(map[string]bool, len(in))
+	for i := range in {
+		key := in[i].String()
+		if visited[key] {
+			continue
+		}
+		visited[key] = true
+		unique = append(unique, in[i])
+	}
+	return unique
+}
+
+// derefAccounts copies the non-nil accounts into a value slice, preserving
+// order. The result is never nil, even for nil or empty input.
+func derefAccounts(accounts []*Account) []Account {
+	values := make([]Account, 0, len(accounts))
+	for i := range accounts {
+		if ptr := accounts[i]; ptr != nil {
+			values = append(values, *ptr)
+		}
+	}
+	return values
+}
+
+// parseInt64Slice extracts the positive integers from a []any value.
+//
+// It returns nil when value is not a []any. Elements that toInt64 cannot
+// convert, and elements that are not greater than zero, are skipped.
+func parseInt64Slice(value any) []int64 {
+	items, isSlice := value.([]any)
+	if !isSlice {
+		return nil
+	}
+	ids := make([]int64, 0, len(items))
+	for i := range items {
+		id, ok := toInt64(items[i])
+		if !ok || id <= 0 {
+			continue
+		}
+		ids = append(ids, id)
+	}
+	return ids
+}
+
+// toInt64 converts a loosely typed numeric value to int64.
+//
+// Supported dynamic types are float64, int64, int and json.Number. The second
+// result is false, and the first is 0, when the type is unsupported or the
+// value cannot be represented exactly as an int64: a float64 that is NaN,
+// fractional or outside the int64 range, or a json.Number that is not a
+// base-10 integer.
+func toInt64(value any) (int64, bool) {
+	switch v := value.(type) {
+	case float64:
+		// Converting NaN or an out-of-range float to int64 is
+		// implementation-defined in Go, so reject those before converting.
+		// Both bounds are exactly representable as float64.
+		if v != v || v < -(1<<63) || v >= 1<<63 {
+			return 0, false
+		}
+		n := int64(v)
+		// Reject fractional values rather than silently truncating them.
+		if float64(n) != v {
+			return 0, false
+		}
+		return n, true
+	case int64:
+		return v, true
+	case int:
+		return int64(v), true
+	case json.Number:
+		parsed, err := strconv.ParseInt(v.String(), 10, 64)
+		if err != nil {
+			return 0, false
+		}
+		return parsed, true
+	default:
+		return 0, false
+	}
+}
+
+// fallbackLimiter caps the number of allowed calls per one-second window.
+// The window is anchored at the time of the first call after the previous
+// window has been open for at least a second.
+type fallbackLimiter struct {
+	maxQPS int        // calls allowed per window
+	mu     sync.Mutex // guards window and count
+	window time.Time  // start of the current window
+	count  int        // calls allowed so far in the current window
+}
+
+// newFallbackLimiter returns a limiter allowing maxQPS calls per window, or nil
+// when maxQPS is not positive. A nil limiter allows every call.
+func newFallbackLimiter(maxQPS int) *fallbackLimiter {
+	if maxQPS > 0 {
+		return &fallbackLimiter{maxQPS: maxQPS, window: time.Now()}
+	}
+	return nil
+}
+
+// Allow reports whether another call fits in the current window and, if so,
+// counts it. It always returns true for a nil limiter or a non-positive maxQPS.
+func (l *fallbackLimiter) Allow() bool {
+	if l == nil || l.maxQPS <= 0 {
+		return true
+	}
+
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	// Open a fresh window once the current one is at least a second old.
+	if current := time.Now(); current.Sub(l.window) >= time.Second {
+		l.window, l.count = current, 0
+	}
+	if l.count < l.maxQPS {
+		l.count++
+		return true
+	}
+	return false
+}
--- a/backend/internal/service/setting_service.go
+++ b/backend/internal/service/setting_service.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/hex"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"strconv"
@@ -675,3 +676,84 @@ func (s *SettingService) GetLinuxDoConnectOAuthConfig(ctx context.Context) (conf

 	return effective, nil
 }
+
+// GetStreamTimeoutSettings returns the stream-timeout handling configuration.
+//
+// The defaults from DefaultStreamTimeoutSettings are returned when the setting
+// does not exist, is empty, or cannot be decoded as JSON. A stored value is
+// decoded on top of the defaults, so fields absent from the JSON keep their
+// default instead of being clamped up from zero. Numeric fields are then
+// clamped to the ranges SetStreamTimeoutSettings accepts, and an unknown
+// action is replaced by StreamTimeoutActionTempUnsched.
+//
+// A non-nil error is returned only when the repository lookup fails for a
+// reason other than ErrSettingNotFound.
+func (s *SettingService) GetStreamTimeoutSettings(ctx context.Context) (*StreamTimeoutSettings, error) {
+	value, err := s.settingRepo.GetValue(ctx, SettingKeyStreamTimeoutSettings)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) {
+			return DefaultStreamTimeoutSettings(), nil
+		}
+		return nil, fmt.Errorf("get stream timeout settings: %w", err)
+	}
+	if value == "" {
+		return DefaultStreamTimeoutSettings(), nil
+	}
+
+	settings := DefaultStreamTimeoutSettings()
+	if err := json.Unmarshal([]byte(value), settings); err != nil {
+		// settings may be partially overwritten at this point; hand back fresh defaults.
+		return DefaultStreamTimeoutSettings(), nil
+	}
+
+	// Validate and correct the stored values.
+	clamp := func(v, lo, hi int) int {
+		if v < lo {
+			return lo
+		}
+		if v > hi {
+			return hi
+		}
+		return v
+	}
+	settings.TempUnschedMinutes = clamp(settings.TempUnschedMinutes, 1, 60)
+	settings.ThresholdCount = clamp(settings.ThresholdCount, 1, 10)
+	settings.ThresholdWindowMinutes = clamp(settings.ThresholdWindowMinutes, 1, 60)
+
+	// Validate the action.
+	switch settings.Action {
+	case StreamTimeoutActionTempUnsched, StreamTimeoutActionError, StreamTimeoutActionNone:
+		// valid
+	default:
+		settings.Action = StreamTimeoutActionTempUnsched
+	}
+
+	return settings, nil
+}
+
+// SetStreamTimeoutSettings validates settings and stores them as JSON under
+// SettingKeyStreamTimeoutSettings.
+//
+// It returns an error when settings is nil, when TempUnschedMinutes or
+// ThresholdWindowMinutes is outside 1-60, when ThresholdCount is outside 1-10,
+// when Action is not one of the StreamTimeoutAction* constants, or when
+// marshalling or the repository write fails.
+func (s *SettingService) SetStreamTimeoutSettings(ctx context.Context, settings *StreamTimeoutSettings) error {
+	if settings == nil {
+		return fmt.Errorf("settings cannot be nil")
+	}
+
+	// Range checks, evaluated in declaration order; every lower bound is 1.
+	ranges := []struct {
+		value int
+		upper int
+		msg   string
+	}{
+		{settings.TempUnschedMinutes, 60, "temp_unsched_minutes must be between 1-60"},
+		{settings.ThresholdCount, 10, "threshold_count must be between 1-10"},
+		{settings.ThresholdWindowMinutes, 60, "threshold_window_minutes must be between 1-60"},
+	}
+	for _, r := range ranges {
+		if r.value < 1 || r.value > r.upper {
+			return errors.New(r.msg)
+		}
+	}
+
+	if action := settings.Action; action != StreamTimeoutActionTempUnsched &&
+		action != StreamTimeoutActionError &&
+		action != StreamTimeoutActionNone {
+		return fmt.Errorf("invalid action: %s", settings.Action)
+	}
+
+	encoded, err := json.Marshal(settings)
+	if err != nil {
+		return fmt.Errorf("marshal stream timeout settings: %w", err)
+	}
+	return s.settingRepo.Set(ctx, SettingKeyStreamTimeoutSettings, string(encoded))
+}
--- a/backend/internal/service/settings_view.go
+++ b/backend/internal/service/settings_view.go
@@ -69,3 +69,35 @@ type PublicSettings struct {
 	LinuxDoOAuthEnabled bool
 	Version             string
 }
+
+// StreamTimeoutSettings configures how a stream timeout is handled after it has
+// occurred. It only controls the follow-up action; whether a stream counts as
+// timed out is decided by the gateway configuration.
+type StreamTimeoutSettings struct {
+	// Enabled reports whether stream-timeout handling is turned on.
+	Enabled bool `json:"enabled"`
+	// Action is the handling applied after a timeout: "temp_unsched" | "error" | "none".
+	Action string `json:"action"`
+	// TempUnschedMinutes is how long, in minutes, the temporary unschedulable state lasts.
+	TempUnschedMinutes int `json:"temp_unsched_minutes"`
+	// ThresholdCount is the number of accumulated timeouts required to trigger the action.
+	ThresholdCount int `json:"threshold_count"`
+	// ThresholdWindowMinutes is the length, in minutes, of the threshold window.
+	ThresholdWindowMinutes int `json:"threshold_window_minutes"`
+}
+
+// Stream timeout actions: the values accepted in StreamTimeoutSettings.Action.
+const (
+	StreamTimeoutActionTempUnsched = "temp_unsched" // temporarily unschedulable
+	StreamTimeoutActionError       = "error"        // mark as error state
+	StreamTimeoutActionNone        = "none"         // take no action
+)
+
+// DefaultStreamTimeoutSettings returns a fresh copy of the default
+// stream-timeout configuration: handling disabled, action "temp_unsched",
+// 5 minutes unschedulable, threshold of 3 timeouts within a 10-minute window.
+func DefaultStreamTimeoutSettings() *StreamTimeoutSettings {
+	defaults := new(StreamTimeoutSettings)
+	defaults.Enabled = false
+	defaults.Action = StreamTimeoutActionTempUnsched
+	defaults.TempUnschedMinutes = 5
+	defaults.ThresholdCount = 3
+	defaults.ThresholdWindowMinutes = 10
+	return defaults
+}
--- a/backend/internal/service/temp_unsched.go
+++ b/backend/internal/service/temp_unsched.go
@@ -2,6 +2,7 @@ package service

 import (
 	"context"
+	"time"
 )

 // TempUnschedState 临时不可调度状态
@@ -20,3 +21,16 @@ type TempUnschedCache interface {
 	GetTempUnsched(ctx context.Context, accountID int64) (*TempUnschedState, error)
 	DeleteTempUnsched(ctx context.Context, accountID int64) error
 }
+
+// TimeoutCounterCache is the cache interface for per-account timeout counters.
+type TimeoutCounterCache interface {
+	// IncrementTimeoutCount increments the account's timeout counter and returns
+	// the current count. windowMinutes is the counting window in minutes; the
+	// counter resets automatically once that time has elapsed.
+	IncrementTimeoutCount(ctx context.Context, accountID int64, windowMinutes int) (int64, error)
+	// GetTimeoutCount returns the account's current timeout count.
+	GetTimeoutCount(ctx context.Context, accountID int64) (int64, error)
+	// ResetTimeoutCount resets the account's timeout counter.
+	ResetTimeoutCount(ctx context.Context, accountID int64) error
+	// GetTimeoutCountTTL returns the remaining time before the counter expires.
+	GetTimeoutCountTTL(ctx context.Context, accountID int64) (time.Duration, error)
+}
--- a/backend/internal/service/wire.go
+++ b/backend/internal/service/wire.go
@@ -86,6 +86,35 @@ func ProvideConcurrencyService(cache ConcurrencyCache, accountRepo AccountReposi
 	return svc
 }

+// ProvideSchedulerSnapshotService is the wire provider for
+// SchedulerSnapshotService: it constructs the service from its dependencies
+// and calls Start on it before returning it.
+func ProvideSchedulerSnapshotService(
+	cache SchedulerCache,
+	outboxRepo SchedulerOutboxRepository,
+	accountRepo AccountRepository,
+	groupRepo GroupRepository,
+	cfg *config.Config,
+) *SchedulerSnapshotService {
+	snapshotSvc := NewSchedulerSnapshotService(cache, outboxRepo, accountRepo, groupRepo, cfg)
+	snapshotSvc.Start()
+
+	return snapshotSvc
+}
+
+// ProvideRateLimitService is the wire provider for RateLimitService. It builds
+// the service with NewRateLimitService and then attaches the two optional
+// dependencies, the timeout counter cache and the setting service, through
+// their setters.
+func ProvideRateLimitService(
+	accountRepo AccountRepository,
+	usageRepo UsageLogRepository,
+	cfg *config.Config,
+	geminiQuotaService *GeminiQuotaService,
+	tempUnschedCache TempUnschedCache,
+	timeoutCounterCache TimeoutCounterCache,
+	settingService *SettingService,
+) *RateLimitService {
+	rateLimitSvc := NewRateLimitService(accountRepo, usageRepo, cfg, geminiQuotaService, tempUnschedCache)
+
+	// Optional dependencies are injected after construction.
+	rateLimitSvc.SetTimeoutCounterCache(timeoutCounterCache)
+	rateLimitSvc.SetSettingService(settingService)
+
+	return rateLimitSvc
+}
+
 // ProvideOpsMetricsCollector creates and starts OpsMetricsCollector.
 func ProvideOpsMetricsCollector(
 	opsRepo OpsRepository,
@@ -186,7 +215,7 @@ var ProviderSet = wire.NewSet(
 	NewGeminiMessagesCompatService,
 	NewAntigravityTokenProvider,
 	NewAntigravityGatewayService,
-	NewRateLimitService,
+	ProvideRateLimitService,
 	NewAccountUsageService,
 	NewAccountTestService,
 	NewSettingService,
@@ -201,6 +230,7 @@ var ProviderSet = wire.NewSet(
 	NewTurnstileService,
 	NewSubscriptionService,
 	ProvideConcurrencyService,
+	ProvideSchedulerSnapshotService,
 	NewIdentityService,
 	NewCRSSyncService,
 	ProvideUpdateService,
--- a/backend/migrations/036_ops_error_logs_add_is_count_tokens.sql
+++ b/backend/migrations/036_ops_error_logs_add_is_count_tokens.sql
@@ -0,0 +1,16 @@
+-- Migration: 添加 is_count_tokens 字段到 ops_error_logs 表
+-- Purpose: 标记 count_tokens 请求的错误，以便在统计和告警中根据配置动态过滤
+-- Author: System
+-- Date: 2026-01-12
+
+-- Add is_count_tokens column to ops_error_logs table (idempotent, like the index below)
+ALTER TABLE ops_error_logs
+ADD COLUMN IF NOT EXISTS is_count_tokens BOOLEAN NOT NULL DEFAULT FALSE;
+
+-- Add comment
+COMMENT ON COLUMN ops_error_logs.is_count_tokens IS '是否为 count_tokens 请求的错误（用于统计过滤）';
+
+-- Create index for filtering (optional, improves query performance)
+CREATE INDEX IF NOT EXISTS idx_ops_error_logs_is_count_tokens
+ON ops_error_logs(is_count_tokens)
+WHERE is_count_tokens = TRUE;
--- a/backend/migrations/036_scheduler_outbox.sql
+++ b/backend/migrations/036_scheduler_outbox.sql
@@ -0,0 +1,10 @@
+-- Outbox table for scheduler events. Rows get a monotonically increasing id
+-- (BIGSERIAL) and a creation timestamp; account_id, group_id and payload are
+-- optional per event.
+-- NOTE(review): presumably consumed by the scheduler snapshot service's outbox
+-- polling — confirm against the repository implementation.
+CREATE TABLE IF NOT EXISTS scheduler_outbox (
+    id BIGSERIAL PRIMARY KEY,
+    event_type TEXT NOT NULL,
+    account_id BIGINT NULL,
+    group_id BIGINT NULL,
+    payload JSONB NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Index on the creation timestamp for time-based lookups.
+CREATE INDEX IF NOT EXISTS idx_scheduler_outbox_created_at ON scheduler_outbox (created_at);
--- a/deploy/.env.example
+++ b/deploy/.env.example
@@ -69,6 +69,41 @@ JWT_EXPIRE_HOUR=24
 # Leave unset to use default ./config.yaml
 #CONFIG_FILE=./config.yaml

+# -----------------------------------------------------------------------------
+# Gateway Scheduling (Optional)
+# 调度缓存与受控回源配置（缓存就绪且命中时不读 DB）
+# -----------------------------------------------------------------------------
+# 粘性会话最大排队长度
+GATEWAY_SCHEDULING_STICKY_SESSION_MAX_WAITING=3
+# 粘性会话等待超时（时间段，例如 45s）
+GATEWAY_SCHEDULING_STICKY_SESSION_WAIT_TIMEOUT=120s
+# 兜底排队等待超时（时间段，例如 30s）
+GATEWAY_SCHEDULING_FALLBACK_WAIT_TIMEOUT=30s
+# 兜底最大排队长度
+GATEWAY_SCHEDULING_FALLBACK_MAX_WAITING=100
+# 启用调度批量负载计算
+GATEWAY_SCHEDULING_LOAD_BATCH_ENABLED=true
+# 并发槽位清理周期（时间段，例如 30s）
+GATEWAY_SCHEDULING_SLOT_CLEANUP_INTERVAL=30s
+# 是否允许受控回源到 DB（默认 true，保持现有行为）
+GATEWAY_SCHEDULING_DB_FALLBACK_ENABLED=true
+# 受控回源超时（秒），0 表示不额外收紧超时
+GATEWAY_SCHEDULING_DB_FALLBACK_TIMEOUT_SECONDS=0
+# 受控回源限流（实例级 QPS），0 表示不限制
+GATEWAY_SCHEDULING_DB_FALLBACK_MAX_QPS=0
+# outbox 轮询周期（秒）
+GATEWAY_SCHEDULING_OUTBOX_POLL_INTERVAL_SECONDS=1
+# outbox 滞后告警阈值（秒）
+GATEWAY_SCHEDULING_OUTBOX_LAG_WARN_SECONDS=5
+# outbox 触发强制重建阈值（秒）
+GATEWAY_SCHEDULING_OUTBOX_LAG_REBUILD_SECONDS=10
+# outbox 连续滞后触发次数
+GATEWAY_SCHEDULING_OUTBOX_LAG_REBUILD_FAILURES=3
+# outbox 积压触发重建阈值（行数）
+GATEWAY_SCHEDULING_OUTBOX_BACKLOG_REBUILD_ROWS=10000
+# 全量重建周期（秒），0 表示禁用
+GATEWAY_SCHEDULING_FULL_REBUILD_INTERVAL_SECONDS=300
+
 # -----------------------------------------------------------------------------
 # Dashboard Aggregation (Optional)
 # -----------------------------------------------------------------------------
--- a/deploy/config.example.yaml
+++ b/deploy/config.example.yaml
@@ -169,6 +169,45 @@ gateway:
  # Allow failover on selected 400 errors (default: off)
  # 允许在特定 400 错误时进行故障转移（默认：关闭）
  failover_on_400: false
+  # Scheduling configuration
+  # 调度配置
+  scheduling:
+    # Sticky session max waiting queue size
+    # 粘性会话最大排队长度
+    sticky_session_max_waiting: 3
+    # Sticky session wait timeout (duration)
+    # 粘性会话等待超时（时间段）
+    sticky_session_wait_timeout: 120s
+    # Fallback wait timeout (duration)
+    # 兜底排队等待超时（时间段）
+    fallback_wait_timeout: 30s
+    # Fallback max waiting queue size
+    # 兜底最大排队长度
+    fallback_max_waiting: 100
+    # Enable batch load calculation for scheduling
+    # 启用调度批量负载计算
+    load_batch_enabled: true
+    # Slot cleanup interval (duration)
+    # 并发槽位清理周期（时间段）
+    slot_cleanup_interval: 30s
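+    # Allow controlled fallback to the DB (default: true, keeps existing behavior)
+    # 是否允许受控回源到 DB（默认 true，保持现有行为）
+    db_fallback_enabled: true
+    # Controlled DB fallback timeout (seconds); 0 means the timeout is not tightened further
+    # 受控回源超时（秒），0 表示不额外收紧超时
+    db_fallback_timeout_seconds: 0
+    # Controlled DB fallback rate limit (per-instance QPS); 0 means unlimited
+    # 受控回源限流（实例级 QPS），0 表示不限制
+    db_fallback_max_qps: 0
+    # Outbox poll interval (seconds)
+    # outbox 轮询周期（秒）
+    outbox_poll_interval_seconds: 1
+    # Outbox lag warning threshold (seconds)
+    # outbox 滞后告警阈值（秒）
+    outbox_lag_warn_seconds: 5
+    # Outbox lag threshold that triggers a forced rebuild (seconds)
+    # outbox 触发强制重建阈值（秒）
+    outbox_lag_rebuild_seconds: 10
+    # Number of consecutive lagging checks that triggers a rebuild
+    # outbox 连续滞后触发次数
+    outbox_lag_rebuild_failures: 3
+    # Outbox backlog threshold that triggers a rebuild (rows)
+    # outbox 积压触发重建阈值（行数）
+    outbox_backlog_rebuild_rows: 10000
+    # Full rebuild interval (seconds); 0 disables it
+    # 全量重建周期（秒），0 表示禁用
+    full_rebuild_interval_seconds: 300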

 # =============================================================================
 # API Key Auth Cache Configuration
--- a/frontend/.eslintignore
+++ b/frontend/.eslintignore
@@ -0,0 +1,14 @@
+# 忽略编译后的文件
+vite.config.js
+vite.config.d.ts
+
+# 忽略依赖
+node_modules/
+
+# 忽略构建输出
+dist/
+../backend/internal/web/dist/
+
+# 忽略缓存
+.cache/
+.vite/
--- a/frontend/src/api/admin/ops.ts
+++ b/frontend/src/api/admin/ops.ts
@@ -362,6 +362,45 @@ export async function getAccountAvailabilityStats(platform?: string, groupId?: n
  return data
 }

+/**
+ * Summary of a rate metric (used for both `qps` and `tps` in
+ * OpsRealtimeTrafficSummary).
+ * NOTE(review): field meanings (current / peak / average over the window) are
+ * inferred from the names — confirm against the backend response.
+ */
+export interface OpsRateSummary {
+  current: number
+  peak: number
+  avg: number
+}
+
+/**
+ * Realtime traffic summary for one window, carrying QPS and TPS rate summaries
+ * plus the platform / group filter it applies to.
+ * NOTE(review): the format of `window`, `start_time` and `end_time` is not
+ * visible here — confirm against the backend.
+ */
+export interface OpsRealtimeTrafficSummary {
+  window: string
+  start_time: string
+  end_time: string
+  platform: string
+  group_id?: number | null
+  qps: OpsRateSummary
+  tps: OpsRateSummary
+}
+
+/**
+ * Response body of the realtime-traffic endpoint. `summary` may be null.
+ * NOTE(review): presumably `summary` is null when `enabled` is false — confirm.
+ */
+export interface OpsRealtimeTrafficSummaryResponse {
+  enabled: boolean
+  summary: OpsRealtimeTrafficSummary | null
+  timestamp?: string
+}
+
+/**
+ * Fetch the realtime traffic summary from `/admin/ops/realtime-traffic`.
+ * @param window - Window identifier, always sent
+ * @param platform - Optional platform filter; omitted when empty
+ * @param groupId - Optional group filter; only sent when it is a positive number
+ * @returns The endpoint's response body
+ */
+export async function getRealtimeTrafficSummary(
+  window: string,
+  platform?: string,
+  groupId?: number | null
+): Promise<OpsRealtimeTrafficSummaryResponse> {
+  const hasGroup = typeof groupId === 'number' && groupId > 0
+  const params: Record<string, any> = {
+    window,
+    ...(platform ? { platform } : {}),
+    ...(hasGroup ? { group_id: groupId } : {})
+  }
+
+  const response = await apiClient.get<OpsRealtimeTrafficSummaryResponse>('/admin/ops/realtime-traffic', { params })
+  return response.data
+}
+
 /**
 * Subscribe to realtime QPS updates via WebSocket.
 *
@@ -661,6 +700,14 @@ export interface EmailNotificationConfig {
  }
 }

+// Metric thresholds; a metric is shown in red once it crosses its threshold.
+export interface OpsMetricThresholds {
+  sla_percent_min?: number | null                // turns red when SLA is below this value
+  latency_p99_ms_max?: number | null             // turns red when P99 latency is above this value
+  ttft_p99_ms_max?: number | null                // turns red when P99 TTFT is above this value
+  request_error_rate_percent_max?: number | null // turns red when the request error rate is above this value
+  upstream_error_rate_percent_max?: number | null // turns red when the upstream error rate is above this value
+}
+
 export interface OpsDistributedLockSettings {
  enabled: boolean
  key: string
@@ -681,11 +728,15 @@ export interface OpsAlertRuntimeSettings {
      reason: string
    }>
  }
+  thresholds: OpsMetricThresholds // 指标阈值配置
 }

 export interface OpsAdvancedSettings {
  data_retention: OpsDataRetentionSettings
  aggregation: OpsAggregationSettings
+  ignore_count_tokens_errors: boolean
+  auto_refresh_enabled: boolean
+  auto_refresh_interval_seconds: number
 }

 export interface OpsDataRetentionSettings {
@@ -929,6 +980,17 @@ export async function updateAdvancedSettings(config: OpsAdvancedSettings): Promi
  return data
 }

+// ==================== Metric Thresholds ====================
+
+/** Fetch the metric thresholds from `/admin/ops/settings/metric-thresholds`. */
+async function getMetricThresholds(): Promise<OpsMetricThresholds> {
+  const response = await apiClient.get<OpsMetricThresholds>('/admin/ops/settings/metric-thresholds')
+  return response.data
+}
+
+/** Send the metric thresholds to `/admin/ops/settings/metric-thresholds` with PUT; the response body is ignored. */
+async function updateMetricThresholds(thresholds: OpsMetricThresholds): Promise<void> {
+  const endpoint = '/admin/ops/settings/metric-thresholds'
+  await apiClient.put(endpoint, thresholds)
+}
+
 export const opsAPI = {
  getDashboardOverview,
  getThroughputTrend,
@@ -937,6 +999,7 @@ export const opsAPI = {
  getErrorDistribution,
  getConcurrencyStats,
  getAccountAvailabilityStats,
+  getRealtimeTrafficSummary,
  subscribeQPS,
  listErrorLogs,
  getErrorLogDetail,
@@ -952,7 +1015,9 @@ export const opsAPI = {
  getAlertRuntimeSettings,
  updateAlertRuntimeSettings,
  getAdvancedSettings,
-  updateAdvancedSettings
+  updateAdvancedSettings,
+  getMetricThresholds,
+  updateMetricThresholds
 }

 export default opsAPI
--- a/frontend/src/api/admin/settings.ts
+++ b/frontend/src/api/admin/settings.ts
@@ -201,6 +201,41 @@ export async function deleteAdminApiKey(): Promise<{ message: string }> {
  return data
 }

+/**
+ * Stream timeout settings as exchanged with `/admin/settings/stream-timeout`.
+ * Field names match the JSON keys of the backend's StreamTimeoutSettings.
+ */
+export interface StreamTimeoutSettings {
+  // Whether stream-timeout handling is enabled
+  enabled: boolean
+  // Handling applied after a timeout
+  action: 'temp_unsched' | 'error' | 'none'
+  // Duration of the temporary unschedulable state, in minutes (backend accepts 1-60)
+  temp_unsched_minutes: number
+  // Number of timeouts required to trigger the action (backend accepts 1-10)
+  threshold_count: number
+  // Length of the threshold window, in minutes (backend accepts 1-60)
+  threshold_window_minutes: number
+}
+
+/**
+ * Fetch the stream timeout settings from `/admin/settings/stream-timeout`.
+ * @returns The settings returned by the server
+ */
+export async function getStreamTimeoutSettings(): Promise<StreamTimeoutSettings> {
+  const response = await apiClient.get<StreamTimeoutSettings>('/admin/settings/stream-timeout')
+  return response.data
+}
+
+/**
+ * Send new stream timeout settings to `/admin/settings/stream-timeout` with PUT.
+ * @param settings - Settings to store
+ * @returns The settings returned by the server
+ */
+export async function updateStreamTimeoutSettings(
+  settings: StreamTimeoutSettings
+): Promise<StreamTimeoutSettings> {
+  const endpoint = '/admin/settings/stream-timeout'
+  const response = await apiClient.put<StreamTimeoutSettings>(endpoint, settings)
+  return response.data
+}
+
 export const settingsAPI = {
  getSettings,
  updateSettings,
@@ -208,7 +243,9 @@ export const settingsAPI = {
  sendTestEmail,
  getAdminApiKey,
  regenerateAdminApiKey,
-  deleteAdminApiKey
+  deleteAdminApiKey,
+  getStreamTimeoutSettings,
+  updateStreamTimeoutSettings
 }

 export default settingsAPI
--- a/frontend/src/components/account/AccountGroupsCell.vue
+++ b/frontend/src/components/account/AccountGroupsCell.vue
@@ -0,0 +1,158 @@
+<template>
+  <div v-if="groups && groups.length > 0" class="relative max-w-56">
+    <!-- 分组容器：固定最大宽度，最多显示2行 -->
+    <div class="flex flex-wrap gap-1 max-h-14 overflow-hidden">
+      <GroupBadge
+        v-for="group in displayGroups"
+        :key="group.id"
+        :name="group.name"
+        :platform="group.platform"
+        :subscription-type="group.subscription_type"
+        :rate-multiplier="group.rate_multiplier"
+        :show-rate="false"
+        class="max-w-24"
+      />
+      <!-- 更多数量徽章 -->
+      <button
+        v-if="hiddenCount > 0"
+        ref="moreButtonRef"
+        @click.stop="showPopover = !showPopover"
+        class="inline-flex items-center gap-0.5 rounded-md px-1.5 py-0.5 text-xs font-medium bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-300 dark:hover:bg-dark-500 transition-colors cursor-pointer whitespace-nowrap"
+      >
+        <span>+{{ hiddenCount }}</span>
+      </button>
+    </div>
+
+    <!-- Popover 显示完整列表 -->
+    <Teleport to="body">
+      <Transition
+        enter-active-class="transition duration-150 ease-out"
+        enter-from-class="opacity-0 scale-95"
+        enter-to-class="opacity-100 scale-100"
+        leave-active-class="transition duration-100 ease-in"
+        leave-from-class="opacity-100 scale-100"
+        leave-to-class="opacity-0 scale-95"
+      >
+        <div
+          v-if="showPopover"
+          ref="popoverRef"
+          class="fixed z-50 min-w-48 max-w-96 rounded-lg border border-gray-200 bg-white p-3 shadow-lg dark:border-dark-600 dark:bg-dark-800"
+          :style="popoverStyle"
+        >
+          <div class="mb-2 flex items-center justify-between">
+            <span class="text-xs font-medium text-gray-500 dark:text-gray-400">
+              {{ t('admin.accounts.allGroups', { count: groups.length }) }}
+            </span>
+            <button
+              @click="showPopover = false"
+              class="rounded p-0.5 text-gray-400 hover:bg-gray-100 hover:text-gray-600 dark:hover:bg-dark-700 dark:hover:text-gray-300"
+            >
+              <svg class="h-3.5 w-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
+                <path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
+              </svg>
+            </button>
+          </div>
+          <div class="flex flex-wrap gap-1.5 max-h-64 overflow-y-auto">
+            <GroupBadge
+              v-for="group in groups"
+              :key="group.id"
+              :name="group.name"
+              :platform="group.platform"
+              :subscription-type="group.subscription_type"
+              :rate-multiplier="group.rate_multiplier"
+              :show-rate="false"
+            />
+          </div>
+        </div>
+      </Transition>
+    </Teleport>
+
+    <!-- 点击外部关闭 popover -->
+    <div
+      v-if="showPopover"
+      class="fixed inset-0 z-40"
+      @click="showPopover = false"
+    />
+  </div>
+  <span v-else class="text-sm text-gray-400 dark:text-dark-500">-</span>
+</template>
+
+<script setup lang="ts">
+import { ref, computed, onMounted, onUnmounted } from 'vue'
+import { useI18n } from 'vue-i18n'
+import GroupBadge from '@/components/common/GroupBadge.vue'
+import type { Group } from '@/types'
+
+interface Props {
+  groups: Group[] | null | undefined
+  maxDisplay?: number
+}
+
+const props = withDefaults(defineProps<Props>(), {
+  maxDisplay: 4
+})
+
+const { t } = useI18n()
+
+const moreButtonRef = ref<HTMLElement | null>(null)
+const popoverRef = ref<HTMLElement | null>(null)
+const showPopover = ref(false)
+
+// Groups rendered inline (at most maxDisplay badges)
+const displayGroups = computed(() => {
+  const all = props.groups
+  if (!all) return []
+  // Show everything when it fits; otherwise keep one slot free for the +N button
+  return all.length <= props.maxDisplay ? all : all.slice(0, props.maxDisplay - 1)
+})
+
+// Number of groups not rendered inline (shown on the +N button)
+const hiddenCount = computed(() => {
+  const all = props.groups
+  if (!all || all.length <= props.maxDisplay) return 0
+  return all.length - (props.maxDisplay - 1)
+})
+
+// Popover position style, anchored to the +N button.
+const popoverStyle = computed(() => {
+  // getBoundingClientRect() is not tracked by Vue. Reading showPopover makes it
+  // a dependency, so the position is measured again every time the popover
+  // opens instead of being cached from the first open (stale after scroll or
+  // resize).
+  if (!showPopover.value || !moreButtonRef.value) return {}
+  const rect = moreButtonRef.value.getBoundingClientRect()
+  const viewportHeight = window.innerHeight
+  const viewportWidth = window.innerWidth
+
+  let top = rect.bottom + 8
+  let left = rect.left
+
+  // Not enough room below: place the popover above the button
+  if (top + 280 > viewportHeight) {
+    top = Math.max(8, rect.top - 280)
+  }
+
+  // Not enough room on the right: shift the popover left
+  if (left + 384 > viewportWidth) {
+    left = Math.max(8, viewportWidth - 392)
+  }
+
+  return {
+    top: `${top}px`,
+    left: `${left}px`
+  }
+})
+
+// Keyboard handler: close the popover when Escape is pressed
+const handleKeydown = (e: KeyboardEvent) => {
+  if (e.key !== 'Escape') return
+  showPopover.value = false
+}
+
+onMounted(() => {
+  window.addEventListener('keydown', handleKeydown)
+})
+
+onUnmounted(() => {
+  window.removeEventListener('keydown', handleKeydown)
+})
+</script>
--- a/frontend/src/components/account/BulkEditAccountModal.vue
+++ b/frontend/src/components/account/BulkEditAccountModal.vue
@@ -778,6 +778,16 @@ const addPresetMapping = (from: string, to: string) => {
 const toggleErrorCode = (code: number) => {
  const index = selectedErrorCodes.value.indexOf(code)
  if (index === -1) {
+    // Adding code - check for 429/529 warning
+    if (code === 429) {
+      if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+        return
+      }
+    } else if (code === 529) {
+      if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+        return
+      }
+    }
    selectedErrorCodes.value.push(code)
  } else {
    selectedErrorCodes.value.splice(index, 1)
@@ -794,6 +804,16 @@ const addCustomErrorCode = () => {
    appStore.showInfo(t('admin.accounts.errorCodeExists'))
    return
  }
+  // Check for 429/529 warning
+  if (code === 429) {
+    if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+      return
+    }
+  } else if (code === 529) {
+    if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+      return
+    }
+  }
  selectedErrorCodes.value.push(code)
  customErrorCodeInput.value = null
 }
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -1976,6 +1976,16 @@ const addPresetMapping = (from: string, to: string) => {
 const toggleErrorCode = (code: number) => {
  const index = selectedErrorCodes.value.indexOf(code)
  if (index === -1) {
+    // Adding code - check for 429/529 warning
+    if (code === 429) {
+      if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+        return
+      }
+    } else if (code === 529) {
+      if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+        return
+      }
+    }
    selectedErrorCodes.value.push(code)
  } else {
    selectedErrorCodes.value.splice(index, 1)
@@ -1993,6 +2003,16 @@ const addCustomErrorCode = () => {
    appStore.showInfo(t('admin.accounts.errorCodeExists'))
    return
  }
+  // Check for 429/529 warning
+  if (code === 429) {
+    if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+      return
+    }
+  } else if (code === 529) {
+    if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+      return
+    }
+  }
  selectedErrorCodes.value.push(code)
  customErrorCodeInput.value = null
 }
@@ -2462,6 +2482,7 @@ const handleCookieAuth = async (sessionKey: string) => {

        await adminAPI.accounts.create({
          name: accountName,
+          notes: form.notes,
          platform: form.platform,
          type: addMethod.value, // Use addMethod as type: 'oauth' or 'setup-token'
          credentials,
@@ -2469,6 +2490,8 @@ const handleCookieAuth = async (sessionKey: string) => {
          proxy_id: form.proxy_id,
          concurrency: form.concurrency,
          priority: form.priority,
+          group_ids: form.group_ids,
+          expires_at: form.expires_at,
          auto_pause_on_expired: autoPauseOnExpired.value
        })

--- a/frontend/src/components/account/EditAccountModal.vue
+++ b/frontend/src/components/account/EditAccountModal.vue
@@ -936,6 +936,16 @@ const addPresetMapping = (from: string, to: string) => {
 const toggleErrorCode = (code: number) => {
  const index = selectedErrorCodes.value.indexOf(code)
  if (index === -1) {
+    // Adding code - check for 429/529 warning
+    if (code === 429) {
+      if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+        return
+      }
+    } else if (code === 529) {
+      if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+        return
+      }
+    }
    selectedErrorCodes.value.push(code)
  } else {
    selectedErrorCodes.value.splice(index, 1)
@@ -953,6 +963,16 @@ const addCustomErrorCode = () => {
    appStore.showInfo(t('admin.accounts.errorCodeExists'))
    return
  }
+  // Check for 429/529 warning
+  if (code === 429) {
+    if (!confirm(t('admin.accounts.customErrorCodes429Warning'))) {
+      return
+    }
+  } else if (code === 529) {
+    if (!confirm(t('admin.accounts.customErrorCodes529Warning'))) {
+      return
+    }
+  }
  selectedErrorCodes.value.push(code)
  customErrorCodeInput.value = null
 }
--- a/frontend/src/components/admin/user/UserBalanceModal.vue
+++ b/frontend/src/components/admin/user/UserBalanceModal.vue
@@ -35,14 +35,22 @@ const emit = defineEmits(['close', 'success']); const { t } = useI18n(); const a
 const submitting = ref(false); const form = reactive({ amount: 0, notes: '' })
 watch(() => props.show, (v) => { if(v) { form.amount = 0; form.notes = '' } })

-const calculateNewBalance = () => (props.user ? (props.operation === 'add' ? props.user.balance + form.amount : props.user.balance - form.amount) : 0)
+const calculateNewBalance = () => {
+  if (!props.user) return 0
+  const result = props.operation === 'add' ? props.user.balance + form.amount : props.user.balance - form.amount
+  // Snap -0 and sub-1e-9 float residue (e.g. 0.3 - (0.1 + 0.2)) to 0 so the preview never shows "-0.00"
+  return Math.abs(result) < 1e-9 ? 0 : result
+}
 const handleBalanceSubmit = async () => {
  if (!props.user) return
  if (!form.amount || form.amount <= 0) {
    appStore.showError(t('admin.users.amountRequired'))
    return
  }
-  if (props.operation === 'subtract' && form.amount > props.user.balance) {
+  // Compare at two-decimal precision to avoid floating-point rounding issues
+  const amount = Math.round(form.amount * 100) / 100
+  const balance = Math.round(props.user.balance * 100) / 100
+  if (props.operation === 'subtract' && amount > balance) {
    appStore.showError(t('admin.users.insufficientBalance'))
    return
  }
--- a/frontend/src/components/icons/Icon.vue
+++ b/frontend/src/components/icons/Icon.vue
@@ -124,7 +124,8 @@ const icons = {
  chatBubble: 'M8 10h.01M12 10h.01M16 10h.01M9 16H5a2 2 0 01-2-2V6a2 2 0 012-2h14a2 2 0 012 2v8a2 2 0 01-2 2h-5l-5 5v-5z',
  calculator: 'M9 7h6m0 10v-3m-3 3h.01M9 17h.01M9 14h.01M12 14h.01M15 11h.01M12 11h.01M9 11h.01M7 21h10a2 2 0 002-2V5a2 2 0 00-2-2H7a2 2 0 00-2 2v14a2 2 0 002 2z',
  fire: 'M17.657 18.657A8 8 0 016.343 7.343S7 9 9 10c0-2 .5-5 2.986-7C14 5 16.09 5.777 17.656 7.343A7.975 7.975 0 0120 13a7.975 7.975 0 01-2.343 5.657z',
-  badge: 'M9 12.75L11.25 15 15 9.75M21 12c0 1.268-.63 2.39-1.593 3.068a3.745 3.745 0 01-1.043 3.296 3.745 3.745 0 01-3.296 1.043A3.745 3.745 0 0112 21c-1.268 0-2.39-.63-3.068-1.593a3.746 3.746 0 01-3.296-1.043 3.745 3.745 0 01-1.043-3.296A3.745 3.745 0 013 12c0-1.268.63-2.39 1.593-3.068a3.745 3.745 0 011.043-3.296 3.746 3.746 0 013.296-1.043A3.746 3.746 0 0112 3c1.268 0 2.39.63 3.068 1.593a3.746 3.746 0 013.296 1.043 3.746 3.746 0 011.043 3.296A3.745 3.745 0 0121 12z'
+  badge: 'M9 12.75L11.25 15 15 9.75M21 12c0 1.268-.63 2.39-1.593 3.068a3.745 3.745 0 01-1.043 3.296 3.745 3.745 0 01-3.296 1.043A3.745 3.745 0 0112 21c-1.268 0-2.39-.63-3.068-1.593a3.746 3.746 0 01-3.296-1.043 3.745 3.745 0 01-1.043-3.296A3.745 3.745 0 013 12c0-1.268.63-2.39 1.593-3.068a3.745 3.745 0 011.043-3.296 3.746 3.746 0 013.296-1.043A3.746 3.746 0 0112 3c1.268 0 2.39.63 3.068 1.593a3.746 3.746 0 013.296 1.043 3.746 3.746 0 011.043 3.296A3.745 3.745 0 0121 12z',
+  brain: 'M9.75 3.104v5.714a2.25 2.25 0 01-.659 1.591L5 14.5M9.75 3.104c-.251.023-.501.05-.75.082m.75-.082a24.301 24.301 0 014.5 0m0 0v5.714c0 .597.237 1.17.659 1.591L19.8 15.3M14.25 3.104c.251.023.501.05.75.082M19.8 15.3l-1.57.393A9.065 9.065 0 0112 15a9.065 9.065 0 00-6.23.693L5 14.5m0 0l-2.69 2.689c-1.232 1.232-.65 3.318 1.067 3.611A48.309 48.309 0 0012 21c2.773 0 5.491-.235 8.135-.687 1.718-.293 2.3-2.379 1.067-3.61L19.8 15.3M12 8.25a1.5 1.5 0 100-3 1.5 1.5 0 000 3zm0 0v3m-3-1.5a1.5 1.5 0 100-3 1.5 1.5 0 000 3zm0 0h6m-3 4.5a1.5 1.5 0 100-3 1.5 1.5 0 000 3z'
 } as const

 const iconPath = computed(() => icons[props.name])
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -156,6 +156,7 @@ export default {
        unknownError: 'Unknown error occurred',
        saving: 'Saving...', 
        selectedCount: '({count} selected)',    refresh: 'Refresh',
+    settings: 'Settings',
    notAvailable: 'N/A',
    now: 'Now',
    unknown: 'Unknown',
@@ -389,7 +390,7 @@ export default {
      opencode: {
        title: 'OpenCode Example',
        subtitle: 'opencode.json',
-        hint: 'This is a group configuration example. Adjust model and options as needed.',
+        hint: 'Config path: ~/.config/opencode/opencode.json (create if not exists). This is an example, adjust model and options as needed.',
      },
    },
    customKeyLabel: 'Custom Key',
@@ -1021,6 +1022,7 @@ export default {
      schedulableEnabled: 'Scheduling enabled',
      schedulableDisabled: 'Scheduling disabled',
      failedToToggleSchedulable: 'Failed to toggle scheduling status',
+      allGroups: '{count} groups total',
      platforms: {
        anthropic: 'Anthropic',
        claude: 'Claude',
@@ -1203,6 +1205,10 @@ export default {
      customErrorCodesHint: 'Only stop scheduling for selected error codes',
      customErrorCodesWarning:
        'Only selected error codes will stop scheduling. Other errors will return 500.',
+      customErrorCodes429Warning:
+        '429 already has built-in rate limit handling. Adding it to custom error codes will disable the account instead of temporary rate limiting. Are you sure?',
+      customErrorCodes529Warning:
+        '529 already has built-in overload handling. Adding it to custom error codes will disable the account instead of temporary overload marking. Are you sure?',
      selectedErrorCodes: 'Selected',
      noneSelectedUsesDefault: 'None selected (uses default policy)',
      enterErrorCode: 'Enter error code (100-599)',
@@ -1902,6 +1908,7 @@ export default {
      max: 'max:',
      qps: 'QPS',
      requests: 'Requests',
+      requestsTitle: 'Requests',
      upstream: 'Upstream',
      client: 'Client',
      system: 'System',
@@ -2114,7 +2121,10 @@ export default {
        empty: 'No alert rules',
        loadFailed: 'Failed to load alert rules',
        saveFailed: 'Failed to save alert rule',
+        saveSuccess: 'Alert rule saved successfully',
        deleteFailed: 'Failed to delete alert rule',
+        deleteSuccess: 'Alert rule deleted successfully',
+        manage: 'Manage Alert Rules',
        create: 'Create Rule',
        createTitle: 'Create Alert Rule',
        editTitle: 'Edit Alert Rule',
@@ -2297,6 +2307,54 @@ export default {
          accountHealthThresholdRange: 'Account health threshold must be between 0 and 100'
        }
      },
+      settings: {
+        title: 'Ops Monitoring Settings',
+        loadFailed: 'Failed to load settings',
+        saveSuccess: 'Ops monitoring settings saved successfully',
+        saveFailed: 'Failed to save settings',
+        dataCollection: 'Data Collection',
+        evaluationInterval: 'Evaluation Interval (seconds)',
+        evaluationIntervalHint: 'Frequency of detection tasks, recommended to keep default',
+        alertConfig: 'Alert Configuration',
+        enableAlert: 'Enable Alerts',
+        alertRecipients: 'Alert Recipient Emails',
+        emailPlaceholder: 'Enter email address',
+        recipientsHint: 'If empty, the system will use the first admin email as default recipient',
+        minSeverity: 'Minimum Severity',
+        reportConfig: 'Report Configuration',
+        enableReport: 'Enable Reports',
+        reportRecipients: 'Report Recipient Emails',
+        dailySummary: 'Daily Summary',
+        weeklySummary: 'Weekly Summary',
+        metricThresholds: 'Metric Thresholds',
+        metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
+        slaMinPercent: 'SLA Minimum Percentage',
+        slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
+        latencyP99MaxMs: 'Latency P99 Maximum (ms)',
+        latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
+        ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
+        ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
+        requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
+        requestErrorRateMaxPercentHint: 'Request error rate above this value will be displayed in red (default: 5%)',
+        upstreamErrorRateMaxPercent: 'Upstream Error Rate Maximum (%)',
+        upstreamErrorRateMaxPercentHint: 'Upstream error rate above this value will be displayed in red (default: 5%)',
+        advancedSettings: 'Advanced Settings',
+        dataRetention: 'Data Retention Policy',
+        enableCleanup: 'Enable Data Cleanup',
+        cleanupSchedule: 'Cleanup Schedule (Cron)',
+        cleanupScheduleHint: 'Example: 0 2 * * * means 2 AM daily',
+        errorLogRetentionDays: 'Error Log Retention Days',
+        minuteMetricsRetentionDays: 'Minute Metrics Retention Days',
+        hourlyMetricsRetentionDays: 'Hourly Metrics Retention Days',
+        retentionDaysHint: 'Recommended 7-90 days, longer periods will consume more storage',
+        aggregation: 'Pre-aggregation Tasks',
+        enableAggregation: 'Enable Pre-aggregation',
+        aggregationHint: 'Pre-aggregation improves query performance for long time windows',
+        validation: {
+          title: 'Please fix the following issues',
+          retentionDaysRange: 'Retention days must be between 1-365 days'
+        }
+      },
      concurrency: {
        title: 'Concurrency / Queue',
        byPlatform: 'By Platform',
@@ -2330,12 +2388,13 @@ export default {
        accountError: 'Error'
      },
      tooltips: {
+        totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
        throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
        latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
        errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
        errorDistribution: 'Error distribution by status code.',
        goroutines:
-          'Number of Go runtime goroutines (lightweight threads). There is no absolute “safe” number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
+          'Number of Go runtime goroutines (lightweight threads). There is no absolute "safe" number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
        cpu: 'CPU usage percentage, showing system processor load.',
        memory: 'Memory usage, including used and total available memory.',
        db: 'Database connection pool status, including active, idle, and waiting connections.',
@@ -2345,6 +2404,7 @@ export default {
        tokens: 'Total number of tokens processed in the current time window.',
        sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
        errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
+        upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
        latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
        ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
        health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
@@ -2512,6 +2572,27 @@ export default {
        securityWarning: 'Warning: This key provides full admin access. Keep it secure.',
        usage: 'Usage: Add to request header - x-api-key: <your-admin-api-key>'
      },
+      streamTimeout: {
+        title: 'Stream Timeout Handling',
+        description: 'Configure account handling strategy when upstream response times out',
+        enabled: 'Enable Stream Timeout Handling',
+        enabledHint: 'Automatically handle problematic accounts when upstream times out',
+        timeoutSeconds: 'Timeout Threshold (seconds)',
+        timeoutSecondsHint: 'Stream data interval exceeding this time is considered timeout (30-300s)',
+        action: 'Action',
+        actionTempUnsched: 'Temporarily Unschedulable',
+        actionError: 'Mark as Error',
+        actionNone: 'No Action',
+        actionHint: 'Action to take on the account after timeout',
+        tempUnschedMinutes: 'Pause Duration (minutes)',
+        tempUnschedMinutesHint: 'Duration of temporary unschedulable state (1-60 minutes)',
+        thresholdCount: 'Trigger Threshold (count)',
+        thresholdCountHint: 'Number of timeouts before triggering action (1-10)',
+        thresholdWindowMinutes: 'Threshold Window (minutes)',
+        thresholdWindowMinutesHint: 'Time window for counting timeouts (1-60 minutes)',
+        saved: 'Stream timeout settings saved',
+        saveFailed: 'Failed to save stream timeout settings'
+      },
      saveSettings: 'Save Settings',
      saving: 'Saving...',
      settingsSaved: 'Settings saved successfully',
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -387,7 +387,7 @@ export default {
      opencode: {
        title: 'OpenCode 配置示例',
        subtitle: 'opencode.json',
-        hint: '示例仅用于演示分组配置，模型与选项可按需调整。',
+        hint: '配置文件路径：~/.config/opencode/opencode.json，不存在需手动创建。示例仅供参考，模型与选项可按需调整。',
      },
    },
    customKeyLabel: '自定义密钥',
@@ -1099,6 +1099,7 @@ export default {
      schedulableEnabled: '调度已开启',
      schedulableDisabled: '调度已关闭',
      failedToToggleSchedulable: '切换调度状态失败',
+      allGroups: '共 {count} 个分组',
      columns: {
        name: '名称',
        platformType: '平台/类型',
@@ -1339,6 +1340,10 @@ export default {
      customErrorCodes: '自定义错误码',
      customErrorCodesHint: '仅对选中的错误码停止调度',
      customErrorCodesWarning: '仅选中的错误码会停止调度，其他错误将返回 500。',
+      customErrorCodes429Warning:
+        '429 已有内置的限流处理机制。添加到自定义错误码后，将直接停止调度而非临时限流。确定要添加吗？',
+      customErrorCodes529Warning:
+        '529 已有内置的过载处理机制。添加到自定义错误码后，将直接停止调度而非临时标记过载。确定要添加吗？',
      selectedErrorCodes: '已选择',
      noneSelectedUsesDefault: '未选择（使用默认策略）',
      enterErrorCode: '输入错误码 (100-599)',
@@ -2018,7 +2023,7 @@ export default {
      ready: '就绪',
      requestsTotal: '请求（总计）',
      slaScope: 'SLA 范围：',
-      tokens: 'Token',
+      tokens: 'Token数',
      tps: 'TPS',
      current: '当前',
      peak: '峰值',
@@ -2047,7 +2052,8 @@ export default {
      avg: 'avg',
      max: 'max',
      qps: 'QPS',
-      requests: '请求',
+      requests: '请求数',
+      requestsTitle: '请求',
      upstream: '上游',
      client: '客户端',
      system: '系统',
@@ -2465,6 +2471,18 @@ export default {
        reportRecipients: '评估报告接收邮箱',
        dailySummary: '每日摘要',
        weeklySummary: '每周摘要',
+        metricThresholds: '指标阈值配置',
+        metricThresholdsHint: '配置各项指标的告警阈值，超出阈值时将以红色显示',
+        slaMinPercent: 'SLA最低百分比',
+        slaMinPercentHint: 'SLA低于此值时显示为红色（默认：99.5%）',
+        latencyP99MaxMs: '延迟P99最大值（毫秒）',
+        latencyP99MaxMsHint: '延迟P99高于此值时显示为红色（默认：2000ms）',
+        ttftP99MaxMs: 'TTFT P99最大值（毫秒）',
+        ttftP99MaxMsHint: 'TTFT P99高于此值时显示为红色（默认：500ms）',
+        requestErrorRateMaxPercent: '请求错误率最大值（%）',
+        requestErrorRateMaxPercentHint: '请求错误率高于此值时显示为红色（默认：5%）',
+        upstreamErrorRateMaxPercent: '上游错误率最大值（%）',
+        upstreamErrorRateMaxPercentHint: '上游错误率高于此值时显示为红色（默认：5%）',
        advancedSettings: '高级设置',
        dataRetention: '数据保留策略',
        enableCleanup: '启用数据清理',
@@ -2696,6 +2714,27 @@ export default {
        securityWarning: '警告：此密钥拥有完整的管理员权限，请妥善保管。',
        usage: '使用方法：在请求头中添加 x-api-key: <your-admin-api-key>'
      },
+      streamTimeout: {
+        title: '流超时处理',
+        description: '配置上游响应超时时的账户处理策略，避免问题账户持续被选中',
+        enabled: '启用流超时处理',
+        enabledHint: '当上游响应超时时，自动处理问题账户',
+        timeoutSeconds: '超时阈值（秒）',
+        timeoutSecondsHint: '流数据间隔超过此时间视为超时（30-300秒）',
+        action: '处理方式',
+        actionTempUnsched: '临时不可调度',
+        actionError: '标记为错误状态',
+        actionNone: '不处理',
+        actionHint: '超时后对账户执行的操作',
+        tempUnschedMinutes: '暂停时长（分钟）',
+        tempUnschedMinutesHint: '临时不可调度的持续时间（1-60分钟）',
+        thresholdCount: '触发阈值（次数）',
+        thresholdCountHint: '累计超时多少次后触发处理（1-10次）',
+        thresholdWindowMinutes: '阈值窗口（分钟）',
+        thresholdWindowMinutesHint: '超时计数的时间窗口（1-60分钟）',
+        saved: '流超时设置保存成功',
+        saveFailed: '保存流超时设置失败'
+      },
      saveSettings: '保存设置',
      saving: '保存中...',
      settingsSaved: '设置保存成功',
--- a/frontend/src/views/admin/AccountsView.vue
+++ b/frontend/src/views/admin/AccountsView.vue
@@ -56,10 +56,7 @@
            <AccountTodayStatsCell :account="row" />
          </template>
          <template #cell-groups="{ row }">
-            <div v-if="row.groups && row.groups.length > 0" class="flex flex-wrap gap-1.5">
-              <GroupBadge v-for="group in row.groups" :key="group.id" :name="group.name" :platform="group.platform" :subscription-type="group.subscription_type" :rate-multiplier="group.rate_multiplier" :show-rate="false" />
-            </div>
-            <span v-else class="text-sm text-gray-400 dark:text-dark-500">-</span>
+            <AccountGroupsCell :groups="row.groups" :max-display="4" />
          </template>
          <template #cell-usage="{ row }">
            <AccountUsageCell :account="row" />
@@ -145,7 +142,7 @@ import AccountStatsModal from '@/components/admin/account/AccountStatsModal.vue'
 import AccountStatusIndicator from '@/components/account/AccountStatusIndicator.vue'
 import AccountUsageCell from '@/components/account/AccountUsageCell.vue'
 import AccountTodayStatsCell from '@/components/account/AccountTodayStatsCell.vue'
-import GroupBadge from '@/components/common/GroupBadge.vue'
+import AccountGroupsCell from '@/components/account/AccountGroupsCell.vue'
 import PlatformTypeBadge from '@/components/common/PlatformTypeBadge.vue'
 import { formatDateTime, formatRelativeTime } from '@/utils/format'
 import type { Account, Proxy, Group } from '@/types'
--- a/frontend/src/views/admin/SettingsView.vue
+++ b/frontend/src/views/admin/SettingsView.vue
@@ -147,6 +147,144 @@
          </div>
        </div>

+        <!-- Stream Timeout Settings -->
+        <div class="card">
+          <div class="border-b border-gray-100 px-6 py-4 dark:border-dark-700">
+            <h2 class="text-lg font-semibold text-gray-900 dark:text-white">
+              {{ t('admin.settings.streamTimeout.title') }}
+            </h2>
+            <p class="mt-1 text-sm text-gray-500 dark:text-gray-400">
+              {{ t('admin.settings.streamTimeout.description') }}
+            </p>
+          </div>
+          <div class="space-y-5 p-6">
+            <!-- Loading State -->
+            <div v-if="streamTimeoutLoading" class="flex items-center gap-2 text-gray-500">
+              <div class="h-4 w-4 animate-spin rounded-full border-b-2 border-primary-600"></div>
+              {{ t('common.loading') }}
+            </div>
+
+            <template v-else>
+              <!-- Enable Stream Timeout -->
+              <div class="flex items-center justify-between">
+                <div>
+                  <label class="font-medium text-gray-900 dark:text-white">{{
+                    t('admin.settings.streamTimeout.enabled')
+                  }}</label>
+                  <p class="text-sm text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.enabledHint') }}
+                  </p>
+                </div>
+                <Toggle v-model="streamTimeoutForm.enabled" />
+              </div>
+
+              <!-- Settings - Only show when enabled -->
+              <div
+                v-if="streamTimeoutForm.enabled"
+                class="space-y-4 border-t border-gray-100 pt-4 dark:border-dark-700"
+              >
+                <!-- Action -->
+                <div>
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.action') }}
+                  </label>
+                  <select v-model="streamTimeoutForm.action" class="input w-64">
+                    <option value="temp_unsched">{{ t('admin.settings.streamTimeout.actionTempUnsched') }}</option>
+                    <option value="error">{{ t('admin.settings.streamTimeout.actionError') }}</option>
+                    <option value="none">{{ t('admin.settings.streamTimeout.actionNone') }}</option>
+                  </select>
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.actionHint') }}
+                  </p>
+                </div>
+
+                <!-- Temp Unsched Minutes (only show when action is temp_unsched) -->
+                <div v-if="streamTimeoutForm.action === 'temp_unsched'">
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.tempUnschedMinutes') }}
+                  </label>
+                  <input
+                    v-model.number="streamTimeoutForm.temp_unsched_minutes"
+                    type="number"
+                    min="1"
+                    max="60"
+                    class="input w-32"
+                  />
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.tempUnschedMinutesHint') }}
+                  </p>
+                </div>
+
+                <!-- Threshold Count -->
+                <div>
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.thresholdCount') }}
+                  </label>
+                  <input
+                    v-model.number="streamTimeoutForm.threshold_count"
+                    type="number"
+                    min="1"
+                    max="10"
+                    class="input w-32"
+                  />
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.thresholdCountHint') }}
+                  </p>
+                </div>
+
+                <!-- Threshold Window Minutes -->
+                <div>
+                  <label class="mb-2 block text-sm font-medium text-gray-700 dark:text-gray-300">
+                    {{ t('admin.settings.streamTimeout.thresholdWindowMinutes') }}
+                  </label>
+                  <input
+                    v-model.number="streamTimeoutForm.threshold_window_minutes"
+                    type="number"
+                    min="1"
+                    max="60"
+                    class="input w-32"
+                  />
+                  <p class="mt-1.5 text-xs text-gray-500 dark:text-gray-400">
+                    {{ t('admin.settings.streamTimeout.thresholdWindowMinutesHint') }}
+                  </p>
+                </div>
+              </div>
+
+              <!-- Save Button -->
+              <div class="flex justify-end border-t border-gray-100 pt-4 dark:border-dark-700">
+                <button
+                  type="button"
+                  @click="saveStreamTimeoutSettings"
+                  :disabled="streamTimeoutSaving"
+                  class="btn btn-primary btn-sm"
+                >
+                  <svg
+                    v-if="streamTimeoutSaving"
+                    class="mr-1 h-4 w-4 animate-spin"
+                    fill="none"
+                    viewBox="0 0 24 24"
+                  >
+                    <circle
+                      class="opacity-25"
+                      cx="12"
+                      cy="12"
+                      r="10"
+                      stroke="currentColor"
+                      stroke-width="4"
+                    ></circle>
+                    <path
+                      class="opacity-75"
+                      fill="currentColor"
+                      d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
+                    ></path>
+                  </svg>
+                  {{ streamTimeoutSaving ? t('common.saving') : t('common.save') }}
+                </button>
+              </div>
+            </template>
+          </div>
+        </div>
+
        <!-- Registration Settings -->
        <div class="card">
          <div class="border-b border-gray-100 px-6 py-4 dark:border-dark-700">
@@ -840,6 +978,17 @@ const adminApiKeyMasked = ref('')
 const adminApiKeyOperating = ref(false)
 const newAdminApiKey = ref('')

+// Stream timeout state
+const streamTimeoutLoading = ref(true) // starts true; loadStreamTimeoutSettings() clears it when it finishes
+const streamTimeoutSaving = ref(false) // true while saveStreamTimeoutSettings() is running
+const streamTimeoutForm = reactive({ // initial values; loadStreamTimeoutSettings() merges the fetched settings over them
+  enabled: true,
+  action: 'temp_unsched' as 'temp_unsched' | 'error' | 'none',
+  temp_unsched_minutes: 5, // the template input declares min 1, max 60
+  threshold_count: 3, // the template input declares min 1, max 10
+  threshold_window_minutes: 10 // the template input declares min 1, max 60
+})
+
 type SettingsForm = SystemSettings & {
  smtp_password: string
  turnstile_secret_key: string
@@ -1129,8 +1278,43 @@ function copyNewKey() {
    })
 }

+// Stream timeout methods: loadStreamTimeoutSettings fetches the settings and merges them into streamTimeoutForm
+async function loadStreamTimeoutSettings() {
+  streamTimeoutLoading.value = true
+  try {
+    const settings = await adminAPI.settings.getStreamTimeoutSettings()
+    Object.assign(streamTimeoutForm, settings) // copy every returned field onto the reactive form
+  } catch (error: any) {
+    console.error('Failed to load stream timeout settings:', error) // logged only; the form keeps its current values
+  } finally {
+    streamTimeoutLoading.value = false // always clear the loading flag, on success or failure
+  }
+}
+
+async function saveStreamTimeoutSettings() { // submit the form values, then report the outcome through appStore
+  streamTimeoutSaving.value = true // the save button binds :disabled and its spinner to this flag
+  try {
+    const updated = await adminAPI.settings.updateStreamTimeoutSettings({
+      enabled: streamTimeoutForm.enabled,
+      action: streamTimeoutForm.action,
+      temp_unsched_minutes: streamTimeoutForm.temp_unsched_minutes,
+      threshold_count: streamTimeoutForm.threshold_count,
+      threshold_window_minutes: streamTimeoutForm.threshold_window_minutes
+    })
+    Object.assign(streamTimeoutForm, updated) // sync the form with the values the API returned
+    appStore.showSuccess(t('admin.settings.streamTimeout.saved'))
+  } catch (error: any) {
+    appStore.showError(
+      t('admin.settings.streamTimeout.saveFailed') + ': ' + (error.message || t('common.unknownError'))
+    )
+  } finally {
+    streamTimeoutSaving.value = false // re-enable the save button whether the request succeeded or failed
+  }
+}
+
 onMounted(() => {
  loadSettings()
  loadAdminApiKey()
+  loadStreamTimeoutSettings()
 })
 </script>
--- a/frontend/src/views/admin/ops/OpsDashboard.vue
+++ b/frontend/src/views/admin/ops/OpsDashboard.vue
@@ -13,17 +13,15 @@
      <OpsDashboardHeader
        v-else-if="opsEnabled"
        :overview="overview"
-        :ws-status="wsStatus"
-        :ws-reconnect-in-ms="wsReconnectInMs"
-        :ws-has-data="wsHasData"
-        :real-time-qps="realTimeQPS"
-        :real-time-tps="realTimeTPS"
        :platform="platform"
        :group-id="groupId"
        :time-range="timeRange"
        :query-mode="queryMode"
        :loading="loading"
        :last-updated="lastUpdated"
+        :thresholds="metricThresholds"
+        :auto-refresh-enabled="autoRefreshEnabled"
+        :auto-refresh-countdown="autoRefreshCountdown"
        @update:time-range="onTimeRangeChange"
        @update:platform="onPlatformChange"
        @update:group="onGroupChange"
@@ -75,7 +73,7 @@
      <OpsAlertEventsCard v-if="opsEnabled && !(loading && !hasLoadedOnce)" />

      <!-- Settings Dialog -->
-      <OpsSettingsDialog :show="showSettingsDialog" @close="showSettingsDialog = false" @saved="fetchData" />
+      <OpsSettingsDialog :show="showSettingsDialog" @close="showSettingsDialog = false" @saved="onSettingsSaved" />

      <!-- Alert Rules Dialog -->
      <BaseDialog :show="showAlertRulesCard" :title="t('admin.ops.alertRules.title')" width="extra-wide" @close="showAlertRulesCard = false">
@@ -108,20 +106,19 @@

 <script setup lang="ts">
 import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
-import { useDebounceFn } from '@vueuse/core'
+import { useDebounceFn, useIntervalFn } from '@vueuse/core'
 import { useI18n } from 'vue-i18n'
 import { useRoute, useRouter } from 'vue-router'
 import AppLayout from '@/components/layout/AppLayout.vue'
 import BaseDialog from '@/components/common/BaseDialog.vue'
 import {
  opsAPI,
-  OPS_WS_CLOSE_CODES,
-  type OpsWSStatus,
  type OpsDashboardOverview,
  type OpsErrorDistributionResponse,
  type OpsErrorTrendResponse,
  type OpsLatencyHistogramResponse,
-  type OpsThroughputTrendResponse
+  type OpsThroughputTrendResponse,
+  type OpsMetricThresholds
 } from '@/api/admin/ops'
 import { useAdminSettingsStore, useAppStore } from '@/stores'
 import OpsDashboardHeader from './components/OpsDashboardHeader.vue'
@@ -172,14 +169,6 @@ const QUERY_KEYS = {
 const isApplyingRouteQuery = ref(false)
 const isSyncingRouteQuery = ref(false)

-// WebSocket for realtime QPS/TPS
-const realTimeQPS = ref(0)
-const realTimeTPS = ref(0)
-const wsStatus = ref<OpsWSStatus>('closed')
-const wsReconnectInMs = ref<number | null>(null)
-const wsHasData = ref(false)
-let unsubscribeQPS: (() => void) | null = null
-
 let dashboardFetchController: AbortController | null = null
 let dashboardFetchSeq = 0

@@ -199,50 +188,6 @@ function abortDashboardFetch() {
  }
 }

-function stopQPSSubscription(options?: { resetMetrics?: boolean }) {
-  wsStatus.value = 'closed'
-  wsReconnectInMs.value = null
-  if (unsubscribeQPS) unsubscribeQPS()
-  unsubscribeQPS = null
-
-  if (options?.resetMetrics) {
-    realTimeQPS.value = 0
-    realTimeTPS.value = 0
-    wsHasData.value = false
-  }
-}
-
-function startQPSSubscription() {
-  stopQPSSubscription()
-  unsubscribeQPS = opsAPI.subscribeQPS(
-    (payload) => {
-      if (payload && typeof payload === 'object' && payload.type === 'qps_update' && payload.data) {
-        realTimeQPS.value = payload.data.qps || 0
-        realTimeTPS.value = payload.data.tps || 0
-        wsHasData.value = true
-      }
-    },
-    {
-      onStatusChange: (status) => {
-        wsStatus.value = status
-        if (status === 'connected') wsReconnectInMs.value = null
-      },
-      onReconnectScheduled: ({ delayMs }) => {
-        wsReconnectInMs.value = delayMs
-      },
-      onFatalClose: (event) => {
-        // Server-side feature flag says realtime is disabled; keep UI consistent and avoid reconnect loops.
-        if (event && event.code === OPS_WS_CLOSE_CODES.REALTIME_DISABLED) {
-          adminSettingsStore.setOpsRealtimeMonitoringEnabledLocal(false)
-          stopQPSSubscription({ resetMetrics: true })
-        }
-      },
-      // QPS updates may be sparse in idle periods; keep the timeout conservative.
-      staleTimeoutMs: 180_000
-    }
-  )
-}
-
 const readQueryString = (key: string): string => {
  const value = route.query[key]
  if (typeof value === 'string') return value
@@ -314,6 +259,7 @@ const syncQueryToRoute = useDebounceFn(async () => {
 }, 250)

 const overview = ref<OpsDashboardOverview | null>(null)
+const metricThresholds = ref<OpsMetricThresholds | null>(null)

 const throughputTrend = ref<OpsThroughputTrendResponse | null>(null)
 const loadingTrend = ref(false)
@@ -343,6 +289,45 @@ const requestDetailsPreset = ref<OpsRequestDetailsPreset>({
 const showSettingsDialog = ref(false)
 const showAlertRulesCard = ref(false)

+// Auto refresh settings (local defaults; loadAutoRefreshSettings() overwrites them with the stored values)
+const autoRefreshEnabled = ref(false) // passed to OpsDashboardHeader and checked by both timers
+const autoRefreshIntervalMs = ref(30000) // default 30 seconds
+const autoRefreshCountdown = ref(0) // seconds remaining; decremented once per second by the countdown timer
+
+// Auto refresh timer: calls fetchData() on each tick of autoRefreshIntervalMs once resumed
+const { pause: pauseAutoRefresh, resume: resumeAutoRefresh } = useIntervalFn(
+  () => {
+    if (autoRefreshEnabled.value && opsEnabled.value && !loading.value) { // skip the tick while loading is true
+      fetchData()
+    }
+  },
+  autoRefreshIntervalMs, // NOTE(review): passed as a ref, presumably so interval changes are picked up; confirm against VueUse docs
+  { immediate: false } // do not start on creation; onMounted calls resumeAutoRefresh() when enabled
+)
+
+// Countdown timer (updates every second)
+const { pause: pauseCountdown, resume: resumeCountdown } = useIntervalFn(
+  () => {
+    if (autoRefreshEnabled.value && autoRefreshCountdown.value > 0) { // never count below zero
+      autoRefreshCountdown.value--
+    }
+  },
+  1000,
+  { immediate: false } // do not start on creation; onMounted calls resumeCountdown() when enabled
+)
+
+// Load auto refresh settings from backend; on failure the current local values are kept
+async function loadAutoRefreshSettings() {
+  try {
+    const settings = await opsAPI.getAdvancedSettings()
+    autoRefreshEnabled.value = settings.auto_refresh_enabled
+    autoRefreshIntervalMs.value = settings.auto_refresh_interval_seconds * 1000 // seconds to milliseconds for the timer
+    autoRefreshCountdown.value = settings.auto_refresh_interval_seconds // countdown starts at one full interval
+  } catch (err) {
+    console.error('[OpsDashboard] Failed to load auto refresh settings', err) // logged only, not shown to the user
+  }
+}
+
 function handleThroughputSelectPlatform(nextPlatform: string) {
  platform.value = nextPlatform || ''
  groupId.value = null
@@ -376,6 +361,11 @@ function onTimeRangeChange(v: string | number | boolean | null) {
  timeRange.value = v as TimeRange
 }

+function onSettingsSaved() { // reloads thresholds and refetches the dashboard; neither call is awaited
+  loadThresholds()
+  fetchData()
+}
+
 function onPlatformChange(v: string | number | boolean | null) {
  platform.value = typeof v === 'string' ? v : ''
 }
@@ -561,6 +551,10 @@ async function fetchData() {
    ])
    if (fetchSeq !== dashboardFetchSeq) return
    lastUpdated.value = new Date()
+    // Reset auto refresh countdown after successful fetch
+    if (autoRefreshEnabled.value) {
+      autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
+    }
  } catch (err) {
    if (!isOpsDisabledError(err)) {
      console.error('[ops] failed to fetch dashboard data', err)
@@ -615,31 +609,56 @@ onMounted(async () => {
    return
  }

-  if (adminSettingsStore.opsRealtimeMonitoringEnabled) {
-    startQPSSubscription()
-  } else {
-    stopQPSSubscription({ resetMetrics: true })
-  }
+  // Load thresholds configuration
+  loadThresholds()
+
+  // Load auto refresh settings
+  await loadAutoRefreshSettings()

  if (opsEnabled.value) {
    await fetchData()
  }
+
+  // Start auto refresh if enabled
+  if (autoRefreshEnabled.value) {
+    resumeAutoRefresh()
+    resumeCountdown()
+  }
 })

+async function loadThresholds() { // loads alert runtime settings and stores their thresholds in metricThresholds
+  try {
+    const settings = await opsAPI.getAlertRuntimeSettings()
+    metricThresholds.value = settings.thresholds || null // falls back to null when `thresholds` is falsy
+  } catch (err) {
+    console.warn('[OpsDashboard] Failed to load thresholds', err)
+    metricThresholds.value = null // on failure, clear thresholds instead of keeping the previous ones
+  }
+}
+
 onUnmounted(() => {
-  stopQPSSubscription()
   abortDashboardFetch()
+  pauseAutoRefresh() // stop the refresh timer when the view unmounts
+  pauseCountdown() // stop the one-second countdown timer as well
 })

-watch(
-  () => adminSettingsStore.opsRealtimeMonitoringEnabled,
-  (enabled) => {
-    if (!opsEnabled.value) return
-    if (enabled) {
-      startQPSSubscription()
-    } else {
-      stopQPSSubscription({ resetMetrics: true })
-    }
+// Toggle both timers with the flag: enabling reseeds the countdown from the interval, disabling zeroes it
+watch(autoRefreshEnabled, (enabled) => {
+  if (enabled) {
+    autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
+    resumeAutoRefresh()
+    resumeCountdown()
+  } else {
+    pauseAutoRefresh()
+    pauseCountdown()
+    autoRefreshCountdown.value = 0
   }
-)
+})
+
+// When the settings dialog closes (show turns false), re-read auto-refresh settings from the backend
+watch(showSettingsDialog, async (show) => {
+  if (!show) {
+    await loadAutoRefreshSettings()
+  }
+})
 </script>
--- a/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue
+++ b/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue
@@ -1,29 +1,30 @@
 <script setup lang="ts">
-import { computed, onMounted, ref, watch } from 'vue'
+import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
+import { useIntervalFn } from '@vueuse/core'
 import { useI18n } from 'vue-i18n'
 import Select from '@/components/common/Select.vue'
 import HelpTooltip from '@/components/common/HelpTooltip.vue'
 import BaseDialog from '@/components/common/BaseDialog.vue'
+import Icon from '@/components/icons/Icon.vue'
 import { adminAPI } from '@/api'
-import type { OpsDashboardOverview, OpsWSStatus } from '@/api/admin/ops'
+import { opsAPI, type OpsDashboardOverview, type OpsMetricThresholds, type OpsRealtimeTrafficSummary } from '@/api/admin/ops'
 import type { OpsRequestDetailsPreset } from './OpsRequestDetailsModal.vue'
+import { useAdminSettingsStore } from '@/stores'
 import { formatNumber } from '@/utils/format'

 type RealtimeWindow = '1min' | '5min' | '30min' | '1h'

 interface Props {
   overview?: OpsDashboardOverview | null
-  wsStatus: OpsWSStatus
-  wsReconnectInMs?: number | null
-  wsHasData?: boolean
-  realTimeQps: number
-  realTimeTps: number
   platform: string
   groupId: number | null
   timeRange: string
   queryMode: string
   loading: boolean
   lastUpdated: Date | null
+  thresholds?: OpsMetricThresholds | null // threshold configuration
+  autoRefreshEnabled?: boolean // when true (and a countdown is provided), the header shows the auto-refresh countdown
+  autoRefreshCountdown?: number // value rendered in the header with an "s" suffix
 }

 interface Emits {
@@ -42,12 +43,43 @@ const props = defineProps<Props>()
 const emit = defineEmits<Emits>()

 const { t } = useI18n()
+const adminSettingsStore = useAdminSettingsStore()

 const realtimeWindow = ref<RealtimeWindow>('1min')

 const overview = computed(() => props.overview ?? null)
 const systemMetrics = computed(() => overview.value?.system_metrics ?? null)

+const REALTIME_WINDOW_MINUTES: Record<RealtimeWindow, number> = { // realtime window id -> length in minutes
+  '1min': 1,
+  '5min': 5,
+  '30min': 30,
+  '1h': 60
+}
+
+const TOOLBAR_RANGE_MINUTES: Record<string, number> = { // toolbar time-range value -> length in minutes
+  '5m': 5,
+  '30m': 30,
+  '1h': 60,
+  '6h': 6 * 60,
+  '24h': 24 * 60
+}
+
+const availableRealtimeWindows = computed(() => { // realtime windows no longer than the toolbar range
+  const toolbarMinutes = TOOLBAR_RANGE_MINUTES[props.timeRange] ?? 60 // unmapped ranges are treated as 60 minutes
+  return (['1min', '5min', '30min', '1h'] as const).filter((w) => REALTIME_WINDOW_MINUTES[w] <= toolbarMinutes)
+})
+
+watch(
+  () => props.timeRange,
+  () => {
+    // The realtime window must be inside the toolbar window; reset to keep UX predictable.
+    realtimeWindow.value = '1min'
+    // Keep realtime traffic consistent with toolbar changes even when the window is already 1min.
+    loadRealtimeTrafficSummary()
+  }
+)
+
 // --- Filters ---

 const groups = ref<Array<{ id: number; name: string; platform: string }>>([])
@@ -143,56 +175,143 @@ function getLatencyColor(ms: number | null | undefined): string {
  return 'text-red-600 dark:text-red-400'
 }

+// --- Threshold checking helpers (each returns false when the value or its configured threshold is null/undefined) ---
+function isSLABelowThreshold(slaPercent: number | null): boolean {
+  if (slaPercent == null) return false
+  const threshold = props.thresholds?.sla_percent_min
+  if (threshold == null) return false
+  return slaPercent < threshold
+}
+
+function isLatencyAboveThreshold(latencyP99Ms: number | null): boolean {
+  if (latencyP99Ms == null) return false
+  const threshold = props.thresholds?.latency_p99_ms_max
+  if (threshold == null) return false
+  return latencyP99Ms > threshold
+}
+
+function isTTFTAboveThreshold(ttftP99Ms: number | null): boolean {
+  if (ttftP99Ms == null) return false
+  const threshold = props.thresholds?.ttft_p99_ms_max
+  if (threshold == null) return false
+  return ttftP99Ms > threshold
+}
+
+function isRequestErrorRateAboveThreshold(errorRatePercent: number | null): boolean {
+  if (errorRatePercent == null) return false
+  const threshold = props.thresholds?.request_error_rate_percent_max
+  if (threshold == null) return false
+  return errorRatePercent > threshold
+}
+
+function isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent: number | null): boolean {
+  if (upstreamErrorRatePercent == null) return false
+  const threshold = props.thresholds?.upstream_error_rate_percent_max
+  if (threshold == null) return false
+  return upstreamErrorRatePercent > threshold
+}
+
 // --- Realtime / Overview labels ---

 const totalRequestsLabel = computed(() => formatNumber(overview.value?.request_count_total ?? 0))
 const totalTokensLabel = computed(() => formatNumber(overview.value?.token_consumed ?? 0))

+const realtimeTrafficSummary = ref<OpsRealtimeTrafficSummary | null>(null) // latest summary, or null before a load / after a failure
+const realtimeTrafficLoading = ref(false) // true while a summary request is in flight
+
+function makeZeroRealtimeTrafficSummary(): OpsRealtimeTrafficSummary { // all-zero placeholder for the current window and filters
+  const now = new Date().toISOString() // start_time and end_time both use this same instant
+  return {
+    window: realtimeWindow.value,
+    start_time: now,
+    end_time: now,
+    platform: props.platform,
+    group_id: props.groupId,
+    qps: { current: 0, peak: 0, avg: 0 },
+    tps: { current: 0, peak: 0, avg: 0 }
+  }
+}
+
+async function loadRealtimeTrafficSummary() { // fetches the summary for the current window, platform and group
+  if (realtimeTrafficLoading.value) return // one request at a time; NOTE(review): a call made while loading is dropped, not queued
+  if (!adminSettingsStore.opsRealtimeMonitoringEnabled) {
+    realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary() // monitoring off: show zeros without calling the API
+    return
+  }
+  realtimeTrafficLoading.value = true
+  try {
+    const res = await opsAPI.getRealtimeTrafficSummary(realtimeWindow.value, props.platform, props.groupId)
+    if (res && res.enabled === false) {
+      adminSettingsStore.setOpsRealtimeMonitoringEnabledLocal(false) // response says disabled: pass that on to the settings store
+    }
+    realtimeTrafficSummary.value = res?.summary ?? null
+  } catch (err) {
+    console.error('[OpsDashboardHeader] Failed to load realtime traffic summary', err)
+    realtimeTrafficSummary.value = null // drop the previous summary on failure
+  } finally {
+    realtimeTrafficLoading.value = false
+  }
+}
+
+watch(
+  () => [realtimeWindow.value, props.platform, props.groupId] as const,
+  () => {
+    loadRealtimeTrafficSummary()
+  },
+  { immediate: true } // also triggers the first load during setup
+)
+
+const { pause: pauseRealtimeTrafficRefresh, resume: resumeRealtimeTrafficRefresh } = useIntervalFn(
+  () => {
+    loadRealtimeTrafficSummary()
+  },
+  5000, // poll period in milliseconds
+  { immediate: false } // started/stopped by the watcher below rather than on creation
+)
+
+watch(
+  () => adminSettingsStore.opsRealtimeMonitoringEnabled,
+  (enabled) => {
+    if (enabled) {
+      resumeRealtimeTrafficRefresh()
+    } else {
+      pauseRealtimeTrafficRefresh()
+      // Keep UI stable when realtime monitoring is turned off.
+      realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary()
+    }
+  },
+  { immediate: true } // apply the current flag right away so polling starts if already enabled
+)
+
+onUnmounted(() => {
+  pauseRealtimeTrafficRefresh() // stop polling when the header unmounts
+})
+
 const displayRealTimeQps = computed(() => {
-  const ov = overview.value
-  if (!ov) return 0
-  const useRealtime = props.wsStatus === 'connected' && !!props.wsHasData
-  const v = useRealtime ? props.realTimeQps : ov.qps?.current
+  const v = realtimeTrafficSummary.value?.qps?.current // current QPS from the realtime summary (undefined when none is loaded)
   return typeof v === 'number' && Number.isFinite(v) ? v : 0
 })

 const displayRealTimeTps = computed(() => {
-  const ov = overview.value
-  if (!ov) return 0
-  const useRealtime = props.wsStatus === 'connected' && !!props.wsHasData
-  const v = useRealtime ? props.realTimeTps : ov.tps?.current
+  const v = realtimeTrafficSummary.value?.tps?.current // current TPS from the realtime summary (undefined when none is loaded)
   return typeof v === 'number' && Number.isFinite(v) ? v : 0
 })

-// Sparkline history (keep last 60 data points)
-const qpsHistory = ref<number[]>([])
-const tpsHistory = ref<number[]>([])
-const MAX_HISTORY_POINTS = 60
-
-watch([displayRealTimeQps, displayRealTimeTps], ([newQps, newTps]) => {
-  // Add new data points
-  qpsHistory.value.push(newQps)
-  tpsHistory.value.push(newTps)
-
-  // Keep only last N points
-  if (qpsHistory.value.length > MAX_HISTORY_POINTS) {
-    qpsHistory.value.shift()
-  }
-  if (tpsHistory.value.length > MAX_HISTORY_POINTS) {
-    tpsHistory.value.shift()
-  }
+const realtimeQpsPeakLabel = computed(() => { // peak QPS from the realtime summary: one decimal place, '-' if missing or non-finite
+  const v = realtimeTrafficSummary.value?.qps?.peak
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
 })
-
-const qpsPeakLabel = computed(() => {
-  const v = overview.value?.qps?.peak
-  if (typeof v !== 'number') return '-'
-  return v.toFixed(1)
+const realtimeTpsPeakLabel = computed(() => { // peak TPS from the realtime summary, same formatting
+  const v = realtimeTrafficSummary.value?.tps?.peak
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
 })
-
-const tpsPeakLabel = computed(() => {
-  const v = overview.value?.tps?.peak
-  if (typeof v !== 'number') return '-'
-  return v.toFixed(1)
+const realtimeQpsAvgLabel = computed(() => { // average QPS from the realtime summary, same formatting
+  const v = realtimeTrafficSummary.value?.qps?.avg
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
+})
+const realtimeTpsAvgLabel = computed(() => { // average TPS from the realtime summary, same formatting
+  const v = realtimeTrafficSummary.value?.tps?.avg
+  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
 })

 const qpsAvgLabel = computed(() => {
@@ -244,7 +363,7 @@ const ttftMaxMs = computed(() => overview.value?.ttft?.max_ms ?? null)
 const isSystemIdle = computed(() => {
   const ov = overview.value
   if (!ov) return true
-  const qps = props.wsStatus === 'connected' && props.wsHasData ? props.realTimeQps : ov.qps?.current
+  const qps = ov.qps?.current // QPS is read from the overview prop only
   const errorRate = ov.error_rate ?? 0
   return (qps ?? 0) === 0 && errorRate === 0
 })
@@ -687,6 +806,11 @@ const showJobsDetails = ref(false)
 function openJobsDetails() {
  showJobsDetails.value = true
 }
+
+function handleToolbarRefresh() { // toolbar refresh: reload the header's realtime summary, then emit 'refresh' to the parent
+  loadRealtimeTrafficSummary()
+  emit('refresh')
+}
 </script>

 <template>
@@ -717,6 +841,17 @@ function openJobsDetails() {
          <span>·</span>
          <span>{{ t('common.refresh') }}: {{ updatedAtLabel }}</span>

+          <template v-if="props.autoRefreshEnabled && props.autoRefreshCountdown !== undefined">
+            <span>·</span>
+            <span class="flex items-center gap-1">
+              <svg class="h-3 w-3 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
+                <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
+                <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
+              </svg>
+              <span>自动刷新: {{ props.autoRefreshCountdown }}s</span>
+            </span>
+          </template>
+
          <template v-if="systemMetrics">
            <span>·</span>
            <span>
@@ -764,7 +899,7 @@ function openJobsDetails() {
          class="flex h-8 w-8 items-center justify-center rounded-lg bg-gray-100 text-gray-500 transition-colors hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-400 dark:hover:bg-dark-600"
          :disabled="loading"
          :title="t('common.refresh')"
-          @click="emit('refresh')"
+          @click="handleToolbarRefresh"
        >
          <svg class="h-4 w-4" :class="{ 'animate-spin': loading }" fill="none" viewBox="0 0 24 24" stroke="currentColor">
            <path
@@ -818,8 +953,9 @@ function openJobsDetails() {
              class="pointer-events-none absolute left-1/2 top-full z-50 mt-2 w-72 -translate-x-1/2 opacity-0 transition-opacity duration-200 group-hover:pointer-events-auto group-hover:opacity-100 md:left-full md:top-0 md:ml-2 md:mt-0 md:translate-x-0"
            >
              <div class="rounded-xl bg-white p-4 shadow-xl ring-1 ring-black/5 dark:bg-gray-800 dark:ring-white/10">
-                <h4 class="mb-3 border-b border-gray-100 pb-2 text-sm font-bold text-gray-900 dark:border-gray-700 dark:text-white">
-                  🧠 {{ t('admin.ops.diagnosis.title') }}
+                <h4 class="mb-3 border-b border-gray-100 pb-2 text-sm font-bold text-gray-900 dark:border-gray-700 dark:text-white flex items-center gap-2">
+                  <Icon name="brain" size="sm" class="text-blue-500" />
+                  {{ t('admin.ops.diagnosis.title') }}
                </h4>

                <div class="space-y-3">
@@ -850,8 +986,9 @@ function openJobsDetails() {
                    <div class="flex-1">
                      <div class="text-xs font-semibold text-gray-900 dark:text-white">{{ item.message }}</div>
                      <div class="mt-0.5 text-[11px] text-gray-500 dark:text-gray-400">{{ item.impact }}</div>
-                      <div v-if="item.action" class="mt-1 text-[11px] text-blue-600 dark:text-blue-400">
-                        💡 {{ item.action }}
+                      <div v-if="item.action" class="mt-1 text-[11px] text-blue-600 dark:text-blue-400 flex items-center gap-1">
+                        <Icon name="lightbulb" size="xs" />
+                        {{ item.action }}
                      </div>
                    </div>
                  </div>
@@ -928,7 +1065,7 @@ function openJobsDetails() {
              <!-- Time Window Selector -->
              <div class="flex flex-wrap gap-1">
                <button
-                  v-for="window in (['1min', '5min', '30min', '1h'] as RealtimeWindow[])"
+                  v-for="window in availableRealtimeWindows"
                  :key="window"
                  type="button"
                  class="rounded px-1.5 py-0.5 text-[9px] font-bold transition-colors sm:px-2 sm:text-[10px]"
@@ -965,11 +1102,11 @@ function openJobsDetails() {
                  <div class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.peak') }}</div>
                  <div class="mt-1 space-y-0.5 text-sm font-medium text-gray-600 dark:text-gray-400">
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ qpsPeakLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeQpsPeakLabel }}</span>
                      <span class="text-xs">QPS</span>
                    </div>
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ tpsPeakLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeTpsPeakLabel }}</span>
                      <span class="text-xs">TPS</span>
                    </div>
                  </div>
@@ -980,11 +1117,11 @@ function openJobsDetails() {
                  <div class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.average') }}</div>
                  <div class="mt-1 space-y-0.5 text-sm font-medium text-gray-600 dark:text-gray-400">
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ qpsAvgLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeQpsAvgLabel }}</span>
                      <span class="text-xs">QPS</span>
                    </div>
                    <div class="flex items-baseline gap-1.5">
-                      <span class="font-black text-gray-900 dark:text-white">{{ tpsAvgLabel }}</span>
+                      <span class="font-black text-gray-900 dark:text-white">{{ realtimeTpsAvgLabel }}</span>
                      <span class="text-xs">TPS</span>
                    </div>
                  </div>
@@ -1024,7 +1161,7 @@ function openJobsDetails() {
        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
          <div class="flex items-center justify-between">
            <div class="flex items-center gap-1">
-              <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requests') }}</span>
+              <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestsTitle') }}</span>
              <HelpTooltip :content="t('admin.ops.tooltips.totalRequests')" />
            </div>
            <button
@@ -1061,21 +1198,21 @@ function openJobsDetails() {
            <div class="flex items-center gap-2">
              <span class="text-[10px] font-bold uppercase text-gray-400">SLA</span>
              <HelpTooltip :content="t('admin.ops.tooltips.sla')" />
-              <span class="h-1.5 w-1.5 rounded-full" :class="(slaPercent ?? 0) >= 99.5 ? 'bg-green-500' : 'bg-yellow-500'"></span>
+              <span class="h-1.5 w-1.5 rounded-full" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : (slaPercent ?? 0) >= 99.5 ? 'bg-green-500' : 'bg-yellow-500'"></span>
            </div>
            <button
              class="text-[10px] font-bold text-blue-500 hover:underline"
              type="button"
-              @click="openDetails({ title: t('admin.ops.requestDetails.title') })"
+              @click="openDetails({ title: t('admin.ops.requestDetails.title'), kind: 'error' })"
            >
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
-          <div class="mt-2 text-3xl font-black text-gray-900 dark:text-white">
+          <div class="mt-2 text-3xl font-black" :class="isSLABelowThreshold(slaPercent) ? 'text-red-600 dark:text-red-400' : 'text-gray-900 dark:text-white'">
            {{ slaPercent == null ? '-' : `${slaPercent.toFixed(3)}%` }}
          </div>
          <div class="mt-3 h-2 w-full overflow-hidden rounded-full bg-gray-200 dark:bg-dark-700">
-            <div class="h-full bg-green-500 transition-all" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
+            <div class="h-full transition-all" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : 'bg-green-500'" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
          </div>
          <div class="mt-3 text-xs">
            <div class="flex justify-between">
@@ -1101,7 +1238,7 @@ function openJobsDetails() {
            </button>
          </div>
          <div class="mt-2 flex items-baseline gap-2">
-            <div class="text-3xl font-black" :class="getLatencyColor(durationP99Ms)">
+            <div class="text-3xl font-black" :class="isLatencyAboveThreshold(durationP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(durationP99Ms)">
              {{ durationP99Ms ?? '-' }}
            </div>
            <span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1145,13 +1282,13 @@ function openJobsDetails() {
            <button
              class="text-[10px] font-bold text-blue-500 hover:underline"
              type="button"
-              @click="openDetails({ title: 'TTFT' })"
+              @click="openDetails({ title: 'TTFT', sort: 'duration_desc' })"
            >
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
          <div class="mt-2 flex items-baseline gap-2">
-            <div class="text-3xl font-black" :class="getLatencyColor(ttftP99Ms)">
+            <div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(ttftP99Ms)">
              {{ ttftP99Ms ?? '-' }}
            </div>
            <span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1196,7 +1333,7 @@ function openJobsDetails() {
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
-          <div class="mt-2 text-3xl font-black" :class="(errorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
+          <div class="mt-2 text-3xl font-black" :class="isRequestErrorRateAboveThreshold(errorRatePercent) ? 'text-red-600 dark:text-red-400' : (errorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
            {{ errorRatePercent == null ? '-' : `${errorRatePercent.toFixed(2)}%` }}
          </div>
          <div class="mt-3 space-y-1 text-xs">
@@ -1222,7 +1359,7 @@ function openJobsDetails() {
              {{ t('admin.ops.requestDetails.details') }}
            </button>
          </div>
-          <div class="mt-2 text-3xl font-black" :class="(upstreamErrorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
+          <div class="mt-2 text-3xl font-black" :class="isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent) ? 'text-red-600 dark:text-red-400' : (upstreamErrorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
            {{ upstreamErrorRatePercent == null ? '-' : `${upstreamErrorRatePercent.toFixed(2)}%` }}
          </div>
          <div class="mt-3 space-y-1 text-xs">
--- a/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue
+++ b/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue
@@ -174,69 +174,75 @@ watch(

 <template>
  <BaseDialog :show="show" :title="modalTitle" width="full" @close="close">
-    <!-- Filters -->
-    <div class="border-b border-gray-200 pb-4 mb-4 dark:border-dark-700">
-      <div class="grid grid-cols-1 gap-4 lg:grid-cols-12">
-        <div class="lg:col-span-5">
-          <div class="relative group">
-            <div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
-              <svg
-                class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
-                fill="none"
-                viewBox="0 0 24 24"
-                stroke="currentColor"
-              >
-                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
-              </svg>
+    <div class="flex h-full min-h-0 flex-col">
+      <!-- Filters -->
+      <div class="mb-4 flex-shrink-0 border-b border-gray-200 pb-4 dark:border-dark-700">
+        <div class="grid grid-cols-1 gap-4 lg:grid-cols-12">
+          <div class="lg:col-span-5">
+            <div class="relative group">
+              <div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
+                <svg
+                  class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
+                  fill="none"
+                  viewBox="0 0 24 24"
+                  stroke="currentColor"
+                >
+                  <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
+                </svg>
+              </div>
+              <input
+                v-model="q"
+                type="text"
+                class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
+                :placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
+              />
            </div>
+          </div>
+
+          <div class="lg:col-span-2">
+            <Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
+          </div>
+
+          <div class="lg:col-span-2">
+            <Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
+          </div>
+
+          <div class="lg:col-span-2">
            <input
-              v-model="q"
+              v-model="accountIdInput"
              type="text"
-              class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
-              :placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
+              inputmode="numeric"
+              class="input w-full text-sm"
+              :placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
            />
          </div>
-        </div>

-        <div class="lg:col-span-2">
-          <Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
-        </div>
-
-        <div class="lg:col-span-2">
-          <Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
-        </div>
-
-        <div class="lg:col-span-2">
-          <input
-            v-model="accountIdInput"
-            type="text"
-            inputmode="numeric"
-            class="input w-full text-sm"
-            :placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
-          />
-        </div>
-
-        <div class="lg:col-span-1 flex items-center justify-end">
-          <button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
-            {{ t('common.reset') }}
-          </button>
+          <div class="lg:col-span-1 flex items-center justify-end">
+            <button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
+              {{ t('common.reset') }}
+            </button>
+          </div>
        </div>
      </div>
-    </div>

-    <!-- Body -->
-    <div class="text-xs text-gray-500 dark:text-gray-400 mb-2">
-      {{ t('admin.ops.errorDetails.total') }} {{ total }}
+      <!-- Body -->
+      <div class="flex min-h-0 flex-1 flex-col">
+        <div class="mb-2 flex-shrink-0 text-xs text-gray-500 dark:text-gray-400">
+          {{ t('admin.ops.errorDetails.total') }} {{ total }}
+        </div>
+
+        <OpsErrorLogTable
+          class="min-h-0 flex-1"
+          :rows="rows"
+          :total="total"
+          :loading="loading"
+          :page="page"
+          :page-size="pageSize"
+          @openErrorDetail="emit('openErrorDetail', $event)"
+          @update:page="page = $event"
+          @update:pageSize="pageSize = $event"
+        />
+      </div>
    </div>
-    <OpsErrorLogTable
-      :rows="rows"
-      :total="total"
-      :loading="loading"
-      :page="page"
-      :page-size="pageSize"
-      @openErrorDetail="emit('openErrorDetail', $event)"
-      @update:page="page = $event"
-      @update:pageSize="pageSize = $event"
-    />
  </BaseDialog>
 </template>
--- a/frontend/src/views/admin/ops/components/OpsErrorLogTable.vue
+++ b/frontend/src/views/admin/ops/components/OpsErrorLogTable.vue
@@ -1,176 +1,178 @@
 <template>
-  <div>
-    <div v-if="loading" class="flex items-center justify-center py-10">
+  <div class="flex h-full min-h-0 flex-col">
+    <div v-if="loading" class="flex flex-1 items-center justify-center py-10">
      <div class="h-8 w-8 animate-spin rounded-full border-b-2 border-primary-600"></div>
    </div>

-    <div v-else class="overflow-x-auto">
-      <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
-        <thead class="sticky top-0 z-10 bg-gray-50/50 dark:bg-dark-800/50">
-          <tr>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.timeId') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.context') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.status') }}
-            </th>
-            <th
-              scope="col"
-              class="px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.message') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.latency') }}
-            </th>
-            <th
-              scope="col"
-              class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
-            >
-              {{ t('admin.ops.errorLog.action') }}
-            </th>
-          </tr>
-        </thead>
-        <tbody class="divide-y divide-gray-100 dark:divide-dark-700">
-          <tr v-if="rows.length === 0" class="bg-white dark:bg-dark-900">
-            <td colspan="6" class="py-16 text-center text-sm text-gray-400 dark:text-dark-500">
-              {{ t('admin.ops.errorLog.noErrors') }}
-            </td>
-          </tr>
+    <div v-else class="flex min-h-0 flex-1 flex-col">
+      <div class="min-h-0 flex-1 overflow-auto">
+        <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
+          <thead class="sticky top-0 z-10 bg-gray-50/50 dark:bg-dark-800/50">
+            <tr>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.timeId') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.context') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.status') }}
+              </th>
+              <th
+                scope="col"
+                class="px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.message') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.latency') }}
+              </th>
+              <th
+                scope="col"
+                class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
+              >
+                {{ t('admin.ops.errorLog.action') }}
+              </th>
+            </tr>
+          </thead>
+          <tbody class="divide-y divide-gray-100 dark:divide-dark-700">
+            <tr v-if="rows.length === 0" class="bg-white dark:bg-dark-900">
+              <td colspan="6" class="py-16 text-center text-sm text-gray-400 dark:text-dark-500">
+                {{ t('admin.ops.errorLog.noErrors') }}
+              </td>
+            </tr>

-          <tr
-            v-for="log in rows"
-            :key="log.id"
-            class="group cursor-pointer transition-all duration-200 hover:bg-gray-50/80 focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2 dark:hover:bg-dark-800/50 dark:focus:ring-offset-dark-900"
-            tabindex="0"
-            role="button"
-            @click="emit('openErrorDetail', log.id)"
-            @keydown.enter.prevent="emit('openErrorDetail', log.id)"
-            @keydown.space.prevent="emit('openErrorDetail', log.id)"
-          >
-            <!-- Time & ID -->
-            <td class="px-6 py-4">
-              <div class="flex flex-col gap-0.5">
-                <span class="font-mono text-xs font-bold text-gray-900 dark:text-gray-200">
-                  {{ formatDateTime(log.created_at).split(' ')[1] }}
-                </span>
-                <span
-                  class="font-mono text-[10px] text-gray-400 transition-colors group-hover:text-primary-600 dark:group-hover:text-primary-400"
-                  :title="log.request_id || log.client_request_id"
-                >
-                  {{ (log.request_id || log.client_request_id || '').substring(0, 12) }}
-                </span>
-              </div>
-            </td>
+            <tr
+              v-for="log in rows"
+              :key="log.id"
+              class="group cursor-pointer transition-all duration-200 hover:bg-gray-50/80 focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2 dark:hover:bg-dark-800/50 dark:focus:ring-offset-dark-900"
+              tabindex="0"
+              role="button"
+              @click="emit('openErrorDetail', log.id)"
+              @keydown.enter.prevent="emit('openErrorDetail', log.id)"
+              @keydown.space.prevent="emit('openErrorDetail', log.id)"
+            >
+              <!-- Time & ID -->
+              <td class="px-6 py-4">
+                <div class="flex flex-col gap-0.5">
+                  <span class="font-mono text-xs font-bold text-gray-900 dark:text-gray-200">
+                    {{ formatDateTime(log.created_at).split(' ')[1] }}
+                  </span>
+                  <span
+                    class="font-mono text-[10px] text-gray-400 transition-colors group-hover:text-primary-600 dark:group-hover:text-primary-400"
+                    :title="log.request_id || log.client_request_id"
+                  >
+                    {{ (log.request_id || log.client_request_id || '').substring(0, 12) }}
+                  </span>
+                </div>
+              </td>

-	            <!-- Context (Platform/Model) -->
-	            <td class="px-6 py-4">
-	              <div class="flex flex-col items-start gap-1.5">
-	                <span
-	                  class="inline-flex items-center rounded-md bg-gray-100 px-2 py-0.5 text-[10px] font-bold uppercase tracking-tight text-gray-600 dark:bg-dark-700 dark:text-gray-300"
-	                >
-	                  {{ log.platform || '-' }}
-	                </span>
-	                <span
-	                  v-if="log.model"
-	                  class="max-w-[160px] truncate font-mono text-[10px] text-gray-500 dark:text-dark-400"
-	                  :title="log.model"
-	                >
-	                  {{ log.model }}
-	                </span>
-	                <div
-	                  v-if="log.group_id || log.account_id"
-	                  class="flex flex-wrap items-center gap-2 font-mono text-[10px] font-semibold text-gray-400 dark:text-dark-500"
-	                >
-	                  <span v-if="log.group_id">{{ t('admin.ops.errorLog.grp') }} {{ log.group_id }}</span>
-	                  <span v-if="log.account_id">{{ t('admin.ops.errorLog.acc') }} {{ log.account_id }}</span>
-	                </div>
-	              </div>
-	            </td>
-
-            <!-- Status & Severity -->
-            <td class="px-6 py-4">
-              <div class="flex flex-wrap items-center gap-2">
-                <span
-                  :class="[
-                    'inline-flex items-center rounded-lg px-2 py-1 text-xs font-black ring-1 ring-inset shadow-sm',
-                    getStatusClass(log.status_code)
-                  ]"
-                >
-                  {{ log.status_code }}
-                </span>
-                <span
-                  v-if="log.severity"
-                  :class="['rounded-md px-2 py-0.5 text-[10px] font-black shadow-sm', getSeverityClass(log.severity)]"
-                >
-                  {{ log.severity }}
-                </span>
-              </div>
-            </td>
-
-            <!-- Message -->
-            <td class="px-6 py-4">
-              <div class="max-w-md lg:max-w-2xl">
-                <p class="truncate text-xs font-semibold text-gray-700 dark:text-gray-300" :title="log.message">
-                  {{ formatSmartMessage(log.message) || '-' }}
-                </p>
-                <div class="mt-1.5 flex flex-wrap gap-x-3 gap-y-1">
-                  <div v-if="log.phase" class="flex items-center gap-1">
-                    <span class="h-1 w-1 rounded-full bg-gray-300"></span>
-                    <span class="text-[9px] font-black uppercase tracking-tighter text-gray-400">{{ log.phase }}</span>
-                  </div>
-                  <div v-if="log.client_ip" class="flex items-center gap-1">
-                    <span class="h-1 w-1 rounded-full bg-gray-300"></span>
-                    <span class="text-[9px] font-mono font-bold text-gray-400">{{ log.client_ip }}</span>
+              <!-- Context (Platform/Model) -->
+              <td class="px-6 py-4">
+                <div class="flex flex-col items-start gap-1.5">
+                  <span
+                    class="inline-flex items-center rounded-md bg-gray-100 px-2 py-0.5 text-[10px] font-bold uppercase tracking-tight text-gray-600 dark:bg-dark-700 dark:text-gray-300"
+                  >
+                    {{ log.platform || '-' }}
+                  </span>
+                  <span
+                    v-if="log.model"
+                    class="max-w-[160px] truncate font-mono text-[10px] text-gray-500 dark:text-dark-400"
+                    :title="log.model"
+                  >
+                    {{ log.model }}
+                  </span>
+                  <div
+                    v-if="log.group_id || log.account_id"
+                    class="flex flex-wrap items-center gap-2 font-mono text-[10px] font-semibold text-gray-400 dark:text-dark-500"
+                  >
+                    <span v-if="log.group_id">{{ t('admin.ops.errorLog.grp') }} {{ log.group_id }}</span>
+                    <span v-if="log.account_id">{{ t('admin.ops.errorLog.acc') }} {{ log.account_id }}</span>
                  </div>
                </div>
-              </div>
-            </td>
+              </td>

-            <!-- Latency -->
-            <td class="px-6 py-4 text-right">
-              <div class="flex flex-col items-end">
-                <span class="font-mono text-xs font-black" :class="getLatencyClass(log.latency_ms ?? null)">
-                  {{ log.latency_ms != null ? Math.round(log.latency_ms) + 'ms' : '--' }}
-                </span>
-              </div>
-            </td>
+              <!-- Status & Severity -->
+              <td class="px-6 py-4">
+                <div class="flex flex-wrap items-center gap-2">
+                  <span
+                    :class="[
+                      'inline-flex items-center rounded-lg px-2 py-1 text-xs font-black ring-1 ring-inset shadow-sm',
+                      getStatusClass(log.status_code)
+                    ]"
+                  >
+                    {{ log.status_code }}
+                  </span>
+                  <span
+                    v-if="log.severity"
+                    :class="['rounded-md px-2 py-0.5 text-[10px] font-black shadow-sm', getSeverityClass(log.severity)]"
+                  >
+                    {{ log.severity }}
+                  </span>
+                </div>
+              </td>

-            <!-- Actions -->
-            <td class="px-6 py-4 text-right" @click.stop>
-              <button type="button" class="btn btn-secondary btn-sm" @click="emit('openErrorDetail', log.id)">
-                {{ t('admin.ops.errorLog.details') }}
-              </button>
-            </td>
-          </tr>
-        </tbody>
-      </table>
+              <!-- Message -->
+              <td class="px-6 py-4">
+                <div class="max-w-md lg:max-w-2xl">
+                  <p class="truncate text-xs font-semibold text-gray-700 dark:text-gray-300" :title="log.message">
+                    {{ formatSmartMessage(log.message) || '-' }}
+                  </p>
+                  <div class="mt-1.5 flex flex-wrap gap-x-3 gap-y-1">
+                    <div v-if="log.phase" class="flex items-center gap-1">
+                      <span class="h-1 w-1 rounded-full bg-gray-300"></span>
+                      <span class="text-[9px] font-black uppercase tracking-tighter text-gray-400">{{ log.phase }}</span>
+                    </div>
+                    <div v-if="log.client_ip" class="flex items-center gap-1">
+                      <span class="h-1 w-1 rounded-full bg-gray-300"></span>
+                      <span class="text-[9px] font-mono font-bold text-gray-400">{{ log.client_ip }}</span>
+                    </div>
+                  </div>
+                </div>
+              </td>
+
+              <!-- Latency -->
+              <td class="px-6 py-4 text-right">
+                <div class="flex flex-col items-end">
+                  <span class="font-mono text-xs font-black" :class="getLatencyClass(log.latency_ms ?? null)">
+                    {{ log.latency_ms != null ? Math.round(log.latency_ms) + 'ms' : '--' }}
+                  </span>
+                </div>
+              </td>
+
+              <!-- Actions -->
+              <td class="px-6 py-4 text-right" @click.stop>
+                <button type="button" class="btn btn-secondary btn-sm" @click="emit('openErrorDetail', log.id)">
+                  {{ t('admin.ops.errorLog.details') }}
+                </button>
+              </td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+
+      <Pagination
+        v-if="total > 0"
+        :total="total"
+        :page="page"
+        :page-size="pageSize"
+        :page-size-options="[10, 20, 50, 100, 200, 500]"
+        @update:page="emit('update:page', $event)"
+        @update:pageSize="emit('update:pageSize', $event)"
+      />
    </div>
-
-    <Pagination
-      v-if="total > 0"
-      :total="total"
-      :page="page"
-      :page-size="pageSize"
-      :page-size-options="[10, 20, 50, 100, 200, 500]"
-      @update:page="emit('update:page', $event)"
-      @update:pageSize="emit('update:pageSize', $event)"
-    />
  </div>
 </template>

--- a/frontend/src/views/admin/ops/components/OpsRequestDetailsModal.vue
+++ b/frontend/src/views/admin/ops/components/OpsRequestDetailsModal.vue
@@ -95,6 +95,7 @@ watch(
  (open) => {
    if (open) {
      page.value = 1
+      pageSize.value = 20
      fetchData()
    }
  }
@@ -150,45 +151,46 @@ const kindBadgeClass = (kind: string) => {
 <template>
  <BaseDialog :show="modelValue" :title="props.preset.title || t('admin.ops.requestDetails.title')" width="full" @close="close">
    <template #default>
-      <div class="flex items-center justify-between mb-4">
-        <div class="text-xs text-gray-500 dark:text-gray-400">
-          {{ t('admin.ops.requestDetails.rangeLabel', { range: rangeLabel }) }}
-        </div>
-        <button
-          type="button"
-          class="btn btn-secondary btn-sm"
-          @click="fetchData"
-        >
-          {{ t('common.refresh') }}
-        </button>
-      </div>
-
-      <!-- Loading -->
-      <div v-if="loading" class="flex items-center justify-center py-16">
-        <div class="flex flex-col items-center gap-3">
-          <svg class="h-8 w-8 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
-            <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
-            <path
-              class="opacity-75"
-              fill="currentColor"
-              d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
-            ></path>
-          </svg>
-          <span class="text-sm font-medium text-gray-500 dark:text-gray-400">{{ t('common.loading') }}</span>
-        </div>
-      </div>
-
-      <!-- Table -->
-      <div v-else>
-        <div v-if="items.length === 0" class="rounded-xl border border-dashed border-gray-200 p-10 text-center dark:border-dark-700">
-          <div class="text-sm font-medium text-gray-600 dark:text-gray-300">{{ t('admin.ops.requestDetails.empty') }}</div>
-          <div class="mt-1 text-xs text-gray-400">{{ t('admin.ops.requestDetails.emptyHint') }}</div>
+      <div class="flex h-full min-h-0 flex-col">
+        <div class="mb-4 flex flex-shrink-0 items-center justify-between">
+          <div class="text-xs text-gray-500 dark:text-gray-400">
+            {{ t('admin.ops.requestDetails.rangeLabel', { range: rangeLabel }) }}
+          </div>
+          <button
+            type="button"
+            class="btn btn-secondary btn-sm"
+            @click="fetchData"
+          >
+            {{ t('common.refresh') }}
+          </button>
        </div>

-        <div v-else class="overflow-hidden rounded-xl border border-gray-200 dark:border-dark-700">
-          <div class="overflow-x-auto">
-            <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
-              <thead class="bg-gray-50 dark:bg-dark-900">
+        <!-- Loading -->
+        <div v-if="loading" class="flex flex-1 items-center justify-center py-16">
+          <div class="flex flex-col items-center gap-3">
+            <svg class="h-8 w-8 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
+              <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
+              <path
+                class="opacity-75"
+                fill="currentColor"
+                d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
+              ></path>
+            </svg>
+            <span class="text-sm font-medium text-gray-500 dark:text-gray-400">{{ t('common.loading') }}</span>
+          </div>
+        </div>
+
+        <!-- Table -->
+        <div v-else class="flex min-h-0 flex-1 flex-col">
+          <div v-if="items.length === 0" class="rounded-xl border border-dashed border-gray-200 p-10 text-center dark:border-dark-700">
+            <div class="text-sm font-medium text-gray-600 dark:text-gray-300">{{ t('admin.ops.requestDetails.empty') }}</div>
+            <div class="mt-1 text-xs text-gray-400">{{ t('admin.ops.requestDetails.emptyHint') }}</div>
+          </div>
+
+          <div v-else class="flex min-h-0 flex-1 flex-col overflow-hidden rounded-xl border border-gray-200 dark:border-dark-700">
+            <div class="min-h-0 flex-1 overflow-auto">
+              <table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
+                <thead class="sticky top-0 z-10 bg-gray-50 dark:bg-dark-900">
                <tr>
                  <th class="px-4 py-3 text-left text-[11px] font-bold uppercase tracking-wider text-gray-500 dark:text-gray-400">
                    {{ t('admin.ops.requestDetails.table.time') }}
@@ -265,15 +267,16 @@ const kindBadgeClass = (kind: string) => {
                </tr>
              </tbody>
            </table>
-          </div>
+            </div>

-          <Pagination
-            :total="total"
-            :page="page"
-            :page-size="pageSize"
-            @update:page="handlePageChange"
-            @update:pageSize="handlePageSizeChange"
-          />
+            <Pagination
+              :total="total"
+              :page="page"
+              :page-size="pageSize"
+              @update:page="handlePageChange"
+              @update:pageSize="handlePageSizeChange"
+            />
+          </div>
        </div>
      </div>
    </template>
--- a/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue
+++ b/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue
@@ -45,6 +45,36 @@ function validateRuntimeSettings(settings: OpsAlertRuntimeSettings): ValidationR
    errors.push(t('admin.ops.runtime.validation.evalIntervalRange'))
  }

+  // Thresholds validation
+  const thresholds = settings.thresholds
+  if (thresholds) {
+    if (thresholds.sla_percent_min != null) {
+      if (!Number.isFinite(thresholds.sla_percent_min) || thresholds.sla_percent_min < 0 || thresholds.sla_percent_min > 100) {
+        errors.push('SLA 最低值必须在 0-100 之间')
+      }
+    }
+    if (thresholds.latency_p99_ms_max != null) {
+      if (!Number.isFinite(thresholds.latency_p99_ms_max) || thresholds.latency_p99_ms_max < 0) {
+        errors.push('延迟 P99 最大值必须大于或等于 0')
+      }
+    }
+    if (thresholds.ttft_p99_ms_max != null) {
+      if (!Number.isFinite(thresholds.ttft_p99_ms_max) || thresholds.ttft_p99_ms_max < 0) {
+        errors.push('TTFT P99 最大值必须大于或等于 0')
+      }
+    }
+    if (thresholds.request_error_rate_percent_max != null) {
+      if (!Number.isFinite(thresholds.request_error_rate_percent_max) || thresholds.request_error_rate_percent_max < 0 || thresholds.request_error_rate_percent_max > 100) {
+        errors.push('请求错误率最大值必须在 0-100 之间')
+      }
+    }
+    if (thresholds.upstream_error_rate_percent_max != null) {
+      if (!Number.isFinite(thresholds.upstream_error_rate_percent_max) || thresholds.upstream_error_rate_percent_max < 0 || thresholds.upstream_error_rate_percent_max > 100) {
+        errors.push('上游错误率最大值必须在 0-100 之间')
+      }
+    }
+  }
+
  const lock = settings.distributed_lock
  if (lock?.enabled) {
    if (!lock.key || lock.key.trim().length < 3) {
@@ -130,6 +160,15 @@ function openAlertEditor() {
    if (!Array.isArray(draftAlert.value.silencing.entries)) {
      draftAlert.value.silencing.entries = []
    }
+    if (!draftAlert.value.thresholds) {
+      draftAlert.value.thresholds = {
+        sla_percent_min: 99.5,
+        latency_p99_ms_max: 2000,
+        ttft_p99_ms_max: 500,
+        request_error_rate_percent_max: 5,
+        upstream_error_rate_percent_max: 5
+      }
+    }
  }

  showAlertEditor.value = true
@@ -295,6 +334,81 @@ onMounted(() => {
        <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.runtime.evalIntervalHint') }}</p>
      </div>

+      <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
+        <div class="mb-2 text-sm font-semibold text-gray-900 dark:text-white">指标阈值配置</div>
+        <p class="mb-4 text-xs text-gray-500 dark:text-gray-400">配置各项指标的告警阈值。超出阈值的指标将在看板上以红色显示。</p>
+
+        <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">SLA 最低值 (%)</div>
+            <input
+              v-model.number="draftAlert.thresholds.sla_percent_min"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+              placeholder="99.5"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">SLA 低于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">延迟 P99 最大值 (ms)</div>
+            <input
+              v-model.number="draftAlert.thresholds.latency_p99_ms_max"
+              type="number"
+              min="0"
+              step="100"
+              class="input"
+              placeholder="2000"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">延迟 P99 高于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">TTFT P99 最大值 (ms)</div>
+            <input
+              v-model.number="draftAlert.thresholds.ttft_p99_ms_max"
+              type="number"
+              min="0"
+              step="100"
+              class="input"
+              placeholder="500"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">TTFT P99 高于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">请求错误率最大值 (%)</div>
+            <input
+              v-model.number="draftAlert.thresholds.request_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+              placeholder="5"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">请求错误率高于此值时将显示为红色</p>
+          </div>
+
+          <div>
+            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">上游错误率最大值 (%)</div>
+            <input
+              v-model.number="draftAlert.thresholds.upstream_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+              placeholder="5"
+            />
+            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">上游错误率高于此值时将显示为红色</p>
+          </div>
+        </div>
+      </div>
+
      <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
        <div class="mb-2 text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.runtime.silencing.title') }}</div>

--- a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
+++ b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
@@ -6,7 +6,7 @@ import { opsAPI } from '@/api/admin/ops'
 import BaseDialog from '@/components/common/BaseDialog.vue'
 import Select from '@/components/common/Select.vue'
 import Toggle from '@/components/common/Toggle.vue'
-import type { OpsAlertRuntimeSettings, EmailNotificationConfig, AlertSeverity, OpsAdvancedSettings } from '../types'
+import type { OpsAlertRuntimeSettings, EmailNotificationConfig, AlertSeverity, OpsAdvancedSettings, OpsMetricThresholds } from '../types'

 const { t } = useI18n()
 const appStore = useAppStore()
@@ -29,19 +29,38 @@ const runtimeSettings = ref<OpsAlertRuntimeSettings | null>(null)
 const emailConfig = ref<EmailNotificationConfig | null>(null)
 // 高级设置
 const advancedSettings = ref<OpsAdvancedSettings | null>(null)
+// Metric threshold settings; these defaults stay in place unless loadAllSettings() gets a non-empty thresholds object from the backend
+const metricThresholds = ref<OpsMetricThresholds>({
+  sla_percent_min: 99.5,
+  latency_p99_ms_max: 2000,
+  ttft_p99_ms_max: 500,
+  request_error_rate_percent_max: 5,
+  upstream_error_rate_percent_max: 5
+})

 // 加载所有配置
 async function loadAllSettings() {
  loading.value = true
  try {
-    const [runtime, email, advanced] = await Promise.all([
+    const [runtime, email, advanced, thresholds] = await Promise.all([
      opsAPI.getAlertRuntimeSettings(),
      opsAPI.getEmailNotificationConfig(),
-      opsAPI.getAdvancedSettings()
+      opsAPI.getAdvancedSettings(),
+      opsAPI.getMetricThresholds()
    ])
    runtimeSettings.value = runtime
    emailConfig.value = email
    advancedSettings.value = advanced
+    // 如果后端返回了阈值，使用后端的值；否则保持默认值
+    if (thresholds && Object.keys(thresholds).length > 0) {
+      metricThresholds.value = {
+        sla_percent_min: thresholds.sla_percent_min ?? 99.5,
+        latency_p99_ms_max: thresholds.latency_p99_ms_max ?? 2000,
+        ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500,
+        request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5,
+        upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5
+      }
+    }
  } catch (err: any) {
    console.error('[OpsSettingsDialog] Failed to load settings', err)
    appStore.showError(err?.response?.data?.detail || t('admin.ops.settings.loadFailed'))
@@ -138,6 +157,23 @@ const validation = computed(() => {
    }
  }

+  // Validate metric thresholds; Number.isFinite also rejects '' / NaN that v-model.number leaves behind when a numeric input is cleared
+  if (metricThresholds.value.sla_percent_min != null && (!Number.isFinite(metricThresholds.value.sla_percent_min) || metricThresholds.value.sla_percent_min < 0 || metricThresholds.value.sla_percent_min > 100)) {
+    errors.push('SLA最低百分比必须在0-100之间')
+  }
+  if (metricThresholds.value.latency_p99_ms_max != null && (!Number.isFinite(metricThresholds.value.latency_p99_ms_max) || metricThresholds.value.latency_p99_ms_max < 0)) {
+    errors.push('延迟P99最大值必须大于等于0')
+  }
+  if (metricThresholds.value.ttft_p99_ms_max != null && (!Number.isFinite(metricThresholds.value.ttft_p99_ms_max) || metricThresholds.value.ttft_p99_ms_max < 0)) {
+    errors.push('TTFT P99最大值必须大于等于0')
+  }
+  if (metricThresholds.value.request_error_rate_percent_max != null && (!Number.isFinite(metricThresholds.value.request_error_rate_percent_max) || metricThresholds.value.request_error_rate_percent_max < 0 || metricThresholds.value.request_error_rate_percent_max > 100)) {
+    errors.push('请求错误率最大值必须在0-100之间')
+  }
+  if (metricThresholds.value.upstream_error_rate_percent_max != null && (!Number.isFinite(metricThresholds.value.upstream_error_rate_percent_max) || metricThresholds.value.upstream_error_rate_percent_max < 0 || metricThresholds.value.upstream_error_rate_percent_max > 100)) {
+    errors.push('上游错误率最大值必须在0-100之间')
+  }
+
  return { valid: errors.length === 0, errors }
 })

@@ -153,14 +189,15 @@ async function saveAllSettings() {
    await Promise.all([
      runtimeSettings.value ? opsAPI.updateAlertRuntimeSettings(runtimeSettings.value) : Promise.resolve(),
      emailConfig.value ? opsAPI.updateEmailNotificationConfig(emailConfig.value) : Promise.resolve(),
-      advancedSettings.value ? opsAPI.updateAdvancedSettings(advancedSettings.value) : Promise.resolve()
+      advancedSettings.value ? opsAPI.updateAdvancedSettings(advancedSettings.value) : Promise.resolve(),
+      opsAPI.updateMetricThresholds(metricThresholds.value)
    ])
    appStore.showSuccess(t('admin.ops.settings.saveSuccess'))
    emit('saved')
    emit('close')
  } catch (err: any) {
    console.error('[OpsSettingsDialog] Failed to save settings', err)
-    appStore.showError(err?.response?.data?.detail || t('admin.ops.settings.saveFailed'))
+    appStore.showError(err?.response?.data?.message || err?.response?.data?.detail || t('admin.ops.settings.saveFailed'))
  } finally {
    saving.value = false
  }
@@ -306,6 +343,77 @@ async function saveAllSettings() {
        </div>
      </div>

+      <!-- 指标阈值配置 -->
+      <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
+        <h4 class="mb-3 text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.settings.metricThresholds') }}</h4>
+        <p class="mb-4 text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.settings.metricThresholdsHint') }}</p>
+
+        <div class="space-y-4">
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.slaMinPercent') }}</label>
+            <input
+              v-model.number="metricThresholds.sla_percent_min"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.slaMinPercentHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.latencyP99MaxMs') }}</label>
+            <input
+              v-model.number="metricThresholds.latency_p99_ms_max"
+              type="number"
+              min="0"
+              step="100"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.latencyP99MaxMsHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.ttftP99MaxMs') }}</label>
+            <input
+              v-model.number="metricThresholds.ttft_p99_ms_max"
+              type="number"
+              min="0"
+              step="50"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.ttftP99MaxMsHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.requestErrorRateMaxPercent') }}</label>
+            <input
+              v-model.number="metricThresholds.request_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.requestErrorRateMaxPercentHint') }}</p>
+          </div>
+
+          <div>
+            <label class="input-label">{{ t('admin.ops.settings.upstreamErrorRateMaxPercent') }}</label>
+            <input
+              v-model.number="metricThresholds.upstream_error_rate_percent_max"
+              type="number"
+              min="0"
+              max="100"
+              step="0.1"
+              class="input"
+            />
+            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.upstreamErrorRateMaxPercentHint') }}</p>
+          </div>
+        </div>
+      </div>
+
      <!-- 高级设置 -->
      <details class="rounded-2xl bg-gray-50 dark:bg-dark-700/50">
        <summary class="cursor-pointer p-4 text-sm font-semibold text-gray-900 dark:text-white">
@@ -379,6 +487,48 @@ async function saveAllSettings() {
              <Toggle v-model="advancedSettings.aggregation.aggregation_enabled" />
            </div>
          </div>
+
+          <!-- 错误过滤 -->
+          <div class="space-y-3">
+            <h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">错误过滤</h5>
+
+            <div class="flex items-center justify-between">
+              <div>
+                <label class="text-sm font-medium text-gray-700 dark:text-gray-300">忽略 count_tokens 错误</label>
+                <p class="mt-1 text-xs text-gray-500">
+                  启用后，count_tokens 请求的错误将不计入运维监控的统计和告警中（但仍会存储在数据库中）
+                </p>
+              </div>
+              <Toggle v-model="advancedSettings.ignore_count_tokens_errors" />
+            </div>
+          </div>
+
+          <!-- 自动刷新 -->
+          <div class="space-y-3">
+            <h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">自动刷新</h5>
+
+            <div class="flex items-center justify-between">
+              <div>
+                <label class="text-sm font-medium text-gray-700 dark:text-gray-300">启用自动刷新</label>
+                <p class="mt-1 text-xs text-gray-500">
+                  自动刷新仪表板数据，启用后会定期拉取最新数据
+                </p>
+              </div>
+              <Toggle v-model="advancedSettings.auto_refresh_enabled" />
+            </div>
+
+            <div v-if="advancedSettings.auto_refresh_enabled">
+              <label class="input-label">刷新间隔</label>
+              <Select
+                v-model="advancedSettings.auto_refresh_interval_seconds"
+                :options="[
+                  { value: 15, label: '15 秒' },
+                  { value: 30, label: '30 秒' },
+                  { value: 60, label: '60 秒' }
+                ]"
+              />
+            </div>
+          </div>
        </div>
      </details>
    </div>
--- a/frontend/src/views/admin/ops/types.ts
+++ b/frontend/src/views/admin/ops/types.ts
@@ -14,6 +14,7 @@ export type {
  EmailNotificationConfig,
  OpsDistributedLockSettings,
  OpsAlertRuntimeSettings,
+  OpsMetricThresholds,
  OpsAdvancedSettings,
  OpsDataRetentionSettings,
  OpsAggregationSettings
Author	SHA1	Message	Date
shaw	3b71bc3df1	feat: OpenCode 配置提示添加配置文件路径说明	2026-01-12 20:49:54 +08:00
shaw	22ef9534e0	fix: 修复反向代理下客户端 IP 获取错误	2026-01-12 20:44:38 +08:00
Wesley Liddick	c206d12d5c	Merge pull request #254 from IanShaw027/feat/ops-count-tokens-filter-and-auto-refresh feat(ops): count_tokens 错误过滤和自动刷新功能	2026-01-12 17:31:54 +08:00
IanShaw027	6ad29a470c	style(ops): 移除未使用的 isAutoRefreshActive 变量	2026-01-12 17:28:25 +08:00
IanShaw027	2d45e61a9b	style(ops): 修复代码格式问题以通过 golangci-lint	2026-01-12 17:18:49 +08:00
IanShaw027	b98fb013ae	feat(ops): 添加自动刷新配置功能。功能特性： - 支持配置启用/禁用自动刷新 - 可配置刷新间隔（15秒/30秒/60秒） - 实时倒计时显示，用户可见下次刷新时间 - 手动刷新自动重置倒计时 - 页面卸载时自动清理定时器。用户体验： - 默认禁用，用户可根据需求开启 - 与现有 OpsConcurrencyCard 5秒刷新保持一致 - 倒计时带旋转动画，视觉反馈清晰 - 配置修改后立即生效，无需刷新页面。技术实现： - ops.ts: 添加 auto_refresh_enabled 和 auto_refresh_interval_seconds 配置 - OpsSettingsDialog.vue: 添加自动刷新配置界面 - OpsDashboard.vue: 实现主刷新逻辑和双定时器设计 - OpsDashboardHeader.vue: 倒计时显示组件。配置说明： - auto_refresh_enabled: 是否启用（默认 false） - auto_refresh_interval_seconds: 刷新间隔（默认 30 秒，范围 15-300 秒）	2026-01-12 17:07:07 +08:00
IanShaw027	345a965fa3	feat(ops): 添加 count_tokens 错误过滤功能。功能特性： - 自动识别并标记 count_tokens 请求的错误 - 支持配置是否在统计中忽略 count_tokens 错误 - 错误数据完整保留，仅在统计时动态过滤。技术实现： - ops_error_logger.go: 自动标记 count_tokens 请求 - ops_repo.go: INSERT 语句添加 is_count_tokens 字段 - ops_repo_dashboard.go: buildErrorWhere 核心过滤函数 - ops_repo_preagg.go: 预聚合统计中添加过滤 - ops_repo_trends.go: 趋势统计查询添加过滤（2 处） - ops_settings_models.go: 添加 ignore_count_tokens_errors 配置 - ops_settings.go: 配置验证和默认值设置 - ops_port.go: 错误日志模型添加 IsCountTokens 字段。业务价值： - count_tokens 是探测性请求，其错误不影响真实业务 SLA - 用户可根据需求灵活控制是否计入统计 - 提升错误率、告警等运维指标的准确性。影响范围： - Dashboard 概览统计 - 错误趋势图表 - 告警规则评估 - 预聚合指标（hourly/daily） - 健康分数计算	2026-01-12 17:06:12 +08:00
IanShaw027	c02c120579	feat(ops): 添加 count_tokens 错误标记数据库迁移 - 新增 is_count_tokens 布尔字段到 ops_error_logs 表 - 默认值为 false - 支持后续动态过滤统计	2026-01-12 17:06:12 +08:00
shaw	4da681f58a	Merge branch 'mt21625457/main'	2026-01-12 16:20:55 +08:00
shaw	68ba866c38	fix(frontend): 修复账号管理页面分组显示和 Cookie 授权问题 - 新增 AccountGroupsCell 组件优化分组列显示（最多4个+折叠） - 修复 Cookie 自动授权时 group_ids/notes/expires_at 字段丢失 - 修复 SettingsView 流超时配置前后端字段不一致问题	2026-01-12 16:08:44 +08:00
yangjianbo	9622347faa	fix(调度): 修复 outbox 空载写入并稳固回放测试。将 outbox payload 为空时写入 NULL 避免事务因 JSON 解析错误中断；调整回放测试为预置缓存后验证 last_used 更新。测试: go test -tags=integration ./internal/repository	2026-01-12 15:46:55 +08:00
shaw	8363663ea8	fix(gateway): 修复 usage_logs 记录 IP 不正确的问题。在 nginx 反向代理场景下，使用 ip.GetClientIP() 替代 c.ClientIP() 以正确获取客户端真实 IP 地址	2026-01-12 15:37:45 +08:00
Wesley Liddick	b588ea194c	Merge pull request #251 from IanShaw027/fix/ops-bugs feat(ops): 运维看板功能增强 - 实时流量监控与指标阈值配置	2026-01-12 15:26:26 +08:00
Wesley Liddick	465ba76788	Merge pull request #250 from IanShaw027/fix/custom-error-codes-disable-scheduling fix(gateway): 自定义错误码触发停止调度	2026-01-12 15:26:14 +08:00
shaw	cf313d5761	fix(gateway): 修复 Claude Code 客户端检测和请求信息记录 - 在 Messages 方法中调用 SetClaudeCodeClientContext 启用客户端检测 - 修复 RecordUsageInput 未传递 UserAgent 和 IPAddress 的问题	2026-01-12 15:19:40 +08:00
yangjianbo	8c1958c9ad	fix(调度): 修复流超时配置并补回放测试。删除前端未支持的 timeout_seconds 字段，避免类型检查失败；新增调度 outbox 回放集成测试；调整调度默认等待超时断言。测试: make test	2026-01-12 15:13:39 +08:00
yangjianbo	2db34139f0	Merge branch 'main' of https://github.com/mt21625457/aicodex2api	2026-01-12 14:50:53 +08:00
IanShaw027	e0cccf6ed2	fix(ops): 修复Go代码格式问题	2026-01-12 14:36:32 +08:00
IanShaw027	89c1a41305	fix(ops): 修复错误日志和请求详情模态框的布局问题 - 修复 OpsErrorDetailsModal 的内容溢出问题，使用 flex 布局确保正确显示 - 修复 OpsErrorLogTable 的表格滚动问题，添加 min-h-0 确保正确的滚动行为 - 修复 OpsRequestDetailsModal 的布局问题，添加 pageSize 初始化并优化 flex 布局 - 统一使用 flex 布局模式，确保模态框内容在不同屏幕尺寸下正确显示	2026-01-12 14:31:21 +08:00
yangjianbo	202ec21bab	fix(config): 提升粘性会话默认等待时长 - 默认值调整为 120s - 同步示例配置与环境变量	2026-01-12 14:26:31 +08:00
ianshaw	6dcb27632e	fix(gateway): 自定义错误码触发停止调度 - 修改 HandleUpstreamError 逻辑，启用自定义错误码时所有在列表中的错误码都会停止调度 - 添加 handleCustomErrorCode 函数处理自定义错误码的账号停用 - 前端添加 429/529 错误码的警告提示，因为这些错误码已有内置处理机制 - 更新 EditAccountModal、CreateAccountModal、BulkEditAccountModal 的错误码添加逻辑	2026-01-11 22:20:02 -08:00
yangjianbo	3141aa5144	feat(scheduler): 引入调度快照缓存与 outbox 回放 - 调度热路径优先读 Redis 快照，保留分组排序语义 - outbox 回放 + 全量重建纠偏，失败重试不推进水位 - 自动 Atlas 基线对齐并同步调度配置示例	2026-01-12 14:19:06 +08:00
IanShaw027	5443efd7d7	feat(ops): 前端集成实时流量功能 - 添加实时流量API调用方法 - 优化OpsDashboard组件代码	2026-01-12 14:18:16 +08:00
IanShaw027	62771583e7	feat(ops): 集成实时流量API接口 - 添加实时流量handler处理逻辑 - 注册实时流量路由 - 扩展ops service接口定义	2026-01-12 14:17:58 +08:00
IanShaw027	5526f122b7	feat(ops): 新增实时流量数据层 - 添加实时流量repository层实现 - 添加实时流量service层逻辑 - 定义实时流量数据模型	2026-01-12 14:17:42 +08:00
Wesley Liddick	9c144587fe	Merge pull request #249 from IanShaw027/feat/stream-timeout-handling feat(gateway): 添加流超时处理机制	2026-01-12 14:14:21 +08:00
IanShaw027	098bf5a1e8	fix(i18n): 补充缺失的英文翻译 - 添加 admin.ops.requestsTitle - 添加 admin.ops.alertRules.manage 和 saveSuccess/deleteSuccess - 添加 common.settings - 添加完整的 admin.ops.settings 部分 - 添加 admin.ops.tooltips.totalRequests 和 upstreamErrors	2026-01-12 14:10:44 +08:00
Wesley Liddick	4c37ca71ee	Merge pull request #247 from 7836246/fix/negative-zero-balance fix: 修复扣款时浮点数精度导致的余额不足误判和 -0.00 显示问题	2026-01-12 14:10:41 +08:00
ianshaw	0c52809591	refactor(settings): 简化流超时配置，移除冗余字段 - 移除 TimeoutSeconds 字段，超时判定由网关配置控制 - 默认禁用流超时处理功能	2026-01-11 22:09:35 -08:00
小海	53e730f8d5	fix: 修复扣款时浮点数精度导致的余额不足误判和 -0.00 显示问题	2026-01-12 14:06:30 +08:00
IanShaw027	8e248e0853	fix(ops): 修正卡片标题翻译 - 卡片标题显示"请求" - 卡片内部标签保持"请求数"	2026-01-12 14:05:10 +08:00
ianshaw	2a0758bdfe	feat(gateway): 添加流超时处理机制 - 添加 StreamTimeoutSettings 配置结构体和系统设置 - 实现 TimeoutCounterCache Redis 计数器用于累计超时次数 - 在 RateLimitService 添加 HandleStreamTimeout 方法 - 在 gateway_service、openai_gateway_service、antigravity_gateway_service 中调用超时处理 - 添加后端 API 端点 GET/PUT /admin/settings/stream-timeout - 添加前端配置界面到系统设置页面 - 支持配置：启用开关、超时阈值、处理方式、暂停时长、触发阈值、阈值窗口。默认配置： - 启用：true - 超时阈值：60秒 - 处理方式：临时不可调度 - 暂停时长：5分钟 - 触发阈值：3次 - 阈值窗口：10分钟	2026-01-11 21:54:52 -08:00
IanShaw027	f55ba3f6c1	fix(ops): 优化卡片标题和明细筛选逻辑 - 将"请求数"改为"请求" - SLA卡片明细只显示错误请求（kind='error'） - TTFT卡片明细按延迟降序排序	2026-01-12 13:00:39 +08:00
IanShaw027	db51e65b42	chore: 添加ESLint忽略配置 - 添加.eslintignore文件	2026-01-12 11:44:34 +08:00
IanShaw027	72a2ed958b	feat(ops): 看板上应用指标阈值显示 - 在OpsDashboard中加载阈值配置 - 在OpsDashboardHeader中根据阈值判断指标是否超标 - 超出阈值的指标显示为红色（SLA低于阈值也显示红色） - 用Icon组件替换emoji表情	2026-01-12 11:44:14 +08:00
IanShaw027	d0b91a40d4	feat(ops): 添加指标阈值配置UI - 在OpsSettingsDialog中添加指标阈值配置表单 - 在OpsRuntimeSettingsCard中添加阈值配置区域 - 添加阈值验证逻辑 - 更新国际化文本	2026-01-12 11:43:54 +08:00
IanShaw027	bd74bf7994	fix(ops): 添加brain图标替换emoji表情 - 在Icon组件中添加brain图标 - 用于替换运维诊断中的emoji表情	2026-01-12 11:43:35 +08:00
IanShaw027	f28d4b78e7	feat(ops): 前端添加指标阈值类型定义和API - 添加OpsMetricThresholds类型定义 - 新增getMetricThresholds和updateMetricThresholds API方法	2026-01-12 11:43:15 +08:00
IanShaw027	7536dbfee5	feat(ops): 后端添加指标阈值管理API - 新增GetMetricThresholds和UpdateMetricThresholds接口 - 支持配置SLA、延迟P99、TTFT P99、请求错误率、上游错误率阈值 - 添加参数验证逻辑 - 提供默认阈值配置	2026-01-12 11:42:56 +08:00