diff --git a/backend/internal/handler/admin/ops_alerts_handler.go b/backend/internal/handler/admin/ops_alerts_handler.go index c9da19c7..edc8c7f7 100644 --- a/backend/internal/handler/admin/ops_alerts_handler.go +++ b/backend/internal/handler/admin/ops_alerts_handler.go @@ -23,6 +23,13 @@ var validOpsAlertMetricTypes = []string{ "cpu_usage_percent", "memory_usage_percent", "concurrency_queue_depth", + "group_available_accounts", + "group_available_ratio", + "group_rate_limit_ratio", + "account_rate_limited_count", + "account_error_count", + "account_error_ratio", + "overload_account_count", } var validOpsAlertMetricTypeSet = func() map[string]struct{} { @@ -82,7 +89,10 @@ func isPercentOrRateMetric(metricType string) bool { "error_rate", "upstream_error_rate", "cpu_usage_percent", - "memory_usage_percent": + "memory_usage_percent", + "group_available_ratio", + "group_rate_limit_ratio", + "account_error_ratio": return true default: return false diff --git a/backend/internal/service/ops_alert_evaluator_service.go b/backend/internal/service/ops_alert_evaluator_service.go index 169a5e32..88883180 100644 --- a/backend/internal/service/ops_alert_evaluator_service.go +++ b/backend/internal/service/ops_alert_evaluator_service.go @@ -506,6 +506,48 @@ func (s *OpsAlertEvaluatorService) computeRuleMetric( return float64(countAccountsByCondition(availability.Accounts, func(acc *AccountAvailability) bool { return acc.HasError && acc.TempUnschedulableUntil == nil })), true + case "group_rate_limit_ratio": + if groupID == nil || *groupID <= 0 { + return 0, false + } + if s == nil || s.opsService == nil { + return 0, false + } + availability, err := s.opsService.GetAccountAvailability(ctx, platform, groupID) + if err != nil || availability == nil { + return 0, false + } + if availability.Group == nil || availability.Group.TotalAccounts <= 0 { + return 0, true + } + return (float64(availability.Group.RateLimitCount) / float64(availability.Group.TotalAccounts)) * 100, true + case "account_error_ratio": + if s == nil || s.opsService == nil { + return 0, false + } + availability, err := s.opsService.GetAccountAvailability(ctx, platform, groupID) + if err != nil || availability == nil { + return 0, false + } + total := int64(len(availability.Accounts)) + if total <= 0 { + return 0, true + } + errorCount := countAccountsByCondition(availability.Accounts, func(acc *AccountAvailability) bool { + return acc.HasError && acc.TempUnschedulableUntil == nil + }) + return (float64(errorCount) / float64(total)) * 100, true + case "overload_account_count": + if s == nil || s.opsService == nil { + return 0, false + } + availability, err := s.opsService.GetAccountAvailability(ctx, platform, groupID) + if err != nil || availability == nil { + return 0, false + } + return float64(countAccountsByCondition(availability.Accounts, func(acc *AccountAvailability) bool { + return acc.IsOverloaded + })), true } overview, err := s.opsRepo.GetDashboardOverview(ctx, &OpsDashboardFilter{