mirror of
https://gitee.com/wanwujie/sub2api
synced 2026-04-03 06:52:13 +08:00
Merge pull request #908 from wucm667/fix/ops-alert-group-account-metrics
fix: 补充缺失的组级和账户级运维告警指标
This commit is contained in:
@@ -23,6 +23,13 @@ var validOpsAlertMetricTypes = []string{
 	"cpu_usage_percent",
 	"memory_usage_percent",
 	"concurrency_queue_depth",
+	"group_available_accounts",
+	"group_available_ratio",
+	"group_rate_limit_ratio",
+	"account_rate_limited_count",
+	"account_error_count",
+	"account_error_ratio",
+	"overload_account_count",
 }
 
 var validOpsAlertMetricTypeSet = func() map[string]struct{} {
@@ -82,7 +89,10 @@ func isPercentOrRateMetric(metricType string) bool {
 		"error_rate",
 		"upstream_error_rate",
 		"cpu_usage_percent",
-		"memory_usage_percent":
+		"memory_usage_percent",
+		"group_available_ratio",
+		"group_rate_limit_ratio",
+		"account_error_ratio":
 		return true
 	default:
 		return false
@@ -506,6 +506,48 @@ func (s *OpsAlertEvaluatorService) computeRuleMetric(
 		return float64(countAccountsByCondition(availability.Accounts, func(acc *AccountAvailability) bool {
 			return acc.HasError && acc.TempUnschedulableUntil == nil
 		})), true
+	case "group_rate_limit_ratio":
+		if groupID == nil || *groupID <= 0 {
+			return 0, false
+		}
+		if s == nil || s.opsService == nil {
+			return 0, false
+		}
+		availability, err := s.opsService.GetAccountAvailability(ctx, platform, groupID)
+		if err != nil || availability == nil {
+			return 0, false
+		}
+		if availability.Group == nil || availability.Group.TotalAccounts <= 0 {
+			return 0, true
+		}
+		return (float64(availability.Group.RateLimitCount) / float64(availability.Group.TotalAccounts)) * 100, true
+	case "account_error_ratio":
+		if s == nil || s.opsService == nil {
+			return 0, false
+		}
+		availability, err := s.opsService.GetAccountAvailability(ctx, platform, groupID)
+		if err != nil || availability == nil {
+			return 0, false
+		}
+		total := int64(len(availability.Accounts))
+		if total <= 0 {
+			return 0, true
+		}
+		errorCount := countAccountsByCondition(availability.Accounts, func(acc *AccountAvailability) bool {
+			return acc.HasError && acc.TempUnschedulableUntil == nil
+		})
+		return (float64(errorCount) / float64(total)) * 100, true
+	case "overload_account_count":
+		if s == nil || s.opsService == nil {
+			return 0, false
+		}
+		availability, err := s.opsService.GetAccountAvailability(ctx, platform, groupID)
+		if err != nil || availability == nil {
+			return 0, false
+		}
+		return float64(countAccountsByCondition(availability.Accounts, func(acc *AccountAvailability) bool {
+			return acc.IsOverloaded
+		})), true
 	}
 
 	overview, err := s.opsRepo.GetDashboardOverview(ctx, &OpsDashboardFilter{
Reference in New Issue
Block a user