mirror of
https://gitee.com/wanwujie/sub2api
synced 2026-05-04 21:20:51 +08:00
Background / 背景 The ops cleanup task currently rejects retention days < 1 in both validate and normalize, so operators who want minimal-history setups (e.g. high churn deployments that prefer near-realtime cleanup) cannot express that intent through the UI. The only options are 1+ days, which keeps at least 24h of history regardless of cron frequency. ops 清理任务目前在 validate 和 normalize 两处都拒绝小于 1 的保留天数, 让希望尽量不留历史的运维场景(高吞吐部署 + 想用近实时清理)无法通过 UI 表达。最低只能配 1,等于不管 cron 多频繁,至少都会保留 24 小时的历史。 Purpose / 目的 Let admins set retention days to 0, meaning "every scheduled cleanup run wipes the corresponding table(s) entirely". Combined with a more frequent cron (e.g. `0 * * * *`) this yields effectively rolling cleanup. 允许管理员把保留天数设为 0,语义为"每次定时清理时把对应表全部清空"。 搭配更频繁的 cron(比如每小时整点)即可获得近似滚动清理的效果。 Changes / 改动内容 Backend - service/ops_settings.go: validate accepts [0, 365]; normalize only refills default 30 when value is < 0 (negative is treated as legacy bad data, 0 is honoured) - service/ops_cleanup_service.go: introduce `opsCleanupPlan(now, days)` returning `(cutoff, truncate, ok)`. days==0 returns truncate=true and short-circuits to a new `truncateOpsTable` helper that uses `TRUNCATE TABLE` (O(1), no WAL, no VACUUM pressure). days>0 keeps the existing batched DELETE path unchanged. 
Empty tables skip TRUNCATE to avoid the ACCESS EXCLUSIVE lock entirely - Extract `isMissingRelationError` helper to dedupe the "table not yet created" tolerance shared by both delete and truncate paths - Add unit tests for `opsCleanupPlan` (three branches) and `isMissingRelationError` 后端 - service/ops_settings.go: validate 接受 [0, 365];normalize 仅在 < 0 时回填默认 30(负数视为脏数据,0 被尊重) - service/ops_cleanup_service.go: 抽 `opsCleanupPlan(now, days)` 返回 `(cutoff, truncate, ok)`。days==0 → truncate=true,走新增 `truncateOpsTable`(TRUNCATE TABLE,O(1),无 WAL、无 VACUUM 压力); days>0 仍走原批量 DELETE 路径,行为完全不变。空表跳过 TRUNCATE, 避免无意义的 ACCESS EXCLUSIVE 锁 - 抽 `isMissingRelationError` helper 复用 delete / truncate 两处的 "表不存在"宽容判断 - 补 `opsCleanupPlan` 三分支 + `isMissingRelationError` 单元测试 Frontend - OpsSettingsDialog.vue: validation accepts [0, 365]; input min=0 - i18n (zh/en): hint mentions "0 = wipe all on every cleanup", validation message updated to 0-365 range 前端 - OpsSettingsDialog.vue: 校验放宽到 [0, 365],input min 改 0 - i18n(zh/en):hint 补"0 = 每次清理时清空所有",错误提示改 0-365 Trade-offs / 取舍 - TRUNCATE requires ACCESS EXCLUSIVE lock briefly, but ops tables only have the cleanup task as a writer, so the lock is invisible to other workloads - Empty-table guard avoids the lock when there is nothing to clean - Negative values are still treated as legacy bad data and replaced with default 30 to preserve compatibility
569 lines
18 KiB
Go
569 lines
18 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"errors"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
const (
	// opsAlertEvaluatorLeaderLockKeyDefault is the default distributed-lock key
	// used to elect a single leader for the ops alert evaluator (see
	// defaultOpsAlertRuntimeSettings / normalizeOpsDistributedLockSettings).
	opsAlertEvaluatorLeaderLockKeyDefault = "ops:alert:evaluator:leader"
	// opsAlertEvaluatorLeaderLockTTLDefault is the default TTL applied to that
	// leader lock when the stored settings carry no positive TTL.
	opsAlertEvaluatorLeaderLockTTLDefault = 30 * time.Second
)
|
||
|
||
// =========================
|
||
// Email notification config
|
||
// =========================
|
||
|
||
func (s *OpsService) GetEmailNotificationConfig(ctx context.Context) (*OpsEmailNotificationConfig, error) {
|
||
defaultCfg := defaultOpsEmailNotificationConfig()
|
||
if s == nil || s.settingRepo == nil {
|
||
return defaultCfg, nil
|
||
}
|
||
if ctx == nil {
|
||
ctx = context.Background()
|
||
}
|
||
|
||
raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsEmailNotificationConfig)
|
||
if err != nil {
|
||
if errors.Is(err, ErrSettingNotFound) {
|
||
// Initialize defaults on first read (best-effort).
|
||
if b, mErr := json.Marshal(defaultCfg); mErr == nil {
|
||
_ = s.settingRepo.Set(ctx, SettingKeyOpsEmailNotificationConfig, string(b))
|
||
}
|
||
return defaultCfg, nil
|
||
}
|
||
return nil, err
|
||
}
|
||
|
||
cfg := &OpsEmailNotificationConfig{}
|
||
if err := json.Unmarshal([]byte(raw), cfg); err != nil {
|
||
// Corrupted JSON should not break ops UI; fall back to defaults.
|
||
return defaultCfg, nil
|
||
}
|
||
normalizeOpsEmailNotificationConfig(cfg)
|
||
return cfg, nil
|
||
}
|
||
|
||
// UpdateEmailNotificationConfig merges req into the currently stored email
// notification config, validates and normalizes the result, persists it, and
// returns the stored configuration. A nil Alert/Report section in req leaves
// that section untouched; within a present section every field overwrites,
// except a nil Recipients slice, which means "keep existing recipients".
func (s *OpsService) UpdateEmailNotificationConfig(ctx context.Context, req *OpsEmailNotificationConfigUpdateRequest) (*OpsEmailNotificationConfig, error) {
	if s == nil || s.settingRepo == nil {
		return nil, errors.New("setting repository not initialized")
	}
	if ctx == nil {
		ctx = context.Background()
	}
	if req == nil {
		return nil, errors.New("invalid request")
	}

	// Start from the current (or default) configuration so partial updates work.
	cfg, err := s.GetEmailNotificationConfig(ctx)
	if err != nil {
		return nil, err
	}

	if req.Alert != nil {
		cfg.Alert.Enabled = req.Alert.Enabled
		// nil means "leave recipients unchanged"; an empty slice clears them.
		if req.Alert.Recipients != nil {
			cfg.Alert.Recipients = req.Alert.Recipients
		}
		cfg.Alert.MinSeverity = strings.TrimSpace(req.Alert.MinSeverity)
		cfg.Alert.RateLimitPerHour = req.Alert.RateLimitPerHour
		cfg.Alert.BatchingWindowSeconds = req.Alert.BatchingWindowSeconds
		cfg.Alert.IncludeResolvedAlerts = req.Alert.IncludeResolvedAlerts
	}

	if req.Report != nil {
		cfg.Report.Enabled = req.Report.Enabled
		// nil means "leave recipients unchanged"; an empty slice clears them.
		if req.Report.Recipients != nil {
			cfg.Report.Recipients = req.Report.Recipients
		}
		cfg.Report.DailySummaryEnabled = req.Report.DailySummaryEnabled
		cfg.Report.DailySummarySchedule = strings.TrimSpace(req.Report.DailySummarySchedule)
		cfg.Report.WeeklySummaryEnabled = req.Report.WeeklySummaryEnabled
		cfg.Report.WeeklySummarySchedule = strings.TrimSpace(req.Report.WeeklySummarySchedule)
		cfg.Report.ErrorDigestEnabled = req.Report.ErrorDigestEnabled
		cfg.Report.ErrorDigestSchedule = strings.TrimSpace(req.Report.ErrorDigestSchedule)
		cfg.Report.ErrorDigestMinCount = req.Report.ErrorDigestMinCount
		cfg.Report.AccountHealthEnabled = req.Report.AccountHealthEnabled
		cfg.Report.AccountHealthSchedule = strings.TrimSpace(req.Report.AccountHealthSchedule)
		cfg.Report.AccountHealthErrorRateThreshold = req.Report.AccountHealthErrorRateThreshold
	}

	if err := validateOpsEmailNotificationConfig(cfg); err != nil {
		return nil, err
	}

	// Normalize after validation so defaults (e.g. empty schedules) are filled in.
	normalizeOpsEmailNotificationConfig(cfg)
	raw, err := json.Marshal(cfg)
	if err != nil {
		return nil, err
	}
	if err := s.settingRepo.Set(ctx, SettingKeyOpsEmailNotificationConfig, string(raw)); err != nil {
		return nil, err
	}
	return cfg, nil
}
|
||
|
||
// defaultOpsEmailNotificationConfig returns the built-in email notification
// defaults: alert mail enabled with no recipients, no severity filter, and no
// rate limiting/batching; all periodic reports disabled but with their cron
// schedules pre-filled so enabling them later needs no extra configuration.
func defaultOpsEmailNotificationConfig() *OpsEmailNotificationConfig {
	return &OpsEmailNotificationConfig{
		Alert: OpsEmailAlertConfig{
			Enabled:    true,
			Recipients: []string{},
			// "" is an accepted value (see validateOpsEmailNotificationConfig);
			// NOTE(review): presumably it means "no minimum severity" — confirm
			// against the alert sender.
			MinSeverity: "",
			// NOTE(review): 0 presumably disables rate limiting/batching —
			// confirm against the sender implementation.
			RateLimitPerHour:      0,
			BatchingWindowSeconds: 0,
			IncludeResolvedAlerts: false,
		},
		Report: OpsEmailReportConfig{
			Enabled:              false,
			Recipients:           []string{},
			DailySummaryEnabled:  false,
			DailySummarySchedule: "0 9 * * *", // daily at 09:00
			WeeklySummaryEnabled: false,
			WeeklySummarySchedule: "0 9 * * 1", // Mondays at 09:00
			ErrorDigestEnabled:    false,
			ErrorDigestSchedule:   "0 9 * * *",
			ErrorDigestMinCount:   10,
			AccountHealthEnabled:  false,
			AccountHealthSchedule: "0 9 * * *",
			AccountHealthErrorRateThreshold: 10.0,
		},
	}
}
|
||
|
||
func normalizeOpsEmailNotificationConfig(cfg *OpsEmailNotificationConfig) {
|
||
if cfg == nil {
|
||
return
|
||
}
|
||
if cfg.Alert.Recipients == nil {
|
||
cfg.Alert.Recipients = []string{}
|
||
}
|
||
if cfg.Report.Recipients == nil {
|
||
cfg.Report.Recipients = []string{}
|
||
}
|
||
|
||
cfg.Alert.MinSeverity = strings.TrimSpace(cfg.Alert.MinSeverity)
|
||
cfg.Report.DailySummarySchedule = strings.TrimSpace(cfg.Report.DailySummarySchedule)
|
||
cfg.Report.WeeklySummarySchedule = strings.TrimSpace(cfg.Report.WeeklySummarySchedule)
|
||
cfg.Report.ErrorDigestSchedule = strings.TrimSpace(cfg.Report.ErrorDigestSchedule)
|
||
cfg.Report.AccountHealthSchedule = strings.TrimSpace(cfg.Report.AccountHealthSchedule)
|
||
|
||
// Fill missing schedules with defaults to avoid breaking cron logic if clients send empty strings.
|
||
if cfg.Report.DailySummarySchedule == "" {
|
||
cfg.Report.DailySummarySchedule = "0 9 * * *"
|
||
}
|
||
if cfg.Report.WeeklySummarySchedule == "" {
|
||
cfg.Report.WeeklySummarySchedule = "0 9 * * 1"
|
||
}
|
||
if cfg.Report.ErrorDigestSchedule == "" {
|
||
cfg.Report.ErrorDigestSchedule = "0 9 * * *"
|
||
}
|
||
if cfg.Report.AccountHealthSchedule == "" {
|
||
cfg.Report.AccountHealthSchedule = "0 9 * * *"
|
||
}
|
||
}
|
||
|
||
func validateOpsEmailNotificationConfig(cfg *OpsEmailNotificationConfig) error {
|
||
if cfg == nil {
|
||
return errors.New("invalid config")
|
||
}
|
||
|
||
if cfg.Alert.RateLimitPerHour < 0 {
|
||
return errors.New("alert.rate_limit_per_hour must be >= 0")
|
||
}
|
||
if cfg.Alert.BatchingWindowSeconds < 0 {
|
||
return errors.New("alert.batching_window_seconds must be >= 0")
|
||
}
|
||
switch strings.TrimSpace(cfg.Alert.MinSeverity) {
|
||
case "", "critical", "warning", "info":
|
||
default:
|
||
return errors.New("alert.min_severity must be one of: critical, warning, info, or empty")
|
||
}
|
||
|
||
if cfg.Report.ErrorDigestMinCount < 0 {
|
||
return errors.New("report.error_digest_min_count must be >= 0")
|
||
}
|
||
if cfg.Report.AccountHealthErrorRateThreshold < 0 || cfg.Report.AccountHealthErrorRateThreshold > 100 {
|
||
return errors.New("report.account_health_error_rate_threshold must be between 0 and 100")
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// =========================
|
||
// Alert runtime settings
|
||
// =========================
|
||
|
||
// defaultOpsAlertRuntimeSettings returns the built-in alert runtime defaults:
// a 60-second evaluation interval, the leader lock enabled with the default
// key and TTL, and silencing disabled with no entries.
func defaultOpsAlertRuntimeSettings() *OpsAlertRuntimeSettings {
	return &OpsAlertRuntimeSettings{
		EvaluationIntervalSeconds: 60,
		DistributedLock: OpsDistributedLockSettings{
			Enabled:    true,
			Key:        opsAlertEvaluatorLeaderLockKeyDefault,
			TTLSeconds: int(opsAlertEvaluatorLeaderLockTTLDefault.Seconds()),
		},
		Silencing: OpsAlertSilencingSettings{
			Enabled:            false,
			GlobalUntilRFC3339: "",
			GlobalReason:       "",
			Entries:            []OpsAlertSilenceEntry{},
		},
	}
}
|
||
|
||
func normalizeOpsDistributedLockSettings(s *OpsDistributedLockSettings, defaultKey string, defaultTTLSeconds int) {
|
||
if s == nil {
|
||
return
|
||
}
|
||
s.Key = strings.TrimSpace(s.Key)
|
||
if s.Key == "" {
|
||
s.Key = defaultKey
|
||
}
|
||
if s.TTLSeconds <= 0 {
|
||
s.TTLSeconds = defaultTTLSeconds
|
||
}
|
||
}
|
||
|
||
func normalizeOpsAlertSilencingSettings(s *OpsAlertSilencingSettings) {
|
||
if s == nil {
|
||
return
|
||
}
|
||
s.GlobalUntilRFC3339 = strings.TrimSpace(s.GlobalUntilRFC3339)
|
||
s.GlobalReason = strings.TrimSpace(s.GlobalReason)
|
||
if s.Entries == nil {
|
||
s.Entries = []OpsAlertSilenceEntry{}
|
||
}
|
||
for i := range s.Entries {
|
||
s.Entries[i].UntilRFC3339 = strings.TrimSpace(s.Entries[i].UntilRFC3339)
|
||
s.Entries[i].Reason = strings.TrimSpace(s.Entries[i].Reason)
|
||
}
|
||
}
|
||
|
||
func validateOpsDistributedLockSettings(s OpsDistributedLockSettings) error {
|
||
if strings.TrimSpace(s.Key) == "" {
|
||
return errors.New("distributed_lock.key is required")
|
||
}
|
||
if s.TTLSeconds <= 0 || s.TTLSeconds > int((24*time.Hour).Seconds()) {
|
||
return errors.New("distributed_lock.ttl_seconds must be between 1 and 86400")
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func validateOpsAlertSilencingSettings(s OpsAlertSilencingSettings) error {
|
||
parse := func(raw string) error {
|
||
if strings.TrimSpace(raw) == "" {
|
||
return nil
|
||
}
|
||
if _, err := time.Parse(time.RFC3339, raw); err != nil {
|
||
return errors.New("silencing time must be RFC3339")
|
||
}
|
||
return nil
|
||
}
|
||
|
||
if err := parse(s.GlobalUntilRFC3339); err != nil {
|
||
return err
|
||
}
|
||
for _, entry := range s.Entries {
|
||
if strings.TrimSpace(entry.UntilRFC3339) == "" {
|
||
return errors.New("silencing.entries.until_rfc3339 is required")
|
||
}
|
||
if _, err := time.Parse(time.RFC3339, entry.UntilRFC3339); err != nil {
|
||
return errors.New("silencing.entries.until_rfc3339 must be RFC3339")
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (s *OpsService) GetOpsAlertRuntimeSettings(ctx context.Context) (*OpsAlertRuntimeSettings, error) {
|
||
defaultCfg := defaultOpsAlertRuntimeSettings()
|
||
if s == nil || s.settingRepo == nil {
|
||
return defaultCfg, nil
|
||
}
|
||
if ctx == nil {
|
||
ctx = context.Background()
|
||
}
|
||
|
||
raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsAlertRuntimeSettings)
|
||
if err != nil {
|
||
if errors.Is(err, ErrSettingNotFound) {
|
||
if b, mErr := json.Marshal(defaultCfg); mErr == nil {
|
||
_ = s.settingRepo.Set(ctx, SettingKeyOpsAlertRuntimeSettings, string(b))
|
||
}
|
||
return defaultCfg, nil
|
||
}
|
||
return nil, err
|
||
}
|
||
|
||
cfg := &OpsAlertRuntimeSettings{}
|
||
if err := json.Unmarshal([]byte(raw), cfg); err != nil {
|
||
return defaultCfg, nil
|
||
}
|
||
|
||
if cfg.EvaluationIntervalSeconds <= 0 {
|
||
cfg.EvaluationIntervalSeconds = defaultCfg.EvaluationIntervalSeconds
|
||
}
|
||
normalizeOpsDistributedLockSettings(&cfg.DistributedLock, opsAlertEvaluatorLeaderLockKeyDefault, defaultCfg.DistributedLock.TTLSeconds)
|
||
normalizeOpsAlertSilencingSettings(&cfg.Silencing)
|
||
|
||
return cfg, nil
|
||
}
|
||
|
||
// UpdateOpsAlertRuntimeSettings validates, normalizes, and persists the alert
// runtime settings, then returns a detached copy of exactly what was stored.
// The distributed-lock and silencing sub-configs are only validated when they
// are enabled, so stale values in a disabled section do not block saves.
func (s *OpsService) UpdateOpsAlertRuntimeSettings(ctx context.Context, cfg *OpsAlertRuntimeSettings) (*OpsAlertRuntimeSettings, error) {
	if s == nil || s.settingRepo == nil {
		return nil, errors.New("setting repository not initialized")
	}
	if ctx == nil {
		ctx = context.Background()
	}
	if cfg == nil {
		return nil, errors.New("invalid config")
	}

	// Interval bounds: 1 second up to 24 hours.
	if cfg.EvaluationIntervalSeconds < 1 || cfg.EvaluationIntervalSeconds > int((24*time.Hour).Seconds()) {
		return nil, errors.New("evaluation_interval_seconds must be between 1 and 86400")
	}
	if cfg.DistributedLock.Enabled {
		if err := validateOpsDistributedLockSettings(cfg.DistributedLock); err != nil {
			return nil, err
		}
	}
	if cfg.Silencing.Enabled {
		if err := validateOpsAlertSilencingSettings(cfg.Silencing); err != nil {
			return nil, err
		}
	}

	// Normalize after validation so empty key/TTL fields get defaults filled in.
	defaultCfg := defaultOpsAlertRuntimeSettings()
	normalizeOpsDistributedLockSettings(&cfg.DistributedLock, opsAlertEvaluatorLeaderLockKeyDefault, defaultCfg.DistributedLock.TTLSeconds)
	normalizeOpsAlertSilencingSettings(&cfg.Silencing)

	raw, err := json.Marshal(cfg)
	if err != nil {
		return nil, err
	}
	if err := s.settingRepo.Set(ctx, SettingKeyOpsAlertRuntimeSettings, string(raw)); err != nil {
		return nil, err
	}

	// Return a fresh copy (avoid callers holding pointers into internal slices that may be mutated).
	updated := &OpsAlertRuntimeSettings{}
	_ = json.Unmarshal(raw, updated)
	return updated, nil
}
|
||
|
||
// =========================
|
||
// Advanced settings
|
||
// =========================
|
||
|
||
// defaultOpsAdvancedSettings returns the built-in advanced ops defaults:
// cleanup and aggregation disabled, 30-day retention for every data series,
// and a 30-second auto-refresh interval (auto refresh itself disabled).
func defaultOpsAdvancedSettings() *OpsAdvancedSettings {
	return &OpsAdvancedSettings{
		DataRetention: OpsDataRetentionSettings{
			CleanupEnabled:             false,
			CleanupSchedule:            "0 2 * * *", // daily at 02:00
			ErrorLogRetentionDays:      30,
			MinuteMetricsRetentionDays: 30,
			HourlyMetricsRetentionDays: 30,
		},
		Aggregation: OpsAggregationSettings{
			AggregationEnabled: false,
		},
		IgnoreCountTokensErrors:         true,  // count_tokens 404s are expected behavior; ignored by default
		IgnoreContextCanceled:           true,  // Default to true - client disconnects are not errors
		IgnoreNoAvailableAccounts:       false, // Default to false - this is a real routing issue
		IgnoreInsufficientBalanceErrors: false, // not ignored by default; insufficient balance may need attention
		DisplayOpenAITokenStats:         false,
		DisplayAlertEvents:              true,
		AutoRefreshEnabled:              false,
		AutoRefreshIntervalSec:          30,
	}
}
|
||
|
||
func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
|
||
if cfg == nil {
|
||
return
|
||
}
|
||
cfg.DataRetention.CleanupSchedule = strings.TrimSpace(cfg.DataRetention.CleanupSchedule)
|
||
if cfg.DataRetention.CleanupSchedule == "" {
|
||
cfg.DataRetention.CleanupSchedule = "0 2 * * *"
|
||
}
|
||
// 保留天数:0 表示每次定时清理全部(清空所有),> 0 表示按天数保留;
|
||
// 仅在拿到非法的负数时回填默认值,避免覆盖用户主动设的 0。
|
||
if cfg.DataRetention.ErrorLogRetentionDays < 0 {
|
||
cfg.DataRetention.ErrorLogRetentionDays = 30
|
||
}
|
||
if cfg.DataRetention.MinuteMetricsRetentionDays < 0 {
|
||
cfg.DataRetention.MinuteMetricsRetentionDays = 30
|
||
}
|
||
if cfg.DataRetention.HourlyMetricsRetentionDays < 0 {
|
||
cfg.DataRetention.HourlyMetricsRetentionDays = 30
|
||
}
|
||
// Normalize auto refresh interval (default 30 seconds)
|
||
if cfg.AutoRefreshIntervalSec <= 0 {
|
||
cfg.AutoRefreshIntervalSec = 30
|
||
}
|
||
}
|
||
|
||
func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
|
||
if cfg == nil {
|
||
return errors.New("invalid config")
|
||
}
|
||
// 保留天数:0 表示每次清理全部,1-365 表示按天数保留。
|
||
if cfg.DataRetention.ErrorLogRetentionDays < 0 || cfg.DataRetention.ErrorLogRetentionDays > 365 {
|
||
return errors.New("error_log_retention_days must be between 0 and 365")
|
||
}
|
||
if cfg.DataRetention.MinuteMetricsRetentionDays < 0 || cfg.DataRetention.MinuteMetricsRetentionDays > 365 {
|
||
return errors.New("minute_metrics_retention_days must be between 0 and 365")
|
||
}
|
||
if cfg.DataRetention.HourlyMetricsRetentionDays < 0 || cfg.DataRetention.HourlyMetricsRetentionDays > 365 {
|
||
return errors.New("hourly_metrics_retention_days must be between 0 and 365")
|
||
}
|
||
if cfg.AutoRefreshIntervalSec < 15 || cfg.AutoRefreshIntervalSec > 300 {
|
||
return errors.New("auto_refresh_interval_seconds must be between 15 and 300")
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func (s *OpsService) GetOpsAdvancedSettings(ctx context.Context) (*OpsAdvancedSettings, error) {
|
||
defaultCfg := defaultOpsAdvancedSettings()
|
||
if s == nil || s.settingRepo == nil {
|
||
return defaultCfg, nil
|
||
}
|
||
if ctx == nil {
|
||
ctx = context.Background()
|
||
}
|
||
|
||
raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsAdvancedSettings)
|
||
if err != nil {
|
||
if errors.Is(err, ErrSettingNotFound) {
|
||
if b, mErr := json.Marshal(defaultCfg); mErr == nil {
|
||
_ = s.settingRepo.Set(ctx, SettingKeyOpsAdvancedSettings, string(b))
|
||
}
|
||
return defaultCfg, nil
|
||
}
|
||
return nil, err
|
||
}
|
||
|
||
cfg := defaultOpsAdvancedSettings()
|
||
if err := json.Unmarshal([]byte(raw), cfg); err != nil {
|
||
return defaultCfg, nil
|
||
}
|
||
|
||
normalizeOpsAdvancedSettings(cfg)
|
||
return cfg, nil
|
||
}
|
||
|
||
func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdvancedSettings) (*OpsAdvancedSettings, error) {
|
||
if s == nil || s.settingRepo == nil {
|
||
return nil, errors.New("setting repository not initialized")
|
||
}
|
||
if ctx == nil {
|
||
ctx = context.Background()
|
||
}
|
||
if cfg == nil {
|
||
return nil, errors.New("invalid config")
|
||
}
|
||
|
||
if err := validateOpsAdvancedSettings(cfg); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
normalizeOpsAdvancedSettings(cfg)
|
||
raw, err := json.Marshal(cfg)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if err := s.settingRepo.Set(ctx, SettingKeyOpsAdvancedSettings, string(raw)); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
updated := &OpsAdvancedSettings{}
|
||
_ = json.Unmarshal(raw, updated)
|
||
return updated, nil
|
||
}
|
||
|
||
// =========================
|
||
// Metric thresholds
|
||
// =========================
|
||
|
||
// SettingKeyOpsMetricThresholds is the settings-store key under which the ops
// metric threshold configuration is persisted as JSON.
const SettingKeyOpsMetricThresholds = "ops_metric_thresholds"
|
||
|
||
func defaultOpsMetricThresholds() *OpsMetricThresholds {
|
||
slaMin := 99.5
|
||
ttftMax := 500.0
|
||
reqErrMax := 5.0
|
||
upstreamErrMax := 5.0
|
||
return &OpsMetricThresholds{
|
||
SLAPercentMin: &slaMin,
|
||
TTFTp99MsMax: &ttftMax,
|
||
RequestErrorRatePercentMax: &reqErrMax,
|
||
UpstreamErrorRatePercentMax: &upstreamErrMax,
|
||
}
|
||
}
|
||
|
||
func (s *OpsService) GetMetricThresholds(ctx context.Context) (*OpsMetricThresholds, error) {
|
||
defaultCfg := defaultOpsMetricThresholds()
|
||
if s == nil || s.settingRepo == nil {
|
||
return defaultCfg, nil
|
||
}
|
||
if ctx == nil {
|
||
ctx = context.Background()
|
||
}
|
||
|
||
raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsMetricThresholds)
|
||
if err != nil {
|
||
if errors.Is(err, ErrSettingNotFound) {
|
||
if b, mErr := json.Marshal(defaultCfg); mErr == nil {
|
||
_ = s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(b))
|
||
}
|
||
return defaultCfg, nil
|
||
}
|
||
return nil, err
|
||
}
|
||
|
||
cfg := &OpsMetricThresholds{}
|
||
if err := json.Unmarshal([]byte(raw), cfg); err != nil {
|
||
return defaultCfg, nil
|
||
}
|
||
|
||
return cfg, nil
|
||
}
|
||
|
||
func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricThresholds) (*OpsMetricThresholds, error) {
|
||
if s == nil || s.settingRepo == nil {
|
||
return nil, errors.New("setting repository not initialized")
|
||
}
|
||
if ctx == nil {
|
||
ctx = context.Background()
|
||
}
|
||
if cfg == nil {
|
||
return nil, errors.New("invalid config")
|
||
}
|
||
|
||
// Validate thresholds
|
||
if cfg.SLAPercentMin != nil && (*cfg.SLAPercentMin < 0 || *cfg.SLAPercentMin > 100) {
|
||
return nil, errors.New("sla_percent_min must be between 0 and 100")
|
||
}
|
||
if cfg.TTFTp99MsMax != nil && *cfg.TTFTp99MsMax < 0 {
|
||
return nil, errors.New("ttft_p99_ms_max must be >= 0")
|
||
}
|
||
if cfg.RequestErrorRatePercentMax != nil && (*cfg.RequestErrorRatePercentMax < 0 || *cfg.RequestErrorRatePercentMax > 100) {
|
||
return nil, errors.New("request_error_rate_percent_max must be between 0 and 100")
|
||
}
|
||
if cfg.UpstreamErrorRatePercentMax != nil && (*cfg.UpstreamErrorRatePercentMax < 0 || *cfg.UpstreamErrorRatePercentMax > 100) {
|
||
return nil, errors.New("upstream_error_rate_percent_max must be between 0 and 100")
|
||
}
|
||
|
||
raw, err := json.Marshal(cfg)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
if err := s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(raw)); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
updated := &OpsMetricThresholds{}
|
||
_ = json.Unmarshal(raw, updated)
|
||
return updated, nil
|
||
}
|