/**
 * Admin Ops API endpoints (vNext)
 * - Error logs list/detail + retry (client/upstream)
 * - Dashboard overview (raw path)
 *
 * NOTE(review): generic type arguments in this module (`Promise<...>`,
 * `Record<...>`, `PaginatedResponse<...>`, `apiClient.get<...>`) were
 * reconstructed from the surrounding declarations - confirm them against the
 * backend contract.
 */

import { apiClient } from '../client'
import type { PaginatedResponse } from '@/types'

// ==================== Shared primitives ====================

export type OpsRetryMode = 'client' | 'upstream'
export type OpsQueryMode = 'auto' | 'raw' | 'preagg'

/** Relative time windows accepted by the dashboard and request-detail endpoints. */
export type OpsDashboardTimeRange = '5m' | '30m' | '1h' | '6h' | '24h'

/** Per-request options shared by the cancellable dashboard calls. */
export interface OpsRequestOptions {
  /** Forwarded to the HTTP client so the caller can abort the request. */
  signal?: AbortSignal
}

/** Query params shared by every `/admin/ops/dashboard/*` endpoint in this module. */
export interface OpsDashboardQueryParams {
  time_range?: OpsDashboardTimeRange
  start_time?: string
  end_time?: string
  platform?: string
  group_id?: number | null
  mode?: OpsQueryMode
}

// ==================== Retry ====================

export interface OpsRetryRequest {
  mode: OpsRetryMode
  pinned_account_id?: number
  force?: boolean
}

export interface OpsRetryAttempt {
  id: number
  created_at: string

  requested_by_user_id: number
  source_error_id: number
  mode: string
  pinned_account_id?: number | null
  pinned_account_name?: string

  status: string
  started_at?: string | null
  finished_at?: string | null
  duration_ms?: number | null

  success?: boolean | null
  http_status_code?: number | null
  upstream_request_id?: string | null
  used_account_id?: number | null
  used_account_name?: string
  response_preview?: string | null
  response_truncated?: boolean | null

  result_request_id?: string | null
  result_error_id?: number | null
  error_message?: string | null
}

export type OpsUpstreamErrorEvent = {
  at_unix_ms?: number
  platform?: string
  account_id?: number
  account_name?: string
  upstream_status_code?: number
  upstream_request_id?: string
  upstream_request_body?: string
  kind?: string
  message?: string
  detail?: string
}

export interface OpsRetryResult {
  attempt_id: number
  mode: OpsRetryMode
  status: 'running' | 'succeeded' | 'failed' | string

  pinned_account_id?: number | null
  used_account_id?: number | null

  http_status_code: number
  upstream_request_id: string

  response_preview: string
  response_truncated: boolean

  error_message: string

  started_at: string
  finished_at: string
  duration_ms: number
}

// ==================== Dashboard ====================

export interface OpsDashboardOverview {
  start_time: string
  end_time: string
  platform: string
  group_id?: number | null

  health_score?: number

  system_metrics?: OpsSystemMetricsSnapshot | null
  job_heartbeats?: OpsJobHeartbeat[] | null

  success_count: number
  error_count_total: number
  business_limited_count: number
  error_count_sla: number
  request_count_total: number
  request_count_sla: number

  token_consumed: number

  sla: number
  error_rate: number
  upstream_error_rate: number
  upstream_error_count_excl_429_529: number
  upstream_429_count: number
  upstream_529_count: number

  qps: {
    current: number
    peak: number
    avg: number
  }
  tps: {
    current: number
    peak: number
    avg: number
  }

  duration: OpsPercentiles
  ttft: OpsPercentiles
}

export interface OpsPercentiles {
  p50_ms?: number | null
  p90_ms?: number | null
  p95_ms?: number | null
  p99_ms?: number | null
  avg_ms?: number | null
  max_ms?: number | null
}

export interface OpsThroughputTrendPoint {
  bucket_start: string
  request_count: number
  token_consumed: number
  switch_count?: number
  qps: number
  tps: number
}

export interface OpsThroughputPlatformBreakdownItem {
  platform: string
  request_count: number
  token_consumed: number
}

export interface OpsThroughputGroupBreakdownItem {
  group_id: number
  group_name: string
  request_count: number
  token_consumed: number
}

export interface OpsThroughputTrendResponse {
  bucket: string
  points: OpsThroughputTrendPoint[]
  by_platform?: OpsThroughputPlatformBreakdownItem[]
  top_groups?: OpsThroughputGroupBreakdownItem[]
}

export type OpsRequestKind = 'success' | 'error'
export type OpsRequestDetailsKind = OpsRequestKind | 'all'
export type OpsRequestDetailsSort = 'created_at_desc' | 'duration_desc'

export interface OpsRequestDetail {
  kind: OpsRequestKind
  created_at: string
  request_id: string

  platform?: string
  model?: string
  duration_ms?: number | null
  status_code?: number | null

  error_id?: number | null
  phase?: string
  severity?: string
  message?: string

  user_id?: number | null
  api_key_id?: number | null
  account_id?: number | null
  group_id?: number | null

  stream?: boolean
}

export interface OpsRequestDetailsParams {
  time_range?: OpsDashboardTimeRange
  start_time?: string
  end_time?: string

  kind?: OpsRequestDetailsKind

  platform?: string
  group_id?: number | null

  user_id?: number
  api_key_id?: number
  account_id?: number

  model?: string
  request_id?: string
  q?: string

  min_duration_ms?: number
  max_duration_ms?: number

  sort?: OpsRequestDetailsSort

  page?: number
  page_size?: number
}

export type OpsRequestDetailsResponse = PaginatedResponse<OpsRequestDetail>

export interface OpsLatencyHistogramBucket {
  range: string
  count: number
}

export interface OpsLatencyHistogramResponse {
  start_time: string
  end_time: string
  platform: string
  group_id?: number | null

  total_requests: number
  buckets: OpsLatencyHistogramBucket[]
}

export interface OpsErrorTrendPoint {
  bucket_start: string
  error_count_total: number
  business_limited_count: number
  error_count_sla: number
  upstream_error_count_excl_429_529: number
  upstream_429_count: number
  upstream_529_count: number
}

export interface OpsErrorTrendResponse {
  bucket: string
  points: OpsErrorTrendPoint[]
}

export interface OpsErrorDistributionItem {
  status_code: number
  total: number
  sla: number
  business_limited: number
}

export interface OpsErrorDistributionResponse {
  total: number
  items: OpsErrorDistributionItem[]
}

export interface OpsDashboardSnapshotV2Response {
  generated_at: string
  overview: OpsDashboardOverview
  throughput_trend: OpsThroughputTrendResponse
  error_trend: OpsErrorTrendResponse
}

export type OpsOpenAITokenStatsTimeRange = '30m' | '1h' | '1d' | '15d' | '30d'

export interface OpsOpenAITokenStatsItem {
  model: string
  request_count: number
  avg_tokens_per_sec?: number | null
  avg_first_token_ms?: number | null
  total_output_tokens: number
  avg_duration_ms: number
  requests_with_first_token: number
}

export interface OpsOpenAITokenStatsResponse {
  time_range: OpsOpenAITokenStatsTimeRange
  start_time: string
  end_time: string
  platform?: string
  group_id?: number | null
  items: OpsOpenAITokenStatsItem[]
  total: number
  page?: number
  page_size?: number
  top_n?: number | null
}

export interface OpsOpenAITokenStatsParams {
  time_range?: OpsOpenAITokenStatsTimeRange
  platform?: string
  group_id?: number | null
  page?: number
  page_size?: number
  top_n?: number
}

export interface OpsSystemMetricsSnapshot {
  id: number
  created_at: string
  window_minutes: number

  cpu_usage_percent?: number | null
  memory_used_mb?: number | null
  memory_total_mb?: number | null
  memory_usage_percent?: number | null

  db_ok?: boolean | null
  redis_ok?: boolean | null

  // Config-derived limits (best-effort) for rendering "current vs max".
  db_max_open_conns?: number | null
  redis_pool_size?: number | null

  redis_conn_total?: number | null
  redis_conn_idle?: number | null

  db_conn_active?: number | null
  db_conn_idle?: number | null
  db_conn_waiting?: number | null

  goroutine_count?: number | null
  concurrency_queue_depth?: number | null
  account_switch_count?: number | null
}

export interface OpsJobHeartbeat {
  job_name: string
  last_run_at?: string | null
  last_success_at?: string | null
  last_error_at?: string | null
  last_error?: string | null
  last_duration_ms?: number | null
  last_result?: string | null
  updated_at: string
}

// ==================== Concurrency / availability / realtime ====================

export interface PlatformConcurrencyInfo {
  platform: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}

export interface GroupConcurrencyInfo {
  group_id: number
  group_name: string
  platform: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}

export interface AccountConcurrencyInfo {
  account_id: number
  account_name?: string
  platform: string
  group_id: number
  group_name: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}

export interface OpsConcurrencyStatsResponse {
  enabled: boolean
  platform: Record<string, PlatformConcurrencyInfo>
  group: Record<string, GroupConcurrencyInfo>
  account: Record<string, AccountConcurrencyInfo>
  timestamp?: string
}

export interface UserConcurrencyInfo {
  user_id: number
  user_email: string
  username: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}

export interface OpsUserConcurrencyStatsResponse {
  enabled: boolean
  user: Record<string, UserConcurrencyInfo>
  timestamp?: string
}

/**
 * Builds the optional platform/group filter shared by the scoped stats endpoints.
 * `platform` is sent only when non-empty; `group_id` only for positive numeric ids.
 */
function buildScopeParams(platform?: string, groupId?: number | null): Record<string, unknown> {
  const params: Record<string, unknown> = {}
  if (platform) {
    params.platform = platform
  }
  if (typeof groupId === 'number' && groupId > 0) {
    params.group_id = groupId
  }
  return params
}

/** GET `/admin/ops/concurrency`, optionally filtered by platform and group. */
export async function getConcurrencyStats(
  platform?: string,
  groupId?: number | null
): Promise<OpsConcurrencyStatsResponse> {
  const params = buildScopeParams(platform, groupId)
  const { data } = await apiClient.get<OpsConcurrencyStatsResponse>('/admin/ops/concurrency', { params })
  return data
}

/** GET `/admin/ops/user-concurrency`. */
export async function getUserConcurrencyStats(): Promise<OpsUserConcurrencyStatsResponse> {
  const { data } = await apiClient.get<OpsUserConcurrencyStatsResponse>('/admin/ops/user-concurrency')
  return data
}

export interface PlatformAvailability {
  platform: string
  total_accounts: number
  available_count: number
  rate_limit_count: number
  error_count: number
}

export interface GroupAvailability {
  group_id: number
  group_name: string
  platform: string
  total_accounts: number
  available_count: number
  rate_limit_count: number
  error_count: number
}

export interface AccountAvailability {
  account_id: number
  account_name: string
  platform: string
  group_id: number
  group_name: string
  status: string
  is_available: boolean
  is_rate_limited: boolean
  rate_limit_reset_at?: string
  rate_limit_remaining_sec?: number
  is_overloaded: boolean
  overload_until?: string
  overload_remaining_sec?: number
  has_error: boolean
  error_message?: string
}

export interface OpsAccountAvailabilityStatsResponse {
  enabled: boolean
  platform: Record<string, PlatformAvailability>
  group: Record<string, GroupAvailability>
  account: Record<string, AccountAvailability>
  timestamp?: string
}

/** GET `/admin/ops/account-availability`, optionally filtered by platform and group. */
export async function getAccountAvailabilityStats(
  platform?: string,
  groupId?: number | null
): Promise<OpsAccountAvailabilityStatsResponse> {
  const params = buildScopeParams(platform, groupId)
  const { data } = await apiClient.get<OpsAccountAvailabilityStatsResponse>('/admin/ops/account-availability', {
    params
  })
  return data
}

export interface OpsRateSummary {
  current: number
  peak: number
  avg: number
}

export interface OpsRealtimeTrafficSummary {
  window: string
  start_time: string
  end_time: string
  platform: string
  group_id?: number | null
  qps: OpsRateSummary
  tps: OpsRateSummary
}

export interface OpsRealtimeTrafficSummaryResponse {
  enabled: boolean
  summary: OpsRealtimeTrafficSummary | null
  timestamp?: string
}

/**
 * GET `/admin/ops/realtime-traffic` for the given window, optionally filtered
 * by platform and group.
 */
export async function getRealtimeTrafficSummary(
  window: string,
  platform?: string,
  groupId?: number | null
): Promise<OpsRealtimeTrafficSummaryResponse> {
  const params: Record<string, unknown> = { window, ...buildScopeParams(platform, groupId) }
  const { data } = await apiClient.get<OpsRealtimeTrafficSummaryResponse>('/admin/ops/realtime-traffic', { params })
  return data
}

// ==================== Realtime QPS WebSocket ====================

/**
 * Subscribe to realtime QPS updates via WebSocket.
 *
 * Note: browsers cannot set Authorization headers for WebSockets.
 * We authenticate via Sec-WebSocket-Protocol using a prefixed token item:
 *   ["sub2api-admin", "jwt.<token>"]
 */
export interface SubscribeQPSOptions {
  /** Auth token; when null/undefined, `localStorage['auth_token']` is used instead. */
  token?: string | null
  onOpen?: () => void
  onClose?: (event: CloseEvent) => void
  onError?: (event: Event) => void
  /**
   * Called when the server closes with an application close code that indicates
   * reconnecting is not useful (e.g. feature flag disabled).
   */
  onFatalClose?: (event: CloseEvent) => void
  /**
   * More granular status updates for UI (connecting/reconnecting/offline/etc).
   */
  onStatusChange?: (status: OpsWSStatus) => void
  /**
   * Called when a reconnect is scheduled (helps display "retry in Xs").
   */
  onReconnectScheduled?: (info: { attempt: number; delayMs: number }) => void
  /** Host (and optional port) of the WS endpoint; falls back to VITE_WS_BASE_URL, then the page host. */
  wsBaseUrl?: string
  /**
   * Maximum reconnect attempts. Defaults to Infinity to keep the dashboard live.
   * Set to 0 to disable reconnect.
   *
   * The limit is only enforced once a connection has been established at least
   * once; before the first successful open, retries are not capped.
   */
  maxReconnectAttempts?: number
  /** Base of the exponential backoff (default 1000 ms). */
  reconnectBaseDelayMs?: number
  /** Upper bound of the backoff before jitter (default 30000 ms). */
  reconnectMaxDelayMs?: number
  /**
   * Stale connection detection (heartbeat-by-observation).
   * If no messages are received within this window, the socket is closed to trigger a reconnect.
   * Set to 0 to disable.
   */
  staleTimeoutMs?: number
  /**
   * How often to check staleness. Only used when `staleTimeoutMs > 0`.
   */
  staleCheckIntervalMs?: number
}

export type OpsWSStatus = 'connecting' | 'connected' | 'reconnecting' | 'offline' | 'closed'

/** Application-level close codes after which the client stops reconnecting. */
export const OPS_WS_CLOSE_CODES = {
  REALTIME_DISABLED: 4001
} as const

const OPS_WS_BASE_PROTOCOL = 'sub2api-admin'

/**
 * Opens the ops QPS WebSocket and keeps it alive with exponential-backoff
 * reconnects, online/offline awareness and stale-connection detection.
 *
 * @param onMessage Receives every successfully JSON-parsed message. The payload
 *   is typed `any` because its shape is not declared in this module.
 * @param options Connection, reconnect and lifecycle-callback settings.
 * @returns An unsubscribe function that stops reconnecting, removes the window
 *   listeners, clears timers and closes the socket.
 * @throws Whatever `new URL` / `new WebSocket` throws on the initial connect
 *   (e.g. a malformed base URL); listeners are removed before rethrowing.
 */
export function subscribeQPS(onMessage: (data: any) => void, options: SubscribeQPSOptions = {}): () => void {
  let ws: WebSocket | null = null
  let reconnectAttempts = 0
  const maxReconnectAttempts = Number.isFinite(options.maxReconnectAttempts as number)
    ? (options.maxReconnectAttempts as number)
    : Infinity
  const baseDelayMs = options.reconnectBaseDelayMs ?? 1000
  const maxDelayMs = options.reconnectMaxDelayMs ?? 30000
  let reconnectTimer: ReturnType<typeof setTimeout> | null = null
  let shouldReconnect = true
  let isConnecting = false
  let hasConnectedOnce = false
  let lastMessageAt = 0
  const staleTimeoutMs = options.staleTimeoutMs ?? 120_000
  const staleCheckIntervalMs = options.staleCheckIntervalMs ?? 30_000
  let staleTimer: ReturnType<typeof setInterval> | null = null

  const setStatus = (status: OpsWSStatus) => {
    options.onStatusChange?.(status)
  }

  const clearReconnectTimer = () => {
    if (reconnectTimer) {
      clearTimeout(reconnectTimer)
      reconnectTimer = null
    }
  }

  const clearStaleTimer = () => {
    if (staleTimer) {
      clearInterval(staleTimer)
      staleTimer = null
    }
  }

  const startStaleTimer = () => {
    clearStaleTimer()
    if (!staleTimeoutMs || staleTimeoutMs <= 0) return
    staleTimer = setInterval(() => {
      if (!shouldReconnect) return
      if (!ws || ws.readyState !== WebSocket.OPEN) return
      if (!lastMessageAt) return
      const ageMs = Date.now() - lastMessageAt
      if (ageMs > staleTimeoutMs) {
        // Treat as a half-open connection; closing triggers the normal reconnect path.
        ws.close()
      }
    }, staleCheckIntervalMs)
  }

  const scheduleReconnect = () => {
    if (!shouldReconnect) return
    if (hasConnectedOnce && reconnectAttempts >= maxReconnectAttempts) return

    // If we're offline, wait for the browser to come back online.
    if (typeof navigator !== 'undefined' && 'onLine' in navigator && !navigator.onLine) {
      setStatus('offline')
      return
    }

    const expDelay = baseDelayMs * Math.pow(2, reconnectAttempts)
    const delay = Math.min(expDelay, maxDelayMs)
    // Small random jitter so many dashboards do not reconnect in lockstep.
    const jitter = Math.floor(Math.random() * 250)
    clearReconnectTimer()
    reconnectTimer = setTimeout(() => {
      reconnectAttempts++
      connect()
    }, delay + jitter)
    options.onReconnectScheduled?.({ attempt: reconnectAttempts + 1, delayMs: delay + jitter })
  }

  const handleOnline = () => {
    if (!shouldReconnect) return
    if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) return
    connect()
  }

  const handleOffline = () => {
    setStatus('offline')
  }

  const connect = () => {
    if (!shouldReconnect) return
    if (isConnecting) return
    if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) return
    if (hasConnectedOnce && reconnectAttempts >= maxReconnectAttempts) return

    isConnecting = true
    setStatus(hasConnectedOnce ? 'reconnecting' : 'connecting')

    let socket: WebSocket
    try {
      const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'
      const wsBaseUrl = options.wsBaseUrl || import.meta.env.VITE_WS_BASE_URL || window.location.host
      const wsURL = new URL(`${protocol}//${wsBaseUrl}/api/v1/admin/ops/ws/qps`)

      // Do NOT put admin JWT in the URL query string (it can leak via access logs, proxies, etc).
      // Browsers cannot set Authorization headers for WebSockets, so we pass the token via
      // Sec-WebSocket-Protocol (subprotocol list): ["sub2api-admin", "jwt.<token>"].
      const rawToken = String(options.token ?? localStorage.getItem('auth_token') ?? '').trim()
      const protocols: string[] = [OPS_WS_BASE_PROTOCOL]
      if (rawToken) protocols.push(`jwt.${rawToken}`)

      socket = new WebSocket(wsURL.toString(), protocols)
    } catch (err) {
      // Construction failed synchronously: release the in-flight flag so later
      // attempts (reconnect timer, 'online' event) are not blocked forever.
      isConnecting = false
      throw err
    }
    ws = socket

    socket.onopen = () => {
      reconnectAttempts = 0
      isConnecting = false
      hasConnectedOnce = true
      clearReconnectTimer()
      lastMessageAt = Date.now()
      startStaleTimer()
      setStatus('connected')
      options.onOpen?.()
    }

    socket.onmessage = (e) => {
      try {
        const data = JSON.parse(e.data)
        lastMessageAt = Date.now()
        onMessage(data)
      } catch (err) {
        console.warn('[OpsWS] Failed to parse message:', err)
      }
    }

    socket.onerror = (error) => {
      console.error('[OpsWS] Connection error:', error)
      options.onError?.(error)
    }

    socket.onclose = (event) => {
      // A socket is superseded when `ws` no longer points at it: either a newer
      // socket was opened while this one was still CLOSING, or the subscription
      // was torn down. Its close must not touch the shared connection state,
      // otherwise the live socket is orphaned and a duplicate reconnect is scheduled.
      const superseded = ws !== socket
      if (!superseded) isConnecting = false
      options.onClose?.(event)
      if (superseded) return

      clearStaleTimer()
      ws = null

      // If the server explicitly tells us to stop reconnecting, honor it.
      if (event && typeof event.code === 'number' && event.code === OPS_WS_CLOSE_CODES.REALTIME_DISABLED) {
        shouldReconnect = false
        clearReconnectTimer()
        setStatus('closed')
        options.onFatalClose?.(event)
        return
      }

      scheduleReconnect()
    }
  }

  window.addEventListener('online', handleOnline)
  window.addEventListener('offline', handleOffline)
  try {
    connect()
  } catch (err) {
    // No unsubscribe handle reaches the caller on this path, so undo the
    // listener registration here before propagating the error.
    shouldReconnect = false
    window.removeEventListener('online', handleOnline)
    window.removeEventListener('offline', handleOffline)
    throw err
  }

  return () => {
    shouldReconnect = false
    window.removeEventListener('online', handleOnline)
    window.removeEventListener('offline', handleOffline)
    clearReconnectTimer()
    clearStaleTimer()
    if (ws) ws.close()
    ws = null
    setStatus('closed')
  }
}

// ==================== Alerts & settings types ====================

export type OpsSeverity = string
export type OpsPhase = string

export type AlertSeverity = 'critical' | 'warning' | 'info'
export type ThresholdMode = 'count' | 'percentage' | 'both'
export type MetricType =
  | 'success_rate'
  | 'error_rate'
  | 'upstream_error_rate'
  | 'cpu_usage_percent'
  | 'memory_usage_percent'
  | 'concurrency_queue_depth'
  | 'group_available_accounts'
  | 'group_available_ratio'
  | 'group_rate_limit_ratio'
  | 'account_rate_limited_count'
  | 'account_error_count'
  | 'account_error_ratio'
  | 'overload_account_count'
export type Operator = '>' | '>=' | '<' | '<=' | '==' | '!='

export interface AlertRule {
  id?: number
  name: string
  description?: string
  enabled: boolean
  metric_type: MetricType
  operator: Operator
  threshold: number
  window_minutes: number
  sustained_minutes: number
  severity: OpsSeverity
  cooldown_minutes: number
  notify_email: boolean
  filters?: Record<string, any>
  created_at?: string
  updated_at?: string
  last_triggered_at?: string | null
}

export interface AlertEvent {
  id: number
  rule_id: number
  severity: OpsSeverity | string
  status: 'firing' | 'resolved' | 'manual_resolved' | string
  title?: string
  description?: string
  metric_value?: number
  threshold_value?: number
  dimensions?: Record<string, any>
  fired_at: string
  resolved_at?: string | null
  email_sent: boolean
  created_at: string
}

export interface EmailNotificationConfig {
  alert: {
    enabled: boolean
    recipients: string[]
    min_severity: AlertSeverity | ''
    rate_limit_per_hour: number
    batching_window_seconds: number
    include_resolved_alerts: boolean
  }
  report: {
    enabled: boolean
    recipients: string[]
    daily_summary_enabled: boolean
    daily_summary_schedule: string
    weekly_summary_enabled: boolean
    weekly_summary_schedule: string
    error_digest_enabled: boolean
    error_digest_schedule: string
    error_digest_min_count: number
    account_health_enabled: boolean
    account_health_schedule: string
    account_health_error_rate_threshold: number
  }
}

export interface OpsMetricThresholds {
  sla_percent_min?: number | null // Turns red when SLA falls below this value
  ttft_p99_ms_max?: number | null // Turns red when TTFT P99 exceeds this value
  request_error_rate_percent_max?: number | null // Turns red when the request error rate exceeds this value
  upstream_error_rate_percent_max?: number | null // Turns red when the upstream error rate exceeds this value
}

export interface OpsDistributedLockSettings {
  enabled: boolean
  key: string
  ttl_seconds: number
}

export interface OpsAlertRuntimeSettings {
  evaluation_interval_seconds: number
  distributed_lock: OpsDistributedLockSettings
  silencing: {
    enabled: boolean
    global_until_rfc3339: string
    global_reason: string
    entries?: Array<{
      rule_id?: number
      severities?: Array<OpsSeverity | string>
      until_rfc3339: string
      reason: string
    }>
  }
  thresholds: OpsMetricThresholds // Metric threshold configuration
}

export interface OpsAdvancedSettings {
  data_retention: OpsDataRetentionSettings
  aggregation: OpsAggregationSettings
  ignore_count_tokens_errors: boolean
  ignore_context_canceled: boolean
  ignore_no_available_accounts: boolean
  ignore_invalid_api_key_errors: boolean
  display_openai_token_stats: boolean
  display_alert_events: boolean
  auto_refresh_enabled: boolean
  auto_refresh_interval_seconds: number
}

export interface OpsDataRetentionSettings {
  cleanup_enabled: boolean
  cleanup_schedule: string
  error_log_retention_days: number
  minute_metrics_retention_days: number
  hourly_metrics_retention_days: number
}

export interface OpsAggregationSettings {
  aggregation_enabled: boolean
}

export interface OpsRuntimeLogConfig {
  level: 'debug' | 'info' | 'warn' | 'error'
  enable_sampling: boolean
  sampling_initial: number
  sampling_thereafter: number
  caller: boolean
  stacktrace_level: 'none' | 'error' | 'fatal'
  retention_days: number
  source?: string
  updated_at?: string
  updated_by_user_id?: number
}

// ==================== System logs ====================

export interface OpsSystemLog {
  id: number
  created_at: string
  level: string
  component: string
  message: string
  request_id?: string
  client_request_id?: string
  user_id?: number | null
  account_id?: number | null
  platform?: string
  model?: string
  extra?: Record<string, any>
}

export type OpsSystemLogListResponse = PaginatedResponse<OpsSystemLog>

export interface OpsSystemLogQuery {
  page?: number
  page_size?: number
  time_range?: '5m' | '30m' | '1h' | '6h' | '24h' | '7d' | '30d'
  start_time?: string
  end_time?: string
  level?: string
  component?: string
  request_id?: string
  client_request_id?: string
  user_id?: number | null
  account_id?: number | null
  platform?: string
  model?: string
  q?: string
}

export interface OpsSystemLogCleanupRequest {
  start_time?: string
  end_time?: string
  level?: string
  component?: string
  request_id?: string
  client_request_id?: string
  user_id?: number | null
  account_id?: number | null
  platform?: string
  model?: string
  q?: string
}

export interface OpsSystemLogSinkHealth {
  queue_depth: number
  queue_capacity: number
  dropped_count: number
  write_failed_count: number
  written_count: number
  avg_write_delay_ms: number
  last_error?: string
}

// ==================== Error logs ====================

export interface OpsErrorLog {
  id: number
  created_at: string

  // Standardized classification
  phase: OpsPhase
  type: string
  error_owner: 'client' | 'provider' | 'platform' | string
  error_source: 'client_request' | 'upstream_http' | 'gateway' | string

  severity: OpsSeverity
  status_code: number
  platform: string
  model: string

  is_retryable: boolean
  retry_count: number

  resolved: boolean
  resolved_at?: string | null
  resolved_by_user_id?: number | null
  resolved_retry_id?: number | null

  client_request_id: string
  request_id: string
  message: string

  user_id?: number | null
  user_email: string
  api_key_id?: number | null
  account_id?: number | null
  account_name: string
  group_id?: number | null
  group_name: string

  client_ip?: string | null
  request_path?: string
  stream?: boolean
}

export interface OpsErrorDetail extends OpsErrorLog {
  error_body: string
  user_agent: string

  // Upstream context (optional; enriched by gateway services)
  upstream_status_code?: number | null
  upstream_error_message?: string
  upstream_error_detail?: string
  upstream_errors?: string

  auth_latency_ms?: number | null
  routing_latency_ms?: number | null
  upstream_latency_ms?: number | null
  response_latency_ms?: number | null
  time_to_first_token_ms?: number | null

  request_body: string
  request_body_truncated: boolean
  request_body_bytes?: number | null

  is_business_limited: boolean
}

export type OpsErrorLogsResponse = PaginatedResponse<OpsErrorLog>

// ==================== Dashboard endpoints ====================

/** GET `/admin/ops/dashboard/overview`. */
export async function getDashboardOverview(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsDashboardOverview> {
  const { data } = await apiClient.get<OpsDashboardOverview>('/admin/ops/dashboard/overview', {
    params,
    signal: options.signal
  })
  return data
}

/** GET `/admin/ops/dashboard/snapshot-v2` (overview + throughput trend + error trend in one response). */
export async function getDashboardSnapshotV2(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsDashboardSnapshotV2Response> {
  const { data } = await apiClient.get<OpsDashboardSnapshotV2Response>('/admin/ops/dashboard/snapshot-v2', {
    params,
    signal: options.signal
  })
  return data
}

/** GET `/admin/ops/dashboard/throughput-trend`. */
export async function getThroughputTrend(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsThroughputTrendResponse> {
  const { data } = await apiClient.get<OpsThroughputTrendResponse>('/admin/ops/dashboard/throughput-trend', {
    params,
    signal: options.signal
  })
  return data
}

/** GET `/admin/ops/dashboard/latency-histogram`. */
export async function getLatencyHistogram(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsLatencyHistogramResponse> {
  const { data } = await apiClient.get<OpsLatencyHistogramResponse>('/admin/ops/dashboard/latency-histogram', {
    params,
    signal: options.signal
  })
  return data
}

/** GET `/admin/ops/dashboard/error-trend`. */
export async function getErrorTrend(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsErrorTrendResponse> {
  const { data } = await apiClient.get<OpsErrorTrendResponse>('/admin/ops/dashboard/error-trend', {
    params,
    signal: options.signal
  })
  return data
}

/** GET `/admin/ops/dashboard/error-distribution`. */
export async function getErrorDistribution(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsErrorDistributionResponse> {
  const { data } = await apiClient.get<OpsErrorDistributionResponse>('/admin/ops/dashboard/error-distribution', {
    params,
    signal: options.signal
  })
  return data
}

/** GET `/admin/ops/dashboard/openai-token-stats`. */
export async function getOpenAITokenStats(
  params: OpsOpenAITokenStatsParams,
  options: OpsRequestOptions = {}
): Promise<OpsOpenAITokenStatsResponse> {
  const { data } = await apiClient.get<OpsOpenAITokenStatsResponse>('/admin/ops/dashboard/openai-token-stats', {
    params,
    signal: options.signal
  })
  return data
}

// ==================== Error log endpoints ====================

export type OpsErrorListView = 'errors' | 'excluded' | 'all'

export type OpsErrorListQueryParams = {
  page?: number
  page_size?: number
  time_range?: string
  start_time?: string
  end_time?: string
  platform?: string
  group_id?: number | null
  account_id?: number | null

  phase?: string
  error_owner?: string
  error_source?: string
  resolved?: string
  view?: OpsErrorListView

  q?: string
  status_codes?: string
  status_codes_other?: string
}

// Legacy unified endpoints

/** GET `/admin/ops/errors`. */
export async function listErrorLogs(params: OpsErrorListQueryParams): Promise<OpsErrorLogsResponse> {
  const { data } = await apiClient.get<OpsErrorLogsResponse>('/admin/ops/errors', { params })
  return data
}

/** GET `/admin/ops/errors/:id`. */
export async function getErrorLogDetail(id: number): Promise<OpsErrorDetail> {
  const { data } = await apiClient.get<OpsErrorDetail>(`/admin/ops/errors/${id}`)
  return data
}

/** POST `/admin/ops/errors/:id/retry` with the given retry request. */
export async function retryErrorRequest(id: number, req: OpsRetryRequest): Promise<OpsRetryResult> {
  const { data } = await apiClient.post<OpsRetryResult>(`/admin/ops/errors/${id}/retry`, req)
  return data
}

/** GET `/admin/ops/errors/:errorId/retries`, limited to `limit` entries (default 50). */
export async function listRetryAttempts(errorId: number, limit = 50): Promise<OpsRetryAttempt[]> {
  const { data } = await apiClient.get<OpsRetryAttempt[]>(`/admin/ops/errors/${errorId}/retries`, {
    params: { limit }
  })
  return data
}

/** PUT `/admin/ops/errors/:errorId/resolve`. */
export async function updateErrorResolved(errorId: number, resolved: boolean): Promise<void> {
  await apiClient.put(`/admin/ops/errors/${errorId}/resolve`, { resolved })
}

// New split endpoints

/** GET `/admin/ops/request-errors`. */
export async function listRequestErrors(params: OpsErrorListQueryParams): Promise<OpsErrorLogsResponse> {
  const { data } = await apiClient.get<OpsErrorLogsResponse>('/admin/ops/request-errors', { params })
  return data
}

/** GET `/admin/ops/upstream-errors`. */
export async function listUpstreamErrors(params: OpsErrorListQueryParams): Promise<OpsErrorLogsResponse> {
  const { data } = await apiClient.get<OpsErrorLogsResponse>('/admin/ops/upstream-errors', { params })
  return data
}

/** GET `/admin/ops/request-errors/:id`. */
export async function getRequestErrorDetail(id: number): Promise<OpsErrorDetail> {
  const { data } = await apiClient.get<OpsErrorDetail>(`/admin/ops/request-errors/${id}`)
  return data
}

/** GET `/admin/ops/upstream-errors/:id`. */
export async function getUpstreamErrorDetail(id: number): Promise<OpsErrorDetail> {
  const { data } = await apiClient.get<OpsErrorDetail>(`/admin/ops/upstream-errors/${id}`)
  return data
}

/** POST `/admin/ops/request-errors/:id/retry-client` with an empty body. */
export async function retryRequestErrorClient(id: number): Promise<OpsRetryResult> {
  const { data } = await apiClient.post<OpsRetryResult>(`/admin/ops/request-errors/${id}/retry-client`, {})
  return data
}

/** POST `/admin/ops/request-errors/:id/upstream-errors/:idx/retry` with an empty body. */
export async function retryRequestErrorUpstreamEvent(id: number, idx: number): Promise<OpsRetryResult> {
  const { data } = await apiClient.post<OpsRetryResult>(
    `/admin/ops/request-errors/${id}/upstream-errors/${idx}/retry`,
    {}
  )
  return data
}

/** POST `/admin/ops/upstream-errors/:id/retry` with an empty body. */
export async function retryUpstreamError(id: number): Promise<OpsRetryResult> {
  const { data } = await apiClient.post<OpsRetryResult>(`/admin/ops/upstream-errors/${id}/retry`, {})
  return data
}

/** PUT `/admin/ops/request-errors/:errorId/resolve`. */
export async function updateRequestErrorResolved(errorId: number, resolved: boolean): Promise<void> {
  await apiClient.put(`/admin/ops/request-errors/${errorId}/resolve`, { resolved })
}

/** PUT `/admin/ops/upstream-errors/:errorId/resolve`. */
export async function updateUpstreamErrorResolved(errorId: number, resolved: boolean): Promise<void> {
  await apiClient.put(`/admin/ops/upstream-errors/${errorId}/resolve`, { resolved })
}

/**
 * GET `/admin/ops/request-errors/:id/upstream-errors`.
 * When `options.include_detail` is truthy, `include_detail=1` is added to the query.
 */
export async function listRequestErrorUpstreamErrors(
  id: number,
  params: OpsErrorListQueryParams = {},
  options: { include_detail?: boolean } = {}
): Promise<PaginatedResponse<OpsErrorDetail>> {
  const query: Record<string, unknown> = { ...params }
  if (options.include_detail) query.include_detail = '1'
  const { data } = await apiClient.get<PaginatedResponse<OpsErrorDetail>>(
    `/admin/ops/request-errors/${id}/upstream-errors`,
    { params: query }
  )
  return data
}

/** GET `/admin/ops/requests`. */
export async function listRequestDetails(params: OpsRequestDetailsParams): Promise<OpsRequestDetailsResponse> {
  const { data } = await apiClient.get<OpsRequestDetailsResponse>('/admin/ops/requests', { params })
  return data
}

// Alert rules

/** GET `/admin/ops/alert-rules`. */
export async function listAlertRules(): Promise<AlertRule[]> {
  const { data } = await apiClient.get<AlertRule[]>('/admin/ops/alert-rules')
  return data
}

/** POST `/admin/ops/alert-rules`. */
export async function createAlertRule(rule: AlertRule): Promise<AlertRule> {
  const { data } = await apiClient.post<AlertRule>('/admin/ops/alert-rules', rule)
  return data
}

/** PUT `/admin/ops/alert-rules/:id` with a partial rule. */
export async function updateAlertRule(id: number, rule: Partial<AlertRule>): Promise<AlertRule> {
  const { data } = await apiClient.put<AlertRule>(`/admin/ops/alert-rules/${id}`, rule)
  return data
}

/** DELETE `/admin/ops/alert-rules/:id`. */
export async function deleteAlertRule(id: number): Promise<void> {
  await apiClient.delete(`/admin/ops/alert-rules/${id}`)
}

export interface AlertEventsQuery {
  limit?: number
  status?: string
  severity?: string
  email_sent?: boolean
  time_range?: string
  start_time?: string
  end_time?: string
  before_fired_at?: string
  before_id?: number
  platform?: string
  group_id?: number
}

/** GET `/admin/ops/alert-events`. */
export async function listAlertEvents(params: AlertEventsQuery = {}): Promise<AlertEvent[]> {
  const { data } = await apiClient.get<AlertEvent[]>('/admin/ops/alert-events', { params })
  return data
}

/** GET `/admin/ops/alert-events/:id`. */
export async function getAlertEvent(id: number): Promise<AlertEvent> {
  const { data } = await apiClient.get<AlertEvent>(`/admin/ops/alert-events/${id}`)
  return data
}

/** PUT `/admin/ops/alert-events/:id/status`. */
export async function updateAlertEventStatus(id: number, status: 'resolved' | 'manual_resolved'): Promise<void> {
  await apiClient.put(`/admin/ops/alert-events/${id}/status`, { status })
}

/** POST `/admin/ops/alert-silences`. */
export async function createAlertSilence(payload: {
  rule_id: number
  platform: string
  group_id?: number | null
  region?: string | null
  until: string
  reason?: string
}): Promise<void> {
  await apiClient.post('/admin/ops/alert-silences', payload)
}

// Email notification config

/** GET `/admin/ops/email-notification/config`. */
export async function getEmailNotificationConfig(): Promise<EmailNotificationConfig> {
  const { data } = await apiClient.get<EmailNotificationConfig>('/admin/ops/email-notification/config')
  return data
}

/** PUT `/admin/ops/email-notification/config`. */
export async function updateEmailNotificationConfig(config: EmailNotificationConfig): Promise<EmailNotificationConfig> {
  const { data } = await apiClient.put<EmailNotificationConfig>('/admin/ops/email-notification/config', config)
  return data
}

// Runtime settings (DB-backed)

/** GET `/admin/ops/runtime/alert`. */
export async function getAlertRuntimeSettings(): Promise<OpsAlertRuntimeSettings> {
  const { data } = await apiClient.get<OpsAlertRuntimeSettings>('/admin/ops/runtime/alert')
  return data
}

/** PUT `/admin/ops/runtime/alert`. */
export async function updateAlertRuntimeSettings(config: OpsAlertRuntimeSettings): Promise<OpsAlertRuntimeSettings> {
  const { data } = await apiClient.put<OpsAlertRuntimeSettings>('/admin/ops/runtime/alert', config)
  return data
}

/** GET `/admin/ops/runtime/logging`. */
export async function getRuntimeLogConfig(): Promise<OpsRuntimeLogConfig> {
  const { data } = await apiClient.get<OpsRuntimeLogConfig>('/admin/ops/runtime/logging')
  return data
}

/** PUT `/admin/ops/runtime/logging`. */
export async function updateRuntimeLogConfig(config: OpsRuntimeLogConfig): Promise<OpsRuntimeLogConfig> {
  const { data } = await apiClient.put<OpsRuntimeLogConfig>('/admin/ops/runtime/logging', config)
  return data
}

/** POST `/admin/ops/runtime/logging/reset`. */
export async function resetRuntimeLogConfig(): Promise<OpsRuntimeLogConfig> {
  const { data } = await apiClient.post<OpsRuntimeLogConfig>('/admin/ops/runtime/logging/reset')
  return data
}

/** GET `/admin/ops/system-logs`. */
export async function listSystemLogs(params: OpsSystemLogQuery): Promise<OpsSystemLogListResponse> {
  const { data } = await apiClient.get<OpsSystemLogListResponse>('/admin/ops/system-logs', { params })
  return data
}

/** POST `/admin/ops/system-logs/cleanup`; the response carries a `deleted` count. */
export async function cleanupSystemLogs(payload: OpsSystemLogCleanupRequest): Promise<{ deleted: number }> {
  const { data } = await apiClient.post<{ deleted: number }>('/admin/ops/system-logs/cleanup', payload)
  return data
}

/** GET `/admin/ops/system-logs/health`. */
export async function getSystemLogSinkHealth(): Promise<OpsSystemLogSinkHealth> {
  const { data } = await apiClient.get<OpsSystemLogSinkHealth>('/admin/ops/system-logs/health')
  return data
}

// Advanced settings (DB-backed)

/** GET `/admin/ops/advanced-settings`. */
export async function getAdvancedSettings(): Promise<OpsAdvancedSettings> {
  const { data } = await apiClient.get<OpsAdvancedSettings>('/admin/ops/advanced-settings')
  return data
}

/** PUT `/admin/ops/advanced-settings`. */
export async function updateAdvancedSettings(config: OpsAdvancedSettings): Promise<OpsAdvancedSettings> {
  const { data } = await apiClient.put<OpsAdvancedSettings>('/admin/ops/advanced-settings', config)
  return data
}

// ==================== Metric Thresholds ====================

// These two are not exported individually; they are reachable only through `opsAPI`.

/** GET `/admin/ops/settings/metric-thresholds`. */
async function getMetricThresholds(): Promise<OpsMetricThresholds> {
  const { data } = await apiClient.get<OpsMetricThresholds>('/admin/ops/settings/metric-thresholds')
  return data
}

/** PUT `/admin/ops/settings/metric-thresholds`. */
async function updateMetricThresholds(thresholds: OpsMetricThresholds): Promise<void> {
  await apiClient.put('/admin/ops/settings/metric-thresholds', thresholds)
}

/** Aggregate of every ops endpoint in this module, also available as the default export. */
export const opsAPI = {
  getDashboardSnapshotV2,
  getDashboardOverview,
  getThroughputTrend,
  getLatencyHistogram,
  getErrorTrend,
  getErrorDistribution,
  getOpenAITokenStats,
  getConcurrencyStats,
  getUserConcurrencyStats,
  getAccountAvailabilityStats,
  getRealtimeTrafficSummary,
  subscribeQPS,

  // Legacy unified endpoints
  listErrorLogs,
  getErrorLogDetail,
  retryErrorRequest,
  listRetryAttempts,
  updateErrorResolved,

  // New split endpoints
  listRequestErrors,
  listUpstreamErrors,
  getRequestErrorDetail,
  getUpstreamErrorDetail,
  retryRequestErrorClient,
  retryRequestErrorUpstreamEvent,
  retryUpstreamError,
  updateRequestErrorResolved,
  updateUpstreamErrorResolved,
  listRequestErrorUpstreamErrors,

  listRequestDetails,
  listAlertRules,
  createAlertRule,
  updateAlertRule,
  deleteAlertRule,
  listAlertEvents,
  getAlertEvent,
  updateAlertEventStatus,
  createAlertSilence,
  getEmailNotificationConfig,
  updateEmailNotificationConfig,
  getAlertRuntimeSettings,
  updateAlertRuntimeSettings,
  getRuntimeLogConfig,
  updateRuntimeLogConfig,
  resetRuntimeLogConfig,
  getAdvancedSettings,
  updateAdvancedSettings,
  getMetricThresholds,
  updateMetricThresholds,
  listSystemLogs,
  cleanupSystemLogs,
  getSystemLogSinkHealth
}

export default opsAPI