feat(alerts): metric-alert rule-management UI (Phase 2)

Completes metric-threshold alerting end-to-end: /metric-alert-rules
list/new/edit routes (mirroring log-scan-rules) with metric/comparator/
threshold fields, the workload scope picker, ToggleSwitch, and a
ConfirmDialog delete flow; an api.ts MetricAlertRule CRUD client; an
"Observe" nav entry; and a full metricalert.* i18n namespace (en/ru
parity). Create-form cooldown defaults to 300s to match the server.

Rules are now manageable in the WebUI; breaches already surface in the
per-app activity timeline and fire any configured event-trigger webhook.

Reviewed: typescript APPROVE (0 CRITICAL/HIGH).
This commit is contained in:
2026-05-29 14:34:01 +03:00
parent 2e26f555c5
commit 7576f54e76
7 changed files with 1927 additions and 1 deletions
+100
View File
@@ -17,6 +17,7 @@
"apps": "Apps",
"eventTriggers": "Event Triggers",
"logScanRules": "Log Rules",
"metricAlertRules": "Metric Alerts",
"triggers": "Triggers",
"proxies": "Proxies",
"events": "Events",
@@ -887,6 +888,105 @@
"disabled": "disabled"
}
},
"metricalert": {
"title": "Metric alert rules",
"titleNew": "Forge a new alert",
"titleSingular": "Alert rule",
"lede": "Threshold checks the watcher runs against each running container's sampled CPU and memory. When a sample crosses the threshold the rule fires into event_log with the rule's severity, where event triggers pick it up and fan out to operator-configured webhooks. {enabled} of {total} enabled.",
"ledeNew": "Pick a metric, a comparator, and a threshold. Leave the workload field empty to create a global rule that applies to every workload, or scope it to a single workload.",
"stat": {
"total": "TOTAL",
"global": "GLOBAL",
"workload": "WORKLOAD",
"enabled": "ENABLED"
},
"toolbar": {
"newButton": "New alert",
"backToList": "Back to alerts"
},
"filter": {
"all": "ALL",
"global": "GLOBAL",
"workload": "WORKLOAD"
},
"empty": {
"heading": "No alert rules yet",
"body": "Start with a global rule like CPU greater than 80%, then narrow per-workload by scoping a rule to a single workload.",
"cta": "Create the first alert"
},
"list": {
"name": "Name",
"condition": "Condition",
"scope": "Scope",
"severity": "Severity",
"status": "Status",
"open": "Open"
},
"detail": {
"config": "Configuration",
"configSub": "id #{id} · scope {scope}",
"dangerZone": "Danger zone",
"dangerZoneSub": "Deleting an alert rule removes it immediately and stops it from firing.",
"deleteButton": "Delete alert",
"deleteTitle": "Delete alert rule?",
"deleteMessage": "Rule \"{name}\" will be removed immediately and will stop firing."
},
"form": {
"name": "Name",
"namePlaceholder": "e.g. Worker CPU saturated",
"condition": "Condition",
"metric": "Metric",
"comparator": "Comparator",
"threshold": "Threshold",
"thresholdPlaceholder": "e.g. 80",
"thresholdHintPercent": "Percent of the limit (0–100). The rule fires when the sampled value crosses this threshold.",
"thresholdHintBytes": "Absolute bytes (e.g. 536870912 for 512 MiB). The rule fires when sampled memory crosses this threshold.",
"matchShape": "Match shape",
"matchShapeOpts": "SEVERITY · COOLDOWN",
"severity": "Severity",
"cooldown": "Cooldown (s)",
"cooldownHint": "Cooldown is per-rule per-container — the same rule firing on two containers stays independent. It caps how often a sustained breach re-emits to event_log.",
"scope": "Scope",
"scopeHint": "Workload-scoped rules apply only to that workload's containers. Leave empty to apply the rule to every workload.",
"scopeGlobal": "Global (applies to every workload)",
"scopePick": "Pick workload…",
"scopePickTitle": "Pick a workload",
"scopeClear": "Make global",
"scopeSelected": "Workload",
"scopeUnknown": "Unknown workload",
"enabled": "Enabled",
"enabledHint": "Disabled rules stay in the table but never fire.",
"required": "REQUIRED",
"optional": "OPTIONAL",
"submit": "Forge alert",
"submitting": "Forging…"
},
"metric": {
"cpu_percent": "CPU %",
"memory_percent": "Memory %",
"memory_bytes": "Memory (bytes)"
},
"metricShort": {
"cpu": "CPU",
"memory": "Memory"
},
"comparator": {
"gt": "greater than",
"lt": "less than"
},
"unit": {
"percent": "%",
"bytes": "bytes"
},
"scope": {
"global": "global",
"workload": "workload {id}"
},
"status": {
"enabled": "enabled",
"disabled": "disabled"
}
},
"logscan": {
"title": "Log scan rules",
"titleNew": "Forge a new rule",