From 7576f54e767ee1cb4eb26e50ba69f6fdbb8c7cf1 Mon Sep 17 00:00:00 2001 From: "alexei.dolgolyov" Date: Fri, 29 May 2026 14:34:01 +0300 Subject: [PATCH] feat(alerts): metric-alert rule-management UI (Phase 2) Completes metric-threshold alerting end-to-end: /metric-alert-rules list/new/edit routes (mirroring log-scan-rules) with metric/comparator/threshold fields, the workload scope picker, ToggleSwitch, and a ConfirmDialog delete flow; an api.ts MetricAlertRule CRUD client; an "Observe" nav entry; and a full metricalert.* i18n namespace (en/ru parity). Create-form cooldown defaults to 300s to match the server. Rules are now manageable in the WebUI; breaches already surface in the per-app activity timeline and fire any configured event-trigger webhook. Reviewed: typescript APPROVE (0 CRITICAL/HIGH). --- web/src/lib/api.ts | 53 ++ web/src/lib/i18n/en.json | 100 +++ web/src/lib/i18n/ru.json | 100 +++ web/src/routes/+layout.svelte | 5 +- .../routes/metric-alert-rules/+page.svelte | 561 +++++++++++++++++ .../metric-alert-rules/[id]/+page.svelte | 516 +++++++++++++++ .../metric-alert-rules/new/+page.svelte | 593 ++++++++++++++++++ 7 files changed, 1927 insertions(+), 1 deletion(-) create mode 100644 web/src/routes/metric-alert-rules/+page.svelte create mode 100644 web/src/routes/metric-alert-rules/[id]/+page.svelte create mode 100644 web/src/routes/metric-alert-rules/new/+page.svelte diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 4c1101e..a12314c 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -1375,3 +1375,56 @@ export function getLogScanStats(signal?: AbortSignal): Promise { return get('/api/log-scan-rules/stats', signal); } +// ── Metric alert rules ────────────────────────────────────────────── +// Backend: internal/api/metric_alert_rules.go. Rules compare a sampled +// container metric (cpu/memory) against a threshold using a comparator. +// Scope model: workload_id="" → global; workload_id set → workload-only. 
+// Unlike log-scan rules there is no override / test / effective-rules +// concept — a metric-alert rule is a flat threshold check. + +export interface MetricAlertRule { + id: number; + workload_id: string; // "" = global + name: string; + metric: 'cpu_percent' | 'memory_percent' | 'memory_bytes'; + comparator: 'gt' | 'lt'; + threshold: number; + severity: 'info' | 'warn' | 'error'; + cooldown_seconds: number; + enabled: boolean; + created_at: string; + updated_at: string; +} +export interface MetricAlertRuleInput { + workload_id?: string; + name: string; + metric: 'cpu_percent' | 'memory_percent' | 'memory_bytes'; + comparator: 'gt' | 'lt'; + threshold: number; + severity?: 'info' | 'warn' | 'error'; + cooldown_seconds?: number; + enabled?: boolean; +} +export function listMetricAlertRules(opts?: { + workloadID?: string; + signal?: AbortSignal; +}): Promise { + const params = opts?.workloadID ? `?workload_id=${encodeURIComponent(opts.workloadID)}` : ''; + return get(`/api/metric-alert-rules${params}`, opts?.signal); +} +export function getMetricAlertRule(id: number, signal?: AbortSignal): Promise { + return get(`/api/metric-alert-rules/${id}`, signal); +} +export function createMetricAlertRule(data: MetricAlertRuleInput): Promise { + return post('/api/metric-alert-rules', data); +} +export function updateMetricAlertRule( + id: number, + data: MetricAlertRuleInput +): Promise { + return patch(`/api/metric-alert-rules/${id}`, data); +} +export function deleteMetricAlertRule(id: number): Promise { + return del(`/api/metric-alert-rules/${id}`); +} + diff --git a/web/src/lib/i18n/en.json b/web/src/lib/i18n/en.json index 94f9276..b098f99 100644 --- a/web/src/lib/i18n/en.json +++ b/web/src/lib/i18n/en.json @@ -17,6 +17,7 @@ "apps": "Apps", "eventTriggers": "Event Triggers", "logScanRules": "Log Rules", + "metricAlertRules": "Metric Alerts", "triggers": "Triggers", "proxies": "Proxies", "events": "Events", @@ -887,6 +888,105 @@ "disabled": "disabled" } }, + 
"metricalert": { + "title": "Metric alert rules", + "titleNew": "Forge a new alert", + "titleSingular": "Alert rule", + "lede": "Threshold checks the watcher runs against each running container's sampled CPU and memory. When a sample crosses the threshold the rule fires into event_log with the rule's severity, where event triggers pick it up and fan out to operator-configured webhooks. {enabled} of {total} enabled.", + "ledeNew": "Pick a metric, a comparator, and a threshold. Leave the workload field empty to create a global rule that applies to every workload, or scope it to a single workload.", + "stat": { + "total": "TOTAL", + "global": "GLOBAL", + "workload": "WORKLOAD", + "enabled": "ENABLED" + }, + "toolbar": { + "newButton": "New alert", + "backToList": "Back to alerts" + }, + "filter": { + "all": "ALL", + "global": "GLOBAL", + "workload": "WORKLOAD" + }, + "empty": { + "heading": "No alert rules yet", + "body": "Start with a global rule like CPU greater than 80%, then narrow per-workload by scoping a rule to a single workload.", + "cta": "Create the first alert" + }, + "list": { + "name": "Name", + "condition": "Condition", + "scope": "Scope", + "severity": "Severity", + "status": "Status", + "open": "Open" + }, + "detail": { + "config": "Configuration", + "configSub": "id #{id} · scope {scope}", + "dangerZone": "Danger zone", + "dangerZoneSub": "Deleting an alert rule removes it immediately and stops it from firing.", + "deleteButton": "Delete alert", + "deleteTitle": "Delete alert rule?", + "deleteMessage": "Rule \"{name}\" will be removed immediately and will stop firing." + }, + "form": { + "name": "Name", + "namePlaceholder": "e.g. Worker CPU saturated", + "condition": "Condition", + "metric": "Metric", + "comparator": "Comparator", + "threshold": "Threshold", + "thresholdPlaceholder": "e.g. 80", + "thresholdHintPercent": "Percent of the limit (0–100). 
The rule fires when the sampled value crosses this threshold.", + "thresholdHintBytes": "Absolute bytes (e.g. 536870912 for 512 MiB). The rule fires when sampled memory crosses this threshold.", + "matchShape": "Match shape", + "matchShapeOpts": "SEVERITY · COOLDOWN", + "severity": "Severity", + "cooldown": "Cooldown (s)", + "cooldownHint": "Cooldown is per-rule per-container — the same rule firing on two containers stays independent. It caps how often a sustained breach re-emits to event_log.", + "scope": "Scope", + "scopeHint": "Workload-scoped rules apply only to that workload's containers. Leave empty to apply the rule to every workload.", + "scopeGlobal": "Global (applies to every workload)", + "scopePick": "Pick workload…", + "scopePickTitle": "Pick a workload", + "scopeClear": "Make global", + "scopeSelected": "Workload", + "scopeUnknown": "Unknown workload", + "enabled": "Enabled", + "enabledHint": "Disabled rules stay in the table but never fire.", + "required": "REQUIRED", + "optional": "OPTIONAL", + "submit": "Forge alert", + "submitting": "Forging…" + }, + "metric": { + "cpu_percent": "CPU %", + "memory_percent": "Memory %", + "memory_bytes": "Memory (bytes)" + }, + "metricShort": { + "cpu": "CPU", + "memory": "Memory" + }, + "comparator": { + "gt": "greater than", + "lt": "less than" + }, + "unit": { + "percent": "%", + "bytes": "bytes" + }, + "scope": { + "global": "global", + "workload": "workload {id}" + }, + "status": { + "enabled": "enabled", + "disabled": "disabled" + } + }, "logscan": { "title": "Log scan rules", "titleNew": "Forge a new rule", diff --git a/web/src/lib/i18n/ru.json b/web/src/lib/i18n/ru.json index aed4662..2365dc2 100644 --- a/web/src/lib/i18n/ru.json +++ b/web/src/lib/i18n/ru.json @@ -17,6 +17,7 @@ "apps": "Приложения", "eventTriggers": "Триггеры событий", "logScanRules": "Лог-правила", + "metricAlertRules": "Метрик-алерты", "triggers": "Триггеры", "proxies": "Прокси", "events": "События", @@ -887,6 +888,105 @@ "disabled": 
"выключен" } }, + "metricalert": { + "title": "Правила метрик-алертов", + "titleNew": "Создать новый алерт", + "titleSingular": "Правило алерта", + "lede": "Пороговые проверки, которые наблюдатель выполняет по выборкам CPU и памяти каждого запущенного контейнера. Когда выборка пересекает порог, правило записывается в event_log с указанной важностью, откуда триггеры событий подхватывают его и рассылают по настроенным вебхукам. Включено {enabled} из {total}.", + "ledeNew": "Выберите метрику, оператор сравнения и порог. Оставьте поле рабочей нагрузки пустым, чтобы создать глобальное правило для всех нагрузок, или ограничьте его одной нагрузкой.", + "stat": { + "total": "ВСЕГО", + "global": "ГЛОБАЛЬНЫЕ", + "workload": "НАГРУЗКА", + "enabled": "ВКЛЮЧЕНО" + }, + "toolbar": { + "newButton": "Новый алерт", + "backToList": "К списку алертов" + }, + "filter": { + "all": "ВСЕ", + "global": "ГЛОБАЛЬНЫЕ", + "workload": "НАГРУЗКА" + }, + "empty": { + "heading": "Пока нет правил алертов", + "body": "Начните с глобального правила, например «CPU больше 80%», затем сузьте его, ограничив правило отдельной рабочей нагрузкой.", + "cta": "Создать первый алерт" + }, + "list": { + "name": "Название", + "condition": "Условие", + "scope": "Область", + "severity": "Важность", + "status": "Статус", + "open": "Открыть" + }, + "detail": { + "config": "Конфигурация", + "configSub": "id #{id} · область {scope}", + "dangerZone": "Опасная зона", + "dangerZoneSub": "Удаление правила алерта немедленно убирает его и прекращает срабатывания.", + "deleteButton": "Удалить алерт", + "deleteTitle": "Удалить правило алерта?", + "deleteMessage": "Правило «{name}» будет удалено немедленно и перестанет срабатывать." + }, + "form": { + "name": "Название", + "namePlaceholder": "напр. Перегрузка CPU воркера", + "condition": "Условие", + "metric": "Метрика", + "comparator": "Оператор", + "threshold": "Порог", + "thresholdPlaceholder": "напр. 80", + "thresholdHintPercent": "Процент от лимита (0–100). 
Правило срабатывает, когда выборка пересекает этот порог.", + "thresholdHintBytes": "Абсолютные байты (напр. 536870912 для 512 МиБ). Правило срабатывает, когда выборка памяти пересекает этот порог.", + "matchShape": "Параметры срабатывания", + "matchShapeOpts": "ВАЖНОСТЬ · ЗАДЕРЖКА", + "severity": "Важность", + "cooldown": "Задержка (с)", + "cooldownHint": "Задержка действует на каждое правило и контейнер отдельно — одно правило на двух контейнерах работает независимо. Она ограничивает, как часто длительное превышение повторно пишется в event_log.", + "scope": "Область", + "scopeHint": "Правила, привязанные к нагрузке, применяются только к её контейнерам. Оставьте пустым, чтобы применить правило ко всем нагрузкам.", + "scopeGlobal": "Глобально (применяется ко всем нагрузкам)", + "scopePick": "Выбрать нагрузку…", + "scopePickTitle": "Выберите нагрузку", + "scopeClear": "Сделать глобальным", + "scopeSelected": "Нагрузка", + "scopeUnknown": "Неизвестная нагрузка", + "enabled": "Включено", + "enabledHint": "Отключённые правила остаются в таблице, но не срабатывают.", + "required": "ОБЯЗАТЕЛЬНО", + "optional": "НЕОБЯЗАТЕЛЬНО", + "submit": "Создать алерт", + "submitting": "Создаём…" + }, + "metric": { + "cpu_percent": "CPU %", + "memory_percent": "Память %", + "memory_bytes": "Память (байты)" + }, + "metricShort": { + "cpu": "CPU", + "memory": "Память" + }, + "comparator": { + "gt": "больше чем", + "lt": "меньше чем" + }, + "unit": { + "percent": "%", + "bytes": "байт" + }, + "scope": { + "global": "глобально", + "workload": "нагрузка {id}" + }, + "status": { + "enabled": "включено", + "disabled": "отключено" + } + }, "logscan": { "title": "Правила сканирования логов", "titleNew": "Новое правило", diff --git a/web/src/routes/+layout.svelte b/web/src/routes/+layout.svelte index 1d12eca..26c1884 100644 --- a/web/src/routes/+layout.svelte +++ b/web/src/routes/+layout.svelte @@ -6,7 +6,7 @@ import Toast from '$lib/components/Toast.svelte'; import ThemeToggle from 
'$lib/components/ThemeToggle.svelte'; import LocaleSwitcher from '$lib/components/LocaleSwitcher.svelte'; - import { IconDashboard, IconDeploy, IconEvents, IconWifi, IconSettings, IconMenu, IconX, IconLogout, IconBox, IconContainer } from '$lib/components/icons'; + import { IconDashboard, IconDeploy, IconEvents, IconWifi, IconSettings, IconMenu, IconX, IconLogout, IconBox, IconContainer, IconAlert } from '$lib/components/icons'; import { goto } from '$app/navigation'; import { resolvedTheme, applyTheme } from '$lib/stores/theme'; import { exchangeOidcToken, setAuthToken, clearAuth, isAuthenticated } from '$lib/auth'; @@ -49,6 +49,7 @@ { href: '/events', labelKey: 'nav.events', icon: 'events', section: 'observe', countKey: 'eventsErrors', alert: true }, { href: '/event-triggers', labelKey: 'nav.eventTriggers', icon: 'events', section: 'observe' }, { href: '/log-scan-rules', labelKey: 'nav.logScanRules', icon: 'events', section: 'observe' }, + { href: '/metric-alert-rules', labelKey: 'nav.metricAlertRules', icon: 'alert', section: 'observe' }, { href: '/settings', labelKey: 'nav.settings', icon: 'settings', section: 'system' } ]; @@ -316,6 +317,8 @@ {:else if item.icon === 'events'} + {:else if item.icon === 'alert'} + {:else if item.icon === 'settings'} {/if} diff --git a/web/src/routes/metric-alert-rules/+page.svelte b/web/src/routes/metric-alert-rules/+page.svelte new file mode 100644 index 0000000..1b1f6ca --- /dev/null +++ b/web/src/routes/metric-alert-rules/+page.svelte @@ -0,0 +1,561 @@ + + + + {$t('metricalert.title')} · Tinyforge + + +
+ {#snippet toolbar()} + + + + {$t('metricalert.toolbar.newButton')} + + {/snippet} + + {#snippet stats()} +
+
{$t('metricalert.stat.total')}
+
{loading ? '—' : String(rules.length).padStart(2, '0')}
+
+
+
{$t('metricalert.stat.global')}
+
{loading ? '—' : String(globals.length).padStart(2, '0')}
+
+
+
{$t('metricalert.stat.workload')}
+
{loading ? '—' : String(workloadOnly.length).padStart(2, '0')}
+
+
+
{$t('metricalert.stat.enabled')}
+
{loading ? '—' : String(enabledCount).padStart(2, '0')}
+
+ {/snippet} + + {#snippet lede()} + {$t('metricalert.lede', { enabled: String(enabledCount), total: String(rules.length) })} + {/snippet} + + + + {#if error} + + {/if} + + {#if !loading && rules.length > 0} +
+ {#each [['all', $t('metricalert.filter.all'), rules.length], ['global', $t('metricalert.filter.global'), globals.length], ['workload', $t('metricalert.filter.workload'), workloadOnly.length]] as [key, label, count]} + + {/each} +
+ {/if} + + {#if loading} +
+ {#each Array(4) as _, i} +
+ {/each} +
+ {:else if rules.length === 0} +
+ +

{$t('metricalert.empty.heading')}

+

{$t('metricalert.empty.body')}

+ + {$t('metricalert.empty.cta')} + +
+ {:else} +
+ + + + + + + + + + + + + {#each filtered as r, i (r.id)} + + + + + + + + + {/each} + +
{$t('metricalert.list.name')}{$t('metricalert.list.condition')}{$t('metricalert.list.scope')}{$t('metricalert.list.severity')}{$t('metricalert.list.status')}{$t('metricalert.list.open')}
+ + {String(i + 1).padStart(2, '0')} + {r.name} + + {ruleSummary(r)} + {scopeLabel(r)} + + {r.severity} + + + + {r.enabled ? $t('metricalert.status.enabled') : $t('metricalert.status.disabled')} + + + + {$t('observability.open')} + +
+
+ {/if} +
+ + diff --git a/web/src/routes/metric-alert-rules/[id]/+page.svelte b/web/src/routes/metric-alert-rules/[id]/+page.svelte new file mode 100644 index 0000000..02d415a --- /dev/null +++ b/web/src/routes/metric-alert-rules/[id]/+page.svelte @@ -0,0 +1,516 @@ + + + + {rule?.name ?? $t('metricalert.titleSingular')} · Tinyforge + + +
+ {#snippet detailLede()} + {#if rule} + + {$t('metricalert.list.scope')} {scopeLabel(rule)} · + {$t('metricalert.list.severity')} {rule.severity} + + {/if} + {/snippet} + + + + {#if error} + + {/if} + + {#if loading || !rule} +
+ {#each Array(3) as _, i} +
+ {/each} +
+ {:else} +
+
+

{$t('metricalert.detail.config')}.

+ + {$t('metricalert.detail.configSub', { id: String(rule.id), scope: scopeLabel(rule) })} + +
+ +
+ + +
+ +
+ + + +
+

{thresholdHint}

+ +
+ + +
+ +
+
+ +

{$t('metricalert.form.enabledHint')}

+
+ +
+ +
+ +
+
+ +
+
+

{$t('metricalert.detail.dangerZone')}.

+ {$t('metricalert.detail.dangerZoneSub')} +
+
+ +
+
+ + (confirmDelete = false)} + /> + {/if} +
+ + diff --git a/web/src/routes/metric-alert-rules/new/+page.svelte b/web/src/routes/metric-alert-rules/new/+page.svelte new file mode 100644 index 0000000..e99df97 --- /dev/null +++ b/web/src/routes/metric-alert-rules/new/+page.svelte @@ -0,0 +1,593 @@ + + + + {$t('metricalert.titleNew')} · Tinyforge + + +
+ {#snippet lede()} + {$t('metricalert.ledeNew')} + {/snippet} + + + +
+ {#if error} + + {/if} + +
+ + +
+ +
+
+ + {$t('metricalert.form.condition')} + {$t('metricalert.form.required')} +
+
+ + + +
+

{thresholdHint}

+
+ +
+
+ + {$t('metricalert.form.matchShape')} + {$t('metricalert.form.matchShapeOpts')} +
+
+ + +
+

{$t('metricalert.form.cooldownHint')}

+
+ +
+
+ + {$t('metricalert.form.scope')} + {$t('metricalert.form.optional')} +
+
+ {#if workloadID === ''} +
+ + {$t('metricalert.form.scopeGlobal')} + +
+ {:else} +
+ {$t('metricalert.form.scopeSelected')} + {#if selectedWorkload} + {selectedWorkload.name} + + {selectedWorkload.source_kind || selectedWorkload.kind} + + {:else} + {$t('metricalert.form.scopeUnknown')} + {workloadID} + {/if} + + +
+ {/if} +
+

{$t('metricalert.form.scopeHint')}

+
+ +
+
+ +

{$t('metricalert.form.enabledHint')}

+
+ +
+ +
+ {$t('observability.cancel')} + +
+
+ + (pickerOpen = false)} + /> +
+ +