cdb9fd57d1
Operators can define metric-threshold alert rules (cpu_percent, memory_percent, memory_bytes; gt/lt) per-workload or global via /api/metric-alert-rules. A periodic evaluator (internal/metricalert, 30s tick) checks the freshest container stats sample per container against enabled rules and, on breach (per-rule-per-workload cooldown), emits into the existing event_log + bus pipeline (source "metric_alert", workload_id set). Alerts therefore surface on the global events page, the per-app activity timeline, and any configured event-trigger webhook -- no new notification plumbing. Mirrors the log_scan_rules store/API/route patterns and the stats.Collector lifecycle. Rule CRUD reads are authed, mutations AdminOnly. Frontend rule-config UI is a follow-up phase. Reviewed: go APPROVE (0 CRITICAL/HIGH).
192 lines
6.2 KiB
Go
192 lines
6.2 KiB
Go
package store
|
|
|
|
import (
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// CreateMetricAlertRule inserts a new rule row after validating its
|
|
// metric/comparator/severity enums and rejecting negative cooldowns.
|
|
func (s *Store) CreateMetricAlertRule(r MetricAlertRule) (MetricAlertRule, error) {
|
|
if err := validateMetricAlertRule(r); err != nil {
|
|
return MetricAlertRule{}, err
|
|
}
|
|
now := Now()
|
|
r.CreatedAt = now
|
|
r.UpdatedAt = now
|
|
res, err := s.db.Exec(
|
|
`INSERT INTO metric_alert_rules
|
|
(workload_id, name, metric, comparator, threshold, severity,
|
|
cooldown_seconds, enabled, created_at, updated_at)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
r.WorkloadID, r.Name, r.Metric, r.Comparator, r.Threshold, r.Severity,
|
|
r.CooldownSeconds, boolToInt(r.Enabled), r.CreatedAt, r.UpdatedAt,
|
|
)
|
|
if err != nil {
|
|
return MetricAlertRule{}, fmt.Errorf("insert metric alert rule: %w", err)
|
|
}
|
|
id, err := res.LastInsertId()
|
|
if err != nil {
|
|
return MetricAlertRule{}, fmt.Errorf("get metric alert rule id: %w", err)
|
|
}
|
|
r.ID = id
|
|
return r, nil
|
|
}
|
|
|
|
// ListMetricAlertRules returns every rule, ordered by id for stable UI
|
|
// rendering.
|
|
func (s *Store) ListMetricAlertRules() ([]MetricAlertRule, error) {
|
|
return s.queryMetricAlertRules(
|
|
`SELECT id, workload_id, name, metric, comparator, threshold, severity,
|
|
cooldown_seconds, enabled, created_at, updated_at
|
|
FROM metric_alert_rules ORDER BY id`,
|
|
)
|
|
}
|
|
|
|
// ListMetricAlertRulesByWorkload returns rules that apply to the given
|
|
// workload: rows explicitly scoped to it plus global rows (workload_id
|
|
// = ""). Useful for the workload detail page.
|
|
func (s *Store) ListMetricAlertRulesByWorkload(workloadID string) ([]MetricAlertRule, error) {
|
|
return s.queryMetricAlertRules(
|
|
`SELECT id, workload_id, name, metric, comparator, threshold, severity,
|
|
cooldown_seconds, enabled, created_at, updated_at
|
|
FROM metric_alert_rules WHERE workload_id = ? OR workload_id = '' ORDER BY id`,
|
|
workloadID,
|
|
)
|
|
}
|
|
|
|
// GetMetricAlertRule fetches one rule by id or returns ErrNotFound.
|
|
func (s *Store) GetMetricAlertRule(id int64) (MetricAlertRule, error) {
|
|
row := s.db.QueryRow(
|
|
`SELECT id, workload_id, name, metric, comparator, threshold, severity,
|
|
cooldown_seconds, enabled, created_at, updated_at
|
|
FROM metric_alert_rules WHERE id = ?`, id,
|
|
)
|
|
r, err := scanMetricAlertRuleRow(row)
|
|
if errors.Is(err, sql.ErrNoRows) {
|
|
return MetricAlertRule{}, fmt.Errorf("metric alert rule %d: %w", id, ErrNotFound)
|
|
}
|
|
if err != nil {
|
|
return MetricAlertRule{}, fmt.Errorf("query metric alert rule: %w", err)
|
|
}
|
|
return r, nil
|
|
}
|
|
|
|
// UpdateMetricAlertRule overwrites the editable columns of a rule row.
|
|
// id and workload_id are immutable on update — change the scope of a
|
|
// rule by deleting + recreating, mirroring the log-scan store.
|
|
func (s *Store) UpdateMetricAlertRule(r MetricAlertRule) (MetricAlertRule, error) {
|
|
if r.ID == 0 {
|
|
return MetricAlertRule{}, fmt.Errorf("metric alert rule: id is required for update")
|
|
}
|
|
if err := validateMetricAlertRule(r); err != nil {
|
|
return MetricAlertRule{}, err
|
|
}
|
|
r.UpdatedAt = Now()
|
|
res, err := s.db.Exec(
|
|
`UPDATE metric_alert_rules
|
|
SET name = ?, metric = ?, comparator = ?, threshold = ?, severity = ?,
|
|
cooldown_seconds = ?, enabled = ?, updated_at = ?
|
|
WHERE id = ?`,
|
|
r.Name, r.Metric, r.Comparator, r.Threshold, r.Severity,
|
|
r.CooldownSeconds, boolToInt(r.Enabled), r.UpdatedAt, r.ID,
|
|
)
|
|
if err != nil {
|
|
return MetricAlertRule{}, fmt.Errorf("update metric alert rule: %w", err)
|
|
}
|
|
n, _ := res.RowsAffected()
|
|
if n == 0 {
|
|
return MetricAlertRule{}, fmt.Errorf("metric alert rule %d: %w", r.ID, ErrNotFound)
|
|
}
|
|
return s.GetMetricAlertRule(r.ID)
|
|
}
|
|
|
|
// DeleteMetricAlertRule removes a rule by id, returning ErrNotFound when
|
|
// no row matched.
|
|
func (s *Store) DeleteMetricAlertRule(id int64) error {
|
|
res, err := s.db.Exec(`DELETE FROM metric_alert_rules WHERE id = ?`, id)
|
|
if err != nil {
|
|
return fmt.Errorf("delete metric alert rule: %w", err)
|
|
}
|
|
n, _ := res.RowsAffected()
|
|
if n == 0 {
|
|
return fmt.Errorf("metric alert rule %d: %w", id, ErrNotFound)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *Store) queryMetricAlertRules(query string, args ...any) ([]MetricAlertRule, error) {
|
|
rows, err := s.db.Query(query, args...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("query metric alert rules: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
out := []MetricAlertRule{}
|
|
for rows.Next() {
|
|
r, err := scanMetricAlertRuleRows(rows)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out = append(out, r)
|
|
}
|
|
return out, rows.Err()
|
|
}
|
|
|
|
func scanMetricAlertRuleRows(rows *sql.Rows) (MetricAlertRule, error) {
|
|
var r MetricAlertRule
|
|
var enabled int
|
|
if err := rows.Scan(
|
|
&r.ID, &r.WorkloadID, &r.Name, &r.Metric, &r.Comparator, &r.Threshold, &r.Severity,
|
|
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
|
); err != nil {
|
|
return MetricAlertRule{}, fmt.Errorf("scan metric alert rule: %w", err)
|
|
}
|
|
r.Enabled = enabled != 0
|
|
return r, nil
|
|
}
|
|
|
|
func scanMetricAlertRuleRow(row *sql.Row) (MetricAlertRule, error) {
|
|
var r MetricAlertRule
|
|
var enabled int
|
|
if err := row.Scan(
|
|
&r.ID, &r.WorkloadID, &r.Name, &r.Metric, &r.Comparator, &r.Threshold, &r.Severity,
|
|
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
|
); err != nil {
|
|
return MetricAlertRule{}, err
|
|
}
|
|
r.Enabled = enabled != 0
|
|
return r, nil
|
|
}
|
|
|
|
// validateMetricAlertRule enforces the per-row invariants: a non-empty
|
|
// name, a known metric/comparator, a valid severity (blank allowed so
|
|
// the caller can default it), and a non-negative cooldown.
|
|
func validateMetricAlertRule(r MetricAlertRule) error {
|
|
if strings.TrimSpace(r.Name) == "" {
|
|
return fmt.Errorf("metric alert rule: name is required")
|
|
}
|
|
switch r.Metric {
|
|
case MetricCPUPercent, MetricMemoryPercent, MetricMemoryBytes:
|
|
default:
|
|
return fmt.Errorf("metric alert rule: invalid metric %q", r.Metric)
|
|
}
|
|
switch r.Comparator {
|
|
case MetricComparatorGT, MetricComparatorLT:
|
|
default:
|
|
return fmt.Errorf("metric alert rule: invalid comparator %q", r.Comparator)
|
|
}
|
|
switch r.Severity {
|
|
case LogScanSeverityInfo, LogScanSeverityWarn, LogScanSeverityError:
|
|
case "":
|
|
// Default applied at the caller; allow blank.
|
|
default:
|
|
return fmt.Errorf("metric alert rule: invalid severity %q", r.Severity)
|
|
}
|
|
if r.CooldownSeconds < 0 {
|
|
return fmt.Errorf("metric alert rule: cooldown_seconds must be >= 0")
|
|
}
|
|
return nil
|
|
}
|