cdb9fd57d1
Operators can define metric-threshold alert rules (cpu_percent, memory_percent, memory_bytes; gt/lt) per-workload or global via /api/metric-alert-rules. A periodic evaluator (internal/metricalert, 30s tick) checks the freshest container stats sample per container against enabled rules and, on breach (per-rule-per-workload cooldown), emits into the existing event_log + bus pipeline (source "metric_alert", workload_id set). Alerts therefore surface on the global events page, the per-app activity timeline, and any configured event-trigger webhook -- no new notification plumbing. Mirrors the log_scan_rules store/API/route patterns and the stats.Collector lifecycle. Rule CRUD reads are authed, mutations AdminOnly. Frontend rule-config UI is a follow-up phase. Reviewed: go APPROVE (0 CRITICAL/HIGH).
168 lines
4.8 KiB
Go
168 lines
4.8 KiB
Go
package store
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
func TestCreateMetricAlertRule_Validates(t *testing.T) {
|
|
s := newTestStore(t)
|
|
cases := []struct {
|
|
name string
|
|
in MetricAlertRule
|
|
wantErr string
|
|
}{
|
|
{
|
|
name: "missing name",
|
|
in: MetricAlertRule{Metric: MetricCPUPercent, Comparator: MetricComparatorGT},
|
|
wantErr: "name is required",
|
|
},
|
|
{
|
|
name: "bad metric",
|
|
in: MetricAlertRule{Name: "n", Metric: "load_avg", Comparator: MetricComparatorGT},
|
|
wantErr: "invalid metric",
|
|
},
|
|
{
|
|
name: "bad comparator",
|
|
in: MetricAlertRule{Name: "n", Metric: MetricCPUPercent, Comparator: "eq"},
|
|
wantErr: "invalid comparator",
|
|
},
|
|
{
|
|
name: "bad severity",
|
|
in: MetricAlertRule{Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT, Severity: "loud"},
|
|
wantErr: "invalid severity",
|
|
},
|
|
{
|
|
name: "negative cooldown",
|
|
in: MetricAlertRule{Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT, CooldownSeconds: -1},
|
|
wantErr: "cooldown_seconds must be",
|
|
},
|
|
}
|
|
for _, c := range cases {
|
|
t.Run(c.name, func(t *testing.T) {
|
|
_, err := s.CreateMetricAlertRule(c.in)
|
|
if err == nil {
|
|
t.Fatalf("expected error containing %q, got nil", c.wantErr)
|
|
}
|
|
if !strings.Contains(err.Error(), c.wantErr) {
|
|
t.Fatalf("error mismatch: got %q want substring %q", err.Error(), c.wantErr)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestCreateAndGetMetricAlertRule(t *testing.T) {
|
|
s := newTestStore(t)
|
|
r, err := s.CreateMetricAlertRule(MetricAlertRule{
|
|
Name: "cpu-hot", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
|
Threshold: 80, Severity: "warn", CooldownSeconds: 300, Enabled: true,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("create: %v", err)
|
|
}
|
|
if r.ID == 0 {
|
|
t.Fatal("id should be set")
|
|
}
|
|
got, err := s.GetMetricAlertRule(r.ID)
|
|
if err != nil {
|
|
t.Fatalf("get: %v", err)
|
|
}
|
|
if got.Metric != MetricCPUPercent || got.Comparator != MetricComparatorGT {
|
|
t.Errorf("metric/comparator mismatch: %q %q", got.Metric, got.Comparator)
|
|
}
|
|
if got.Threshold != 80 {
|
|
t.Errorf("threshold mismatch: %v", got.Threshold)
|
|
}
|
|
if !got.Enabled {
|
|
t.Error("enabled lost on round-trip")
|
|
}
|
|
}
|
|
|
|
func TestGetMetricAlertRule_NotFound(t *testing.T) {
|
|
s := newTestStore(t)
|
|
if _, err := s.GetMetricAlertRule(999); err == nil {
|
|
t.Fatal("expected ErrNotFound for missing rule")
|
|
}
|
|
}
|
|
|
|
func TestListMetricAlertRulesByWorkload(t *testing.T) {
|
|
s := newTestStore(t)
|
|
_, _ = s.CreateMetricAlertRule(MetricAlertRule{
|
|
Name: "global", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
|
Threshold: 90, Severity: "warn", Enabled: true,
|
|
})
|
|
_, _ = s.CreateMetricAlertRule(MetricAlertRule{
|
|
Name: "w1-mem", WorkloadID: "w1", Metric: MetricMemoryPercent, Comparator: MetricComparatorGT,
|
|
Threshold: 85, Severity: "error", Enabled: true,
|
|
})
|
|
_, _ = s.CreateMetricAlertRule(MetricAlertRule{
|
|
Name: "w2-mem", WorkloadID: "w2", Metric: MetricMemoryBytes, Comparator: MetricComparatorGT,
|
|
Threshold: 1000, Severity: "info", Enabled: true,
|
|
})
|
|
|
|
w1, err := s.ListMetricAlertRulesByWorkload("w1")
|
|
if err != nil {
|
|
t.Fatalf("by workload: %v", err)
|
|
}
|
|
// w1 sees its own rule + the global, but NOT w2's rule.
|
|
if len(w1) != 2 {
|
|
t.Fatalf("w1 should see 2 rules (own + global), got %d", len(w1))
|
|
}
|
|
for _, r := range w1 {
|
|
if r.WorkloadID == "w2" {
|
|
t.Errorf("w1 should not see w2's rule")
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestUpdateMetricAlertRule(t *testing.T) {
|
|
s := newTestStore(t)
|
|
r, _ := s.CreateMetricAlertRule(MetricAlertRule{
|
|
Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
|
Threshold: 80, Severity: "warn", Enabled: true,
|
|
})
|
|
r.Threshold = 95
|
|
r.Comparator = MetricComparatorLT
|
|
r.Enabled = false
|
|
got, err := s.UpdateMetricAlertRule(r)
|
|
if err != nil {
|
|
t.Fatalf("update: %v", err)
|
|
}
|
|
if got.Threshold != 95 {
|
|
t.Errorf("threshold not updated: %v", got.Threshold)
|
|
}
|
|
if got.Comparator != MetricComparatorLT {
|
|
t.Errorf("comparator not updated: %q", got.Comparator)
|
|
}
|
|
if got.Enabled {
|
|
t.Error("enabled=false not applied")
|
|
}
|
|
}
|
|
|
|
func TestUpdateMetricAlertRule_NotFound(t *testing.T) {
|
|
s := newTestStore(t)
|
|
_, err := s.UpdateMetricAlertRule(MetricAlertRule{
|
|
ID: 999, Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
|
})
|
|
if err == nil {
|
|
t.Fatal("expected ErrNotFound updating missing rule")
|
|
}
|
|
}
|
|
|
|
func TestDeleteMetricAlertRule(t *testing.T) {
|
|
s := newTestStore(t)
|
|
r, _ := s.CreateMetricAlertRule(MetricAlertRule{
|
|
Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
|
Threshold: 80, Severity: "warn", Enabled: true,
|
|
})
|
|
if err := s.DeleteMetricAlertRule(r.ID); err != nil {
|
|
t.Fatalf("delete: %v", err)
|
|
}
|
|
if _, err := s.GetMetricAlertRule(r.ID); err == nil {
|
|
t.Error("rule should be gone after delete")
|
|
}
|
|
if err := s.DeleteMetricAlertRule(r.ID); err == nil {
|
|
t.Error("expected ErrNotFound deleting already-deleted rule")
|
|
}
|
|
}
|