package metricalert

import (
	"testing"
	"time"

	"github.com/alexei/tinyforge/internal/events"
	"github.com/alexei/tinyforge/internal/store"
)

// --- fakes -----------------------------------------------------------

// fakeRules is a canned rule source: it hands back the configured rules
// and error unchanged on every call.
type fakeRules struct {
	rules []store.MetricAlertRule
	err   error
}

// ListMetricAlertRules returns the configured rules and error.
func (f *fakeRules) ListMetricAlertRules() ([]store.MetricAlertRule, error) {
	return f.rules, f.err
}

// fakeSamples is a canned sample source that also records how it was
// queried, so tests can assert on both the argument and the call count.
type fakeSamples struct {
	samples []store.ContainerStatsSample
	err     error
	since   int64 // captured arg of the last call
	calls   int   // number of times the source has been queried
}

// ListAllRecentContainerStatsSamples records the call (count and sinceTS)
// and returns the configured samples and error.
func (f *fakeSamples) ListAllRecentContainerStatsSamples(sinceTS int64) ([]store.ContainerStatsSample, error) {
	f.calls++
	f.since = sinceTS
	return f.samples, f.err
}

// recordedEvent wraps one event accepted by fakeSink.
type recordedEvent struct {
	evt store.EventLog
}

// fakeSink is an in-memory event store. When err is set every insert
// fails and nothing is recorded; otherwise events are appended in order.
type fakeSink struct {
	events []recordedEvent
	err    error
	nextID int64
}

// InsertEvent stores e with a sequential ID (starting at 1) and a fixed
// CreatedAt timestamp, and returns the stored copy. If the sink has an
// error configured, it returns a zero EventLog and that error instead.
func (f *fakeSink) InsertEvent(e store.EventLog) (store.EventLog, error) {
	if f.err != nil {
		return store.EventLog{}, f.err
	}
	f.nextID++
	e.ID = f.nextID
	e.CreatedAt = "2026-05-29T00:00:00Z"
	f.events = append(f.events, recordedEvent{evt: e})
	return e, nil
}

// fakePublisher collects every published event in order.
type fakePublisher struct {
	published []events.Event
}

// Publish appends e to the list of published events.
func (f *fakePublisher) Publish(e events.Event) {
	f.published = append(f.published, e)
}

// newManager builds a Manager over fakes seeded with the given rules and
// samples, and returns it together with the sink and publisher so tests
// can inspect what was emitted.
func newManager(rules []store.MetricAlertRule, samples []store.ContainerStatsSample) (*Manager, *fakeSink, *fakePublisher) {
	sink := &fakeSink{}
	pub := &fakePublisher{}
	m := New(&fakeRules{rules: rules}, &fakeSamples{samples: samples}, sink, pub)
	return m, sink, pub
}

// --- tests -----------------------------------------------------------

// TestEvaluate_BreachEmits checks that a CPU sample of 95 against a GT-80
// rule produces exactly one stored event and one published event, and
// that both carry the expected source, severity, and workload ID.
func TestEvaluate_BreachEmits(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:              1,
		Name:            "cpu-hot",
		Metric:          store.MetricCPUPercent,
		Comparator:      store.MetricComparatorGT,
		Threshold:       80,
		Severity:        "error",
		CooldownSeconds: 300,
		Enabled:         true,
	}}
	samples := []store.ContainerStatsSample{{
		ContainerID: "c1",
		OwnerID:     "w1",
		OwnerType:   "instance",
		TS:          100,
		CPUPercent:  95,
	}}
	m, sink, pub := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))

	if len(sink.events) != 1 {
		t.Fatalf("expected 1 event, got %d", len(sink.events))
	}
	got := sink.events[0].evt
	if got.Source != "metric_alert" {
		t.Errorf("source = %q, want metric_alert", got.Source)
	}
	if got.Severity != "error" {
		t.Errorf("severity = %q, want error", got.Severity)
	}
	if got.WorkloadID != "w1" {
		t.Errorf("workload_id = %q, want w1", got.WorkloadID)
	}
	if got.Metadata == "" || got.Metadata == "{}" {
		t.Errorf("metadata should be populated JSON, got %q", got.Metadata)
	}

	if len(pub.published) != 1 {
		t.Fatalf("expected 1 published event, got %d", len(pub.published))
	}
	payload, ok := pub.published[0].Payload.(events.EventLogPayload)
	if !ok {
		t.Fatalf("published payload is not EventLogPayload")
	}
	if payload.WorkloadID != "w1" || payload.Source != "metric_alert" {
		t.Errorf("payload workload/source mismatch: %+v", payload)
	}
}

// TestEvaluate_NoBreachNoEmit checks that a sample below the threshold
// of a GT rule stores no event.
func TestEvaluate_NoBreachNoEmit(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "cpu-hot",
		Metric:     store.MetricCPUPercent,
		Comparator: store.MetricComparatorGT,
		Threshold:  80,
		Enabled:    true,
	}}
	samples := []store.ContainerStatsSample{{
		ContainerID: "c1",
		OwnerID:     "w1",
		TS:          100,
		CPUPercent:  10,
	}}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 0 {
		t.Fatalf("expected no events for non-breach, got %d", len(sink.events))
	}
}

// TestEvaluate_DisabledRuleSkipped checks that a rule with Enabled=false
// stores no event even when the sample would breach it.
func TestEvaluate_DisabledRuleSkipped(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "cpu-hot",
		Metric:     store.MetricCPUPercent,
		Comparator: store.MetricComparatorGT,
		Threshold:  80,
		Enabled:    false,
	}}
	samples := []store.ContainerStatsSample{{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95}}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 0 {
		t.Fatalf("disabled rule should not emit, got %d", len(sink.events))
	}
}

// TestEvaluate_PerWorkloadScoping checks that a rule with WorkloadID "w2"
// emits only for the sample owned by w2, although both samples breach.
func TestEvaluate_PerWorkloadScoping(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "w2-only",
		WorkloadID: "w2",
		Metric:     store.MetricCPUPercent,
		Comparator: store.MetricComparatorGT,
		Threshold:  80,
		Enabled:    true,
	}}
	samples := []store.ContainerStatsSample{
		{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95}, // breach but wrong workload
		{ContainerID: "c2", OwnerID: "w2", TS: 100, CPUPercent: 95}, // breach, correct workload
	}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 1 {
		t.Fatalf("expected 1 event (only w2), got %d", len(sink.events))
	}
	if sink.events[0].evt.WorkloadID != "w2" {
		t.Errorf("event should be scoped to w2, got %q", sink.events[0].evt.WorkloadID)
	}
}

// TestEvaluate_GlobalRuleMatchesAll checks that a rule with an empty
// WorkloadID emits once for each of two breaching workloads.
func TestEvaluate_GlobalRuleMatchesAll(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "global",
		WorkloadID: "",
		Metric:     store.MetricCPUPercent,
		Comparator: store.MetricComparatorGT,
		Threshold:  80,
		Enabled:    true,
	}}
	samples := []store.ContainerStatsSample{
		{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95},
		{ContainerID: "c2", OwnerID: "w2", TS: 100, CPUPercent: 95},
	}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 2 {
		t.Fatalf("global rule should fire for both workloads, got %d", len(sink.events))
	}
}

// TestEvaluate_MemoryPercentDivByZeroSkip checks that a memory-percent
// rule stores no event for a sample whose MemoryLimit is zero.
func TestEvaluate_MemoryPercentDivByZeroSkip(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "mem",
		Metric:     store.MetricMemoryPercent,
		Comparator: store.MetricComparatorGT,
		Threshold:  50,
		Enabled:    true,
	}}
	samples := []store.ContainerStatsSample{{
		ContainerID: "c1",
		OwnerID:     "w1",
		TS:          100,
		MemoryUsage: 1000,
		MemoryLimit: 0,
	}}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 0 {
		t.Fatalf("zero memory limit should be skipped for percent rule, got %d", len(sink.events))
	}
}

// TestEvaluate_MemoryPercentBreaches checks that usage 950 of limit 1000
// breaches a GT-90 memory-percent rule and stores one event.
func TestEvaluate_MemoryPercentBreaches(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "mem",
		Metric:     store.MetricMemoryPercent,
		Comparator: store.MetricComparatorGT,
		Threshold:  90,
		Enabled:    true,
	}}
	samples := []store.ContainerStatsSample{{
		ContainerID: "c1",
		OwnerID:     "w1",
		TS:          100,
		MemoryUsage: 950,
		MemoryLimit: 1000, // 95%
	}}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 1 {
		t.Fatalf("95%% should breach 90%% threshold, got %d events", len(sink.events))
	}
}

// TestEvaluate_CooldownSuppressesSecondEmit checks that, with
// CooldownSeconds=300, a second evaluation 10s after the first stores
// nothing new, and an evaluation 301s after the first stores a second
// event.
func TestEvaluate_CooldownSuppressesSecondEmit(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:              1,
		Name:            "cpu-hot",
		Metric:          store.MetricCPUPercent,
		Comparator:      store.MetricComparatorGT,
		Threshold:       80,
		CooldownSeconds: 300,
		Enabled:         true,
	}}
	samples := []store.ContainerStatsSample{{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95}}
	m, sink, _ := newManager(rules, samples)

	base := time.Unix(1000, 0)
	m.evaluate(base)
	// 10s later — still inside the 300s cooldown window.
	m.evaluate(base.Add(10 * time.Second))
	if len(sink.events) != 1 {
		t.Fatalf("cooldown should suppress second emit, got %d events", len(sink.events))
	}
	// Past the window — should fire again.
	m.evaluate(base.Add(301 * time.Second))
	if len(sink.events) != 2 {
		t.Fatalf("should re-fire after cooldown elapses, got %d events", len(sink.events))
	}
}

// TestEvaluate_LatestSamplePerContainer feeds two samples for the same
// container and expects exactly one event.
func TestEvaluate_LatestSamplePerContainer(t *testing.T) {
	// Two samples for the same container: an old non-breaching reading
	// and a newer breaching one. Only the freshest should be judged.
	//
	// NOTE(review): because the older sample does not breach, this case
	// would also pass if every sample were judged independently. A stale
	// breaching sample followed by a fresh non-breaching one (expecting
	// zero events) would pin the "freshest only" contract more tightly.
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "cpu-hot",
		Metric:     store.MetricCPUPercent,
		Comparator: store.MetricComparatorGT,
		Threshold:  80,
		Enabled:    true,
	}}
	samples := []store.ContainerStatsSample{
		{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 10},
		{ContainerID: "c1", OwnerID: "w1", TS: 150, CPUPercent: 95},
	}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 1 {
		t.Fatalf("expected exactly 1 event from freshest sample, got %d", len(sink.events))
	}
}

// TestEvaluate_LessThanComparator checks that a CPU sample of 1 breaches
// an LT-5 rule and stores one event.
func TestEvaluate_LessThanComparator(t *testing.T) {
	rules := []store.MetricAlertRule{{
		ID:         1,
		Name:       "cpu-idle",
		Metric:     store.MetricCPUPercent,
		Comparator: store.MetricComparatorLT,
		Threshold:  5,
		Enabled:    true,
	}}
	samples := []store.ContainerStatsSample{{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 1}}
	m, sink, _ := newManager(rules, samples)
	m.evaluate(time.Unix(200, 0))
	if len(sink.events) != 1 {
		t.Fatalf("1%% < 5%% threshold should breach lt rule, got %d events", len(sink.events))
	}
}

// TestEvaluate_NoRulesNoFetch checks that evaluating with an empty rule
// set never touches the sample source.
func TestEvaluate_NoRulesNoFetch(t *testing.T) {
	// With no rules there's nothing to do; we shouldn't even query samples.
	samplesSrc := &fakeSamples{samples: nil}
	m := New(&fakeRules{rules: nil}, samplesSrc, &fakeSink{}, &fakePublisher{})
	m.evaluate(time.Unix(200, 0))
	// Assert on the call counter rather than on since: a query made with
	// sinceTS == 0 would leave since at its zero value and go unnoticed.
	if samplesSrc.calls != 0 {
		t.Errorf("samples should not be queried when there are no rules")
	}
}