cdb9fd57d1
Operators can define metric-threshold alert rules (cpu_percent, memory_percent, memory_bytes; gt/lt) per-workload or global via /api/metric-alert-rules. A periodic evaluator (internal/metricalert, 30s tick) checks the freshest container stats sample per container against enabled rules and, on breach (per-rule-per-workload cooldown), emits into the existing event_log + bus pipeline (source "metric_alert", workload_id set). Alerts therefore surface on the global events page, the per-app activity timeline, and any configured event-trigger webhook -- no new notification plumbing. Mirrors the log_scan_rules store/API/route patterns and the stats.Collector lifecycle. Rule CRUD reads are authed, mutations AdminOnly. Frontend rule-config UI is a follow-up phase. Reviewed: go APPROVE (0 CRITICAL/HIGH).
236 lines
6.7 KiB
Go
236 lines
6.7 KiB
Go
// Package api: metric-alert rule HTTP handlers. The evaluator lives in
|
|
// internal/metricalert; this file is the REST surface that lets
|
|
// operators create, edit, and delete threshold rules. Mirrors the
|
|
// log-scan rule handlers.
|
|
package api
|
|
|
|
import (
|
|
"errors"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
)
|
|
|
|
// metricAlertRuleInput is the request body decoded by the create (POST)
// and update (PATCH) handlers. Every field is a pointer so that a key
// missing from the JSON (nil) can be told apart from one explicitly set
// to an empty or zero value.
type metricAlertRuleInput struct {
	// WorkloadID is read on create only; the update handler never
	// applies it.
	WorkloadID *string `json:"workload_id"`
	// Name is the rule's label; validation rejects a blank name.
	Name *string `json:"name"`
	// Metric must be cpu_percent, memory_percent, or memory_bytes.
	Metric *string `json:"metric"`
	// Comparator must be gt or lt.
	Comparator *string `json:"comparator"`
	// Threshold is the value the metric is compared against.
	Threshold *float64 `json:"threshold"`
	// Severity must be info, warn, or error when provided.
	Severity *string `json:"severity"`
	// CooldownSeconds must be >= 0.
	CooldownSeconds *int `json:"cooldown_seconds"`
	// Enabled toggles the rule; on create an absent value means true.
	Enabled *bool `json:"enabled"`
}
|
|
|
|
// listMetricAlertRules handles GET /api/metric-alert-rules. Optional
|
|
// query filter `workload_id=...` returns rules applying to that workload
|
|
// (its own rows plus globals).
|
|
func (s *Server) listMetricAlertRules(w http.ResponseWriter, r *http.Request) {
|
|
if wlID := r.URL.Query().Get("workload_id"); wlID != "" {
|
|
out, err := s.store.ListMetricAlertRulesByWorkload(wlID)
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "list metric alert rules")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusOK, out)
|
|
return
|
|
}
|
|
out, err := s.store.ListMetricAlertRules()
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "list metric alert rules")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusOK, out)
|
|
}
|
|
|
|
// getMetricAlertRule handles GET /api/metric-alert-rules/{id}.
|
|
func (s *Server) getMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
|
id, ok := parseMetricAlertRuleID(w, r)
|
|
if !ok {
|
|
return
|
|
}
|
|
rule, err := s.store.GetMetricAlertRule(id)
|
|
if err != nil {
|
|
mapStoreError(w, err, "metric alert rule")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusOK, rule)
|
|
}
|
|
|
|
// createMetricAlertRule handles POST /api/metric-alert-rules.
|
|
func (s *Server) createMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
|
var in metricAlertRuleInput
|
|
if !decodeJSON(w, r, &in) {
|
|
return
|
|
}
|
|
rule := store.MetricAlertRule{
|
|
WorkloadID: derefString(in.WorkloadID),
|
|
Name: derefString(in.Name),
|
|
Metric: derefString(in.Metric),
|
|
Comparator: derefString(in.Comparator),
|
|
Threshold: derefFloat64(in.Threshold),
|
|
Severity: firstNonEmpty(derefString(in.Severity), store.LogScanSeverityWarn),
|
|
CooldownSeconds: derefIntDefault(in.CooldownSeconds, 300),
|
|
Enabled: in.Enabled == nil || *in.Enabled,
|
|
}
|
|
if msg := validateMetricAlertInput(rule); msg != "" {
|
|
respondError(w, http.StatusBadRequest, msg)
|
|
return
|
|
}
|
|
out, err := s.store.CreateMetricAlertRule(rule)
|
|
if err != nil {
|
|
if isMetricAlertValidationErr(err) {
|
|
respondError(w, http.StatusBadRequest, err.Error())
|
|
return
|
|
}
|
|
respondError(w, http.StatusInternalServerError, "create metric alert rule")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusCreated, out)
|
|
}
|
|
|
|
// updateMetricAlertRule handles PATCH /api/metric-alert-rules/{id}.
|
|
// workload_id is immutable; name/metric/comparator/threshold/severity/
|
|
// cooldown/enabled are individually overridable.
|
|
func (s *Server) updateMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
|
id, ok := parseMetricAlertRuleID(w, r)
|
|
if !ok {
|
|
return
|
|
}
|
|
existing, err := s.store.GetMetricAlertRule(id)
|
|
if err != nil {
|
|
mapStoreError(w, err, "metric alert rule")
|
|
return
|
|
}
|
|
var in metricAlertRuleInput
|
|
if !decodeJSON(w, r, &in) {
|
|
return
|
|
}
|
|
if in.Name != nil {
|
|
existing.Name = *in.Name
|
|
}
|
|
if in.Metric != nil && *in.Metric != "" {
|
|
existing.Metric = *in.Metric
|
|
}
|
|
if in.Comparator != nil && *in.Comparator != "" {
|
|
existing.Comparator = *in.Comparator
|
|
}
|
|
if in.Threshold != nil {
|
|
existing.Threshold = *in.Threshold
|
|
}
|
|
if in.Severity != nil && *in.Severity != "" {
|
|
existing.Severity = *in.Severity
|
|
}
|
|
if in.CooldownSeconds != nil {
|
|
existing.CooldownSeconds = *in.CooldownSeconds
|
|
}
|
|
if in.Enabled != nil {
|
|
existing.Enabled = *in.Enabled
|
|
}
|
|
if msg := validateMetricAlertInput(existing); msg != "" {
|
|
respondError(w, http.StatusBadRequest, msg)
|
|
return
|
|
}
|
|
out, err := s.store.UpdateMetricAlertRule(existing)
|
|
if err != nil {
|
|
if errors.Is(err, store.ErrNotFound) {
|
|
respondNotFound(w, "metric alert rule")
|
|
return
|
|
}
|
|
if isMetricAlertValidationErr(err) {
|
|
respondError(w, http.StatusBadRequest, err.Error())
|
|
return
|
|
}
|
|
respondError(w, http.StatusInternalServerError, "update metric alert rule")
|
|
return
|
|
}
|
|
respondJSON(w, http.StatusOK, out)
|
|
}
|
|
|
|
// deleteMetricAlertRule handles DELETE /api/metric-alert-rules/{id}.
|
|
func (s *Server) deleteMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
|
id, ok := parseMetricAlertRuleID(w, r)
|
|
if !ok {
|
|
return
|
|
}
|
|
if err := s.store.DeleteMetricAlertRule(id); err != nil {
|
|
mapStoreError(w, err, "metric alert rule")
|
|
return
|
|
}
|
|
w.WriteHeader(http.StatusNoContent)
|
|
}
|
|
|
|
// validateMetricAlertInput does boundary validation so we return a
|
|
// clear 400 before hitting the store. The store re-validates the same
|
|
// invariants as a backstop.
|
|
func validateMetricAlertInput(rule store.MetricAlertRule) string {
|
|
if strings.TrimSpace(rule.Name) == "" {
|
|
return "name is required"
|
|
}
|
|
switch rule.Metric {
|
|
case store.MetricCPUPercent, store.MetricMemoryPercent, store.MetricMemoryBytes:
|
|
default:
|
|
return "invalid metric: must be cpu_percent, memory_percent, or memory_bytes"
|
|
}
|
|
switch rule.Comparator {
|
|
case store.MetricComparatorGT, store.MetricComparatorLT:
|
|
default:
|
|
return "invalid comparator: must be gt or lt"
|
|
}
|
|
switch rule.Severity {
|
|
case store.LogScanSeverityInfo, store.LogScanSeverityWarn, store.LogScanSeverityError, "":
|
|
default:
|
|
return "invalid severity: must be info, warn, or error"
|
|
}
|
|
if rule.CooldownSeconds < 0 {
|
|
return "cooldown_seconds must be >= 0"
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// isMetricAlertValidationErr reports whether err looks like a rule
// validation failure, so the handlers can answer 400 instead of 500.
// Detection is a case-sensitive substring match on err.Error() against a
// fixed set of phrases; a nil error is never a validation error.
// NOTE(review): the phrases presumably mirror the store's validation
// messages -- keep them in sync with the store package.
func isMetricAlertValidationErr(err error) bool {
	if err == nil {
		return false
	}
	text := err.Error()
	switch {
	case strings.Contains(text, "name is required"),
		strings.Contains(text, "invalid metric"),
		strings.Contains(text, "invalid comparator"),
		strings.Contains(text, "invalid severity"),
		strings.Contains(text, "cooldown_seconds must be"):
		return true
	}
	return false
}
|
|
|
|
func parseMetricAlertRuleID(w http.ResponseWriter, r *http.Request) (int64, bool) {
|
|
raw := chi.URLParam(r, "id")
|
|
id, err := strconv.ParseInt(raw, 10, 64)
|
|
if err != nil || id <= 0 {
|
|
respondError(w, http.StatusBadRequest, "invalid rule id")
|
|
return 0, false
|
|
}
|
|
return id, true
|
|
}
|
|
|
|
// derefFloat64 returns the value p points to, or 0 when p is nil.
func derefFloat64(p *float64) float64 {
	if p != nil {
		return *p
	}
	return 0
}
|