// Package metrics provides a minimal Prometheus text-format exposition // of Tinyforge's operational counters. We deliberately do NOT import the // official client_golang library: the metrics set here is small, the text // format is simple, and avoiding the dependency keeps `tinyforge` a fast // single-binary install. // // Every counter is a sync/atomic.Int64 — cheap, lock-free, and safe to // touch from any goroutine. Histograms / gauges aren't modeled yet; the // few we need (request latency p50/p99) live downstream of slog and can // be added when the operator actually wants them. package metrics import ( "fmt" "io" "log/slog" "sort" "strings" "sync" "sync/atomic" ) // Registry holds the process-wide counter set. A single zero-value // Registry is ready to use — see DefaultRegistry below for the // recommended way to grab the global handle. type Registry struct { mu sync.RWMutex counters map[string]*counter } type counter struct { name string help string labels []string // label names, ordered as declared at registration series map[string]*atomic.Int64 // seriesMu only protects insertion of new label tuples — increments // on existing tuples are lock-free via the atomic. seriesMu sync.Mutex } // DefaultRegistry is the process-wide registry. All Tinyforge metrics // register against it. Tests can instantiate their own Registry. var DefaultRegistry = newRegistry() func newRegistry() *Registry { return &Registry{counters: make(map[string]*counter)} } // NewCounter declares a counter on the default registry. Call once at // package init or during NewServer; subsequent calls with the same name // return the existing counter so re-registration is safe. // // label names define the dimensions; calls to Inc must pass values in // the same order. Use the empty slice for label-less counters. func NewCounter(name, help string, labels ...string) *Counter { return DefaultRegistry.NewCounter(name, help, labels...) } // NewCounter on a specific Registry — useful in tests. func (r *Registry) NewCounter(name, help string, labels ...string) *Counter { r.mu.Lock() defer r.mu.Unlock() if c, ok := r.counters[name]; ok { return &Counter{c: c} } c := &counter{ name: name, help: help, labels: append([]string(nil), labels...), series: make(map[string]*atomic.Int64), } r.counters[name] = c return &Counter{c: c} } // Counter is the public handle returned by NewCounter. Pass it around as // a value — the underlying state lives on the registry. type Counter struct { c *counter } // Inc atomically increments the counter for the given label values. // Passing the wrong number of values is a programmer error; we surface // it as a panic during testing rather than silently aggregating into a // bogus series. func (c Counter) Inc(labelValues ...string) { c.Add(1, labelValues...) } // Add atomically adds delta. Negative delta is rejected (counters are // monotonic by definition). func (c Counter) Add(delta int64, labelValues ...string) { if delta < 0 { return } if len(labelValues) != len(c.c.labels) { // Programmer error. This used to panic to surface the bug, but Add // runs on hot paths (HTTP middleware, deploy dispatch) and several // callers are off the request goroutine, where a panic would take // down the whole process rather than a single request. Log loudly // and drop the sample so a mislabeled call site can never crash the // server; the bug still shows up immediately in the logs and in // tests via the error output. slog.Error("metrics: label count mismatch — dropping sample", "counter", c.c.name, "want", len(c.c.labels), "got", len(labelValues)) return } key := encodeKey(labelValues) c.c.seriesMu.Lock() v, ok := c.c.series[key] if !ok { v = new(atomic.Int64) c.c.series[key] = v } c.c.seriesMu.Unlock() v.Add(delta) } // encodeKey joins label values with a 0x1f separator. Prometheus label // values may contain anything except `"` and `\n`, which we escape on // exposition only — the key here is just a map index. func encodeKey(values []string) string { return strings.Join(values, "\x1f") } // WritePrometheus dumps the registry in the text exposition format // Prometheus / VictoriaMetrics / OpenMetrics understands. Stable // ordering: counters alphabetical by name; series alphabetical by // encoded label tuple. func (r *Registry) WritePrometheus(w io.Writer) error { r.mu.RLock() names := make([]string, 0, len(r.counters)) for n := range r.counters { names = append(names, n) } r.mu.RUnlock() sort.Strings(names) for _, name := range names { r.mu.RLock() c := r.counters[name] r.mu.RUnlock() if err := writeCounter(w, c); err != nil { return err } } return nil } func writeCounter(w io.Writer, c *counter) error { if _, err := fmt.Fprintf(w, "# HELP %s %s\n# TYPE %s counter\n", c.name, escapeHelp(c.help), c.name); err != nil { return err } // Snapshot the series map under a SINGLE lock acquisition. The // previous shape acquired+released seriesMu twice per emitted // series (once for the key list, once per Load), contending with // every hot-path Inc on the HTTP request path. The *atomic.Int64 // pointers are stable for the lifetime of the registry (we never // delete entries), so reading them after the unlock is safe. type sample struct { key string val *atomic.Int64 } c.seriesMu.Lock() samples := make([]sample, 0, len(c.series)) for k, v := range c.series { samples = append(samples, sample{k, v}) } c.seriesMu.Unlock() sort.Slice(samples, func(i, j int) bool { return samples[i].key < samples[j].key }) for _, s := range samples { val := s.val.Load() labels := decodeKey(s.key, c.labels) if labels == "" { if _, err := fmt.Fprintf(w, "%s %d\n", c.name, val); err != nil { return err } continue } if _, err := fmt.Fprintf(w, "%s{%s} %d\n", c.name, labels, val); err != nil { return err } } return nil } func decodeKey(key string, names []string) string { if key == "" || len(names) == 0 { return "" } values := strings.Split(key, "\x1f") if len(values) != len(names) { // Should not happen — encodeKey/decode are symmetric. return "" } parts := make([]string, len(names)) for i, n := range names { parts[i] = fmt.Sprintf(`%s="%s"`, n, escapeLabelValue(values[i])) } return strings.Join(parts, ",") } func escapeHelp(s string) string { r := strings.NewReplacer("\\", "\\\\", "\n", "\\n") return r.Replace(s) } func escapeLabelValue(s string) string { r := strings.NewReplacer("\\", "\\\\", "\n", "\\n", `"`, `\"`) return r.Replace(s) } // ── Pre-declared counters ──────────────────────────────────────────── // // These are the counters Tinyforge surfaces to operators. Adding more is // a one-line NewCounter call at the call site — no central catalogue, // just keep names lowercase_snake with the `tinyforge_` prefix. var ( HTTPRequestsTotal = NewCounter( "tinyforge_http_requests_total", "Total HTTP requests handled, partitioned by method and outcome class.", "method", "status_class", ) DeploysTotal = NewCounter( "tinyforge_deploys_total", "Total deploys dispatched, partitioned by source kind and outcome.", "source_kind", "outcome", ) WebhookDeliveriesTotal = NewCounter( "tinyforge_webhook_deliveries_total", "Total inbound webhook deliveries, partitioned by outcome.", "outcome", ) SchedulerTicksTotal = NewCounter( "tinyforge_scheduler_ticks_total", "Total scheduler ticks. The dispatched counter is the success measure.", ) SchedulerDispatchedTotal = NewCounter( "tinyforge_scheduler_dispatched_total", "Triggers actually dispatched by the scheduler.", ) OutboundNotifyTotal = NewCounter( "tinyforge_outbound_notify_total", "Outbound notification dispatch attempts, partitioned by outcome.", "outcome", ) )