410a131cec
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
+ {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
/apps/[id] edit form onto the same components (removes the duplication). Add
vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
label hints; dashboard + /apps "Total workloads" count only source_kind workloads
(drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.
Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
251 lines
7.8 KiB
Go
251 lines
7.8 KiB
Go
// Package metrics provides a minimal Prometheus text-format exposition
// of Tinyforge's operational counters. We deliberately do NOT import the
// official client_golang library: the metrics set here is small, the text
// format is simple, and avoiding the dependency keeps `tinyforge` a fast
// single-binary install.
//
// Every series value is a sync/atomic.Int64 — cheap and safe to touch
// from any goroutine; only the lookup of a series in its counter takes a
// short mutex. Histograms / gauges aren't modeled yet; the few we need
// (request latency p50/p99) live downstream of slog and can be added
// when the operator actually wants them.
|
|
package metrics
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
)
|
|
|
|
// Registry holds the process-wide counter set. A single zero-value
|
|
// Registry is ready to use — see DefaultRegistry below for the
|
|
// recommended way to grab the global handle.
|
|
type Registry struct {
|
|
mu sync.RWMutex
|
|
counters map[string]*counter
|
|
}
|
|
|
|
// counter is the registry-internal state behind one metric name: its
// metadata plus one atomic cell per distinct label-value tuple.
type counter struct {
	name string // metric name written on the HELP, TYPE and sample lines
	help string // free-form description emitted on the HELP line
	// labels holds the label names in registration order; label values
	// are matched to them by position.
	labels []string
	// series maps an encoded label-value tuple (see encodeKey) to the
	// running total for that tuple.
	series map[string]*atomic.Int64
	// seriesMu guards every lookup and insertion on the series map. The
	// totals themselves are atomics, so once a cell pointer has been
	// fetched it is incremented and read without holding the lock.
	seriesMu sync.Mutex
}
|
|
|
|
// DefaultRegistry is the process-wide registry. All Tinyforge metrics
|
|
// register against it. Tests can instantiate their own Registry.
|
|
var DefaultRegistry = newRegistry()
|
|
|
|
func newRegistry() *Registry {
|
|
return &Registry{counters: make(map[string]*counter)}
|
|
}
|
|
|
|
// NewCounter declares a counter on the default registry. Call once at
|
|
// package init or during NewServer; subsequent calls with the same name
|
|
// return the existing counter so re-registration is safe.
|
|
//
|
|
// label names define the dimensions; calls to Inc must pass values in
|
|
// the same order. Use the empty slice for label-less counters.
|
|
func NewCounter(name, help string, labels ...string) *Counter {
|
|
return DefaultRegistry.NewCounter(name, help, labels...)
|
|
}
|
|
|
|
// NewCounter on a specific Registry — useful in tests.
|
|
func (r *Registry) NewCounter(name, help string, labels ...string) *Counter {
|
|
r.mu.Lock()
|
|
defer r.mu.Unlock()
|
|
if c, ok := r.counters[name]; ok {
|
|
return &Counter{c: c}
|
|
}
|
|
c := &counter{
|
|
name: name,
|
|
help: help,
|
|
labels: append([]string(nil), labels...),
|
|
series: make(map[string]*atomic.Int64),
|
|
}
|
|
r.counters[name] = c
|
|
return &Counter{c: c}
|
|
}
|
|
|
|
// Counter is the public handle returned by NewCounter. Pass it around as
|
|
// a value — the underlying state lives on the registry.
|
|
type Counter struct {
|
|
c *counter
|
|
}
|
|
|
|
// Inc atomically increments the counter for the given label values.
|
|
// Passing the wrong number of values is a programmer error; we surface
|
|
// it as a panic during testing rather than silently aggregating into a
|
|
// bogus series.
|
|
func (c Counter) Inc(labelValues ...string) {
|
|
c.Add(1, labelValues...)
|
|
}
|
|
|
|
// Add atomically adds delta. Negative delta is rejected (counters are
|
|
// monotonic by definition).
|
|
func (c Counter) Add(delta int64, labelValues ...string) {
|
|
if delta < 0 {
|
|
return
|
|
}
|
|
if len(labelValues) != len(c.c.labels) {
|
|
// Programmer error. This used to panic to surface the bug, but Add
|
|
// runs on hot paths (HTTP middleware, deploy dispatch) and several
|
|
// callers are off the request goroutine, where a panic would take
|
|
// down the whole process rather than a single request. Log loudly
|
|
// and drop the sample so a mislabeled call site can never crash the
|
|
// server; the bug still shows up immediately in the logs and in
|
|
// tests via the error output.
|
|
slog.Error("metrics: label count mismatch — dropping sample",
|
|
"counter", c.c.name, "want", len(c.c.labels), "got", len(labelValues))
|
|
return
|
|
}
|
|
key := encodeKey(labelValues)
|
|
c.c.seriesMu.Lock()
|
|
v, ok := c.c.series[key]
|
|
if !ok {
|
|
v = new(atomic.Int64)
|
|
c.c.series[key] = v
|
|
}
|
|
c.c.seriesMu.Unlock()
|
|
v.Add(delta)
|
|
}
|
|
|
|
// encodeKey turns an ordered list of label values into the string used to
// index a counter's series map, by joining the values with the ASCII unit
// separator (0x1f). Values are stored raw: escaping for the text format
// is applied later, at exposition time. A value that itself contains 0x1f
// is not escaped here, so such a tuple cannot be split back unambiguously.
func encodeKey(values []string) string {
	const unitSeparator = "\x1f"
	return strings.Join(values, unitSeparator)
}
|
|
|
|
// WritePrometheus dumps the registry in the text exposition format
|
|
// Prometheus / VictoriaMetrics / OpenMetrics understands. Stable
|
|
// ordering: counters alphabetical by name; series alphabetical by
|
|
// encoded label tuple.
|
|
func (r *Registry) WritePrometheus(w io.Writer) error {
|
|
r.mu.RLock()
|
|
names := make([]string, 0, len(r.counters))
|
|
for n := range r.counters {
|
|
names = append(names, n)
|
|
}
|
|
r.mu.RUnlock()
|
|
sort.Strings(names)
|
|
|
|
for _, name := range names {
|
|
r.mu.RLock()
|
|
c := r.counters[name]
|
|
r.mu.RUnlock()
|
|
if err := writeCounter(w, c); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func writeCounter(w io.Writer, c *counter) error {
|
|
if _, err := fmt.Fprintf(w, "# HELP %s %s\n# TYPE %s counter\n", c.name, escapeHelp(c.help), c.name); err != nil {
|
|
return err
|
|
}
|
|
// Snapshot the series map under a SINGLE lock acquisition. The
|
|
// previous shape acquired+released seriesMu twice per emitted
|
|
// series (once for the key list, once per Load), contending with
|
|
// every hot-path Inc on the HTTP request path. The *atomic.Int64
|
|
// pointers are stable for the lifetime of the registry (we never
|
|
// delete entries), so reading them after the unlock is safe.
|
|
type sample struct {
|
|
key string
|
|
val *atomic.Int64
|
|
}
|
|
c.seriesMu.Lock()
|
|
samples := make([]sample, 0, len(c.series))
|
|
for k, v := range c.series {
|
|
samples = append(samples, sample{k, v})
|
|
}
|
|
c.seriesMu.Unlock()
|
|
|
|
sort.Slice(samples, func(i, j int) bool { return samples[i].key < samples[j].key })
|
|
|
|
for _, s := range samples {
|
|
val := s.val.Load()
|
|
labels := decodeKey(s.key, c.labels)
|
|
if labels == "" {
|
|
if _, err := fmt.Fprintf(w, "%s %d\n", c.name, val); err != nil {
|
|
return err
|
|
}
|
|
continue
|
|
}
|
|
if _, err := fmt.Fprintf(w, "%s{%s} %d\n", c.name, labels, val); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func decodeKey(key string, names []string) string {
|
|
if key == "" || len(names) == 0 {
|
|
return ""
|
|
}
|
|
values := strings.Split(key, "\x1f")
|
|
if len(values) != len(names) {
|
|
// Should not happen — encodeKey/decode are symmetric.
|
|
return ""
|
|
}
|
|
parts := make([]string, len(names))
|
|
for i, n := range names {
|
|
parts[i] = fmt.Sprintf(`%s="%s"`, n, escapeLabelValue(values[i]))
|
|
}
|
|
return strings.Join(parts, ",")
|
|
}
|
|
|
|
// helpEscaper applies the HELP-text escapes. It is built once at package
// init rather than on every call; a strings.Replacer is safe for
// concurrent use.
var helpEscaper = strings.NewReplacer("\\", "\\\\", "\n", "\\n")

// escapeHelp escapes s for use on a "# HELP" line: a backslash becomes
// two backslashes and a line feed becomes the two characters `\n`.
// Double quotes are left untouched.
func escapeHelp(s string) string {
	return helpEscaper.Replace(s)
}
|
|
|
|
// labelValueEscaper applies the label-value escapes. It is built once at
// package init rather than once per label per series per scrape; a
// strings.Replacer is safe for concurrent use.
var labelValueEscaper = strings.NewReplacer("\\", "\\\\", "\n", "\\n", `"`, `\"`)

// escapeLabelValue escapes s for use inside the double quotes of a label
// value: a backslash becomes two backslashes, a line feed becomes the two
// characters `\n`, and a double quote becomes `\"`.
func escapeLabelValue(s string) string {
	return labelValueEscaper.Replace(s)
}
|
|
|
|
// ── Pre-declared counters ────────────────────────────────────────────
|
|
//
|
|
// These are the counters Tinyforge surfaces to operators. Adding more is
|
|
// a one-line NewCounter call at the call site — no central catalogue,
|
|
// just keep names lowercase_snake with the `tinyforge_` prefix.
|
|
|
|
var (
|
|
HTTPRequestsTotal = NewCounter(
|
|
"tinyforge_http_requests_total",
|
|
"Total HTTP requests handled, partitioned by method and outcome class.",
|
|
"method", "status_class",
|
|
)
|
|
DeploysTotal = NewCounter(
|
|
"tinyforge_deploys_total",
|
|
"Total deploys dispatched, partitioned by source kind and outcome.",
|
|
"source_kind", "outcome",
|
|
)
|
|
WebhookDeliveriesTotal = NewCounter(
|
|
"tinyforge_webhook_deliveries_total",
|
|
"Total inbound webhook deliveries, partitioned by outcome.",
|
|
"outcome",
|
|
)
|
|
SchedulerTicksTotal = NewCounter(
|
|
"tinyforge_scheduler_ticks_total",
|
|
"Total scheduler ticks. The dispatched counter is the success measure.",
|
|
)
|
|
SchedulerDispatchedTotal = NewCounter(
|
|
"tinyforge_scheduler_dispatched_total",
|
|
"Triggers actually dispatched by the scheduler.",
|
|
)
|
|
OutboundNotifyTotal = NewCounter(
|
|
"tinyforge_outbound_notify_total",
|
|
"Outbound notification dispatch attempts, partitioned by outcome.",
|
|
"outcome",
|
|
)
|
|
)
|