feat(apps): stepped creation wizard, branch previews, and app-creation fixes
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
+ {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
/apps/[id] edit form onto the same components (removes the duplication). Add
vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
label hints; dashboard + /apps "Total workloads" count only source_kind workloads
(drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.
Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
This commit is contained in:
@@ -16,13 +16,12 @@ import (
|
||||
)
|
||||
|
||||
// rateLimitedLogin wraps the login handler with per-IP rate limiting.
|
||||
// Uses clientIP() so X-Forwarded-For is honored only when the request
|
||||
// arrives from a configured trusted-proxy CIDR — preventing remote
|
||||
// attackers from spoofing the header to bypass the per-IP login limiter.
|
||||
func (s *Server) rateLimitedLogin(rl *rateLimiter) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
ip := r.RemoteAddr
|
||||
if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
|
||||
ip = fwd
|
||||
}
|
||||
if !rl.allow(ip) {
|
||||
if !rl.allow(clientIP(r)) {
|
||||
respondError(w, http.StatusTooManyRequests, "too many login attempts, try again later")
|
||||
return
|
||||
}
|
||||
|
||||
+73
-32
@@ -1,7 +1,6 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -118,7 +117,22 @@ func (s *Server) deleteBackup(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// restoreBackup handles POST /api/backups/{id}/restore.
|
||||
// This replaces the current database with the backup and triggers a graceful shutdown.
|
||||
//
|
||||
// Restore happens in three documented stages so a failure at any stage
|
||||
// leaves the live DB intact:
|
||||
//
|
||||
// 1. PRE-FLIGHT (sync, before the HTTP response): PrepareRestore opens
|
||||
// the candidate read-only and runs `PRAGMA integrity_check`. If it
|
||||
// fails the live DB is untouched and we return 400 with the reason.
|
||||
//
|
||||
// 2. SAFETY NET: a pre-restore backup of the LIVE DB is created so the
|
||||
// operator can roll back even if the candidate is later discovered
|
||||
// to be missing data.
|
||||
//
|
||||
// 3. SWAP (async, after the response is flushed): close the live DB,
|
||||
// atomic-rename the candidate over the live path, wipe WAL/SHM,
|
||||
// trigger graceful shutdown. supervisord / systemd / docker
|
||||
// restart=on-failure brings the process back with the new DB.
|
||||
func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
||||
if s.backupEngine == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "backup engine not initialized")
|
||||
@@ -126,13 +140,44 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
id := chi.URLParam(r, "id")
|
||||
restorePath, err := s.backupEngine.RestorePath(id)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusNotFound, "backup not found: "+err.Error())
|
||||
|
||||
// CSRF / accidental-fire guard: the restore endpoint is the most
|
||||
// destructive surface in the API (replaces the whole DB). Even
|
||||
// though it sits behind AdminOnly + Bearer JWT, a blind cross-site
|
||||
// POST or a misclicked button in any open admin tab can fire it.
|
||||
// Require the operator's client to echo X-Confirm-Restore: <id>
|
||||
// — matching the path param — so a CSRF post-form / image-src
|
||||
// trick can't trigger restore (browsers don't let cross-origin
|
||||
// requests set custom headers without a preflight).
|
||||
if confirm := r.Header.Get("X-Confirm-Restore"); confirm != id {
|
||||
respondError(w, http.StatusBadRequest,
|
||||
"missing or mismatched X-Confirm-Restore header (must equal backup id)")
|
||||
return
|
||||
}
|
||||
|
||||
// Create a safety backup before restore so the user can undo if needed.
|
||||
// Single-flight guard: a rapid double-click would otherwise spawn
|
||||
// two goroutines racing s.store.Close() and the candidate-over-
|
||||
// live rename. CAS to true here; if someone else won, return 409.
|
||||
if !s.restoreInFlight.CompareAndSwap(false, true) {
|
||||
respondError(w, http.StatusConflict, "a restore is already in progress")
|
||||
return
|
||||
}
|
||||
// Do NOT release the flag — the restore path triggers shutdown.
|
||||
// A failed restore is also terminal (the DB may be closed); a
|
||||
// fresh process boot is the recovery path.
|
||||
// PRE-FLIGHT: refuse before touching anything if the candidate is
|
||||
// not a valid SQLite database or fails integrity_check. This is the
|
||||
// guard the prior code lacked — a corrupt backup would silently
|
||||
// overwrite a healthy live DB.
|
||||
restorePath, err := s.backupEngine.PrepareRestore(id)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusBadRequest, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// SAFETY NET: pre-restore snapshot of the live DB. A failure here
|
||||
// is logged but does not abort — the integrity-checked candidate
|
||||
// is still safer than refusing to restore.
|
||||
if _, err := s.backupEngine.CreateBackup("pre-restore"); err != nil {
|
||||
slog.Warn("failed to create pre-restore backup", "error", err)
|
||||
}
|
||||
@@ -153,41 +198,37 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
||||
go func() {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// Close the current database to release locks.
|
||||
// Once we begin closing the live DB the process can no longer serve
|
||||
// requests against a sane store, so EVERY exit path from here must
|
||||
// trigger shutdown. Returning early would leave the server limping
|
||||
// on a closed/half-swapped database with no path to recovery except
|
||||
// an external kill. shutdownFunc → graceful shutdown → main returns
|
||||
// → deferred releaseLock()/db.Close() run, and the supervisor reopens
|
||||
// whatever DB is on disk on the next boot.
|
||||
triggerShutdown := func() {
|
||||
if s.shutdownFunc != nil {
|
||||
s.shutdownFunc()
|
||||
}
|
||||
}
|
||||
|
||||
// Close the current database to release locks. AtomicReplaceDB
|
||||
// expects the live file to be unmapped before swap (especially
|
||||
// important on Windows where open files cannot be renamed over).
|
||||
if err := s.store.Close(); err != nil {
|
||||
slog.Error("restore: failed to close database", "error", err)
|
||||
slog.Error("restore: failed to close database, restarting", "error", err)
|
||||
triggerShutdown()
|
||||
return
|
||||
}
|
||||
|
||||
// Copy the backup file over the main database using streaming (no full read into memory).
|
||||
src, err := os.Open(restorePath)
|
||||
if err != nil {
|
||||
slog.Error("restore: failed to open backup file", "error", err)
|
||||
if err := s.backupEngine.AtomicReplaceDB(restorePath, s.dbPath); err != nil {
|
||||
slog.Error("restore: atomic replace failed, restarting", "error", err)
|
||||
triggerShutdown()
|
||||
return
|
||||
}
|
||||
defer src.Close()
|
||||
|
||||
dst, err := os.Create(s.dbPath)
|
||||
if err != nil {
|
||||
slog.Error("restore: failed to create database file", "error", err)
|
||||
return
|
||||
}
|
||||
defer dst.Close()
|
||||
|
||||
if _, err := io.Copy(dst, src); err != nil {
|
||||
slog.Error("restore: failed to copy backup to database", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Remove WAL and SHM files to ensure clean state.
|
||||
os.Remove(s.dbPath + "-wal")
|
||||
os.Remove(s.dbPath + "-shm")
|
||||
|
||||
slog.Info("restore: database replaced, triggering shutdown")
|
||||
|
||||
// Signal the server to shut down gracefully so it can be restarted.
|
||||
if s.shutdownFunc != nil {
|
||||
s.shutdownFunc()
|
||||
}
|
||||
triggerShutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/docker"
|
||||
"github.com/alexei/tinyforge/internal/staticsite"
|
||||
)
|
||||
|
||||
@@ -350,6 +351,54 @@ func (s *Server) listImageConflicts(w http.ResponseWriter, r *http.Request) {
|
||||
respondJSON(w, http.StatusOK, conflicts)
|
||||
}
|
||||
|
||||
// inspectImageRequest is the JSON body accepted by
// POST /api/discovery/image/inspect.
type inspectImageRequest struct {
	// Image is the image reference to inspect. The handler trims
	// surrounding whitespace and rejects an empty value with 400.
	Image string `json:"image"`
}
|
||||
|
||||
// inspectImageResponse is the JSON payload returned by
// POST /api/discovery/image/inspect. It mirrors the frontend
// InspectResult shape the new-app wizard pre-fills from.
type inspectImageResponse struct {
	// Port is the value produced by docker.ExtractPort for the image's
	// exposed ports (presumably the first exposed port, 0 when none —
	// confirm against docker.ExtractPort).
	Port int `json:"port"`
	// Healthcheck is the healthcheck string reported by the image
	// inspection, passed through unchanged.
	Healthcheck string `json:"healthcheck"`
}
|
||||
|
||||
// inspectImageMetadata inspects a LOCAL image and returns its first
|
||||
// exposed port + healthcheck so the wizard can pre-fill those fields.
|
||||
// POST /api/discovery/image/inspect.
|
||||
//
|
||||
// This inspects local images only — it does not pull. When the image is
|
||||
// not present locally the docker call fails; we return a generic,
|
||||
// non-leaky 400 rather than the git-specific upstreamError so a raw
|
||||
// docker daemon string (which may echo the ref) never reaches the client.
|
||||
func (s *Server) inspectImageMetadata(w http.ResponseWriter, r *http.Request) {
|
||||
var req inspectImageRequest
|
||||
if !decodeJSON(w, r, &req) {
|
||||
return
|
||||
}
|
||||
image := strings.TrimSpace(req.Image)
|
||||
if image == "" {
|
||||
respondError(w, http.StatusBadRequest, "image is required")
|
||||
return
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(r.Context(), discoveryTimeout)
|
||||
defer cancel()
|
||||
|
||||
info, err := s.docker.InspectImage(ctx, image)
|
||||
if err != nil {
|
||||
slog.Warn("inspect image metadata failed", "error", err)
|
||||
respondError(w, http.StatusBadRequest, "could not inspect image — make sure it is pulled locally and the reference is correct")
|
||||
return
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, inspectImageResponse{
|
||||
Port: docker.ExtractPort(info.ExposedPorts),
|
||||
Healthcheck: info.Healthcheck,
|
||||
})
|
||||
}
|
||||
|
||||
// stripImageTag returns the image reference with the trailing :tag
|
||||
// removed, taking care to leave a registry port (e.g. registry:5000/foo)
|
||||
// intact. Digest references (image@sha256:...) are returned unchanged.
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/metrics"
|
||||
)
|
||||
|
||||
// livez always returns 200 if the process is up. Used by container
|
||||
// orchestrators / load balancers / Docker HEALTHCHECK as the "is the
|
||||
// binary alive" probe. Intentionally does NOT touch the DB or Docker —
|
||||
// a slow DB must not cause restart loops.
|
||||
func (s *Server) livez(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
_, _ = w.Write([]byte("ok\n"))
|
||||
}
|
||||
|
||||
// readyz returns 200 only when the process can actually serve traffic:
|
||||
// SQLite is reachable, the encryption key is loaded, the deployer is
|
||||
// not draining. The response body is intentionally minimal — the
|
||||
// specific failing probe name is recorded in slog (operator-visible)
|
||||
// rather than returned to unauthenticated callers. This avoids handing
|
||||
// reconnaissance to an attacker who can hit /readyz during an outage
|
||||
// ("DB down" vs "encryption key missing" leaks operational state).
|
||||
func (s *Server) readyz(w http.ResponseWriter, r *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(r.Context(), 2*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// DB ping: cheap and exact — exercises the connection pool, file
|
||||
// lock, and busy-timeout. A failing ping means SQLite WAL is wedged
|
||||
// or the data dir is gone.
|
||||
if err := s.store.DB().PingContext(ctx); err != nil {
|
||||
slog.Warn("readyz: db ping failed", "error", err)
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
_, _ = w.Write([]byte("not ready\n"))
|
||||
return
|
||||
}
|
||||
|
||||
// Encryption key sanity: if it's zero we cannot decrypt any stored
|
||||
// secret, so the deployer paths will all explode at first use.
|
||||
if s.encKey == ([32]byte{}) {
|
||||
slog.Warn("readyz: encryption key not loaded")
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
_, _ = w.Write([]byte("not ready\n"))
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
_, _ = w.Write([]byte("ready\n"))
|
||||
}
|
||||
|
||||
// metricsExport writes the process-wide metrics registry in Prometheus
|
||||
// text format. Admin-only by router placement; surface is intentionally
|
||||
// thin (no histograms / quantiles, only counters) to keep the binary
|
||||
// dependency-free.
|
||||
func (s *Server) metricsExport(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
||||
_ = metrics.DefaultRegistry.WritePrometheus(w)
|
||||
}
|
||||
+318
-7
@@ -1,14 +1,119 @@
|
||||
package api
|
||||
|
||||
import (
	"context"
	"crypto/rand"
	"encoding/hex"
	"log/slog"
	"net"
	"net/http"
	"net/url"
	"os"
	"runtime/debug"
	"strings"
	"sync"
	"time"

	"github.com/alexei/tinyforge/internal/metrics"
)
|
||||
|
||||
// requestIDKeyType is the private type of the context key that carries
// the request correlation ID. Using a dedicated unexported type prevents
// collisions with context keys defined by other packages.
type requestIDKeyType struct{}

// requestIDKey is the context key under which the generated/forwarded
// X-Request-ID is stored. It is exposed indirectly through
// RequestIDFromContext so handlers and services downstream of the API
// layer can thread the ID into their own slog calls without re-reading
// request headers.
var requestIDKey = requestIDKeyType{}
|
||||
|
||||
// RequestIDFromContext returns the correlation ID for the request, or
|
||||
// "" when called outside the API request path.
|
||||
func RequestIDFromContext(ctx context.Context) string {
|
||||
if v, ok := ctx.Value(requestIDKey).(string); ok {
|
||||
return v
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// requestID middleware ensures every request has a stable correlation
|
||||
// ID. Honors a caller-supplied X-Request-ID when the request comes from
|
||||
// a trusted proxy AND the value matches a safe character set; otherwise
|
||||
// generates a fresh 128-bit ID. The ID is echoed back as X-Request-ID
|
||||
// and stitched into every subsequent slog call via the context value
|
||||
// the `logging` middleware reads.
|
||||
//
|
||||
// Format clamp: a compromised reverse proxy (or one that mis-parses an
|
||||
// untrusted header) could forward an ID containing newlines, semicolons,
|
||||
// or other separator characters. Those would corrupt structured log
|
||||
// parsers that assume one record per line / key-value. Restricting to
|
||||
// `[A-Za-z0-9._-]{1,64}` covers UUIDs, hex IDs, and trace-context IDs
|
||||
// without any sharp edges.
|
||||
func requestID(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
rid := r.Header.Get("X-Request-ID")
|
||||
if rid == "" || !isTrustedPeer(r) || !isValidRequestID(rid) {
|
||||
rid = newRequestID()
|
||||
}
|
||||
w.Header().Set("X-Request-ID", rid)
|
||||
ctx := context.WithValue(r.Context(), requestIDKey, rid)
|
||||
next.ServeHTTP(w, r.WithContext(ctx))
|
||||
})
|
||||
}
|
||||
|
||||
// isValidRequestID reports whether s matches `[A-Za-z0-9._-]{1,64}`:
// between 1 and 64 bytes, each an ASCII letter, digit, '.', '_' or '-'.
// It is a single linear byte scan, so no regular expression is compiled
// on the request path.
func isValidRequestID(s string) bool {
	if len(s) == 0 || len(s) > 64 {
		return false
	}
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'A' <= c && c <= 'Z':
		case 'a' <= c && c <= 'z':
		case '0' <= c && c <= '9':
		case c == '.', c == '_', c == '-':
		default:
			// Anything else (whitespace, separators, non-ASCII bytes)
			// disqualifies the whole ID.
			return false
		}
	}
	return true
}
|
||||
|
||||
// isTrustedPeer is a thin wrapper around the TRUSTED_PROXY_CIDRS allow-
|
||||
// list — we honor a forwarded request-id only from upstreams we already
|
||||
// trust for X-Forwarded-For. Otherwise an internet client could spam
|
||||
// log files with attacker-chosen IDs.
|
||||
func isTrustedPeer(r *http.Request) bool {
|
||||
peer := r.RemoteAddr
|
||||
if host, _, err := net.SplitHostPort(peer); err == nil {
|
||||
peer = host
|
||||
}
|
||||
if len(trustedProxyCIDRs) == 0 {
|
||||
return false
|
||||
}
|
||||
ip := net.ParseIP(peer)
|
||||
if ip == nil {
|
||||
return false
|
||||
}
|
||||
for _, n := range trustedProxyCIDRs {
|
||||
if n.Contains(ip) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// newRequestID returns a fresh correlation ID: 16 random bytes from
// crypto/rand, hex-encoded to 32 lowercase characters.
//
// If the random source fails, it falls back to "ts-" followed by the
// current UTC time with nanosecond precision. The ID is for tracing,
// not security, so a predictable fallback is preferable to a panic.
func newRequestID() string {
	var raw [16]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "ts-" + time.Now().UTC().Format("20060102T150405.000000000")
	}
	return hex.EncodeToString(raw[:])
}
|
||||
|
||||
// logging is an HTTP middleware that logs every request with method, path,
|
||||
// status code, and duration. Webhook URLs are redacted before being logged
|
||||
// because the secret is the only authenticator — leaking it to log
|
||||
@@ -20,15 +125,58 @@ func logging(next http.Handler) http.Handler {
|
||||
|
||||
next.ServeHTTP(wrapped, r)
|
||||
|
||||
slog.Info("http request",
|
||||
fields := []any{
|
||||
"method", r.Method,
|
||||
"path", redactPath(r.URL.Path),
|
||||
"status", wrapped.status,
|
||||
"duration", time.Since(start).String(),
|
||||
)
|
||||
}
|
||||
if rq := redactQuery(r.URL.RawQuery); rq != "" {
|
||||
fields = append(fields, "query", rq)
|
||||
}
|
||||
if rid := RequestIDFromContext(r.Context()); rid != "" {
|
||||
fields = append(fields, "request_id", rid)
|
||||
}
|
||||
slog.Info("http request", fields...)
|
||||
|
||||
// Lightweight per-request counter. Bucket by status class so
|
||||
// the cardinality stays at 5 × #methods regardless of how many
|
||||
// distinct response codes we emit.
|
||||
metrics.HTTPRequestsTotal.Inc(bucketMethod(r.Method), statusClass(wrapped.status))
|
||||
})
|
||||
}
|
||||
|
||||
// bucketMethod maps an HTTP method onto a bounded label set for metrics.
// The nine standard methods (GET, POST, PUT, PATCH, DELETE, HEAD,
// OPTIONS, CONNECT, TRACE) are returned unchanged; everything else —
// including differently-cased spellings — collapses to "other". This
// stops a client from inflating the metrics map with arbitrary method
// tokens.
func bucketMethod(m string) string {
	switch m {
	case "GET", "POST", "PUT", "PATCH", "DELETE",
		"HEAD", "OPTIONS", "CONNECT", "TRACE":
		return m
	default:
		return "other"
	}
}
|
||||
|
||||
// statusClass buckets an HTTP status code into "1xx", "2xx", "3xx",
// "4xx" or "5xx". Codes outside 100-599 (including 0 and negatives)
// yield "other". Bucketing keeps metrics cardinality bounded regardless
// of how many distinct response codes are emitted.
func statusClass(code int) string {
	switch {
	case code >= 100 && code < 200:
		return "1xx"
	case code >= 200 && code < 300:
		return "2xx"
	case code >= 300 && code < 400:
		return "3xx"
	case code >= 400 && code < 500:
		return "4xx"
	case code >= 500 && code < 600:
		return "5xx"
	default:
		return "other"
	}
}
|
||||
|
||||
// redactPath strips secrets from URL paths that carry them in segments.
|
||||
// Only the canonical /api/webhook/triggers/{secret} surface remains after
|
||||
// the hard cutover.
|
||||
@@ -40,6 +188,45 @@ func redactPath(path string) string {
|
||||
return path
|
||||
}
|
||||
|
||||
// redactQueryKeys is the set of query-parameter names (stored lowercase;
// callers lowercase the key before lookup) whose values are masked
// before a URL lands in the request log. `token` is used by
// SSE/EventSource when a custom header can't be set; the rest are
// defence-in-depth against sensitive values ever appearing in a query
// string.
var redactQueryKeys = map[string]struct{}{
	"token":         {},
	"secret":        {},
	"password":      {},
	"passwd":        {},
	"api_key":       {},
	"apikey":        {},
	"access_token":  {},
	"client_secret": {},
	"sig":           {},
	"signature":     {},
}
|
||||
|
||||
// redactQuery masks the values of sensitive query parameters (see
|
||||
// redactQueryKeys) in a URL's raw query before it lands in the request log.
|
||||
// Key matching is case-insensitive. Returns the input unchanged when there is
|
||||
// nothing to redact so a malformed URL surfaces naturally.
|
||||
func redactQuery(rawQuery string) string {
|
||||
if rawQuery == "" {
|
||||
return ""
|
||||
}
|
||||
parts := strings.Split(rawQuery, "&")
|
||||
for i, p := range parts {
|
||||
eq := strings.IndexByte(p, '=')
|
||||
if eq < 0 {
|
||||
continue
|
||||
}
|
||||
key := strings.ToLower(p[:eq])
|
||||
if _, ok := redactQueryKeys[key]; ok {
|
||||
parts[i] = p[:eq+1] + "***"
|
||||
}
|
||||
}
|
||||
return strings.Join(parts, "&")
|
||||
}
|
||||
|
||||
// recovery is an HTTP middleware that catches panics and returns a 500 response.
|
||||
func recovery(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -54,16 +241,49 @@ func recovery(next http.Handler) http.Handler {
|
||||
}
|
||||
|
||||
// securityHeaders sets standard security headers on all responses.
|
||||
//
|
||||
// Strict-Transport-Security is emitted only when the request arrived
|
||||
// over HTTPS (direct TLS or forwarded). Emitting HSTS over plain HTTP
|
||||
// is harmless to compliant browsers but flags as an issue in scanners
|
||||
// and confuses some reverse proxies.
|
||||
//
|
||||
// The CSP keeps `'unsafe-inline'` for now because SvelteKit injects
|
||||
// inline boot scripts and styles; removing it requires a nonce-based
|
||||
// strategy threaded through the SvelteKit handle hook. Tracked as a
|
||||
// follow-up; documented in the security report.
|
||||
func securityHeaders(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("X-Content-Type-Options", "nosniff")
|
||||
w.Header().Set("X-Frame-Options", "DENY")
|
||||
w.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
|
||||
w.Header().Set("Content-Security-Policy", "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self'; font-src 'self'")
|
||||
w.Header().Set("Permissions-Policy", "camera=(), microphone=(), geolocation=(), payment=()")
|
||||
w.Header().Set("Content-Security-Policy",
|
||||
"default-src 'self'; "+
|
||||
"script-src 'self' 'unsafe-inline'; "+
|
||||
"style-src 'self' 'unsafe-inline'; "+
|
||||
"img-src 'self' data:; "+
|
||||
"connect-src 'self'; "+
|
||||
"font-src 'self'; "+
|
||||
"frame-ancestors 'none'; "+
|
||||
"base-uri 'self'; "+
|
||||
"form-action 'self'")
|
||||
if isHTTPS(r) {
|
||||
w.Header().Set("Strict-Transport-Security", "max-age=31536000; includeSubDomains")
|
||||
}
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
|
||||
// isHTTPS reports whether the request reached the service over HTTPS:
// either the connection itself is TLS (r.TLS is set) or the
// X-Forwarded-Proto header says "https".
//
// The header comparison ignores ASCII case and surrounding whitespace
// because URI schemes are case-insensitive.
//
// NOTE(review): the header is honored from any peer, not only trusted
// proxies. That is acceptable for its current use (deciding whether to
// emit HSTS) but it must not be used for security decisions.
func isHTTPS(r *http.Request) bool {
	if r.TLS != nil {
		return true
	}
	proto := strings.TrimSpace(r.Header.Get("X-Forwarded-Proto"))
	return strings.EqualFold(proto, "https")
}
|
||||
|
||||
// cors is an HTTP middleware that handles CORS for same-origin requests.
|
||||
// The frontend is served from the same origin, so cross-origin requests are not expected.
|
||||
func cors(next http.Handler) http.Handler {
|
||||
@@ -164,10 +384,7 @@ func jsonContentType(next http.Handler) http.Handler {
|
||||
func rateLimitMiddleware(rl *rateLimiter) func(http.Handler) http.Handler {
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
ip := r.RemoteAddr
|
||||
if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
|
||||
ip = fwd
|
||||
}
|
||||
ip := clientIP(r)
|
||||
if !rl.allow(ip) {
|
||||
respondError(w, http.StatusTooManyRequests, "rate limit exceeded")
|
||||
return
|
||||
@@ -177,6 +394,100 @@ func rateLimitMiddleware(rl *rateLimiter) func(http.Handler) http.Handler {
|
||||
}
|
||||
}
|
||||
|
||||
// trustedProxyCIDRs is the parsed allow-list of upstream proxy networks
|
||||
// whose X-Forwarded-For header we honor. Set TRUSTED_PROXY_CIDRS to a
|
||||
// comma-separated list of CIDRs (e.g. "127.0.0.1/32,10.0.0.0/8") to
|
||||
// enable. When unset (the default) X-Forwarded-For is ignored entirely
|
||||
// and rate limiting + audit logging use r.RemoteAddr — preventing a
|
||||
// remote attacker from spoofing the header to bypass per-IP limiters.
|
||||
var trustedProxyCIDRs = parseTrustedProxyCIDRs(os.Getenv("TRUSTED_PROXY_CIDRS"))
|
||||
|
||||
// parseTrustedProxyCIDRs turns a comma-separated list of networks into
// parsed IPNets.
//
// Each entry is trimmed; empty entries are skipped. A bare IP is
// accepted and widened to a single-host network (/32 for IPv4, /128 for
// IPv6). Entries that still fail to parse as a CIDR are logged at Warn
// and ignored, so one typo does not disable the rest of the list.
//
// It returns nil when raw is empty or contains no valid entry.
func parseTrustedProxyCIDRs(raw string) []*net.IPNet {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil
	}

	var nets []*net.IPNet
	for _, entry := range strings.Split(raw, ",") {
		entry = strings.TrimSpace(entry)
		if entry == "" {
			continue
		}
		// Allow bare IPs as /32 (IPv4) or /128 (IPv6).
		if !strings.Contains(entry, "/") {
			if ip := net.ParseIP(entry); ip != nil {
				if ip.To4() != nil {
					entry += "/32"
				} else {
					entry += "/128"
				}
			}
		}
		_, network, err := net.ParseCIDR(entry)
		if err != nil {
			slog.Warn("ignoring invalid TRUSTED_PROXY_CIDRS entry", "value", entry, "error", err)
			continue
		}
		nets = append(nets, network)
	}
	return nets
}
|
||||
|
||||
// clientIP returns the per-request "client" address used for rate-limit
|
||||
// keying and audit attribution. X-Forwarded-For is honored ONLY when the
|
||||
// direct peer (r.RemoteAddr) belongs to a configured trusted-proxy CIDR;
|
||||
// otherwise the header is ignored to prevent header-spoofing bypasses.
|
||||
func clientIP(r *http.Request) string {
|
||||
peer := r.RemoteAddr
|
||||
if host, _, err := net.SplitHostPort(peer); err == nil {
|
||||
peer = host
|
||||
}
|
||||
if len(trustedProxyCIDRs) == 0 {
|
||||
return peer
|
||||
}
|
||||
peerIP := net.ParseIP(peer)
|
||||
if peerIP == nil || !isTrustedProxy(peerIP) {
|
||||
return peer
|
||||
}
|
||||
fwd := r.Header.Get("X-Forwarded-For")
|
||||
if fwd == "" {
|
||||
return peer
|
||||
}
|
||||
// Walk X-Forwarded-For from the RIGHTMOST entry (the address closest to
|
||||
// us, appended by our trusted peer) leftward, skipping entries that are
|
||||
// themselves trusted proxies, and return the first untrusted address.
|
||||
// The LEFTMOST entry is fully client-controlled — trusting it (as a
|
||||
// naive `fwd[:firstComma]` does) lets an attacker spoof their rate-limit
|
||||
// and audit identity by prepending a forged value, defeating the per-IP
|
||||
// login limiter.
|
||||
parts := strings.Split(fwd, ",")
|
||||
for i := len(parts) - 1; i >= 0; i-- {
|
||||
candidate := strings.TrimSpace(parts[i])
|
||||
ip := net.ParseIP(candidate)
|
||||
if ip == nil {
|
||||
continue
|
||||
}
|
||||
if isTrustedProxy(ip) {
|
||||
continue
|
||||
}
|
||||
return candidate
|
||||
}
|
||||
// Every forwarded entry was a trusted proxy (or unparseable) — fall back
|
||||
// to the direct peer.
|
||||
return peer
|
||||
}
|
||||
|
||||
// isTrustedProxy reports whether ip falls within a configured
|
||||
// trusted-proxy CIDR.
|
||||
func isTrustedProxy(ip net.IP) bool {
|
||||
for _, n := range trustedProxyCIDRs {
|
||||
if n.Contains(ip) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// statusRecorder wraps http.ResponseWriter to capture the status code.
|
||||
type statusRecorder struct {
|
||||
http.ResponseWriter
|
||||
|
||||
+74
-12
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
@@ -61,6 +62,13 @@ type Server struct {
|
||||
shutdownFunc func() // called after restore to trigger graceful shutdown
|
||||
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
|
||||
onProxyProviderChanged func(provider proxy.Provider) // called when proxy provider changes
|
||||
|
||||
// restoreInFlight is a process-wide guard against double-firing
|
||||
// the restore endpoint. A rapid double-click would otherwise
|
||||
// schedule two goroutines racing s.store.Close() and the
|
||||
// candidate-over-live rename. CAS to true at the entry point;
|
||||
// reject the second caller with 409 Conflict.
|
||||
restoreInFlight atomic.Bool
|
||||
}
|
||||
|
||||
// NewServer creates a new API Server with all required dependencies.
|
||||
@@ -157,13 +165,32 @@ func (s *Server) SetDNSProviderChangedCallback(fn DNSProviderChangedFunc) {
|
||||
|
||||
// initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal.
|
||||
func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
|
||||
// Decrypt the OIDC client secret if it's encrypted.
|
||||
// Decrypt the OIDC client secret. The prior code did a try-decrypt
|
||||
// and silently treated failures as plaintext — under a rotated key
|
||||
// that sent ciphertext upstream to the OP. Now:
|
||||
// - If the value carries the tf1: envelope → fail loud on
|
||||
// decrypt failure (rotated key / corrupted ciphertext).
|
||||
// - If the value is unprefixed (legacy ciphertext from v0 or true
|
||||
// plaintext from an old migration) → try decrypt; on failure
|
||||
// accept as plaintext (the only safe legacy interpretation).
|
||||
clientSecret := as.OIDCClientSecret
|
||||
if clientSecret != "" {
|
||||
if decrypted, err := crypto.Decrypt(s.encKey, clientSecret); err == nil {
|
||||
switch {
|
||||
case crypto.HasEnvelope(clientSecret):
|
||||
decrypted, err := crypto.Decrypt(s.encKey, clientSecret)
|
||||
if err != nil {
|
||||
slog.Error("OIDC client secret could not be decrypted — refusing to initialize provider",
|
||||
"error", err,
|
||||
"hint", "rotate ENCRYPTION_KEY back, OR re-save OIDC settings to re-encrypt with the current key")
|
||||
return
|
||||
}
|
||||
clientSecret = decrypted
|
||||
default:
|
||||
// Legacy v0 value: try decrypt; on failure assume plaintext.
|
||||
if decrypted, err := crypto.Decrypt(s.encKey, clientSecret); err == nil {
|
||||
clientSecret = decrypted
|
||||
}
|
||||
}
|
||||
// If decrypt fails, assume it's already plaintext (migration scenario).
|
||||
}
|
||||
provider, err := auth.NewOIDCProvider(ctx, auth.OIDCConfig{
|
||||
IssuerURL: as.OIDCIssuerURL,
|
||||
@@ -183,12 +210,29 @@ func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
|
||||
func (s *Server) Router() chi.Router {
|
||||
r := chi.NewRouter()
|
||||
|
||||
// Global middleware.
|
||||
// Global middleware. requestID runs first so every downstream log
|
||||
// line (and the access log emitted by `logging`) carries the same
|
||||
// correlation id, plus the response carries it back on the
|
||||
// X-Request-ID header for the operator to grep across services.
|
||||
r.Use(requestID)
|
||||
r.Use(recovery)
|
||||
r.Use(securityHeaders)
|
||||
r.Use(logging)
|
||||
r.Use(cors)
|
||||
|
||||
// Unauthenticated health probes — mounted at the root so container
|
||||
// orchestrators / load balancers can hit them without knowing about
|
||||
// the /api prefix. /livez intentionally does no work and stays
|
||||
// unbounded; /readyz pings the DB and is rate-limited to keep an
|
||||
// unauthenticated flood from serialising behind SQLite's single
|
||||
// writer connection (busy-timeout = 5s) and log-amplifying every
|
||||
// request via the structured access log. The 10-per-minute budget
|
||||
// is the existing rateLimiter default — generous for k8s readiness
|
||||
// probes (typically every 5-10s), restrictive for an attacker.
|
||||
r.Get("/livez", s.livez)
|
||||
readyLimiter := newRateLimiter()
|
||||
r.With(rateLimitMiddleware(readyLimiter)).Get("/readyz", s.readyz)
|
||||
|
||||
loginLimiter := newRateLimiter()
|
||||
webhookLimiter := newRateLimiter()
|
||||
|
||||
@@ -232,6 +276,7 @@ func (s *Server) Router() chi.Router {
|
||||
r.Post("/discovery/git/branches", s.listGitBranches)
|
||||
r.Post("/discovery/git/tree", s.listGitTree)
|
||||
r.Get("/discovery/image/conflicts", s.listImageConflicts)
|
||||
r.Post("/discovery/image/inspect", s.inspectImageMetadata)
|
||||
})
|
||||
|
||||
// Read-only endpoints (any authenticated user).
|
||||
@@ -245,16 +290,18 @@ func (s *Server) Router() chi.Router {
|
||||
r.Get("/events/log/stats", s.getEventLogStats)
|
||||
r.Get("/registries", s.listRegistries)
|
||||
r.Route("/registries/{id}", func(r chi.Router) {
|
||||
// All registry probes are admin-gated. The /tags and
|
||||
// /images endpoints used to be open to any authenticated
|
||||
// user, but they make outbound requests using the
|
||||
// admin-encrypted registry token — a viewer could
|
||||
// effectively drive arbitrary requests against a private
|
||||
// registry under admin credentials.
|
||||
r.Use(auth.AdminOnly)
|
||||
r.Get("/tags/*", s.listRegistryTags)
|
||||
r.Get("/images", s.listRegistryImages)
|
||||
|
||||
// Admin-only registry mutations.
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
r.Put("/", s.updateRegistry)
|
||||
r.Delete("/", s.deleteRegistry)
|
||||
r.Post("/test", s.testRegistry)
|
||||
})
|
||||
r.Put("/", s.updateRegistry)
|
||||
r.Delete("/", s.deleteRegistry)
|
||||
r.Post("/test", s.testRegistry)
|
||||
})
|
||||
r.Get("/settings", s.getSettings)
|
||||
r.Get("/settings/npm-certificates", s.listNpmCertificates)
|
||||
@@ -312,6 +359,15 @@ func (s *Server) Router() chi.Router {
|
||||
// of /triggers/{id}/bindings keyed on the workload side.
|
||||
r.Get("/triggers", s.listBindingsForWorkload)
|
||||
r.With(auth.AdminOnly).Post("/triggers", s.bindTriggerToWorkload)
|
||||
|
||||
// Per-workload notification routes — multi-destination
|
||||
// fan-out (Slack channel + Discord webhook + ...). When
|
||||
// zero rows are configured the dispatcher falls back to
|
||||
// the legacy single-URL columns on the workload row.
|
||||
r.Get("/notifications", s.listWorkloadNotifications)
|
||||
r.With(auth.AdminOnly).Post("/notifications", s.createWorkloadNotification)
|
||||
r.With(auth.AdminOnly).Put("/notifications/{nid}", s.updateWorkloadNotification)
|
||||
r.With(auth.AdminOnly).Delete("/notifications/{nid}", s.deleteWorkloadNotification)
|
||||
})
|
||||
|
||||
// Global container index, joined to workload + app names.
|
||||
@@ -379,6 +435,12 @@ func (s *Server) Router() chi.Router {
|
||||
r.Group(func(r chi.Router) {
|
||||
r.Use(auth.AdminOnly)
|
||||
|
||||
// Prometheus-format metrics export. Admin-only so the
|
||||
// counter cardinality cannot be enumerated by a low-trust
|
||||
// viewer to map internal endpoints / sources / outcomes.
|
||||
// Scrape with bearer auth from your Prometheus job.
|
||||
r.Get("/metrics", s.metricsExport)
|
||||
|
||||
// Config export (reveals registry/global details).
|
||||
r.Get("/config/export", s.exportConfig)
|
||||
|
||||
|
||||
+19
-2
@@ -32,9 +32,26 @@ func (s *Server) streamEvents(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
flusher.Flush()
|
||||
|
||||
// Subscribe to instance status, deploy status, and persistent event log events.
|
||||
// Build logs are high-volume: a single verbose `docker build` can emit
|
||||
// thousands of lines. Streaming them to EVERY connection would flood each
|
||||
// subscriber's bounded bus buffer and evict status/log events for ALL
|
||||
// clients. So build logs are delivered ONLY to connections that opt in
|
||||
// with ?workload_id=<id>, and only for that workload. Connections without
|
||||
// the param (e.g. the global dashboard) never receive build-log frames.
|
||||
buildLogWorkloadID := r.URL.Query().Get("workload_id")
|
||||
sub := s.eventBus.Subscribe(func(evt events.Event) bool {
|
||||
return evt.Type == events.EventInstanceStatus || evt.Type == events.EventDeployStatus || evt.Type == events.EventLog
|
||||
switch evt.Type {
|
||||
case events.EventInstanceStatus, events.EventDeployStatus, events.EventLog:
|
||||
return true
|
||||
case events.EventBuildLog:
|
||||
if buildLogWorkloadID == "" {
|
||||
return false
|
||||
}
|
||||
p, ok := evt.Payload.(events.BuildLogPayload)
|
||||
return ok && p.WorkloadID == buildLogWorkloadID
|
||||
default:
|
||||
return false
|
||||
}
|
||||
})
|
||||
defer s.eventBus.Unsubscribe(sub)
|
||||
|
||||
|
||||
@@ -89,12 +89,16 @@ func toTriggerViewWithCount(row store.TriggerWithBindingCount) triggerView {
|
||||
// triggerRequest is the create/update body. Config is opaque per kind.
|
||||
// Auto-generates a webhook secret on create when WebhookEnabled is true;
|
||||
// the secret is exposed only via the /webhook subresource.
|
||||
//
|
||||
// WebhookRequireSignature is a *bool so we can distinguish "field omitted
|
||||
// by client" (nil → apply secure default of true when webhook is enabled)
|
||||
// from an explicit opt-out (false → respected).
|
||||
type triggerRequest struct {
|
||||
Kind string `json:"kind"`
|
||||
Name string `json:"name"`
|
||||
Config json.RawMessage `json:"config"`
|
||||
WebhookEnabled bool `json:"webhook_enabled"`
|
||||
WebhookRequireSignature bool `json:"webhook_require_signature"`
|
||||
WebhookRequireSignature *bool `json:"webhook_require_signature,omitempty"`
|
||||
}
|
||||
|
||||
// Same per-blob caps used on the workload pluginWorkloadRequest path —
|
||||
@@ -134,12 +138,26 @@ func (s *Server) getTrigger(w http.ResponseWriter, r *http.Request) {
|
||||
// buildTriggerFromRequest assembles a store.Trigger ready for insert.
|
||||
// Centralized so the standalone create endpoint and the inline-bind
|
||||
// endpoint cannot drift on secret-generation defaults.
|
||||
//
|
||||
// SECURITY: a new trigger with webhook enabled defaults to require_signature
|
||||
// = true. Operators can opt out at create time for receivers that do not
|
||||
// support HMAC, but the safer default avoids the "freshly-created trigger
|
||||
// accepts unsigned posts to its URL" footgun.
|
||||
func buildTriggerFromRequest(req triggerRequest) store.Trigger {
|
||||
// Secure default: if webhook is enabled and the operator did NOT
|
||||
// explicitly set require_signature, force it on. Explicit false is
|
||||
// preserved (legacy receivers without HMAC support still work).
|
||||
requireSig := false
|
||||
if req.WebhookRequireSignature != nil {
|
||||
requireSig = *req.WebhookRequireSignature
|
||||
} else if req.WebhookEnabled {
|
||||
requireSig = true
|
||||
}
|
||||
t := store.Trigger{
|
||||
Kind: req.Kind,
|
||||
Name: strings.TrimSpace(req.Name),
|
||||
Config: string(req.Config),
|
||||
WebhookRequireSignature: req.WebhookRequireSignature,
|
||||
WebhookRequireSignature: requireSig,
|
||||
}
|
||||
if req.WebhookEnabled {
|
||||
t.WebhookSecret = generateWebhookSecret()
|
||||
@@ -199,7 +217,13 @@ func (s *Server) updateTrigger(w http.ResponseWriter, r *http.Request) {
|
||||
if len(req.Config) > 0 {
|
||||
existing.Config = string(req.Config)
|
||||
}
|
||||
existing.WebhookRequireSignature = req.WebhookRequireSignature
|
||||
if req.WebhookRequireSignature != nil {
|
||||
existing.WebhookRequireSignature = *req.WebhookRequireSignature
|
||||
} else if req.WebhookEnabled && !existing.WebhookRequireSignature {
|
||||
// Re-enabling webhook without specifying the signature flag —
|
||||
// take the secure default.
|
||||
existing.WebhookRequireSignature = true
|
||||
}
|
||||
wasEnabled := existing.WebhookSecret != ""
|
||||
if req.WebhookEnabled && !wasEnabled {
|
||||
// false→true transition: rotate both secrets so re-enabling
|
||||
|
||||
@@ -13,18 +13,29 @@ import (
|
||||
"github.com/alexei/tinyforge/internal/auth"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
"github.com/alexei/tinyforge/internal/workload/preview"
|
||||
)
|
||||
|
||||
// chainNode is the lightweight shape returned by /chain — we deliberately
|
||||
// don't return full plugin.Workload values for ancestor/descendant rows
|
||||
// because the secret fields don't belong in a chain-traversal response.
|
||||
//
|
||||
// IsPreview / PreviewBranch surface branch-preview children to the UI so it
|
||||
// can render them in a dedicated "Preview environments" panel rather than as
|
||||
// undistinguished stage children. They are computed against the chain's
|
||||
// `self` workload via preview.IsPreviewChild — the canonical "this child is a
|
||||
// branch preview" test that reverses the MaterializeForBranch naming formula.
|
||||
// Both are zero-valued (false / "") for the parent and self nodes and for
|
||||
// operator-created stage children.
|
||||
type chainNode struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
SourceKind string `json:"source_kind"`
|
||||
TriggerKind string `json:"trigger_kind"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
SourceKind string `json:"source_kind"`
|
||||
TriggerKind string `json:"trigger_kind"`
|
||||
IsPreview bool `json:"is_preview"`
|
||||
PreviewBranch string `json:"preview_branch,omitempty"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
func chainNodeOf(w store.Workload) chainNode {
|
||||
@@ -38,6 +49,32 @@ func chainNodeOf(w store.Workload) chainNode {
|
||||
}
|
||||
}
|
||||
|
||||
// previewBranchOf extracts the branch a preview child was materialized for
|
||||
// from its source_config (the `branch` key MaterializeForBranch wrote).
|
||||
// Returns "" on a missing/malformed config — the caller only calls this for
|
||||
// rows preview.IsPreviewChild already confirmed, so a blank result just means
|
||||
// the JSON couldn't be decoded.
|
||||
func previewBranchOf(w store.Workload) string {
|
||||
var cfg struct {
|
||||
Branch string `json:"branch"`
|
||||
}
|
||||
if w.SourceConfig != "" {
|
||||
_ = json.Unmarshal([]byte(w.SourceConfig), &cfg)
|
||||
}
|
||||
return cfg.Branch
|
||||
}
|
||||
|
||||
// childChainNode builds a chainNode for a child row, marking it as a branch
|
||||
// preview (and attaching its branch) when it was materialized from `self`.
|
||||
func childChainNode(self, child store.Workload) chainNode {
|
||||
node := chainNodeOf(child)
|
||||
if preview.IsPreviewChild(self, child) {
|
||||
node.IsPreview = true
|
||||
node.PreviewBranch = previewBranchOf(child)
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
// getWorkloadChain handles GET /api/workloads/{id}/chain.
|
||||
//
|
||||
// Returns the workload's parent (or nil), itself, and its direct children
|
||||
@@ -76,7 +113,7 @@ func (s *Server) getWorkloadChain(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
children := make([]chainNode, 0, len(childRows))
|
||||
for _, c := range childRows {
|
||||
children = append(children, chainNodeOf(c))
|
||||
children = append(children, childChainNode(self, c))
|
||||
}
|
||||
|
||||
respondJSON(w, http.StatusOK, map[string]any{
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// TestChildChainNode_MarksPreviewChildren verifies the /chain DTO builder
|
||||
// distinguishes branch-preview children (materialized by the preview package)
|
||||
// from operator-created stage children that merely share the parent link.
|
||||
// The discriminator is preview.IsPreviewChild, which reverses the
|
||||
// MaterializeForBranch naming formula: name == template.Name + "/" + slug.
|
||||
func TestChildChainNode_MarksPreviewChildren(t *testing.T) {
|
||||
template := store.Workload{
|
||||
ID: "tmpl-1",
|
||||
Name: "myapp",
|
||||
SourceKind: "dockerfile",
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
child store.Workload
|
||||
wantPrev bool
|
||||
wantBranch string
|
||||
}{
|
||||
{
|
||||
name: "preview child is marked with its branch",
|
||||
child: store.Workload{
|
||||
ID: "child-prev",
|
||||
Name: "myapp/feat-login",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"branch":"feat/login","port":3000}`,
|
||||
ParentWorkloadID: "tmpl-1",
|
||||
},
|
||||
wantPrev: true,
|
||||
wantBranch: "feat/login",
|
||||
},
|
||||
{
|
||||
name: "operator-named stage child sharing the parent is not a preview",
|
||||
child: store.Workload{
|
||||
ID: "child-stage",
|
||||
Name: "myapp-staging",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"branch":"main"}`,
|
||||
ParentWorkloadID: "tmpl-1",
|
||||
},
|
||||
wantPrev: false,
|
||||
wantBranch: "",
|
||||
},
|
||||
{
|
||||
name: "child of a different parent is not a preview of self",
|
||||
child: store.Workload{
|
||||
ID: "child-other",
|
||||
Name: "myapp/feat-login",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"branch":"feat/login"}`,
|
||||
ParentWorkloadID: "some-other-template",
|
||||
},
|
||||
wantPrev: false,
|
||||
wantBranch: "",
|
||||
},
|
||||
{
|
||||
name: "child with no branch in source_config is not a preview",
|
||||
child: store.Workload{
|
||||
ID: "child-nobranch",
|
||||
Name: "myapp/feat-login",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{}`,
|
||||
ParentWorkloadID: "tmpl-1",
|
||||
},
|
||||
wantPrev: false,
|
||||
wantBranch: "",
|
||||
},
|
||||
{
|
||||
// Same parent + a valid branch, but the name carries an extra
|
||||
// suffix so it fails ONLY the slug-equality check (expected
|
||||
// "myapp/feat-login", got "myapp/feat-login-staging"). The
|
||||
// branch alone must not be enough to mark a preview.
|
||||
name: "valid branch but name fails the slug match is not a preview",
|
||||
child: store.Workload{
|
||||
ID: "child-slugmiss",
|
||||
Name: "myapp/feat-login-staging",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"branch":"feat/login","port":3000}`,
|
||||
ParentWorkloadID: "tmpl-1",
|
||||
},
|
||||
wantPrev: false,
|
||||
wantBranch: "",
|
||||
},
|
||||
{
|
||||
// Uppercase + slash branch: slugifyBranch lowercases and maps
|
||||
// "/" -> "-", so "Feature/Login" -> "feature-login" and the name
|
||||
// "myapp/feature-login" matches. PreviewBranch must echo the RAW
|
||||
// branch from source_config ("Feature/Login"), not the slug.
|
||||
name: "uppercase slash branch matches and keeps raw branch",
|
||||
child: store.Workload{
|
||||
ID: "child-upper",
|
||||
Name: "myapp/feature-login",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"branch":"Feature/Login","port":8080}`,
|
||||
ParentWorkloadID: "tmpl-1",
|
||||
},
|
||||
wantPrev: true,
|
||||
wantBranch: "Feature/Login",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
node := childChainNode(template, tc.child)
|
||||
if node.IsPreview != tc.wantPrev {
|
||||
t.Errorf("IsPreview = %v, want %v", node.IsPreview, tc.wantPrev)
|
||||
}
|
||||
if node.PreviewBranch != tc.wantBranch {
|
||||
t.Errorf("PreviewBranch = %q, want %q", node.PreviewBranch, tc.wantBranch)
|
||||
}
|
||||
// Base fields must always round-trip regardless of preview status.
|
||||
if node.ID != tc.child.ID || node.Name != tc.child.Name {
|
||||
t.Errorf("base fields mangled: got id=%q name=%q", node.ID, node.Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestPreviewBranchOf_ToleratesMalformedConfig confirms the branch extractor
|
||||
// returns "" rather than panicking on a missing or invalid source_config.
|
||||
func TestPreviewBranchOf_ToleratesMalformedConfig(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
cfg string
|
||||
want string
|
||||
}{
|
||||
{"valid branch", `{"branch":"release/v1"}`, "release/v1"},
|
||||
{"empty config", ``, ""},
|
||||
{"empty object", `{}`, ""},
|
||||
{"malformed json", `{not-json`, ""},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
got := previewBranchOf(store.Workload{SourceConfig: c.cfg})
|
||||
if got != c.want {
|
||||
t.Errorf("previewBranchOf(%q) = %q, want %q", c.cfg, got, c.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,231 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/crypto"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// workloadNotificationRow is the client-facing JSON view of one per-workload
// notification destination. The stored secret is never serialized; clients
// only learn whether one exists through SecretSet, so a secret is effectively
// write-only and is rotated by submitting a replacement value.
type workloadNotificationRow struct {
	// ID identifies the notification row itself.
	ID string `json:"id"`
	// WorkloadID is the workload this destination belongs to.
	WorkloadID string `json:"workload_id"`
	// Name is the operator-supplied label.
	Name string `json:"name"`
	// URL is the destination the notification is delivered to.
	URL string `json:"url"`
	// SecretSet reports whether a secret is stored for this row.
	SecretSet bool `json:"secret_set"`
	// EventTypes is the event filter, passed through exactly as stored.
	EventTypes string `json:"event_types"`
	// Enabled reports whether the destination is active.
	Enabled bool `json:"enabled"`
	// SortOrder is the operator-controlled ordering value.
	SortOrder int `json:"sort_order"`
	// CreatedAt and UpdatedAt are the row timestamps, passed through as the
	// strings the store returned.
	CreatedAt string `json:"created_at"`
	UpdatedAt string `json:"updated_at"`
}
|
||||
|
||||
func toWorkloadNotificationRow(n store.WorkloadNotification) workloadNotificationRow {
|
||||
return workloadNotificationRow{
|
||||
ID: n.ID,
|
||||
WorkloadID: n.WorkloadID,
|
||||
Name: n.Name,
|
||||
URL: n.URL,
|
||||
SecretSet: n.Secret != "",
|
||||
EventTypes: n.EventTypes,
|
||||
Enabled: n.Enabled,
|
||||
SortOrder: n.SortOrder,
|
||||
CreatedAt: n.CreatedAt,
|
||||
UpdatedAt: n.UpdatedAt,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) listWorkloadNotifications(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if _, err := s.store.GetWorkloadByID(id); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "workload")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "get workload")
|
||||
return
|
||||
}
|
||||
rows, err := s.store.ListWorkloadNotifications(id)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusInternalServerError, "list workload notifications")
|
||||
return
|
||||
}
|
||||
out := make([]workloadNotificationRow, 0, len(rows))
|
||||
for _, n := range rows {
|
||||
out = append(out, toWorkloadNotificationRow(n))
|
||||
}
|
||||
respondJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
// workloadNotificationRequest is the JSON body accepted by both the create
// (POST) and update (PUT) notification handlers.
type workloadNotificationRequest struct {
	// Name is the operator-supplied label; handlers trim surrounding space.
	Name string `json:"name"`
	// URL is the delivery destination; handlers trim it and reject empty.
	URL string `json:"url"`
	// Secret is the plaintext signing key. Handlers encrypt it with the
	// server's encryption key before storing. On update an empty value keeps
	// the stored secret, so other fields can be edited without re-entering it.
	Secret string `json:"secret"`
	// EventTypes is the event filter, stored exactly as submitted.
	EventTypes string `json:"event_types"`
	// Enabled is a pointer so "omitted" is distinguishable from false:
	// omitted means enabled on create and unchanged on update.
	Enabled *bool `json:"enabled"`
	// SortOrder is the operator-controlled ordering value; an update always
	// overwrites the stored value with whatever is submitted.
	SortOrder int `json:"sort_order"`
}
|
||||
|
||||
func (s *Server) createWorkloadNotification(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
if _, err := s.store.GetWorkloadByID(id); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "workload")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "get workload")
|
||||
return
|
||||
}
|
||||
var req workloadNotificationRequest
|
||||
if !decodeJSONStrict(w, r, &req) {
|
||||
return
|
||||
}
|
||||
req.URL = strings.TrimSpace(req.URL)
|
||||
req.Name = strings.TrimSpace(req.Name)
|
||||
if req.URL == "" {
|
||||
respondError(w, http.StatusBadRequest, "url is required")
|
||||
return
|
||||
}
|
||||
encSecret := ""
|
||||
if req.Secret != "" {
|
||||
v, err := crypto.Encrypt(s.encKey, req.Secret)
|
||||
if err != nil {
|
||||
slog.Error("workload notifications: encrypt secret", "workload", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "encrypt secret")
|
||||
return
|
||||
}
|
||||
encSecret = v
|
||||
}
|
||||
enabled := true
|
||||
if req.Enabled != nil {
|
||||
enabled = *req.Enabled
|
||||
}
|
||||
created, err := s.store.CreateWorkloadNotification(store.WorkloadNotification{
|
||||
WorkloadID: id,
|
||||
Name: req.Name,
|
||||
URL: req.URL,
|
||||
Secret: encSecret,
|
||||
EventTypes: req.EventTypes,
|
||||
Enabled: enabled,
|
||||
SortOrder: req.SortOrder,
|
||||
})
|
||||
if err != nil {
|
||||
slog.Error("workload notifications: create", "workload", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "create workload notification")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusCreated, toWorkloadNotificationRow(created))
|
||||
}
|
||||
|
||||
func (s *Server) updateWorkloadNotification(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
nid := chi.URLParam(r, "nid")
|
||||
if _, err := s.store.GetWorkloadByID(id); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "workload")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "get workload")
|
||||
return
|
||||
}
|
||||
existing, err := s.store.GetWorkloadNotification(nid)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "workload_notification")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "get workload_notification")
|
||||
return
|
||||
}
|
||||
if existing.WorkloadID != id {
|
||||
// Route mismatch — the row exists but under a different workload.
|
||||
// Return 404 rather than 403 so we don't leak the existence of
|
||||
// foreign rows to an unauthorised caller.
|
||||
respondNotFound(w, "workload_notification")
|
||||
return
|
||||
}
|
||||
|
||||
var req workloadNotificationRequest
|
||||
if !decodeJSONStrict(w, r, &req) {
|
||||
return
|
||||
}
|
||||
req.URL = strings.TrimSpace(req.URL)
|
||||
req.Name = strings.TrimSpace(req.Name)
|
||||
if req.URL == "" {
|
||||
respondError(w, http.StatusBadRequest, "url is required")
|
||||
return
|
||||
}
|
||||
|
||||
existing.Name = req.Name
|
||||
existing.URL = req.URL
|
||||
existing.EventTypes = req.EventTypes
|
||||
existing.SortOrder = req.SortOrder
|
||||
if req.Enabled != nil {
|
||||
existing.Enabled = *req.Enabled
|
||||
}
|
||||
// Empty Secret on UPDATE preserves the stored ciphertext — explicit
|
||||
// rotation requires sending the new plaintext. This avoids forcing
|
||||
// the operator to re-enter their secret on every URL edit.
|
||||
if req.Secret != "" {
|
||||
v, err := crypto.Encrypt(s.encKey, req.Secret)
|
||||
if err != nil {
|
||||
slog.Error("workload notifications: encrypt secret", "workload", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "encrypt secret")
|
||||
return
|
||||
}
|
||||
existing.Secret = v
|
||||
}
|
||||
|
||||
if err := s.store.UpdateWorkloadNotification(existing); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "workload_notification")
|
||||
return
|
||||
}
|
||||
slog.Error("workload notifications: update", "workload", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "update workload notification")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, toWorkloadNotificationRow(existing))
|
||||
}
|
||||
|
||||
func (s *Server) deleteWorkloadNotification(w http.ResponseWriter, r *http.Request) {
|
||||
id := chi.URLParam(r, "id")
|
||||
nid := chi.URLParam(r, "nid")
|
||||
existing, err := s.store.GetWorkloadNotification(nid)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "workload_notification")
|
||||
return
|
||||
}
|
||||
respondError(w, http.StatusInternalServerError, "get workload_notification")
|
||||
return
|
||||
}
|
||||
if existing.WorkloadID != id {
|
||||
respondNotFound(w, "workload_notification")
|
||||
return
|
||||
}
|
||||
if err := s.store.DeleteWorkloadNotification(nid); err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondNotFound(w, "workload_notification")
|
||||
return
|
||||
}
|
||||
slog.Error("workload notifications: delete", "workload", id, "error", err)
|
||||
respondError(w, http.StatusInternalServerError, "delete workload notification")
|
||||
return
|
||||
}
|
||||
respondJSON(w, http.StatusOK, map[string]any{"success": true})
|
||||
}
|
||||
@@ -82,16 +82,27 @@ func (s *Server) getWorkloadRuntimeState(w http.ResponseWriter, r *http.Request)
|
||||
|
||||
payload := runtimeStatePayload{SourceKind: workload.SourceKind}
|
||||
|
||||
if workload.SourceKind != "static" {
|
||||
// Both static and dockerfile sources persist their runtime state into
|
||||
// containers.extra_json under a deterministic row id. The shapes
|
||||
// match (status / last_commit_sha / last_sync_at / last_error) so the
|
||||
// handler can decode them identically. The suffix differs per source
|
||||
// kind: static uses ":site", dockerfile uses ":dockerfile".
|
||||
var rowSuffix string
|
||||
switch workload.SourceKind {
|
||||
case "static":
|
||||
rowSuffix = ":site"
|
||||
case "dockerfile":
|
||||
rowSuffix = ":dockerfile"
|
||||
default:
|
||||
respondJSON(w, http.StatusOK, payload)
|
||||
return
|
||||
}
|
||||
|
||||
// The static plugin owns one container row per workload at the
|
||||
// deterministic ID <workloadID>:site. A missing row means the
|
||||
// workload has never been deployed — return HasState=false so the
|
||||
// UI can prompt the operator to deploy.
|
||||
row, err := s.store.GetContainerByID(id + ":site")
|
||||
// The owning plugin maintains one container row per workload at the
|
||||
// deterministic ID. A missing row means the workload has never been
|
||||
// deployed — return HasState=false so the UI can prompt the operator
|
||||
// to deploy.
|
||||
row, err := s.store.GetContainerByID(id + rowSuffix)
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
respondJSON(w, http.StatusOK, payload)
|
||||
|
||||
@@ -130,6 +130,13 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
|
||||
SourceKind: "static",
|
||||
SourceConfig: `{"provider":"gitea"}`,
|
||||
})
|
||||
// Seed a row with a valid extra_json first, then corrupt it via raw
|
||||
// SQL. Prior to the write-side validateExtraJSON guard this test
|
||||
// could pass a malformed string straight to UpsertContainer; the
|
||||
// guard now rejects that at the boundary, which is the correct
|
||||
// behaviour. The reader resilience this test verifies remains
|
||||
// relevant for pre-existing bad rows from upgrades or external
|
||||
// manipulation, so we still produce one via direct SQL.
|
||||
if err := e.store.UpsertContainer(store.Container{
|
||||
ID: wl.ID + ":site",
|
||||
WorkloadID: wl.ID,
|
||||
@@ -137,10 +144,16 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
|
||||
Host: "local",
|
||||
ContainerID: "abc",
|
||||
State: "running",
|
||||
ExtraJSON: `{this is not json`,
|
||||
ExtraJSON: `{}`,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed: %v", err)
|
||||
}
|
||||
if _, err := e.store.DB().Exec(
|
||||
`UPDATE containers SET extra_json = ? WHERE id = ?`,
|
||||
`{this is not json`, wl.ID+":site",
|
||||
); err != nil {
|
||||
t.Fatalf("corrupt extra_json: %v", err)
|
||||
}
|
||||
resp := e.do(t, http.MethodGet, "/api/workloads/"+wl.ID+"/runtime-state", nil)
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("status = %d, want 200 (decode is non-fatal)", resp.StatusCode)
|
||||
@@ -155,6 +168,57 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetWorkloadRuntimeState_DockerfileSourceDeployed_DecodesExtraJSON(t *testing.T) {
|
||||
e := newAPITestEnv(t)
|
||||
wl, err := e.store.CreateWorkload(store.Workload{
|
||||
Kind: string(store.WorkloadKindProject),
|
||||
Name: "build-app",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"provider":"gitea","port":3000}`,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("seed workload: %v", err)
|
||||
}
|
||||
extra, _ := json.Marshal(map[string]any{
|
||||
"status": "deployed",
|
||||
"last_commit_sha": "deadbeef",
|
||||
"last_sync_at": "2026-05-23T10:00:00Z",
|
||||
"last_error": "",
|
||||
})
|
||||
if err := e.store.UpsertContainer(store.Container{
|
||||
ID: wl.ID + ":dockerfile",
|
||||
WorkloadID: wl.ID,
|
||||
WorkloadKind: string(store.WorkloadKindBuild),
|
||||
Host: "local",
|
||||
ContainerID: "ffeeddcc",
|
||||
State: "running",
|
||||
ExtraJSON: string(extra),
|
||||
}); err != nil {
|
||||
t.Fatalf("seed container: %v", err)
|
||||
}
|
||||
|
||||
resp := e.do(t, http.MethodGet, "/api/workloads/"+wl.ID+"/runtime-state", nil)
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("status = %d, want 200", resp.StatusCode)
|
||||
}
|
||||
var got runtimeStatePayload
|
||||
if errMsg := decodeEnvelope(t, resp, &got); errMsg != "" {
|
||||
t.Fatalf("envelope error: %q", errMsg)
|
||||
}
|
||||
if !got.HasState {
|
||||
t.Fatalf("HasState = false, want true")
|
||||
}
|
||||
if got.SourceKind != "dockerfile" {
|
||||
t.Errorf("SourceKind = %q, want dockerfile", got.SourceKind)
|
||||
}
|
||||
if got.ContainerID != "ffeeddcc" || got.State != "running" {
|
||||
t.Errorf("container fields = (%q,%q), want (ffeeddcc, running)", got.ContainerID, got.State)
|
||||
}
|
||||
if got.Status != "deployed" || got.LastCommitSHA != "deadbeef" {
|
||||
t.Errorf("runtime fields = %+v, want deployed/deadbeef", got)
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// GET /api/workloads/{id}/storage
|
||||
// =============================================================================
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/alexei/tinyforge/internal/auth"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
"github.com/alexei/tinyforge/internal/workload/preview"
|
||||
)
|
||||
|
||||
// pluginWorkloadRequest is the JSON body accepted by create + update.
|
||||
@@ -227,6 +228,28 @@ func (s *Server) deletePluginWorkload(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Cascade-teardown any branch previews materialized from this workload
|
||||
// so deleting a template does not orphan their containers, proxy routes,
|
||||
// and rows. Operator-managed stage-chain children (which share the same
|
||||
// parent link) are deliberately left alone — only previews are auto-owned
|
||||
// by the template (see preview.IsPreviewChild).
|
||||
if previews, err := preview.ListPreviewChildren(s.store, row); err != nil {
|
||||
slog.Warn("delete workload: list preview children", "workload", id, "error", err)
|
||||
} else {
|
||||
for _, child := range previews {
|
||||
if child.SourceKind != "" {
|
||||
if err := s.deployer.DispatchTeardown(r.Context(), toPluginWorkload(child)); err != nil {
|
||||
slog.Warn("delete workload: preview child teardown error",
|
||||
"workload", id, "child", child.ID, "error", err)
|
||||
}
|
||||
}
|
||||
if err := s.store.DeleteWorkload(child.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
|
||||
slog.Warn("delete workload: preview child delete error",
|
||||
"workload", id, "child", child.ID, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if row.SourceKind != "" {
|
||||
if err := s.deployer.DispatchTeardown(r.Context(), toPluginWorkload(row)); err != nil {
|
||||
slog.Warn("delete workload: teardown error",
|
||||
|
||||
@@ -85,9 +85,15 @@ func (la *LocalAuth) cleanBlacklist() {
|
||||
}
|
||||
}
|
||||
|
||||
// bcryptCost is the work factor used for new password hashes. Bumped from
|
||||
// the library default (10) to 12 so cost grows with hardware. Existing
|
||||
// hashes at lower costs still verify — bcrypt encodes the cost in the
|
||||
// stored hash itself.
|
||||
const bcryptCost = 12
|
||||
|
||||
// HashPassword hashes a plaintext password using bcrypt.
|
||||
func HashPassword(password string) (string, error) {
|
||||
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
|
||||
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcryptCost)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("hash password: %w", err)
|
||||
}
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
package backup
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
_ "modernc.org/sqlite" // read-only candidate inspection via PRAGMA integrity_check
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
@@ -129,6 +133,17 @@ func (e *Engine) RestorePath(id string) (string, error) {
|
||||
return "", fmt.Errorf("get backup: %w", err)
|
||||
}
|
||||
|
||||
// Filename comes from a DB row. Defence-in-depth: a backup file must live
|
||||
// directly under backupDir, so reject any value carrying a path separator
|
||||
// or traversal before joining. A poisoned row (future import path, manual
|
||||
// insert) must never let restore read — and then atomically copy over the
|
||||
// live DB — an arbitrary file. CreateBackup builds safe base names; this
|
||||
// enforces the same invariant on read.
|
||||
if backup.Filename == "" || backup.Filename == "." || backup.Filename == ".." ||
|
||||
backup.Filename != filepath.Base(backup.Filename) {
|
||||
return "", fmt.Errorf("backup: invalid filename %q", backup.Filename)
|
||||
}
|
||||
|
||||
filePath := filepath.Join(e.backupDir, backup.Filename)
|
||||
if _, err := os.Stat(filePath); err != nil {
|
||||
return "", fmt.Errorf("backup file not found: %w", err)
|
||||
@@ -137,6 +152,153 @@ func (e *Engine) RestorePath(id string) (string, error) {
|
||||
return filePath, nil
|
||||
}
|
||||
|
||||
// PrepareRestore validates a backup candidate before the caller swaps it
|
||||
// over the live DB. Runs three checks in order:
|
||||
//
|
||||
// 1. The candidate file exists and is non-empty.
|
||||
// 2. SQLite header magic matches (catches corrupted or partial downloads).
|
||||
// 3. `PRAGMA integrity_check` against a temp copy returns "ok"
|
||||
// (catches WAL/page corruption that the header check misses).
|
||||
//
|
||||
// On success returns the candidate path. On failure returns a wrapped
|
||||
// error describing which probe rejected the file, so the operator can
|
||||
// see exactly why a "restore" was refused rather than getting a corrupt
|
||||
// DB at next boot.
|
||||
//
|
||||
// We use a *temp copy* for integrity_check because attaching the
|
||||
// candidate read-only into the live process would still hold a file
|
||||
// handle SQLite considers writable on Windows.
|
||||
func (e *Engine) PrepareRestore(id string) (string, error) {
|
||||
path, err := e.RestorePath(id)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("restore: stat candidate: %w", err)
|
||||
}
|
||||
if info.Size() < 100 {
|
||||
return "", fmt.Errorf("restore: candidate %s is suspiciously small (%d bytes)", path, info.Size())
|
||||
}
|
||||
|
||||
// SQLite file header: "SQLite format 3\x00" (16 bytes).
|
||||
hdr, err := readHead(path, 16)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("restore: read header: %w", err)
|
||||
}
|
||||
if string(hdr) != "SQLite format 3\x00" {
|
||||
return "", fmt.Errorf("restore: candidate %s is not a SQLite database (header mismatch)", path)
|
||||
}
|
||||
|
||||
if err := integrityCheck(path); err != nil {
|
||||
return "", fmt.Errorf("restore: integrity check failed: %w", err)
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// readHead returns exactly the first n bytes of the file at path.
//
// The open error is returned as-is when the file cannot be opened. When
// the file holds fewer than n bytes the io.ReadFull error is returned and
// no partial data is handed back.
func readHead(path string, n int) ([]byte, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	// io.ReadFull keeps reading until the buffer is full; a single Read
	// call is allowed to return fewer bytes than requested, which would
	// make a byte-for-byte header comparison unreliable.
	head := make([]byte, n)
	_, readErr := io.ReadFull(file, head)
	if readErr != nil {
		return nil, readErr
	}
	return head, nil
}
|
||||
|
||||
// integrityCheck opens the SQLite file at path through the "sqlite" driver
// and runs `PRAGMA integrity_check` against it.
//
// The file is opened with mode=ro&immutable=1 so the driver does not try
// to create WAL/SHM sidecars or upgrade the journal mode on the candidate
// — both of which fail with "attempt to write a readonly database"
// against a backup file.
//
// It returns nil only when the first result row is exactly "ok". Any
// other first row is reported as corruption, and failures to open, query,
// step, or scan are returned wrapped with the stage that failed.
func integrityCheck(path string) error {
	db, err := sql.Open("sqlite", "file:"+path+"?mode=ro&immutable=1")
	if err != nil {
		return fmt.Errorf("open candidate: %w", err)
	}
	defer db.Close()

	rows, err := db.Query("PRAGMA integrity_check")
	if err != nil {
		return fmt.Errorf("pragma integrity_check: %w", err)
	}
	defer rows.Close()

	if !rows.Next() {
		// Next reports false both for an empty result set and for a
		// failed step; rows.Err tells the two apart so a read error is
		// not misreported as "no rows".
		if err := rows.Err(); err != nil {
			return fmt.Errorf("pragma integrity_check: %w", err)
		}
		return fmt.Errorf("integrity_check returned no rows")
	}

	var result string
	if err := rows.Scan(&result); err != nil {
		return fmt.Errorf("scan integrity_check: %w", err)
	}
	if result != "ok" {
		return fmt.Errorf("integrity_check: %s", result)
	}
	return nil
}
|
||||
|
||||
// AtomicReplaceDB writes a backup candidate into place atomically.
|
||||
// The caller is expected to:
|
||||
// 1. Call PrepareRestore(id) → candidatePath.
|
||||
// 2. Take a "pre-restore" backup of the current DB via CreateBackup.
|
||||
// 3. Close the live *sql.DB.
|
||||
// 4. Call AtomicReplaceDB(candidatePath, livePath).
|
||||
// 5. Trigger graceful shutdown; main() will re-open on next start.
|
||||
//
|
||||
// AtomicReplaceDB also wipes WAL/SHM sidecar files so the new DB starts
|
||||
// from a clean checkpoint state. Failure to remove sidecars is logged
|
||||
// but non-fatal — SQLite recreates them on open.
|
||||
func (e *Engine) AtomicReplaceDB(candidatePath, livePath string) error {
|
||||
// Copy candidate to a tmp file next to the live DB, then rename
|
||||
// atomically. On Windows os.Rename across volumes fails, so we
|
||||
// keep tmp on the same dir as the destination.
|
||||
tmp := livePath + ".restore.tmp"
|
||||
if err := copyFile(candidatePath, tmp); err != nil {
|
||||
return fmt.Errorf("copy candidate to %s: %w", tmp, err)
|
||||
}
|
||||
// Best-effort: remove WAL/SHM so SQLite re-checkpoints from the
|
||||
// restored main file rather than a stale WAL pointing at the old
|
||||
// DB's pages.
|
||||
for _, sidecar := range []string{livePath + "-wal", livePath + "-shm"} {
|
||||
if err := os.Remove(sidecar); err != nil && !os.IsNotExist(err) {
|
||||
slog.Warn("restore: remove sidecar", "path", sidecar, "error", err)
|
||||
}
|
||||
}
|
||||
if err := os.Rename(tmp, livePath); err != nil {
|
||||
// Clean up tmp on rename failure so we don't leak a partial file.
|
||||
_ = os.Remove(tmp)
|
||||
return fmt.Errorf("rename %s → %s: %w", tmp, livePath, err)
|
||||
}
|
||||
slog.Info("restore: database file replaced atomically", "live", livePath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// copyFile copies the contents of src into dst, creating dst with mode
// 0o600 or truncating it when it already exists.
//
// The data is flushed to stable storage with Sync before the file is
// closed, because the visible caller renames the result over the live
// database and must not end up with a renamed-but-unwritten file after a
// crash. If copying, syncing, or closing fails, the partially written dst
// is removed and the original error is returned. Errors opening src or
// dst are returned as-is.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
	if err != nil {
		return err
	}

	// abort releases the destination and deletes the partial file. Both
	// steps are best-effort: the cause being returned is the error the
	// caller needs to see.
	abort := func(cause error) error {
		_ = out.Close()
		_ = os.Remove(dst)
		return cause
	}

	if _, err := io.Copy(out, in); err != nil {
		return abort(err)
	}
	if err := out.Sync(); err != nil {
		return abort(err)
	}
	if err := out.Close(); err != nil {
		// The handle is already released; only the file needs removing.
		_ = os.Remove(dst)
		return err
	}
	return nil
}
|
||||
|
||||
// Prune removes old backups exceeding the retention count.
|
||||
// Returns the number of backups pruned.
|
||||
func (e *Engine) Prune(retentionCount int) (int, error) {
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
package backup
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// newTestEngine spins up an isolated store + engine pair for tests.
|
||||
// Each test gets its own tempdir so backup files do not collide.
|
||||
func newTestEngine(t *testing.T) (*Engine, *store.Store, string) {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
dbPath := filepath.Join(dir, "tinyforge.db")
|
||||
st, err := store.New(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("store.New: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = st.Close() })
|
||||
|
||||
eng, err := New(st, dbPath, dir)
|
||||
if err != nil {
|
||||
t.Fatalf("backup.New: %v", err)
|
||||
}
|
||||
return eng, st, dbPath
|
||||
}
|
||||
|
||||
func TestPrepareRestore_RejectsTinyFile(t *testing.T) {
|
||||
eng, st, _ := newTestEngine(t)
|
||||
|
||||
// Plant a backup row with a tiny file masquerading as a backup.
|
||||
tinyPath := filepath.Join(eng.BackupDir(), "tinyforge-manual-junk.db")
|
||||
if err := os.WriteFile(tinyPath, []byte("hi"), 0o600); err != nil {
|
||||
t.Fatalf("write tiny: %v", err)
|
||||
}
|
||||
bk, err := st.CreateBackup(store.Backup{
|
||||
Filename: "tinyforge-manual-junk.db",
|
||||
SizeBytes: 2,
|
||||
BackupType: "manual",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("CreateBackup row: %v", err)
|
||||
}
|
||||
|
||||
if _, err := eng.PrepareRestore(bk.ID); err == nil {
|
||||
t.Fatal("expected PrepareRestore to reject tiny file, got nil")
|
||||
} else if !strings.Contains(err.Error(), "suspiciously small") {
|
||||
t.Errorf("error = %v, want 'suspiciously small'", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrepareRestore_RejectsNonSQLite(t *testing.T) {
|
||||
eng, st, _ := newTestEngine(t)
|
||||
|
||||
// 200 bytes of non-SQLite garbage: passes the size check, fails
|
||||
// the header magic check.
|
||||
garbagePath := filepath.Join(eng.BackupDir(), "tinyforge-manual-bogus.db")
|
||||
junk := make([]byte, 200)
|
||||
for i := range junk {
|
||||
junk[i] = byte('x')
|
||||
}
|
||||
if err := os.WriteFile(garbagePath, junk, 0o600); err != nil {
|
||||
t.Fatalf("write junk: %v", err)
|
||||
}
|
||||
bk, err := st.CreateBackup(store.Backup{
|
||||
Filename: "tinyforge-manual-bogus.db",
|
||||
SizeBytes: int64(len(junk)),
|
||||
BackupType: "manual",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("CreateBackup row: %v", err)
|
||||
}
|
||||
|
||||
if _, err := eng.PrepareRestore(bk.ID); err == nil {
|
||||
t.Fatal("expected PrepareRestore to reject non-SQLite blob, got nil")
|
||||
} else if !strings.Contains(err.Error(), "header") {
|
||||
t.Errorf("error = %v, want header mismatch", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrepareRestore_AcceptsValidVacuumInto(t *testing.T) {
|
||||
eng, _, _ := newTestEngine(t)
|
||||
|
||||
// A fresh CreateBackup from the engine itself is, by construction,
|
||||
// a valid SQLite database — VACUUM INTO produces a clean copy.
|
||||
bk, err := eng.CreateBackup("manual")
|
||||
if err != nil {
|
||||
t.Fatalf("CreateBackup: %v", err)
|
||||
}
|
||||
path, err := eng.PrepareRestore(bk.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("PrepareRestore on valid backup: %v", err)
|
||||
}
|
||||
if path == "" {
|
||||
t.Errorf("PrepareRestore returned empty path")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrepareRestore_UnknownID(t *testing.T) {
|
||||
eng, _, _ := newTestEngine(t)
|
||||
|
||||
_, err := eng.PrepareRestore("nonexistent-id")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unknown id, got nil")
|
||||
}
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
// fine — wrapped through RestorePath
|
||||
}
|
||||
}
|
||||
+46
-10
@@ -10,11 +10,26 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ErrNoKey is returned when ENCRYPTION_KEY is not set.
|
||||
var ErrNoKey = errors.New("ENCRYPTION_KEY environment variable is not set")
|
||||
|
||||
// ErrDecryptFailed wraps any cipher.Open / decoder failure. Callers
|
||||
// upgrading from the silent-fallback pattern (treat-as-plaintext when
|
||||
// decrypt errored) MUST instead surface this — a rotated key would
|
||||
// otherwise silently leak ciphertext to upstream services as if it
|
||||
// were plaintext.
|
||||
var ErrDecryptFailed = errors.New("crypto: decrypt failed (wrong key, corrupted ciphertext, or unversioned legacy value)")
|
||||
|
||||
// envelopeV1Prefix tags ciphertext produced by Encrypt going forward.
|
||||
// Older databases may carry unprefixed hex blobs from the v0 era; those
|
||||
// are still readable via Decrypt for backward compatibility, but every
|
||||
// new write goes through Encrypt and emits the prefix so a future key
|
||||
// rotation has a clean fail-loud signal.
|
||||
const envelopeV1Prefix = "tf1:"
|
||||
|
||||
// DeriveKey computes a 32-byte AES-256 key from the given passphrase using SHA-256.
|
||||
// This is acceptable when ENCRYPTION_KEY is a high-entropy random string (e.g., 32+ hex chars).
|
||||
// For human-chosen passphrases, consider Argon2id or PBKDF2 with a salt instead.
|
||||
@@ -35,7 +50,8 @@ func KeyFromEnv() ([32]byte, error) {
|
||||
}
|
||||
|
||||
// Encrypt encrypts plaintext using AES-256-GCM with a random nonce.
|
||||
// The returned ciphertext is hex-encoded: nonce || ciphertext+tag.
|
||||
// Returns a versioned envelope (tf1:<hex>) so downstream readers can
|
||||
// distinguish ciphertext from accidentally-stored plaintext.
|
||||
func Encrypt(key [32]byte, plaintext string) (string, error) {
|
||||
block, err := aes.NewCipher(key[:])
|
||||
if err != nil {
|
||||
@@ -53,14 +69,34 @@ func Encrypt(key [32]byte, plaintext string) (string, error) {
|
||||
}
|
||||
|
||||
sealed := gcm.Seal(nonce, nonce, []byte(plaintext), nil)
|
||||
return hex.EncodeToString(sealed), nil
|
||||
return envelopeV1Prefix + hex.EncodeToString(sealed), nil
|
||||
}
|
||||
|
||||
// Decrypt decrypts a hex-encoded ciphertext produced by Encrypt.
|
||||
func Decrypt(key [32]byte, ciphertextHex string) (string, error) {
|
||||
data, err := hex.DecodeString(ciphertextHex)
|
||||
// HasEnvelope reports whether the value is a v1-prefixed ciphertext.
|
||||
// Useful for router-level "decrypt only if encrypted" decision points
|
||||
// that previously relied on `err == nil` from a try-decrypt — that
|
||||
// pattern silently masked rotated-key failures.
|
||||
func HasEnvelope(value string) bool {
|
||||
return strings.HasPrefix(value, envelopeV1Prefix)
|
||||
}
|
||||
|
||||
// Decrypt decrypts an envelope (tf1:<hex>). For backward compatibility
|
||||
// it also accepts unprefixed hex from the v0 era — but only when the
|
||||
// resulting plaintext is valid; a wrong key for legacy data now returns
|
||||
// ErrDecryptFailed instead of silently treating ciphertext as
|
||||
// plaintext.
|
||||
//
|
||||
// Callers MUST NOT swallow the error and fall back to "use as-is".
|
||||
// That pattern is the exact footgun the envelope versioning removes.
|
||||
func Decrypt(key [32]byte, ciphertext string) (string, error) {
|
||||
hexBlob := ciphertext
|
||||
if strings.HasPrefix(hexBlob, envelopeV1Prefix) {
|
||||
hexBlob = hexBlob[len(envelopeV1Prefix):]
|
||||
}
|
||||
|
||||
data, err := hex.DecodeString(hexBlob)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("decode hex: %w", err)
|
||||
return "", fmt.Errorf("%w: decode hex: %v", ErrDecryptFailed, err)
|
||||
}
|
||||
|
||||
block, err := aes.NewCipher(key[:])
|
||||
@@ -75,15 +111,15 @@ func Decrypt(key [32]byte, ciphertextHex string) (string, error) {
|
||||
|
||||
nonceSize := gcm.NonceSize()
|
||||
if len(data) < nonceSize {
|
||||
return "", errors.New("ciphertext too short")
|
||||
return "", fmt.Errorf("%w: ciphertext too short", ErrDecryptFailed)
|
||||
}
|
||||
|
||||
nonce := data[:nonceSize]
|
||||
ciphertext := data[nonceSize:]
|
||||
body := data[nonceSize:]
|
||||
|
||||
plaintext, err := gcm.Open(nil, nonce, ciphertext, nil)
|
||||
plaintext, err := gcm.Open(nil, nonce, body, nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("decrypt: %w", err)
|
||||
return "", fmt.Errorf("%w: %v", ErrDecryptFailed, err)
|
||||
}
|
||||
|
||||
return string(plaintext), nil
|
||||
|
||||
@@ -34,7 +34,19 @@ type Deployer struct {
|
||||
dnsMu sync.RWMutex
|
||||
dns dns.Provider // nil when wildcard DNS is active
|
||||
|
||||
// proxyMu protects hot-swap of d.proxy from runtime settings updates
|
||||
// (SetProxyProvider) racing with PluginDeps() reads on the deploy path.
|
||||
proxyMu sync.RWMutex
|
||||
|
||||
// Graceful shutdown: tracks in-progress deploys.
|
||||
//
|
||||
// drainMu serializes the "is-draining check + activeWg.Add(1)" in
|
||||
// beginDispatch against the "set shuttingDown + Wait()" in Drain. Without
|
||||
// it, a dispatch could pass the draining check, Drain could then flip the
|
||||
// flag and start Wait() with a zero counter, and the dispatch could call
|
||||
// Add(1) concurrently with Wait — a documented sync.WaitGroup misuse
|
||||
// (panic risk) that also lets a deploy slip past the drain barrier.
|
||||
drainMu sync.Mutex
|
||||
activeWg sync.WaitGroup
|
||||
shuttingDown atomic.Bool
|
||||
}
|
||||
@@ -73,7 +85,11 @@ func New(
|
||||
}
|
||||
|
||||
// SetProxyProvider updates the proxy provider at runtime (e.g., when settings change).
|
||||
// Guarded by proxyMu so concurrent deploys that read d.proxy via PluginDeps()
|
||||
// observe a coherent value (previously a torn-pointer race under -race).
|
||||
func (d *Deployer) SetProxyProvider(provider proxy.Provider) {
|
||||
d.proxyMu.Lock()
|
||||
defer d.proxyMu.Unlock()
|
||||
d.proxy = provider
|
||||
}
|
||||
|
||||
@@ -110,8 +126,11 @@ func (d *Deployer) SetDNSProvider(provider dns.Provider) {
|
||||
|
||||
// Drain waits for all in-progress deploys to complete. Call this during graceful shutdown.
|
||||
func (d *Deployer) Drain() {
|
||||
if !d.shuttingDown.CompareAndSwap(false, true) {
|
||||
// Already draining.
|
||||
d.drainMu.Lock()
|
||||
already := d.shuttingDown.Swap(true)
|
||||
d.drainMu.Unlock()
|
||||
if already {
|
||||
slog.Info("deployer: drain already in progress")
|
||||
}
|
||||
slog.Info("deployer: draining in-progress deploys")
|
||||
d.activeWg.Wait()
|
||||
@@ -121,11 +140,17 @@ func (d *Deployer) Drain() {
|
||||
// ShuttingDown reports whether Drain() has been called.
|
||||
func (d *Deployer) ShuttingDown() bool { return d.shuttingDown.Load() }
|
||||
|
||||
// rejectIfDraining is exposed in case any plugin wants the same hard-stop
|
||||
// behaviour the legacy pipeline used.
|
||||
func (d *Deployer) rejectIfDraining() error {
|
||||
// beginDispatch atomically rejects when draining and otherwise registers the
|
||||
// in-flight unit on activeWg. The shuttingDown check and the Add(1) MUST be
|
||||
// done together under drainMu (see the field comment): Drain sets the flag
|
||||
// under the same mutex before Wait(), so once Wait() observes a zero counter
|
||||
// no further Add can race it. Callers must defer d.activeWg.Done() on success.
|
||||
func (d *Deployer) beginDispatch() error {
|
||||
d.drainMu.Lock()
|
||||
defer d.drainMu.Unlock()
|
||||
if d.shuttingDown.Load() {
|
||||
return fmt.Errorf("deployer is shutting down, rejecting new deploy")
|
||||
}
|
||||
d.activeWg.Add(1)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/metrics"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
@@ -14,16 +15,37 @@ import (
|
||||
// triggers + image deploys still go through the legacy path, while
|
||||
// /api/hooks/generic + the unified webhook ingress go through here.
|
||||
func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
||||
if err := d.beginDispatch(); err != nil {
|
||||
metrics.DeploysTotal.Inc(w.SourceKind, "rejected_draining")
|
||||
return err
|
||||
}
|
||||
defer d.activeWg.Done()
|
||||
src, err := plugin.GetSource(w.SourceKind)
|
||||
if err != nil {
|
||||
// Unknown source: use the constant "unknown" sentinel for the
|
||||
// label so a typo-spam attack can't grow the metrics map with
|
||||
// one series per bogus source_kind. The actual user-supplied
|
||||
// value still surfaces via the wrapped error / event log.
|
||||
metrics.DeploysTotal.Inc("unknown", "unknown_source")
|
||||
return fmt.Errorf("dispatch %s: %w", w.Name, err)
|
||||
}
|
||||
return src.Deploy(ctx, d.PluginDeps(), w, intent)
|
||||
err = src.Deploy(ctx, d.PluginDeps(), w, intent)
|
||||
outcome := "success"
|
||||
if err != nil {
|
||||
outcome = "failure"
|
||||
}
|
||||
metrics.DeploysTotal.Inc(w.SourceKind, outcome)
|
||||
return err
|
||||
}
|
||||
|
||||
// DispatchTeardown routes a teardown call to the matching Source plugin.
|
||||
// Used when a workload is deleted.
|
||||
// Used when a workload is deleted. Tracked via activeWg so Drain() honours
|
||||
// in-progress teardowns just like deploys.
|
||||
func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) error {
|
||||
if err := d.beginDispatch(); err != nil {
|
||||
return err
|
||||
}
|
||||
defer d.activeWg.Done()
|
||||
src, err := plugin.GetSource(w.SourceKind)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dispatch teardown %s: %w", w.Name, err)
|
||||
@@ -33,8 +55,17 @@ func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) erro
|
||||
|
||||
// DispatchReconcile routes a Reconcile call. Periodic reconciler iterates
|
||||
// every Workload and calls this; idle Sources should make it a cheap
|
||||
// no-op.
|
||||
// no-op. Tracked via activeWg so a long-running reconcile blocks Drain().
|
||||
func (d *Deployer) DispatchReconcile(ctx context.Context, w plugin.Workload) error {
|
||||
if err := d.beginDispatch(); err != nil {
|
||||
// Silent skip — reconcile is a periodic tick, not a user-initiated
|
||||
// action, so we don't want to surface "draining" errors back to the
|
||||
// reconciler loop. The next tick after restart will catch up. Routing
|
||||
// through beginDispatch keeps the activeWg.Add atomic with the drain
|
||||
// check (see Drain) instead of a bare shuttingDown.Load + Add race.
|
||||
return nil
|
||||
}
|
||||
defer d.activeWg.Done()
|
||||
src, err := plugin.GetSource(w.SourceKind)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dispatch reconcile %s: %w", w.Name, err)
|
||||
@@ -52,10 +83,13 @@ func (d *Deployer) PluginDeps() plugin.Deps {
|
||||
d.dnsMu.RLock()
|
||||
dnsProvider := d.dns
|
||||
d.dnsMu.RUnlock()
|
||||
d.proxyMu.RLock()
|
||||
proxyProvider := d.proxy
|
||||
d.proxyMu.RUnlock()
|
||||
return plugin.Deps{
|
||||
Store: d.store,
|
||||
Docker: d.docker,
|
||||
Proxy: d.proxy,
|
||||
Proxy: proxyProvider,
|
||||
DNS: dnsProvider,
|
||||
Health: d.health,
|
||||
Notifier: d.notifier,
|
||||
|
||||
+119
-20
@@ -2,20 +2,58 @@ package docker
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/moby/moby/api/types/build"
|
||||
"github.com/moby/moby/client"
|
||||
)
|
||||
|
||||
// BuildImage builds a Docker image from a directory containing a Dockerfile.
|
||||
// The directory is packaged as a tar archive and sent to the Docker daemon.
|
||||
// The tag parameter is the image name:tag to apply (e.g., "dw-site-myapp:latest").
|
||||
// BuildImage builds a Docker image from a directory containing a Dockerfile
|
||||
// at the context root. Kept as a thin wrapper around BuildImageAt for the
|
||||
// static-site plugin which always emits its generated Dockerfile at the
|
||||
// context root. New code should prefer BuildImageAt so the Dockerfile path
|
||||
// is explicit.
|
||||
func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
||||
return c.BuildImageAt(ctx, contextDir, "Dockerfile", tag, nil)
|
||||
}
|
||||
|
||||
// BuildImageAt builds a Docker image from a tar of contextDir, using the
|
||||
// Dockerfile at `dockerfile` *inside* the context (typically "Dockerfile"
|
||||
// but may be e.g. "docker/Dockerfile" when the user-supplied repo layout
|
||||
// keeps Dockerfiles in a subfolder).
|
||||
//
|
||||
// The dockerfile argument is the path *relative to contextDir*. Empty
|
||||
// strings are normalised to "Dockerfile" so callers can pass through a
|
||||
// user config value without sanitising twice.
|
||||
//
|
||||
// logFn, if non-nil, is invoked for every non-empty `stream` line the
|
||||
// daemon emits during the build. Callers use this to forward live build
|
||||
// progress (e.g. SSE bus). Errors from the daemon are NOT delivered via
|
||||
// logFn — they surface as the returned error so the caller's failure
|
||||
// path stays the single source of truth.
|
||||
func (c *Client) BuildImageAt(ctx context.Context, contextDir, dockerfile, tag string, logFn func(line string)) error {
|
||||
if dockerfile == "" {
|
||||
dockerfile = "Dockerfile"
|
||||
}
|
||||
// Normalise to forward slashes — the tar entry names use them and the
|
||||
// Docker daemon expects the same.
|
||||
dockerfile = filepath.ToSlash(dockerfile)
|
||||
// Defence-in-depth: the dockerfile path is relative to contextDir and
|
||||
// is increasingly user/config-supplied (subfolder Dockerfiles). Reject
|
||||
// absolute paths and any `..` traversal at the boundary so a value like
|
||||
// "../../etc/passwd" can never be handed to the daemon's build options,
|
||||
// regardless of which builder backend resolves it.
|
||||
if filepath.IsAbs(dockerfile) || strings.HasPrefix(dockerfile, "/") ||
|
||||
dockerfile == ".." || strings.HasPrefix(dockerfile, "../") || strings.Contains(dockerfile, "/../") {
|
||||
return fmt.Errorf("docker build: invalid dockerfile path %q (must be relative to the build context, no traversal)", dockerfile)
|
||||
}
|
||||
// Create tar archive of the build context.
|
||||
pr, pw := io.Pipe()
|
||||
|
||||
@@ -50,16 +88,14 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
file, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open %s: %w", path, err)
|
||||
// Per-file close, NOT defer. `defer file.Close()` inside the
|
||||
// WalkFunc only runs when the outer goroutine returns — for a
|
||||
// build context with thousands of files (node_modules-heavy
|
||||
// repo) that leaks one fd per file until the walk completes
|
||||
// and trips EMFILE on default ulimit=1024 systems.
|
||||
if err := streamFileIntoTar(tw, path, relPath); err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
if _, err := io.Copy(tw, file); err != nil {
|
||||
return fmt.Errorf("copy %s to tar: %w", relPath, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
@@ -69,8 +105,16 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
||||
pw.CloseWithError(err)
|
||||
}()
|
||||
|
||||
// Pin the legacy builder explicitly. On Docker Engine 23+ BuildKit
|
||||
// is the default for the CLI, but the daemon honours the explicit
|
||||
// Version field on ImageBuildOptions. Legacy builder does NOT support
|
||||
// `RUN --mount=type=bind,source=/host` so a malicious Dockerfile
|
||||
// cannot mount host paths into the build context. Switching to
|
||||
// BuildKit later requires (a) Dockerfile-content validation to
|
||||
// reject bind-mount hints, or (b) an explicit per-workload opt-in.
|
||||
resp, err := c.api.ImageBuild(ctx, pr, client.ImageBuildOptions{
|
||||
Dockerfile: "Dockerfile",
|
||||
Version: build.BuilderV1,
|
||||
Dockerfile: dockerfile,
|
||||
Tags: []string{tag},
|
||||
Remove: true,
|
||||
ForceRemove: true,
|
||||
@@ -80,16 +124,71 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Read the build output to completion (required for the build to finish).
|
||||
output, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
// Drain the daemon's NDJSON stream to completion. The stream MUST
|
||||
// be read for the build to finish — closing the body early aborts
|
||||
// the build. We parse line-by-line into the {Stream, Error} shape
|
||||
// the daemon emits so an honest `{"error":"..."}` line surfaces
|
||||
// without false positives from informational `{"stream":"error
|
||||
// handling: retrying..."}` chatter that the old strings.Contains
|
||||
// path would have flagged.
|
||||
type buildLine struct {
|
||||
Stream string `json:"stream,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
scanner := bufio.NewScanner(resp.Body)
|
||||
// Some build steps emit single lines exceeding the default 64 KiB
|
||||
// (e.g. a fat go-mod-download dump). Bump to 1 MiB so we don't
|
||||
// silently truncate and miss the trailing error line.
|
||||
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
|
||||
var firstErr string
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
var bl buildLine
|
||||
if err := json.Unmarshal(line, &bl); err != nil {
|
||||
// Non-JSON line — daemon shouldn't produce these, but
|
||||
// don't fail the build over a parse hiccup.
|
||||
continue
|
||||
}
|
||||
if bl.Error != "" && firstErr == "" {
|
||||
firstErr = bl.Error
|
||||
}
|
||||
if logFn != nil && bl.Stream != "" {
|
||||
logFn(bl.Stream)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return fmt.Errorf("read build output for %s: %w", tag, err)
|
||||
}
|
||||
|
||||
// Check for error in build output.
|
||||
if strings.Contains(string(output), `"error"`) {
|
||||
return fmt.Errorf("build image %s: build errors in output", tag)
|
||||
if firstErr != "" {
|
||||
return fmt.Errorf("build image %s: %s", tag, firstErr)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// streamFileIntoTar copies the contents of the file at path into tw and
// closes the file before returning, so a caller that loops over many
// files never accumulates open descriptors. relPath is used only to label
// a copy failure; this function does not write a tar header itself
// (presumably the caller has already written one — confirm at the
// callsite).
//
// Errors are wrapped as "open <path>: ..." or "copy <relPath> to tar: ...".
func streamFileIntoTar(tw *tar.Writer, path, relPath string) error {
	src, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("open %s: %w", path, err)
	}

	// Always close, even when the copy failed. A copy error takes
	// precedence; a close error is reported only when the copy succeeded.
	_, err = io.Copy(tw, src)
	if closeErr := src.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		return nil
	}
	return fmt.Errorf("copy %s to tar: %w", relPath, err)
}
|
||||
|
||||
@@ -27,6 +27,13 @@ const (
|
||||
|
||||
// EventStackStatus is emitted when a compose stack status changes.
|
||||
EventStackStatus EventType = "stack_status"
|
||||
|
||||
// EventBuildLog is emitted for each line of a streaming image build.
|
||||
// Per-line events are ephemeral (not persisted to the event_log) — they
|
||||
// exist to drive a live tail UI during the slow "building" phase of a
|
||||
// dockerfile-source deploy. Subscribers should filter by WorkloadID
|
||||
// because every dockerfile deploy on the box publishes on the same bus.
|
||||
EventBuildLog EventType = "build_log"
|
||||
)
|
||||
|
||||
// Event is a single event published on the bus.
|
||||
@@ -77,6 +84,14 @@ type StaticSiteStatusPayload struct {
|
||||
Status string `json:"status"`
|
||||
}
|
||||
|
||||
// BuildLogPayload is the body of an EventBuildLog event: a single
// non-empty line taken from the daemon's NDJSON build stream.
type BuildLogPayload struct {
	// WorkloadID names the workload whose build produced the line.
	WorkloadID string `json:"workload_id"`
	// Line is the build output text.
	Line string `json:"line"`
	// Stream is optional and omitted from JSON when empty.
	// NOTE(review): its exact meaning is not visible here — confirm
	// against the publisher.
	Stream string `json:"stream,omitempty"`
}
|
||||
|
||||
// StackStatusPayload is the payload for EventStackStatus events.
|
||||
type StackStatusPayload struct {
|
||||
StackID string `json:"stack_id"`
|
||||
|
||||
@@ -0,0 +1,250 @@
|
||||
// Package metrics provides a minimal Prometheus text-format exposition
|
||||
// of Tinyforge's operational counters. We deliberately do NOT import the
|
||||
// official client_golang library: the metrics set here is small, the text
|
||||
// format is simple, and avoiding the dependency keeps `tinyforge` a fast
|
||||
// single-binary install.
|
||||
//
|
||||
// Every counter is a sync/atomic.Int64 — cheap, lock-free, and safe to
|
||||
// touch from any goroutine. Histograms / gauges aren't modeled yet; the
|
||||
// few we need (request latency p50/p99) live downstream of slog and can
|
||||
// be added when the operator actually wants them.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Registry holds the process-wide counter set. A single zero-value
|
||||
// Registry is ready to use — see DefaultRegistry below for the
|
||||
// recommended way to grab the global handle.
|
||||
type Registry struct {
|
||||
mu sync.RWMutex
|
||||
counters map[string]*counter
|
||||
}
|
||||
|
||||
// counter is the registry-side state behind a Counter handle: its
// metadata plus one atomic total per distinct label-value tuple.
type counter struct {
	name   string
	help   string
	labels []string // label names, in the order given at registration
	series map[string]*atomic.Int64
	// seriesMu guards the series map itself. The per-series totals are
	// atomics and are updated outside the lock.
	seriesMu sync.Mutex
}
|
||||
|
||||
// DefaultRegistry is the process-wide registry. All Tinyforge metrics
|
||||
// register against it. Tests can instantiate their own Registry.
|
||||
var DefaultRegistry = newRegistry()
|
||||
|
||||
func newRegistry() *Registry {
|
||||
return &Registry{counters: make(map[string]*counter)}
|
||||
}
|
||||
|
||||
// NewCounter declares a counter on the default registry. Call once at
|
||||
// package init or during NewServer; subsequent calls with the same name
|
||||
// return the existing counter so re-registration is safe.
|
||||
//
|
||||
// label names define the dimensions; calls to Inc must pass values in
|
||||
// the same order. Use the empty slice for label-less counters.
|
||||
func NewCounter(name, help string, labels ...string) *Counter {
|
||||
return DefaultRegistry.NewCounter(name, help, labels...)
|
||||
}
|
||||
|
||||
// NewCounter on a specific Registry — useful in tests.
|
||||
func (r *Registry) NewCounter(name, help string, labels ...string) *Counter {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if c, ok := r.counters[name]; ok {
|
||||
return &Counter{c: c}
|
||||
}
|
||||
c := &counter{
|
||||
name: name,
|
||||
help: help,
|
||||
labels: append([]string(nil), labels...),
|
||||
series: make(map[string]*atomic.Int64),
|
||||
}
|
||||
r.counters[name] = c
|
||||
return &Counter{c: c}
|
||||
}
|
||||
|
||||
// Counter is the public handle returned by NewCounter. Pass it around as
|
||||
// a value — the underlying state lives on the registry.
|
||||
type Counter struct {
|
||||
c *counter
|
||||
}
|
||||
|
||||
// Inc atomically increments the counter for the given label values.
|
||||
// Passing the wrong number of values is a programmer error; we surface
|
||||
// it as a panic during testing rather than silently aggregating into a
|
||||
// bogus series.
|
||||
func (c Counter) Inc(labelValues ...string) {
|
||||
c.Add(1, labelValues...)
|
||||
}
|
||||
|
||||
// Add atomically adds delta. Negative delta is rejected (counters are
|
||||
// monotonic by definition).
|
||||
func (c Counter) Add(delta int64, labelValues ...string) {
|
||||
if delta < 0 {
|
||||
return
|
||||
}
|
||||
if len(labelValues) != len(c.c.labels) {
|
||||
// Programmer error. This used to panic to surface the bug, but Add
|
||||
// runs on hot paths (HTTP middleware, deploy dispatch) and several
|
||||
// callers are off the request goroutine, where a panic would take
|
||||
// down the whole process rather than a single request. Log loudly
|
||||
// and drop the sample so a mislabeled call site can never crash the
|
||||
// server; the bug still shows up immediately in the logs and in
|
||||
// tests via the error output.
|
||||
slog.Error("metrics: label count mismatch — dropping sample",
|
||||
"counter", c.c.name, "want", len(c.c.labels), "got", len(labelValues))
|
||||
return
|
||||
}
|
||||
key := encodeKey(labelValues)
|
||||
c.c.seriesMu.Lock()
|
||||
v, ok := c.c.series[key]
|
||||
if !ok {
|
||||
v = new(atomic.Int64)
|
||||
c.c.series[key] = v
|
||||
}
|
||||
c.c.seriesMu.Unlock()
|
||||
v.Add(delta)
|
||||
}
|
||||
|
||||
// encodeKey turns a label-value tuple into a map key by joining the
// values with the 0x1f byte. The key is only an in-memory index;
// escaping for output happens at exposition time, not here.
func encodeKey(values []string) string {
	const unitSeparator = "\x1f"
	return strings.Join(values, unitSeparator)
}
|
||||
|
||||
// WritePrometheus dumps the registry in the text exposition format
|
||||
// Prometheus / VictoriaMetrics / OpenMetrics understands. Stable
|
||||
// ordering: counters alphabetical by name; series alphabetical by
|
||||
// encoded label tuple.
|
||||
func (r *Registry) WritePrometheus(w io.Writer) error {
|
||||
r.mu.RLock()
|
||||
names := make([]string, 0, len(r.counters))
|
||||
for n := range r.counters {
|
||||
names = append(names, n)
|
||||
}
|
||||
r.mu.RUnlock()
|
||||
sort.Strings(names)
|
||||
|
||||
for _, name := range names {
|
||||
r.mu.RLock()
|
||||
c := r.counters[name]
|
||||
r.mu.RUnlock()
|
||||
if err := writeCounter(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeCounter(w io.Writer, c *counter) error {
|
||||
if _, err := fmt.Fprintf(w, "# HELP %s %s\n# TYPE %s counter\n", c.name, escapeHelp(c.help), c.name); err != nil {
|
||||
return err
|
||||
}
|
||||
// Snapshot the series map under a SINGLE lock acquisition. The
|
||||
// previous shape acquired+released seriesMu twice per emitted
|
||||
// series (once for the key list, once per Load), contending with
|
||||
// every hot-path Inc on the HTTP request path. The *atomic.Int64
|
||||
// pointers are stable for the lifetime of the registry (we never
|
||||
// delete entries), so reading them after the unlock is safe.
|
||||
type sample struct {
|
||||
key string
|
||||
val *atomic.Int64
|
||||
}
|
||||
c.seriesMu.Lock()
|
||||
samples := make([]sample, 0, len(c.series))
|
||||
for k, v := range c.series {
|
||||
samples = append(samples, sample{k, v})
|
||||
}
|
||||
c.seriesMu.Unlock()
|
||||
|
||||
sort.Slice(samples, func(i, j int) bool { return samples[i].key < samples[j].key })
|
||||
|
||||
for _, s := range samples {
|
||||
val := s.val.Load()
|
||||
labels := decodeKey(s.key, c.labels)
|
||||
if labels == "" {
|
||||
if _, err := fmt.Fprintf(w, "%s %d\n", c.name, val); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if _, err := fmt.Fprintf(w, "%s{%s} %d\n", c.name, labels, val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func decodeKey(key string, names []string) string {
|
||||
if key == "" || len(names) == 0 {
|
||||
return ""
|
||||
}
|
||||
values := strings.Split(key, "\x1f")
|
||||
if len(values) != len(names) {
|
||||
// Should not happen — encodeKey/decode are symmetric.
|
||||
return ""
|
||||
}
|
||||
parts := make([]string, len(names))
|
||||
for i, n := range names {
|
||||
parts[i] = fmt.Sprintf(`%s="%s"`, n, escapeLabelValue(values[i]))
|
||||
}
|
||||
return strings.Join(parts, ",")
|
||||
}
|
||||
|
||||
// escapeHelp escapes a HELP string for the text exposition format:
// backslash becomes `\\` and a line feed becomes `\n`.
func escapeHelp(s string) string {
	// Backslashes go first so the backslash introduced for a line feed is
	// not itself doubled.
	escaped := strings.ReplaceAll(s, `\`, `\\`)
	return strings.ReplaceAll(escaped, "\n", `\n`)
}
|
||||
|
||||
// escapeLabelValue escapes a label value for the text exposition format:
// backslash becomes `\\`, a line feed becomes `\n`, and a double quote
// becomes `\"`.
func escapeLabelValue(s string) string {
	// Backslashes go first so the backslashes introduced by the later
	// replacements are not doubled.
	escaped := strings.ReplaceAll(s, `\`, `\\`)
	escaped = strings.ReplaceAll(escaped, "\n", `\n`)
	return strings.ReplaceAll(escaped, `"`, `\"`)
}
|
||||
|
||||
// ── Pre-declared counters ────────────────────────────────────────────
|
||||
//
|
||||
// These are the counters Tinyforge surfaces to operators. Adding more is
|
||||
// a one-line NewCounter call at the call site — no central catalogue,
|
||||
// just keep names lowercase_snake with the `tinyforge_` prefix.
|
||||
|
||||
var (
|
||||
HTTPRequestsTotal = NewCounter(
|
||||
"tinyforge_http_requests_total",
|
||||
"Total HTTP requests handled, partitioned by method and outcome class.",
|
||||
"method", "status_class",
|
||||
)
|
||||
DeploysTotal = NewCounter(
|
||||
"tinyforge_deploys_total",
|
||||
"Total deploys dispatched, partitioned by source kind and outcome.",
|
||||
"source_kind", "outcome",
|
||||
)
|
||||
WebhookDeliveriesTotal = NewCounter(
|
||||
"tinyforge_webhook_deliveries_total",
|
||||
"Total inbound webhook deliveries, partitioned by outcome.",
|
||||
"outcome",
|
||||
)
|
||||
SchedulerTicksTotal = NewCounter(
|
||||
"tinyforge_scheduler_ticks_total",
|
||||
"Total scheduler ticks. The dispatched counter is the success measure.",
|
||||
)
|
||||
SchedulerDispatchedTotal = NewCounter(
|
||||
"tinyforge_scheduler_dispatched_total",
|
||||
"Triggers actually dispatched by the scheduler.",
|
||||
)
|
||||
OutboundNotifyTotal = NewCounter(
|
||||
"tinyforge_outbound_notify_total",
|
||||
"Outbound notification dispatch attempts, partitioned by outcome.",
|
||||
"outcome",
|
||||
)
|
||||
)
|
||||
@@ -16,6 +16,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/metrics"
|
||||
)
|
||||
|
||||
// Event represents a deployment / site-sync notification payload.
|
||||
@@ -83,17 +85,68 @@ type TestResult struct {
|
||||
// Notifications are fire-and-forget by default — failures are logged but do
|
||||
// not propagate. SendSyncForTest is the exception, used only by the manual
|
||||
// test endpoint.
|
||||
//
|
||||
// outboundSem caps the number of in-flight outbound notifications. Without
|
||||
// it a single burst (e.g. 1000 event triggers firing on a noisy log scan)
|
||||
// would spawn 1000 simultaneous TCP connections, which both DoSes the
|
||||
// receiver and exhausts local FDs.
|
||||
type Notifier struct {
|
||||
httpClient *http.Client
|
||||
wg sync.WaitGroup
|
||||
httpClient *http.Client
|
||||
wg sync.WaitGroup
|
||||
outboundSem chan struct{}
|
||||
}
|
||||
|
||||
// maxOutboundNotifications bounds the in-flight outbound webhook fan-out.
|
||||
// Sized to keep small bursts non-blocking while preventing a runaway storm
|
||||
// from starving the rest of the process. Tunable later via settings if any
|
||||
// operator legitimately needs more concurrency.
|
||||
const maxOutboundNotifications = 32
|
||||
|
||||
// New creates a Notifier with sensible defaults.
|
||||
func New() *Notifier {
|
||||
// Transport with bounded host pooling so a slow receiver cannot pin
|
||||
// arbitrarily many sockets open. MaxConnsPerHost mirrors the worker
|
||||
// pool size; idle pruning keeps long-lived processes from holding
|
||||
// stale TCP entries indefinitely.
|
||||
//
|
||||
// NOTE: we deliberately do NOT apply the staticsite SSRF dialer here.
|
||||
// Notification URLs are admin-configured, and an admin already has
|
||||
// Docker-socket (host-root-equivalent) access, so the SSRF surface adds
|
||||
// nothing they couldn't already reach. Blocking loopback/private targets
|
||||
// would instead break the common self-hosted pattern of notifying a
|
||||
// same-host sidecar/bridge (e.g. service-to-notification-bridge on
|
||||
// 127.0.0.1). See the security review (rated LOW / out of trust boundary).
|
||||
tr := &http.Transport{
|
||||
MaxIdleConns: 64,
|
||||
MaxIdleConnsPerHost: 8,
|
||||
MaxConnsPerHost: maxOutboundNotifications,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
}
|
||||
return &Notifier{
|
||||
httpClient: &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
Timeout: 10 * time.Second,
|
||||
Transport: tr,
|
||||
},
|
||||
outboundSem: make(chan struct{}, maxOutboundNotifications),
|
||||
}
|
||||
}
|
||||
|
||||
// acquireSlot reserves an outbound slot, respecting ctx so a backed-up
|
||||
// queue cannot starve a request that already has its own deadline.
|
||||
func (n *Notifier) acquireSlot(ctx context.Context) bool {
|
||||
select {
|
||||
case n.outboundSem <- struct{}{}:
|
||||
return true
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Notifier) releaseSlot() {
|
||||
select {
|
||||
case <-n.outboundSem:
|
||||
default:
|
||||
// Drained during shutdown — never block.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,8 +181,15 @@ func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event)
|
||||
n.wg.Add(1)
|
||||
go func() {
|
||||
defer n.wg.Done()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel()
|
||||
if !n.acquireSlot(ctx) {
|
||||
slog.Warn("notify: dropped — outbound queue saturated",
|
||||
"tier", tier, "host", safeHost(webhookURL), "delivery", delivery, "event", event.Type)
|
||||
metrics.OutboundNotifyTotal.Inc("dropped")
|
||||
return
|
||||
}
|
||||
defer n.releaseSlot()
|
||||
|
||||
_, err := n.doSend(ctx, webhookURL, secret, tier, delivery, event)
|
||||
// URL host only — never log the secret or full URL with user-info.
|
||||
@@ -138,11 +198,13 @@ func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event)
|
||||
slog.Warn("notify: webhook send failed",
|
||||
"tier", tier, "host", host, "delivery", delivery,
|
||||
"event", event.Type, "signed", secret != "", "error", err)
|
||||
metrics.OutboundNotifyTotal.Inc("failure")
|
||||
return
|
||||
}
|
||||
slog.Info("notify: webhook dispatched",
|
||||
"tier", tier, "host", host, "delivery", delivery,
|
||||
"event", event.Type, "signed", secret != "")
|
||||
metrics.OutboundNotifyTotal.Inc("success")
|
||||
}()
|
||||
}
|
||||
|
||||
@@ -166,8 +228,15 @@ func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any
|
||||
n.wg.Add(1)
|
||||
go func() {
|
||||
defer n.wg.Done()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel()
|
||||
if !n.acquireSlot(ctx) {
|
||||
slog.Warn("notify: dropped trigger payload — outbound queue saturated",
|
||||
"tier", TierEventTrigger, "host", safeHost(webhookURL), "delivery", delivery, "event", eventType)
|
||||
metrics.OutboundNotifyTotal.Inc("dropped")
|
||||
return
|
||||
}
|
||||
defer n.releaseSlot()
|
||||
|
||||
_, err := n.doSendRaw(ctx, webhookURL, secret, TierEventTrigger, delivery, eventType, timestamp, payload)
|
||||
host := safeHost(webhookURL)
|
||||
@@ -175,11 +244,13 @@ func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any
|
||||
slog.Warn("notify: trigger webhook send failed",
|
||||
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
||||
"event", eventType, "signed", secret != "", "error", err)
|
||||
metrics.OutboundNotifyTotal.Inc("failure")
|
||||
return
|
||||
}
|
||||
slog.Info("notify: trigger webhook dispatched",
|
||||
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
||||
"event", eventType, "signed", secret != "")
|
||||
metrics.OutboundNotifyTotal.Inc("success")
|
||||
}()
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/metrics"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin/trigger/schedule"
|
||||
@@ -124,6 +125,7 @@ func (s *Scheduler) loop(ctx context.Context) {
|
||||
// TickOnce runs a single sweep. Exposed for tests and for the boot
|
||||
// kick. On error per-trigger the loop continues with the next row.
|
||||
func (s *Scheduler) TickOnce(ctx context.Context) {
|
||||
metrics.SchedulerTicksTotal.Inc()
|
||||
rows, err := s.store.ListTriggers("schedule")
|
||||
if err != nil {
|
||||
slog.Warn("scheduler: list triggers", "error", err)
|
||||
@@ -226,5 +228,6 @@ func (s *Scheduler) fire(ctx context.Context, t store.Trigger, now time.Time) {
|
||||
slog.Warn("scheduler: dispatch", "trigger", t.Name, "error", err)
|
||||
return
|
||||
}
|
||||
metrics.SchedulerDispatchedTotal.Inc()
|
||||
slog.Info("scheduler: fired", "trigger", t.Name, "kind", t.Kind, "at", ts)
|
||||
}
|
||||
|
||||
@@ -92,17 +92,27 @@ func (c *Compose) Ps(ctx context.Context, projectName, yamlPath string) ([]Servi
|
||||
}
|
||||
|
||||
// Logs runs `docker compose -p <projectName> logs --no-color --tail=<n> <service>`.
|
||||
// If service is empty, logs for all services are returned.
|
||||
// If service is empty, logs for all services are returned. The service arg
|
||||
// is preceded by `--` so a service name that begins with `-` cannot be
|
||||
// re-parsed as a flag by the docker CLI (flag-injection guard).
|
||||
func (c *Compose) Logs(ctx context.Context, projectName, service string, tail int) (string, error) {
|
||||
args := []string{"logs", "--no-color", fmt.Sprintf("--tail=%d", tail)}
|
||||
if service != "" {
|
||||
args = append(args, service)
|
||||
args = append(args, "--", service)
|
||||
}
|
||||
return c.run(ctx, projectName, args...)
|
||||
}
|
||||
|
||||
// run executes `docker compose -p <projectName> <args...>` and returns combined output.
|
||||
// run executes `docker compose -p <projectName> <args...>` and returns
|
||||
// combined output. projectName is verified not to begin with `-` because
|
||||
// `docker compose -p '--foo'` would otherwise be re-parsed as a flag —
|
||||
// the callers already sanitize project names through projectNameSanitizer,
|
||||
// but a belt-and-braces refusal here means any future caller cannot
|
||||
// accidentally bypass the sanitizer.
|
||||
func (c *Compose) run(ctx context.Context, projectName string, args ...string) (string, error) {
|
||||
if projectName == "" || strings.HasPrefix(projectName, "-") {
|
||||
return "", fmt.Errorf("docker compose: refusing project name %q", projectName)
|
||||
}
|
||||
full := append([]string{"compose", "-p", projectName}, args...)
|
||||
cmd := exec.CommandContext(ctx, c.binary, full...)
|
||||
var buf bytes.Buffer
|
||||
|
||||
+146
-6
@@ -2,6 +2,7 @@ package stack
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
@@ -15,11 +16,25 @@ type ComposeSpec struct {
|
||||
}
|
||||
|
||||
// ServiceSpec captures the subset of compose service fields we inspect.
|
||||
//
|
||||
// All host-escape-adjacent fields are decoded here even though Tinyforge
|
||||
// itself never reads them at runtime — surfacing them to Validate() is the
|
||||
// only way to *reject* them. Add new fields here when blocking a new
|
||||
// escape vector.
|
||||
type ServiceSpec struct {
|
||||
Image string `yaml:"image,omitempty"`
|
||||
Ports []any `yaml:"ports,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Privileged bool `yaml:"privileged,omitempty"`
|
||||
Image string `yaml:"image,omitempty"`
|
||||
Build any `yaml:"build,omitempty"` // banned — see Validate
|
||||
Ports []any `yaml:"ports,omitempty"`
|
||||
Labels map[string]string `yaml:"labels,omitempty"`
|
||||
Privileged bool `yaml:"privileged,omitempty"`
|
||||
Volumes []any `yaml:"volumes,omitempty"`
|
||||
NetworkMode string `yaml:"network_mode,omitempty"`
|
||||
Pid string `yaml:"pid,omitempty"`
|
||||
Ipc string `yaml:"ipc,omitempty"`
|
||||
UsernsMode string `yaml:"userns_mode,omitempty"`
|
||||
CapAdd []string `yaml:"cap_add,omitempty"`
|
||||
Devices []any `yaml:"devices,omitempty"`
|
||||
SecurityOpt []string `yaml:"security_opt,omitempty"`
|
||||
}
|
||||
|
||||
// Parse decodes YAML into a ComposeSpec. Returns a descriptive error on failure.
|
||||
@@ -35,10 +50,20 @@ func Parse(yamlText string) (ComposeSpec, error) {
|
||||
}
|
||||
|
||||
// Validate enforces Tinyforge-level constraints beyond compose schema validity.
|
||||
// All blocked fields below are documented host-escape vectors: any one of
|
||||
// them on its own gives the container root on the host. Tinyforge already
|
||||
// owns the docker socket, so the threat model is "any admin == host root,"
|
||||
// and these blocks raise the bar for any *future* viewer-to-admin
|
||||
// escalation as well as honest-mistake guardrails.
|
||||
//
|
||||
// Current rules:
|
||||
// - No service may set `privileged: true`.
|
||||
// - Every service must declare an image (compose supports build: too, but
|
||||
// Tinyforge v1 disallows building from context to avoid arbitrary-code exec).
|
||||
// - Every service must declare an image (build contexts disallowed).
|
||||
// - No host-IPC / host-PID / host-userns / host networking.
|
||||
// - No `cap_add`, `security_opt`, `devices`.
|
||||
// - `volumes` may not bind-mount the docker socket, /, /etc, /var, /proc,
|
||||
// /sys, /root, or /home — list is conservative; operators with real
|
||||
// bind-mount needs should ship a Source plugin or a dedicated wizard.
|
||||
func Validate(spec ComposeSpec) error {
|
||||
for name, svc := range spec.Services {
|
||||
if svc.Privileged {
|
||||
@@ -47,6 +72,121 @@ func Validate(spec ComposeSpec) error {
|
||||
if svc.Image == "" {
|
||||
return fmt.Errorf("service %q: image is required (build contexts not supported)", name)
|
||||
}
|
||||
if svc.Build != nil {
|
||||
return fmt.Errorf("service %q: build: is not supported (use image:)", name)
|
||||
}
|
||||
if isBlockedNamespaceMode(svc.NetworkMode) {
|
||||
return fmt.Errorf("service %q: network_mode %q is not allowed", name, svc.NetworkMode)
|
||||
}
|
||||
if isBlockedNamespaceMode(svc.Pid) {
|
||||
return fmt.Errorf("service %q: pid: %q is not allowed", name, svc.Pid)
|
||||
}
|
||||
if isBlockedNamespaceMode(svc.Ipc) {
|
||||
return fmt.Errorf("service %q: ipc: %q is not allowed", name, svc.Ipc)
|
||||
}
|
||||
if isHostMode(svc.UsernsMode) {
|
||||
return fmt.Errorf("service %q: userns_mode %q is not allowed", name, svc.UsernsMode)
|
||||
}
|
||||
if len(svc.CapAdd) > 0 {
|
||||
return fmt.Errorf("service %q: cap_add is not allowed", name)
|
||||
}
|
||||
if len(svc.SecurityOpt) > 0 {
|
||||
return fmt.Errorf("service %q: security_opt is not allowed", name)
|
||||
}
|
||||
if len(svc.Devices) > 0 {
|
||||
return fmt.Errorf("service %q: devices is not allowed", name)
|
||||
}
|
||||
for _, v := range svc.Volumes {
|
||||
if host, ok := bindMountHostPath(v); ok {
|
||||
if isBlockedBindMount(host) {
|
||||
return fmt.Errorf("service %q: bind-mounting %q is not allowed", name, host)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isHostMode reports whether v requests sharing a host namespace, i.e.
// is exactly "host". The match is exact on purpose: "host-gateway" is an
// extra_hosts value rather than a namespace mode, and treating it as one
// only produced misleading rejections.
func isHostMode(v string) bool {
	const hostNamespace = "host"
	return v == hostNamespace
}
|
||||
|
||||
// isBlockedNamespaceMode reports a namespace mode that must be rejected for
|
||||
// network_mode / pid / ipc: either host sharing ("host") or joining another
|
||||
// container's / compose service's namespace ("container:<id>",
|
||||
// "service:<name>"). The container/service joins are a lateral-movement and
|
||||
// sandbox-escape vector — a malicious service could attach to a victim
|
||||
// container's network or PID namespace.
|
||||
func isBlockedNamespaceMode(v string) bool {
|
||||
return isHostMode(v) ||
|
||||
strings.HasPrefix(v, "container:") ||
|
||||
strings.HasPrefix(v, "service:")
|
||||
}
|
||||
|
||||
// hasHostPathPrefix reports whether src is written like a filesystem path
// (absolute, relative, or home-relative) rather than a volume name.
func hasHostPathPrefix(src string) bool {
	return strings.HasPrefix(src, "/") ||
		strings.HasPrefix(src, ".") ||
		strings.HasPrefix(src, "~")
}

// bindMountHostPath extracts the host-side path from one entry of a
// compose service's `volumes` list.
//
// Two shapes are understood:
//   - short syntax, a string "[SOURCE:]TARGET[:MODE]";
//   - long syntax, a map with optional "type" and "source" keys.
//
// It returns (path, true) only when the entry names a host path as its
// source. It returns ("", false) for named volumes, for long-form entries
// whose type is set to something other than "bind", for values of any
// other Go type, and for a short-syntax entry with no ':' at all. In that
// last case the single field is the container path of an anonymous
// volume, so there is no host source, and treating it as one would make
// Validate reject e.g. "/var/lib/mysql" as a forbidden bind mount.
func bindMountHostPath(v any) (string, bool) {
	switch vol := v.(type) {
	case string:
		src, _, hasTarget := strings.Cut(vol, ":")
		if !hasTarget {
			// Empty string, or TARGET only (anonymous volume).
			return "", false
		}
		// "named:/in/container" has no path-like prefix on the source.
		if hasHostPathPrefix(src) {
			return src, true
		}
		return "", false
	case map[string]any:
		// An explicit non-bind type (volume, tmpfs, ...) has no host path.
		if typ, _ := vol["type"].(string); typ != "" && typ != "bind" {
			return "", false
		}
		if src, ok := vol["source"].(string); ok && hasHostPathPrefix(src) {
			return src, true
		}
	}
	return "", false
}
|
||||
|
||||
// isBlockedBindMount reports whether host is a bind-mount source that
// must be refused. It is a conservative deny-list; operators with
// legitimate bind-mount needs should write a dedicated Source plugin
// rather than tunnel them through compose.
//
// Refused:
//   - the empty path and the filesystem root;
//   - relative ("./x", "../x", ".") and home-relative ("~/...") sources,
//     which are resolved against the compose working directory or left
//     unexpanded, and which the absolute-prefix list below cannot judge;
//   - any path containing a ".." segment;
//   - the docker socket, and anything at or under /etc, /var, /proc,
//     /sys, /root, /home, /boot or /dev.
//
// The path is normalized before it is compared: empty and "." segments
// are dropped, so "//etc", "/./etc" and "/etc/" all compare as "/etc".
// A ".." segment is rejected outright rather than resolved. Without this
// step a source such as "/opt/../etc" would slip past the prefix list
// while still mounting /etc.
func isBlockedBindMount(host string) bool {
	if strings.HasPrefix(host, ".") || strings.HasPrefix(host, "~") {
		return true
	}

	parts := strings.Split(host, "/")
	segs := make([]string, 0, len(parts))
	for _, seg := range parts {
		switch seg {
		case "", ".":
			// Redundant separators and self-references do not change
			// the target.
			continue
		case "..":
			return true
		}
		segs = append(segs, seg)
	}
	if len(segs) == 0 {
		// "", "/", "///" — nothing but the root.
		return true
	}
	clean := strings.Join(segs, "/")
	if strings.HasPrefix(host, "/") {
		clean = "/" + clean
	}

	// Specific blocked files / sockets.
	switch clean {
	case "/var/run/docker.sock", "/run/docker.sock":
		return true
	}

	// Blocked prefixes (cover sub-paths too). The "/"-terminated prefix
	// check keeps siblings such as "/etcetera" allowed.
	for _, p := range [...]string{"/etc", "/var", "/proc", "/sys", "/root", "/home", "/boot", "/dev"} {
		if clean == p || strings.HasPrefix(clean, p+"/") {
			return true
		}
	}
	return false
}
|
||||
|
||||
@@ -50,34 +50,7 @@ func ValidateBaseURL(raw string) error {
|
||||
func NewSafeHTTPClient(timeout time.Duration) *http.Client {
|
||||
dialer := &net.Dialer{Timeout: 10 * time.Second, KeepAlive: 30 * time.Second}
|
||||
transport := &http.Transport{
|
||||
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
host, port, err := net.SplitHostPort(addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// If the caller passed a literal IP, skip the DNS round-trip.
|
||||
if literal := net.ParseIP(host); literal != nil {
|
||||
if reason := blockReason(literal); reason != "" {
|
||||
return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, literal, reason)
|
||||
}
|
||||
return dialer.DialContext(ctx, network, addr)
|
||||
}
|
||||
ips, err := net.DefaultResolver.LookupIPAddr(ctx, host)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(ips) == 0 {
|
||||
return nil, fmt.Errorf("no addresses for %s", host)
|
||||
}
|
||||
for _, ip := range ips {
|
||||
if reason := blockReason(ip.IP); reason != "" {
|
||||
return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, ip.IP, reason)
|
||||
}
|
||||
}
|
||||
// Bind to the first resolved IP so a rebind between resolution
|
||||
// and connect cannot redirect the request to a blocked address.
|
||||
return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].IP.String(), port))
|
||||
},
|
||||
DialContext: SafeDialContext(dialer),
|
||||
MaxIdleConns: 16,
|
||||
IdleConnTimeout: 30 * time.Second,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
@@ -85,6 +58,43 @@ func NewSafeHTTPClient(timeout time.Duration) *http.Client {
|
||||
return &http.Client{Timeout: timeout, Transport: transport}
|
||||
}
|
||||
|
||||
// SafeDialContext returns a DialContext that rejects loopback, link-local,
|
||||
// multicast, unspecified, and cloud-metadata addresses at connect time,
|
||||
// re-resolving and binding to the resolved IP so a DNS rebind between
|
||||
// resolution and connect cannot slip through. Exposed so other transports
|
||||
// (e.g. the outbound notification client) can apply the same SSRF policy
|
||||
// without duplicating it or losing their own connection-pool tuning.
|
||||
func SafeDialContext(dialer *net.Dialer) func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
return func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||
host, port, err := net.SplitHostPort(addr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// If the caller passed a literal IP, skip the DNS round-trip.
|
||||
if literal := net.ParseIP(host); literal != nil {
|
||||
if reason := blockReason(literal); reason != "" {
|
||||
return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, literal, reason)
|
||||
}
|
||||
return dialer.DialContext(ctx, network, addr)
|
||||
}
|
||||
ips, err := net.DefaultResolver.LookupIPAddr(ctx, host)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(ips) == 0 {
|
||||
return nil, fmt.Errorf("no addresses for %s", host)
|
||||
}
|
||||
for _, ip := range ips {
|
||||
if reason := blockReason(ip.IP); reason != "" {
|
||||
return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, ip.IP, reason)
|
||||
}
|
||||
}
|
||||
// Bind to the first resolved IP so a rebind between resolution
|
||||
// and connect cannot redirect the request to a blocked address.
|
||||
return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].IP.String(), port))
|
||||
}
|
||||
}
|
||||
|
||||
// blockReason returns a human label for why an IP is rejected, or ""
|
||||
// if the IP is allowed. Centralized so all callers share the same
|
||||
// policy.
|
||||
@@ -92,6 +102,13 @@ func blockReason(ip net.IP) string {
|
||||
if ip == nil {
|
||||
return "nil address"
|
||||
}
|
||||
// Normalize IPv4-mapped IPv6 (::ffff:x.x.x.x) so the loopback / link-local
|
||||
// classifiers below catch them. net.IP.To4() returns the 4-byte form for
|
||||
// IPv4-mapped addresses; net's IsLoopback already handles this, but pin
|
||||
// the conversion to avoid future surprises if the std-lib semantics drift.
|
||||
if v4 := ip.To4(); v4 != nil {
|
||||
ip = v4
|
||||
}
|
||||
switch {
|
||||
case ip.IsLoopback():
|
||||
return "loopback"
|
||||
@@ -104,5 +121,22 @@ func blockReason(ip net.IP) string {
|
||||
case ip.IsMulticast():
|
||||
return "multicast"
|
||||
}
|
||||
// Cloud metadata endpoints — AWS / GCP / Azure are covered by the
|
||||
// link-local block (169.254.169.254). The rest must be enumerated.
|
||||
if metadataIPSet[ip.String()] {
|
||||
return "cloud metadata endpoint"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// metadataIPSet holds the textual form of well-known cloud metadata
// addresses that net.IP.IsLinkLocalUnicast does not cover. Adding an entry
// here is the lightest way to support a new provider without changing the
// shape of the policy.
var metadataIPSet = map[string]bool{
	"fd00:ec2::254":   true, // AWS IMDS over IPv6 (ULA — not link-local, must be listed).
	"192.0.0.192":     true, // Oracle Cloud Infrastructure metadata.
	"100.100.100.200": true, // Alibaba Cloud ECS metadata.
}
|
||||
|
||||
@@ -234,17 +234,17 @@ func (c *Collector) sampleAll(ctx context.Context, targets []target) []store.Con
|
||||
found := make([]bool, len(targets))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
loop:
|
||||
for i, t := range targets {
|
||||
// Acquire the semaphore in the parent loop so ctx cancellation
|
||||
// short-circuits the queue rather than spawning goroutines that
|
||||
// block on an unreachable slot.
|
||||
// block on an unreachable slot. The labelled break exits the for
|
||||
// loop directly; a bare `break` inside `select` would only break
|
||||
// the select and let the loop continue.
|
||||
select {
|
||||
case sem <- struct{}{}:
|
||||
case <-ctx.Done():
|
||||
break
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
break loop
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(i int, t target) {
|
||||
|
||||
@@ -2,6 +2,7 @@ package store
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
@@ -9,6 +10,22 @@ import (
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// validateExtraJSON guards the extra_json column at the store boundary.
// The codemap (docs/CODEMAPS/container-extra-json.md) is explicit that
// readers tolerate unknown keys — which only holds when the stored value is
// a keyed JSON document in the first place. A buggy plugin writing a bare
// word, or a syntactically valid scalar such as `"not json"` or `42`, would
// silently break every reader, with no schema-level check to catch it.
//
// Accepted: the empty string (callers substitute "{}"), a JSON object, and
// the literal null (what encoding/json emits for a nil map). Everything else
// — malformed input, arrays, strings, numbers, booleans — is rejected.
func validateExtraJSON(v string) error {
	if v == "" {
		return nil
	}
	if !json.Valid([]byte(v)) {
		return fmt.Errorf("extra_json: not valid JSON (%d bytes)", len(v))
	}
	// The document is known to be valid JSON, so its first significant byte
	// identifies the top-level value kind.
	doc := strings.TrimSpace(v)
	if doc == "null" || strings.HasPrefix(doc, "{") {
		return nil
	}
	return fmt.Errorf("extra_json: top-level value must be a JSON object (%d bytes)", len(v))
}
|
||||
|
||||
// containerColumns is the canonical column list for `containers` queries.
|
||||
// stage_id is populated by the deployer for project containers (so ListProxyRoutes
|
||||
// survives stage renames) and left empty for stacks and sites.
|
||||
@@ -42,6 +59,9 @@ func (s *Store) CreateContainer(c Container) (Container, error) {
|
||||
if c.ExtraJSON == "" {
|
||||
c.ExtraJSON = "{}"
|
||||
}
|
||||
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||
return Container{}, err
|
||||
}
|
||||
|
||||
_, err := s.db.Exec(
|
||||
`INSERT INTO containers (`+containerColumns+`)
|
||||
@@ -77,6 +97,9 @@ func (s *Store) UpsertContainer(c Container) error {
|
||||
if c.ExtraJSON == "" {
|
||||
c.ExtraJSON = "{}"
|
||||
}
|
||||
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// SQLite UPSERT — INSERT...ON CONFLICT(id) DO UPDATE.
|
||||
_, err := s.db.Exec(
|
||||
@@ -129,6 +152,9 @@ func (s *Store) ReconcileContainer(c Container) error {
|
||||
if c.ExtraJSON == "" {
|
||||
c.ExtraJSON = "{}"
|
||||
}
|
||||
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// extra_json is deliberately NOT in the ON CONFLICT SET clause: the
|
||||
// reconciler can't observe per-face route IDs from Docker, and
|
||||
@@ -321,6 +347,9 @@ func (s *Store) UpdateContainer(c Container) error {
|
||||
if c.ExtraJSON == "" {
|
||||
c.ExtraJSON = "{}"
|
||||
}
|
||||
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||
return err
|
||||
}
|
||||
result, err := s.db.Exec(
|
||||
`UPDATE containers SET workload_id=?, workload_kind=?, role=?, stage_id=?, container_id=?,
|
||||
image_ref=?, image_tag=?, host=?, state=?, port=?,
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ErrLockHeld is returned when another Tinyforge process appears to be
|
||||
// running against the same data directory. SQLite + SetMaxOpenConns(1)
|
||||
// makes this otherwise-silent collision a recipe for double-fired
|
||||
// schedulers, double-polled registries, and `extra_json` RMW corruption.
|
||||
var ErrLockHeld = errors.New("data directory is locked by another tinyforge process")
|
||||
|
||||
// Lockfile is a portable PID file. AcquireLockfile takes it; the returned
// Release function removes it. The contract:
//
//   - Lockfile is created with O_CREATE|O_EXCL — atomic on POSIX, atomic
//     on NTFS / ReFS via the equivalent.
//   - On collision, the existing file's PID is read; if the PID is dead,
//     we treat the lock as stale (process crashed without cleanup),
//     reclaim it, and proceed. Live PID → ErrLockHeld.
//   - flock is intentionally not used: cross-platform consistency wins
//     over advisory-lock semantics for the single-instance use case.
//
// NOTE(review): AcquireLockfile returns a bare release func and none of the
// visible helpers construct a Lockfile value — confirm whether this type is
// still referenced anywhere or only serves as an anchor for the contract
// documentation above.
type Lockfile struct {
	// path is the location of the lock file on disk.
	path string
}
|
||||
|
||||
// AcquireLockfile creates a PID-file lock under dataDir. Returns a
|
||||
// Release function the caller must defer. If another live process holds
|
||||
// the lock, returns ErrLockHeld with a hint pointing at the lockfile.
|
||||
//
|
||||
// Reclaim atomicity: when the existing lockfile names a dead PID, the
|
||||
// replacement is serialized through an auxiliary reclaim lock (see
|
||||
// reclaimStaleLock) so that, of N processes booting concurrently against
|
||||
// the same stale lockfile, EXACTLY ONE reclaims it and the rest get
|
||||
// ErrLockHeld. A bare `os.Remove`+`O_EXCL` retry — or a rename, which is
|
||||
// "last-writer-wins" — cannot guarantee this: multiple reclaimers can each
|
||||
// end up believing they own the lock, defeating the single-instance guard.
|
||||
func AcquireLockfile(dataDir string) (release func(), err error) {
|
||||
path := filepath.Join(dataDir, "tinyforge.lock")
|
||||
|
||||
// First try: clean acquire.
|
||||
if rel, ok, err := tryCreateExclusive(path); ok {
|
||||
return rel, nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Existing lockfile — read PID and decide whether to reclaim.
|
||||
pid, readErr := readLockPID(path)
|
||||
if readErr == nil && processAlive(pid) {
|
||||
return nil, fmt.Errorf("%w (held by pid %d, lockfile=%s)", ErrLockHeld, pid, path)
|
||||
}
|
||||
// Stale lock (dead pid) or malformed file — reclaim under serialization.
|
||||
reason := "malformed existing lockfile"
|
||||
if readErr == nil {
|
||||
reason = fmt.Sprintf("stale lockfile (dead pid %d)", pid)
|
||||
}
|
||||
return reclaimStaleLock(path, reason)
|
||||
}
|
||||
|
||||
// tryCreateExclusive creates path with O_CREATE|O_EXCL and records the
// current PID in it, followed by a newline.
//
// Results:
//   - (release, true, nil): the file was created; release removes it.
//   - (nil, false, nil): the file already exists.
//   - (nil, false, err): any other failure. A file that was created but
//     could not be written or closed is removed before returning.
func tryCreateExclusive(path string) (func(), bool, error) {
	file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
	switch {
	case err == nil:
		// created; fall through to write the PID
	case os.IsExist(err):
		return nil, false, nil
	default:
		return nil, false, fmt.Errorf("open lockfile: %w", err)
	}

	discard := func() { _ = os.Remove(path) }

	if _, writeErr := file.WriteString(strconv.Itoa(os.Getpid()) + "\n"); writeErr != nil {
		_ = file.Close()
		discard()
		return nil, false, fmt.Errorf("write lockfile: %w", writeErr)
	}
	if closeErr := file.Close(); closeErr != nil {
		discard()
		return nil, false, fmt.Errorf("close lockfile: %w", closeErr)
	}
	return discard, true, nil
}
|
||||
|
||||
// reclaimStaleLock replaces a stale/malformed lockfile with one holding our
|
||||
// PID, serialized by an auxiliary reclaim lock. Holding the reclaim lock
|
||||
// (O_EXCL) guarantees that only one process performs the remove-and-recreate
|
||||
// of the main lockfile at a time, so concurrent reclaimers cannot each end
|
||||
// up "owning" the lock the way a rename or unguarded remove+create would
|
||||
// allow. The reclaim lock is itself liveness-checked so a reclaimer that
|
||||
// crashed mid-reclaim cannot wedge startup forever.
|
||||
func reclaimStaleLock(lockPath, reason string) (func(), error) {
|
||||
reclaimPath := lockPath + ".reclaim"
|
||||
if err := acquireReclaimLock(reclaimPath); err != nil {
|
||||
return nil, fmt.Errorf("%w (%v; %s)", ErrLockHeld, err, reason)
|
||||
}
|
||||
defer func() { _ = os.Remove(reclaimPath) }()
|
||||
|
||||
// Serialized now. Re-check the main lock: another process may have fully
|
||||
// reclaimed it between our liveness probe and our taking the reclaim lock.
|
||||
if pid, perr := readLockPID(lockPath); perr == nil && processAlive(pid) {
|
||||
return nil, fmt.Errorf("%w (reclaimed by pid %d while we waited; %s)",
|
||||
ErrLockHeld, pid, reason)
|
||||
}
|
||||
|
||||
// Safe to replace: remove the stale file, then create a fresh exclusive
|
||||
// one. Both run while we hold the reclaim lock, so no other reclaimer can
|
||||
// observe the gap.
|
||||
if err := os.Remove(lockPath); err != nil && !os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("%w (could not remove stale lockfile %s: %v; %s)",
|
||||
ErrLockHeld, lockPath, err, reason)
|
||||
}
|
||||
rel, ok, err := tryCreateExclusive(lockPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !ok {
|
||||
// Should be impossible while we hold the reclaim lock; fail safe.
|
||||
return nil, fmt.Errorf("%w (lockfile reappeared during reclaim of %s; %s)",
|
||||
ErrLockHeld, lockPath, reason)
|
||||
}
|
||||
return rel, nil
|
||||
}
|
||||
|
||||
// acquireReclaimLock takes the auxiliary reclaim lock with O_EXCL. An
|
||||
// existing reclaim lock is honoured only while its recorded PID is alive (a
|
||||
// genuine concurrent reclaim); a stale one (dead/foreign PID) is removed once
|
||||
// and re-attempted so a crashed reclaimer cannot block boot indefinitely. Of
|
||||
// concurrent callers, O_EXCL ensures at most one acquires it; the rest fail
|
||||
// and back off to ErrLockHeld.
|
||||
func acquireReclaimLock(reclaimPath string) error {
|
||||
for attempt := 0; attempt < 2; attempt++ {
|
||||
f, err := os.OpenFile(reclaimPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
|
||||
if err == nil {
|
||||
if _, werr := fmt.Fprintf(f, "%d\n", os.Getpid()); werr != nil {
|
||||
_ = f.Close()
|
||||
_ = os.Remove(reclaimPath)
|
||||
return fmt.Errorf("write reclaim lock %s: %v", reclaimPath, werr)
|
||||
}
|
||||
return f.Close()
|
||||
}
|
||||
if !os.IsExist(err) {
|
||||
return fmt.Errorf("create reclaim lock %s: %v", reclaimPath, err)
|
||||
}
|
||||
// Reclaim lock present. A live owner means a real concurrent reclaim.
|
||||
if pid, perr := readLockPID(reclaimPath); perr == nil && processAlive(pid) {
|
||||
return fmt.Errorf("concurrent reclaim in progress (pid %d)", pid)
|
||||
}
|
||||
// Stale reclaim lock — clear it and retry the exclusive create once.
|
||||
if rerr := os.Remove(reclaimPath); rerr != nil && !os.IsNotExist(rerr) {
|
||||
return fmt.Errorf("remove stale reclaim lock %s: %v", reclaimPath, rerr)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("could not acquire reclaim lock %s after retry", reclaimPath)
|
||||
}
|
||||
|
||||
// readLockPID reads the file at path and parses its whitespace-trimmed
// content as a decimal PID. It returns the read error if the file cannot be
// read, an "empty lockfile" error if nothing but whitespace is present, and
// the strconv.Atoi error if the content is not an integer.
func readLockPID(path string) (int, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}
	if text := strings.TrimSpace(string(raw)); text != "" {
		return strconv.Atoi(text)
	}
	return 0, errors.New("empty lockfile")
}
|
||||
@@ -0,0 +1,137 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAcquireLockfile_FreshDir(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
release, err := AcquireLockfile(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("AcquireLockfile: %v", err)
|
||||
}
|
||||
defer release()
|
||||
|
||||
// Lockfile should exist with our PID.
|
||||
data, err := os.ReadFile(filepath.Join(dir, "tinyforge.lock"))
|
||||
if err != nil {
|
||||
t.Fatalf("read lockfile: %v", err)
|
||||
}
|
||||
want := fmt.Sprintf("%d\n", os.Getpid())
|
||||
if string(data) != want {
|
||||
t.Errorf("lockfile content = %q, want %q", data, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcquireLockfile_HeldByLivePID_Refused(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// Plant a lockfile holding the current PID (which is obviously alive).
|
||||
if err := os.WriteFile(filepath.Join(dir, "tinyforge.lock"),
|
||||
[]byte(fmt.Sprintf("%d\n", os.Getpid())), 0o600); err != nil {
|
||||
t.Fatalf("plant lockfile: %v", err)
|
||||
}
|
||||
release, err := AcquireLockfile(dir)
|
||||
if err == nil {
|
||||
release()
|
||||
t.Fatal("expected ErrLockHeld, got nil")
|
||||
}
|
||||
if !errors.Is(err, ErrLockHeld) {
|
||||
t.Errorf("error = %v, want wrap of ErrLockHeld", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcquireLockfile_StalePID_Reclaimed(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// PID 1 is init/launchd/systemd on POSIX and the System Idle Process
|
||||
// on Windows — never our process, and very unlikely to be dead. We
|
||||
// use a deliberately-impossible PID instead: a 31-bit value far
|
||||
// above any plausible system maximum.
|
||||
stalePID := 2147483640
|
||||
if err := os.WriteFile(filepath.Join(dir, "tinyforge.lock"),
|
||||
[]byte(fmt.Sprintf("%d\n", stalePID)), 0o600); err != nil {
|
||||
t.Fatalf("plant stale lockfile: %v", err)
|
||||
}
|
||||
release, err := AcquireLockfile(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("expected reclaim of stale lock, got: %v", err)
|
||||
}
|
||||
defer release()
|
||||
|
||||
// Verify it now holds OUR pid, not the stale one.
|
||||
data, err := os.ReadFile(filepath.Join(dir, "tinyforge.lock"))
|
||||
if err != nil {
|
||||
t.Fatalf("read lockfile after reclaim: %v", err)
|
||||
}
|
||||
want := fmt.Sprintf("%d\n", os.Getpid())
|
||||
if string(data) != want {
|
||||
t.Errorf("lockfile content after reclaim = %q, want %q", data, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcquireLockfile_ConcurrentReclaim_SingleWinner(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// Plant a stale lockfile (impossibly high, certainly-dead PID), then have
|
||||
// many goroutines race to reclaim it. Exactly one must win; the rest must
|
||||
// be refused with ErrLockHeld. A "last-writer-wins" reclaim would let
|
||||
// several goroutines all believe they own the lock.
|
||||
stalePID := 2147483640
|
||||
if err := os.WriteFile(filepath.Join(dir, "tinyforge.lock"),
|
||||
[]byte(fmt.Sprintf("%d\n", stalePID)), 0o600); err != nil {
|
||||
t.Fatalf("plant stale lockfile: %v", err)
|
||||
}
|
||||
|
||||
const n = 16
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
mu sync.Mutex
|
||||
winners int
|
||||
releases []func()
|
||||
)
|
||||
start := make(chan struct{})
|
||||
for i := 0; i < n; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
<-start
|
||||
release, err := AcquireLockfile(dir)
|
||||
if err != nil {
|
||||
if !errors.Is(err, ErrLockHeld) {
|
||||
t.Errorf("loser error = %v, want wrap of ErrLockHeld", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
mu.Lock()
|
||||
winners++
|
||||
releases = append(releases, release)
|
||||
mu.Unlock()
|
||||
}()
|
||||
}
|
||||
close(start)
|
||||
wg.Wait()
|
||||
|
||||
for _, r := range releases {
|
||||
r()
|
||||
}
|
||||
if winners != 1 {
|
||||
t.Fatalf("concurrent reclaim winners = %d, want exactly 1", winners)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAcquireLockfile_ReleaseRemovesFile(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
release, err := AcquireLockfile(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("AcquireLockfile: %v", err)
|
||||
}
|
||||
release()
|
||||
|
||||
path := filepath.Join(dir, "tinyforge.lock")
|
||||
if _, err := os.Stat(path); !os.IsNotExist(err) {
|
||||
t.Errorf("lockfile still present after release: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
//go:build !windows
|
||||
|
||||
package store
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// processAlive reports whether pid refers to a running process, probed by
// sending signal 0 (kill(pid, 0) on POSIX: nothing is delivered, only the
// existence/permission check runs).
//
// Non-positive PIDs are never alive. A nil error from the probe means the
// process exists; a permission error means it exists but is owned by someone
// else, which still counts as alive; any other error is treated as dead.
//
// The os.FindProcess error is deliberately ignored: the call is only used to
// obtain the *os.Process handle that Signal needs, and the verdict comes
// from the Signal result alone.
func processAlive(pid int) bool {
	if pid <= 0 {
		return false
	}
	handle, _ := os.FindProcess(pid)
	if handle == nil {
		return false
	}
	switch probeErr := handle.Signal(syscall.Signal(0)); {
	case probeErr == nil:
		return true
	case errors.Is(probeErr, os.ErrPermission), errors.Is(probeErr, syscall.EPERM):
		// Alive but not ours.
		return true
	default:
		return false
	}
}
|
||||
@@ -0,0 +1,30 @@
|
||||
//go:build windows
|
||||
|
||||
package store
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
// processAlive returns true when the given PID is currently held by a
|
||||
// running Windows process. OpenProcess with PROCESS_QUERY_LIMITED_INFORMATION
|
||||
// is the supported way to check liveness without elevation.
|
||||
func processAlive(pid int) bool {
|
||||
if pid <= 0 {
|
||||
return false
|
||||
}
|
||||
h, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer windows.CloseHandle(h)
|
||||
var exitCode uint32
|
||||
if err := windows.GetExitCodeProcess(h, &exitCode); err != nil {
|
||||
// Conservative: if we can't ask, assume alive so we don't reclaim
|
||||
// an active lock. Worst case the operator sees ErrLockHeld and
|
||||
// removes the lockfile by hand.
|
||||
return true
|
||||
}
|
||||
const stillActive = 259 // STILL_ACTIVE
|
||||
return exitCode == stillActive
|
||||
}
|
||||
@@ -278,12 +278,20 @@ const (
|
||||
// containers.workload_kind and workloads.kind. After the hard cutover the
|
||||
// backing project / stack / static_site tables are gone — these constants
|
||||
// are just strings used to filter the unified containers index in the UI.
|
||||
//
|
||||
// `build` is the dockerfile-source kind: a container built from a
|
||||
// Dockerfile in a Git repo. Operationally it looks like a site (one
|
||||
// container, one optional public face) but its origin is the build
|
||||
// pipeline, not a static-asset extract. Dashboard filters that need to
|
||||
// distinguish "I built this from source" from "I served files from a
|
||||
// repo" should key on this value.
|
||||
type WorkloadKind string
|
||||
|
||||
const (
|
||||
WorkloadKindProject WorkloadKind = "project"
|
||||
WorkloadKindStack WorkloadKind = "stack"
|
||||
WorkloadKindSite WorkloadKind = "site"
|
||||
WorkloadKindBuild WorkloadKind = "build"
|
||||
)
|
||||
|
||||
// Workload is the unifying primitive that abstracts Project, Stack, and StaticSite.
|
||||
@@ -316,6 +324,31 @@ type Workload struct {
|
||||
UpdatedAt string `json:"updated_at"`
|
||||
}
|
||||
|
||||
// WorkloadNotification is one configured outbound notification route for
// a workload. Multiple rows per workload model the "one Slack channel
// for failures, one Discord webhook for successes" routing the legacy
// single notification_url column could not express.
//
// EventTypes is a comma-separated allow-list (e.g. "build_failure" or
// "deploy_success,deploy_failure"). An empty EventTypes means the row
// fires for every event type — the cheapest way to keep the existing
// single-destination behaviour expressible in the new shape.
//
// Secret round-trips through the same crypto envelope as other stored
// secrets; the API layer strips it from responses.
type WorkloadNotification struct {
	ID         string `json:"id"`
	WorkloadID string `json:"workload_id"` // owning workload
	Name       string `json:"name"`
	URL        string `json:"url"`
	// Secret is tagged json:"-", so encoding/json never emits it and never
	// populates it from a decoded payload.
	Secret     string `json:"-"`
	EventTypes string `json:"event_types"` // comma-separated allow-list; empty = all events
	Enabled    bool   `json:"enabled"`
	SortOrder  int    `json:"sort_order"`
	CreatedAt  string `json:"created_at"`
	UpdatedAt  string `json:"updated_at"`
}
|
||||
|
||||
// Container is the normalized index of every Tinyforge-managed container.
|
||||
// Replaces the project-specific Instance table after migration. Subdomain/
|
||||
// proxy fields are hoisted as first-class columns because ListProxyRoutes,
|
||||
|
||||
+232
-2
@@ -55,11 +55,20 @@ func New(dbPath string) (*Store, error) {
|
||||
db.SetMaxOpenConns(1)
|
||||
db.SetConnMaxLifetime(0)
|
||||
|
||||
// Enable WAL mode and foreign keys for better concurrency and referential integrity.
|
||||
// Enable WAL mode and foreign keys for better concurrency and
|
||||
// referential integrity. `synchronous=NORMAL` pairs with WAL to skip
|
||||
// the per-write fsync — the OS still flushes on checkpoint, durability
|
||||
// is preserved across clean shutdowns, and crashes lose at most the
|
||||
// last few committed transactions (acceptable for a tinyforge box).
|
||||
// cache_size=-20000 = 20 MiB page cache, temp_store=MEMORY keeps
|
||||
// indexer scratch off disk; both are pure perf knobs.
|
||||
pragmas := []string{
|
||||
"PRAGMA journal_mode=WAL",
|
||||
"PRAGMA synchronous=NORMAL",
|
||||
"PRAGMA foreign_keys=ON",
|
||||
"PRAGMA busy_timeout=5000",
|
||||
"PRAGMA cache_size=-20000",
|
||||
"PRAGMA temp_store=MEMORY",
|
||||
}
|
||||
for _, p := range pragmas {
|
||||
if _, err := db.Exec(p); err != nil {
|
||||
@@ -284,6 +293,24 @@ func (s *Store) runMigrations() error {
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
// workload_notifications: per-workload notification destinations.
|
||||
// Each row is one route (Slack channel, Discord webhook, generic
|
||||
// receiver, ...). event_types is a comma-separated allow-list —
|
||||
// empty means "all events". When zero rows exist for a workload
|
||||
// the dispatcher falls back to the legacy single notification_url
|
||||
// column on workloads so existing setups keep working unchanged.
|
||||
`CREATE TABLE IF NOT EXISTS workload_notifications (
|
||||
id TEXT PRIMARY KEY,
|
||||
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||
name TEXT NOT NULL,
|
||||
url TEXT NOT NULL,
|
||||
secret TEXT NOT NULL DEFAULT '',
|
||||
event_types TEXT NOT NULL DEFAULT '',
|
||||
enabled INTEGER NOT NULL DEFAULT 1,
|
||||
sort_order INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`,
|
||||
// workload_trigger_bindings: many-to-many between workloads and
|
||||
// triggers. binding_config is the per-binding override applied on
|
||||
// top of trigger.config (top-level JSON merge, binding wins).
|
||||
@@ -427,6 +454,7 @@ func (s *Store) runMigrations() error {
|
||||
`CREATE UNIQUE INDEX IF NOT EXISTS idx_triggers_webhook_secret ON triggers(webhook_secret) WHERE webhook_secret != ''`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_bindings_workload ON workload_trigger_bindings(workload_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_bindings_trigger ON workload_trigger_bindings(trigger_id)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_workload_notifs_workload ON workload_notifications(workload_id)`,
|
||||
}
|
||||
for _, idx := range indexes {
|
||||
if _, err := s.db.Exec(idx); err != nil {
|
||||
@@ -434,13 +462,215 @@ func (s *Store) runMigrations() error {
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.backfillTriggersFromWorkloads(); err != nil {
|
||||
// schema_versions table gates one-shot data migrations like the
|
||||
// trigger backfill below. Without this, the backfill scan ran on
|
||||
// every boot even on fully-migrated DBs — wasted I/O and (more
|
||||
// importantly) made it impossible to tell whether a "no rows
|
||||
// processed" was a clean state or a missed-migration bug.
|
||||
if _, err := s.db.Exec(`CREATE TABLE IF NOT EXISTS schema_versions (
|
||||
version INTEGER PRIMARY KEY,
|
||||
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)`); err != nil {
|
||||
return fmt.Errorf("create schema_versions: %w", err)
|
||||
}
|
||||
|
||||
if err := s.runOnce(1, "trigger backfill", s.backfillTriggersFromWorkloads); err != nil {
|
||||
// Backfill failure is non-fatal — we log and let the operator
|
||||
// retry. The version is only recorded on success.
|
||||
slog.Warn("trigger backfill", "error", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// runOnce executes fn at most one time per database lifetime, recording
|
||||
// success in schema_versions. Useful for data migrations whose source
|
||||
// table eventually disappears (so re-running becomes pointless or
|
||||
// dangerous).
|
||||
func (s *Store) runOnce(version int, label string, fn func() error) error {
|
||||
var applied int
|
||||
if err := s.db.QueryRow(`SELECT COUNT(*) FROM schema_versions WHERE version = ?`, version).Scan(&applied); err != nil {
|
||||
return fmt.Errorf("check %s: %w", label, err)
|
||||
}
|
||||
if applied > 0 {
|
||||
return nil
|
||||
}
|
||||
if err := fn(); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := s.db.Exec(`INSERT INTO schema_versions (version) VALUES (?)`, version); err != nil {
|
||||
return fmt.Errorf("mark %s applied: %w", label, err)
|
||||
}
|
||||
slog.Info("schema migration applied", "version", version, "label", label)
|
||||
return nil
|
||||
}
|
||||
|
||||
// RunOnce is the exported counterpart of runOnce. It exists so cmd/server
// can gate post-store-open migrations (e.g. crypto re-encryption that needs
// the ENCRYPTION_KEY, which Store does not own) through the same
// schema_versions ledger.
//
// It delegates directly to runOnce: fn is skipped when version is already
// recorded, and the version is recorded only after fn returns nil.
func (s *Store) RunOnce(version int, label string, fn func() error) error {
	return s.runOnce(version, label, fn)
}
|
||||
|
||||
// EnvelopeMigrator describes the contract a crypto package implements to
// rewrite legacy unprefixed-hex ciphertext as versioned envelope values.
// By accepting closures the store stays free of any import on
// internal/crypto, mirroring the rest of the package layout.
type EnvelopeMigrator struct {
	// HasEnvelope reports whether value already carries the new prefix.
	HasEnvelope func(value string) bool
	// Decrypt returns the plaintext for either the legacy or envelope form.
	Decrypt func(ciphertext string) (string, error)
	// Encrypt always produces the new envelope form.
	Encrypt func(plaintext string) (string, error)
}
|
||||
|
||||
// MigrateSecretsToEnvelope walks every column known to carry an encrypted
|
||||
// secret and rewrites legacy unprefixed-hex values into the new
|
||||
// envelope form using the current encryption key.
|
||||
//
|
||||
// Behaviour, per-row:
|
||||
// - empty value → skip (no secret stored)
|
||||
// - already-envelope value → skip (already migrated)
|
||||
// - decrypt fails → skip (value is either plaintext from a v0 boot
|
||||
// OR ciphertext from a rotated key; either way we cannot safely
|
||||
// re-encrypt and leaving it alone preserves the existing read
|
||||
// semantics)
|
||||
// - decrypt succeeds → encrypt to envelope form + UPDATE
|
||||
//
|
||||
// The whole sweep runs in a single transaction so a power-loss
|
||||
// mid-migration leaves the DB in either the pre- or post-migration
|
||||
// state, never half. Idempotent via schema_versions version 2 — the
|
||||
// next boot is a no-op.
|
||||
//
|
||||
// Columns covered:
|
||||
// - settings.npm_password
|
||||
// - settings.cloudflare_api_token
|
||||
// - auth_settings.oidc_client_secret
|
||||
// - registries.token
|
||||
// - workload_env.value WHERE encrypted=1
|
||||
func (s *Store) MigrateSecretsToEnvelope(m EnvelopeMigrator) error {
|
||||
return s.runOnce(2, "secrets envelope migration", func() error {
|
||||
tx, err := s.db.Begin()
|
||||
if err != nil {
|
||||
return fmt.Errorf("begin: %w", err)
|
||||
}
|
||||
defer func() { _ = tx.Rollback() }()
|
||||
|
||||
// Single-row tables (settings, auth_settings) — read-update inline.
|
||||
singleRowColumns := []struct {
|
||||
table, column string
|
||||
}{
|
||||
{"settings", "npm_password"},
|
||||
{"settings", "cloudflare_api_token"},
|
||||
{"auth_settings", "oidc_client_secret"},
|
||||
}
|
||||
for _, c := range singleRowColumns {
|
||||
var v string
|
||||
err := tx.QueryRow(
|
||||
fmt.Sprintf(`SELECT %s FROM %s LIMIT 1`, c.column, c.table),
|
||||
).Scan(&v)
|
||||
if err != nil {
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
continue
|
||||
}
|
||||
// auth_settings may not exist on a brand-new DB until
|
||||
// the OIDC code touches it; treat as nothing-to-migrate.
|
||||
slog.Debug("envelope migration: column read skipped",
|
||||
"table", c.table, "column", c.column, "error", err)
|
||||
continue
|
||||
}
|
||||
migrated, ok := tryMigrate(m, v)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, err := tx.Exec(
|
||||
fmt.Sprintf(`UPDATE %s SET %s = ?`, c.table, c.column),
|
||||
migrated,
|
||||
); err != nil {
|
||||
return fmt.Errorf("update %s.%s: %w", c.table, c.column, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-row: registries.token
|
||||
if err := migrateRowColumn(tx, m,
|
||||
`SELECT id, token FROM registries WHERE token != ''`,
|
||||
`UPDATE registries SET token = ? WHERE id = ?`,
|
||||
); err != nil {
|
||||
return fmt.Errorf("registries.token: %w", err)
|
||||
}
|
||||
|
||||
// Multi-row: workload_env.value WHERE encrypted=1
|
||||
if err := migrateRowColumn(tx, m,
|
||||
`SELECT id, value FROM workload_env WHERE encrypted = 1 AND value != ''`,
|
||||
`UPDATE workload_env SET value = ? WHERE id = ?`,
|
||||
); err != nil {
|
||||
return fmt.Errorf("workload_env.value: %w", err)
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return fmt.Errorf("commit: %w", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// migrateRowColumn applies the envelope rewrite to every (id, value)
|
||||
// pair returned by selectQ. updateQ takes (newValue, id) as parameters.
|
||||
// Each row is its own attempt; one row failing migration (decrypt fail)
|
||||
// does not abort the others.
|
||||
func migrateRowColumn(tx *sql.Tx, m EnvelopeMigrator, selectQ, updateQ string) error {
|
||||
rows, err := tx.Query(selectQ)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
type pending struct{ id, newValue string }
|
||||
var updates []pending
|
||||
for rows.Next() {
|
||||
var id, value string
|
||||
if err := rows.Scan(&id, &value); err != nil {
|
||||
return err
|
||||
}
|
||||
newValue, ok := tryMigrate(m, value)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
updates = append(updates, pending{id, newValue})
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, u := range updates {
|
||||
if _, err := tx.Exec(updateQ, u.newValue, u.id); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// tryMigrate returns the envelope-form ciphertext + true when the input
|
||||
// is a legacy unprefixed value that decrypts successfully with the
|
||||
// current key. Returns ("", false) for anything else: empty, already
|
||||
// envelope, plaintext, or decrypt-failed (rotated-key case).
|
||||
func tryMigrate(m EnvelopeMigrator, v string) (string, bool) {
|
||||
if v == "" {
|
||||
return "", false
|
||||
}
|
||||
if m.HasEnvelope(v) {
|
||||
return "", false
|
||||
}
|
||||
plaintext, err := m.Decrypt(v)
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
enc, err := m.Encrypt(plaintext)
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
return enc, true
|
||||
}
|
||||
|
||||
// backfillTriggersFromWorkloads converts embedded trigger config on
|
||||
// workload rows into standalone trigger + binding rows. Runs once per
|
||||
// boot and is idempotent — only workloads with non-empty trigger_kind
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
const workloadNotificationColumns = `id, workload_id, name, url, secret,
|
||||
event_types, enabled, sort_order, created_at, updated_at`
|
||||
|
||||
func scanWorkloadNotification(scanner interface{ Scan(...any) error }) (WorkloadNotification, error) {
|
||||
var n WorkloadNotification
|
||||
var enabled int
|
||||
err := scanner.Scan(
|
||||
&n.ID, &n.WorkloadID, &n.Name, &n.URL, &n.Secret,
|
||||
&n.EventTypes, &enabled, &n.SortOrder, &n.CreatedAt, &n.UpdatedAt,
|
||||
)
|
||||
n.Enabled = enabled != 0
|
||||
return n, err
|
||||
}
|
||||
|
||||
// CreateWorkloadNotification inserts a notification route. Returns the
|
||||
// populated row (with assigned id + timestamps) so callers don't need to
|
||||
// follow up with a Get.
|
||||
func (s *Store) CreateWorkloadNotification(n WorkloadNotification) (WorkloadNotification, error) {
|
||||
if n.WorkloadID == "" {
|
||||
return WorkloadNotification{}, fmt.Errorf("workload_id is required")
|
||||
}
|
||||
if n.URL == "" {
|
||||
return WorkloadNotification{}, fmt.Errorf("url is required")
|
||||
}
|
||||
if n.ID == "" {
|
||||
n.ID = uuid.New().String()
|
||||
}
|
||||
n.CreatedAt = Now()
|
||||
n.UpdatedAt = n.CreatedAt
|
||||
|
||||
_, err := s.db.Exec(
|
||||
`INSERT INTO workload_notifications (`+workloadNotificationColumns+`)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
n.ID, n.WorkloadID, n.Name, n.URL, n.Secret,
|
||||
n.EventTypes, BoolToInt(n.Enabled), n.SortOrder, n.CreatedAt, n.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
return WorkloadNotification{}, fmt.Errorf("insert workload_notification: %w", err)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// ListWorkloadNotifications returns every notification row for a
|
||||
// workload ordered by (sort_order, created_at) so the UI stays stable
|
||||
// across reorderings.
|
||||
func (s *Store) ListWorkloadNotifications(workloadID string) ([]WorkloadNotification, error) {
|
||||
rows, err := s.db.Query(
|
||||
`SELECT `+workloadNotificationColumns+`
|
||||
FROM workload_notifications
|
||||
WHERE workload_id = ?
|
||||
ORDER BY sort_order, created_at`,
|
||||
workloadID,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list workload_notifications: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
out := []WorkloadNotification{}
|
||||
for rows.Next() {
|
||||
n, err := scanWorkloadNotification(rows)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scan workload_notification: %w", err)
|
||||
}
|
||||
out = append(out, n)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// GetWorkloadNotification fetches one notification row by id. Returns
|
||||
// ErrNotFound when the row does not exist so callers can return 404
|
||||
// cleanly.
|
||||
func (s *Store) GetWorkloadNotification(id string) (WorkloadNotification, error) {
|
||||
n, err := scanWorkloadNotification(s.db.QueryRow(
|
||||
`SELECT `+workloadNotificationColumns+`
|
||||
FROM workload_notifications WHERE id = ?`, id,
|
||||
))
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return WorkloadNotification{}, fmt.Errorf("workload_notification %s: %w", id, ErrNotFound)
|
||||
}
|
||||
if err != nil {
|
||||
return WorkloadNotification{}, fmt.Errorf("query workload_notification: %w", err)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// UpdateWorkloadNotification rewrites an existing row. WorkloadID is
|
||||
// immutable — re-anchoring a route to a different workload would invite
|
||||
// silent reassignments after a paste-bug in the UI; recreate instead.
|
||||
func (s *Store) UpdateWorkloadNotification(n WorkloadNotification) error {
|
||||
if n.ID == "" {
|
||||
return fmt.Errorf("id is required")
|
||||
}
|
||||
if n.URL == "" {
|
||||
return fmt.Errorf("url is required")
|
||||
}
|
||||
n.UpdatedAt = Now()
|
||||
res, err := s.db.Exec(
|
||||
`UPDATE workload_notifications
|
||||
SET name = ?, url = ?, secret = ?, event_types = ?,
|
||||
enabled = ?, sort_order = ?, updated_at = ?
|
||||
WHERE id = ?`,
|
||||
n.Name, n.URL, n.Secret, n.EventTypes,
|
||||
BoolToInt(n.Enabled), n.SortOrder, n.UpdatedAt, n.ID,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("update workload_notification: %w", err)
|
||||
}
|
||||
rows, _ := res.RowsAffected()
|
||||
if rows == 0 {
|
||||
return fmt.Errorf("workload_notification %s: %w", n.ID, ErrNotFound)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteWorkloadNotification drops a single notification row.
|
||||
// Idempotent: missing id returns ErrNotFound so the API can map it to
|
||||
// 404 cleanly.
|
||||
func (s *Store) DeleteWorkloadNotification(id string) error {
|
||||
res, err := s.db.Exec(`DELETE FROM workload_notifications WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete workload_notification: %w", err)
|
||||
}
|
||||
rows, _ := res.RowsAffected()
|
||||
if rows == 0 {
|
||||
return fmt.Errorf("workload_notification %s: %w", id, ErrNotFound)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MatchesEventType returns true when the notification row's EventTypes
|
||||
// allow-list includes eventType (or is empty, meaning "match all").
|
||||
// Helper exported so the notification dispatcher can fan-out filtering
|
||||
// inline without duplicating the comma-split parser.
|
||||
func (n WorkloadNotification) MatchesEventType(eventType string) bool {
|
||||
if !n.Enabled {
|
||||
return false
|
||||
}
|
||||
if n.EventTypes == "" {
|
||||
return true
|
||||
}
|
||||
for _, et := range strings.Split(n.EventTypes, ",") {
|
||||
if strings.TrimSpace(et) == eventType {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -0,0 +1,170 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// seedWorkloadForNotifications creates a minimal workload row so the FK
|
||||
// constraint on workload_notifications is satisfied. Returns the new
|
||||
// workload's ID for tests to reference.
|
||||
func seedWorkloadForNotifications(t *testing.T, s *Store, name string) string {
|
||||
t.Helper()
|
||||
w, err := s.CreateWorkload(Workload{
|
||||
Kind: string(WorkloadKindProject),
|
||||
Name: name,
|
||||
SourceKind: "image",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("seed workload: %v", err)
|
||||
}
|
||||
return w.ID
|
||||
}
|
||||
|
||||
func TestCreateWorkloadNotification_RoundTrip(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||
|
||||
created, err := s.CreateWorkloadNotification(WorkloadNotification{
|
||||
WorkloadID: wlID,
|
||||
Name: "Slack alerts",
|
||||
URL: "https://hooks.slack.test/x",
|
||||
Secret: "shh",
|
||||
EventTypes: "deploy_failure,build_failure",
|
||||
Enabled: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("CreateWorkloadNotification: %v", err)
|
||||
}
|
||||
if created.ID == "" {
|
||||
t.Fatal("expected ID to be assigned")
|
||||
}
|
||||
|
||||
got, err := s.GetWorkloadNotification(created.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("Get: %v", err)
|
||||
}
|
||||
if got.URL != "https://hooks.slack.test/x" || got.Name != "Slack alerts" {
|
||||
t.Errorf("row mismatch: %+v", got)
|
||||
}
|
||||
if !got.Enabled {
|
||||
t.Error("expected Enabled=true")
|
||||
}
|
||||
if got.EventTypes != "deploy_failure,build_failure" {
|
||||
t.Errorf("event_types = %q", got.EventTypes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateWorkloadNotification_RejectsMissingURL(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||
_, err := s.CreateWorkloadNotification(WorkloadNotification{
|
||||
WorkloadID: wlID,
|
||||
Name: "broken",
|
||||
URL: "",
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected URL validation error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestListWorkloadNotifications_SortedByOrder(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||
|
||||
// Insert out of order; ListWorkloadNotifications should return
|
||||
// them sorted by SortOrder ascending.
|
||||
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||
WorkloadID: wlID, Name: "C", URL: "https://c.test", SortOrder: 30,
|
||||
})
|
||||
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||
WorkloadID: wlID, Name: "A", URL: "https://a.test", SortOrder: 10,
|
||||
})
|
||||
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||
WorkloadID: wlID, Name: "B", URL: "https://b.test", SortOrder: 20,
|
||||
})
|
||||
|
||||
rows, err := s.ListWorkloadNotifications(wlID)
|
||||
if err != nil {
|
||||
t.Fatalf("list: %v", err)
|
||||
}
|
||||
if len(rows) != 3 {
|
||||
t.Fatalf("len = %d, want 3", len(rows))
|
||||
}
|
||||
if rows[0].Name != "A" || rows[1].Name != "B" || rows[2].Name != "C" {
|
||||
t.Errorf("sort order wrong: %q %q %q", rows[0].Name, rows[1].Name, rows[2].Name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateWorkloadNotification_PersistsChanges(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||
n, _ := s.CreateWorkloadNotification(WorkloadNotification{
|
||||
WorkloadID: wlID, Name: "old", URL: "https://old.test", Enabled: true,
|
||||
})
|
||||
n.Name = "new"
|
||||
n.URL = "https://new.test"
|
||||
n.Enabled = false
|
||||
n.EventTypes = "deploy_success"
|
||||
if err := s.UpdateWorkloadNotification(n); err != nil {
|
||||
t.Fatalf("update: %v", err)
|
||||
}
|
||||
got, _ := s.GetWorkloadNotification(n.ID)
|
||||
if got.Name != "new" || got.URL != "https://new.test" || got.Enabled {
|
||||
t.Errorf("update did not persist: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteWorkloadNotification_ReturnsNotFoundForMissing(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
err := s.DeleteWorkloadNotification("nope")
|
||||
if !errors.Is(err, ErrNotFound) {
|
||||
t.Errorf("expected ErrNotFound, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteWorkloadNotification_CascadesFromWorkload(t *testing.T) {
|
||||
s := newTestStore(t)
|
||||
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||
WorkloadID: wlID, Name: "x", URL: "https://x.test",
|
||||
})
|
||||
if err := s.DeleteWorkload(wlID); err != nil {
|
||||
t.Fatalf("delete workload: %v", err)
|
||||
}
|
||||
rows, err := s.ListWorkloadNotifications(wlID)
|
||||
if err != nil {
|
||||
t.Fatalf("list after cascade: %v", err)
|
||||
}
|
||||
if len(rows) != 0 {
|
||||
t.Errorf("expected cascade delete to remove rows, got %d", len(rows))
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchesEventType_AllowList(t *testing.T) {
|
||||
cases := []struct {
|
||||
eventTypes string
|
||||
probe string
|
||||
want bool
|
||||
}{
|
||||
{"", "deploy_success", true}, // empty = all
|
||||
{"deploy_success,deploy_failure", "deploy_success", true},
|
||||
{"deploy_success,deploy_failure", "build_failure", false},
|
||||
{"build_failure", "build_failure", true},
|
||||
{" deploy_success , build_failure ", "build_failure", true}, // whitespace tolerated
|
||||
}
|
||||
for _, c := range cases {
|
||||
n := WorkloadNotification{Enabled: true, EventTypes: c.eventTypes}
|
||||
got := n.MatchesEventType(c.probe)
|
||||
if got != c.want {
|
||||
t.Errorf("MatchesEventType(%q, %q) = %v, want %v", c.eventTypes, c.probe, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchesEventType_DisabledNeverMatches(t *testing.T) {
|
||||
n := WorkloadNotification{Enabled: false, EventTypes: ""}
|
||||
if n.MatchesEventType("any") {
|
||||
t.Error("disabled row should never match")
|
||||
}
|
||||
}
|
||||
@@ -173,11 +173,24 @@ func (s *Store) UpdateWorkload(w Workload) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteWorkload removes a workload row. Cascading deletes for the matching
|
||||
// project/stack/site row stay with the kind-specific Delete functions; this
|
||||
// only removes the workload entry.
|
||||
// DeleteWorkload removes a workload row. Cascading deletes for FK-backed
|
||||
// child tables (workload_env, workload_volumes, workload_trigger_bindings)
|
||||
// happen via SQLite's ON DELETE CASCADE. The `containers` table doesn't
|
||||
// yet have an FK to workloads (planned migration — see ops notes), so we
|
||||
// drop its rows explicitly here in the same transaction to prevent zombie
|
||||
// container rows from outliving their owning workload.
|
||||
func (s *Store) DeleteWorkload(id string) error {
|
||||
result, err := s.db.Exec(`DELETE FROM workloads WHERE id = ?`, id)
|
||||
tx, err := s.db.Begin()
|
||||
if err != nil {
|
||||
return fmt.Errorf("begin: %w", err)
|
||||
}
|
||||
defer func() { _ = tx.Rollback() }()
|
||||
|
||||
// Explicit container cleanup until the FK migration lands.
|
||||
if _, err := tx.Exec(`DELETE FROM containers WHERE workload_id = ?`, id); err != nil {
|
||||
return fmt.Errorf("delete containers: %w", err)
|
||||
}
|
||||
result, err := tx.Exec(`DELETE FROM workloads WHERE id = ?`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("delete workload: %w", err)
|
||||
}
|
||||
@@ -188,6 +201,9 @@ func (s *Store) DeleteWorkload(id string) error {
|
||||
if n == 0 {
|
||||
return fmt.Errorf("workload %s: %w", id, ErrNotFound)
|
||||
}
|
||||
if err := tx.Commit(); err != nil {
|
||||
return fmt.Errorf("commit: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -169,6 +169,18 @@ func SaveFile(rootPath, relativePath string, r io.Reader) error {
|
||||
|
||||
// safePath resolves a relative path within rootPath and validates it doesn't escape.
|
||||
// Resolves symlinks to prevent symlink-based traversal attacks.
|
||||
//
|
||||
// The check used to be `strings.HasPrefix(absResolved, absRoot)` which has
|
||||
// a classic boundary bug: a sibling root at /data/vol10 would pass the
|
||||
// prefix test for /data/vol1. The fix enforces a separator boundary so
|
||||
// the only allowed cases are absResolved == absRoot OR absResolved begins
|
||||
// with absRoot + separator.
|
||||
//
|
||||
// For paths that don't yet exist (e.g. SaveFile creating a new file),
|
||||
// EvalSymlinks returns an error and we fall back to the lexical path.
|
||||
// In that case we walk every existing ancestor with EvalSymlinks too —
|
||||
// if any ancestor is a symlink that escapes the root, we reject. This
|
||||
// closes the prior gap where pre-planted symlinks could divert writes.
|
||||
func safePath(rootPath, relativePath string) (string, error) {
|
||||
if relativePath == "" {
|
||||
return rootPath, nil
|
||||
@@ -176,7 +188,7 @@ func safePath(rootPath, relativePath string) (string, error) {
|
||||
|
||||
// Clean and ensure no traversal.
|
||||
cleaned := filepath.Clean(relativePath)
|
||||
if strings.Contains(cleaned, "..") {
|
||||
if cleaned == ".." || strings.HasPrefix(cleaned, ".."+string(filepath.Separator)) || strings.Contains(cleaned, string(filepath.Separator)+".."+string(filepath.Separator)) {
|
||||
return "", fmt.Errorf("path traversal not allowed")
|
||||
}
|
||||
|
||||
@@ -191,18 +203,66 @@ func safePath(rootPath, relativePath string) (string, error) {
|
||||
absRoot = realRoot
|
||||
}
|
||||
|
||||
// Resolve the target path including symlinks.
|
||||
// Resolve the target path. If the leaf doesn't exist (write path),
|
||||
// walk parent directories — any of which may already be a symlink.
|
||||
absResolved, err := filepath.Abs(absPath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("resolve path: %w", err)
|
||||
}
|
||||
if realResolved, err := filepath.EvalSymlinks(absResolved); err == nil {
|
||||
absResolved = realResolved
|
||||
} else {
|
||||
// Leaf missing — resolve the deepest existing ancestor and
|
||||
// re-join the unresolved tail. This catches a pre-planted
|
||||
// symlink in any parent dir. An error here means an ancestor
|
||||
// could not be resolved (e.g. a symlink we cannot follow): we MUST
|
||||
// reject rather than fall back to the lexical path, which still
|
||||
// carries the absRoot prefix and would let a symlink ancestor that
|
||||
// escapes the root slip past the boundary check below.
|
||||
resolved, tailErr := resolveExistingAncestor(absResolved)
|
||||
if tailErr != nil {
|
||||
return "", fmt.Errorf("path traversal not allowed")
|
||||
}
|
||||
if resolved != "" {
|
||||
absResolved = resolved
|
||||
}
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(absResolved, absRoot) {
|
||||
if absResolved != absRoot && !strings.HasPrefix(absResolved, absRoot+string(filepath.Separator)) {
|
||||
return "", fmt.Errorf("path traversal not allowed")
|
||||
}
|
||||
|
||||
return absPath, nil
|
||||
}
|
||||
|
||||
// resolveExistingAncestor walks p upward until it reaches a path that
// Lstat can see, resolves that path's symlinks with EvalSymlinks, and
// re-joins the components that were stripped on the way up.
//
// Returns:
//   - (resolved, nil) when an existing ancestor (or p itself) was found;
//     the missing tail is appended lexically, without further resolution.
//   - ("", nil) when the walk reaches "", "/", or a bare volume root
//     without finding anything — the caller keeps its lexical path.
//   - ("", err) when an existing ancestor cannot be resolved by
//     EvalSymlinks (for example a dangling symlink).
//
// Any Lstat error, not only "does not exist", is treated as "not present
// at this level" and moves the walk to the parent.
func resolveExistingAncestor(p string) (string, error) {
	tail := ""
	cur := p
	for {
		if cur == "" || cur == "/" || cur == filepath.VolumeName(cur)+string(filepath.Separator) {
			return "", nil
		}
		// Lstat (not Stat) so a symlink whose target is missing still
		// counts as existing here and is then rejected by EvalSymlinks.
		if _, err := os.Lstat(cur); err == nil {
			resolved, err := filepath.EvalSymlinks(cur)
			if err != nil {
				return "", err
			}
			if tail == "" {
				return resolved, nil
			}
			return filepath.Join(resolved, tail), nil
		}
		// Move one level up, remembering the component just removed.
		parent := filepath.Dir(cur)
		if parent == cur {
			return "", nil
		}
		tail = filepath.Join(filepath.Base(cur), tail)
		cur = parent
	}
}
|
||||
|
||||
@@ -131,8 +131,14 @@ const maxWebhookBodyBytes = 256 * 1024 // 256 KiB
|
||||
// PluginDispatcher is what the plugin-workload webhook handler needs from
// the deployer: the canonical Source-dispatch entry point plus access to
// the same Deps bundle so Trigger.Match can read store / crypto.
//
// DispatchTeardown is required so the preview-deploy flow can tear down
// an ephemeral per-branch child workload when its upstream branch is
// deleted. Same teardown path the API /workloads/{id} DELETE route uses;
// nil error on a clean teardown lets the caller delete the workload row.
type PluginDispatcher interface {
	// DispatchPlugin runs the deploy described by intent against workload w.
	DispatchPlugin(ctx context.Context, w pluginWorkload, intent pluginIntent) error
	// DispatchTeardown tears down workload w; a nil error means the
	// teardown completed and the caller may remove the workload row.
	DispatchTeardown(ctx context.Context, w pluginWorkload) error
	// PluginDeps returns the dependency bundle shared with the deployer.
	PluginDeps() pluginDeps
}
|
||||
|
||||
|
||||
@@ -13,8 +13,10 @@ import (
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/metrics"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
"github.com/alexei/tinyforge/internal/workload/preview"
|
||||
)
|
||||
|
||||
// maxTriggerFanOutConcurrency caps how many bindings dispatch in
|
||||
@@ -44,6 +46,17 @@ const (
|
||||
ReasonConfigError = "config merge error"
|
||||
ReasonMatchError = "match error"
|
||||
ReasonDispatchFailed = "dispatch failed"
|
||||
ReasonPreviewError = "preview materialize error"
|
||||
ReasonPreviewTorndown = "preview torn down"
|
||||
// ReasonPreviewNoop: a branch-delete webhook arrived but no preview was
|
||||
// ever materialized for that branch — a legitimate clean skip, distinct
|
||||
// from "no binding matched" so it isn't misreported as a wiring problem.
|
||||
ReasonPreviewNoop = "preview noop"
|
||||
// ReasonPreviewOrphaned: the preview container was torn down but its
|
||||
// workload row could not be deleted, leaving an orphan row. Surfaced
|
||||
// distinctly so the partial failure is visible rather than masquerading
|
||||
// as a clean teardown.
|
||||
ReasonPreviewOrphaned = "preview torn down (row orphaned)"
|
||||
)
|
||||
|
||||
// handleTriggerWebhook processes an inbound webhook for a first-class
|
||||
@@ -172,7 +185,7 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Deployed:
|
||||
deployed++
|
||||
case r.Reason == ReasonBindingDisabled:
|
||||
case r.Reason == ReasonBindingDisabled, r.Reason == ReasonPreviewNoop:
|
||||
skipped++
|
||||
case r.Reason == ReasonNoMatch:
|
||||
noMatch++
|
||||
@@ -194,8 +207,10 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
case noMatch == len(results)-skipped:
|
||||
delivery.Detail = "no binding matched"
|
||||
default:
|
||||
delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d", skipped, errored)
|
||||
delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d nomatch=%d",
|
||||
skipped, errored, noMatch)
|
||||
}
|
||||
metrics.WebhookDeliveriesTotal.Inc(delivery.Outcome)
|
||||
respondWebhookJSON(w, http.StatusOK, map[string]any{
|
||||
"success": true,
|
||||
"trigger": trg.Name,
|
||||
@@ -326,6 +341,18 @@ func (h *Handler) fireBinding(
|
||||
if intent.TriggeredBy == "" {
|
||||
intent.TriggeredBy = "trigger-webhook"
|
||||
}
|
||||
|
||||
// Preview-deploy fork: the git trigger plugin attaches preview_branch
|
||||
// metadata when BranchPattern matches a non-baseline branch. Route
|
||||
// the dispatch through a per-branch child workload rather than
|
||||
// redeploying the parent template. The fork is intentionally before
|
||||
// the dispatch so the template's container never gets clobbered by
|
||||
// a feature-branch push.
|
||||
if previewBranch := intent.Metadata["preview_branch"]; previewBranch != "" {
|
||||
fired, reason := h.handlePreviewIntent(ctx, row, intent, previewBranch)
|
||||
return fired, reason
|
||||
}
|
||||
|
||||
if err := h.plugins.DispatchPlugin(ctx, pwl, *intent); err != nil {
|
||||
slog.Warn("webhook: dispatch failed",
|
||||
"trigger", trg.Name, "workload", row.Name, "error", err)
|
||||
@@ -336,3 +363,72 @@ func (h *Handler) fireBinding(
|
||||
return true, intent.Reason
|
||||
}
|
||||
|
||||
// handlePreviewIntent dispatches an intent that targeted a non-baseline
|
||||
// branch on a preview-template workload. Two paths:
|
||||
//
|
||||
// 1. Branch deleted: find the matching preview workload, dispatch
|
||||
// Teardown, then delete the workload row so the dashboard reflects
|
||||
// the upstream state.
|
||||
// 2. Branch pushed: materialize (or reuse) the preview workload, then
|
||||
// dispatch the deploy against it. The template workload itself is
|
||||
// never deployed against a feature branch.
|
||||
//
|
||||
// On any error the helper logs and returns a generic reason — the
|
||||
// fan-out caller treats these the same as a normal dispatch failure.
|
||||
func (h *Handler) handlePreviewIntent(
|
||||
ctx context.Context,
|
||||
template store.Workload,
|
||||
intent *plugin.DeploymentIntent,
|
||||
branch string,
|
||||
) (bool, string) {
|
||||
deleted := intent.Metadata["preview_deleted"] == "1"
|
||||
if deleted {
|
||||
child, ok, err := preview.FindPreviewForBranch(h.store, template.ID, branch)
|
||||
if err != nil {
|
||||
slog.Warn("webhook: preview lookup failed",
|
||||
"template", template.Name, "branch", branch, "error", err)
|
||||
return false, ReasonPreviewError
|
||||
}
|
||||
if !ok {
|
||||
// Branch was deleted upstream but we never materialized a
|
||||
// preview for it — nothing to do. Report as a distinct noop so
|
||||
// it isn't bucketed as "no binding matched".
|
||||
return false, ReasonPreviewNoop
|
||||
}
|
||||
childPwl := toPluginWorkload(child)
|
||||
if err := h.plugins.DispatchTeardown(ctx, childPwl); err != nil {
|
||||
slog.Warn("webhook: preview teardown dispatch failed",
|
||||
"template", template.Name, "preview", child.Name, "error", err)
|
||||
return false, ReasonDispatchFailed
|
||||
}
|
||||
if err := h.store.DeleteWorkload(child.ID); err != nil {
|
||||
// Container is gone but the row is orphaned. Surface this as a
|
||||
// distinct reason so the partial failure is visible rather than
|
||||
// reported as a clean teardown; the operator can delete the row
|
||||
// from the dashboard if it sticks around.
|
||||
slog.Warn("webhook: preview row delete failed (orphaned row)",
|
||||
"template", template.Name, "preview", child.Name, "error", err)
|
||||
return true, ReasonPreviewOrphaned
|
||||
}
|
||||
slog.Info("webhook: preview torn down",
|
||||
"template", template.Name, "branch", branch, "preview", child.Name)
|
||||
return true, ReasonPreviewTorndown
|
||||
}
|
||||
|
||||
child, err := preview.MaterializeForBranch(h.store, template, branch)
|
||||
if err != nil {
|
||||
slog.Warn("webhook: preview materialize failed",
|
||||
"template", template.Name, "branch", branch, "error", err)
|
||||
return false, ReasonPreviewError
|
||||
}
|
||||
childPwl := toPluginWorkload(child)
|
||||
if err := h.plugins.DispatchPlugin(ctx, childPwl, *intent); err != nil {
|
||||
slog.Warn("webhook: preview dispatch failed",
|
||||
"template", template.Name, "preview", child.Name, "error", err)
|
||||
return false, ReasonDispatchFailed
|
||||
}
|
||||
slog.Info("webhook: triggered preview deploy",
|
||||
"template", template.Name, "branch", branch, "preview", child.Name, "reason", intent.Reason)
|
||||
return true, intent.Reason
|
||||
}
|
||||
|
||||
|
||||
@@ -327,6 +327,10 @@ func parseGitLabPushEvent(body []byte, headers http.Header) vendorParseResult {
|
||||
Ref: probe.Ref,
|
||||
CommitSHA: probe.After,
|
||||
Pusher: pusher,
|
||||
// GitLab does not emit `deleted: true`; the canonical signal
|
||||
// is an all-zero `after` SHA. Same parser helper used for the
|
||||
// GitHub / Gitea fallback so the two branches agree.
|
||||
Deleted: isZeroSHA(probe.After),
|
||||
},
|
||||
}
|
||||
if strings.HasPrefix(probe.Ref, "refs/heads/") {
|
||||
@@ -346,6 +350,7 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
||||
var probe struct {
|
||||
Ref string `json:"ref"`
|
||||
After string `json:"after"`
|
||||
Deleted bool `json:"deleted"`
|
||||
Repository struct {
|
||||
FullName string `json:"full_name"`
|
||||
CloneURL string `json:"clone_url"`
|
||||
@@ -370,6 +375,12 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
||||
if pusher == "" {
|
||||
pusher = probe.Pusher.Username
|
||||
}
|
||||
// Branch / tag deletion is signalled either by the explicit
|
||||
// `deleted: true` flag (GitHub / Gitea) or by an all-zero `after`
|
||||
// SHA (older shapes). Both are honoured so the preview-deploy flow
|
||||
// can tear down ephemeral workloads even when a vendor omits the
|
||||
// boolean flag.
|
||||
deleted := probe.Deleted || isZeroSHA(probe.After)
|
||||
evt := plugin.InboundEvent{
|
||||
Kind: "git-push",
|
||||
Git: &plugin.GitEvent{
|
||||
@@ -377,6 +388,7 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
||||
Ref: probe.Ref,
|
||||
CommitSHA: probe.After,
|
||||
Pusher: pusher,
|
||||
Deleted: deleted,
|
||||
},
|
||||
}
|
||||
if strings.HasPrefix(probe.Ref, "refs/heads/") {
|
||||
@@ -388,3 +400,19 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
||||
}
|
||||
return evt, nil
|
||||
}
|
||||
|
||||
// isZeroSHA reports whether sha is the "no commit" sentinel vendors put in
// the `after` field of a branch- or tag-delete push event: a string made
// up solely of the character '0'.
//
// The canonical form is 40 zeros, but any all-zero string of at least 7
// characters is accepted because some test fixtures truncate the SHA.
// The empty string and shorter strings are never treated as the sentinel.
func isZeroSHA(sha string) bool {
	if len(sha) < 7 {
		return false
	}
	// Stripping every leading and trailing '0' leaves nothing only when
	// the whole string consists of zeros.
	return strings.Trim(sha, "0") == ""
}
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
package plugin
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/crypto"
|
||||
"github.com/alexei/tinyforge/internal/notify"
|
||||
)
|
||||
|
||||
// DispatchNotificationForWorkload sends `event` to every notification
|
||||
// route configured for the workload. Resolution order:
|
||||
//
|
||||
// 1. workload_notifications rows matching `event.Type` — multi-route
|
||||
// fan-out (e.g. Slack alerts + Discord successes per workload).
|
||||
// 2. If zero matching rows AND the legacy single-URL columns on the
|
||||
// workload row are set, send to that URL — backwards compat for
|
||||
// installs that pre-date the new table.
|
||||
// 3. Otherwise, fall through to settings.notification_url so the global
|
||||
// destination still fires for workloads with no per-row config.
|
||||
//
|
||||
// Secrets are decrypted via deps.EncKey before sending. A failed decrypt
|
||||
// degrades to "send unsigned" with a warning rather than dropping the
|
||||
// notification — the operator still gets the alert, they just need to
|
||||
// re-save the secret. Fire-and-forget: failures are logged inside
|
||||
// deps.Notifier and never bubble up here.
|
||||
//
|
||||
// Callers (static / dockerfile / image / compose plugins) pass an
|
||||
// already-populated Event; this helper does not synthesize the payload
|
||||
// shape, only the routing.
|
||||
func DispatchNotificationForWorkload(deps Deps, w Workload, event notify.Event) {
|
||||
if deps.Notifier == nil {
|
||||
return
|
||||
}
|
||||
rows, err := deps.Store.ListWorkloadNotifications(w.ID)
|
||||
if err != nil {
|
||||
slog.Warn("notify: list workload routes failed",
|
||||
"workload", w.ID, "error", err)
|
||||
rows = nil
|
||||
}
|
||||
|
||||
matched := 0
|
||||
for _, n := range rows {
|
||||
if !n.MatchesEventType(event.Type) {
|
||||
continue
|
||||
}
|
||||
matched++
|
||||
secret := ""
|
||||
if n.Secret != "" {
|
||||
dec, derr := crypto.Decrypt(deps.EncKey, n.Secret)
|
||||
if derr != nil {
|
||||
slog.Warn("notify: decrypt workload secret failed — sending unsigned",
|
||||
"workload", w.ID, "route", n.Name, "error", derr)
|
||||
} else {
|
||||
secret = dec
|
||||
}
|
||||
}
|
||||
deps.Notifier.SendSigned(n.URL, secret, notify.TierSite, event)
|
||||
}
|
||||
if matched > 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Legacy fallback: single per-workload destination on workloads.notification_url.
|
||||
if w.NotificationURL != "" {
|
||||
deps.Notifier.SendSigned(w.NotificationURL, w.NotificationSecret, notify.TierSite, event)
|
||||
return
|
||||
}
|
||||
|
||||
// Global fallback so a one-line config in settings still notifies
|
||||
// every workload without a per-row override.
|
||||
settings, err := deps.Store.GetSettings()
|
||||
if err != nil {
|
||||
slog.Warn("notify: settings lookup for global fallback failed",
|
||||
"workload", w.ID, "error", err)
|
||||
return
|
||||
}
|
||||
if settings.NotificationURL == "" {
|
||||
return
|
||||
}
|
||||
deps.Notifier.SendSigned(settings.NotificationURL, settings.NotificationSecret, notify.TierSettings, event)
|
||||
}
|
||||
@@ -32,6 +32,23 @@ type Config struct {
|
||||
|
||||
// source is the stateless plugin type for the "compose" source kind; it
// is registered with plugin.RegisterSource from this package's init.
type source struct{}
|
||||
|
||||
// composeRunner is the slice of stack.Compose this plugin actually
|
||||
// drives. Defined locally per the "interfaces where they are used"
|
||||
// idiom so the plugin can be unit-tested without a real docker compose
|
||||
// binary. `*stack.Compose` satisfies it implicitly.
|
||||
type composeRunner interface {
|
||||
Up(ctx context.Context, projectName, yamlPath string) (string, error)
|
||||
Down(ctx context.Context, projectName string, removeVolumes bool) (string, error)
|
||||
Ps(ctx context.Context, projectName, yamlPath string) ([]stack.Service, error)
|
||||
}
|
||||
|
||||
// newComposeRunner returns the runner the plugin should call. Tests
|
||||
// swap this var with a fake; production code never touches it. The
|
||||
// indirection costs one function-pointer dereference per Deploy /
|
||||
// Teardown / Reconcile call — negligible against the docker compose
|
||||
// exec it gates.
|
||||
var newComposeRunner = func() composeRunner { return stack.NewCompose("") }
|
||||
|
||||
// init registers the compose source with the plugin registry at
// package load time.
func init() {
	plugin.RegisterSource(new(source))
}
|
||||
|
||||
// Kind returns the source-kind identifier for this plugin: "compose".
func (*source) Kind() string {
	return "compose"
}
|
||||
@@ -82,7 +99,7 @@ func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload,
|
||||
return fmt.Errorf("compose source: write yaml: %w", err)
|
||||
}
|
||||
|
||||
compose := stack.NewCompose("")
|
||||
compose := newComposeRunner()
|
||||
out, err := compose.Up(ctx, projectName, yamlPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("compose source: docker compose up: %w (output: %s)", err, truncate(out, 1024))
|
||||
@@ -105,7 +122,7 @@ func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload
|
||||
cfg, _ := plugin.SourceConfigOf[Config](w)
|
||||
projectName := composeProjectName(cfg.ComposeProjectName, w)
|
||||
|
||||
compose := stack.NewCompose("")
|
||||
compose := newComposeRunner()
|
||||
if _, err := compose.Down(ctx, projectName, true); err != nil {
|
||||
// Log but proceed — the DB rows must not be orphaned.
|
||||
slog.Warn("compose source: docker compose down", "workload", w.ID, "error", err)
|
||||
@@ -139,7 +156,7 @@ func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workloa
|
||||
projectName := composeProjectName(cfg.ComposeProjectName, w)
|
||||
yamlPath, _ := writeYAMLIfChanged(w.ID, cfg.ComposeYAML)
|
||||
|
||||
compose := stack.NewCompose("")
|
||||
compose := newComposeRunner()
|
||||
services, err := compose.Ps(ctx, projectName, yamlPath)
|
||||
if err != nil {
|
||||
// Likely no compose project running for this workload. Mark
|
||||
@@ -162,7 +179,7 @@ func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workloa
|
||||
|
||||
// syncContainers shares its body with Reconcile minus the missing-row
|
||||
// fallback — Deploy expects compose ps to succeed since `up` just ran.
|
||||
func syncContainers(ctx context.Context, deps plugin.Deps, compose *stack.Compose, w plugin.Workload, projectName, yamlPath string) error {
|
||||
func syncContainers(ctx context.Context, deps plugin.Deps, compose composeRunner, w plugin.Workload, projectName, yamlPath string) error {
|
||||
services, err := compose.Ps(ctx, projectName, yamlPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("compose ps: %w", err)
|
||||
@@ -204,7 +221,17 @@ var projectNameSanitizer = regexp.MustCompile(`[^a-z0-9_-]`)
|
||||
|
||||
func composeProjectName(explicit string, w plugin.Workload) string {
|
||||
if explicit != "" {
|
||||
return explicit
|
||||
// Apply the same sanitizer to operator-supplied names so a value
|
||||
// like "--foo" cannot reach the docker CLI and be re-parsed as a
|
||||
// flag. Reuses the canonical lower+[^a-z0-9_-]→"-" + trim path.
|
||||
san := strings.ToLower(explicit)
|
||||
san = projectNameSanitizer.ReplaceAllString(san, "-")
|
||||
san = strings.Trim(san, "-")
|
||||
if san != "" {
|
||||
return san
|
||||
}
|
||||
// Fall through to the derived name if sanitization stripped
|
||||
// everything (operator passed e.g. "---" — degenerate input).
|
||||
}
|
||||
name := strings.ToLower(w.Name)
|
||||
name = projectNameSanitizer.ReplaceAllString(name, "-")
|
||||
|
||||
@@ -0,0 +1,512 @@
|
||||
package compose
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/stack"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// fakeRunner stands in for *stack.Compose. Every method records its
|
||||
// inputs and returns whatever the test set on the corresponding field.
|
||||
// Defaults are happy-path: empty services from Ps, no error from Up /
|
||||
// Down. Fields are slice-typed so a single fakeRunner can serve a
|
||||
// sequence of calls (Deploy issues Up + Ps in order).
|
||||
type fakeRunner struct {
|
||||
mu sync.Mutex
|
||||
|
||||
upCalls []runnerCall
|
||||
upOuts []string
|
||||
upErrs []error
|
||||
downCalls []runnerCall
|
||||
downOuts []string
|
||||
downErrs []error
|
||||
psCalls []runnerCall
|
||||
psResults [][]stack.Service
|
||||
psErrs []error
|
||||
upCallIdx int
|
||||
psCallIdx int
|
||||
downCallI int
|
||||
}
|
||||
|
||||
// runnerCall captures the arguments of one fakeRunner method call.
// Fields a given method does not take are left at their zero value.
type runnerCall struct {
	ProjectName   string // project name passed to Up, Down or Ps
	YAMLPath      string // YAML path passed to Up or Ps
	RemoveVolumes bool   // removeVolumes flag passed to Down
}
|
||||
|
||||
func (f *fakeRunner) Up(_ context.Context, projectName, yamlPath string) (string, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.upCalls = append(f.upCalls, runnerCall{ProjectName: projectName, YAMLPath: yamlPath})
|
||||
out, err := pop(f.upOuts, f.upErrs, f.upCallIdx)
|
||||
f.upCallIdx++
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (f *fakeRunner) Down(_ context.Context, projectName string, removeVolumes bool) (string, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.downCalls = append(f.downCalls, runnerCall{ProjectName: projectName, RemoveVolumes: removeVolumes})
|
||||
out, err := pop(f.downOuts, f.downErrs, f.downCallI)
|
||||
f.downCallI++
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (f *fakeRunner) Ps(_ context.Context, projectName, yamlPath string) ([]stack.Service, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
f.psCalls = append(f.psCalls, runnerCall{ProjectName: projectName, YAMLPath: yamlPath})
|
||||
|
||||
idx := f.psCallIdx
|
||||
f.psCallIdx++
|
||||
var svcs []stack.Service
|
||||
if idx < len(f.psResults) {
|
||||
svcs = f.psResults[idx]
|
||||
}
|
||||
var err error
|
||||
if idx < len(f.psErrs) {
|
||||
err = f.psErrs[idx]
|
||||
}
|
||||
return svcs, err
|
||||
}
|
||||
|
||||
// pop looks up position n in outs and errs independently and returns
// what it finds. A slice that is too short for n contributes its zero
// value ("" or nil), so a test can script only the responses it cares
// about without padding the other slice.
func pop(outs []string, errs []error, n int) (string, error) {
	var (
		text   string
		failed error
	)
	if len(outs) > n {
		text = outs[n]
	}
	if len(errs) > n {
		failed = errs[n]
	}
	return text, failed
}
|
||||
|
||||
// withFakeRunner swaps newComposeRunner for the duration of one test
|
||||
// and restores the original on cleanup. Tests that need to inspect the
|
||||
// fake post-hoc keep the returned pointer.
|
||||
func withFakeRunner(t *testing.T, f *fakeRunner) {
|
||||
t.Helper()
|
||||
orig := newComposeRunner
|
||||
newComposeRunner = func() composeRunner { return f }
|
||||
t.Cleanup(func() { newComposeRunner = orig })
|
||||
}
|
||||
|
||||
func testStore(t *testing.T) *store.Store {
|
||||
t.Helper()
|
||||
st, err := store.New(":memory:")
|
||||
if err != nil {
|
||||
t.Fatalf("open store: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { _ = st.Close() })
|
||||
return st
|
||||
}
|
||||
|
||||
// seedWorkload creates the parent workload row that container rows FK
|
||||
// onto. Returns the workload's ID so callers can reuse it.
|
||||
func seedWorkload(t *testing.T, st *store.Store, name, yamlText string) string {
|
||||
t.Helper()
|
||||
cfg := Config{ComposeYAML: yamlText}
|
||||
body, err := json.Marshal(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal config: %v", err)
|
||||
}
|
||||
w, err := st.CreateWorkload(store.Workload{
|
||||
Kind: "plugin",
|
||||
Name: name,
|
||||
SourceKind: "compose",
|
||||
SourceConfig: string(body),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("create workload: %v", err)
|
||||
}
|
||||
return w.ID
|
||||
}
|
||||
|
||||
func TestDeploy_HappyPath(t *testing.T) {
|
||||
withTempDir(t) // isolates the YAML scratch dir under t.TempDir()
|
||||
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
yamlText := "services:\n web:\n image: nginx:alpine\n"
|
||||
wid := seedWorkload(t, deps.Store, "myapp", yamlText)
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "myapp",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: yamlText}),
|
||||
}
|
||||
|
||||
fake := &fakeRunner{
|
||||
psResults: [][]stack.Service{{
|
||||
{Service: "web", State: "running", Status: "Up 5 seconds"},
|
||||
}},
|
||||
}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
if err := src.Deploy(context.Background(), deps, w, plugin.DeploymentIntent{}); err != nil {
|
||||
t.Fatalf("Deploy: %v", err)
|
||||
}
|
||||
|
||||
// Up called exactly once with the workload-derived project name.
|
||||
if len(fake.upCalls) != 1 {
|
||||
t.Fatalf("Up called %d times, want 1", len(fake.upCalls))
|
||||
}
|
||||
if !strings.HasPrefix(fake.upCalls[0].ProjectName, "tf-myapp-") {
|
||||
t.Errorf("Up projectName = %q, want prefix tf-myapp-", fake.upCalls[0].ProjectName)
|
||||
}
|
||||
if !strings.HasSuffix(fake.upCalls[0].YAMLPath, "compose.yml") {
|
||||
t.Errorf("Up yamlPath = %q, want suffix compose.yml", fake.upCalls[0].YAMLPath)
|
||||
}
|
||||
|
||||
// Ps follows Up to enumerate the resulting containers.
|
||||
if len(fake.psCalls) != 1 {
|
||||
t.Fatalf("Ps called %d times, want 1", len(fake.psCalls))
|
||||
}
|
||||
|
||||
// Service row written.
|
||||
row, err := deps.Store.GetContainerByID(wid + ":web")
|
||||
if err != nil {
|
||||
t.Fatalf("get container row: %v", err)
|
||||
}
|
||||
if row.WorkloadID != wid {
|
||||
t.Errorf("row.WorkloadID = %q, want %q", row.WorkloadID, wid)
|
||||
}
|
||||
if row.Role != "web" {
|
||||
t.Errorf("row.Role = %q, want %q", row.Role, "web")
|
||||
}
|
||||
if row.State != "running" {
|
||||
t.Errorf("row.State = %q, want %q", row.State, "running")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeploy_EmptyYAMLConfig_RejectsBeforeExec(t *testing.T) {
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
wid := seedWorkload(t, deps.Store, "empty", "services:\n web:\n image: x\n")
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "empty",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: ""}),
|
||||
}
|
||||
|
||||
fake := &fakeRunner{}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
err := src.Deploy(context.Background(), deps, w, plugin.DeploymentIntent{})
|
||||
if err == nil {
|
||||
t.Fatal("Deploy accepted empty compose_yaml")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "empty compose_yaml") {
|
||||
t.Errorf("error = %v, want substring \"empty compose_yaml\"", err)
|
||||
}
|
||||
if len(fake.upCalls) != 0 {
|
||||
t.Errorf("Up should not have been called; got %d calls", len(fake.upCalls))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeploy_UpFailure_PropagatesAndIncludesTruncatedOutput(t *testing.T) {
|
||||
withTempDir(t)
|
||||
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
yamlText := "services:\n web:\n image: bad-image\n"
|
||||
wid := seedWorkload(t, deps.Store, "fail", yamlText)
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "fail",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: yamlText}),
|
||||
}
|
||||
|
||||
bigOut := strings.Repeat("docker compose log noise ", 200) // > 1024 bytes
|
||||
fake := &fakeRunner{
|
||||
upOuts: []string{bigOut},
|
||||
upErrs: []error{errors.New("exit status 1")},
|
||||
}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
err := src.Deploy(context.Background(), deps, w, plugin.DeploymentIntent{})
|
||||
if err == nil {
|
||||
t.Fatal("Deploy accepted Up failure")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "docker compose up") {
|
||||
t.Errorf("error = %v, want substring \"docker compose up\"", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "exit status 1") {
|
||||
t.Errorf("error = %v, want wrapped Up err", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "(truncated)") {
|
||||
t.Errorf("error = %v, want truncated-output marker", err)
|
||||
}
|
||||
// Ps must not be called when Up failed.
|
||||
if len(fake.psCalls) != 0 {
|
||||
t.Errorf("Ps called %d times after Up failure; want 0", len(fake.psCalls))
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeploy_UpSucceedsButPsFails_SurfacesError(t *testing.T) {
|
||||
// `up` succeeded but enumerate failed — Deploy must surface so the UI
|
||||
// doesn't show an empty containers index for a running stack.
|
||||
withTempDir(t)
|
||||
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
yamlText := "services:\n web:\n image: nginx\n"
|
||||
wid := seedWorkload(t, deps.Store, "psfail", yamlText)
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "psfail",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: yamlText}),
|
||||
}
|
||||
|
||||
fake := &fakeRunner{
|
||||
psErrs: []error{errors.New("compose ps boom")},
|
||||
}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
err := src.Deploy(context.Background(), deps, w, plugin.DeploymentIntent{})
|
||||
if err == nil {
|
||||
t.Fatal("Deploy ignored Ps failure")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "sync container rows") {
|
||||
t.Errorf("error = %v, want substring \"sync container rows\"", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTeardown_DropsContainerRows_EvenWhenDownFails(t *testing.T) {
|
||||
// docker compose down failing must not orphan rows in the DB.
|
||||
withTempDir(t)
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
wid := seedWorkload(t, deps.Store, "tdown", "services:\n web:\n image: nginx\n")
|
||||
|
||||
// Seed two service rows the way Deploy would.
|
||||
for _, role := range []string{"web", "db"} {
|
||||
if err := deps.Store.UpsertContainer(store.Container{
|
||||
ID: wid + ":" + role,
|
||||
WorkloadID: wid,
|
||||
WorkloadKind: "compose",
|
||||
Role: role,
|
||||
Host: "local",
|
||||
State: "running",
|
||||
}); err != nil {
|
||||
t.Fatalf("seed container: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
fake := &fakeRunner{downErrs: []error{errors.New("compose project unknown")}}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "tdown",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: "services:\n web:\n image: nginx\n"}),
|
||||
}
|
||||
if err := src.Teardown(context.Background(), deps, w); err != nil {
|
||||
t.Fatalf("Teardown: %v", err)
|
||||
}
|
||||
|
||||
// Down requested removeVolumes=true (matches the docstring claim).
|
||||
if len(fake.downCalls) != 1 {
|
||||
t.Fatalf("Down calls = %d, want 1", len(fake.downCalls))
|
||||
}
|
||||
if !fake.downCalls[0].RemoveVolumes {
|
||||
t.Errorf("Down removeVolumes = false, want true (workload teardown is destructive)")
|
||||
}
|
||||
|
||||
// Rows gone despite the Down error.
|
||||
for _, role := range []string{"web", "db"} {
|
||||
if _, err := deps.Store.GetContainerByID(wid + ":" + role); !errors.Is(err, store.ErrNotFound) {
|
||||
t.Errorf("container row %q survived teardown: err=%v", role, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTeardown_HappyPath(t *testing.T) {
|
||||
withTempDir(t)
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
wid := seedWorkload(t, deps.Store, "tdown2", "services:\n web:\n image: nginx\n")
|
||||
|
||||
if err := deps.Store.UpsertContainer(store.Container{
|
||||
ID: wid + ":web",
|
||||
WorkloadID: wid,
|
||||
WorkloadKind: "compose",
|
||||
Role: "web",
|
||||
Host: "local",
|
||||
State: "running",
|
||||
}); err != nil {
|
||||
t.Fatalf("seed: %v", err)
|
||||
}
|
||||
|
||||
fake := &fakeRunner{}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "tdown2",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: "services:\n web:\n image: nginx\n"}),
|
||||
}
|
||||
if err := src.Teardown(context.Background(), deps, w); err != nil {
|
||||
t.Fatalf("Teardown: %v", err)
|
||||
}
|
||||
if len(fake.downCalls) != 1 {
|
||||
t.Errorf("Down calls = %d, want 1", len(fake.downCalls))
|
||||
}
|
||||
if _, err := deps.Store.GetContainerByID(wid + ":web"); !errors.Is(err, store.ErrNotFound) {
|
||||
t.Errorf("container row survived teardown: err=%v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReconcile_PsSuccess_UpsertsRows(t *testing.T) {
|
||||
withTempDir(t)
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
yamlText := "services:\n web:\n image: nginx\n db:\n image: postgres\n"
|
||||
wid := seedWorkload(t, deps.Store, "rec", yamlText)
|
||||
|
||||
fake := &fakeRunner{
|
||||
psResults: [][]stack.Service{{
|
||||
{Service: "web", State: "running"},
|
||||
{Service: "db", State: "running"},
|
||||
}},
|
||||
}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "rec",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: yamlText}),
|
||||
}
|
||||
if err := src.Reconcile(context.Background(), deps, w); err != nil {
|
||||
t.Fatalf("Reconcile: %v", err)
|
||||
}
|
||||
|
||||
for _, role := range []string{"web", "db"} {
|
||||
row, err := deps.Store.GetContainerByID(wid + ":" + role)
|
||||
if err != nil {
|
||||
t.Errorf("row %q missing after reconcile: %v", role, err)
|
||||
continue
|
||||
}
|
||||
if row.State != "running" {
|
||||
t.Errorf("row %q state = %q, want \"running\"", role, row.State)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestReconcile_PsFailure_MarksExistingRowsMissing(t *testing.T) {
|
||||
// When compose ps fails (project unknown to Docker), the reconciler
|
||||
// flips existing rows to "missing" rather than deleting them — the UI
|
||||
// surfaces the desync to the operator.
|
||||
withTempDir(t)
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
yamlText := "services:\n web:\n image: nginx\n"
|
||||
wid := seedWorkload(t, deps.Store, "missing", yamlText)
|
||||
|
||||
if err := deps.Store.UpsertContainer(store.Container{
|
||||
ID: wid + ":web",
|
||||
WorkloadID: wid,
|
||||
WorkloadKind: "compose",
|
||||
Role: "web",
|
||||
Host: "local",
|
||||
State: "running",
|
||||
}); err != nil {
|
||||
t.Fatalf("seed: %v", err)
|
||||
}
|
||||
|
||||
fake := &fakeRunner{psErrs: []error{errors.New("no such project")}}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "missing",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: yamlText}),
|
||||
}
|
||||
if err := src.Reconcile(context.Background(), deps, w); err != nil {
|
||||
t.Fatalf("Reconcile returned %v; should be nil even on Ps failure", err)
|
||||
}
|
||||
|
||||
row, err := deps.Store.GetContainerByID(wid + ":web")
|
||||
if err != nil {
|
||||
t.Fatalf("row missing entirely (should be marked, not deleted): %v", err)
|
||||
}
|
||||
if row.State != "missing" {
|
||||
t.Errorf("row.State = %q, want \"missing\"", row.State)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReconcile_FallsBackToStatusWhenStateEmpty(t *testing.T) {
|
||||
// Some compose versions populate Status (human string) but not State
|
||||
// (enum) for non-running services. upsertServiceRow falls back to
|
||||
// Status; verify that here.
|
||||
withTempDir(t)
|
||||
deps := plugin.Deps{Store: testStore(t)}
|
||||
yamlText := "services:\n worker:\n image: alpine\n"
|
||||
wid := seedWorkload(t, deps.Store, "fallback", yamlText)
|
||||
|
||||
fake := &fakeRunner{
|
||||
psResults: [][]stack.Service{{
|
||||
{Service: "worker", State: "", Status: "Exit 0"},
|
||||
}},
|
||||
}
|
||||
withFakeRunner(t, fake)
|
||||
|
||||
src := &source{}
|
||||
w := plugin.Workload{
|
||||
ID: wid,
|
||||
Name: "fallback",
|
||||
SourceKind: "compose",
|
||||
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: yamlText}),
|
||||
}
|
||||
if err := src.Reconcile(context.Background(), deps, w); err != nil {
|
||||
t.Fatalf("Reconcile: %v", err)
|
||||
}
|
||||
|
||||
row, err := deps.Store.GetContainerByID(wid + ":worker")
|
||||
if err != nil {
|
||||
t.Fatalf("get row: %v", err)
|
||||
}
|
||||
if row.State != "Exit 0" {
|
||||
t.Errorf("row.State = %q, want \"Exit 0\" (Status fallback)", row.State)
|
||||
}
|
||||
}
|
||||
|
||||
// mustMarshalConfig is a small helper that converts a Config to the
|
||||
// raw-JSON shape SourceConfig expects. Tests use it instead of
|
||||
// hand-rolling the string so a Config field rename can't drift the test
|
||||
// fixture from the production decoder.
|
||||
func mustMarshalConfig(t *testing.T, cfg Config) json.RawMessage {
|
||||
t.Helper()
|
||||
b, err := json.Marshal(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal config: %v", err)
|
||||
}
|
||||
return json.RawMessage(b)
|
||||
}
|
||||
|
||||
// Compile-time guards: *stack.Compose must continue to satisfy
|
||||
// composeRunner so the production path keeps building, and the fake
|
||||
// must continue to satisfy it too so a drift in the interface shape
|
||||
// fails the build here rather than at runtime.
|
||||
var (
|
||||
_ composeRunner = (*stack.Compose)(nil)
|
||||
_ composeRunner = (*fakeRunner)(nil)
|
||||
)
|
||||
@@ -0,0 +1,574 @@
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/crypto"
|
||||
"github.com/alexei/tinyforge/internal/docker"
|
||||
"github.com/alexei/tinyforge/internal/events"
|
||||
"github.com/alexei/tinyforge/internal/notify"
|
||||
"github.com/alexei/tinyforge/internal/proxy"
|
||||
"github.com/alexei/tinyforge/internal/staticsite"
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// healthCheckDelay is how long a deploy waits after StartContainer
// before probing IsContainerRunning. Three seconds matches the static
// plugin's window: brief enough to keep happy-path deploys fast, yet
// long enough to catch containers that crash on boot (missing env, bad
// CMD, port conflict).
const healthCheckDelay time.Duration = 3 * time.Second
|
||||
|
||||
// deploy runs one end-to-end sync of a dockerfile workload:
|
||||
//
|
||||
// 1. fetch the latest commit SHA from the configured git provider
|
||||
// 2. skip if SHA + container + proxy are all still healthy
|
||||
// 3. clone the repo into a temp dir
|
||||
// 4. resolve the build context + Dockerfile location
|
||||
// 5. `docker build -t <tag> -f <dockerfile> <context>`
|
||||
// 6. recreate the container with the new image
|
||||
// 7. health-probe the container, surface logs on failure
|
||||
// 8. reconfigure the proxy route
|
||||
// 9. tear down the previous container (different ID) once we're sure
|
||||
// the new one is healthy and proxied
|
||||
//
|
||||
// Each step writes its own status update so the dashboard's runtime-
|
||||
// state panel can show a useful intermediate state when the deploy
|
||||
// stalls on the slow step (almost always the build).
|
||||
func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
||||
cfg, err := plugin.SourceConfigOf[Config](w)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dockerfile source: decode config: %w", err)
|
||||
}
|
||||
|
||||
prev, prevContainer, err := loadState(deps, w)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Force a full rebuild on manual / promote / first-time deploys
|
||||
// (no Reason at all also implies manual). Schedule / git triggers
|
||||
// honour the unchanged-SHA short-circuit so cron polling does not
|
||||
// rebuild minute-by-minute when nothing changed.
|
||||
force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote"
|
||||
|
||||
// Decrypt the access token if present. Token never escapes this
|
||||
// frame: any error message routes through sanitizeError(_, token)
|
||||
// which redacts the literal substring.
|
||||
token := ""
|
||||
if cfg.AccessToken != "" {
|
||||
decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken)
|
||||
if derr != nil {
|
||||
slog.Warn("dockerfile source: failed to decrypt access token",
|
||||
"workload", w.Name, "error", derr)
|
||||
} else {
|
||||
token = decrypted
|
||||
}
|
||||
}
|
||||
|
||||
provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token)
|
||||
if err != nil {
|
||||
updateStatus(deps, w, "failed", prev.LastCommitSHA,
|
||||
sanitizeError(fmt.Sprintf("create provider: %v", err), token))
|
||||
return fmt.Errorf("create provider: %w", err)
|
||||
}
|
||||
|
||||
latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch)
|
||||
if err != nil {
|
||||
updateStatus(deps, w, "failed", prev.LastCommitSHA,
|
||||
sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token))
|
||||
return fmt.Errorf("get latest commit: %w", err)
|
||||
}
|
||||
|
||||
domain := primaryDomain(deps, w)
|
||||
|
||||
prevContainerID := ""
|
||||
prevProxyRouteID := ""
|
||||
if prevContainer != nil {
|
||||
prevContainerID = prevContainer.ContainerID
|
||||
prevProxyRouteID = prevContainer.ProxyRouteID
|
||||
}
|
||||
// Short-circuit: SHA unchanged AND container is still running AND
|
||||
// (if there's a public face) the proxy route still exists. Manual
|
||||
// deploys skip this entirely.
|
||||
//
|
||||
// We deliberately do NOT gate this on prev.Status == "deployed". A
|
||||
// transient failure (e.g. a one-off proxy-check error) leaves the
|
||||
// persisted status as "failed"; if we required "deployed" here, every
|
||||
// subsequent cron/git poll with the same SHA would fall through to a
|
||||
// full clone + docker build despite a perfectly healthy running
|
||||
// container — a rebuild storm that burns CPU/disk until a new commit
|
||||
// lands. Instead we trust the live container/proxy state and heal the
|
||||
// stale status via healUnchanged.
|
||||
if !force && latestSHA == prev.LastCommitSHA && prevContainerID != "" {
|
||||
running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID)
|
||||
switch {
|
||||
case !running:
|
||||
slog.Info("dockerfile: container not running, forcing redeploy", "workload", w.Name)
|
||||
case domain != "":
|
||||
proxyOK, perr := deps.Proxy.RouteExists(ctx, domain)
|
||||
switch {
|
||||
case perr != nil:
|
||||
slog.Warn("dockerfile: proxy check failed, forcing redeploy",
|
||||
"workload", w.Name, "error", perr)
|
||||
case !proxyOK:
|
||||
slog.Info("dockerfile: proxy route missing, forcing redeploy", "workload", w.Name)
|
||||
default:
|
||||
return healUnchanged(deps, w, prev, latestSHA)
|
||||
}
|
||||
default:
|
||||
return healUnchanged(deps, w, prev, latestSHA)
|
||||
}
|
||||
}
|
||||
|
||||
updateStatus(deps, w, "syncing", prev.LastCommitSHA, "")
|
||||
publishEvent(deps, w, "syncing")
|
||||
|
||||
// Clone the repo into a temp dir. We always download the entire
|
||||
// repo tree (folderPath = ""); a ContextPath subset is applied
|
||||
// at build time, not at download time, so a Dockerfile in
|
||||
// `./docker/Dockerfile` with `ContextPath=""` still works.
|
||||
cloneDir, err := os.MkdirTemp("", "tf-build-"+idShort(w)+"-*")
|
||||
if err != nil {
|
||||
updateStatus(deps, w, "failed", prev.LastCommitSHA,
|
||||
sanitizeError(fmt.Sprintf("create clone dir: %v", err), token))
|
||||
return fmt.Errorf("create clone dir: %w", err)
|
||||
}
|
||||
defer os.RemoveAll(cloneDir)
|
||||
|
||||
if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, "", cloneDir); err != nil {
|
||||
updateStatus(deps, w, "failed", prev.LastCommitSHA,
|
||||
sanitizeError(fmt.Sprintf("download repo: %v", err), token))
|
||||
return fmt.Errorf("download repo: %w", err)
|
||||
}
|
||||
|
||||
// Resolve the build context (with symlink-aware escape check) and
|
||||
// verify the Dockerfile is actually present before sending the
|
||||
// build off to the daemon.
|
||||
contextDir, err := resolveContextDir(cloneDir, cfg.ContextPath)
|
||||
if err != nil {
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(fmt.Sprintf("resolve context: %v", err), token))
|
||||
return fmt.Errorf("resolve context: %w", err)
|
||||
}
|
||||
if err := verifyDockerfileExists(contextDir, cfg.DockerfilePath); err != nil {
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(err.Error(), token))
|
||||
return err
|
||||
}
|
||||
|
||||
imageTag := imageTagFor(w)
|
||||
updateStatus(deps, w, "building", latestSHA, "")
|
||||
publishEvent(deps, w, "building")
|
||||
// Bridge per-line build output onto the event bus so /api/events
|
||||
// subscribers (the dashboard's live tail) can show progress while
|
||||
// the daemon chugs. The bus is non-blocking — slow subscribers drop
|
||||
// events rather than backpressure the build — so this is safe to
|
||||
// call from the hot scan loop.
|
||||
logFn := func(line string) {
|
||||
publishBuildLog(deps, w, line)
|
||||
}
|
||||
if err := deps.Docker.BuildImageAt(ctx, contextDir, cfg.DockerfilePath, imageTag, logFn); err != nil {
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(fmt.Sprintf("docker build: %v", err), token))
|
||||
return fmt.Errorf("docker build: %w", err)
|
||||
}
|
||||
|
||||
env := buildEnv(deps, w.ID)
|
||||
containerPort := strconv.Itoa(cfg.Port)
|
||||
|
||||
settings, err := deps.Store.GetSettings()
|
||||
if err != nil {
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(fmt.Sprintf("get settings: %v", err), token))
|
||||
return fmt.Errorf("get settings: %w", err)
|
||||
}
|
||||
|
||||
networkName := settings.Network
|
||||
networkID, err := deps.Docker.EnsureNetwork(ctx, networkName)
|
||||
if err != nil {
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(fmt.Sprintf("ensure network: %v", err), token))
|
||||
return fmt.Errorf("ensure network: %w", err)
|
||||
}
|
||||
|
||||
containerName := containerNameFor(w)
|
||||
|
||||
// Per-face proxy labels (Traefik consumes these; NPM ignores them).
|
||||
labels := map[string]string{}
|
||||
if domain != "" {
|
||||
if l := deps.Proxy.ContainerLabels(domain, cfg.Port); l != nil {
|
||||
for k, v := range l {
|
||||
labels[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cc := docker.ContainerConfig{
|
||||
Name: containerName,
|
||||
Image: imageTag,
|
||||
Env: env,
|
||||
ExposedPorts: []string{containerPort + "/tcp"},
|
||||
NetworkName: networkName,
|
||||
NetworkID: networkID,
|
||||
Labels: labels,
|
||||
WorkloadID: w.ID,
|
||||
// Dockerfile workloads are tagged as "build" so the dashboard
|
||||
// and any filtered query can distinguish them from static sites
|
||||
// (which serve files) and image-source containers (which pull
|
||||
// pre-built images from a registry).
|
||||
WorkloadKind: string(store.WorkloadKindBuild),
|
||||
Role: "",
|
||||
}
|
||||
|
||||
containerID, err := deps.Docker.CreateContainer(ctx, cc)
|
||||
if err != nil {
|
||||
// Name conflict — best-effort cleanup of any prior container
|
||||
// (by ID first; by name as a fallback) and one retry.
|
||||
if prevContainerID != "" {
|
||||
deps.Docker.StopContainer(ctx, prevContainerID, 10)
|
||||
deps.Docker.RemoveContainer(ctx, prevContainerID, true)
|
||||
}
|
||||
removeContainerByName(ctx, deps, containerName)
|
||||
|
||||
containerID, err = deps.Docker.CreateContainer(ctx, cc)
|
||||
if err != nil {
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(fmt.Sprintf("create container: %v", err), token))
|
||||
return fmt.Errorf("create container: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := deps.Docker.StartContainer(ctx, containerID); err != nil {
|
||||
deps.Docker.RemoveContainer(ctx, containerID, true)
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(fmt.Sprintf("start container: %v", err), token))
|
||||
return fmt.Errorf("start container: %w", err)
|
||||
}
|
||||
|
||||
// Brief health-check window — catch crash-on-boot. ctx-aware so a
|
||||
// cancelled deploy returns promptly. On failure surface the tail
|
||||
// of the container's logs as the error reason; that's almost
|
||||
// always what the operator needs to debug.
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
deps.Docker.RemoveContainer(ctx, containerID, true)
|
||||
updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check")
|
||||
return ctx.Err()
|
||||
case <-time.After(healthCheckDelay):
|
||||
}
|
||||
running, runErr := deps.Docker.IsContainerRunning(ctx, containerID)
|
||||
if runErr != nil || !running {
|
||||
logMsg := "container exited immediately after start"
|
||||
if logs, logErr := deps.Docker.ContainerLogs(ctx, containerID, false, "40"); logErr == nil {
|
||||
buf, _ := io.ReadAll(logs)
|
||||
logs.Close()
|
||||
if len(buf) > 0 {
|
||||
// Pass `env` so any decrypted KEY=VALUE pair that the
|
||||
// container's startup output happens to echo (think
|
||||
// `RUN echo $DB_PASSWORD` in a debug Dockerfile) is
|
||||
// redacted before it lands in the operator-visible
|
||||
// last_error field.
|
||||
logMsg = sanitizeErrorWithSecrets(string(buf), token, env)
|
||||
}
|
||||
}
|
||||
deps.Docker.RemoveContainer(ctx, containerID, true)
|
||||
updateStatus(deps, w, "failed", latestSHA, logMsg)
|
||||
return fmt.Errorf("container not running: %s", logMsg)
|
||||
}
|
||||
|
||||
// Resolve proxy target: in-network DNS by default, NPM-remote
|
||||
// override uses (settings.ServerIP, hostPort).
|
||||
forwardHost := containerName
|
||||
forwardPort := cfg.Port
|
||||
if settings.NpmRemote && settings.ProxyProvider == "npm" {
|
||||
if settings.ServerIP != "" {
|
||||
hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp")
|
||||
if hpErr != nil {
|
||||
slog.Warn("dockerfile: could not get host port for remote NPM",
|
||||
"workload", w.Name, "error", hpErr)
|
||||
} else {
|
||||
forwardHost = settings.ServerIP
|
||||
forwardPort = int(hostPort)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Configure proxy if a domain is set. Replace any prior route
|
||||
// in-place so traffic shifts atomically over to the new container.
|
||||
proxyRouteID := prevProxyRouteID
|
||||
if domain != "" {
|
||||
if prevProxyRouteID != "" {
|
||||
deps.Proxy.DeleteRoute(ctx, prevProxyRouteID)
|
||||
}
|
||||
routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{
|
||||
SSLCertificateID: settings.SSLCertificateID,
|
||||
})
|
||||
if rerr != nil {
|
||||
slog.Warn("dockerfile: failed to configure proxy",
|
||||
"workload", w.Name, "domain", domain,
|
||||
"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr)
|
||||
} else {
|
||||
proxyRouteID = routeID
|
||||
slog.Info("dockerfile: proxy configured",
|
||||
"workload", w.Name, "domain", domain,
|
||||
"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID)
|
||||
}
|
||||
}
|
||||
|
||||
// Drop the previous container only after the new one is healthy
|
||||
// + routed. Different-ID-than-previous tells us we created a
|
||||
// fresh one (vs returning the same ID via UpsertContainer reuse).
|
||||
if prevContainerID != "" && prevContainerID != containerID {
|
||||
deps.Docker.StopContainer(ctx, prevContainerID, 10)
|
||||
deps.Docker.RemoveContainer(ctx, prevContainerID, true)
|
||||
}
|
||||
|
||||
// Single transactional write of new state + container metadata.
|
||||
// On failure: tear down the just-created container + proxy route
|
||||
// so we don't leave orphans behind for the next deploy to trip
|
||||
// over.
|
||||
if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
|
||||
rs.LastCommitSHA = latestSHA
|
||||
rs.LastSyncAt = store.Now()
|
||||
rs.LastError = ""
|
||||
rs.Status = "deployed"
|
||||
|
||||
c.ContainerID = containerID
|
||||
c.ProxyRouteID = proxyRouteID
|
||||
c.Subdomain = domain
|
||||
c.State = "running"
|
||||
c.Port = cfg.Port
|
||||
c.ImageRef = imageTag
|
||||
}); err != nil {
|
||||
slog.Error("dockerfile: failed to persist deploy state — rolling back",
|
||||
"workload", w.Name, "error", err)
|
||||
if proxyRouteID != "" {
|
||||
deps.Proxy.DeleteRoute(ctx, proxyRouteID)
|
||||
}
|
||||
deps.Docker.StopContainer(ctx, containerID, 10)
|
||||
deps.Docker.RemoveContainer(ctx, containerID, true)
|
||||
updateStatus(deps, w, "failed", latestSHA,
|
||||
sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token))
|
||||
return fmt.Errorf("persist deploy state: %w", err)
|
||||
}
|
||||
|
||||
publishEvent(deps, w, "deployed")
|
||||
dispatchBuildNotification(deps, w, domain, "deployed", "")
|
||||
|
||||
slog.Info("dockerfile deployed",
|
||||
"workload", w.Name,
|
||||
"sha", shortSHA(latestSHA),
|
||||
"image", imageTag)
|
||||
return nil
|
||||
}
|
||||
|
||||
// updateStatus writes the runtime-state status/error/commit and (on
|
||||
// terminal states) fires the side effects the static plugin's helper
|
||||
// does: failures land in the event log, and a "deployed" or "failed"
|
||||
// transition dispatches an outbound notification.
|
||||
//
|
||||
// The deploy success path calls saveState directly with the full
|
||||
// container metadata; this helper covers failure / intermediate
|
||||
// transitions where only state moves.
|
||||
func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) {
|
||||
if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
|
||||
rs.Status = status
|
||||
rs.LastError = errMsg
|
||||
if commitSHA != "" {
|
||||
rs.LastCommitSHA = commitSHA
|
||||
}
|
||||
switch status {
|
||||
case "deployed":
|
||||
c.State = "running"
|
||||
case "stopped":
|
||||
c.State = "stopped"
|
||||
case "failed":
|
||||
c.State = "failed"
|
||||
case "syncing", "building":
|
||||
// Don't churn the container row's state during in-progress
|
||||
// build/sync — leave whatever the previous deploy left.
|
||||
}
|
||||
}); err != nil {
|
||||
slog.Error("dockerfile: failed to update status",
|
||||
"id", w.ID, "status", status, "error", err)
|
||||
}
|
||||
|
||||
if status == "failed" {
|
||||
publishEvent(deps, w, "failed: "+errMsg)
|
||||
}
|
||||
|
||||
if status == "deployed" || status == "failed" {
|
||||
dispatchBuildNotification(deps, w, primaryDomain(deps, w), status, errMsg)
|
||||
}
|
||||
}
|
||||
|
||||
// dispatchBuildNotification fans the build event out to every
|
||||
// configured notification route for the workload. Multi-destination
|
||||
// fan-out (workload_notifications rows + legacy single URL + global
|
||||
// settings fallback) is centralised in plugin.DispatchNotificationForWorkload
|
||||
// so the routing rules are identical across source kinds.
|
||||
func dispatchBuildNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
|
||||
eventType := "build_success"
|
||||
if status == "failed" {
|
||||
eventType = "build_failure"
|
||||
}
|
||||
siteURL := ""
|
||||
if domain != "" {
|
||||
siteURL = "https://" + domain
|
||||
}
|
||||
plugin.DispatchNotificationForWorkload(deps, w, notify.Event{
|
||||
Type: eventType,
|
||||
Project: w.Name,
|
||||
URL: siteURL,
|
||||
Error: errMsg,
|
||||
})
|
||||
}
|
||||
|
||||
// publishEvent emits a status event on the bus AND persists an
|
||||
// event_log row. Message shape mirrors the static plugin
|
||||
// ("Build %q: %s") so the dashboard's audit feed reads consistently
|
||||
// across both kinds.
|
||||
func publishEvent(deps plugin.Deps, w plugin.Workload, status string) {
|
||||
severity := "info"
|
||||
if strings.HasPrefix(status, "failed") {
|
||||
severity = "error"
|
||||
}
|
||||
message := fmt.Sprintf("Build %q: %s", w.Name, status)
|
||||
|
||||
metaBytes, err := json.Marshal(map[string]string{
|
||||
"workload_id": w.ID,
|
||||
"workload_name": w.Name,
|
||||
"status": status,
|
||||
})
|
||||
if err != nil {
|
||||
slog.Error("dockerfile: marshal event metadata", "error", err)
|
||||
metaBytes = []byte("{}")
|
||||
}
|
||||
metadata := string(metaBytes)
|
||||
|
||||
evt, err := deps.Store.InsertEvent(store.EventLog{
|
||||
Source: "dockerfile",
|
||||
Severity: severity,
|
||||
Message: message,
|
||||
Metadata: metadata,
|
||||
})
|
||||
if err != nil {
|
||||
slog.Error("dockerfile: failed to persist event log", "error", err)
|
||||
return
|
||||
}
|
||||
deps.Events.Publish(events.Event{
|
||||
Type: events.EventLog,
|
||||
Payload: events.EventLogPayload{
|
||||
ID: evt.ID,
|
||||
Source: "dockerfile",
|
||||
Severity: severity,
|
||||
Message: message,
|
||||
Metadata: metadata,
|
||||
CreatedAt: evt.CreatedAt,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// publishBuildLog emits one EventBuildLog per non-empty daemon "stream"
|
||||
// line. The trailing newline the daemon emits per line is trimmed so the
|
||||
// UI can render each event as its own row without smuggled blanks.
|
||||
// Strictly best-effort: the bus drops events under backpressure (slow
|
||||
// subscriber, no subscriber at all) and never blocks the build loop.
|
||||
func publishBuildLog(deps plugin.Deps, w plugin.Workload, line string) {
|
||||
trimmed := strings.TrimRight(line, "\r\n")
|
||||
if trimmed == "" {
|
||||
return
|
||||
}
|
||||
deps.Events.Publish(events.Event{
|
||||
Type: events.EventBuildLog,
|
||||
Payload: events.BuildLogPayload{
|
||||
WorkloadID: w.ID,
|
||||
Line: trimmed,
|
||||
Stream: "stdout",
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// healUnchanged is the no-rebuild short-circuit result: the SHA matches and
|
||||
// the live container + proxy are healthy, so there is nothing to deploy. If a
|
||||
// prior transient failure left the persisted status as something other than
|
||||
// "deployed", repair it so the dashboard reflects reality and we stop treating
|
||||
// a healthy workload as failed. We heal via saveState directly (NOT
|
||||
// updateStatus) so this reconciliation does not fire a spurious build-success
|
||||
// notification on every poll.
|
||||
func healUnchanged(deps plugin.Deps, w plugin.Workload, prev runtimeState, latestSHA string) error {
|
||||
slog.Info("dockerfile: no changes", "workload", w.Name, "sha", shortSHA(latestSHA))
|
||||
if prev.Status == "deployed" {
|
||||
return nil
|
||||
}
|
||||
if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
|
||||
rs.Status = "deployed"
|
||||
rs.LastError = ""
|
||||
c.State = "running"
|
||||
}); err != nil {
|
||||
slog.Warn("dockerfile: failed to heal stale status to deployed",
|
||||
"workload", w.Name, "error", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// removeContainerByName enumerates Docker's view and best-effort drops
|
||||
// EVERY matching container so a name conflict in CreateContainer is
|
||||
// recoverable. Container names are unique per daemon, but the recovery
|
||||
// path exists precisely because a conflict occurred — a prior partial
|
||||
// deploy can leave more than one matching artifact, so we must not stop
|
||||
// at the first. Mirrors the static plugin's helper of the same name.
|
||||
func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) {
|
||||
containers, err := deps.Docker.ListContainers(ctx, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for _, c := range containers {
|
||||
if c.Name == name {
|
||||
deps.Docker.StopContainer(ctx, c.ID, 10)
|
||||
deps.Docker.RemoveContainer(ctx, c.ID, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// primaryDomain mirrors the static plugin's helper of the same name —
|
||||
// derives an FQDN from the workload's first enabled public face, with
|
||||
// the same bare-subdomain + settings.Domain fall-through.
|
||||
func primaryDomain(deps plugin.Deps, w plugin.Workload) string {
|
||||
for _, f := range w.PublicFaces {
|
||||
if f.Subdomain == "" && f.Domain == "" {
|
||||
continue
|
||||
}
|
||||
switch {
|
||||
case f.Subdomain != "" && f.Domain != "":
|
||||
return f.Subdomain + "." + f.Domain
|
||||
case f.Subdomain == "" && f.Domain != "":
|
||||
return f.Domain
|
||||
case f.Subdomain != "" && f.Domain == "":
|
||||
settings, err := deps.Store.GetSettings()
|
||||
if err != nil || settings.Domain == "" {
|
||||
return f.Subdomain
|
||||
}
|
||||
return f.Subdomain + "." + settings.Domain
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// shortSHA abbreviates a commit SHA to its first 8 bytes for log
// output. Inputs of 8 bytes or fewer (including "") are returned
// unchanged.
func shortSHA(sha string) string {
	const keep = 8
	if len(sha) <= keep {
		return sha
	}
	return sha[:keep]
}
|
||||
@@ -0,0 +1,131 @@
|
||||
// Package dockerfile implements the "dockerfile" source: a git-repo-backed
|
||||
// deployable that builds a Docker image from a user-supplied Dockerfile
|
||||
// and runs one container. This is the "self-hosted Vercel" Source —
|
||||
// users point at a Git repo containing a Dockerfile and Tinyforge
|
||||
// handles clone → build → run → proxy in one shot, with no external CI
|
||||
// pipeline.
|
||||
//
|
||||
// Architecturally the plugin sits between `static` (clones a Git repo,
|
||||
// builds an image, runs one container) and `image` (richer runtime
|
||||
// shape: ports, healthcheck, env, volumes). The deploy pipeline mirrors
|
||||
// static — same git-fetch, same image-tag/container-name shape, same
|
||||
// container-row state persistence — but the build step uses the
|
||||
// operator's Dockerfile instead of generating one.
|
||||
//
|
||||
// The full pipeline is implemented inline in this package
|
||||
// (deploy.go / teardown.go / reconcile.go) so a new dockerfile source
|
||||
// kind is usable immediately on init() — no separate registration step
|
||||
// in the deployer.
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// Config is the JSON-encoded source configuration stored per workload
// for the "dockerfile" source kind. Its Git-related fields follow the
// static plugin's Config so the UI can share its repo/branch pickers.
//
// Env vars and volume mounts are not part of this blob; they live in
// the workload_env and workload_volumes tables, as with the image
// source.
type Config struct {
	// Provider selects the Git host flavour: "gitea", "github" or
	// "gitlab". Empty means autodetect.
	Provider string `json:"provider"`
	// BaseURL is the Git host's base URL, e.g. https://git.example.com.
	BaseURL string `json:"base_url"`
	// RepoOwner and RepoName identify the repository.
	RepoOwner string `json:"repo_owner"`
	RepoName  string `json:"repo_name"`
	// Branch is the branch to build from.
	Branch string `json:"branch"`
	// ContextPath is the subfolder of the cloned repo used as the build
	// context, e.g. "./api" in a monorepo. Empty means the repo root.
	ContextPath string `json:"context_path"`
	// DockerfilePath is the Dockerfile's path relative to ContextPath,
	// e.g. "docker/Dockerfile". Defaults to "Dockerfile".
	DockerfilePath string `json:"dockerfile_path"`
	// AccessToken is the (encrypted) Git access token; optional for
	// public repositories.
	AccessToken string `json:"access_token"`

	// Port is the container port the workload listens on. Required.
	Port int `json:"port"`
	// Healthcheck is an optional curl-style probe; empty disables it.
	Healthcheck string `json:"healthcheck,omitempty"`
}
|
||||
|
||||
// source is the stateless plugin implementation for the "dockerfile"
// source kind; all of its methods use a receiver that carries no data.
type source struct{}
|
||||
|
||||
// Eager registration — the deploy pipeline lives entirely inside this
|
||||
// package, so the kind is usable as soon as init() fires.
|
||||
func init() { plugin.RegisterSource(&source{}) }
|
||||
|
||||
// Kind returns the source-kind identifier, "dockerfile".
func (*source) Kind() string {
	return "dockerfile"
}
|
||||
|
||||
func (*source) SchemaSample() any {
|
||||
return Config{
|
||||
Provider: "gitea",
|
||||
BaseURL: "https://git.example.com",
|
||||
RepoOwner: "owner",
|
||||
RepoName: "myservice",
|
||||
Branch: "main",
|
||||
ContextPath: "",
|
||||
DockerfilePath: "Dockerfile",
|
||||
Port: 8080,
|
||||
}
|
||||
}
|
||||
|
||||
// Validate rejects obviously-malformed configs before the deploy
|
||||
// pipeline materializes a temp dir, downloads a repo, and burns
|
||||
// minutes of build time on input that was never going to work.
|
||||
func (*source) Validate(cfg json.RawMessage) error {
|
||||
var c Config
|
||||
if len(cfg) == 0 {
|
||||
return fmt.Errorf("dockerfile source: config is required")
|
||||
}
|
||||
if err := json.Unmarshal(cfg, &c); err != nil {
|
||||
return fmt.Errorf("dockerfile source: invalid json: %w", err)
|
||||
}
|
||||
if strings.TrimSpace(c.RepoOwner) == "" || strings.TrimSpace(c.RepoName) == "" {
|
||||
return fmt.Errorf("dockerfile source: repo_owner and repo_name are required")
|
||||
}
|
||||
if c.Port <= 0 || c.Port > 65535 {
|
||||
return fmt.Errorf("dockerfile source: port must be between 1 and 65535 (got %d)", c.Port)
|
||||
}
|
||||
// Defense in depth: a leading "/" or any ".." segment in
|
||||
// DockerfilePath / ContextPath would escape the build context. The
|
||||
// plugin's deploy() does its own normalization too; rejecting here
|
||||
// gives the operator a clear error at save-time instead of a
|
||||
// confusing "no such file" mid-build.
|
||||
for _, p := range []string{c.DockerfilePath, c.ContextPath} {
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(p, "/") {
|
||||
return fmt.Errorf("dockerfile source: %q must be relative", p)
|
||||
}
|
||||
if strings.Contains(p, "..") {
|
||||
return fmt.Errorf("dockerfile source: %q must not contain '..'", p)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
||||
return deploy(ctx, deps, w, intent)
|
||||
}
|
||||
|
||||
func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
|
||||
return teardown(ctx, deps, w)
|
||||
}
|
||||
|
||||
func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
|
||||
return reconcile(ctx, deps, w)
|
||||
}
|
||||
@@ -0,0 +1,288 @@
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// ── Source interface plumbing ───────────────────────────────────────
|
||||
|
||||
func TestSource_Kind(t *testing.T) {
|
||||
if (&source{}).Kind() != "dockerfile" {
|
||||
t.Fatalf("Kind = %q, want \"dockerfile\"", (&source{}).Kind())
|
||||
}
|
||||
}
|
||||
|
||||
func TestSource_Registered_AtInit(t *testing.T) {
|
||||
// init() runs once on import; we just verify the registry returns
|
||||
// our concrete kind. A failure here is a regression of the global
|
||||
// plugin.RegisterSource path or our package-level init.
|
||||
got, err := plugin.GetSource("dockerfile")
|
||||
if err != nil {
|
||||
t.Fatalf("GetSource(dockerfile): %v", err)
|
||||
}
|
||||
if got.Kind() != "dockerfile" {
|
||||
t.Fatalf("registered source has wrong kind: %q", got.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
func TestSource_SchemaSample_RoundTrips(t *testing.T) {
|
||||
s := (&source{}).SchemaSample()
|
||||
raw, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal sample: %v", err)
|
||||
}
|
||||
if err := (&source{}).Validate(raw); err != nil {
|
||||
t.Fatalf("Validate(sample) = %v, want nil", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Validate ────────────────────────────────────────────────────────
|
||||
|
||||
func TestValidate_RejectsEmpty(t *testing.T) {
|
||||
if err := (&source{}).Validate(nil); err == nil {
|
||||
t.Fatal("expected error on empty config, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_RejectsMissingRepo(t *testing.T) {
|
||||
cases := []Config{
|
||||
{RepoName: "x", Port: 80}, // owner missing
|
||||
{RepoOwner: "y", Port: 80}, // name missing
|
||||
{RepoOwner: " ", RepoName: "x", Port: 80}, // owner whitespace-only
|
||||
}
|
||||
for i, c := range cases {
|
||||
raw, _ := json.Marshal(c)
|
||||
if err := (&source{}).Validate(raw); err == nil {
|
||||
t.Errorf("case %d: expected error, got nil", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_RejectsBadPort(t *testing.T) {
|
||||
for _, port := range []int{0, -1, 70000} {
|
||||
raw, _ := json.Marshal(Config{RepoOwner: "a", RepoName: "b", Port: port})
|
||||
if err := (&source{}).Validate(raw); err == nil {
|
||||
t.Errorf("port %d: expected error, got nil", port)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_RejectsPathEscape(t *testing.T) {
|
||||
cases := []Config{
|
||||
{RepoOwner: "a", RepoName: "b", Port: 80, DockerfilePath: "/etc/passwd"},
|
||||
{RepoOwner: "a", RepoName: "b", Port: 80, DockerfilePath: "../../etc/passwd"},
|
||||
{RepoOwner: "a", RepoName: "b", Port: 80, ContextPath: "../../"},
|
||||
{RepoOwner: "a", RepoName: "b", Port: 80, ContextPath: "/etc"},
|
||||
}
|
||||
for i, c := range cases {
|
||||
raw, _ := json.Marshal(c)
|
||||
if err := (&source{}).Validate(raw); err == nil {
|
||||
t.Errorf("case %d: expected path-escape rejection, got nil", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidate_AcceptsValid(t *testing.T) {
|
||||
raw, _ := json.Marshal(Config{
|
||||
RepoOwner: "owner",
|
||||
RepoName: "repo",
|
||||
Port: 8080,
|
||||
DockerfilePath: "docker/Dockerfile",
|
||||
ContextPath: "services/api",
|
||||
})
|
||||
if err := (&source{}).Validate(raw); err != nil {
|
||||
t.Fatalf("Validate(valid) = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Naming helpers ──────────────────────────────────────────────────
|
||||
|
||||
func TestNaming_SameNameDifferentIDs_NoCollision(t *testing.T) {
|
||||
a := plugin.Workload{ID: "aaaaaaaa-rest", Name: "svc"}
|
||||
b := plugin.Workload{ID: "bbbbbbbb-rest", Name: "svc"}
|
||||
if containerNameFor(a) == containerNameFor(b) {
|
||||
t.Errorf("container names collide: %q", containerNameFor(a))
|
||||
}
|
||||
if imageTagFor(a) == imageTagFor(b) {
|
||||
t.Errorf("image tags collide: %q", imageTagFor(a))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNaming_ShortIDsPassThrough(t *testing.T) {
|
||||
w := plugin.Workload{ID: "abc", Name: "tiny"}
|
||||
if !strings.HasSuffix(containerNameFor(w), "-abc") {
|
||||
t.Errorf("container name lost short id: %q", containerNameFor(w))
|
||||
}
|
||||
}
|
||||
|
||||
// ── Context + Dockerfile resolution ─────────────────────────────────
|
||||
|
||||
func TestResolveContextDir_Empty_ReturnsRoot(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
got, err := resolveContextDir(dir, "")
|
||||
if err != nil {
|
||||
t.Fatalf("resolveContextDir: %v", err)
|
||||
}
|
||||
if real, _ := filepath.EvalSymlinks(dir); got != real && got != dir {
|
||||
t.Errorf("got %q, want %q (or symlink-resolved equivalent)", got, dir)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveContextDir_Subfolder_OK(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
sub := filepath.Join(dir, "api")
|
||||
if err := os.MkdirAll(sub, 0o755); err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
got, err := resolveContextDir(dir, "api")
|
||||
if err != nil {
|
||||
t.Fatalf("resolveContextDir: %v", err)
|
||||
}
|
||||
if !strings.HasSuffix(got, "api") {
|
||||
t.Errorf("got %q, expected suffix 'api'", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveContextDir_NonexistentSubfolder(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if _, err := resolveContextDir(dir, "missing"); err == nil {
|
||||
t.Fatal("expected error for missing subfolder")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveContextDir_RejectsEscape(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// resolveContextDir is the second wall — Validate is the first.
|
||||
// We pass an absolute escape via a synthesized symlink. Even if
|
||||
// the user bypasses Validate (e.g. by direct DB edit), this must
|
||||
// still reject.
|
||||
outside := t.TempDir()
|
||||
link := filepath.Join(dir, "escape")
|
||||
if err := os.Symlink(outside, link); err != nil {
|
||||
t.Skipf("symlink unsupported in this environment: %v", err)
|
||||
}
|
||||
if _, err := resolveContextDir(dir, "escape"); err == nil {
|
||||
t.Fatal("expected escape-path rejection")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDockerfileExists_Present(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "Dockerfile"), []byte("FROM scratch\n"), 0o644); err != nil {
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
if err := verifyDockerfileExists(dir, ""); err != nil {
|
||||
t.Fatalf("verifyDockerfileExists(default) = %v, want nil", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDockerfileExists_Missing(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := verifyDockerfileExists(dir, ""); err == nil {
|
||||
t.Fatal("expected error for missing Dockerfile")
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDockerfileExists_CustomPath(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.MkdirAll(filepath.Join(dir, "docker"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(dir, "docker", "Dockerfile.prod"), []byte("FROM scratch\n"), 0o644); err != nil {
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
if err := verifyDockerfileExists(dir, "docker/Dockerfile.prod"); err != nil {
|
||||
t.Fatalf("verifyDockerfileExists(custom) = %v, want nil", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVerifyDockerfileExists_RejectsAbsolutePath(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := verifyDockerfileExists(dir, "/etc/passwd"); err == nil {
|
||||
t.Fatal("expected error for absolute dockerfile path")
|
||||
}
|
||||
}
|
||||
|
||||
// ── Sanitiser ───────────────────────────────────────────────────────
|
||||
|
||||
func TestSanitizeError_RedactsToken(t *testing.T) {
|
||||
tok := "ghp_supersecret"
|
||||
got := sanitizeError("401 from gitea token="+tok+" ok", tok)
|
||||
if strings.Contains(got, tok) {
|
||||
t.Errorf("token leaked: %q", got)
|
||||
}
|
||||
if !strings.Contains(got, "[REDACTED]") {
|
||||
t.Errorf("missing [REDACTED] marker: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeError_CollapsesWhitespace(t *testing.T) {
|
||||
got := sanitizeError("a\nb\rc\td", "")
|
||||
if strings.ContainsAny(got, "\n\r\t") {
|
||||
t.Errorf("did not collapse: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeError_TruncatesUTF8Safe(t *testing.T) {
|
||||
// 1000 copies of a 2-byte rune = 2000 bytes, well over the 240
|
||||
// cap. Output must remain valid UTF-8 (no torn rune at the cap).
|
||||
long := strings.Repeat("é", 1000)
|
||||
got := sanitizeError(long, "")
|
||||
if !strings.HasSuffix(got, "…") {
|
||||
t.Errorf("missing ellipsis: %q", got)
|
||||
}
|
||||
// Walk the result: every byte should be either an ASCII char or
|
||||
// part of a complete UTF-8 sequence. utf8.ValidString is the
|
||||
// canonical guard but a simple "ends on rune boundary" check
|
||||
// suffices for this fixture.
|
||||
if !isValidUTF8Slice([]byte(got)) {
|
||||
t.Errorf("truncation produced broken UTF-8: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
// isValidUTF8Slice reports whether b is made up entirely of
// structurally well-formed UTF-8 sequences. For each sequence it
// checks that:
//
//   - the lead byte is a legal lead (ASCII, or a 2-, 3- or 4-byte
//     lead; stray continuation bytes and 0xF8..0xFF are rejected);
//   - the sequence is not cut off by the end of the slice;
//   - every following byte is a continuation byte (10xxxxxx).
//
// It is a test helper for catching torn runes. It does not reject
// overlong encodings or surrogate code points; utf8.Valid is the
// canonical check when full validation is required.
func isValidUTF8Slice(b []byte) bool {
	for i := 0; i < len(b); {
		var size int
		switch lead := b[i]; {
		case lead < 0x80:
			size = 1
		case lead < 0xC0:
			return false // continuation byte at sequence start
		case lead < 0xE0:
			size = 2
		case lead < 0xF0:
			size = 3
		case lead < 0xF8:
			size = 4
		default:
			return false // 0xF8..0xFF never start a UTF-8 sequence
		}
		if i+size > len(b) {
			return false // sequence truncated by end of input
		}
		for _, c := range b[i+1 : i+size] {
			if c&0xC0 != 0x80 {
				return false // expected a continuation byte
			}
		}
		i += size
	}
	return true
}
|
||||
|
||||
// ── State row ID ────────────────────────────────────────────────────
|
||||
|
||||
func TestContainerRowID_Deterministic(t *testing.T) {
|
||||
w := plugin.Workload{ID: "abcd1234-rest"}
|
||||
a := containerRowID(w)
|
||||
b := containerRowID(w)
|
||||
if a != b {
|
||||
t.Errorf("containerRowID not deterministic: %q vs %q", a, b)
|
||||
}
|
||||
if !strings.HasSuffix(a, ":dockerfile") {
|
||||
t.Errorf("containerRowID missing suffix: %q", a)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/crypto"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// buildEnv flattens workload_env rows into the KEY=VALUE list Docker
|
||||
// expects. Mirrors the static plugin's env helper exactly so the two
|
||||
// plugins handle decrypt failures the same way: log + skip the one
|
||||
// entry rather than fail the deploy. Bricking a build because one
|
||||
// rotated key missed an env entry would be worse than running with
|
||||
// the variable unset and a single warning in the operator's log.
|
||||
func buildEnv(deps plugin.Deps, workloadID string) []string {
|
||||
rows, err := deps.Store.ListWorkloadEnv(workloadID)
|
||||
if err != nil {
|
||||
slog.Warn("dockerfile source: list workload env", "workload", workloadID, "error", err)
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(rows))
|
||||
for _, e := range rows {
|
||||
value := e.Value
|
||||
if e.Encrypted {
|
||||
decrypted, err := crypto.Decrypt(deps.EncKey, e.Value)
|
||||
if err != nil {
|
||||
slog.Warn("dockerfile source: decrypt env value",
|
||||
"workload", workloadID, "key", e.Key, "error", err)
|
||||
continue
|
||||
}
|
||||
value = decrypted
|
||||
}
|
||||
out = append(out, e.Key+"="+value)
|
||||
}
|
||||
return out
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
package dockerfile
|
||||
|
||||
import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
|
||||
|
||||
// resolveContextDir returns the absolute directory the Docker build context
// is packed from, refusing any location outside the cloned tree.
//
// cloneRoot is the root of the clone. ctx is either empty / "." / "./"
// (meaning the clone root itself) or a slash-separated relative subpath such
// as "./api" or "services/api".
//
// Both the root and the candidate are made absolute and, when resolution
// succeeds, symlink-resolved before the containment check, so a symlink
// planted inside the clone cannot point the build context elsewhere.
// Validate already rejects ".." and a leading "/"; this is the second wall.
//
// An error is returned when the path escapes the clone root, cannot be
// stat'ed, or is not a directory.
func resolveContextDir(cloneRoot, ctx string) (string, error) {
	root, err := filepath.Abs(cloneRoot)
	if err != nil {
		return "", fmt.Errorf("abs cloneRoot: %w", err)
	}
	if resolved, evalErr := filepath.EvalSymlinks(root); evalErr == nil {
		root = resolved
	}

	// The spellings of "use the clone root as-is".
	switch ctx {
	case "", ".", "./":
		return root, nil
	}

	target, err := filepath.Abs(filepath.Join(root, filepath.FromSlash(ctx)))
	if err != nil {
		return "", fmt.Errorf("abs candidate: %w", err)
	}
	// Resolve symlinks BEFORE the containment check; a failed resolution
	// leaves the lexical path in place and the Stat below reports it.
	if resolved, evalErr := filepath.EvalSymlinks(target); evalErr == nil {
		target = resolved
	}

	// The separator is appended so "/clone-evil" is not mistaken for a
	// child of "/clone".
	inside := target == root || strings.HasPrefix(target, root+string(filepath.Separator))
	if !inside {
		return "", fmt.Errorf("context path %q escapes clone root", ctx)
	}

	info, err := os.Stat(target)
	switch {
	case err != nil:
		return "", fmt.Errorf("stat context_path %q: %w", ctx, err)
	case !info.IsDir():
		return "", fmt.Errorf("context_path %q is not a directory", ctx)
	}
	return target, nil
}
|
||||
|
||||
// verifyDockerfileExists confirms that the configured Dockerfile is present
// inside the resolved build context and is a regular (non-directory) entry,
// so the operator gets a focused error up front instead of a less obvious
// daemon failure later.
//
// contextDir is the resolved context directory. dockerfilePath is the value
// from Config.DockerfilePath, relative to contextDir; an empty value means
// "Dockerfile". Paths starting with "/" or containing ".." are rejected.
func verifyDockerfileExists(contextDir, dockerfilePath string) error {
	rel := dockerfilePath
	if rel == "" {
		rel = "Dockerfile"
	}

	absolute := strings.HasPrefix(rel, "/")
	climbs := strings.Contains(rel, "..")
	if absolute || climbs {
		return fmt.Errorf("dockerfile_path %q must be relative and contain no '..'", rel)
	}

	info, err := os.Stat(filepath.Join(contextDir, filepath.FromSlash(rel)))
	switch {
	case err == nil:
		if info.IsDir() {
			return fmt.Errorf("dockerfile_path %q points at a directory, not a file", rel)
		}
		return nil
	case errors.Is(err, os.ErrNotExist):
		// Only the context's base name is reported, not the full host path.
		return fmt.Errorf("Dockerfile not found at %s/%s", filepath.Base(contextDir), rel)
	default:
		return fmt.Errorf("stat Dockerfile %q: %w", rel, err)
	}
}
|
||||
|
||||
// sanitizeError clamps an error string before it lands in
|
||||
// containers.extra_json (last_error) or echoes through an outbound
|
||||
// notification webhook. Mirrors the static-plugin helper of the same
|
||||
// name so both plugins agree on the surface area they expose to
|
||||
// operators.
|
||||
func sanitizeError(msg, accessToken string) string {
|
||||
return sanitizeErrorWithSecrets(msg, accessToken, nil)
|
||||
}
|
||||
|
||||
// sanitizeErrorWithSecrets is the dockerfile-plugin-specific extension of
// sanitizeError: when container build/runtime logs are captured into
// last_error, decrypted env-var values must be redacted as well, because a
// malicious or debug-laden Dockerfile can `RUN echo $SECRET` and land a
// runtime secret in operator-readable state via
// /api/workloads/{id}/runtime-state.
//
// Parameters:
//   - msg: the raw error text; "" is returned unchanged.
//   - accessToken: redacted wherever it occurs when non-empty.
//   - envKV: the same []string the docker client receives, shaped
//     "KEY=VALUE". Each entry is split on the first '='; entries without '='
//     and values shorter than 4 bytes are skipped (short values produce too
//     many false-positive matches against words like "is" / "of").
//
// Secrets are redacted longest-first. Redacting in input order would let a
// short secret that happens to be a substring of a longer one be replaced
// first, leaving the tail of the longer secret in the output
// ("password123" -> "[REDACTED]word123").
//
// After redaction, '\n', '\r' and '\t' are each replaced by a space and the
// result is clamped to 240 bytes (cut back to a rune boundary) followed by
// "…".
func sanitizeErrorWithSecrets(msg, accessToken string, envKV []string) string {
	if msg == "" {
		return ""
	}

	secrets := make([]string, 0, len(envKV)+1)
	if accessToken != "" {
		secrets = append(secrets, accessToken)
	}
	for _, kv := range envKV {
		_, value, found := strings.Cut(kv, "=")
		if !found || len(value) < 4 {
			continue
		}
		secrets = append(secrets, value)
	}
	// Longest first so overlapping secrets are fully covered; the stable
	// sort keeps equal-length secrets in their input order.
	sort.SliceStable(secrets, func(i, j int) bool {
		return len(secrets[i]) > len(secrets[j])
	})
	for _, secret := range secrets {
		msg = strings.ReplaceAll(msg, secret, "[REDACTED]")
	}

	// Flatten control whitespace so the message stays on one line.
	msg = strings.Map(func(r rune) rune {
		switch r {
		case '\n', '\r', '\t':
			return ' '
		}
		return r
	}, msg)

	const maxLen = 240
	if len(msg) > maxLen {
		// Rune-aware truncation: walk back to the previous rune
		// boundary so multi-byte chars at the cap don't tear.
		cut := maxLen
		for cut > 0 && !isRuneStart(msg[cut]) {
			cut--
		}
		msg = msg[:cut] + "…"
	}
	return msg
}

// isRuneStart reports whether b can begin a UTF-8 sequence, i.e. it is not a
// continuation byte of the form 10xxxxxx. It is used to walk back from a
// byte-offset cut to a rune boundary.
func isRuneStart(b byte) bool {
	return b&0xC0 != 0x80
}
|
||||
@@ -0,0 +1,32 @@
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// idShort is the first 8 chars of the workload ID. Same shape as the
|
||||
// static plugin — workload names are not UNIQUE in the schema, the ID
|
||||
// short suffix is what keeps two same-named workloads from clobbering
|
||||
// each other's container/image artifacts.
|
||||
func idShort(w plugin.Workload) string {
|
||||
if len(w.ID) < 8 {
|
||||
return w.ID
|
||||
}
|
||||
return w.ID[:8]
|
||||
}
|
||||
|
||||
// containerNameFor is the deterministic container name. Prefix `tf-build-`
|
||||
// distinguishes a dockerfile-built container from `dw-site-` (static) and
|
||||
// per-stage image names at a glance in `docker ps`.
|
||||
func containerNameFor(w plugin.Workload) string {
|
||||
return fmt.Sprintf("tf-build-%s-%s", w.Name, idShort(w))
|
||||
}
|
||||
|
||||
// imageTagFor is the deterministic image tag the build step emits. Same
|
||||
// shape as the container name so `docker images` shows the linkage at a
|
||||
// glance.
|
||||
func imageTagFor(w plugin.Workload) string {
|
||||
return fmt.Sprintf("tf-build-%s-%s:latest", w.Name, idShort(w))
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// reconcile syncs the container row's state column with Docker reality
|
||||
// for this workload's single container, and marks the runtime state as
|
||||
// "failed" if the container is gone or has crashed. Same shape as the
|
||||
// static plugin's reconcile — minimal, no automatic re-build on a
|
||||
// missing container. The dashboard surfaces the failed status; the
|
||||
// operator triggers redeploy explicitly.
|
||||
//
|
||||
// Auto-redeploy could be added later, but it should be gated on a
|
||||
// per-workload toggle: a crash loop with auto-rebuild would burn CPU
|
||||
// rebuilding the same broken commit forever.
|
||||
func reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
|
||||
st, prevContainer, err := loadState(deps, w)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if prevContainer == nil || prevContainer.ContainerID == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
running, err := deps.Docker.IsContainerRunning(ctx, prevContainer.ContainerID)
|
||||
if err != nil {
|
||||
// Most likely "no such container" — mark missing so the UI
|
||||
// surfaces it; runtime status moves to "failed" so the
|
||||
// dashboard and operator event triggers see the regression.
|
||||
if uerr := deps.Store.UpdateContainerState(prevContainer.ID, "missing"); uerr != nil {
|
||||
slog.Warn("dockerfile: mark missing", "workload", w.Name, "error", uerr)
|
||||
}
|
||||
if st.Status == "deployed" {
|
||||
if uerr := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
|
||||
rs.Status = "failed"
|
||||
rs.LastError = "container not found"
|
||||
c.State = "missing"
|
||||
}); uerr != nil {
|
||||
slog.Warn("dockerfile: persist missing-state", "workload", w.Name, "error", uerr)
|
||||
}
|
||||
publishEvent(deps, w, "failed: container not found")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
desired := "running"
|
||||
if !running {
|
||||
desired = "stopped"
|
||||
}
|
||||
if prevContainer.State != desired {
|
||||
if err := deps.Store.UpdateContainerState(prevContainer.ID, desired); err != nil {
|
||||
slog.Warn("dockerfile: state sync", "workload", w.Name, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
if !running && st.Status == "deployed" {
|
||||
if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
|
||||
rs.Status = "failed"
|
||||
rs.LastError = "container stopped unexpectedly"
|
||||
c.State = "stopped"
|
||||
}); err != nil {
|
||||
slog.Warn("dockerfile: persist crashed-state", "workload", w.Name, "error", err)
|
||||
}
|
||||
publishEvent(deps, w, "failed: container stopped unexpectedly")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,179 @@
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// runtimeState is the per-workload state we persist inside the
// container row's extra_json blob. Mirrors the static plugin's
// runtimeState shape so anyone reading the DB can interpret the two
// kinds identically.
//
// LastImageDigest is the build's image ID — distinct from a registry
// digest (we never push) but useful for "did the build actually
// produce a different artifact?" diffing when we add caching later.
//
// Every field is tagged omitempty, so empty values are left out of the
// encoded blob. The JSON names here must stay in sync with runtimeStateKeys.
type runtimeState struct {
	// LastCommitSHA — presumably the commit the last build was made from;
	// its writer is outside this file (TODO confirm).
	LastCommitSHA string `json:"last_commit_sha,omitempty"`
	// LastImageDigest is the image ID of the last build (see above).
	LastImageDigest string `json:"last_image_digest,omitempty"`
	// LastSyncAt — presumably a timestamp string for the last sync; the
	// format is set by the writer (TODO confirm).
	LastSyncAt string `json:"last_sync_at,omitempty"`
	// LastError is the most recent failure message; reconcile sets it when
	// the container is missing or stopped.
	LastError string `json:"last_error,omitempty"`
	// Status is the runtime status; reconcile reads "deployed" and writes
	// "failed".
	Status string `json:"status,omitempty"`
}
|
||||
|
||||
// runtimeStateKeys enumerates the JSON field names that belong to
// runtimeState. saveState removes these from the generic extra_json map
// before merging the freshly encoded typed state back in, so a typed value
// is never written twice: once under its own JSON tag and again as a stale
// leftover.
var runtimeStateKeys = []string{
	"last_commit_sha",
	"last_image_digest",
	"last_sync_at",
	"last_error",
	"status",
}
|
||||
|
||||
// containerRowID is the deterministic container row ID. Stable across
|
||||
// redeploys so saveState upserts in place.
|
||||
func containerRowID(w plugin.Workload) string {
|
||||
return w.ID + ":dockerfile"
|
||||
}
|
||||
|
||||
// loadState returns the persisted runtime state plus the underlying
|
||||
// container row. Both values are zero on first deploy.
|
||||
func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Container, error) {
|
||||
row, err := deps.Store.GetContainerByID(containerRowID(w))
|
||||
if err != nil {
|
||||
if errors.Is(err, store.ErrNotFound) {
|
||||
return runtimeState{}, nil, nil
|
||||
}
|
||||
return runtimeState{}, nil, fmt.Errorf("dockerfile source: load state: %w", err)
|
||||
}
|
||||
st := runtimeState{}
|
||||
if row.ExtraJSON != "" && row.ExtraJSON != "{}" {
|
||||
if err := json.Unmarshal([]byte(row.ExtraJSON), &st); err != nil {
|
||||
slog.Debug("dockerfile source: decode extra_json", "workload", w.ID, "error", err)
|
||||
}
|
||||
}
|
||||
return st, &row, nil
|
||||
}
|
||||
|
||||
// saveLocks serializes per-workload read-modify-write (RMW) of the container
// row. Same pattern as the static plugin — SQLite's MaxOpenConns=1
// serializes statements but not the caller's read-then-write intent, so two
// concurrent deploys for the same workload could stomp each other's
// container_id / proxy_route_id without this mutex.
//
// Entries are reference-counted and removed only when the last holder
// releases. This bounds memory (no per-workload-ID leak) WITHOUT the
// use-after-delete hazard of deleting an entry on teardown: deleting a
// live entry while a concurrent saveState still holds (or is about to
// lock) it would let a fresh saveState mint a SECOND mutex for the same
// workload, losing the RMW serialization the lock exists to provide.
var saveLocks struct {
	// mu guards the locks map and every entry's refs counter.
	mu sync.Mutex
	// locks maps workload ID to its lock entry; allocated lazily by
	// acquireSaveLock.
	locks map[string]*saveLock
}
|
||||
|
||||
// saveLock is one per-workload entry in saveLocks: the mutex callers contend
// on plus a count of callers that currently hold or are waiting for it.
type saveLock struct {
	// mu is the per-workload lock, taken by acquireSaveLock and released by
	// releaseSaveLock.
	mu sync.Mutex
	// refs counts registered holders; it is only read or written while
	// saveLocks.mu is held, and the entry is deleted when it reaches zero.
	refs int
}
|
||||
|
||||
// acquireSaveLock returns the per-workload lock (creating it on first use),
|
||||
// registers this caller as a holder, and takes the lock. Pair with
|
||||
// releaseSaveLock. The outer mutex is held only for the bookkeeping; callers
|
||||
// contend on the returned per-workload lock.
|
||||
func acquireSaveLock(workloadID string) *saveLock {
|
||||
saveLocks.mu.Lock()
|
||||
if saveLocks.locks == nil {
|
||||
saveLocks.locks = map[string]*saveLock{}
|
||||
}
|
||||
l, ok := saveLocks.locks[workloadID]
|
||||
if !ok {
|
||||
l = &saveLock{}
|
||||
saveLocks.locks[workloadID] = l
|
||||
}
|
||||
l.refs++
|
||||
saveLocks.mu.Unlock()
|
||||
l.mu.Lock()
|
||||
return l
|
||||
}
|
||||
|
||||
// releaseSaveLock unlocks and drops the caller's reference, removing the map
|
||||
// entry once no holders remain. Because refs is incremented under saveLocks.mu
|
||||
// before the entry can be observed for deletion, an entry with a pending
|
||||
// acquirer is never deleted.
|
||||
func releaseSaveLock(workloadID string, l *saveLock) {
|
||||
l.mu.Unlock()
|
||||
saveLocks.mu.Lock()
|
||||
l.refs--
|
||||
if l.refs == 0 {
|
||||
delete(saveLocks.locks, workloadID)
|
||||
}
|
||||
saveLocks.mu.Unlock()
|
||||
}
|
||||
|
||||
// saveState upserts the container row, calling mutate so callers can
|
||||
// adjust both the typed runtime state and the row's first-class fields
|
||||
// in one transaction. Unknown keys in extra_json survive the round-trip
|
||||
// so future writers can extend the blob without forcing this struct to
|
||||
// grow.
|
||||
func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
|
||||
lk := acquireSaveLock(w.ID)
|
||||
defer releaseSaveLock(w.ID, lk)
|
||||
|
||||
prev, prevRow, err := loadState(deps, w)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
row := store.Container{
|
||||
ID: containerRowID(w),
|
||||
WorkloadID: w.ID,
|
||||
WorkloadKind: string(store.WorkloadKindBuild),
|
||||
Host: "local",
|
||||
}
|
||||
if prevRow != nil {
|
||||
row = *prevRow
|
||||
}
|
||||
|
||||
generic := map[string]json.RawMessage{}
|
||||
if row.ExtraJSON != "" && row.ExtraJSON != "{}" {
|
||||
if err := json.Unmarshal([]byte(row.ExtraJSON), &generic); err != nil {
|
||||
slog.Debug("dockerfile source: decode extra_json (generic)", "workload", w.ID, "error", err)
|
||||
}
|
||||
}
|
||||
for _, k := range runtimeStateKeys {
|
||||
delete(generic, k)
|
||||
}
|
||||
|
||||
state := prev
|
||||
mutate(&state, &row)
|
||||
|
||||
typedBytes, err := json.Marshal(state)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dockerfile source: marshal state: %w", err)
|
||||
}
|
||||
typedMap := map[string]json.RawMessage{}
|
||||
if err := json.Unmarshal(typedBytes, &typedMap); err != nil {
|
||||
return fmt.Errorf("dockerfile source: re-decode typed state: %w", err)
|
||||
}
|
||||
for k, v := range typedMap {
|
||||
generic[k] = v
|
||||
}
|
||||
|
||||
merged, err := json.Marshal(generic)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dockerfile source: marshal merged state: %w", err)
|
||||
}
|
||||
row.ExtraJSON = string(merged)
|
||||
row.LastSeenAt = store.Now()
|
||||
|
||||
if err := deps.Store.UpsertContainer(row); err != nil {
|
||||
return fmt.Errorf("dockerfile source: upsert container row: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
package dockerfile
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log/slog"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||
)
|
||||
|
||||
// teardown drops every artifact deploy created: the running container,
|
||||
// the proxy route, the container index row. Idempotent — a workload
|
||||
// that never deployed is a no-op.
|
||||
//
|
||||
// The built image tag is left in place: removing it would invalidate
|
||||
// the docker build cache (next deploy of the same workload would
|
||||
// rebuild from scratch). Operators can prune unused images via the
|
||||
// existing Settings → Prune Images path.
|
||||
func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
|
||||
_, prevContainer, err := loadState(deps, w)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if prevContainer == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Proxy first so traffic stops landing on a container that is
|
||||
// about to disappear.
|
||||
if prevContainer.ProxyRouteID != "" {
|
||||
if err := deps.Proxy.DeleteRoute(ctx, prevContainer.ProxyRouteID); err != nil {
|
||||
slog.Warn("dockerfile: failed to remove proxy route", "workload", w.Name, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
if prevContainer.ContainerID != "" {
|
||||
if err := deps.Docker.RemoveContainer(ctx, prevContainer.ContainerID, true); err != nil {
|
||||
slog.Warn("dockerfile: failed to remove container", "workload", w.Name, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := deps.Store.DeleteContainer(prevContainer.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
|
||||
slog.Warn("dockerfile: failed to delete container row", "workload", w.Name, "error", err)
|
||||
}
|
||||
// The per-workload save-mutex is reference-counted (see state.go) and
|
||||
// frees itself when the last holder releases, so teardown no longer
|
||||
// deletes it explicitly — doing so could race a concurrent saveState
|
||||
// and break the RMW serialization the lock provides.
|
||||
return nil
|
||||
}
|
||||
@@ -444,22 +444,12 @@ func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg
|
||||
}
|
||||
|
||||
// dispatchSiteNotification fires a site_sync_success or
|
||||
// site_sync_failure event to the configured outbound webhook.
|
||||
// Resolution: per-workload URL+secret first, then fall through to
|
||||
// settings.notification_url/secret. Always best-effort.
|
||||
// site_sync_failure event for the workload via the shared multi-route
|
||||
// dispatcher in plugin.DispatchNotificationForWorkload. Resolution
|
||||
// order (workload_notifications → legacy single URL → settings global)
|
||||
// is identical to the dockerfile plugin's path so receivers see
|
||||
// consistent fan-out behaviour across source kinds.
|
||||
func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
|
||||
if deps.Notifier == nil {
|
||||
return
|
||||
}
|
||||
settings, err := deps.Store.GetSettings()
|
||||
if err != nil {
|
||||
slog.Warn("static site: notify settings lookup failed", "site", w.ID, "error", err)
|
||||
return
|
||||
}
|
||||
url, secret, tier := resolveSiteTarget(w, settings)
|
||||
if url == "" {
|
||||
return
|
||||
}
|
||||
eventType := "site_sync_success"
|
||||
if status == "failed" {
|
||||
eventType = "site_sync_failure"
|
||||
@@ -468,7 +458,7 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
|
||||
if domain != "" {
|
||||
siteURL = "https://" + domain
|
||||
}
|
||||
deps.Notifier.SendSigned(url, secret, tier, notify.Event{
|
||||
plugin.DispatchNotificationForWorkload(deps, w, notify.Event{
|
||||
Type: eventType,
|
||||
Project: w.Name,
|
||||
URL: siteURL,
|
||||
@@ -476,16 +466,6 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
|
||||
})
|
||||
}
|
||||
|
||||
// resolveSiteTarget mirrors the legacy resolveSiteTarget helper but
|
||||
// reads notification config off the workload row (where it now lives
|
||||
// post-refactor) rather than the static_sites row.
|
||||
func resolveSiteTarget(w plugin.Workload, settings store.Settings) (string, string, notify.Tier) {
|
||||
if w.NotificationURL != "" {
|
||||
return w.NotificationURL, w.NotificationSecret, notify.TierSite
|
||||
}
|
||||
return settings.NotificationURL, settings.NotificationSecret, notify.TierSettings
|
||||
}
|
||||
|
||||
// publishEvent emits a static_site_status event on the bus AND
|
||||
// persists an event_log row so the dashboard's audit trail picks it
|
||||
// up. Message format ("Static site \"%s\": %s") is preserved verbatim
|
||||
|
||||
@@ -165,30 +165,42 @@ func TestContainerRowID_Deterministic(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockFor_ReturnsSameLockForSameWorkload(t *testing.T) {
|
||||
// Suffix by t.Name() so the package-global saveLocks map cannot
|
||||
// bleed key state between tests (or between -count=N runs).
|
||||
func TestSaveLock_FreedWhenIdle(t *testing.T) {
|
||||
// After the last holder releases, the reference-counted entry must be
|
||||
// removed from the map so the lock table cannot grow without bound.
|
||||
// Suffix by t.Name() so the package-global saveLocks map cannot bleed
|
||||
// key state between tests (or between -count=N runs).
|
||||
key := t.Name() + "-wid"
|
||||
a := lockFor(key)
|
||||
b := lockFor(key)
|
||||
if a != b {
|
||||
t.Fatalf("lockFor returned distinct locks for same workload: %p vs %p", a, b)
|
||||
lk := acquireSaveLock(key)
|
||||
saveLocks.mu.Lock()
|
||||
_, present := saveLocks.locks[key]
|
||||
saveLocks.mu.Unlock()
|
||||
if !present {
|
||||
t.Fatal("acquireSaveLock did not register the entry while held")
|
||||
}
|
||||
releaseSaveLock(key, lk)
|
||||
saveLocks.mu.Lock()
|
||||
_, stillPresent := saveLocks.locks[key]
|
||||
saveLocks.mu.Unlock()
|
||||
if stillPresent {
|
||||
t.Fatal("releaseSaveLock left the entry behind after the last holder released")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockFor_ReturnsDistinctLocksForDifferentWorkloads(t *testing.T) {
|
||||
a := lockFor(t.Name() + "-a")
|
||||
b := lockFor(t.Name() + "-b")
|
||||
if a == b {
|
||||
t.Fatalf("lockFor returned same lock for different workloads: %p", a)
|
||||
}
|
||||
func TestSaveLock_DistinctWorkloadsDoNotSerialize(t *testing.T) {
|
||||
// Two different workloads must be lockable at the same time. If they
|
||||
// shared a mutex the second acquire would block forever (deadlock).
|
||||
a := acquireSaveLock(t.Name() + "-a")
|
||||
b := acquireSaveLock(t.Name() + "-b")
|
||||
releaseSaveLock(t.Name()+"-b", b)
|
||||
releaseSaveLock(t.Name()+"-a", a)
|
||||
}
|
||||
|
||||
func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
|
||||
// Two goroutines holding the same lock must run sequentially. The
|
||||
// counter would race past 2 if locking were broken; with the lock,
|
||||
// the increment is observed monotonically.
|
||||
lk := lockFor(t.Name() + "-wid")
|
||||
func TestSaveLock_SerializesConcurrentAcquisitions(t *testing.T) {
|
||||
// Goroutines acquiring the same workload's lock must run sequentially.
|
||||
// The counter would race past 1 if locking were broken; with the lock,
|
||||
// peak in-flight stays at 1.
|
||||
key := t.Name() + "-wid"
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
mu sync.Mutex
|
||||
@@ -199,8 +211,8 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
lk.Lock()
|
||||
defer lk.Unlock()
|
||||
lk := acquireSaveLock(key)
|
||||
defer releaseSaveLock(key, lk)
|
||||
|
||||
mu.Lock()
|
||||
counter++
|
||||
@@ -216,15 +228,15 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
|
||||
}
|
||||
wg.Wait()
|
||||
if peak != 1 {
|
||||
t.Fatalf("lockFor failed to serialize: peak in-flight = %d, want 1", peak)
|
||||
t.Fatalf("acquireSaveLock failed to serialize: peak in-flight = %d, want 1", peak)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
|
||||
// Distinct workloads acquired in parallel must not panic on map
|
||||
// access — exercises the outer-mutex protection inside lockFor.
|
||||
// Each iteration uses a unique key so the test stresses the
|
||||
// insertion path (the common case for "first deploy" callers).
|
||||
func TestSaveLock_ConcurrentMapAccessIsSafe(t *testing.T) {
|
||||
// Distinct workloads acquired+released in parallel must not panic on map
|
||||
// access — exercises the outer-mutex protection inside acquire/release.
|
||||
// Each iteration uses a unique key so the test stresses the insertion +
|
||||
// refcount-cleanup paths (the common case for "first deploy" callers).
|
||||
prefix := t.Name() + "-"
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < 50; i++ {
|
||||
@@ -232,9 +244,9 @@ func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
lk := lockFor(prefix + strconv.Itoa(i))
|
||||
lk.Lock()
|
||||
lk.Unlock()
|
||||
key := prefix + strconv.Itoa(i)
|
||||
lk := acquireSaveLock(key)
|
||||
releaseSaveLock(key, lk)
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
@@ -80,26 +80,55 @@ func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Contai
|
||||
// container_id / proxy_route_id and orphaning Docker resources. The
|
||||
// mutex caps the concurrency at 1 per workload; cross-workload
|
||||
// parallelism is unaffected.
|
||||
//
|
||||
// Entries are reference-counted and removed only when the last holder
|
||||
// releases. This bounds memory (no per-workload-ID leak) WITHOUT the
|
||||
// use-after-delete hazard of deleting an entry on teardown: deleting a
|
||||
// live entry while a concurrent saveState still holds (or is about to
|
||||
// lock) it would let a fresh saveState mint a SECOND mutex for the same
|
||||
// workload, losing the RMW serialization the lock exists to provide.
|
||||
var saveLocks struct {
|
||||
mu sync.Mutex
|
||||
locks map[string]*sync.Mutex
|
||||
locks map[string]*saveLock
|
||||
}
|
||||
|
||||
// lockFor returns the per-workload mutex, creating it on first use.
|
||||
// The outer mutex is held only briefly during map lookup; the returned
|
||||
// per-workload lock is what callers actually contend on.
|
||||
func lockFor(workloadID string) *sync.Mutex {
|
||||
type saveLock struct {
|
||||
mu sync.Mutex
|
||||
refs int
|
||||
}
|
||||
|
||||
// acquireSaveLock returns the per-workload lock (creating it on first use),
|
||||
// registers this caller as a holder, and takes the lock. Pair with
|
||||
// releaseSaveLock. The outer mutex is held only for the bookkeeping; callers
|
||||
// contend on the returned per-workload lock.
|
||||
func acquireSaveLock(workloadID string) *saveLock {
|
||||
saveLocks.mu.Lock()
|
||||
defer saveLocks.mu.Unlock()
|
||||
if saveLocks.locks == nil {
|
||||
saveLocks.locks = map[string]*sync.Mutex{}
|
||||
saveLocks.locks = map[string]*saveLock{}
|
||||
}
|
||||
m, ok := saveLocks.locks[workloadID]
|
||||
l, ok := saveLocks.locks[workloadID]
|
||||
if !ok {
|
||||
m = &sync.Mutex{}
|
||||
saveLocks.locks[workloadID] = m
|
||||
l = &saveLock{}
|
||||
saveLocks.locks[workloadID] = l
|
||||
}
|
||||
return m
|
||||
l.refs++
|
||||
saveLocks.mu.Unlock()
|
||||
l.mu.Lock()
|
||||
return l
|
||||
}
|
||||
|
||||
// releaseSaveLock unlocks and drops the caller's reference, removing the map
|
||||
// entry once no holders remain. Because refs is incremented under saveLocks.mu
|
||||
// before the entry can be observed for deletion, an entry with a pending
|
||||
// acquirer is never deleted.
|
||||
func releaseSaveLock(workloadID string, l *saveLock) {
|
||||
l.mu.Unlock()
|
||||
saveLocks.mu.Lock()
|
||||
l.refs--
|
||||
if l.refs == 0 {
|
||||
delete(saveLocks.locks, workloadID)
|
||||
}
|
||||
saveLocks.mu.Unlock()
|
||||
}
|
||||
|
||||
// saveState upserts the container row, calling mutate so callers can
|
||||
@@ -115,9 +144,8 @@ func lockFor(workloadID string) *sync.Mutex {
|
||||
// Per-workload mutex serializes concurrent callers so two parallel
|
||||
// Deploys can't read the same prior state and race their writes.
|
||||
func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
|
||||
lk := lockFor(w.ID)
|
||||
lk.Lock()
|
||||
defer lk.Unlock()
|
||||
lk := acquireSaveLock(w.ID)
|
||||
defer releaseSaveLock(w.ID, lk)
|
||||
|
||||
prev, prevRow, err := loadState(deps, w)
|
||||
if err != nil {
|
||||
|
||||
@@ -185,14 +185,23 @@ func TestSaveState_RecoversFromInvalidExtraJSON(t *testing.T) {
|
||||
deps, _ := testDeps(t)
|
||||
w := plugin.Workload{ID: t.Name() + "-wid", Name: "site"}
|
||||
|
||||
// UpsertContainer now validates extra_json at the boundary, so this
|
||||
// test seeds a valid row first and corrupts it via raw SQL to
|
||||
// simulate a pre-existing bad row from an upgrade / external edit.
|
||||
if err := deps.Store.UpsertContainer(store.Container{
|
||||
ID: containerRowID(w),
|
||||
WorkloadID: w.ID,
|
||||
WorkloadKind: string(store.WorkloadKindSite),
|
||||
Host: "local",
|
||||
ExtraJSON: `{not json`,
|
||||
ExtraJSON: `{}`,
|
||||
}); err != nil {
|
||||
t.Fatalf("seed bad row: %v", err)
|
||||
t.Fatalf("seed row: %v", err)
|
||||
}
|
||||
if _, err := deps.Store.DB().Exec(
|
||||
`UPDATE containers SET extra_json = ? WHERE id = ?`,
|
||||
`{not json`, containerRowID(w),
|
||||
); err != nil {
|
||||
t.Fatalf("corrupt extra_json: %v", err)
|
||||
}
|
||||
|
||||
err := saveState(deps, w, func(state *runtimeState, _ *store.Container) {
|
||||
|
||||
@@ -66,5 +66,8 @@ func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
|
||||
if err := deps.Store.DeleteContainer(prevContainer.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
|
||||
slog.Warn("static site: failed to delete container row", "site", w.Name, "error", err)
|
||||
}
|
||||
// The per-workload save-mutex is reference-counted (see state.go) and
|
||||
// frees itself when the last holder releases, so teardown no longer
|
||||
// deletes it explicitly — doing so could race a concurrent saveState.
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -18,11 +18,19 @@ import (
|
||||
// match the event repo). Mode controls whether branch pushes or tag
|
||||
// pushes fire the deploy. Branch is exact-matched when Mode=="push";
|
||||
// TagPattern is glob-matched when Mode=="tag".
|
||||
//
|
||||
// BranchPattern is the preview-deploy escape hatch: when non-empty in
|
||||
// "push" mode it overrides Branch and matches the event branch as a glob
|
||||
// (`feat/*`, `release-*`, `*` for "any branch"). The trigger returns an
|
||||
// intent whose Metadata["preview_branch"] holds the matched branch — the
|
||||
// dispatcher uses that signal to materialize an ephemeral per-branch
|
||||
// child workload rather than redeploying the parent.
|
||||
type Config struct {
|
||||
Repo string `json:"repo"`
|
||||
Mode string `json:"mode"` // "push" | "tag"
|
||||
Branch string `json:"branch"`
|
||||
TagPattern string `json:"tag_pattern"`
|
||||
Repo string `json:"repo"`
|
||||
Mode string `json:"mode"` // "push" | "tag"
|
||||
Branch string `json:"branch"`
|
||||
BranchPattern string `json:"branch_pattern"`
|
||||
TagPattern string `json:"tag_pattern"`
|
||||
}
|
||||
|
||||
type trigger struct{}
|
||||
@@ -49,7 +57,15 @@ func (*trigger) Validate(cfg json.RawMessage) error {
|
||||
}
|
||||
switch c.Mode {
|
||||
case "push":
|
||||
// Branch is optional ("" means any branch).
|
||||
// Branch is optional ("" means any branch). BranchPattern is
|
||||
// validated as a path.Match glob if present; misconfigured
|
||||
// patterns are rejected at the boundary rather than letting them
|
||||
// fail silently inside Match.
|
||||
if c.BranchPattern != "" {
|
||||
if _, err := path.Match(c.BranchPattern, "probe"); err != nil {
|
||||
return fmt.Errorf("git trigger: invalid branch_pattern %q: %w", c.BranchPattern, err)
|
||||
}
|
||||
}
|
||||
case "tag":
|
||||
pattern := c.TagPattern
|
||||
if pattern == "" {
|
||||
@@ -90,8 +106,24 @@ func (*trigger) Match(ctx context.Context, deps plugin.Deps, w plugin.Workload,
|
||||
if evt.Git.Tag != "" {
|
||||
meta["tag"] = evt.Git.Tag
|
||||
}
|
||||
// Preview-deploy signal: when BranchPattern is set AND the matched
|
||||
// branch is NOT the configured baseline Branch, flag this dispatch
|
||||
// for materialization as a per-branch child workload. The dispatcher
|
||||
// reads preview_branch and decides whether to spawn a preview row;
|
||||
// a baseline-branch push falls through to a normal redeploy of the
|
||||
// template itself.
|
||||
if cfg.Mode == "push" && cfg.BranchPattern != "" && evt.Git.Branch != "" && evt.Git.Branch != cfg.Branch {
|
||||
meta["preview_branch"] = evt.Git.Branch
|
||||
if evt.Git.Deleted {
|
||||
meta["preview_deleted"] = "1"
|
||||
}
|
||||
}
|
||||
reason := "git-push"
|
||||
if meta["preview_deleted"] == "1" {
|
||||
reason = "git-branch-deleted"
|
||||
}
|
||||
return &plugin.DeploymentIntent{
|
||||
Reason: "git-push",
|
||||
Reason: reason,
|
||||
Reference: evt.Git.CommitSHA,
|
||||
Metadata: meta,
|
||||
TriggeredAt: time.Now().UTC(),
|
||||
@@ -106,6 +138,17 @@ func refMatches(cfg Config, ref string) bool {
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
// Pattern-mode preview filter: any branch whose name matches the
|
||||
// glob is in scope. The baseline `cfg.Branch` is also allowed so
|
||||
// pushes to the template's primary branch keep redeploying the
|
||||
// template itself.
|
||||
if cfg.BranchPattern != "" {
|
||||
if cfg.Branch != "" && cfg.Branch == branch {
|
||||
return true
|
||||
}
|
||||
matched, err := path.Match(cfg.BranchPattern, branch)
|
||||
return err == nil && matched
|
||||
}
|
||||
return cfg.Branch == "" || cfg.Branch == branch
|
||||
case "tag":
|
||||
tag, ok := strings.CutPrefix(ref, "refs/tags/")
|
||||
|
||||
@@ -56,14 +56,21 @@ type ImagePushEvent struct {
|
||||
|
||||
// GitEvent covers both push (commits) and tag-create flavors. Vendor is
|
||||
// "gitea" | "github" | "gitlab" | "" (autodetected).
|
||||
//
|
||||
// Deleted is true when the push event reports a branch / tag was deleted.
|
||||
// Used by the preview-deploy flow to tear down ephemeral per-branch
|
||||
// workloads when a feature branch is removed upstream. Inferred from
|
||||
// GitHub-style `deleted: true` and Gitea's identical convention; GitLab
|
||||
// signals deletion via after-SHA zeros (parsed at vendor level).
|
||||
type GitEvent struct {
|
||||
Vendor string
|
||||
Repo string // owner/name
|
||||
Ref string // refs/heads/main or refs/tags/v1.2.3
|
||||
Branch string // populated for branch refs
|
||||
Tag string // populated for tag refs
|
||||
Vendor string
|
||||
Repo string // owner/name
|
||||
Ref string // refs/heads/main or refs/tags/v1.2.3
|
||||
Branch string // populated for branch refs
|
||||
Tag string // populated for tag refs
|
||||
CommitSHA string
|
||||
Pusher string
|
||||
Deleted bool
|
||||
}
|
||||
|
||||
// ManualEvent represents a user-initiated deploy from the UI or API.
|
||||
|
||||
@@ -0,0 +1,239 @@
|
||||
// Package preview implements branch-pattern preview deploys. A "template"
|
||||
// workload is one whose git trigger has a BranchPattern configured; when
|
||||
// an inbound push event names a branch other than the template's primary
|
||||
// Branch, the dispatcher materializes (or reuses) a child workload via
|
||||
// MaterializeForBranch and dispatches the deploy against the child. The
|
||||
// child is then torn down on a matching branch-delete event.
|
||||
//
|
||||
// The package is intentionally narrow:
|
||||
// - it does not know about Docker, the proxy, or any plugin internals
|
||||
// - it operates over a Store interface so the webhook handler can mock
|
||||
// it in tests
|
||||
// - it owns the per-branch naming + subdomain mangling so the wiring
|
||||
// code (trigger fan-out) stays a pure dispatch path
|
||||
package preview
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// Store is the slice of the persistence layer the preview package needs.
|
||||
// Defined locally so tests can fake it without dragging the full Store.
|
||||
type Store interface {
|
||||
GetWorkloadByID(id string) (store.Workload, error)
|
||||
ListChildrenByParent(parentID string) ([]store.Workload, error)
|
||||
CreateWorkload(w store.Workload) (store.Workload, error)
|
||||
DeleteWorkload(id string) error
|
||||
}
|
||||
|
||||
// branchSlugPattern matches every run of characters outside [a-z0-9-].
// slugifyBranch replaces each such run with a single hyphen. Compiled once
// at package scope.
var branchSlugPattern = regexp.MustCompile(`[^a-z0-9-]+`)

// slugifyBranch turns a git branch name into a lowercase slug made only of
// [a-z0-9-]. Slashes become hyphens one-for-one, any other run of disallowed
// characters collapses into one hyphen, and leading/trailing hyphens are
// removed. The result is capped at 32 bytes (re-trimmed after the cut so it
// never ends in a hyphen). A branch that reduces to nothing yields "branch".
func slugifyBranch(branch string) string {
	const (
		fallback = "branch"
		maxLen   = 32
	)

	slug := strings.ReplaceAll(strings.ToLower(branch), "/", "-")
	slug = strings.Trim(branchSlugPattern.ReplaceAllString(slug, "-"), "-")

	// After the regexp pass the slug is pure ASCII, so a byte-index cut
	// cannot split a multi-byte rune.
	if len(slug) > maxLen {
		slug = strings.Trim(slug[:maxLen], "-")
	}
	if slug == "" {
		return fallback
	}
	return slug
}
|
||||
|
||||
// findExistingPreview returns the child workload whose source_config
|
||||
// already names `branch`, if any. Linear scan over the children list —
|
||||
// fine because the bound is "branches a single team keeps open at once"
|
||||
// which is in the dozens, not thousands.
|
||||
func findExistingPreview(children []store.Workload, branch string) (store.Workload, bool) {
|
||||
for _, c := range children {
|
||||
var cfg struct {
|
||||
Branch string `json:"branch"`
|
||||
}
|
||||
if c.SourceConfig != "" {
|
||||
_ = json.Unmarshal([]byte(c.SourceConfig), &cfg)
|
||||
}
|
||||
if cfg.Branch == branch {
|
||||
return c, true
|
||||
}
|
||||
}
|
||||
return store.Workload{}, false
|
||||
}
|
||||
|
||||
// patchSourceConfigBranch returns a copy of the template's source_config
// with the "branch" field set to branch. All other top-level keys are kept
// as raw JSON so plugin-specific settings (port, dockerfile path, storage
// settings, ...) round-trip untouched.
//
// A source_config that is empty, "{}", malformed, not a JSON object, or the
// JSON literal null is treated as an empty object, so the preview starts
// from a clean baseline instead of inheriting garbage.
//
// It returns an error when branch is empty or when re-encoding fails.
func patchSourceConfigBranch(sourceConfig, branch string) (string, error) {
	if branch == "" {
		return "", fmt.Errorf("preview: branch is empty")
	}

	fields := map[string]json.RawMessage{}
	if sourceConfig != "" && sourceConfig != "{}" {
		// json.Unmarshal turns a JSON null into a nil map WITHOUT returning
		// an error; writing to that nil map below would panic, so a nil
		// result is reset exactly like a decode failure.
		if err := json.Unmarshal([]byte(sourceConfig), &fields); err != nil || fields == nil {
			fields = map[string]json.RawMessage{}
		}
	}

	encodedBranch, err := json.Marshal(branch)
	if err != nil {
		return "", fmt.Errorf("preview: encode branch: %w", err)
	}
	fields["branch"] = encodedBranch

	out, err := json.Marshal(fields)
	if err != nil {
		return "", fmt.Errorf("preview: encode source_config: %w", err)
	}
	return string(out), nil
}
|
||||
|
||||
// patchPublicFacesSubdomain rewrites the JSON array publicFaces so every
// face with a non-empty string "subdomain" gets slug + "-" prepended to it.
// Faces whose subdomain is missing, empty, or not a string are left as they
// are. An input of "" or "[]" is returned unchanged.
//
// Undecodable input is an error rather than a pass-through: handing the
// template's faces back verbatim would give the preview the very same
// subdomains as the template, which is the collision the prefix exists to
// prevent.
func patchPublicFacesSubdomain(publicFaces, slug string) (string, error) {
	switch publicFaces {
	case "", "[]":
		return publicFaces, nil
	}

	var decoded []map[string]any
	if err := json.Unmarshal([]byte(publicFaces), &decoded); err != nil {
		return "", fmt.Errorf("preview: parse public_faces: %w", err)
	}

	for i := range decoded {
		current, isString := decoded[i]["subdomain"].(string)
		if isString && current != "" {
			decoded[i]["subdomain"] = slug + "-" + current
		}
	}

	encoded, err := json.Marshal(decoded)
	if err != nil {
		return "", fmt.Errorf("preview: re-encode public_faces: %w", err)
	}
	return string(encoded), nil
}
|
||||
|
||||
// IsPreviewChild reports whether child was materialized as a branch preview
|
||||
// of template (vs. an operator-created stage-chain member that merely shares
|
||||
// the parent link — both use parent_workload_id). It reverses the exact
|
||||
// MaterializeForBranch naming formula — name == template.Name + "/" +
|
||||
// slugifyBranch(child's branch) — so a hand-named stage workload under the
|
||||
// same parent is never mistaken for a preview and cascade-deleted.
|
||||
func IsPreviewChild(template, child store.Workload) bool {
|
||||
if child.ParentWorkloadID != template.ID {
|
||||
return false
|
||||
}
|
||||
var cfg struct {
|
||||
Branch string `json:"branch"`
|
||||
}
|
||||
if child.SourceConfig != "" {
|
||||
_ = json.Unmarshal([]byte(child.SourceConfig), &cfg)
|
||||
}
|
||||
if cfg.Branch == "" {
|
||||
return false
|
||||
}
|
||||
return child.Name == template.Name+"/"+slugifyBranch(cfg.Branch)
|
||||
}
|
||||
|
||||
// ListPreviewChildren returns every preview workload materialized from
|
||||
// template. Used by the delete path to cascade-teardown previews so deleting
|
||||
// a template does not orphan their containers, proxy routes, and rows.
|
||||
func ListPreviewChildren(s Store, template store.Workload) ([]store.Workload, error) {
|
||||
children, err := s.ListChildrenByParent(template.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("preview: list children: %w", err)
|
||||
}
|
||||
out := make([]store.Workload, 0, len(children))
|
||||
for _, c := range children {
|
||||
if IsPreviewChild(template, c) {
|
||||
out = append(out, c)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// MaterializeForBranch returns the existing preview workload for
|
||||
// (template, branch) or creates one if none exists. The new workload
|
||||
// inherits the template's source kind, trigger kind, notification
|
||||
// settings, and public faces (with the branch slug prefixed onto each
|
||||
// subdomain). Idempotent: a second call with the same arguments returns
|
||||
// the same workload row.
|
||||
func MaterializeForBranch(s Store, template store.Workload, branch string) (store.Workload, error) {
|
||||
if branch == "" {
|
||||
return store.Workload{}, fmt.Errorf("preview: branch is required")
|
||||
}
|
||||
|
||||
children, err := s.ListChildrenByParent(template.ID)
|
||||
if err != nil {
|
||||
return store.Workload{}, fmt.Errorf("preview: list children: %w", err)
|
||||
}
|
||||
if existing, ok := findExistingPreview(children, branch); ok {
|
||||
return existing, nil
|
||||
}
|
||||
|
||||
slug := slugifyBranch(branch)
|
||||
newCfg, err := patchSourceConfigBranch(template.SourceConfig, branch)
|
||||
if err != nil {
|
||||
return store.Workload{}, err
|
||||
}
|
||||
newFaces, err := patchPublicFacesSubdomain(template.PublicFaces, slug)
|
||||
if err != nil {
|
||||
return store.Workload{}, err
|
||||
}
|
||||
|
||||
// Webhook + notification secrets are NOT copied to the preview. The
|
||||
// trigger dispatch reaches previews via the parent's trigger binding,
|
||||
// not via a per-preview inbound webhook, so the preview never needs
|
||||
// its own signing secret. Keeping these empty also stops the preview
|
||||
// from masquerading as a first-class workload in webhook routes.
|
||||
child := store.Workload{
|
||||
Kind: template.Kind,
|
||||
Name: template.Name + "/" + slug,
|
||||
AppID: template.AppID,
|
||||
SourceKind: template.SourceKind,
|
||||
SourceConfig: newCfg,
|
||||
TriggerKind: template.TriggerKind,
|
||||
TriggerConfig: template.TriggerConfig,
|
||||
PublicFaces: newFaces,
|
||||
ParentWorkloadID: template.ID,
|
||||
}
|
||||
created, err := s.CreateWorkload(child)
|
||||
if err != nil {
|
||||
return store.Workload{}, fmt.Errorf("preview: create child: %w", err)
|
||||
}
|
||||
return created, nil
|
||||
}
|
||||
|
||||
// FindPreviewForBranch looks up an existing preview without creating
|
||||
// one. Returns (Workload{}, false, nil) when no preview exists. Errors
|
||||
// only on a store failure.
|
||||
func FindPreviewForBranch(s Store, templateID, branch string) (store.Workload, bool, error) {
|
||||
if templateID == "" || branch == "" {
|
||||
return store.Workload{}, false, nil
|
||||
}
|
||||
children, err := s.ListChildrenByParent(templateID)
|
||||
if err != nil {
|
||||
return store.Workload{}, false, fmt.Errorf("preview: list children: %w", err)
|
||||
}
|
||||
w, ok := findExistingPreview(children, branch)
|
||||
return w, ok, nil
|
||||
}
|
||||
@@ -0,0 +1,200 @@
|
||||
package preview
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// fakeStore is a minimal in-memory store satisfying the preview.Store
|
||||
// interface. Tests verify business logic without the SQLite layer.
|
||||
type fakeStore struct {
|
||||
workloads map[string]store.Workload
|
||||
createErr error
|
||||
}
|
||||
|
||||
func newFakeStore() *fakeStore {
|
||||
return &fakeStore{workloads: map[string]store.Workload{}}
|
||||
}
|
||||
|
||||
func (f *fakeStore) GetWorkloadByID(id string) (store.Workload, error) {
|
||||
w, ok := f.workloads[id]
|
||||
if !ok {
|
||||
return store.Workload{}, errors.New("not found")
|
||||
}
|
||||
return w, nil
|
||||
}
|
||||
|
||||
func (f *fakeStore) ListChildrenByParent(parentID string) ([]store.Workload, error) {
|
||||
out := []store.Workload{}
|
||||
for _, w := range f.workloads {
|
||||
if w.ParentWorkloadID == parentID {
|
||||
out = append(out, w)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (f *fakeStore) CreateWorkload(w store.Workload) (store.Workload, error) {
|
||||
if f.createErr != nil {
|
||||
return store.Workload{}, f.createErr
|
||||
}
|
||||
if w.ID == "" {
|
||||
w.ID = "preview-" + w.Name
|
||||
}
|
||||
f.workloads[w.ID] = w
|
||||
return w, nil
|
||||
}
|
||||
|
||||
func (f *fakeStore) DeleteWorkload(id string) error {
|
||||
delete(f.workloads, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestSlugifyBranch_StripsUnsafeChars(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"main", "main"},
|
||||
{"Feature/User-Auth", "feature-user-auth"},
|
||||
{"PR#42", "pr-42"},
|
||||
{"release/v1.2.3", "release-v1-2-3"},
|
||||
{"___", "branch"},
|
||||
{strings.Repeat("a", 50), strings.Repeat("a", 32)},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := slugifyBranch(c.in)
|
||||
if got != c.want {
|
||||
t.Errorf("slugifyBranch(%q) = %q, want %q", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPatchSourceConfigBranch_PreservesUnknownKeys(t *testing.T) {
|
||||
src := `{"port":3000,"dockerfile_path":"Dockerfile","branch":"main","provider":"github"}`
|
||||
out, err := patchSourceConfigBranch(src, "feat/x")
|
||||
if err != nil {
|
||||
t.Fatalf("patch: %v", err)
|
||||
}
|
||||
var got map[string]any
|
||||
if err := json.Unmarshal([]byte(out), &got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got["branch"] != "feat/x" {
|
||||
t.Errorf("branch = %v, want feat/x", got["branch"])
|
||||
}
|
||||
if got["port"] == nil || got["dockerfile_path"] == nil || got["provider"] == nil {
|
||||
t.Errorf("unknown keys dropped: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPatchPublicFacesSubdomain_PrefixesSubdomains(t *testing.T) {
|
||||
faces := `[{"subdomain":"app","domain":"example.com"},{"subdomain":"","domain":"raw.example.com"}]`
|
||||
out, err := patchPublicFacesSubdomain(faces, "feat-x")
|
||||
if err != nil {
|
||||
t.Fatalf("patch: %v", err)
|
||||
}
|
||||
var got []map[string]any
|
||||
if err := json.Unmarshal([]byte(out), &got); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if got[0]["subdomain"] != "feat-x-app" {
|
||||
t.Errorf("first subdomain = %v, want feat-x-app", got[0]["subdomain"])
|
||||
}
|
||||
if got[1]["subdomain"] != "" {
|
||||
t.Errorf("empty subdomain must stay empty, got %v", got[1]["subdomain"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaterializeForBranch_CreatesNewWhenMissing(t *testing.T) {
|
||||
fs := newFakeStore()
|
||||
template := store.Workload{
|
||||
ID: "tmpl-1",
|
||||
Kind: "project",
|
||||
Name: "myapp",
|
||||
AppID: "app-1",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"branch":"main","port":3000}`,
|
||||
TriggerKind: "git",
|
||||
PublicFaces: `[{"subdomain":"www","domain":"x.test"}]`,
|
||||
}
|
||||
fs.workloads[template.ID] = template
|
||||
|
||||
child, err := MaterializeForBranch(fs, template, "feat/login")
|
||||
if err != nil {
|
||||
t.Fatalf("materialize: %v", err)
|
||||
}
|
||||
if child.ParentWorkloadID != template.ID {
|
||||
t.Errorf("parent = %q, want %q", child.ParentWorkloadID, template.ID)
|
||||
}
|
||||
if !strings.Contains(child.Name, "feat-login") {
|
||||
t.Errorf("name = %q, want it to include slug", child.Name)
|
||||
}
|
||||
var cfg map[string]any
|
||||
if err := json.Unmarshal([]byte(child.SourceConfig), &cfg); err != nil {
|
||||
t.Fatalf("decode child source_config: %v", err)
|
||||
}
|
||||
if cfg["branch"] != "feat/login" {
|
||||
t.Errorf("child branch = %v, want feat/login", cfg["branch"])
|
||||
}
|
||||
if cfg["port"] == nil {
|
||||
t.Errorf("child should inherit template port; got %+v", cfg)
|
||||
}
|
||||
var faces []map[string]any
|
||||
if err := json.Unmarshal([]byte(child.PublicFaces), &faces); err != nil {
|
||||
t.Fatalf("decode child faces: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(faces[0]["subdomain"].(string), "feat-login-") {
|
||||
t.Errorf("face subdomain = %v, want feat-login- prefix", faces[0]["subdomain"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaterializeForBranch_ReusesExisting(t *testing.T) {
|
||||
fs := newFakeStore()
|
||||
template := store.Workload{
|
||||
ID: "tmpl-1",
|
||||
Kind: "project",
|
||||
Name: "myapp",
|
||||
SourceKind: "dockerfile",
|
||||
SourceConfig: `{"branch":"main"}`,
|
||||
}
|
||||
fs.workloads[template.ID] = template
|
||||
|
||||
first, err := MaterializeForBranch(fs, template, "feat/x")
|
||||
if err != nil {
|
||||
t.Fatalf("first materialize: %v", err)
|
||||
}
|
||||
second, err := MaterializeForBranch(fs, template, "feat/x")
|
||||
if err != nil {
|
||||
t.Fatalf("second materialize: %v", err)
|
||||
}
|
||||
if first.ID != second.ID {
|
||||
t.Errorf("expected idempotence: got %q then %q", first.ID, second.ID)
|
||||
}
|
||||
if len(fs.workloads) != 2 {
|
||||
t.Errorf("expected exactly one preview created, store has %d", len(fs.workloads))
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaterializeForBranch_RejectsEmptyBranch(t *testing.T) {
|
||||
fs := newFakeStore()
|
||||
_, err := MaterializeForBranch(fs, store.Workload{ID: "tmpl"}, "")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty branch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindPreviewForBranch_MissingReturnsFalse(t *testing.T) {
|
||||
fs := newFakeStore()
|
||||
_, ok, err := FindPreviewForBranch(fs, "tmpl", "feat/x")
|
||||
if err != nil {
|
||||
t.Fatalf("find: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Error("expected ok=false for missing preview")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user