feat(apps): stepped creation wizard, branch previews, and app-creation fixes

This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
  WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
  ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
  + {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
  /apps/[id] edit form onto the same components (removes the duplication). Add
  vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
  environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
  state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
  conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
  label hints; dashboard + /apps "Total workloads" count only source_kind workloads
  (drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
  empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.

Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
This commit is contained in:
2026-05-29 02:09:54 +03:00
parent 956943edbb
commit 410a131cec
112 changed files with 13285 additions and 2765 deletions
+4 -5
View File
@@ -16,13 +16,12 @@ import (
)
// rateLimitedLogin wraps the login handler with per-IP rate limiting.
// Uses clientIP() so X-Forwarded-For is honored only when the request
// arrives from a configured trusted-proxy CIDR — preventing remote
// attackers from spoofing the header to bypass the per-IP login limiter.
func (s *Server) rateLimitedLogin(rl *rateLimiter) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
ip := r.RemoteAddr
if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
ip = fwd
}
if !rl.allow(ip) {
if !rl.allow(clientIP(r)) {
respondError(w, http.StatusTooManyRequests, "too many login attempts, try again later")
return
}
+73 -32
View File
@@ -1,7 +1,6 @@
package api
import (
"io"
"log/slog"
"net/http"
"os"
@@ -118,7 +117,22 @@ func (s *Server) deleteBackup(w http.ResponseWriter, r *http.Request) {
}
// restoreBackup handles POST /api/backups/{id}/restore.
// This replaces the current database with the backup and triggers a graceful shutdown.
//
// Restore happens in three documented stages so a failure at any stage
// leaves the live DB intact:
//
// 1. PRE-FLIGHT (sync, before the HTTP response): PrepareRestore opens
// the candidate read-only and runs `PRAGMA integrity_check`. If it
// fails the live DB is untouched and we return 400 with the reason.
//
// 2. SAFETY NET: a pre-restore backup of the LIVE DB is created so the
// operator can roll back even if the candidate is later discovered
// to be missing data.
//
// 3. SWAP (async, after the response is flushed): close the live DB,
// atomic-rename the candidate over the live path, wipe WAL/SHM,
// trigger graceful shutdown. supervisord / systemd / docker
// restart=on-failure brings the process back with the new DB.
func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
if s.backupEngine == nil {
respondError(w, http.StatusServiceUnavailable, "backup engine not initialized")
@@ -126,13 +140,44 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
}
id := chi.URLParam(r, "id")
restorePath, err := s.backupEngine.RestorePath(id)
if err != nil {
respondError(w, http.StatusNotFound, "backup not found: "+err.Error())
// CSRF / accidental-fire guard: the restore endpoint is the most
// destructive surface in the API (replaces the whole DB). Even
// though it sits behind AdminOnly + Bearer JWT, a blind cross-site
// POST or a misclicked button in any open admin tab can fire it.
// Require the operator's client to echo X-Confirm-Restore: <id>
// — matching the path param — so a CSRF post-form / image-src
// trick can't trigger restore (browsers don't let cross-origin
// requests set custom headers without a preflight).
if confirm := r.Header.Get("X-Confirm-Restore"); confirm != id {
respondError(w, http.StatusBadRequest,
"missing or mismatched X-Confirm-Restore header (must equal backup id)")
return
}
// Create a safety backup before restore so the user can undo if needed.
// Single-flight guard: a rapid double-click would otherwise spawn
// two goroutines racing s.store.Close() and the candidate-over-
// live rename. CAS to true here; if someone else won, return 409.
if !s.restoreInFlight.CompareAndSwap(false, true) {
respondError(w, http.StatusConflict, "a restore is already in progress")
return
}
// Do NOT release the flag — the restore path triggers shutdown.
// A failed restore is also terminal (the DB may be closed); a
// fresh process boot is the recovery path.
// PRE-FLIGHT: refuse before touching anything if the candidate is
// not a valid SQLite database or fails integrity_check. This is the
// guard the prior code lacked — a corrupt backup would silently
// overwrite a healthy live DB.
restorePath, err := s.backupEngine.PrepareRestore(id)
if err != nil {
respondError(w, http.StatusBadRequest, err.Error())
return
}
// SAFETY NET: pre-restore snapshot of the live DB. A failure here
// is logged but does not abort — the integrity-checked candidate
// is still safer than refusing to restore.
if _, err := s.backupEngine.CreateBackup("pre-restore"); err != nil {
slog.Warn("failed to create pre-restore backup", "error", err)
}
@@ -153,41 +198,37 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
go func() {
time.Sleep(500 * time.Millisecond)
// Close the current database to release locks.
// Once we begin closing the live DB the process can no longer serve
// requests against a sane store, so EVERY exit path from here must
// trigger shutdown. Returning early would leave the server limping
// on a closed/half-swapped database with no path to recovery except
// an external kill. shutdownFunc → graceful shutdown → main returns
// → deferred releaseLock()/db.Close() run, and the supervisor reopens
// whatever DB is on disk on the next boot.
triggerShutdown := func() {
if s.shutdownFunc != nil {
s.shutdownFunc()
}
}
// Close the current database to release locks. AtomicReplaceDB
// expects the live file to be unmapped before swap (especially
// important on Windows where open files cannot be renamed over).
if err := s.store.Close(); err != nil {
slog.Error("restore: failed to close database", "error", err)
slog.Error("restore: failed to close database, restarting", "error", err)
triggerShutdown()
return
}
// Copy the backup file over the main database using streaming (no full read into memory).
src, err := os.Open(restorePath)
if err != nil {
slog.Error("restore: failed to open backup file", "error", err)
if err := s.backupEngine.AtomicReplaceDB(restorePath, s.dbPath); err != nil {
slog.Error("restore: atomic replace failed, restarting", "error", err)
triggerShutdown()
return
}
defer src.Close()
dst, err := os.Create(s.dbPath)
if err != nil {
slog.Error("restore: failed to create database file", "error", err)
return
}
defer dst.Close()
if _, err := io.Copy(dst, src); err != nil {
slog.Error("restore: failed to copy backup to database", "error", err)
return
}
// Remove WAL and SHM files to ensure clean state.
os.Remove(s.dbPath + "-wal")
os.Remove(s.dbPath + "-shm")
slog.Info("restore: database replaced, triggering shutdown")
// Signal the server to shut down gracefully so it can be restarted.
if s.shutdownFunc != nil {
s.shutdownFunc()
}
triggerShutdown()
}()
}
+49
View File
@@ -9,6 +9,7 @@ import (
"strings"
"time"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/staticsite"
)
@@ -350,6 +351,54 @@ func (s *Server) listImageConflicts(w http.ResponseWriter, r *http.Request) {
respondJSON(w, http.StatusOK, conflicts)
}
// inspectImageRequest is the JSON body accepted by
// POST /api/discovery/image/inspect.
type inspectImageRequest struct {
// Image is the image reference to inspect. The handler trims
// surrounding whitespace and rejects an empty value with 400.
Image string `json:"image"`
}
// inspectImageResponse mirrors the frontend InspectResult shape the
// new-app wizard pre-fills from: the first exposed port (parsed to int,
// 0 when none) and the image's HEALTHCHECK command string.
type inspectImageResponse struct {
// Port is produced by docker.ExtractPort from the image's exposed ports.
Port int `json:"port"`
// Healthcheck is copied verbatim from the inspect result's Healthcheck field.
Healthcheck string `json:"healthcheck"`
}
// inspectImageMetadata handles POST /api/discovery/image/inspect.
//
// It decodes an inspectImageRequest, trims the image reference, and asks
// the docker client to inspect that image under a discoveryTimeout-bound
// context. On success it responds 200 with the first exposed port and
// the healthcheck string so the wizard can pre-fill those fields.
//
// Failure modes:
//   - undecodable body: decodeJSON has already written the response.
//   - empty image reference: 400 "image is required".
//   - inspect error: the underlying error is logged only; the client
//     receives a fixed, generic 400 so a raw daemon message (which may
//     echo the reference) never leaves the server.
func (s *Server) inspectImageMetadata(w http.ResponseWriter, r *http.Request) {
	var body inspectImageRequest
	if ok := decodeJSON(w, r, &body); !ok {
		return
	}

	ref := strings.TrimSpace(body.Image)
	if len(ref) == 0 {
		respondError(w, http.StatusBadRequest, "image is required")
		return
	}

	inspectCtx, stop := context.WithTimeout(r.Context(), discoveryTimeout)
	defer stop()

	meta, inspectErr := s.docker.InspectImage(inspectCtx, ref)
	if inspectErr != nil {
		// Keep the detailed cause server-side; the response stays generic.
		slog.Warn("inspect image metadata failed", "error", inspectErr)
		respondError(w, http.StatusBadRequest, "could not inspect image — make sure it is pulled locally and the reference is correct")
		return
	}

	result := inspectImageResponse{
		Port:        docker.ExtractPort(meta.ExposedPorts),
		Healthcheck: meta.Healthcheck,
	}
	respondJSON(w, http.StatusOK, result)
}
// stripImageTag returns the image reference with the trailing :tag
// removed, taking care to leave a registry port (e.g. registry:5000/foo)
// intact. Digest references (image@sha256:...) are returned unchanged.
+64
View File
@@ -0,0 +1,64 @@
package api
import (
"context"
"log/slog"
"net/http"
"time"
"github.com/alexei/tinyforge/internal/metrics"
)
// livez is the liveness probe: it answers with the default 200 status
// and the body "ok\n" whenever the process is able to run a handler.
// It reads nothing from the request and touches no server dependency,
// so a slow or broken backend cannot make this probe fail.
func (s *Server) livez(w http.ResponseWriter, _ *http.Request) {
	hdr := w.Header()
	hdr.Set("Content-Type", "text/plain; charset=utf-8")

	payload := []byte("ok\n")
	// A failed write means the caller went away; nothing useful to do.
	_, _ = w.Write(payload)
}
// readyz is the readiness probe. It reports "ready\n" with 200 only when
// both checks below pass, and "not ready\n" with 503 otherwise:
//
//  1. the store's database answers a ping within 2 seconds;
//  2. the server's encryption key is not the all-zero value.
//
// The response body never names the failing check; that detail is only
// written to the log, so an unauthenticated caller cannot tell which
// dependency is unhealthy. No deployer-drain probe is implemented here.
func (s *Server) readyz(w http.ResponseWriter, r *http.Request) {
	// Every outcome is plain text, so the header is set once up front.
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")

	unavailable := func() {
		w.WriteHeader(http.StatusServiceUnavailable)
		_, _ = w.Write([]byte("not ready\n"))
	}

	pingCtx, stop := context.WithTimeout(r.Context(), 2*time.Second)
	defer stop()

	if pingErr := s.store.DB().PingContext(pingCtx); pingErr != nil {
		slog.Warn("readyz: db ping failed", "error", pingErr)
		unavailable()
		return
	}

	// A zero key means no key was loaded.
	if s.encKey == ([32]byte{}) {
		slog.Warn("readyz: encryption key not loaded")
		unavailable()
		return
	}

	_, _ = w.Write([]byte("ready\n"))
}
// metricsExport serves the process-wide metrics registry in the
// Prometheus text exposition format (content type version 0.0.4).
// Access control is not enforced here; it relies on where the router
// mounts this handler.
func (s *Server) metricsExport(w http.ResponseWriter, _ *http.Request) {
	const prometheusTextType = "text/plain; version=0.0.4; charset=utf-8"
	w.Header().Set("Content-Type", prometheusTextType)

	// Best-effort: a write error here means the scraper disconnected
	// mid-response, and there is no way to report it back to the caller.
	_ = metrics.DefaultRegistry.WritePrometheus(w)
}
+318 -7
View File
@@ -1,14 +1,119 @@
package api
import (
"context"
"crypto/rand"
"encoding/hex"
"log/slog"
"net"
"net/http"
"os"
"runtime/debug"
"strings"
"sync"
"time"
"github.com/alexei/tinyforge/internal/metrics"
)
// requestIDKeyType is a private, zero-size type used as the context key
// for the request correlation ID. Using a dedicated unexported type
// guarantees the key cannot collide with keys set by other packages.
type requestIDKeyType struct{}

// requestIDKey is the single key value under which the requestID
// middleware stores the correlation ID.
var requestIDKey = requestIDKeyType{}

// RequestIDFromContext returns the correlation ID stored in ctx by the
// requestID middleware. It returns "" when no ID is present or when the
// stored value is not a string.
func RequestIDFromContext(ctx context.Context) string {
	// A failed assertion yields the zero string, which is exactly the
	// "not present" result callers expect.
	id, _ := ctx.Value(requestIDKey).(string)
	return id
}
// requestID is middleware that attaches a correlation ID to every
// request.
//
// An inbound X-Request-ID header is reused only when all three hold:
// it is non-empty, the direct peer passes isTrustedPeer, and the value
// passes isValidRequestID. In every other case a fresh ID is generated
// with newRequestID. The validity clamp keeps separator characters
// (newlines, semicolons, ...) out of structured logs even if a trusted
// proxy forwards a hostile value.
//
// The chosen ID is echoed on the response as X-Request-ID and stored in
// the request context, where RequestIDFromContext can retrieve it.
func requestID(next http.Handler) http.Handler {
	handle := func(w http.ResponseWriter, r *http.Request) {
		id := r.Header.Get("X-Request-ID")

		// Same short-circuit order as the checks are listed above:
		// presence, then peer trust, then format.
		reusable := id != "" && isTrustedPeer(r) && isValidRequestID(id)
		if !reusable {
			id = newRequestID()
		}

		w.Header().Set("X-Request-ID", id)
		tagged := r.WithContext(context.WithValue(r.Context(), requestIDKey, id))
		next.ServeHTTP(w, tagged)
	}
	return http.HandlerFunc(handle)
}
// isValidRequestID reports whether s is 1 to 64 bytes long and consists
// solely of ASCII letters, ASCII digits, '.', '_' or '-'. It is the
// hand-written equivalent of matching `[A-Za-z0-9._-]{1,64}` and avoids
// running a regular expression on the request path.
func isValidRequestID(s string) bool {
	if n := len(s); n < 1 || n > 64 {
		return false
	}
	// Iterate bytes, not runes: any byte of a multi-byte UTF-8 sequence
	// is outside the allowed ASCII set and is rejected.
	for _, b := range []byte(s) {
		isUpper := 'A' <= b && b <= 'Z'
		isLower := 'a' <= b && b <= 'z'
		isDigit := '0' <= b && b <= '9'
		isPunct := b == '.' || b == '_' || b == '-'
		if !(isUpper || isLower || isDigit || isPunct) {
			return false
		}
	}
	return true
}
// isTrustedPeer reports whether the request's direct peer (r.RemoteAddr,
// with any port stripped) is an IP address inside one of the networks in
// trustedProxyCIDRs. It returns false when the allow-list is empty or
// when the peer address does not parse as an IP. A forwarded request ID
// is honored only for such peers, so arbitrary clients cannot inject
// IDs of their choosing into the logs.
func isTrustedPeer(r *http.Request) bool {
	// With no allow-list nobody is trusted, whatever the peer is.
	if len(trustedProxyCIDRs) == 0 {
		return false
	}

	host := r.RemoteAddr
	if h, _, splitErr := net.SplitHostPort(host); splitErr == nil {
		host = h
	}

	addr := net.ParseIP(host)
	if addr == nil {
		return false
	}

	for _, network := range trustedProxyCIDRs {
		if network.Contains(addr) {
			return true
		}
	}
	return false
}
// newRequestID returns a fresh correlation ID: 16 random bytes from
// crypto/rand rendered as 32 lowercase hex characters. If the random
// source reports an error, it falls back to "ts-" followed by the
// current UTC time with nanosecond precision — the ID is for tracing,
// not security, so a predictable fallback is preferable to a panic.
func newRequestID() string {
	raw := make([]byte, 16)
	if _, err := rand.Read(raw); err == nil {
		return hex.EncodeToString(raw)
	}
	stamp := time.Now().UTC().Format("20060102T150405.000000000")
	return "ts-" + stamp
}
// logging is an HTTP middleware that logs every request with method, path,
// status code, and duration. Webhook URLs are redacted before being logged
// because the secret is the only authenticator — leaking it to log
@@ -20,15 +125,58 @@ func logging(next http.Handler) http.Handler {
next.ServeHTTP(wrapped, r)
slog.Info("http request",
fields := []any{
"method", r.Method,
"path", redactPath(r.URL.Path),
"status", wrapped.status,
"duration", time.Since(start).String(),
)
}
if rq := redactQuery(r.URL.RawQuery); rq != "" {
fields = append(fields, "query", rq)
}
if rid := RequestIDFromContext(r.Context()); rid != "" {
fields = append(fields, "request_id", rid)
}
slog.Info("http request", fields...)
// Lightweight per-request counter. Bucket by status class so
// the cardinality stays at 5 × #methods regardless of how many
// distinct response codes we emit.
metrics.HTTPRequestsTotal.Inc(bucketMethod(r.Method), statusClass(wrapped.status))
})
}
// bucketMethod maps an HTTP method onto a bounded label set for metrics.
// The nine standard methods are returned unchanged (the match is exact
// and case-sensitive); every other token becomes "other", so a client
// sending arbitrary method names cannot grow the metrics map.
func bucketMethod(m string) string {
	standard := [...]string{
		"GET", "POST", "PUT", "PATCH", "DELETE",
		"HEAD", "OPTIONS", "CONNECT", "TRACE",
	}
	for _, known := range standard {
		if m == known {
			return known
		}
	}
	return "other"
}
// statusClass collapses an HTTP status code into its class label:
// "1xx" through "5xx" for codes 100-599, and "other" for anything
// outside that range. Bucketing keeps metrics cardinality fixed no
// matter how many distinct codes the handlers emit.
func statusClass(code int) string {
	if code < 100 || code > 599 {
		return "other"
	}
	labels := [...]string{"1xx", "2xx", "3xx", "4xx", "5xx"}
	// code is in [100, 599], so code/100 is in [1, 5].
	return labels[code/100-1]
}
// redactPath strips secrets from URL paths that carry them in segments.
// Only the canonical /api/webhook/triggers/{secret} surface remains after
// the hard cutover.
@@ -40,6 +188,45 @@ func redactPath(path string) string {
return path
}
// redactQueryKeys lists, in lowercase, the query-parameter names whose
// values must never reach the request log. redactQuery lowercases each
// incoming name before the lookup, which makes matching
// case-insensitive.
var redactQueryKeys = map[string]struct{}{
	"token":         {},
	"secret":        {},
	"password":      {},
	"passwd":        {},
	"api_key":       {},
	"apikey":        {},
	"access_token":  {},
	"client_secret": {},
	"sig":           {},
	"signature":     {},
}

// redactQuery returns rawQuery with the value of every sensitive
// parameter (see redactQueryKeys) replaced by "***".
//
// The query is split on '&' and each piece on its first '='. Pieces
// without '=' and pieces whose name is not sensitive are passed through
// untouched, so a query with nothing to redact comes back byte-for-byte
// identical. Names are compared as written in the URL (no
// percent-decoding is applied).
func redactQuery(rawQuery string) string {
	if len(rawQuery) == 0 {
		return ""
	}

	pairs := strings.Split(rawQuery, "&")
	for idx := range pairs {
		name, _, hasValue := strings.Cut(pairs[idx], "=")
		if !hasValue {
			continue
		}
		if _, sensitive := redactQueryKeys[strings.ToLower(name)]; sensitive {
			// Preserve the name's original casing; mask only the value.
			pairs[idx] = name + "=***"
		}
	}
	return strings.Join(pairs, "&")
}
// recovery is an HTTP middleware that catches panics and returns a 500 response.
func recovery(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -54,16 +241,49 @@ func recovery(next http.Handler) http.Handler {
}
// securityHeaders is middleware that sets a fixed set of security
// response headers before calling next:
//
//   - X-Content-Type-Options, X-Frame-Options, Referrer-Policy and
//     Permissions-Policy on every response;
//   - a Content-Security-Policy, set exactly once;
//   - Strict-Transport-Security, only when isHTTPS reports that the
//     request arrived over HTTPS. Emitting HSTS over plain HTTP is
//     harmless to compliant browsers but is flagged by scanners and
//     confuses some reverse proxies.
//
// The CSP keeps 'unsafe-inline' for scripts and styles because SvelteKit
// injects inline boot scripts and styles; removing it requires a
// nonce-based strategy threaded through the SvelteKit handle hook.
func securityHeaders(next http.Handler) http.Handler {
	// The policy never varies per request, so it is assembled once.
	const csp = "default-src 'self'; " +
		"script-src 'self' 'unsafe-inline'; " +
		"style-src 'self' 'unsafe-inline'; " +
		"img-src 'self' data:; " +
		"connect-src 'self'; " +
		"font-src 'self'; " +
		"frame-ancestors 'none'; " +
		"base-uri 'self'; " +
		"form-action 'self'"

	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		h := w.Header()
		h.Set("X-Content-Type-Options", "nosniff")
		h.Set("X-Frame-Options", "DENY")
		h.Set("Referrer-Policy", "strict-origin-when-cross-origin")
		h.Set("Permissions-Policy", "camera=(), microphone=(), geolocation=(), payment=()")
		h.Set("Content-Security-Policy", csp)
		if isHTTPS(r) {
			h.Set("Strict-Transport-Security", "max-age=31536000; includeSubDomains")
		}
		next.ServeHTTP(w, r)
	})
}

// isHTTPS reports whether the request reached the client-facing edge
// over HTTPS: either this server terminated TLS itself (r.TLS is set),
// or X-Forwarded-Proto says "https".
//
// The header is matched case-insensitively with surrounding whitespace
// ignored. When it carries a comma-separated chain (one entry per proxy
// hop), only the first entry — the hop the client connected to — is
// considered. The header is not authenticated here; it only decides
// whether HSTS is emitted.
func isHTTPS(r *http.Request) bool {
	if r.TLS != nil {
		return true
	}
	clientFacing, _, _ := strings.Cut(r.Header.Get("X-Forwarded-Proto"), ",")
	return strings.EqualFold(strings.TrimSpace(clientFacing), "https")
}
// cors is an HTTP middleware that handles CORS for same-origin requests.
// The frontend is served from the same origin, so cross-origin requests are not expected.
func cors(next http.Handler) http.Handler {
@@ -164,10 +384,7 @@ func jsonContentType(next http.Handler) http.Handler {
func rateLimitMiddleware(rl *rateLimiter) func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ip := r.RemoteAddr
if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
ip = fwd
}
ip := clientIP(r)
if !rl.allow(ip) {
respondError(w, http.StatusTooManyRequests, "rate limit exceeded")
return
@@ -177,6 +394,100 @@ func rateLimitMiddleware(rl *rateLimiter) func(http.Handler) http.Handler {
}
}
// trustedProxyCIDRs is the parsed allow-list of upstream proxy networks
// whose X-Forwarded-For header we honor. Set TRUSTED_PROXY_CIDRS to a
// comma-separated list of CIDRs (e.g. "127.0.0.1/32,10.0.0.0/8") to
// enable. When unset (the default) X-Forwarded-For is ignored entirely
// and rate limiting + audit logging use r.RemoteAddr — preventing a
// remote attacker from spoofing the header to bypass per-IP limiters.
var trustedProxyCIDRs = parseTrustedProxyCIDRs(os.Getenv("TRUSTED_PROXY_CIDRS"))

// parseTrustedProxyCIDRs turns a comma-separated list of CIDRs into
// networks. Blank entries are skipped; a bare IP is widened to a
// single-host network (/32 for IPv4, /128 for IPv6); an entry that still
// fails to parse is logged and ignored rather than aborting start-up.
// It returns nil when raw is empty or contains no valid entry.
func parseTrustedProxyCIDRs(raw string) []*net.IPNet {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil
	}
	var nets []*net.IPNet
	for _, p := range strings.Split(raw, ",") {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		// Allow bare IPs as /32 (IPv4) or /128 (IPv6).
		if !strings.Contains(p, "/") {
			if ip := net.ParseIP(p); ip != nil {
				if ip.To4() != nil {
					p += "/32"
				} else {
					p += "/128"
				}
			}
		}
		_, n, err := net.ParseCIDR(p)
		if err != nil {
			slog.Warn("ignoring invalid TRUSTED_PROXY_CIDRS entry", "value", p, "error", err)
			continue
		}
		nets = append(nets, n)
	}
	return nets
}

// clientIP returns the per-request "client" address used for rate-limit
// keying and audit attribution. X-Forwarded-For is honored ONLY when the
// direct peer (r.RemoteAddr) belongs to a configured trusted-proxy CIDR;
// otherwise the header is ignored to prevent header-spoofing bypasses.
//
// The result is the peer address (port stripped) unless a trusted peer
// supplied a forwarded chain containing at least one parseable,
// untrusted IP, in which case the rightmost such entry is returned.
func clientIP(r *http.Request) string {
	peer := r.RemoteAddr
	if host, _, err := net.SplitHostPort(peer); err == nil {
		peer = host
	}
	if len(trustedProxyCIDRs) == 0 {
		return peer
	}
	peerIP := net.ParseIP(peer)
	if peerIP == nil || !isTrustedProxy(peerIP) {
		return peer
	}

	// A request may carry SEVERAL X-Forwarded-For header lines. They are
	// equivalent to one comma-joined list in order of appearance, and a
	// proxy that adds its value as a new line (instead of appending to
	// the existing one) leaves the client-supplied line FIRST. Reading
	// only the first line would therefore hand the attacker control of
	// the result, so all lines are joined before the walk below.
	fwd := strings.Join(r.Header.Values("X-Forwarded-For"), ",")
	if fwd == "" {
		return peer
	}

	// Walk X-Forwarded-For from the RIGHTMOST entry (the address closest to
	// us, appended by our trusted peer) leftward, skipping entries that are
	// themselves trusted proxies, and return the first untrusted address.
	// The LEFTMOST entry is fully client-controlled — trusting it (as a
	// naive `fwd[:firstComma]` does) lets an attacker spoof their rate-limit
	// and audit identity by prepending a forged value, defeating the per-IP
	// login limiter.
	parts := strings.Split(fwd, ",")
	for i := len(parts) - 1; i >= 0; i-- {
		candidate := strings.TrimSpace(parts[i])
		ip := net.ParseIP(candidate)
		if ip == nil {
			continue
		}
		if isTrustedProxy(ip) {
			continue
		}
		return candidate
	}
	// Every forwarded entry was a trusted proxy (or unparseable) — fall back
	// to the direct peer.
	return peer
}

// isTrustedProxy reports whether ip falls within a configured
// trusted-proxy CIDR.
func isTrustedProxy(ip net.IP) bool {
	for _, n := range trustedProxyCIDRs {
		if n.Contains(ip) {
			return true
		}
	}
	return false
}
// statusRecorder wraps http.ResponseWriter to capture the status code.
type statusRecorder struct {
http.ResponseWriter
+74 -12
View File
@@ -4,6 +4,7 @@ import (
"context"
"log/slog"
"sync"
"sync/atomic"
"github.com/go-chi/chi/v5"
@@ -61,6 +62,13 @@ type Server struct {
shutdownFunc func() // called after restore to trigger graceful shutdown
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
onProxyProviderChanged func(provider proxy.Provider) // called when proxy provider changes
// restoreInFlight is a process-wide guard against double-firing
// the restore endpoint. A rapid double-click would otherwise
// schedule two goroutines racing s.store.Close() and the
// candidate-over-live rename. CAS to true at the entry point;
// reject the second caller with 409 Conflict.
restoreInFlight atomic.Bool
}
// NewServer creates a new API Server with all required dependencies.
@@ -157,13 +165,32 @@ func (s *Server) SetDNSProviderChangedCallback(fn DNSProviderChangedFunc) {
// initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal.
func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
// Decrypt the OIDC client secret if it's encrypted.
// Decrypt the OIDC client secret. The prior code did a try-decrypt
// and silently treated failures as plaintext — under a rotated key
// that sent ciphertext upstream to the OP. Now:
// - If the value carries the tf1: envelope → fail loud on
// decrypt failure (rotated key / corrupted ciphertext).
// - If the value is unprefixed (legacy ciphertext from v0 or true
// plaintext from an old migration) → try decrypt; on failure
// accept as plaintext (the only safe legacy interpretation).
clientSecret := as.OIDCClientSecret
if clientSecret != "" {
if decrypted, err := crypto.Decrypt(s.encKey, clientSecret); err == nil {
switch {
case crypto.HasEnvelope(clientSecret):
decrypted, err := crypto.Decrypt(s.encKey, clientSecret)
if err != nil {
slog.Error("OIDC client secret could not be decrypted — refusing to initialize provider",
"error", err,
"hint", "rotate ENCRYPTION_KEY back, OR re-save OIDC settings to re-encrypt with the current key")
return
}
clientSecret = decrypted
default:
// Legacy v0 value: try decrypt; on failure assume plaintext.
if decrypted, err := crypto.Decrypt(s.encKey, clientSecret); err == nil {
clientSecret = decrypted
}
}
// If decrypt fails, assume it's already plaintext (migration scenario).
}
provider, err := auth.NewOIDCProvider(ctx, auth.OIDCConfig{
IssuerURL: as.OIDCIssuerURL,
@@ -183,12 +210,29 @@ func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
func (s *Server) Router() chi.Router {
r := chi.NewRouter()
// Global middleware.
// Global middleware. requestID runs first so every downstream log
// line (and the access log emitted by `logging`) carries the same
// correlation id, plus the response carries it back on the
// X-Request-ID header for the operator to grep across services.
r.Use(requestID)
r.Use(recovery)
r.Use(securityHeaders)
r.Use(logging)
r.Use(cors)
// Unauthenticated health probes — mounted at the root so container
// orchestrators / load balancers can hit them without knowing about
// the /api prefix. /livez intentionally does no work and stays
// unbounded; /readyz pings the DB and is rate-limited to keep an
// unauthenticated flood from serialising behind SQLite's single
// writer connection (busy-timeout = 5s) and log-amplifying every
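// request via the structured access log. The 10-per-minute budget
// is the existing rateLimiter default — intended to be generous for
// k8s readiness probes (typically every 5-10s), restrictive for an
// attacker.
// NOTE(review): a probe firing every 5s issues 12 requests per minute,
// which exceeds a 10-per-minute budget, so some probes would be
// answered with 429. Confirm the limiter's actual budget and the probe
// interval, or give /readyz a larger limit.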
r.Get("/livez", s.livez)
readyLimiter := newRateLimiter()
r.With(rateLimitMiddleware(readyLimiter)).Get("/readyz", s.readyz)
loginLimiter := newRateLimiter()
webhookLimiter := newRateLimiter()
@@ -232,6 +276,7 @@ func (s *Server) Router() chi.Router {
r.Post("/discovery/git/branches", s.listGitBranches)
r.Post("/discovery/git/tree", s.listGitTree)
r.Get("/discovery/image/conflicts", s.listImageConflicts)
r.Post("/discovery/image/inspect", s.inspectImageMetadata)
})
// Read-only endpoints (any authenticated user).
@@ -245,16 +290,18 @@ func (s *Server) Router() chi.Router {
r.Get("/events/log/stats", s.getEventLogStats)
r.Get("/registries", s.listRegistries)
r.Route("/registries/{id}", func(r chi.Router) {
// All registry probes are admin-gated. The /tags and
// /images endpoints used to be open to any authenticated
// user, but they make outbound requests using the
// admin-encrypted registry token — a viewer could
// effectively drive arbitrary requests against a private
// registry under admin credentials.
r.Use(auth.AdminOnly)
r.Get("/tags/*", s.listRegistryTags)
r.Get("/images", s.listRegistryImages)
// Admin-only registry mutations.
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
r.Put("/", s.updateRegistry)
r.Delete("/", s.deleteRegistry)
r.Post("/test", s.testRegistry)
})
r.Put("/", s.updateRegistry)
r.Delete("/", s.deleteRegistry)
r.Post("/test", s.testRegistry)
})
r.Get("/settings", s.getSettings)
r.Get("/settings/npm-certificates", s.listNpmCertificates)
@@ -312,6 +359,15 @@ func (s *Server) Router() chi.Router {
// of /triggers/{id}/bindings keyed on the workload side.
r.Get("/triggers", s.listBindingsForWorkload)
r.With(auth.AdminOnly).Post("/triggers", s.bindTriggerToWorkload)
// Per-workload notification routes — multi-destination
// fan-out (Slack channel + Discord webhook + ...). When
// zero rows are configured the dispatcher falls back to
// the legacy single-URL columns on the workload row.
r.Get("/notifications", s.listWorkloadNotifications)
r.With(auth.AdminOnly).Post("/notifications", s.createWorkloadNotification)
r.With(auth.AdminOnly).Put("/notifications/{nid}", s.updateWorkloadNotification)
r.With(auth.AdminOnly).Delete("/notifications/{nid}", s.deleteWorkloadNotification)
})
// Global container index, joined to workload + app names.
@@ -379,6 +435,12 @@ func (s *Server) Router() chi.Router {
r.Group(func(r chi.Router) {
r.Use(auth.AdminOnly)
// Prometheus-format metrics export. Admin-only so the
// counter cardinality cannot be enumerated by a low-trust
// viewer to map internal endpoints / sources / outcomes.
// Scrape with bearer auth from your Prometheus job.
r.Get("/metrics", s.metricsExport)
// Config export (reveals registry/global details).
r.Get("/config/export", s.exportConfig)
+19 -2
View File
@@ -32,9 +32,26 @@ func (s *Server) streamEvents(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
flusher.Flush()
// Subscribe to instance status, deploy status, and persistent event log events.
// Build logs are high-volume: a single verbose `docker build` can emit
// thousands of lines. Streaming them to EVERY connection would flood each
// subscriber's bounded bus buffer and evict status/log events for ALL
// clients. So build logs are delivered ONLY to connections that opt in
// with ?workload_id=<id>, and only for that workload. Connections without
// the param (e.g. the global dashboard) never receive build-log frames.
buildLogWorkloadID := r.URL.Query().Get("workload_id")
sub := s.eventBus.Subscribe(func(evt events.Event) bool {
return evt.Type == events.EventInstanceStatus || evt.Type == events.EventDeployStatus || evt.Type == events.EventLog
switch evt.Type {
case events.EventInstanceStatus, events.EventDeployStatus, events.EventLog:
return true
case events.EventBuildLog:
if buildLogWorkloadID == "" {
return false
}
p, ok := evt.Payload.(events.BuildLogPayload)
return ok && p.WorkloadID == buildLogWorkloadID
default:
return false
}
})
defer s.eventBus.Unsubscribe(sub)
+27 -3
View File
@@ -89,12 +89,16 @@ func toTriggerViewWithCount(row store.TriggerWithBindingCount) triggerView {
// triggerRequest is the create/update body. Config is opaque per kind.
// Auto-generates a webhook secret on create when WebhookEnabled is true;
// the secret is exposed only via the /webhook subresource.
//
// WebhookRequireSignature is a *bool so we can distinguish "field omitted
// by client" (nil → apply secure default of true when webhook is enabled)
// from an explicit opt-out (false → respected).
type triggerRequest struct {
Kind string `json:"kind"`
Name string `json:"name"`
Config json.RawMessage `json:"config"`
WebhookEnabled bool `json:"webhook_enabled"`
WebhookRequireSignature bool `json:"webhook_require_signature"`
WebhookRequireSignature *bool `json:"webhook_require_signature,omitempty"`
}
// Same per-blob caps used on the workload pluginWorkloadRequest path —
@@ -134,12 +138,26 @@ func (s *Server) getTrigger(w http.ResponseWriter, r *http.Request) {
// buildTriggerFromRequest assembles a store.Trigger ready for insert.
// Centralized so the standalone create endpoint and the inline-bind
// endpoint cannot drift on secret-generation defaults.
//
// SECURITY: a new trigger with webhook enabled defaults to require_signature
// = true. Operators can opt out at create time for receivers that do not
// support HMAC, but the safer default avoids the "freshly-created trigger
// accepts unsigned posts to its URL" footgun.
func buildTriggerFromRequest(req triggerRequest) store.Trigger {
// Secure default: if webhook is enabled and the operator did NOT
// explicitly set require_signature, force it on. Explicit false is
// preserved (legacy receivers without HMAC support still work).
requireSig := false
if req.WebhookRequireSignature != nil {
requireSig = *req.WebhookRequireSignature
} else if req.WebhookEnabled {
requireSig = true
}
t := store.Trigger{
Kind: req.Kind,
Name: strings.TrimSpace(req.Name),
Config: string(req.Config),
WebhookRequireSignature: req.WebhookRequireSignature,
WebhookRequireSignature: requireSig,
}
if req.WebhookEnabled {
t.WebhookSecret = generateWebhookSecret()
@@ -199,7 +217,13 @@ func (s *Server) updateTrigger(w http.ResponseWriter, r *http.Request) {
if len(req.Config) > 0 {
existing.Config = string(req.Config)
}
existing.WebhookRequireSignature = req.WebhookRequireSignature
if req.WebhookRequireSignature != nil {
existing.WebhookRequireSignature = *req.WebhookRequireSignature
} else if req.WebhookEnabled && !existing.WebhookRequireSignature {
// Re-enabling webhook without specifying the signature flag —
// take the secure default.
existing.WebhookRequireSignature = true
}
wasEnabled := existing.WebhookSecret != ""
if req.WebhookEnabled && !wasEnabled {
// false→true transition: rotate both secrets so re-enabling
+44 -7
View File
@@ -13,18 +13,29 @@ import (
"github.com/alexei/tinyforge/internal/auth"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
"github.com/alexei/tinyforge/internal/workload/preview"
)
// chainNode is the lightweight shape returned by /chain — we deliberately
// don't return full plugin.Workload values for ancestor/descendant rows
// because the secret fields don't belong in a chain-traversal response.
//
// IsPreview / PreviewBranch surface branch-preview children to the UI so it
// can render them in a dedicated "Preview environments" panel rather than as
// undistinguished stage children. They are computed against the chain's
// `self` workload via preview.IsPreviewChild — the canonical "this child is a
// branch preview" test that reverses the MaterializeForBranch naming formula.
// Both are zero-valued (false / "") for the parent and self nodes and for
// operator-created stage children.
type chainNode struct {
ID string `json:"id"`
Name string `json:"name"`
SourceKind string `json:"source_kind"`
TriggerKind string `json:"trigger_kind"`
CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"`
ID string `json:"id"`
Name string `json:"name"`
SourceKind string `json:"source_kind"`
TriggerKind string `json:"trigger_kind"`
IsPreview bool `json:"is_preview"`
PreviewBranch string `json:"preview_branch,omitempty"`
CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"`
}
func chainNodeOf(w store.Workload) chainNode {
@@ -38,6 +49,32 @@ func chainNodeOf(w store.Workload) chainNode {
}
}
// previewBranchOf returns the `branch` value stored in a workload's
// source_config JSON (the key MaterializeForBranch wrote). An empty
// source_config is not parsed and yields "". Decode errors are ignored on
// purpose, so a malformed config normally yields "" as well. The caller only
// invokes this for rows preview.IsPreviewChild already confirmed, so a blank
// result just means the JSON could not be decoded.
func previewBranchOf(w store.Workload) string {
	var parsed struct {
		Branch string `json:"branch"`
	}
	if w.SourceConfig == "" {
		return parsed.Branch
	}
	// Best-effort decode: the error is deliberately dropped so that a bad
	// config degrades to a blank branch instead of failing the /chain call.
	_ = json.Unmarshal([]byte(w.SourceConfig), &parsed)
	return parsed.Branch
}
// childChainNode converts a child workload row into its chainNode form. When
// preview.IsPreviewChild reports that child was materialized from self, the
// node is flagged IsPreview and carries the branch read from the child's
// source_config; otherwise both preview fields keep their zero values.
func childChainNode(self, child store.Workload) chainNode {
	out := chainNodeOf(child)
	if !preview.IsPreviewChild(self, child) {
		return out
	}
	out.IsPreview = true
	out.PreviewBranch = previewBranchOf(child)
	return out
}
// getWorkloadChain handles GET /api/workloads/{id}/chain.
//
// Returns the workload's parent (or nil), itself, and its direct children
@@ -76,7 +113,7 @@ func (s *Server) getWorkloadChain(w http.ResponseWriter, r *http.Request) {
}
children := make([]chainNode, 0, len(childRows))
for _, c := range childRows {
children = append(children, chainNodeOf(c))
children = append(children, childChainNode(self, c))
}
respondJSON(w, http.StatusOK, map[string]any{
+147
View File
@@ -0,0 +1,147 @@
package api
import (
"testing"
"github.com/alexei/tinyforge/internal/store"
)
// TestChildChainNode_MarksPreviewChildren checks that the /chain DTO builder
// flags only genuine branch-preview children (as decided by
// preview.IsPreviewChild, which reverses the MaterializeForBranch naming
// formula: name == template.Name + "/" + slug) and leaves operator-created
// stage children that merely share the parent link unmarked.
func TestChildChainNode_MarksPreviewChildren(t *testing.T) {
	parent := store.Workload{
		ID:         "tmpl-1",
		Name:       "myapp",
		SourceKind: "dockerfile",
	}

	// Zero-valued wantPreview / wantBranch mean "must not be marked".
	type scenario struct {
		desc        string
		child       store.Workload
		wantPreview bool
		wantBranch  string
	}
	scenarios := []scenario{
		{
			desc: "preview child is marked with its branch",
			child: store.Workload{
				ID:               "child-prev",
				Name:             "myapp/feat-login",
				SourceKind:       "dockerfile",
				SourceConfig:     `{"branch":"feat/login","port":3000}`,
				ParentWorkloadID: "tmpl-1",
			},
			wantPreview: true,
			wantBranch:  "feat/login",
		},
		{
			desc: "operator-named stage child sharing the parent is not a preview",
			child: store.Workload{
				ID:               "child-stage",
				Name:             "myapp-staging",
				SourceKind:       "dockerfile",
				SourceConfig:     `{"branch":"main"}`,
				ParentWorkloadID: "tmpl-1",
			},
		},
		{
			desc: "child of a different parent is not a preview of self",
			child: store.Workload{
				ID:               "child-other",
				Name:             "myapp/feat-login",
				SourceKind:       "dockerfile",
				SourceConfig:     `{"branch":"feat/login"}`,
				ParentWorkloadID: "some-other-template",
			},
		},
		{
			desc: "child with no branch in source_config is not a preview",
			child: store.Workload{
				ID:               "child-nobranch",
				Name:             "myapp/feat-login",
				SourceKind:       "dockerfile",
				SourceConfig:     `{}`,
				ParentWorkloadID: "tmpl-1",
			},
		},
		{
			// Same parent and a valid branch, but the name carries an extra
			// suffix, so only the slug-equality check fails (expected
			// "myapp/feat-login", got "myapp/feat-login-staging"). A branch
			// alone must not be enough to mark a preview.
			desc: "valid branch but name fails the slug match is not a preview",
			child: store.Workload{
				ID:               "child-slugmiss",
				Name:             "myapp/feat-login-staging",
				SourceKind:       "dockerfile",
				SourceConfig:     `{"branch":"feat/login","port":3000}`,
				ParentWorkloadID: "tmpl-1",
			},
		},
		{
			// Uppercase + slash branch: slugifyBranch lowercases and maps
			// "/" -> "-", so "Feature/Login" -> "feature-login" and the name
			// "myapp/feature-login" matches. PreviewBranch must echo the RAW
			// branch from source_config ("Feature/Login"), not the slug.
			desc: "uppercase slash branch matches and keeps raw branch",
			child: store.Workload{
				ID:               "child-upper",
				Name:             "myapp/feature-login",
				SourceKind:       "dockerfile",
				SourceConfig:     `{"branch":"Feature/Login","port":8080}`,
				ParentWorkloadID: "tmpl-1",
			},
			wantPreview: true,
			wantBranch:  "Feature/Login",
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.desc, func(t *testing.T) {
			got := childChainNode(parent, sc.child)
			if got.IsPreview != sc.wantPreview {
				t.Errorf("IsPreview = %v, want %v", got.IsPreview, sc.wantPreview)
			}
			if got.PreviewBranch != sc.wantBranch {
				t.Errorf("PreviewBranch = %q, want %q", got.PreviewBranch, sc.wantBranch)
			}
			// Base fields must always round-trip regardless of preview status.
			if !(got.ID == sc.child.ID && got.Name == sc.child.Name) {
				t.Errorf("base fields mangled: got id=%q name=%q", got.ID, got.Name)
			}
		})
	}
}
// TestPreviewBranchOf_ToleratesMalformedConfig verifies that the branch
// extractor yields "" (and does not panic) when source_config is missing or
// is not valid JSON, and still returns the branch for a well-formed config.
func TestPreviewBranchOf_ToleratesMalformedConfig(t *testing.T) {
	type scenario struct {
		desc   string
		config string
		expect string
	}
	scenarios := []scenario{
		{desc: "valid branch", config: `{"branch":"release/v1"}`, expect: "release/v1"},
		{desc: "empty config", config: ``, expect: ""},
		{desc: "empty object", config: `{}`, expect: ""},
		{desc: "malformed json", config: `{not-json`, expect: ""},
	}

	for _, sc := range scenarios {
		t.Run(sc.desc, func(t *testing.T) {
			branch := previewBranchOf(store.Workload{SourceConfig: sc.config})
			if branch == sc.expect {
				return
			}
			t.Errorf("previewBranchOf(%q) = %q, want %q", sc.config, branch, sc.expect)
		})
	}
}
+231
View File
@@ -0,0 +1,231 @@
package api
import (
"errors"
"log/slog"
"net/http"
"strings"
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/store"
)
// workloadNotificationRow is the JSON shape returned to clients. The
// `secret_set` boolean replaces the actual ciphertext: once stored a
// secret is write-only, mirroring how workload_env hides encrypted
// values. Rotating means submitting a new value.
type workloadNotificationRow struct {
	ID string `json:"id"`
	WorkloadID string `json:"workload_id"`
	Name string `json:"name"`
	URL string `json:"url"`
	// SecretSet is true when the stored secret is non-empty; the secret
	// itself is never serialized.
	SecretSet bool `json:"secret_set"`
	// EventTypes is passed through verbatim from the store row.
	// NOTE(review): the string format is not defined in this file — confirm
	// against the dispatcher.
	EventTypes string `json:"event_types"`
	Enabled bool `json:"enabled"`
	SortOrder int `json:"sort_order"`
	CreatedAt string `json:"created_at"`
	UpdatedAt string `json:"updated_at"`
}
// toWorkloadNotificationRow maps a store row onto the client-facing DTO.
// Every field is copied as-is except the secret, which is reduced to a
// "is one stored?" boolean so the ciphertext never leaves the server.
func toWorkloadNotificationRow(n store.WorkloadNotification) workloadNotificationRow {
	hasSecret := n.Secret != ""
	row := workloadNotificationRow{
		ID:         n.ID,
		WorkloadID: n.WorkloadID,
		Name:       n.Name,
		URL:        n.URL,
		EventTypes: n.EventTypes,
		Enabled:    n.Enabled,
		SortOrder:  n.SortOrder,
		CreatedAt:  n.CreatedAt,
		UpdatedAt:  n.UpdatedAt,
	}
	row.SecretSet = hasSecret
	return row
}
// listWorkloadNotifications handles GET on a workload's /notifications route.
// It responds 404 when the workload named by the {id} URL parameter does not
// exist, 500 when either store lookup fails, and otherwise 200 with the
// workload's notification rows converted via toWorkloadNotificationRow (so
// secrets are reported only as secret_set). The result is always a JSON
// array, never null, because the slice is allocated up front.
func (s *Server) listWorkloadNotifications(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "id")
	if _, err := s.store.GetWorkloadByID(id); err != nil {
		if errors.Is(err, store.ErrNotFound) {
			respondNotFound(w, "workload")
			return
		}
		respondError(w, http.StatusInternalServerError, "get workload")
		return
	}

	rows, err := s.store.ListWorkloadNotifications(id)
	if err != nil {
		// Log the underlying store error like the create/update/delete
		// handlers do; the client only ever sees the generic message.
		slog.Error("workload notifications: list", "workload", id, "error", err)
		respondError(w, http.StatusInternalServerError, "list workload notifications")
		return
	}

	out := make([]workloadNotificationRow, 0, len(rows))
	for _, n := range rows {
		out = append(out, toWorkloadNotificationRow(n))
	}
	respondJSON(w, http.StatusOK, out)
}
// workloadNotificationRequest is the POST/PUT body. Secret is the raw
// plaintext webhook signing key; the server encrypts it at rest with
// the global encryption key before INSERT. An empty Secret on UPDATE
// leaves the stored secret untouched so the operator can edit the URL
// or event filter without re-entering the secret each time.
type workloadNotificationRequest struct {
	Name string `json:"name"`
	// URL is required: both handlers trim it and reject an empty value.
	URL string `json:"url"`
	Secret string `json:"secret"`
	EventTypes string `json:"event_types"`
	// Enabled is a pointer so an omitted field can be told apart from an
	// explicit false: create defaults to true when nil, update leaves the
	// stored value unchanged when nil.
	Enabled *bool `json:"enabled"`
	SortOrder int `json:"sort_order"`
}
// createWorkloadNotification handles POST on a workload's /notifications
// route. It requires the workload to exist (404 otherwise), strictly decodes
// the body, trims name and url, and rejects an empty url with 400. A
// non-empty secret is encrypted with the server's encryption key before it is
// stored. `enabled` defaults to true when the field is omitted. Responds 201
// with the created row (secret reported only as secret_set).
func (s *Server) createWorkloadNotification(w http.ResponseWriter, r *http.Request) {
	workloadID := chi.URLParam(r, "id")
	switch _, err := s.store.GetWorkloadByID(workloadID); {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "workload")
		return
	case err != nil:
		respondError(w, http.StatusInternalServerError, "get workload")
		return
	}

	var body workloadNotificationRequest
	if ok := decodeJSONStrict(w, r, &body); !ok {
		return
	}
	target := strings.TrimSpace(body.URL)
	label := strings.TrimSpace(body.Name)
	if target == "" {
		respondError(w, http.StatusBadRequest, "url is required")
		return
	}

	// Only encrypt when a secret was supplied; an empty secret is stored as "".
	sealed := ""
	if body.Secret != "" {
		ciphertext, encErr := crypto.Encrypt(s.encKey, body.Secret)
		if encErr != nil {
			slog.Error("workload notifications: encrypt secret", "workload", workloadID, "error", encErr)
			respondError(w, http.StatusInternalServerError, "encrypt secret")
			return
		}
		sealed = ciphertext
	}

	notification := store.WorkloadNotification{
		WorkloadID: workloadID,
		Name:       label,
		URL:        target,
		Secret:     sealed,
		EventTypes: body.EventTypes,
		Enabled:    true, // default when the client omits the field
		SortOrder:  body.SortOrder,
	}
	if body.Enabled != nil {
		notification.Enabled = *body.Enabled
	}

	created, err := s.store.CreateWorkloadNotification(notification)
	if err != nil {
		slog.Error("workload notifications: create", "workload", workloadID, "error", err)
		respondError(w, http.StatusInternalServerError, "create workload notification")
		return
	}
	respondJSON(w, http.StatusCreated, toWorkloadNotificationRow(created))
}
// updateWorkloadNotification handles PUT on /notifications/{nid} under a
// workload. The workload and the notification must both exist, and the
// notification must belong to the workload in the URL; any of those failing
// yields 404. name, url, event_types and sort_order are overwritten from the
// body (url is required after trimming). `enabled` changes only when present,
// and the stored secret changes only when a non-empty secret is submitted.
// Responds 200 with the updated row.
func (s *Server) updateWorkloadNotification(w http.ResponseWriter, r *http.Request) {
	workloadID, notificationID := chi.URLParam(r, "id"), chi.URLParam(r, "nid")

	switch _, err := s.store.GetWorkloadByID(workloadID); {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "workload")
		return
	case err != nil:
		respondError(w, http.StatusInternalServerError, "get workload")
		return
	}

	current, err := s.store.GetWorkloadNotification(notificationID)
	switch {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "workload_notification")
		return
	case err != nil:
		respondError(w, http.StatusInternalServerError, "get workload_notification")
		return
	case current.WorkloadID != workloadID:
		// Route mismatch — the row exists but under a different workload.
		// Answer 404 rather than 403 so the existence of foreign rows is
		// not leaked to an unauthorised caller.
		respondNotFound(w, "workload_notification")
		return
	}

	var body workloadNotificationRequest
	if !decodeJSONStrict(w, r, &body) {
		return
	}
	body.URL = strings.TrimSpace(body.URL)
	if body.URL == "" {
		respondError(w, http.StatusBadRequest, "url is required")
		return
	}

	current.Name = strings.TrimSpace(body.Name)
	current.URL = body.URL
	current.EventTypes = body.EventTypes
	current.SortOrder = body.SortOrder
	if body.Enabled != nil {
		current.Enabled = *body.Enabled
	}

	// An empty Secret keeps the stored ciphertext; rotation requires sending
	// the new plaintext. This spares the operator from re-entering the
	// secret on every URL edit.
	if body.Secret != "" {
		sealed, encErr := crypto.Encrypt(s.encKey, body.Secret)
		if encErr != nil {
			slog.Error("workload notifications: encrypt secret", "workload", workloadID, "error", encErr)
			respondError(w, http.StatusInternalServerError, "encrypt secret")
			return
		}
		current.Secret = sealed
	}

	switch err := s.store.UpdateWorkloadNotification(current); {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "workload_notification")
		return
	case err != nil:
		slog.Error("workload notifications: update", "workload", workloadID, "error", err)
		respondError(w, http.StatusInternalServerError, "update workload notification")
		return
	}
	respondJSON(w, http.StatusOK, toWorkloadNotificationRow(current))
}
// deleteWorkloadNotification handles DELETE on /notifications/{nid} under a
// workload. The row must exist and belong to the workload in the URL; a
// missing row or one owned by a different workload both yield 404. On
// success it responds 200 with {"success": true}.
func (s *Server) deleteWorkloadNotification(w http.ResponseWriter, r *http.Request) {
	workloadID, notificationID := chi.URLParam(r, "id"), chi.URLParam(r, "nid")

	current, err := s.store.GetWorkloadNotification(notificationID)
	switch {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "workload_notification")
		return
	case err != nil:
		respondError(w, http.StatusInternalServerError, "get workload_notification")
		return
	case current.WorkloadID != workloadID:
		// Same 404 as a missing row, so ownership is not revealed.
		respondNotFound(w, "workload_notification")
		return
	}

	switch err := s.store.DeleteWorkloadNotification(notificationID); {
	case errors.Is(err, store.ErrNotFound):
		respondNotFound(w, "workload_notification")
		return
	case err != nil:
		slog.Error("workload notifications: delete", "workload", workloadID, "error", err)
		respondError(w, http.StatusInternalServerError, "delete workload notification")
		return
	}
	respondJSON(w, http.StatusOK, map[string]any{"success": true})
}
+17 -6
View File
@@ -82,16 +82,27 @@ func (s *Server) getWorkloadRuntimeState(w http.ResponseWriter, r *http.Request)
payload := runtimeStatePayload{SourceKind: workload.SourceKind}
if workload.SourceKind != "static" {
// Both static and dockerfile sources persist their runtime state into
// containers.extra_json under a deterministic row id. The shapes
// match (status / last_commit_sha / last_sync_at / last_error) so the
// handler can decode them identically. The suffix differs per source
// kind: static uses ":site", dockerfile uses ":dockerfile".
var rowSuffix string
switch workload.SourceKind {
case "static":
rowSuffix = ":site"
case "dockerfile":
rowSuffix = ":dockerfile"
default:
respondJSON(w, http.StatusOK, payload)
return
}
// The static plugin owns one container row per workload at the
// deterministic ID <workloadID>:site. A missing row means the
// workload has never been deployed — return HasState=false so the
// UI can prompt the operator to deploy.
row, err := s.store.GetContainerByID(id + ":site")
// The owning plugin maintains one container row per workload at the
// deterministic ID. A missing row means the workload has never been
// deployed — return HasState=false so the UI can prompt the operator
// to deploy.
row, err := s.store.GetContainerByID(id + rowSuffix)
if err != nil {
if errors.Is(err, store.ErrNotFound) {
respondJSON(w, http.StatusOK, payload)
+65 -1
View File
@@ -130,6 +130,13 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
SourceKind: "static",
SourceConfig: `{"provider":"gitea"}`,
})
// Seed a row with a valid extra_json first, then corrupt it via raw
// SQL. Prior to the write-side validateExtraJSON guard this test
// could pass a malformed string straight to UpsertContainer; the
// guard now rejects that at the boundary, which is the correct
// behaviour. The reader resilience this test verifies remains
// relevant for pre-existing bad rows from upgrades or external
// manipulation, so we still produce one via direct SQL.
if err := e.store.UpsertContainer(store.Container{
ID: wl.ID + ":site",
WorkloadID: wl.ID,
@@ -137,10 +144,16 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
Host: "local",
ContainerID: "abc",
State: "running",
ExtraJSON: `{this is not json`,
ExtraJSON: `{}`,
}); err != nil {
t.Fatalf("seed: %v", err)
}
if _, err := e.store.DB().Exec(
`UPDATE containers SET extra_json = ? WHERE id = ?`,
`{this is not json`, wl.ID+":site",
); err != nil {
t.Fatalf("corrupt extra_json: %v", err)
}
resp := e.do(t, http.MethodGet, "/api/workloads/"+wl.ID+"/runtime-state", nil)
if resp.StatusCode != http.StatusOK {
t.Fatalf("status = %d, want 200 (decode is non-fatal)", resp.StatusCode)
@@ -155,6 +168,57 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
}
}
// TestGetWorkloadRuntimeState_DockerfileSourceDeployed_DecodesExtraJSON seeds
// a dockerfile workload plus its container row at <workloadID>:dockerfile and
// checks that GET /api/workloads/{id}/runtime-state returns both the
// container fields and the runtime fields decoded from extra_json.
func TestGetWorkloadRuntimeState_DockerfileSourceDeployed_DecodesExtraJSON(t *testing.T) {
	e := newAPITestEnv(t)
	wl, err := e.store.CreateWorkload(store.Workload{
		Kind:         string(store.WorkloadKindProject),
		Name:         "build-app",
		SourceKind:   "dockerfile",
		SourceConfig: `{"provider":"gitea","port":3000}`,
	})
	if err != nil {
		t.Fatalf("seed workload: %v", err)
	}

	// Fail loudly if the fixture cannot be encoded rather than seeding an
	// empty extra_json and chasing a confusing assertion failure later.
	extra, err := json.Marshal(map[string]any{
		"status":          "deployed",
		"last_commit_sha": "deadbeef",
		"last_sync_at":    "2026-05-23T10:00:00Z",
		"last_error":      "",
	})
	if err != nil {
		t.Fatalf("marshal extra_json: %v", err)
	}
	if err := e.store.UpsertContainer(store.Container{
		ID:           wl.ID + ":dockerfile",
		WorkloadID:   wl.ID,
		WorkloadKind: string(store.WorkloadKindBuild),
		Host:         "local",
		ContainerID:  "ffeeddcc",
		State:        "running",
		ExtraJSON:    string(extra),
	}); err != nil {
		t.Fatalf("seed container: %v", err)
	}

	resp := e.do(t, http.MethodGet, "/api/workloads/"+wl.ID+"/runtime-state", nil)
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("status = %d, want 200", resp.StatusCode)
	}
	var got runtimeStatePayload
	if errMsg := decodeEnvelope(t, resp, &got); errMsg != "" {
		t.Fatalf("envelope error: %q", errMsg)
	}
	if !got.HasState {
		t.Fatalf("HasState = false, want true")
	}
	if got.SourceKind != "dockerfile" {
		t.Errorf("SourceKind = %q, want dockerfile", got.SourceKind)
	}
	if got.ContainerID != "ffeeddcc" || got.State != "running" {
		t.Errorf("container fields = (%q,%q), want (ffeeddcc, running)", got.ContainerID, got.State)
	}
	if got.Status != "deployed" || got.LastCommitSHA != "deadbeef" {
		t.Errorf("runtime fields = %+v, want deployed/deadbeef", got)
	}
}
// =============================================================================
// GET /api/workloads/{id}/storage
// =============================================================================
+23
View File
@@ -14,6 +14,7 @@ import (
"github.com/alexei/tinyforge/internal/auth"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
"github.com/alexei/tinyforge/internal/workload/preview"
)
// pluginWorkloadRequest is the JSON body accepted by create + update.
@@ -227,6 +228,28 @@ func (s *Server) deletePluginWorkload(w http.ResponseWriter, r *http.Request) {
return
}
// Cascade-teardown any branch previews materialized from this workload
// so deleting a template does not orphan their containers, proxy routes,
// and rows. Operator-managed stage-chain children (which share the same
// parent link) are deliberately left alone — only previews are auto-owned
// by the template (see preview.IsPreviewChild).
if previews, err := preview.ListPreviewChildren(s.store, row); err != nil {
slog.Warn("delete workload: list preview children", "workload", id, "error", err)
} else {
for _, child := range previews {
if child.SourceKind != "" {
if err := s.deployer.DispatchTeardown(r.Context(), toPluginWorkload(child)); err != nil {
slog.Warn("delete workload: preview child teardown error",
"workload", id, "child", child.ID, "error", err)
}
}
if err := s.store.DeleteWorkload(child.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
slog.Warn("delete workload: preview child delete error",
"workload", id, "child", child.ID, "error", err)
}
}
}
if row.SourceKind != "" {
if err := s.deployer.DispatchTeardown(r.Context(), toPluginWorkload(row)); err != nil {
slog.Warn("delete workload: teardown error",
+7 -1
View File
@@ -85,9 +85,15 @@ func (la *LocalAuth) cleanBlacklist() {
}
}
// bcryptCost is the work factor used for new password hashes. Raised from
// the library default (10) to 12 to keep pace with faster hardware. Existing
// hashes at lower costs still verify — bcrypt encodes the cost in the
// stored hash itself.
const bcryptCost = 12
// HashPassword hashes a plaintext password using bcrypt.
func HashPassword(password string) (string, error) {
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcryptCost)
if err != nil {
return "", fmt.Errorf("hash password: %w", err)
}
+162
View File
@@ -1,13 +1,17 @@
package backup
import (
"database/sql"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"sync"
"time"
_ "modernc.org/sqlite" // read-only candidate inspection via PRAGMA integrity_check
"github.com/alexei/tinyforge/internal/store"
)
@@ -129,6 +133,17 @@ func (e *Engine) RestorePath(id string) (string, error) {
return "", fmt.Errorf("get backup: %w", err)
}
// Filename comes from a DB row. Defence-in-depth: a backup file must live
// directly under backupDir, so reject any value carrying a path separator
// or traversal before joining. A poisoned row (future import path, manual
// insert) must never let restore read — and then atomically copy over the
// live DB — an arbitrary file. CreateBackup builds safe base names; this
// enforces the same invariant on read.
if backup.Filename == "" || backup.Filename == "." || backup.Filename == ".." ||
backup.Filename != filepath.Base(backup.Filename) {
return "", fmt.Errorf("backup: invalid filename %q", backup.Filename)
}
filePath := filepath.Join(e.backupDir, backup.Filename)
if _, err := os.Stat(filePath); err != nil {
return "", fmt.Errorf("backup file not found: %w", err)
@@ -137,6 +152,153 @@ func (e *Engine) RestorePath(id string) (string, error) {
return filePath, nil
}
// PrepareRestore validates a backup candidate before the caller swaps it
// over the live DB. The checks run in order:
//
//  1. RestorePath resolves the backup id to an existing file.
//  2. The file is at least 100 bytes long.
//  3. Its first 16 bytes are the SQLite header magic (catches corrupted or
//     partial downloads).
//  4. `PRAGMA integrity_check` reports "ok" (catches page corruption that
//     the header check misses).
//
// On success it returns the candidate path. On failure it returns a wrapped
// error naming the probe that rejected the file, so the operator sees why a
// restore was refused instead of getting a corrupt DB at next boot.
//
// The integrity check runs against the candidate file itself via
// integrityCheck; no temporary copy is made here.
func (e *Engine) PrepareRestore(id string) (string, error) {
	// SQLite file header: 16 bytes including the trailing NUL.
	const sqliteMagic = "SQLite format 3\x00"

	candidate, err := e.RestorePath(id)
	if err != nil {
		return "", err
	}

	stat, statErr := os.Stat(candidate)
	if statErr != nil {
		return "", fmt.Errorf("restore: stat candidate: %w", statErr)
	}
	if size := stat.Size(); size < 100 {
		return "", fmt.Errorf("restore: candidate %s is suspiciously small (%d bytes)", candidate, size)
	}

	magic, readErr := readHead(candidate, len(sqliteMagic))
	if readErr != nil {
		return "", fmt.Errorf("restore: read header: %w", readErr)
	}
	if string(magic) != sqliteMagic {
		return "", fmt.Errorf("restore: candidate %s is not a SQLite database (header mismatch)", candidate)
	}

	if checkErr := integrityCheck(candidate); checkErr != nil {
		return "", fmt.Errorf("restore: integrity check failed: %w", checkErr)
	}
	return candidate, nil
}
// readHead returns exactly the first n bytes of the file at path. It fails if
// the file cannot be opened or holds fewer than n bytes, so callers never see
// a partially filled buffer.
func readHead(path string, n int) ([]byte, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	head := make([]byte, n)
	// Require the whole buffer: a bare Read may return short on some
	// filesystems or small files, which would skew a header-magic comparison.
	if _, readErr := io.ReadAtLeast(file, head, len(head)); readErr != nil {
		return nil, readErr
	}
	return head, nil
}
// integrityCheck opens the candidate read-only and runs
// `PRAGMA integrity_check`. We use immutable=1 so the driver does not
// try to create WAL/SHM sidecars or upgrade the journal mode on the
// candidate — both of which fail with "attempt to write a readonly
// database" against a backup file. Only the first result row is inspected:
// anything other than the single row `"ok"` is treated as corruption.
//
// Returns nil when the database reports "ok"; otherwise an error describing
// which step failed (open, query, row iteration, scan) or the first problem
// SQLite reported.
func integrityCheck(path string) error {
	db, err := sql.Open("sqlite", "file:"+path+"?mode=ro&immutable=1")
	if err != nil {
		return fmt.Errorf("open candidate: %w", err)
	}
	defer db.Close()

	rows, err := db.Query("PRAGMA integrity_check")
	if err != nil {
		return fmt.Errorf("pragma integrity_check: %w", err)
	}
	defer rows.Close()

	if !rows.Next() {
		// Next returns false both at end-of-results and on failure; surface
		// the real cause instead of reporting it as an empty result set.
		if err := rows.Err(); err != nil {
			return fmt.Errorf("read integrity_check: %w", err)
		}
		return fmt.Errorf("integrity_check returned no rows")
	}
	var result string
	if err := rows.Scan(&result); err != nil {
		return fmt.Errorf("scan integrity_check: %w", err)
	}
	if result != "ok" {
		return fmt.Errorf("integrity_check: %s", result)
	}
	return nil
}
// AtomicReplaceDB writes a backup candidate into place atomically.
// The caller is expected to:
//  1. Call PrepareRestore(id) → candidatePath.
//  2. Take a "pre-restore" backup of the current DB via CreateBackup.
//  3. Close the live *sql.DB.
//  4. Call AtomicReplaceDB(candidatePath, livePath).
//  5. Trigger graceful shutdown; main() will re-open on next start.
//
// The candidate is copied to livePath + ".restore.tmp" and then renamed over
// livePath. AtomicReplaceDB also wipes WAL/SHM sidecar files so the new DB
// starts from a clean checkpoint state. Failure to remove sidecars is logged
// but non-fatal — SQLite recreates them on open.
//
// If the copy or the rename fails, the temp file is removed and a wrapped
// error is returned.
func (e *Engine) AtomicReplaceDB(candidatePath, livePath string) error {
	// Copy candidate to a tmp file next to the live DB, then rename
	// atomically. On Windows os.Rename across volumes fails, so we
	// keep tmp on the same dir as the destination.
	tmp := livePath + ".restore.tmp"
	if err := copyFile(candidatePath, tmp); err != nil {
		// copyFile may have created or partly written tmp before failing;
		// remove it so a failed restore does not leave a stray partial DB
		// beside the live one. A "not exist" result here is expected and
		// harmless, hence the ignored error.
		_ = os.Remove(tmp)
		return fmt.Errorf("copy candidate to %s: %w", tmp, err)
	}

	// Best-effort: remove WAL/SHM so SQLite re-checkpoints from the
	// restored main file rather than a stale WAL pointing at the old
	// DB's pages.
	for _, sidecar := range []string{livePath + "-wal", livePath + "-shm"} {
		if err := os.Remove(sidecar); err != nil && !os.IsNotExist(err) {
			slog.Warn("restore: remove sidecar", "path", sidecar, "error", err)
		}
	}

	if err := os.Rename(tmp, livePath); err != nil {
		// Clean up tmp on rename failure so we don't leak a partial file.
		_ = os.Remove(tmp)
		return fmt.Errorf("rename %s → %s: %w", tmp, livePath, err)
	}
	slog.Info("restore: database file replaced atomically", "live", livePath)
	return nil
}
// copyFile copies the contents of src into dst, creating dst with mode 0600
// or truncating it if it already exists. The data is flushed to stable
// storage before dst is closed, because the caller renames dst over the live
// database and a rename of un-synced data can surface as an empty or
// truncated file after a crash. Returns the first error from open, copy,
// sync, or close.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		// The copy error is the one worth reporting; the close result
		// adds nothing.
		_ = out.Close()
		return err
	}
	if err := out.Sync(); err != nil {
		_ = out.Close()
		return err
	}
	return out.Close()
}
// Prune removes old backups exceeding the retention count.
// Returns the number of backups pruned.
func (e *Engine) Prune(retentionCount int) (int, error) {
+113
View File
@@ -0,0 +1,113 @@
package backup
import (
"errors"
"os"
"path/filepath"
"strings"
"testing"
"github.com/alexei/tinyforge/internal/store"
)
// newTestEngine builds an isolated store + backup engine for one test. The
// database file and the directory handed to New both live in the test's own
// temp dir, so backup files from different tests cannot collide. The store
// is closed automatically at test cleanup. Returns the engine, the store,
// and the database path.
func newTestEngine(t *testing.T) (*Engine, *store.Store, string) {
	t.Helper()

	root := t.TempDir()
	dbFile := filepath.Join(root, "tinyforge.db")

	db, err := store.New(dbFile)
	if err != nil {
		t.Fatalf("store.New: %v", err)
	}
	t.Cleanup(func() { _ = db.Close() })

	engine, err := New(db, dbFile, root)
	if err != nil {
		t.Fatalf("backup.New: %v", err)
	}
	return engine, db, dbFile
}
// TestPrepareRestore_RejectsTinyFile registers a 2-byte file as a backup and
// expects PrepareRestore to refuse it with the "suspiciously small" error.
func TestPrepareRestore_RejectsTinyFile(t *testing.T) {
	eng, st, _ := newTestEngine(t)

	// Plant a tiny file masquerading as a backup, plus a row pointing at it.
	const name = "tinyforge-manual-junk.db"
	payload := []byte("hi")
	if err := os.WriteFile(filepath.Join(eng.BackupDir(), name), payload, 0o600); err != nil {
		t.Fatalf("write tiny: %v", err)
	}
	row, err := st.CreateBackup(store.Backup{
		Filename:   name,
		SizeBytes:  2,
		BackupType: "manual",
	})
	if err != nil {
		t.Fatalf("CreateBackup row: %v", err)
	}

	_, err = eng.PrepareRestore(row.ID)
	if err == nil {
		t.Fatal("expected PrepareRestore to reject tiny file, got nil")
	}
	if !strings.Contains(err.Error(), "suspiciously small") {
		t.Errorf("error = %v, want 'suspiciously small'", err)
	}
}
// TestPrepareRestore_RejectsNonSQLite registers 200 bytes of non-SQLite data
// as a backup. The file is large enough to pass the size check, so the
// rejection must come from the header magic comparison.
func TestPrepareRestore_RejectsNonSQLite(t *testing.T) {
	eng, st, _ := newTestEngine(t)

	const name = "tinyforge-manual-bogus.db"
	junk := []byte(strings.Repeat("x", 200))
	if err := os.WriteFile(filepath.Join(eng.BackupDir(), name), junk, 0o600); err != nil {
		t.Fatalf("write junk: %v", err)
	}
	row, err := st.CreateBackup(store.Backup{
		Filename:   name,
		SizeBytes:  int64(len(junk)),
		BackupType: "manual",
	})
	if err != nil {
		t.Fatalf("CreateBackup row: %v", err)
	}

	_, err = eng.PrepareRestore(row.ID)
	if err == nil {
		t.Fatal("expected PrepareRestore to reject non-SQLite blob, got nil")
	}
	if !strings.Contains(err.Error(), "header") {
		t.Errorf("error = %v, want header mismatch", err)
	}
}
// TestPrepareRestore_AcceptsValidVacuumInto creates a backup through the
// engine itself and checks that PrepareRestore accepts it and returns a
// non-empty path.
func TestPrepareRestore_AcceptsValidVacuumInto(t *testing.T) {
	eng, _, _ := newTestEngine(t)

	// A backup the engine just produced is expected to be a valid SQLite
	// database, so it should pass every PrepareRestore check.
	created, createErr := eng.CreateBackup("manual")
	if createErr != nil {
		t.Fatalf("CreateBackup: %v", createErr)
	}

	restorePath, restoreErr := eng.PrepareRestore(created.ID)
	if restoreErr != nil {
		t.Fatalf("PrepareRestore on valid backup: %v", restoreErr)
	}
	if len(restorePath) == 0 {
		t.Errorf("PrepareRestore returned empty path")
	}
}
// TestPrepareRestore_UnknownID checks that PrepareRestore fails for an ID
// that has no backup row.
func TestPrepareRestore_UnknownID(t *testing.T) {
	eng, _, _ := newTestEngine(t)
	_, err := eng.PrepareRestore("nonexistent-id")
	if err == nil {
		t.Fatal("expected error for unknown id, got nil")
	}
	// The error is expected to wrap store.ErrNotFound, but that is not a
	// hard contract of this test. Instead of an empty branch that checks
	// nothing, record when the sentinel is missing so a change in wrapping
	// shows up in verbose test output.
	if !errors.Is(err, store.ErrNotFound) {
		t.Logf("PrepareRestore error does not wrap store.ErrNotFound: %v", err)
	}
}
+46 -10
View File
@@ -10,11 +10,26 @@ import (
"fmt"
"io"
"os"
"strings"
)
// ErrNoKey is returned when ENCRYPTION_KEY is not set.
var ErrNoKey = errors.New("ENCRYPTION_KEY environment variable is not set")
// ErrDecryptFailed wraps any cipher.Open / decoder failure. Callers
// upgrading from the silent-fallback pattern (treat-as-plaintext when
// decrypt errored) MUST instead surface this — a rotated key would
// otherwise silently leak ciphertext to upstream services as if it
// were plaintext.
var ErrDecryptFailed = errors.New("crypto: decrypt failed (wrong key, corrupted ciphertext, or unversioned legacy value)")
// envelopeV1Prefix tags ciphertext produced by Encrypt going forward.
// Older databases may carry unprefixed hex blobs from the v0 era; those
// are still readable via Decrypt for backward compatibility, but every
// new write goes through Encrypt and emits the prefix so a future key
// rotation has a clean fail-loud signal.
const envelopeV1Prefix = "tf1:"
// DeriveKey computes a 32-byte AES-256 key from the given passphrase using SHA-256.
// This is acceptable when ENCRYPTION_KEY is a high-entropy random string (e.g., 32+ hex chars).
// For human-chosen passphrases, consider Argon2id or PBKDF2 with a salt instead.
@@ -35,7 +50,8 @@ func KeyFromEnv() ([32]byte, error) {
}
// Encrypt encrypts plaintext using AES-256-GCM with a random nonce.
// The returned ciphertext is hex-encoded: nonce || ciphertext+tag.
// Returns a versioned envelope (tf1:<hex>) so downstream readers can
// distinguish ciphertext from accidentally-stored plaintext.
func Encrypt(key [32]byte, plaintext string) (string, error) {
block, err := aes.NewCipher(key[:])
if err != nil {
@@ -53,14 +69,34 @@ func Encrypt(key [32]byte, plaintext string) (string, error) {
}
sealed := gcm.Seal(nonce, nonce, []byte(plaintext), nil)
return hex.EncodeToString(sealed), nil
return envelopeV1Prefix + hex.EncodeToString(sealed), nil
}
// Decrypt decrypts a hex-encoded ciphertext produced by Encrypt.
func Decrypt(key [32]byte, ciphertextHex string) (string, error) {
data, err := hex.DecodeString(ciphertextHex)
// HasEnvelope reports whether the value is a v1-prefixed ciphertext.
// Useful for router-level "decrypt only if encrypted" decision points
// that previously relied on `err == nil` from a try-decrypt — that
// pattern silently masked rotated-key failures.
func HasEnvelope(value string) bool {
return strings.HasPrefix(value, envelopeV1Prefix)
}
// Decrypt decrypts an envelope (tf1:<hex>). For backward compatibility
// it also accepts unprefixed hex from the v0 era — but only when the
// resulting plaintext is valid; a wrong key for legacy data now returns
// ErrDecryptFailed instead of silently treating ciphertext as
// plaintext.
//
// Callers MUST NOT swallow the error and fall back to "use as-is".
// That pattern is the exact footgun the envelope versioning removes.
func Decrypt(key [32]byte, ciphertext string) (string, error) {
hexBlob := ciphertext
if strings.HasPrefix(hexBlob, envelopeV1Prefix) {
hexBlob = hexBlob[len(envelopeV1Prefix):]
}
data, err := hex.DecodeString(hexBlob)
if err != nil {
return "", fmt.Errorf("decode hex: %w", err)
return "", fmt.Errorf("%w: decode hex: %v", ErrDecryptFailed, err)
}
block, err := aes.NewCipher(key[:])
@@ -75,15 +111,15 @@ func Decrypt(key [32]byte, ciphertextHex string) (string, error) {
nonceSize := gcm.NonceSize()
if len(data) < nonceSize {
return "", errors.New("ciphertext too short")
return "", fmt.Errorf("%w: ciphertext too short", ErrDecryptFailed)
}
nonce := data[:nonceSize]
ciphertext := data[nonceSize:]
body := data[nonceSize:]
plaintext, err := gcm.Open(nil, nonce, ciphertext, nil)
plaintext, err := gcm.Open(nil, nonce, body, nil)
if err != nil {
return "", fmt.Errorf("decrypt: %w", err)
return "", fmt.Errorf("%w: %v", ErrDecryptFailed, err)
}
return string(plaintext), nil
+30 -5
View File
@@ -34,7 +34,19 @@ type Deployer struct {
dnsMu sync.RWMutex
dns dns.Provider // nil when wildcard DNS is active
// proxyMu protects hot-swap of d.proxy from runtime settings updates
// (SetProxyProvider) racing with PluginDeps() reads on the deploy path.
proxyMu sync.RWMutex
// Graceful shutdown: tracks in-progress deploys.
//
// drainMu serializes the "is-draining check + activeWg.Add(1)" in
// beginDispatch against the "set shuttingDown + Wait()" in Drain. Without
// it, a dispatch could pass the draining check, Drain could then flip the
// flag and start Wait() with a zero counter, and the dispatch could call
// Add(1) concurrently with Wait — a documented sync.WaitGroup misuse
// (panic risk) that also lets a deploy slip past the drain barrier.
drainMu sync.Mutex
activeWg sync.WaitGroup
shuttingDown atomic.Bool
}
@@ -73,7 +85,11 @@ func New(
}
// SetProxyProvider swaps the proxy provider at runtime (e.g., when settings
// change). The write happens under proxyMu so that deploys reading d.proxy
// through PluginDeps() never race with the replacement.
func (d *Deployer) SetProxyProvider(provider proxy.Provider) {
	d.proxyMu.Lock()
	d.proxy = provider
	d.proxyMu.Unlock()
}
@@ -110,8 +126,11 @@ func (d *Deployer) SetDNSProvider(provider dns.Provider) {
// Drain waits for all in-progress deploys to complete. Call this during graceful shutdown.
func (d *Deployer) Drain() {
if !d.shuttingDown.CompareAndSwap(false, true) {
// Already draining.
d.drainMu.Lock()
already := d.shuttingDown.Swap(true)
d.drainMu.Unlock()
if already {
slog.Info("deployer: drain already in progress")
}
slog.Info("deployer: draining in-progress deploys")
d.activeWg.Wait()
@@ -121,11 +140,17 @@ func (d *Deployer) Drain() {
// ShuttingDown reports whether Drain() has been called, i.e. whether the
// shuttingDown flag has been set.
func (d *Deployer) ShuttingDown() bool {
	draining := d.shuttingDown.Load()
	return draining
}
// rejectIfDraining is exposed in case any plugin wants the same hard-stop
// behaviour the legacy pipeline used.
func (d *Deployer) rejectIfDraining() error {
// beginDispatch registers one in-flight unit on activeWg unless the
// deployer is draining, in which case it returns an error and registers
// nothing. The flag check and the Add(1) run together under drainMu; Drain
// sets the flag under the same mutex before it calls Wait(), so an Add can
// never race a Wait that has already seen a zero counter. On a nil return
// the caller owns one unit and must defer d.activeWg.Done().
func (d *Deployer) beginDispatch() error {
	d.drainMu.Lock()
	defer d.drainMu.Unlock()

	if !d.shuttingDown.Load() {
		d.activeWg.Add(1)
		return nil
	}
	return fmt.Errorf("deployer is shutting down, rejecting new deploy")
}
+38 -4
View File
@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"github.com/alexei/tinyforge/internal/metrics"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
@@ -14,16 +15,37 @@ import (
// triggers + image deploys still go through the legacy path, while
// /api/hooks/generic + the unified webhook ingress go through here.
func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
	// Register with the drain barrier first; a draining deployer rejects
	// the deploy and records the rejection.
	if err := d.beginDispatch(); err != nil {
		metrics.DeploysTotal.Inc(w.SourceKind, "rejected_draining")
		return err
	}
	defer d.activeWg.Done()

	src, err := plugin.GetSource(w.SourceKind)
	if err != nil {
		// Unknown source: use the constant "unknown" sentinel for the
		// label so a typo-spam attack can't grow the metrics map with
		// one series per bogus source_kind. The actual user-supplied
		// value still surfaces via the wrapped error / event log.
		metrics.DeploysTotal.Inc("unknown", "unknown_source")
		return fmt.Errorf("dispatch %s: %w", w.Name, err)
	}

	// Run the deploy exactly once and record its outcome before returning.
	// An early `return src.Deploy(...)` ahead of this point would make the
	// outcome counter unreachable.
	err = src.Deploy(ctx, d.PluginDeps(), w, intent)
	outcome := "success"
	if err != nil {
		outcome = "failure"
	}
	metrics.DeploysTotal.Inc(w.SourceKind, outcome)
	return err
}
// DispatchTeardown routes a teardown call to the matching Source plugin.
// Used when a workload is deleted.
// Used when a workload is deleted. Tracked via activeWg so Drain() honours
// in-progress teardowns just like deploys.
func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) error {
if err := d.beginDispatch(); err != nil {
return err
}
defer d.activeWg.Done()
src, err := plugin.GetSource(w.SourceKind)
if err != nil {
return fmt.Errorf("dispatch teardown %s: %w", w.Name, err)
@@ -33,8 +55,17 @@ func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) erro
// DispatchReconcile routes a Reconcile call. Periodic reconciler iterates
// every Workload and calls this; idle Sources should make it a cheap
// no-op.
// no-op. Tracked via activeWg so a long-running reconcile blocks Drain().
func (d *Deployer) DispatchReconcile(ctx context.Context, w plugin.Workload) error {
if err := d.beginDispatch(); err != nil {
// Silent skip — reconcile is a periodic tick, not a user-initiated
// action, so we don't want to surface "draining" errors back to the
// reconciler loop. The next tick after restart will catch up. Routing
// through beginDispatch keeps the activeWg.Add atomic with the drain
// check (see Drain) instead of a bare shuttingDown.Load + Add race.
return nil
}
defer d.activeWg.Done()
src, err := plugin.GetSource(w.SourceKind)
if err != nil {
return fmt.Errorf("dispatch reconcile %s: %w", w.Name, err)
@@ -52,10 +83,13 @@ func (d *Deployer) PluginDeps() plugin.Deps {
d.dnsMu.RLock()
dnsProvider := d.dns
d.dnsMu.RUnlock()
d.proxyMu.RLock()
proxyProvider := d.proxy
d.proxyMu.RUnlock()
return plugin.Deps{
Store: d.store,
Docker: d.docker,
Proxy: d.proxy,
Proxy: proxyProvider,
DNS: dnsProvider,
Health: d.health,
Notifier: d.notifier,
+119 -20
View File
@@ -2,20 +2,58 @@ package docker
import (
"archive/tar"
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/moby/moby/api/types/build"
"github.com/moby/moby/client"
)
// BuildImage builds a Docker image from a directory containing a Dockerfile.
// The directory is packaged as a tar archive and sent to the Docker daemon.
// The tag parameter is the image name:tag to apply (e.g., "dw-site-myapp:latest").
// BuildImage builds and tags an image from contextDir using the Dockerfile
// at the context root. It is a thin wrapper over BuildImageAt with the
// Dockerfile path fixed to "Dockerfile" and no log callback, kept for the
// static-site plugin, which always writes its generated Dockerfile at the
// context root. New code should call BuildImageAt so the Dockerfile path
// is explicit.
func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
	const rootDockerfile = "Dockerfile"
	return c.BuildImageAt(ctx, contextDir, rootDockerfile, tag, nil)
}
// BuildImageAt builds a Docker image from a tar of contextDir, using the
// Dockerfile at `dockerfile` *inside* the context (typically "Dockerfile"
// but may be e.g. "docker/Dockerfile" when the user-supplied repo layout
// keeps Dockerfiles in a subfolder).
//
// The dockerfile argument is the path *relative to contextDir*. Empty
// strings are normalised to "Dockerfile" so callers can pass through a
// user config value without sanitising twice.
//
// logFn, if non-nil, is invoked for every non-empty `stream` line the
// daemon emits during the build. Callers use this to forward live build
// progress (e.g. SSE bus). Errors from the daemon are NOT delivered via
// logFn — they surface as the returned error so the caller's failure
// path stays the single source of truth.
func (c *Client) BuildImageAt(ctx context.Context, contextDir, dockerfile, tag string, logFn func(line string)) error {
if dockerfile == "" {
dockerfile = "Dockerfile"
}
// Normalise to forward slashes — the tar entry names use them and the
// Docker daemon expects the same.
dockerfile = filepath.ToSlash(dockerfile)
// Defence-in-depth: the dockerfile path is relative to contextDir and
// is increasingly user/config-supplied (subfolder Dockerfiles). Reject
// absolute paths and any `..` traversal at the boundary so a value like
// "../../etc/passwd" can never be handed to the daemon's build options,
// regardless of which builder backend resolves it.
if filepath.IsAbs(dockerfile) || strings.HasPrefix(dockerfile, "/") ||
dockerfile == ".." || strings.HasPrefix(dockerfile, "../") || strings.Contains(dockerfile, "/../") {
return fmt.Errorf("docker build: invalid dockerfile path %q (must be relative to the build context, no traversal)", dockerfile)
}
// Create tar archive of the build context.
pr, pw := io.Pipe()
@@ -50,16 +88,14 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
return nil
}
file, err := os.Open(path)
if err != nil {
return fmt.Errorf("open %s: %w", path, err)
// Per-file close, NOT defer. `defer file.Close()` inside the
// WalkFunc only runs when the outer goroutine returns — for a
// build context with thousands of files (node_modules-heavy
// repo) that leaks one fd per file until the walk completes
// and trips EMFILE on default ulimit=1024 systems.
if err := streamFileIntoTar(tw, path, relPath); err != nil {
return err
}
defer file.Close()
if _, err := io.Copy(tw, file); err != nil {
return fmt.Errorf("copy %s to tar: %w", relPath, err)
}
return nil
})
@@ -69,8 +105,16 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
pw.CloseWithError(err)
}()
// Pin the legacy builder explicitly. On Docker Engine 23+ BuildKit
// is the default for the CLI, but the daemon honours the explicit
// Version field on ImageBuildOptions. Legacy builder does NOT support
// `RUN --mount=type=bind,source=/host` so a malicious Dockerfile
// cannot mount host paths into the build context. Switching to
// BuildKit later requires (a) Dockerfile-content validation to
// reject bind-mount hints, or (b) an explicit per-workload opt-in.
resp, err := c.api.ImageBuild(ctx, pr, client.ImageBuildOptions{
Dockerfile: "Dockerfile",
Version: build.BuilderV1,
Dockerfile: dockerfile,
Tags: []string{tag},
Remove: true,
ForceRemove: true,
@@ -80,16 +124,71 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
}
defer resp.Body.Close()
// Read the build output to completion (required for the build to finish).
output, err := io.ReadAll(resp.Body)
if err != nil {
// Drain the daemon's NDJSON stream to completion. The stream MUST
// be read for the build to finish — closing the body early aborts
// the build. We parse line-by-line into the {Stream, Error} shape
// the daemon emits so an honest `{"error":"..."}` line surfaces
// without false positives from informational `{"stream":"error
// handling: retrying..."}` chatter that the old strings.Contains
// path would have flagged.
type buildLine struct {
Stream string `json:"stream,omitempty"`
Error string `json:"error,omitempty"`
}
scanner := bufio.NewScanner(resp.Body)
// Some build steps emit single lines exceeding the default 64 KiB
// (e.g. a fat go-mod-download dump). Bump to 1 MiB so we don't
// silently truncate and miss the trailing error line.
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
var firstErr string
for scanner.Scan() {
line := scanner.Bytes()
if len(line) == 0 {
continue
}
var bl buildLine
if err := json.Unmarshal(line, &bl); err != nil {
// Non-JSON line — daemon shouldn't produce these, but
// don't fail the build over a parse hiccup.
continue
}
if bl.Error != "" && firstErr == "" {
firstErr = bl.Error
}
if logFn != nil && bl.Stream != "" {
logFn(bl.Stream)
}
}
if err := scanner.Err(); err != nil {
return fmt.Errorf("read build output for %s: %w", tag, err)
}
// Check for error in build output.
if strings.Contains(string(output), `"error"`) {
return fmt.Errorf("build image %s: build errors in output", tag)
if firstErr != "" {
return fmt.Errorf("build image %s: %s", tag, firstErr)
}
return nil
}
// streamFileIntoTar copies the contents of the file at path into tw and
// closes the file before returning, so the descriptor is released per file
// rather than at the end of a whole directory walk. It writes only the file
// body. relPath is used solely to label the error message.
//
// Errors: a failed open is reported as "open <path>: ..."; a failed copy,
// or a failed close after a clean copy, is reported as
// "copy <relPath> to tar: ...". When both copy and close fail, the copy
// error is the one returned.
func streamFileIntoTar(tw *tar.Writer, path, relPath string) error {
	src, openErr := os.Open(path)
	if openErr != nil {
		return fmt.Errorf("open %s: %w", path, openErr)
	}

	_, failure := io.Copy(tw, src)
	// Close unconditionally so the fd is freed even when the copy failed.
	closeErr := src.Close()
	if failure == nil {
		failure = closeErr
	}
	if failure == nil {
		return nil
	}
	return fmt.Errorf("copy %s to tar: %w", relPath, failure)
}
+15
View File
@@ -27,6 +27,13 @@ const (
// EventStackStatus is emitted when a compose stack status changes.
EventStackStatus EventType = "stack_status"
// EventBuildLog is emitted for each line of a streaming image build.
// Per-line events are ephemeral (not persisted to the event_log) — they
// exist to drive a live tail UI during the slow "building" phase of a
// dockerfile-source deploy. Subscribers should filter by WorkloadID
// because every dockerfile deploy on the box publishes on the same bus.
EventBuildLog EventType = "build_log"
)
// Event is a single event published on the bus.
@@ -77,6 +84,14 @@ type StaticSiteStatusPayload struct {
Status string `json:"status"`
}
// BuildLogPayload is the payload for EventBuildLog events. One event
// per non-empty line read off the daemon's NDJSON build stream.
type BuildLogPayload struct {
// WorkloadID identifies the workload whose build produced the line.
WorkloadID string `json:"workload_id"`
// Line is the build output line carried by this event.
Line string `json:"line"`
// Stream is optional and omitted from JSON when empty.
// NOTE(review): presumably names the originating output stream — confirm
// against the publisher.
Stream string `json:"stream,omitempty"`
}
// StackStatusPayload is the payload for EventStackStatus events.
type StackStatusPayload struct {
StackID string `json:"stack_id"`
+250
View File
@@ -0,0 +1,250 @@
// Package metrics provides a minimal Prometheus text-format exposition
// of Tinyforge's operational counters. We deliberately do NOT import the
// official client_golang library: the metrics set here is small, the text
// format is simple, and avoiding the dependency keeps `tinyforge` a fast
// single-binary install.
//
// Every counter is a sync/atomic.Int64 — cheap, lock-free, and safe to
// touch from any goroutine. Histograms / gauges aren't modeled yet; the
// few we need (request latency p50/p99) live downstream of slog and can
// be added when the operator actually wants them.
package metrics
import (
"fmt"
"io"
"log/slog"
"sort"
"strings"
"sync"
"sync/atomic"
)
// Registry holds a set of named counters. Obtain one through newRegistry,
// which allocates the counters map; DefaultRegistry below is the
// process-wide instance that Tinyforge's own metrics register against.
type Registry struct {
// mu guards the counters map (registration writes, exposition reads).
mu sync.RWMutex
// counters indexes each registered counter by its metric name.
counters map[string]*counter
}
// counter is the registry-owned state behind one metric: its metadata plus
// one atomic cell per distinct label-value tuple.
type counter struct {
name string
help string
labels []string // label names, ordered as declared at registration
// series maps an encoded label-value tuple (see encodeKey) to its cell.
series map[string]*atomic.Int64
// seriesMu guards the series map. Add holds it for the lookup (and the
// insert of a new tuple) and releases it before the atomic increment, so
// only the increment itself runs outside the lock.
seriesMu sync.Mutex
}
// DefaultRegistry is the process-wide registry. All Tinyforge metrics
// register against it. Tests can instantiate their own Registry.
var DefaultRegistry = newRegistry()
// newRegistry returns an empty Registry with its counters map allocated.
func newRegistry() *Registry {
	reg := new(Registry)
	reg.counters = map[string]*counter{}
	return reg
}
// NewCounter declares a counter on DefaultRegistry and returns a handle to
// it. It is intended to be called once, at package init or during server
// construction; registering a name that already exists returns a handle to
// the existing counter, so repeated registration is safe.
//
// labels names the counter's dimensions. Calls to Inc/Add must supply one
// value per label, in the same order. Pass no labels for a label-less
// counter.
func NewCounter(name, help string, labels ...string) *Counter {
	handle := DefaultRegistry.NewCounter(name, help, labels...)
	return handle
}
// NewCounter registers a counter on this Registry and returns a handle to
// it — useful in tests that want an isolated registry.
//
// If name is already registered, a handle to the existing counter is
// returned and help/labels are ignored. The label names are copied, so the
// caller may reuse its slice afterwards. The counters map is allocated on
// first use, which makes a zero-value Registry usable instead of panicking
// on a write to a nil map.
func (r *Registry) NewCounter(name, help string, labels ...string) *Counter {
	r.mu.Lock()
	defer r.mu.Unlock()

	if existing, ok := r.counters[name]; ok {
		return &Counter{c: existing}
	}
	if r.counters == nil {
		r.counters = make(map[string]*counter)
	}
	c := &counter{
		name:   name,
		help:   help,
		labels: append([]string(nil), labels...),
		series: make(map[string]*atomic.Int64),
	}
	r.counters[name] = c
	return &Counter{c: c}
}
// Counter is the public handle returned by NewCounter. It wraps a pointer
// to the state stored in the owning registry, so every copy of a Counter
// updates the same series.
type Counter struct {
// c is the shared per-metric state held by the Registry.
c *counter
}
// Inc increments the counter for the given label values by delegating to
// Add with a delta of 1. The number of values must match the label names
// declared at registration; on a mismatch Add logs an error and drops the
// sample rather than panicking.
func (c Counter) Inc(labelValues ...string) {
c.Add(1, labelValues...)
}
// Add adds delta to the series selected by labelValues, creating the series
// on first use. A negative delta is ignored because counters are monotonic.
//
// A label-count mismatch is a programmer error. It is logged and the sample
// dropped rather than panicking: Add runs on hot paths, some of them off
// the request goroutine, where a panic would take down the whole process
// instead of a single request.
func (c Counter) Add(delta int64, labelValues ...string) {
	state := c.c
	switch {
	case delta < 0:
		return
	case len(labelValues) != len(state.labels):
		slog.Error("metrics: label count mismatch — dropping sample",
			"counter", state.name, "want", len(state.labels), "got", len(labelValues))
		return
	}

	key := encodeKey(labelValues)

	// Hold the lock only for the lookup/insert; the increment itself is
	// atomic and happens after the lock is released.
	state.seriesMu.Lock()
	cell, found := state.series[key]
	if !found {
		cell = new(atomic.Int64)
		state.series[key] = cell
	}
	state.seriesMu.Unlock()

	cell.Add(delta)
}
// encodeKey builds the series-map key for a label-value tuple by joining
// the values with the 0x1f byte. The result is only a map index; escaping
// for the text format happens later, at exposition time.
func encodeKey(values []string) string {
	const unitSeparator = "\x1f"
	return strings.Join(values, unitSeparator)
}
// WritePrometheus writes every registered counter to w in the text
// exposition format that Prometheus, VictoriaMetrics, and OpenMetrics
// consumers understand. Output order is stable: counters are sorted by
// name, and series within a counter by encoded label tuple.
//
// The counter set is snapshotted under a single read-lock acquisition
// rather than re-locking once per counter. No lock is held while writing
// to w, so a slow writer cannot block registration. It returns the first
// write error encountered.
func (r *Registry) WritePrometheus(w io.Writer) error {
	r.mu.RLock()
	snapshot := make([]*counter, 0, len(r.counters))
	for _, c := range r.counters {
		snapshot = append(snapshot, c)
	}
	r.mu.RUnlock()

	// Each counter's name equals its map key, so this matches sorting the
	// keys.
	sort.Slice(snapshot, func(i, j int) bool { return snapshot[i].name < snapshot[j].name })

	for _, c := range snapshot {
		if err := writeCounter(w, c); err != nil {
			return err
		}
	}
	return nil
}
// writeCounter emits one counter: the HELP and TYPE header lines followed
// by one sample line per series, sorted by encoded label tuple. It returns
// the first write error.
func writeCounter(w io.Writer, c *counter) error {
	if _, err := fmt.Fprintf(w, "# HELP %s %s\n# TYPE %s counter\n", c.name, escapeHelp(c.help), c.name); err != nil {
		return err
	}

	// Copy the (key, cell) pairs out under one lock acquisition so the
	// exposition path contends with Add as little as possible. Entries are
	// never removed from the map, so the cell pointers stay valid after
	// the lock is released.
	type point struct {
		key  string
		cell *atomic.Int64
	}
	c.seriesMu.Lock()
	points := make([]point, 0, len(c.series))
	for key, cell := range c.series {
		points = append(points, point{key: key, cell: cell})
	}
	c.seriesMu.Unlock()

	sort.Slice(points, func(i, j int) bool { return points[i].key < points[j].key })

	for _, p := range points {
		n := p.cell.Load()
		var err error
		// An empty rendering means "no label block": print the bare name.
		if rendered := decodeKey(p.key, c.labels); rendered == "" {
			_, err = fmt.Fprintf(w, "%s %d\n", c.name, n)
		} else {
			_, err = fmt.Fprintf(w, "%s{%s} %d\n", c.name, rendered, n)
		}
		if err != nil {
			return err
		}
	}
	return nil
}
func decodeKey(key string, names []string) string {
if key == "" || len(names) == 0 {
return ""
}
values := strings.Split(key, "\x1f")
if len(values) != len(names) {
// Should not happen — encodeKey/decode are symmetric.
return ""
}
parts := make([]string, len(names))
for i, n := range names {
parts[i] = fmt.Sprintf(`%s="%s"`, n, escapeLabelValue(values[i]))
}
return strings.Join(parts, ",")
}
// helpEscaper is built once and reused: a strings.Replacer is safe for
// concurrent use, so there is no need to construct one per call.
var helpEscaper = strings.NewReplacer("\\", "\\\\", "\n", "\\n")

// escapeHelp escapes a HELP string for the text exposition format:
// backslash becomes `\\` and a newline becomes `\n`.
func escapeHelp(s string) string {
	return helpEscaper.Replace(s)
}
// labelValueEscaper is built once and reused: a strings.Replacer is safe
// for concurrent use, so there is no need to construct one per call.
var labelValueEscaper = strings.NewReplacer("\\", "\\\\", "\n", "\\n", `"`, `\"`)

// escapeLabelValue escapes a label value for the text exposition format:
// backslash becomes `\\`, a newline becomes `\n`, and a double quote
// becomes `\"`.
func escapeLabelValue(s string) string {
	return labelValueEscaper.Replace(s)
}
// ── Pre-declared counters ────────────────────────────────────────────
//
// These are the counters Tinyforge surfaces to operators. Adding more is
// a one-line NewCounter call at the call site — no central catalogue,
// just keep names lowercase_snake with the `tinyforge_` prefix.
var (
HTTPRequestsTotal = NewCounter(
"tinyforge_http_requests_total",
"Total HTTP requests handled, partitioned by method and outcome class.",
"method", "status_class",
)
DeploysTotal = NewCounter(
"tinyforge_deploys_total",
"Total deploys dispatched, partitioned by source kind and outcome.",
"source_kind", "outcome",
)
WebhookDeliveriesTotal = NewCounter(
"tinyforge_webhook_deliveries_total",
"Total inbound webhook deliveries, partitioned by outcome.",
"outcome",
)
SchedulerTicksTotal = NewCounter(
"tinyforge_scheduler_ticks_total",
"Total scheduler ticks. The dispatched counter is the success measure.",
)
SchedulerDispatchedTotal = NewCounter(
"tinyforge_scheduler_dispatched_total",
"Triggers actually dispatched by the scheduler.",
)
OutboundNotifyTotal = NewCounter(
"tinyforge_outbound_notify_total",
"Outbound notification dispatch attempts, partitioned by outcome.",
"outcome",
)
)
+76 -5
View File
@@ -16,6 +16,8 @@ import (
"time"
"github.com/google/uuid"
"github.com/alexei/tinyforge/internal/metrics"
)
// Event represents a deployment / site-sync notification payload.
@@ -83,17 +85,68 @@ type TestResult struct {
// Notifications are fire-and-forget by default — failures are logged but do
// not propagate. SendSyncForTest is the exception, used only by the manual
// test endpoint.
//
// outboundSem caps the number of in-flight outbound notifications. Without
// it a single burst (e.g. 1000 event triggers firing on a noisy log scan)
// would spawn 1000 simultaneous TCP connections, which both DoSes the
// receiver and exhausts local FDs.
type Notifier struct {
	// httpClient performs the outbound webhook requests.
	httpClient *http.Client
	// wg tracks the fire-and-forget send goroutines.
	wg sync.WaitGroup
	// outboundSem is a counting semaphore that bounds the number of
	// in-flight outbound notifications; its capacity is the limit.
	outboundSem chan struct{}
}
// maxOutboundNotifications bounds the in-flight outbound webhook fan-out.
// Sized to keep small bursts non-blocking while preventing a runaway storm
// from starving the rest of the process. Tunable later via settings if any
// operator legitimately needs more concurrency.
const maxOutboundNotifications = 32
// New creates a Notifier with sensible defaults: an HTTP client with a
// 10-second timeout on a connection-bounded transport, and an outbound
// semaphore sized to maxOutboundNotifications.
func New() *Notifier {
	// Transport with bounded host pooling so a slow receiver cannot pin
	// arbitrarily many sockets open. MaxConnsPerHost mirrors the worker
	// pool size; idle pruning keeps long-lived processes from holding
	// stale TCP entries indefinitely.
	//
	// NOTE: we deliberately do NOT apply the staticsite SSRF dialer here.
	// Notification URLs are admin-configured, and an admin already has
	// Docker-socket (host-root-equivalent) access, so the SSRF surface adds
	// nothing they couldn't already reach. Blocking loopback/private targets
	// would instead break the common self-hosted pattern of notifying a
	// same-host sidecar/bridge (e.g. service-to-notification-bridge on
	// 127.0.0.1). See the security review (rated LOW / out of trust boundary).
	tr := &http.Transport{
		MaxIdleConns:        64,
		MaxIdleConnsPerHost: 8,
		MaxConnsPerHost:     maxOutboundNotifications,
		IdleConnTimeout:     90 * time.Second,
	}
	return &Notifier{
		// Timeout appears exactly once; a repeated key in a composite
		// literal does not compile.
		httpClient: &http.Client{
			Timeout:   10 * time.Second,
			Transport: tr,
		},
		outboundSem: make(chan struct{}, maxOutboundNotifications),
	}
}
// acquireSlot blocks until an outbound slot is free or ctx is done. It
// returns true when a slot was reserved and false when ctx ended first, so
// a backed-up queue cannot hold a caller past its own deadline.
func (n *Notifier) acquireSlot(ctx context.Context) bool {
	acquired := false
	select {
	case <-ctx.Done():
		// Gave up waiting; no slot is held.
	case n.outboundSem <- struct{}{}:
		acquired = true
	}
	return acquired
}
// releaseSlot hands one outbound slot back. It never blocks: if the
// semaphore holds nothing to receive (e.g. it was drained during shutdown),
// the call is a no-op.
func (n *Notifier) releaseSlot() {
	select {
	case <-n.outboundSem:
		return
	default:
		// Nothing to release — return immediately rather than wait.
		return
	}
}
@@ -128,8 +181,15 @@ func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event)
n.wg.Add(1)
go func() {
defer n.wg.Done()
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
if !n.acquireSlot(ctx) {
slog.Warn("notify: dropped — outbound queue saturated",
"tier", tier, "host", safeHost(webhookURL), "delivery", delivery, "event", event.Type)
metrics.OutboundNotifyTotal.Inc("dropped")
return
}
defer n.releaseSlot()
_, err := n.doSend(ctx, webhookURL, secret, tier, delivery, event)
// URL host only — never log the secret or full URL with user-info.
@@ -138,11 +198,13 @@ func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event)
slog.Warn("notify: webhook send failed",
"tier", tier, "host", host, "delivery", delivery,
"event", event.Type, "signed", secret != "", "error", err)
metrics.OutboundNotifyTotal.Inc("failure")
return
}
slog.Info("notify: webhook dispatched",
"tier", tier, "host", host, "delivery", delivery,
"event", event.Type, "signed", secret != "")
metrics.OutboundNotifyTotal.Inc("success")
}()
}
@@ -166,8 +228,15 @@ func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any
n.wg.Add(1)
go func() {
defer n.wg.Done()
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
if !n.acquireSlot(ctx) {
slog.Warn("notify: dropped trigger payload — outbound queue saturated",
"tier", TierEventTrigger, "host", safeHost(webhookURL), "delivery", delivery, "event", eventType)
metrics.OutboundNotifyTotal.Inc("dropped")
return
}
defer n.releaseSlot()
_, err := n.doSendRaw(ctx, webhookURL, secret, TierEventTrigger, delivery, eventType, timestamp, payload)
host := safeHost(webhookURL)
@@ -175,11 +244,13 @@ func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any
slog.Warn("notify: trigger webhook send failed",
"tier", TierEventTrigger, "host", host, "delivery", delivery,
"event", eventType, "signed", secret != "", "error", err)
metrics.OutboundNotifyTotal.Inc("failure")
return
}
slog.Info("notify: trigger webhook dispatched",
"tier", TierEventTrigger, "host", host, "delivery", delivery,
"event", eventType, "signed", secret != "")
metrics.OutboundNotifyTotal.Inc("success")
}()
}
+3
View File
@@ -27,6 +27,7 @@ import (
"sync"
"time"
"github.com/alexei/tinyforge/internal/metrics"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
"github.com/alexei/tinyforge/internal/workload/plugin/trigger/schedule"
@@ -124,6 +125,7 @@ func (s *Scheduler) loop(ctx context.Context) {
// TickOnce runs a single sweep. Exposed for tests and for the boot
// kick. On error per-trigger the loop continues with the next row.
func (s *Scheduler) TickOnce(ctx context.Context) {
metrics.SchedulerTicksTotal.Inc()
rows, err := s.store.ListTriggers("schedule")
if err != nil {
slog.Warn("scheduler: list triggers", "error", err)
@@ -226,5 +228,6 @@ func (s *Scheduler) fire(ctx context.Context, t store.Trigger, now time.Time) {
slog.Warn("scheduler: dispatch", "trigger", t.Name, "error", err)
return
}
metrics.SchedulerDispatchedTotal.Inc()
slog.Info("scheduler: fired", "trigger", t.Name, "kind", t.Kind, "at", ts)
}
+13 -3
View File
@@ -92,17 +92,27 @@ func (c *Compose) Ps(ctx context.Context, projectName, yamlPath string) ([]Servi
}
// Logs runs `docker compose -p <projectName> logs --no-color --tail=<n> <service>`.
// If service is empty, logs for all services are returned.
// If service is empty, logs for all services are returned. The service arg
// is preceded by `--` so a service name that begins with `-` cannot be
// re-parsed as a flag by the docker CLI (flag-injection guard).
func (c *Compose) Logs(ctx context.Context, projectName, service string, tail int) (string, error) {
args := []string{"logs", "--no-color", fmt.Sprintf("--tail=%d", tail)}
if service != "" {
args = append(args, service)
args = append(args, "--", service)
}
return c.run(ctx, projectName, args...)
}
// run executes `docker compose -p <projectName> <args...>` and returns combined output.
// run executes `docker compose -p <projectName> <args...>` and returns
// combined output. projectName is verified not to begin with `-` because
// `docker compose -p '--foo'` would otherwise be re-parsed as a flag —
// the callers already sanitize project names through projectNameSanitizer,
// but a belt-and-braces refusal here means any future caller cannot
// accidentally bypass the sanitizer.
func (c *Compose) run(ctx context.Context, projectName string, args ...string) (string, error) {
if projectName == "" || strings.HasPrefix(projectName, "-") {
return "", fmt.Errorf("docker compose: refusing project name %q", projectName)
}
full := append([]string{"compose", "-p", projectName}, args...)
cmd := exec.CommandContext(ctx, c.binary, full...)
var buf bytes.Buffer
+146 -6
View File
@@ -2,6 +2,7 @@ package stack
import (
	"fmt"
	"path"
	"strings"

	"gopkg.in/yaml.v3"
)
@@ -15,11 +16,25 @@ type ComposeSpec struct {
}
// ServiceSpec captures the subset of compose service fields we inspect.
//
// All host-escape-adjacent fields are decoded here even though Tinyforge
// itself never reads them at runtime — surfacing them to Validate() is the
// only way to *reject* them. Add new fields here when blocking a new
// escape vector.
type ServiceSpec struct {
Image string `yaml:"image,omitempty"`
Ports []any `yaml:"ports,omitempty"`
Labels map[string]string `yaml:"labels,omitempty"`
Privileged bool `yaml:"privileged,omitempty"`
Image string `yaml:"image,omitempty"`
Build any `yaml:"build,omitempty"` // banned — see Validate
Ports []any `yaml:"ports,omitempty"`
Labels map[string]string `yaml:"labels,omitempty"`
Privileged bool `yaml:"privileged,omitempty"`
Volumes []any `yaml:"volumes,omitempty"`
NetworkMode string `yaml:"network_mode,omitempty"`
Pid string `yaml:"pid,omitempty"`
Ipc string `yaml:"ipc,omitempty"`
UsernsMode string `yaml:"userns_mode,omitempty"`
CapAdd []string `yaml:"cap_add,omitempty"`
Devices []any `yaml:"devices,omitempty"`
SecurityOpt []string `yaml:"security_opt,omitempty"`
}
// Parse decodes YAML into a ComposeSpec. Returns a descriptive error on failure.
@@ -35,10 +50,20 @@ func Parse(yamlText string) (ComposeSpec, error) {
}
// Validate enforces Tinyforge-level constraints beyond compose schema validity.
// All blocked fields below are documented host-escape vectors: any one of
// them on its own gives the container root on the host. Tinyforge already
// owns the docker socket, so the threat model is "any admin == host root,"
// and these blocks raise the bar for any *future* viewer-to-admin
// escalation as well as honest-mistake guardrails.
//
// Current rules:
// - No service may set `privileged: true`.
// - Every service must declare an image (compose supports build: too, but
// Tinyforge v1 disallows building from context to avoid arbitrary-code exec).
// - Every service must declare an image (build contexts disallowed).
// - No host-IPC / host-PID / host-userns / host networking.
// - No `cap_add`, `security_opt`, `devices`.
// - `volumes` may not bind-mount the docker socket, /, /etc, /var, /proc,
// /sys, /root, or /home — list is conservative; operators with real
// bind-mount needs should ship a Source plugin or a dedicated wizard.
func Validate(spec ComposeSpec) error {
for name, svc := range spec.Services {
if svc.Privileged {
@@ -47,6 +72,121 @@ func Validate(spec ComposeSpec) error {
if svc.Image == "" {
return fmt.Errorf("service %q: image is required (build contexts not supported)", name)
}
if svc.Build != nil {
return fmt.Errorf("service %q: build: is not supported (use image:)", name)
}
if isBlockedNamespaceMode(svc.NetworkMode) {
return fmt.Errorf("service %q: network_mode %q is not allowed", name, svc.NetworkMode)
}
if isBlockedNamespaceMode(svc.Pid) {
return fmt.Errorf("service %q: pid: %q is not allowed", name, svc.Pid)
}
if isBlockedNamespaceMode(svc.Ipc) {
return fmt.Errorf("service %q: ipc: %q is not allowed", name, svc.Ipc)
}
if isHostMode(svc.UsernsMode) {
return fmt.Errorf("service %q: userns_mode %q is not allowed", name, svc.UsernsMode)
}
if len(svc.CapAdd) > 0 {
return fmt.Errorf("service %q: cap_add is not allowed", name)
}
if len(svc.SecurityOpt) > 0 {
return fmt.Errorf("service %q: security_opt is not allowed", name)
}
if len(svc.Devices) > 0 {
return fmt.Errorf("service %q: devices is not allowed", name)
}
for _, v := range svc.Volumes {
if host, ok := bindMountHostPath(v); ok {
if isBlockedBindMount(host) {
return fmt.Errorf("service %q: bind-mounting %q is not allowed", name, host)
}
}
}
}
return nil
}
// isHostMode reports whether v requests sharing a host namespace, i.e. the
// literal value "host" as accepted by network_mode / pid / ipc / userns_mode.
//
// The comparison is exact on purpose: "host-gateway" is an extra_hosts value
// rather than a namespace mode, and matching it here would only produce
// misleading rejections.
func isHostMode(v string) bool {
	switch v {
	case "host":
		return true
	default:
		return false
	}
}
// isBlockedNamespaceMode reports whether v is a namespace mode that must be
// refused for network_mode / pid / ipc. Two families are rejected:
//
//   - host sharing ("host"), and
//   - joining the namespace of another container or compose service
//     ("container:<id>", "service:<name>").
//
// The join forms are a lateral-movement and sandbox-escape vector: a
// malicious service could attach itself to a victim container's network or
// PID namespace.
func isBlockedNamespaceMode(v string) bool {
	if isHostMode(v) {
		return true
	}
	for _, joinPrefix := range []string{"container:", "service:"} {
		if strings.HasPrefix(v, joinPrefix) {
			return true
		}
	}
	return false
}
// bindMountHostPath extracts the host-side path from a compose volume
// declaration. Compose accepts two shapes:
//
//   - short syntax, a string "src:dst[:mode]" — src is a host path only when
//     it starts with "/", "." or "~"; any other src is a named volume;
//   - long syntax, a map with "type" and "source" keys.
//
// For the long syntax an explicit `type: bind` makes EVERY non-empty source a
// host path: Compose resolves a bind source without a leading "/" relative to
// the project directory, so a value such as "a/../../etc" still lands on the
// host filesystem. Such sources are therefore returned for validation instead
// of being mistaken for a named volume. When "type" is absent the same prefix
// heuristic as the short syntax applies; any other type (volume, tmpfs, ...)
// has no host source.
//
// Returns ok=false when the declaration has no host source or has a shape
// this function does not recognise.
func bindMountHostPath(v any) (string, bool) {
	// looksLikePath is the prefix heuristic that separates host paths from
	// named volumes when the declaration does not state its type.
	looksLikePath := func(src string) bool {
		return strings.HasPrefix(src, "/") ||
			strings.HasPrefix(src, ".") ||
			strings.HasPrefix(src, "~")
	}

	switch t := v.(type) {
	case string:
		// Only the part before the first ':' is the source.
		src, _, _ := strings.Cut(t, ":")
		if looksLikePath(src) {
			return src, true
		}
	case map[string]any:
		typ, _ := t["type"].(string)
		src, _ := t["source"].(string)
		switch typ {
		case "bind":
			// Explicit bind mount: the source is a host path whatever it
			// looks like.
			if src != "" {
				return src, true
			}
		case "":
			if looksLikePath(src) {
				return src, true
			}
		}
	}
	return "", false
}
// isBlockedBindMount reports whether a bind-mount host path must be rejected
// because it obviously escapes the container's intended sandbox. It is a
// conservative deny-list — operators with legitimate bind-mount needs should
// write a dedicated Source plugin rather than tunnel them through compose.
//
// Rejected:
//   - every source that is not an absolute path (empty, "./x", "../x",
//     "~/x", "x/y"): Docker resolves these against the compose working
//     directory or leaves them unexpanded, and ".." can climb out of that
//     directory entirely, so the absolute deny-list cannot reason about them;
//   - the filesystem root;
//   - the docker socket, and any directory that contains it (mounting
//     "/run" exposes "/run/docker.sock" just as well as mounting the socket);
//   - the sensitive trees listed in blockedPrefixes, including sub-paths.
//
// The path is lexically normalized first, so spellings such as
// "/tmp/../etc", "//etc" or "/var/" cannot slip past the prefix comparison.
func isBlockedBindMount(host string) bool {
	if !strings.HasPrefix(host, "/") {
		return true
	}

	// path (not path/filepath): compose host paths are slash-separated
	// regardless of the OS this process runs on.
	clean := path.Clean(host)
	if clean == "/" {
		return true
	}

	// The docker socket itself, or an ancestor directory that would expose it.
	for _, sock := range []string{"/var/run/docker.sock", "/run/docker.sock"} {
		if clean == sock || strings.HasPrefix(sock, clean+"/") {
			return true
		}
	}

	// Blocked trees: the directory itself and everything beneath it. The
	// trailing "/" in the prefix test keeps "/etcetera" from matching "/etc".
	blockedPrefixes := []string{"/etc", "/var", "/proc", "/sys", "/root", "/home", "/boot", "/dev"}
	for _, p := range blockedPrefixes {
		if clean == p || strings.HasPrefix(clean, p+"/") {
			return true
		}
	}
	return false
}
+62 -28
View File
@@ -50,34 +50,7 @@ func ValidateBaseURL(raw string) error {
func NewSafeHTTPClient(timeout time.Duration) *http.Client {
dialer := &net.Dialer{Timeout: 10 * time.Second, KeepAlive: 30 * time.Second}
transport := &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
host, port, err := net.SplitHostPort(addr)
if err != nil {
return nil, err
}
// If the caller passed a literal IP, skip the DNS round-trip.
if literal := net.ParseIP(host); literal != nil {
if reason := blockReason(literal); reason != "" {
return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, literal, reason)
}
return dialer.DialContext(ctx, network, addr)
}
ips, err := net.DefaultResolver.LookupIPAddr(ctx, host)
if err != nil {
return nil, err
}
if len(ips) == 0 {
return nil, fmt.Errorf("no addresses for %s", host)
}
for _, ip := range ips {
if reason := blockReason(ip.IP); reason != "" {
return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, ip.IP, reason)
}
}
// Bind to the first resolved IP so a rebind between resolution
// and connect cannot redirect the request to a blocked address.
return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].IP.String(), port))
},
DialContext: SafeDialContext(dialer),
MaxIdleConns: 16,
IdleConnTimeout: 30 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
@@ -85,6 +58,43 @@ func NewSafeHTTPClient(timeout time.Duration) *http.Client {
return &http.Client{Timeout: timeout, Transport: transport}
}
// SafeDialContext returns a DialContext that rejects loopback, link-local,
// multicast, unspecified, and cloud-metadata addresses at connect time. It is
// exposed so other transports (e.g. the outbound notification client) can
// apply the same SSRF policy without duplicating it or losing their own
// connection-pool tuning.
//
// Behaviour of the returned function:
//   - addr holding a literal IP is checked with blockReason and dialed as-is,
//     without a DNS round-trip;
//   - addr holding a hostname is resolved once; if ANY resolved address is
//     blocked the whole dial fails with ErrBlockedAddress, so a mixed answer
//     cannot be used to smuggle in a blocked target;
//   - the connection is then made to the resolved IPs themselves (never to
//     the hostname), so a DNS rebind between resolution and connect cannot
//     redirect the request. The vetted addresses are tried in resolver order
//     and the first successful connection wins; this keeps dual-stack names
//     reachable when the first record's address family is unroutable from
//     this host.
//
// dialer supplies timeouts and keep-alive settings and must be non-nil.
func SafeDialContext(dialer *net.Dialer) func(ctx context.Context, network, addr string) (net.Conn, error) {
	return func(ctx context.Context, network, addr string) (net.Conn, error) {
		host, port, err := net.SplitHostPort(addr)
		if err != nil {
			return nil, err
		}
		// If the caller passed a literal IP, skip the DNS round-trip.
		if literal := net.ParseIP(host); literal != nil {
			if reason := blockReason(literal); reason != "" {
				return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, literal, reason)
			}
			return dialer.DialContext(ctx, network, addr)
		}
		ips, err := net.DefaultResolver.LookupIPAddr(ctx, host)
		if err != nil {
			return nil, err
		}
		if len(ips) == 0 {
			return nil, fmt.Errorf("no addresses for %s", host)
		}
		// Vet the complete answer before opening any connection.
		for _, ip := range ips {
			if reason := blockReason(ip.IP); reason != "" {
				return nil, fmt.Errorf("%w: %s (%s)", ErrBlockedAddress, ip.IP, reason)
			}
		}
		// Dial only the IPs that were just vetted, falling back to the next
		// one when a connect attempt fails.
		var lastErr error
		for _, ip := range ips {
			conn, dialErr := dialer.DialContext(ctx, network, net.JoinHostPort(ip.IP.String(), port))
			if dialErr == nil {
				return conn, nil
			}
			lastErr = dialErr
			// A cancelled or expired context fails every further attempt too.
			if ctx.Err() != nil {
				break
			}
		}
		return nil, lastErr
	}
}
// blockReason returns a human label for why an IP is rejected, or ""
// if the IP is allowed. Centralized so all callers share the same
// policy.
@@ -92,6 +102,13 @@ func blockReason(ip net.IP) string {
if ip == nil {
return "nil address"
}
// Normalize IPv4-mapped IPv6 (::ffff:x.x.x.x) so the loopback / link-local
// classifiers below catch them. net.IP.To4() returns the 4-byte form for
// IPv4-mapped addresses; net's IsLoopback already handles this, but pin
// the conversion to avoid future surprises if the std-lib semantics drift.
if v4 := ip.To4(); v4 != nil {
ip = v4
}
switch {
case ip.IsLoopback():
return "loopback"
@@ -104,5 +121,22 @@ func blockReason(ip net.IP) string {
case ip.IsMulticast():
return "multicast"
}
// Cloud metadata endpoints — AWS / GCP / Azure are covered by the
// link-local block (169.254.169.254). The rest must be enumerated.
if metadataIPSet[ip.String()] {
return "cloud metadata endpoint"
}
return ""
}
// metadataIPSet enumerates well-known cloud metadata IPs that are NOT
// covered by net.IP.IsLinkLocalUnicast. Updating this set is the lightest
// way to keep up with new providers without changing the policy shape.
//
// blockReason looks entries up by net.IP.String(), so every key must be
// written exactly as String() renders the address: dotted-quad for IPv4 and
// the lowercase, "::"-compressed form for IPv6. A key in any other spelling
// would never match.
var metadataIPSet = map[string]bool{
// Alibaba Cloud ECS metadata.
"100.100.100.200": true,
// Oracle Cloud Infrastructure metadata.
"192.0.0.192": true,
// AWS IMDS over IPv6 (ULA — not link-local, must be listed).
"fd00:ec2::254": true,
}
+5 -5
View File
@@ -234,17 +234,17 @@ func (c *Collector) sampleAll(ctx context.Context, targets []target) []store.Con
found := make([]bool, len(targets))
var wg sync.WaitGroup
loop:
for i, t := range targets {
// Acquire the semaphore in the parent loop so ctx cancellation
// short-circuits the queue rather than spawning goroutines that
// block on an unreachable slot.
// block on an unreachable slot. The labelled break exits the for
// loop directly; a bare `break` inside `select` would only break
// the select and let the loop continue.
select {
case sem <- struct{}{}:
case <-ctx.Done():
break
}
if ctx.Err() != nil {
break
break loop
}
wg.Add(1)
go func(i int, t target) {
+29
View File
@@ -2,6 +2,7 @@ package store
import (
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
@@ -9,6 +10,22 @@ import (
"github.com/google/uuid"
)
// validateExtraJSON guards the extra_json column against values that are not
// JSON at all. The codemap (docs/CODEMAPS/container-extra-json.md) states
// that readers tolerate unknown keys, but that tolerance only holds when the
// stored value parses; a buggy plugin writing a malformed blob would
// otherwise break every reader with no schema-level check to catch it.
// Validating at the store boundary keeps the invariant cheap and obvious.
//
// The empty string is accepted (callers substitute "{}" before calling).
// Any syntactically valid JSON document passes json.Valid — the check does
// not require the top-level value to be an object. On failure the error
// reports only the byte length, never the offending content.
func validateExtraJSON(v string) error {
	switch {
	case v == "":
		return nil
	case json.Valid([]byte(v)):
		return nil
	default:
		return fmt.Errorf("extra_json: not valid JSON (%d bytes)", len(v))
	}
}
// containerColumns is the canonical column list for `containers` queries.
// stage_id is populated by the deployer for project containers (so ListProxyRoutes
// survives stage renames) and left empty for stacks and sites.
@@ -42,6 +59,9 @@ func (s *Store) CreateContainer(c Container) (Container, error) {
if c.ExtraJSON == "" {
c.ExtraJSON = "{}"
}
if err := validateExtraJSON(c.ExtraJSON); err != nil {
return Container{}, err
}
_, err := s.db.Exec(
`INSERT INTO containers (`+containerColumns+`)
@@ -77,6 +97,9 @@ func (s *Store) UpsertContainer(c Container) error {
if c.ExtraJSON == "" {
c.ExtraJSON = "{}"
}
if err := validateExtraJSON(c.ExtraJSON); err != nil {
return err
}
// SQLite UPSERT — INSERT...ON CONFLICT(id) DO UPDATE.
_, err := s.db.Exec(
@@ -129,6 +152,9 @@ func (s *Store) ReconcileContainer(c Container) error {
if c.ExtraJSON == "" {
c.ExtraJSON = "{}"
}
if err := validateExtraJSON(c.ExtraJSON); err != nil {
return err
}
// extra_json is deliberately NOT in the ON CONFLICT SET clause: the
// reconciler can't observe per-face route IDs from Docker, and
@@ -321,6 +347,9 @@ func (s *Store) UpdateContainer(c Container) error {
if c.ExtraJSON == "" {
c.ExtraJSON = "{}"
}
if err := validateExtraJSON(c.ExtraJSON); err != nil {
return err
}
result, err := s.db.Exec(
`UPDATE containers SET workload_id=?, workload_kind=?, role=?, stage_id=?, container_id=?,
image_ref=?, image_tag=?, host=?, state=?, port=?,
+171
View File
@@ -0,0 +1,171 @@
package store
import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
)
// ErrLockHeld is returned when another Tinyforge process appears to be
// running against the same data directory. SQLite + SetMaxOpenConns(1)
// makes this otherwise-silent collision a recipe for double-fired
// schedulers, double-polled registries, and `extra_json` RMW corruption.
//
// AcquireLockfile and its helpers always wrap this sentinel with %w plus
// diagnostic detail (holder PID, lockfile path, reclaim reason), so callers
// must test for it with errors.Is rather than ==.
var ErrLockHeld = errors.New("data directory is locked by another tinyforge process")
// Lockfile is a portable PID file. AcquireLockfile takes it; the returned
// Release function removes it. The contract:
//
// - Lockfile is created with O_CREATE|O_EXCL — atomic on POSIX, atomic
// on NTFS / ReFS via the equivalent.
// - On collision, the existing file's PID is read; if the PID is dead,
// we treat the lock as stale (process crashed without cleanup),
// reclaim it, and proceed. Live PID → ErrLockHeld.
// - flock is intentionally not used: cross-platform consistency wins
// over advisory-lock semantics for the single-instance use case.
//
// NOTE(review): AcquireLockfile returns a bare release closure, not a
// Lockfile value, and nothing in this file constructs the type — confirm
// whether it is still needed or exists only to carry this contract.
type Lockfile struct {
// path is the filesystem location of the lockfile.
path string
}
// AcquireLockfile takes the single-instance PID-file lock
// "<dataDir>/tinyforge.lock" and returns a release function that the caller
// must defer; calling it removes the lockfile.
//
// Outcomes:
//   - no lockfile present: it is created exclusively with our PID;
//   - lockfile names a live PID: ErrLockHeld, wrapped with the holder PID and
//     the lockfile path as a hint for the operator;
//   - lockfile names a dead PID, or cannot be read/parsed: the lock is
//     considered stale and is reclaimed.
//
// Reclaim atomicity: replacing a stale lockfile is serialized through an
// auxiliary reclaim lock (see reclaimStaleLock) so that, of N processes
// booting concurrently against the same stale lockfile, exactly one reclaims
// it and the rest get ErrLockHeld. A bare remove-then-O_EXCL retry — or a
// rename, which is last-writer-wins — cannot guarantee this: several
// reclaimers could each end up believing they own the lock, defeating the
// single-instance guard.
func AcquireLockfile(dataDir string) (release func(), err error) {
	path := filepath.Join(dataDir, "tinyforge.lock")

	// Fast path: nobody holds the lock.
	rel, created, createErr := tryCreateExclusive(path)
	if created {
		return rel, nil
	}
	if createErr != nil {
		return nil, createErr
	}

	// A lockfile already exists. An unreadable or unparsable one is treated
	// exactly like one left behind by a dead process.
	pid, readErr := readLockPID(path)
	if readErr != nil {
		return reclaimStaleLock(path, "malformed existing lockfile")
	}
	if processAlive(pid) {
		return nil, fmt.Errorf("%w (held by pid %d, lockfile=%s)", ErrLockHeld, pid, path)
	}
	return reclaimStaleLock(path, fmt.Sprintf("stale lockfile (dead pid %d)", pid))
}
// tryCreateExclusive attempts an atomic O_CREATE|O_EXCL create of path and
// writes the current PID followed by a newline into it.
//
// Results:
//   - (release, true, nil): the file was created; release removes it again;
//   - (nil, false, nil): the file already exists (someone else holds it);
//   - (nil, false, err): any other failure. If the failure happens after the
//     file was created (write or close error) the half-written file is
//     removed so it cannot be mistaken for a held lock.
func tryCreateExclusive(path string) (func(), bool, error) {
	const exclusiveCreate = os.O_CREATE | os.O_EXCL | os.O_WRONLY

	f, openErr := os.OpenFile(path, exclusiveCreate, 0o600)
	switch {
	case openErr == nil:
		// Created; fall through to record our PID.
	case os.IsExist(openErr):
		return nil, false, nil
	default:
		return nil, false, fmt.Errorf("open lockfile: %w", openErr)
	}

	_, writeErr := fmt.Fprintf(f, "%d\n", os.Getpid())
	if writeErr != nil {
		_ = f.Close()
		_ = os.Remove(path)
		return nil, false, fmt.Errorf("write lockfile: %w", writeErr)
	}
	if closeErr := f.Close(); closeErr != nil {
		_ = os.Remove(path)
		return nil, false, fmt.Errorf("close lockfile: %w", closeErr)
	}

	// Best-effort removal: a failure to delete on shutdown is not actionable.
	release := func() { _ = os.Remove(path) }
	return release, true, nil
}
// reclaimStaleLock replaces a stale/malformed lockfile with one holding our
// PID, serialized by an auxiliary reclaim lock. Holding the reclaim lock
// (O_EXCL) guarantees that only one process performs the remove-and-recreate
// of the main lockfile at a time, so concurrent reclaimers cannot each end
// up "owning" the lock the way a rename or unguarded remove+create would
// allow. The reclaim lock is itself liveness-checked so a reclaimer that
// crashed mid-reclaim cannot wedge startup forever.
//
// lockPath is the main lockfile; the reclaim lock lives next to it at
// lockPath+".reclaim". reason is a human-readable description of why the
// caller considers the lock stale; it is appended to every error message.
//
// Returns the release function for the newly created main lockfile. Every
// refusal wraps ErrLockHeld; only an I/O error from tryCreateExclusive is
// returned unwrapped.
func reclaimStaleLock(lockPath, reason string) (func(), error) {
reclaimPath := lockPath + ".reclaim"
if err := acquireReclaimLock(reclaimPath); err != nil {
return nil, fmt.Errorf("%w (%v; %s)", ErrLockHeld, err, reason)
}
// The reclaim lock is only needed for the duration of this function; it is
// dropped on every return path below, success or failure.
defer func() { _ = os.Remove(reclaimPath) }()
// Serialized now. Re-check the main lock: another process may have fully
// reclaimed it between our liveness probe and our taking the reclaim lock.
if pid, perr := readLockPID(lockPath); perr == nil && processAlive(pid) {
return nil, fmt.Errorf("%w (reclaimed by pid %d while we waited; %s)",
ErrLockHeld, pid, reason)
}
// Safe to replace: remove the stale file, then create a fresh exclusive
// one. Both run while we hold the reclaim lock, so no other reclaimer can
// observe the gap. A lockfile that has already vanished is not an error.
if err := os.Remove(lockPath); err != nil && !os.IsNotExist(err) {
return nil, fmt.Errorf("%w (could not remove stale lockfile %s: %v; %s)",
ErrLockHeld, lockPath, err, reason)
}
rel, ok, err := tryCreateExclusive(lockPath)
if err != nil {
return nil, err
}
if !ok {
// Should be impossible while we hold the reclaim lock; fail safe.
return nil, fmt.Errorf("%w (lockfile reappeared during reclaim of %s; %s)",
ErrLockHeld, lockPath, reason)
}
return rel, nil
}
// acquireReclaimLock takes the auxiliary reclaim lock at reclaimPath with
// O_EXCL and records our PID in it. An existing reclaim lock is honoured only
// while its recorded PID is alive (a genuine concurrent reclaim); a stale one
// (dead PID or unreadable content) is removed once and the exclusive create
// is re-attempted, so a crashed reclaimer cannot block boot indefinitely. Of
// concurrent callers, O_EXCL ensures at most one create succeeds; the rest
// fail and the caller maps the error to ErrLockHeld.
//
// Returns nil when the lock is held by this process; the caller is then
// responsible for removing reclaimPath. On every error path where this
// function created the file itself (write or close failure) the file is
// removed before returning: the caller only installs its cleanup after a
// successful return, so a leftover file carrying our live PID would make
// every later attempt report a concurrent reclaim for as long as this
// process runs.
//
// NOTE(review): the file is created first and the PID written afterwards, so
// a racing caller can observe an empty reclaim lock, classify it as stale and
// remove it. The window is tiny but not zero — confirm whether it needs
// closing (e.g. by publishing a pre-written file atomically).
func acquireReclaimLock(reclaimPath string) error {
	for attempt := 0; attempt < 2; attempt++ {
		f, err := os.OpenFile(reclaimPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
		if err == nil {
			if _, werr := fmt.Fprintf(f, "%d\n", os.Getpid()); werr != nil {
				_ = f.Close()
				_ = os.Remove(reclaimPath)
				return fmt.Errorf("write reclaim lock %s: %v", reclaimPath, werr)
			}
			if cerr := f.Close(); cerr != nil {
				// Mirror tryCreateExclusive: never leave a lock behind that
				// we are about to report as not acquired.
				_ = os.Remove(reclaimPath)
				return fmt.Errorf("close reclaim lock %s: %v", reclaimPath, cerr)
			}
			return nil
		}
		if !os.IsExist(err) {
			return fmt.Errorf("create reclaim lock %s: %v", reclaimPath, err)
		}
		// Reclaim lock present. A live owner means a real concurrent reclaim.
		if pid, perr := readLockPID(reclaimPath); perr == nil && processAlive(pid) {
			return fmt.Errorf("concurrent reclaim in progress (pid %d)", pid)
		}
		// Stale reclaim lock — clear it and retry the exclusive create once.
		if rerr := os.Remove(reclaimPath); rerr != nil && !os.IsNotExist(rerr) {
			return fmt.Errorf("remove stale reclaim lock %s: %v", reclaimPath, rerr)
		}
	}
	return fmt.Errorf("could not acquire reclaim lock %s after retry", reclaimPath)
}
// readLockPID reads the PID recorded in the lockfile at path.
//
// The file content is trimmed of surrounding whitespace and parsed as a
// decimal integer. It returns the os.ReadFile error unchanged when the file
// cannot be read, an "empty lockfile" error when nothing but whitespace is
// present, and the strconv.Atoi error when the content is not a number. The
// returned PID is 0 whenever the error is non-nil.
func readLockPID(path string) (int, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}
	trimmed := strings.TrimSpace(string(raw))
	if len(trimmed) == 0 {
		return 0, errors.New("empty lockfile")
	}
	pid, convErr := strconv.Atoi(trimmed)
	return pid, convErr
}
+137
View File
@@ -0,0 +1,137 @@
package store
import (
"errors"
"fmt"
"os"
"path/filepath"
"sync"
"testing"
)
// TestAcquireLockfile_FreshDir checks the uncontended path: acquiring in an
// empty directory succeeds and leaves a lockfile containing this process's
// PID followed by a newline.
func TestAcquireLockfile_FreshDir(t *testing.T) {
	dataDir := t.TempDir()

	unlock, err := AcquireLockfile(dataDir)
	if err != nil {
		t.Fatalf("AcquireLockfile: %v", err)
	}
	defer unlock()

	// Lockfile should exist with our PID.
	got, err := os.ReadFile(filepath.Join(dataDir, "tinyforge.lock"))
	if err != nil {
		t.Fatalf("read lockfile: %v", err)
	}
	if want := fmt.Sprintf("%d\n", os.Getpid()); string(got) != want {
		t.Errorf("lockfile content = %q, want %q", got, want)
	}
}
// TestAcquireLockfile_HeldByLivePID_Refused checks that a lockfile naming a
// running process is honoured: the acquire must fail with an error that
// wraps ErrLockHeld.
func TestAcquireLockfile_HeldByLivePID_Refused(t *testing.T) {
	dataDir := t.TempDir()

	// Plant a lockfile holding the current PID (which is obviously alive).
	lockPath := filepath.Join(dataDir, "tinyforge.lock")
	livePID := []byte(fmt.Sprintf("%d\n", os.Getpid()))
	if err := os.WriteFile(lockPath, livePID, 0o600); err != nil {
		t.Fatalf("plant lockfile: %v", err)
	}

	unlock, err := AcquireLockfile(dataDir)
	switch {
	case err == nil:
		// Clean up the lock we should never have been given before failing.
		unlock()
		t.Fatal("expected ErrLockHeld, got nil")
	case !errors.Is(err, ErrLockHeld):
		t.Errorf("error = %v, want wrap of ErrLockHeld", err)
	}
}
// TestAcquireLockfile_StalePID_Reclaimed checks that a lockfile left behind
// by a dead process is taken over and rewritten with this process's PID.
func TestAcquireLockfile_StalePID_Reclaimed(t *testing.T) {
	dataDir := t.TempDir()
	lockPath := filepath.Join(dataDir, "tinyforge.lock")

	// A low PID such as 1 is no use here: it is init/launchd/systemd on
	// POSIX and practically never dead. Use a deliberately impossible PID
	// instead — a 31-bit value far above any plausible system maximum.
	const deadPID = 2147483640
	planted := []byte(fmt.Sprintf("%d\n", deadPID))
	if err := os.WriteFile(lockPath, planted, 0o600); err != nil {
		t.Fatalf("plant stale lockfile: %v", err)
	}

	unlock, err := AcquireLockfile(dataDir)
	if err != nil {
		t.Fatalf("expected reclaim of stale lock, got: %v", err)
	}
	defer unlock()

	// Verify it now holds OUR pid, not the stale one.
	got, err := os.ReadFile(lockPath)
	if err != nil {
		t.Fatalf("read lockfile after reclaim: %v", err)
	}
	if want := fmt.Sprintf("%d\n", os.Getpid()); string(got) != want {
		t.Errorf("lockfile content after reclaim = %q, want %q", got, want)
	}
}
// TestAcquireLockfile_ConcurrentReclaim_SingleWinner plants a stale lockfile
// (impossibly high, certainly-dead PID) and lets many goroutines race to
// reclaim it. Exactly one must win; every other racer must be refused with an
// error wrapping ErrLockHeld. A "last-writer-wins" reclaim would let several
// goroutines all believe they own the lock.
func TestAcquireLockfile_ConcurrentReclaim_SingleWinner(t *testing.T) {
	dataDir := t.TempDir()

	const deadPID = 2147483640
	planted := []byte(fmt.Sprintf("%d\n", deadPID))
	if err := os.WriteFile(filepath.Join(dataDir, "tinyforge.lock"), planted, 0o600); err != nil {
		t.Fatalf("plant stale lockfile: %v", err)
	}

	const n = 16
	// One buffer slot per racer so a winner's send can never block.
	won := make(chan func(), n)
	// Closing gate releases all racers at once to maximise contention.
	gate := make(chan struct{})

	var wg sync.WaitGroup
	wg.Add(n)
	for i := 0; i < n; i++ {
		go func() {
			defer wg.Done()
			<-gate
			unlock, err := AcquireLockfile(dataDir)
			switch {
			case err == nil:
				won <- unlock
			case !errors.Is(err, ErrLockHeld):
				t.Errorf("loser error = %v, want wrap of ErrLockHeld", err)
			}
		}()
	}
	close(gate)
	wg.Wait()
	close(won)

	// Release whatever was acquired before judging the result, so a failing
	// run does not leave lockfiles behind.
	winners := 0
	for unlock := range won {
		winners++
		unlock()
	}
	if winners != 1 {
		t.Fatalf("concurrent reclaim winners = %d, want exactly 1", winners)
	}
}
// TestAcquireLockfile_ReleaseRemovesFile checks that calling the release
// function deletes the lockfile from the data directory.
func TestAcquireLockfile_ReleaseRemovesFile(t *testing.T) {
	dataDir := t.TempDir()

	unlock, err := AcquireLockfile(dataDir)
	if err != nil {
		t.Fatalf("AcquireLockfile: %v", err)
	}
	unlock()

	lockPath := filepath.Join(dataDir, "tinyforge.lock")
	_, statErr := os.Stat(lockPath)
	if !os.IsNotExist(statErr) {
		t.Errorf("lockfile still present after release: %v", statErr)
	}
}
+33
View File
@@ -0,0 +1,33 @@
//go:build !windows
package store
import (
"errors"
"os"
"syscall"
)
// processAlive reports whether pid belongs to a running process.
//
// The probe is signal 0: on POSIX, kill(pid, 0) delivers nothing but still
// performs the existence and permission checks. A nil result means the
// process exists, a permission error means it exists but belongs to someone
// else (still "alive" for lock purposes), and anything else — ESRCH in
// particular — means it is gone. Non-positive PIDs are never alive.
//
// os.FindProcess is called only to obtain the *os.Process handle that Signal
// needs. On Linux / macOS / *BSD it does not fail for any PID value, so its
// error is deliberately not inspected; the Signal(0) result is the sole
// source of truth.
func processAlive(pid int) bool {
	if pid <= 0 {
		return false
	}
	proc, _ := os.FindProcess(pid)
	if proc == nil {
		return false
	}
	switch err := proc.Signal(syscall.Signal(0)); {
	case err == nil:
		return true
	case errors.Is(err, os.ErrPermission), errors.Is(err, syscall.EPERM):
		// EPERM = alive but not ours.
		return true
	default:
		// ESRCH (or any other failure) = dead.
		return false
	}
}
+30
View File
@@ -0,0 +1,30 @@
//go:build windows
package store
import (
"golang.org/x/sys/windows"
)
// processAlive returns true when the given PID is currently held by a
// running Windows process. OpenProcess with PROCESS_QUERY_LIMITED_INFORMATION
// is the supported way to check liveness without elevation.
//
// Every uncertain answer is resolved towards "alive", because wrongly
// reclaiming an active lock is far worse than asking the operator to remove
// a lockfile by hand:
//   - OpenProcess failing with ERROR_ACCESS_DENIED means the process exists
//     but we may not open it (the Windows counterpart of EPERM on POSIX);
//   - GetExitCodeProcess failing means we could not ask.
//
// Any other OpenProcess failure (e.g. no such PID) is reported as dead, as
// are non-positive PIDs. A process that genuinely exited with code 259 is
// indistinguishable from a running one and is reported alive.
func processAlive(pid int) bool {
	if pid <= 0 {
		return false
	}
	h, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
	if err != nil {
		// Access denied proves the PID is in use; everything else is
		// treated as "no such process".
		return err == windows.ERROR_ACCESS_DENIED
	}
	defer windows.CloseHandle(h)
	var exitCode uint32
	if err := windows.GetExitCodeProcess(h, &exitCode); err != nil {
		// Conservative: if we can't ask, assume alive so we don't reclaim
		// an active lock. Worst case the operator sees ErrLockHeld and
		// removes the lockfile by hand.
		return true
	}
	const stillActive = 259 // STILL_ACTIVE
	return exitCode == stillActive
}
+33
View File
@@ -278,12 +278,20 @@ const (
// containers.workload_kind and workloads.kind. After the hard cutover the
// backing project / stack / static_site tables are gone — these constants
// are just strings used to filter the unified containers index in the UI.
//
// `build` is the dockerfile-source kind: a container built from a
// Dockerfile in a Git repo. Operationally it looks like a site (one
// container, one optional public face) but its origin is the build
// pipeline, not a static-asset extract. Dashboard filters that need to
// distinguish "I built this from source" from "I served files from a
// repo" should key on this value.
type WorkloadKind string
// Known WorkloadKind values. Each constant's string is the exact value
// compared against the stored kind, so it must not be changed.
const (
// WorkloadKindProject is the kind string for project workloads.
WorkloadKindProject WorkloadKind = "project"
// WorkloadKindStack is the kind string for stack workloads.
WorkloadKindStack WorkloadKind = "stack"
// WorkloadKindSite is the kind string for site workloads.
WorkloadKindSite WorkloadKind = "site"
// WorkloadKindBuild is the dockerfile-source kind: a container built from
// a Dockerfile in a Git repo.
WorkloadKindBuild WorkloadKind = "build"
)
// Workload is the unifying primitive that abstracts Project, Stack, and StaticSite.
@@ -316,6 +324,31 @@ type Workload struct {
UpdatedAt string `json:"updated_at"`
}
// WorkloadNotification is one configured outbound notification route for
// a workload. Multiple rows per workload model the "one Slack channel
// for failures, one Discord webhook for successes" routing the legacy
// single notification_url column could not express.
//
// EventTypes is a comma-separated allow-list (e.g. "build_failure" or
// "deploy_success,deploy_failure"). An empty EventTypes means the row
// fires for every event type — the cheapest way to keep the existing
// single-destination behaviour expressible in the new shape.
//
// Secret round-trips through the same crypto envelope as other stored
// secrets; the API layer strips it from responses.
type WorkloadNotification struct {
// ID identifies this notification route.
ID string `json:"id"`
// WorkloadID is the workload this route belongs to.
WorkloadID string `json:"workload_id"`
// Name is the label given to the route.
Name string `json:"name"`
// URL is the destination the notification is sent to.
URL string `json:"url"`
// Secret is excluded from JSON entirely (`json:"-"`), so marshalling this
// struct can never leak it.
Secret string `json:"-"`
// EventTypes is the comma-separated allow-list described above; empty
// means every event type.
EventTypes string `json:"event_types"`
// Enabled presumably lets a route be switched off without deleting it —
// confirm against the dispatcher.
Enabled bool `json:"enabled"`
// SortOrder presumably fixes the relative order of a workload's routes —
// confirm against callers.
SortOrder int `json:"sort_order"`
// CreatedAt and UpdatedAt are timestamps carried as strings.
CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"`
}
// Container is the normalized index of every Tinyforge-managed container.
// Replaces the project-specific Instance table after migration. Subdomain/
// proxy fields are hoisted as first-class columns because ListProxyRoutes,
+232 -2
View File
@@ -55,11 +55,20 @@ func New(dbPath string) (*Store, error) {
db.SetMaxOpenConns(1)
db.SetConnMaxLifetime(0)
// Enable WAL mode and foreign keys for better concurrency and referential integrity.
// Enable WAL mode and foreign keys for better concurrency and
// referential integrity. `synchronous=NORMAL` pairs with WAL to skip
// the per-write fsync — the OS still flushes on checkpoint, durability
// is preserved across clean shutdowns, and crashes lose at most the
// last few committed transactions (acceptable for a tinyforge box).
// cache_size=-20000 = 20 MiB page cache, temp_store=MEMORY keeps
// indexer scratch off disk; both are pure perf knobs.
pragmas := []string{
"PRAGMA journal_mode=WAL",
"PRAGMA synchronous=NORMAL",
"PRAGMA foreign_keys=ON",
"PRAGMA busy_timeout=5000",
"PRAGMA cache_size=-20000",
"PRAGMA temp_store=MEMORY",
}
for _, p := range pragmas {
if _, err := db.Exec(p); err != nil {
@@ -284,6 +293,24 @@ func (s *Store) runMigrations() error {
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
// workload_notifications: per-workload notification destinations.
// Each row is one route (Slack channel, Discord webhook, generic
// receiver, ...). event_types is a comma-separated allow-list —
// empty means "all events". When zero rows exist for a workload
// the dispatcher falls back to the legacy single notification_url
// column on workloads so existing setups keep working unchanged.
`CREATE TABLE IF NOT EXISTS workload_notifications (
id TEXT PRIMARY KEY,
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
name TEXT NOT NULL,
url TEXT NOT NULL,
secret TEXT NOT NULL DEFAULT '',
event_types TEXT NOT NULL DEFAULT '',
enabled INTEGER NOT NULL DEFAULT 1,
sort_order INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
// workload_trigger_bindings: many-to-many between workloads and
// triggers. binding_config is the per-binding override applied on
// top of trigger.config (top-level JSON merge, binding wins).
@@ -427,6 +454,7 @@ func (s *Store) runMigrations() error {
`CREATE UNIQUE INDEX IF NOT EXISTS idx_triggers_webhook_secret ON triggers(webhook_secret) WHERE webhook_secret != ''`,
`CREATE INDEX IF NOT EXISTS idx_bindings_workload ON workload_trigger_bindings(workload_id)`,
`CREATE INDEX IF NOT EXISTS idx_bindings_trigger ON workload_trigger_bindings(trigger_id)`,
`CREATE INDEX IF NOT EXISTS idx_workload_notifs_workload ON workload_notifications(workload_id)`,
}
for _, idx := range indexes {
if _, err := s.db.Exec(idx); err != nil {
@@ -434,13 +462,215 @@ func (s *Store) runMigrations() error {
}
}
if err := s.backfillTriggersFromWorkloads(); err != nil {
// schema_versions table gates one-shot data migrations like the
// trigger backfill below. Without this, the backfill scan ran on
// every boot even on fully-migrated DBs — wasted I/O and (more
// importantly) made it impossible to tell whether a "no rows
// processed" was a clean state or a missed-migration bug.
if _, err := s.db.Exec(`CREATE TABLE IF NOT EXISTS schema_versions (
version INTEGER PRIMARY KEY,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)`); err != nil {
return fmt.Errorf("create schema_versions: %w", err)
}
if err := s.runOnce(1, "trigger backfill", s.backfillTriggersFromWorkloads); err != nil {
// Backfill failure is non-fatal — we log and let the operator
// retry. The version is only recorded on success.
slog.Warn("trigger backfill", "error", err)
}
return nil
}
// runOnce runs fn unless version is already recorded in schema_versions,
// and records version after fn succeeds. A failing fn records nothing, so
// the same version is attempted again on the next call. Useful for data
// migrations whose source table eventually disappears (so re-running
// becomes pointless or dangerous).
//
// label is used only for error wrapping and the success log line. An
// error returned by fn is passed through unwrapped.
func (s *Store) runOnce(version int, label string, fn func() error) error {
	// A non-zero count means this version is already in the ledger.
	var seen int
	ledgerRow := s.db.QueryRow(`SELECT COUNT(*) FROM schema_versions WHERE version = ?`, version)
	if err := ledgerRow.Scan(&seen); err != nil {
		return fmt.Errorf("check %s: %w", label, err)
	}
	if seen > 0 {
		return nil
	}

	if err := fn(); err != nil {
		return err
	}

	_, err := s.db.Exec(`INSERT INTO schema_versions (version) VALUES (?)`, version)
	if err != nil {
		return fmt.Errorf("mark %s applied: %w", label, err)
	}
	slog.Info("schema migration applied", "version", version, "label", label)
	return nil
}
// RunOnce is the public counterpart of runOnce, exposed so cmd/server can
// gate post-store-open migrations (e.g. crypto re-encryption that needs
// the ENCRYPTION_KEY which Store does not own) through the same
// schema_versions ledger.
//
// version is the ledger key, label is used for error and log context, and
// fn is the migration body. The call delegates to runOnce unchanged, so fn
// is skipped when version is already recorded and version is recorded only
// after fn returns nil.
func (s *Store) RunOnce(version int, label string, fn func() error) error {
	return s.runOnce(version, label, fn)
}
// EnvelopeMigrator describes the contract a crypto package implements to
// rewrite legacy unprefixed-hex ciphertext as versioned envelope values.
// By accepting closures the store stays free of any import on
// internal/crypto, mirroring the rest of the package layout.
type EnvelopeMigrator struct {
	// HasEnvelope reports whether value already carries the new prefix.
	HasEnvelope func(value string) bool
	// Decrypt returns the plaintext for either ciphertext form.
	Decrypt func(ciphertext string) (string, error)
	// Encrypt always produces the new envelope form.
	Encrypt func(plaintext string) (string, error)
}
// MigrateSecretsToEnvelope rewrites every stored secret that is still in
// the legacy unprefixed-hex form into the versioned envelope form, using
// the operations supplied in m.
//
// Per value:
//   - empty → left alone (no secret stored)
//   - already in envelope form → left alone (already migrated)
//   - decrypt fails → left alone (the value is either plaintext from a
//     v0 boot or ciphertext from a rotated key; it cannot be safely
//     re-encrypted, and leaving it untouched preserves the existing
//     read semantics)
//   - decrypt succeeds → re-encrypted to envelope form and updated
//
// Every update shares a single transaction, so an interrupted sweep
// leaves the DB in either the pre- or post-migration state, never half.
// The sweep is gated through runOnce as schema_versions version 2: once
// that version is recorded, later calls are no-ops.
//
// Columns covered:
//   - settings.npm_password
//   - settings.cloudflare_api_token
//   - auth_settings.oidc_client_secret
//   - registries.token
//   - workload_env.value WHERE encrypted=1
func (s *Store) MigrateSecretsToEnvelope(m EnvelopeMigrator) error {
	sweep := func() error {
		tx, err := s.db.Begin()
		if err != nil {
			return fmt.Errorf("begin: %w", err)
		}
		// Undoes the partial sweep on any early return below.
		defer func() { _ = tx.Rollback() }()

		// Single-row tables (settings, auth_settings): read the value
		// and rewrite it in place.
		for _, target := range []struct{ table, column string }{
			{"settings", "npm_password"},
			{"settings", "cloudflare_api_token"},
			{"auth_settings", "oidc_client_secret"},
		} {
			var current string
			readQ := fmt.Sprintf(`SELECT %s FROM %s LIMIT 1`, target.column, target.table)
			switch readErr := tx.QueryRow(readQ).Scan(&current); {
			case readErr == nil:
				// Value loaded; fall through to the rewrite attempt.
			case errors.Is(readErr, sql.ErrNoRows):
				continue
			default:
				// auth_settings may not exist on a brand-new DB until
				// the OIDC code touches it; treat as nothing-to-migrate.
				slog.Debug("envelope migration: column read skipped",
					"table", target.table, "column", target.column, "error", readErr)
				continue
			}

			replacement, ok := tryMigrate(m, current)
			if !ok {
				continue
			}
			writeQ := fmt.Sprintf(`UPDATE %s SET %s = ?`, target.table, target.column)
			if _, execErr := tx.Exec(writeQ, replacement); execErr != nil {
				return fmt.Errorf("update %s.%s: %w", target.table, target.column, execErr)
			}
		}

		// Multi-row tables: one (select, update) statement pair each.
		for _, job := range []struct{ label, selectQ, updateQ string }{
			{
				"registries.token",
				`SELECT id, token FROM registries WHERE token != ''`,
				`UPDATE registries SET token = ? WHERE id = ?`,
			},
			{
				"workload_env.value",
				`SELECT id, value FROM workload_env WHERE encrypted = 1 AND value != ''`,
				`UPDATE workload_env SET value = ? WHERE id = ?`,
			},
		} {
			if rowErr := migrateRowColumn(tx, m, job.selectQ, job.updateQ); rowErr != nil {
				return fmt.Errorf("%s: %w", job.label, rowErr)
			}
		}

		if commitErr := tx.Commit(); commitErr != nil {
			return fmt.Errorf("commit: %w", commitErr)
		}
		return nil
	}
	return s.runOnce(2, "secrets envelope migration", sweep)
}
// migrateRowColumn runs the envelope rewrite over every (id, value) pair
// that selectQ yields inside tx. updateQ is executed once per rewritten
// row with (newValue, id) as its parameters. Rows that tryMigrate declines
// (already migrated, undecryptable, ...) are skipped without affecting the
// others; a query, scan, or update error aborts and is returned as-is.
func migrateRowColumn(tx *sql.Tx, m EnvelopeMigrator, selectQ, updateQ string) error {
	cursor, err := tx.Query(selectQ)
	if err != nil {
		return err
	}
	defer cursor.Close()

	// Collect the rewrites first; the UPDATEs run only after the cursor
	// has been read to the end.
	var rewrites [][2]string // each entry is {id, newValue}
	for cursor.Next() {
		var rowID, stored string
		if scanErr := cursor.Scan(&rowID, &stored); scanErr != nil {
			return scanErr
		}
		if rewritten, ok := tryMigrate(m, stored); ok {
			rewrites = append(rewrites, [2]string{rowID, rewritten})
		}
	}
	if iterErr := cursor.Err(); iterErr != nil {
		return iterErr
	}

	for _, rw := range rewrites {
		if _, execErr := tx.Exec(updateQ, rw[1], rw[0]); execErr != nil {
			return execErr
		}
	}
	return nil
}
// tryMigrate converts a legacy unprefixed value into envelope form.
// It returns (envelope, true) only when v is non-empty, is not already in
// envelope form, decrypts successfully, and re-encrypts successfully.
// Every other case — empty, already envelope, plaintext, decrypt failure
// (rotated-key case), or encrypt failure — yields ("", false).
func tryMigrate(m EnvelopeMigrator, v string) (string, bool) {
	// The empty check comes first so HasEnvelope is never consulted for
	// a value that holds no secret.
	if v == "" || m.HasEnvelope(v) {
		return "", false
	}
	plain, decErr := m.Decrypt(v)
	if decErr != nil {
		return "", false
	}
	if wrapped, encErr := m.Encrypt(plain); encErr == nil {
		return wrapped, true
	}
	return "", false
}
// backfillTriggersFromWorkloads converts embedded trigger config on
// workload rows into standalone trigger + binding rows. Gated through
// runOnce (schema_versions version 1), so it is skipped once it has
// succeeded; it is also idempotent — only workloads with non-empty trigger_kind
+159
View File
@@ -0,0 +1,159 @@
package store
import (
"database/sql"
"errors"
"fmt"
"strings"
"github.com/google/uuid"
)
// workloadNotificationColumns is the column list shared by the INSERT and
// SELECT statements on workload_notifications. Its order must stay in
// step with the scan targets in scanWorkloadNotification and with the
// argument order of the INSERT in CreateWorkloadNotification.
const workloadNotificationColumns = `id, workload_id, name, url, secret,
event_types, enabled, sort_order, created_at, updated_at`
// scanWorkloadNotification reads one workload_notifications row from
// scanner, which must yield the columns in workloadNotificationColumns
// order. The enabled column is stored as an integer and mapped to a bool
// (any non-zero value is true).
//
// On a scan error the zero WorkloadNotification is returned together with
// the unwrapped error, so callers can still match it with errors.Is and
// never observe a half-populated row.
func scanWorkloadNotification(scanner interface{ Scan(...any) error }) (WorkloadNotification, error) {
	var (
		n       WorkloadNotification
		enabled int
	)
	if err := scanner.Scan(
		&n.ID, &n.WorkloadID, &n.Name, &n.URL, &n.Secret,
		&n.EventTypes, &enabled, &n.SortOrder, &n.CreatedAt, &n.UpdatedAt,
	); err != nil {
		return WorkloadNotification{}, err
	}
	n.Enabled = enabled != 0
	return n, nil
}
// CreateWorkloadNotification stores a new notification route and returns
// the row as written, so callers don't need a follow-up Get. WorkloadID
// and URL are mandatory. An empty ID is replaced with a fresh UUID, and
// CreatedAt / UpdatedAt are both overwritten with the current timestamp.
func (s *Store) CreateWorkloadNotification(n WorkloadNotification) (WorkloadNotification, error) {
	switch {
	case n.WorkloadID == "":
		return WorkloadNotification{}, fmt.Errorf("workload_id is required")
	case n.URL == "":
		return WorkloadNotification{}, fmt.Errorf("url is required")
	}

	if n.ID == "" {
		n.ID = uuid.New().String()
	}
	stamp := Now()
	n.CreatedAt, n.UpdatedAt = stamp, stamp

	// Argument order mirrors workloadNotificationColumns.
	args := []any{
		n.ID, n.WorkloadID, n.Name, n.URL, n.Secret,
		n.EventTypes, BoolToInt(n.Enabled), n.SortOrder, n.CreatedAt, n.UpdatedAt,
	}
	if _, err := s.db.Exec(
		`INSERT INTO workload_notifications (`+workloadNotificationColumns+`)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
		args...,
	); err != nil {
		return WorkloadNotification{}, fmt.Errorf("insert workload_notification: %w", err)
	}
	return n, nil
}
// ListWorkloadNotifications loads all notification rows belonging to
// workloadID, ordered by (sort_order, created_at) so the UI stays stable
// across reorderings. When nothing matches, the result is an empty,
// non-nil slice.
func (s *Store) ListWorkloadNotifications(workloadID string) ([]WorkloadNotification, error) {
	cursor, err := s.db.Query(
		`SELECT `+workloadNotificationColumns+`
FROM workload_notifications
WHERE workload_id = ?
ORDER BY sort_order, created_at`,
		workloadID,
	)
	if err != nil {
		return nil, fmt.Errorf("list workload_notifications: %w", err)
	}
	defer cursor.Close()

	routes := make([]WorkloadNotification, 0)
	for cursor.Next() {
		route, scanErr := scanWorkloadNotification(cursor)
		if scanErr != nil {
			return nil, fmt.Errorf("scan workload_notification: %w", scanErr)
		}
		routes = append(routes, route)
	}
	// An iteration error is returned alongside the rows read so far.
	return routes, cursor.Err()
}
// GetWorkloadNotification looks up a single notification row by id.
// A missing row is reported as an error wrapping ErrNotFound so callers
// can return 404 cleanly; any other failure is wrapped as a query error.
func (s *Store) GetWorkloadNotification(id string) (WorkloadNotification, error) {
	row := s.db.QueryRow(
		`SELECT `+workloadNotificationColumns+`
FROM workload_notifications WHERE id = ?`, id,
	)
	n, err := scanWorkloadNotification(row)
	switch {
	case err == nil:
		return n, nil
	case errors.Is(err, sql.ErrNoRows):
		return WorkloadNotification{}, fmt.Errorf("workload_notification %s: %w", id, ErrNotFound)
	default:
		return WorkloadNotification{}, fmt.Errorf("query workload_notification: %w", err)
	}
}
// UpdateWorkloadNotification rewrites the mutable columns of an existing
// row (name, url, secret, event_types, enabled, sort_order) and stamps
// updated_at with the current time. WorkloadID is immutable —
// re-anchoring a route to a different workload would invite silent
// reassignments after a paste-bug in the UI; recreate instead.
//
// ID and URL are required. The returned error wraps ErrNotFound when no
// row has the given ID. A failure to read the affected-row count is
// reported as its own error rather than being mistaken for "not found".
func (s *Store) UpdateWorkloadNotification(n WorkloadNotification) error {
	if n.ID == "" {
		return fmt.Errorf("id is required")
	}
	if n.URL == "" {
		return fmt.Errorf("url is required")
	}
	n.UpdatedAt = Now()
	res, err := s.db.Exec(
		`UPDATE workload_notifications
SET name = ?, url = ?, secret = ?, event_types = ?,
enabled = ?, sort_order = ?, updated_at = ?
WHERE id = ?`,
		n.Name, n.URL, n.Secret, n.EventTypes,
		BoolToInt(n.Enabled), n.SortOrder, n.UpdatedAt, n.ID,
	)
	if err != nil {
		return fmt.Errorf("update workload_notification: %w", err)
	}
	affected, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("update workload_notification: rows affected: %w", err)
	}
	if affected == 0 {
		return fmt.Errorf("workload_notification %s: %w", n.ID, ErrNotFound)
	}
	return nil
}
// DeleteWorkloadNotification drops a single notification row by id.
// When no row has that id the returned error wraps ErrNotFound, so the
// API can map it to 404 cleanly. A failure to read the affected-row count
// is reported as its own error rather than being mistaken for "not found".
func (s *Store) DeleteWorkloadNotification(id string) error {
	res, err := s.db.Exec(`DELETE FROM workload_notifications WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("delete workload_notification: %w", err)
	}
	affected, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("delete workload_notification: rows affected: %w", err)
	}
	if affected == 0 {
		return fmt.Errorf("workload_notification %s: %w", id, ErrNotFound)
	}
	return nil
}
// MatchesEventType reports whether this route should receive eventType.
// A disabled route never matches. An empty EventTypes allow-list matches
// everything; otherwise eventType must equal one of the comma-separated
// entries after surrounding whitespace is trimmed from the entry.
// Exported so the notification dispatcher can filter inline without
// duplicating the comma-split parser.
func (n WorkloadNotification) MatchesEventType(eventType string) bool {
	switch {
	case !n.Enabled:
		return false
	case n.EventTypes == "":
		return true
	}

	// Walk the list one entry at a time instead of materialising a slice.
	remaining := n.EventTypes
	for {
		entry, rest, more := strings.Cut(remaining, ",")
		if strings.TrimSpace(entry) == eventType {
			return true
		}
		if !more {
			return false
		}
		remaining = rest
	}
}
@@ -0,0 +1,170 @@
package store
import (
"errors"
"testing"
)
// seedWorkloadForNotifications inserts a minimal image-sourced project
// workload called name, so notification rows have a parent to satisfy
// the workload_notifications foreign key. It returns the new workload's
// ID and fails the test immediately if the insert errors.
func seedWorkloadForNotifications(t *testing.T, s *Store, name string) string {
	t.Helper()
	spec := Workload{
		Name:       name,
		Kind:       string(WorkloadKindProject),
		SourceKind: "image",
	}
	created, err := s.CreateWorkload(spec)
	if err != nil {
		t.Fatalf("seed workload: %v", err)
	}
	return created.ID
}
// TestCreateWorkloadNotification_RoundTrip creates a route, reads it back
// by ID, and checks that name, URL, enabled flag and event types survive
// the round trip.
func TestCreateWorkloadNotification_RoundTrip(t *testing.T) {
	s := newTestStore(t)
	input := WorkloadNotification{
		WorkloadID: seedWorkloadForNotifications(t, s, "app1"),
		Name:       "Slack alerts",
		URL:        "https://hooks.slack.test/x",
		Secret:     "shh",
		EventTypes: "deploy_failure,build_failure",
		Enabled:    true,
	}

	created, err := s.CreateWorkloadNotification(input)
	if err != nil {
		t.Fatalf("CreateWorkloadNotification: %v", err)
	}
	if created.ID == "" {
		t.Fatal("expected ID to be assigned")
	}

	stored, err := s.GetWorkloadNotification(created.ID)
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	if stored.Name != "Slack alerts" || stored.URL != "https://hooks.slack.test/x" {
		t.Errorf("row mismatch: %+v", stored)
	}
	if !stored.Enabled {
		t.Error("expected Enabled=true")
	}
	if want := "deploy_failure,build_failure"; stored.EventTypes != want {
		t.Errorf("event_types = %q", stored.EventTypes)
	}
}
// TestCreateWorkloadNotification_RejectsMissingURL checks that a route
// with an empty URL is refused at creation time.
func TestCreateWorkloadNotification_RejectsMissingURL(t *testing.T) {
	s := newTestStore(t)
	invalid := WorkloadNotification{
		WorkloadID: seedWorkloadForNotifications(t, s, "app1"),
		Name:       "broken",
		URL:        "",
	}
	if _, err := s.CreateWorkloadNotification(invalid); err == nil {
		t.Fatal("expected URL validation error")
	}
}
// TestListWorkloadNotifications_SortedByOrder inserts three routes out of
// order and checks that ListWorkloadNotifications returns them sorted by
// SortOrder ascending. Every insert is checked so a setup failure is
// reported at its source instead of surfacing as a length mismatch.
func TestListWorkloadNotifications_SortedByOrder(t *testing.T) {
	s := newTestStore(t)
	wlID := seedWorkloadForNotifications(t, s, "app1")

	// Deliberately inserted as C, A, B.
	seeds := []WorkloadNotification{
		{WorkloadID: wlID, Name: "C", URL: "https://c.test", SortOrder: 30},
		{WorkloadID: wlID, Name: "A", URL: "https://a.test", SortOrder: 10},
		{WorkloadID: wlID, Name: "B", URL: "https://b.test", SortOrder: 20},
	}
	for _, seed := range seeds {
		if _, err := s.CreateWorkloadNotification(seed); err != nil {
			t.Fatalf("create %q: %v", seed.Name, err)
		}
	}

	rows, err := s.ListWorkloadNotifications(wlID)
	if err != nil {
		t.Fatalf("list: %v", err)
	}
	if len(rows) != 3 {
		t.Fatalf("len = %d, want 3", len(rows))
	}
	if rows[0].Name != "A" || rows[1].Name != "B" || rows[2].Name != "C" {
		t.Errorf("sort order wrong: %q %q %q", rows[0].Name, rows[1].Name, rows[2].Name)
	}
}
// TestUpdateWorkloadNotification_PersistsChanges updates every field the
// test mutates (name, URL, enabled, event types) and verifies each one
// after reading the row back. Setup and read-back errors are checked so a
// failed Create or Get cannot masquerade as a persistence bug.
func TestUpdateWorkloadNotification_PersistsChanges(t *testing.T) {
	s := newTestStore(t)
	wlID := seedWorkloadForNotifications(t, s, "app1")
	n, err := s.CreateWorkloadNotification(WorkloadNotification{
		WorkloadID: wlID, Name: "old", URL: "https://old.test", Enabled: true,
	})
	if err != nil {
		t.Fatalf("create: %v", err)
	}

	n.Name = "new"
	n.URL = "https://new.test"
	n.Enabled = false
	n.EventTypes = "deploy_success"
	if err := s.UpdateWorkloadNotification(n); err != nil {
		t.Fatalf("update: %v", err)
	}

	got, err := s.GetWorkloadNotification(n.ID)
	if err != nil {
		t.Fatalf("get after update: %v", err)
	}
	if got.Name != "new" || got.URL != "https://new.test" || got.Enabled {
		t.Errorf("update did not persist: %+v", got)
	}
	if got.EventTypes != "deploy_success" {
		t.Errorf("event_types = %q, want %q", got.EventTypes, "deploy_success")
	}
}
// TestDeleteWorkloadNotification_ReturnsNotFoundForMissing checks that
// deleting an id with no row yields an error matching ErrNotFound.
func TestDeleteWorkloadNotification_ReturnsNotFoundForMissing(t *testing.T) {
	s := newTestStore(t)
	if err := s.DeleteWorkloadNotification("nope"); !errors.Is(err, ErrNotFound) {
		t.Errorf("expected ErrNotFound, got %v", err)
	}
}
// TestDeleteWorkloadNotification_CascadesFromWorkload verifies that
// deleting a workload removes its notification rows. The insert is
// checked and the row is confirmed present before the delete; without
// that, a failed insert would leave zero rows and let the test pass
// without exercising the cascade at all.
func TestDeleteWorkloadNotification_CascadesFromWorkload(t *testing.T) {
	s := newTestStore(t)
	wlID := seedWorkloadForNotifications(t, s, "app1")
	if _, err := s.CreateWorkloadNotification(WorkloadNotification{
		WorkloadID: wlID, Name: "x", URL: "https://x.test",
	}); err != nil {
		t.Fatalf("create: %v", err)
	}
	before, err := s.ListWorkloadNotifications(wlID)
	if err != nil {
		t.Fatalf("list before delete: %v", err)
	}
	if len(before) != 1 {
		t.Fatalf("precondition: expected 1 row before delete, got %d", len(before))
	}

	if err := s.DeleteWorkload(wlID); err != nil {
		t.Fatalf("delete workload: %v", err)
	}
	rows, err := s.ListWorkloadNotifications(wlID)
	if err != nil {
		t.Fatalf("list after cascade: %v", err)
	}
	if len(rows) != 0 {
		t.Errorf("expected cascade delete to remove rows, got %d", len(rows))
	}
}
// TestMatchesEventType_AllowList drives MatchesEventType on enabled
// routes through a table of allow-lists: empty (match all), hit, miss,
// single entry, and entries padded with whitespace.
func TestMatchesEventType_AllowList(t *testing.T) {
	type scenario struct {
		eventTypes string
		probe      string
		want       bool
	}
	table := []scenario{
		{eventTypes: "", probe: "deploy_success", want: true}, // empty = all
		{eventTypes: "deploy_success,deploy_failure", probe: "deploy_success", want: true},
		{eventTypes: "deploy_success,deploy_failure", probe: "build_failure", want: false},
		{eventTypes: "build_failure", probe: "build_failure", want: true},
		{eventTypes: " deploy_success , build_failure ", probe: "build_failure", want: true}, // whitespace tolerated
	}
	for _, sc := range table {
		route := WorkloadNotification{Enabled: true, EventTypes: sc.eventTypes}
		if got := route.MatchesEventType(sc.probe); got != sc.want {
			t.Errorf("MatchesEventType(%q, %q) = %v, want %v", sc.eventTypes, sc.probe, got, sc.want)
		}
	}
}
// TestMatchesEventType_DisabledNeverMatches checks that a disabled route
// rejects events even when its allow-list is empty ("match all").
func TestMatchesEventType_DisabledNeverMatches(t *testing.T) {
	disabled := WorkloadNotification{EventTypes: "", Enabled: false}
	matched := disabled.MatchesEventType("any")
	if matched {
		t.Error("disabled row should never match")
	}
}
+20 -4
View File
@@ -173,11 +173,24 @@ func (s *Store) UpdateWorkload(w Workload) error {
return nil
}
// DeleteWorkload removes a workload row. Cascading deletes for the matching
// project/stack/site row stay with the kind-specific Delete functions; this
// only removes the workload entry.
// DeleteWorkload removes a workload row. Cascading deletes for FK-backed
// child tables (workload_env, workload_volumes, workload_trigger_bindings)
// happen via SQLite's ON DELETE CASCADE. The `containers` table doesn't
// yet have an FK to workloads (planned migration — see ops notes), so we
// drop its rows explicitly here in the same transaction to prevent zombie
// container rows from outliving their owning workload.
func (s *Store) DeleteWorkload(id string) error {
result, err := s.db.Exec(`DELETE FROM workloads WHERE id = ?`, id)
tx, err := s.db.Begin()
if err != nil {
return fmt.Errorf("begin: %w", err)
}
defer func() { _ = tx.Rollback() }()
// Explicit container cleanup until the FK migration lands.
if _, err := tx.Exec(`DELETE FROM containers WHERE workload_id = ?`, id); err != nil {
return fmt.Errorf("delete containers: %w", err)
}
result, err := tx.Exec(`DELETE FROM workloads WHERE id = ?`, id)
if err != nil {
return fmt.Errorf("delete workload: %w", err)
}
@@ -188,6 +201,9 @@ func (s *Store) DeleteWorkload(id string) error {
if n == 0 {
return fmt.Errorf("workload %s: %w", id, ErrNotFound)
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("commit: %w", err)
}
return nil
}
+63 -3
View File
@@ -169,6 +169,18 @@ func SaveFile(rootPath, relativePath string, r io.Reader) error {
// safePath resolves a relative path within rootPath and validates it doesn't escape.
// Resolves symlinks to prevent symlink-based traversal attacks.
//
// The check used to be `strings.HasPrefix(absResolved, absRoot)` which has
// a classic boundary bug: a sibling root at /data/vol10 would pass the
// prefix test for /data/vol1. The fix enforces a separator boundary so
// the only allowed cases are absResolved == absRoot OR absResolved begins
// with absRoot + separator.
//
// For paths that don't yet exist (e.g. SaveFile creating a new file),
// EvalSymlinks returns an error and we fall back to the lexical path.
// In that case we walk every existing ancestor with EvalSymlinks too —
// if any ancestor is a symlink that escapes the root, we reject. This
// closes the prior gap where pre-planted symlinks could divert writes.
func safePath(rootPath, relativePath string) (string, error) {
if relativePath == "" {
return rootPath, nil
@@ -176,7 +188,7 @@ func safePath(rootPath, relativePath string) (string, error) {
// Clean and ensure no traversal.
cleaned := filepath.Clean(relativePath)
if strings.Contains(cleaned, "..") {
if cleaned == ".." || strings.HasPrefix(cleaned, ".."+string(filepath.Separator)) || strings.Contains(cleaned, string(filepath.Separator)+".."+string(filepath.Separator)) {
return "", fmt.Errorf("path traversal not allowed")
}
@@ -191,18 +203,66 @@ func safePath(rootPath, relativePath string) (string, error) {
absRoot = realRoot
}
// Resolve the target path including symlinks.
// Resolve the target path. If the leaf doesn't exist (write path),
// walk parent directories — any of which may already be a symlink.
absResolved, err := filepath.Abs(absPath)
if err != nil {
return "", fmt.Errorf("resolve path: %w", err)
}
if realResolved, err := filepath.EvalSymlinks(absResolved); err == nil {
absResolved = realResolved
} else {
// Leaf missing — resolve the deepest existing ancestor and
// re-join the unresolved tail. This catches a pre-planted
// symlink in any parent dir. An error here means an ancestor
// could not be resolved (e.g. a symlink we cannot follow): we MUST
// reject rather than fall back to the lexical path, which still
// carries the absRoot prefix and would let a symlink ancestor that
// escapes the root slip past the boundary check below.
resolved, tailErr := resolveExistingAncestor(absResolved)
if tailErr != nil {
return "", fmt.Errorf("path traversal not allowed")
}
if resolved != "" {
absResolved = resolved
}
}
if !strings.HasPrefix(absResolved, absRoot) {
if absResolved != absRoot && !strings.HasPrefix(absResolved, absRoot+string(filepath.Separator)) {
return "", fmt.Errorf("path traversal not allowed")
}
return absPath, nil
}
// resolveExistingAncestor walks p upward until it reaches a path that
// exists on disk (checked with Lstat, so a symlink itself counts),
// resolves that path's symlinks, and rejoins the not-yet-existing tail
// onto the resolved result.
//
// It returns ("", nil) when the walk reaches the filesystem root, a bare
// volume root, or an empty path without resolving anything. It returns
// the EvalSymlinks error when the first existing ancestor cannot be
// resolved, for example a dangling symlink.
func resolveExistingAncestor(p string) (string, error) {
	missing := "" // path components below cur that do not exist yet
	for cur := p; ; {
		if cur == "" || cur == "/" || cur == filepath.VolumeName(cur)+string(filepath.Separator) {
			return "", nil
		}
		if _, statErr := os.Lstat(cur); statErr == nil {
			resolved, evalErr := filepath.EvalSymlinks(cur)
			if evalErr != nil {
				return "", evalErr
			}
			// Join ignores an empty tail, so this also covers the case
			// where p itself exists.
			return filepath.Join(resolved, missing), nil
		}

		// cur is missing: push its last element onto the tail and retry
		// one level up.
		up := filepath.Dir(cur)
		if up == cur {
			return "", nil
		}
		missing = filepath.Join(filepath.Base(cur), missing)
		cur = up
	}
}
+6
View File
@@ -131,8 +131,14 @@ const maxWebhookBodyBytes = 256 * 1024 // 256 KiB
// PluginDispatcher is what the plugin-workload webhook handler needs from
// the deployer: the canonical Source-dispatch entry point plus access to
// the same Deps bundle so Trigger.Match can read store / crypto.
//
// DispatchTeardown is required so the preview-deploy flow can tear down
// an ephemeral per-branch child workload when its upstream branch is
// deleted. Same teardown path the API /workloads/{id} DELETE route uses;
// nil error on a clean teardown lets the caller delete the workload row.
type PluginDispatcher interface {
	// DispatchPlugin runs a deploy of workload w for the given intent.
	// A non-nil error is reported by the webhook handlers as a failed
	// dispatch.
	DispatchPlugin(ctx context.Context, w pluginWorkload, intent pluginIntent) error
	// DispatchTeardown tears down workload w. The preview flow deletes
	// the workload row only after this returns nil.
	DispatchTeardown(ctx context.Context, w pluginWorkload) error
	// PluginDeps returns the dependency bundle shared with the deployer.
	PluginDeps() pluginDeps
}
+98 -2
View File
@@ -13,8 +13,10 @@ import (
"github.com/go-chi/chi/v5"
"github.com/alexei/tinyforge/internal/metrics"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
"github.com/alexei/tinyforge/internal/workload/preview"
)
// maxTriggerFanOutConcurrency caps how many bindings dispatch in
@@ -44,6 +46,17 @@ const (
ReasonConfigError = "config merge error"
ReasonMatchError = "match error"
ReasonDispatchFailed = "dispatch failed"
ReasonPreviewError = "preview materialize error"
ReasonPreviewTorndown = "preview torn down"
// ReasonPreviewNoop: a branch-delete webhook arrived but no preview was
// ever materialized for that branch — a legitimate clean skip, distinct
// from "no binding matched" so it isn't misreported as a wiring problem.
ReasonPreviewNoop = "preview noop"
// ReasonPreviewOrphaned: the preview container was torn down but its
// workload row could not be deleted, leaving an orphan row. Surfaced
// distinctly so the partial failure is visible rather than masquerading
// as a clean teardown.
ReasonPreviewOrphaned = "preview torn down (row orphaned)"
)
// handleTriggerWebhook processes an inbound webhook for a first-class
@@ -172,7 +185,7 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
switch {
case r.Deployed:
deployed++
case r.Reason == ReasonBindingDisabled:
case r.Reason == ReasonBindingDisabled, r.Reason == ReasonPreviewNoop:
skipped++
case r.Reason == ReasonNoMatch:
noMatch++
@@ -194,8 +207,10 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
case noMatch == len(results)-skipped:
delivery.Detail = "no binding matched"
default:
delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d", skipped, errored)
delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d nomatch=%d",
skipped, errored, noMatch)
}
metrics.WebhookDeliveriesTotal.Inc(delivery.Outcome)
respondWebhookJSON(w, http.StatusOK, map[string]any{
"success": true,
"trigger": trg.Name,
@@ -326,6 +341,18 @@ func (h *Handler) fireBinding(
if intent.TriggeredBy == "" {
intent.TriggeredBy = "trigger-webhook"
}
// Preview-deploy fork: the git trigger plugin attaches preview_branch
// metadata when BranchPattern matches a non-baseline branch. Route
// the dispatch through a per-branch child workload rather than
// redeploying the parent template. The fork is intentionally before
// the dispatch so the template's container never gets clobbered by
// a feature-branch push.
if previewBranch := intent.Metadata["preview_branch"]; previewBranch != "" {
fired, reason := h.handlePreviewIntent(ctx, row, intent, previewBranch)
return fired, reason
}
if err := h.plugins.DispatchPlugin(ctx, pwl, *intent); err != nil {
slog.Warn("webhook: dispatch failed",
"trigger", trg.Name, "workload", row.Name, "error", err)
@@ -336,3 +363,72 @@ func (h *Handler) fireBinding(
return true, intent.Reason
}
// handlePreviewIntent handles an intent aimed at a non-baseline branch of
// a preview-template workload. The template itself is never deployed
// against a feature branch; the work happens on a per-branch child:
//
//   - Branch pushed: materialize (or reuse) the child preview workload
//     and dispatch the deploy against it.
//   - Branch deleted (metadata preview_deleted == "1"): look up the child,
//     dispatch its teardown, then delete its workload row so the
//     dashboard reflects the upstream state.
//
// The returned bool says whether something was actually dispatched; the
// string is the reason recorded for the binding. Failures are logged here
// and reported through one of the Reason* constants. A delete for a
// branch that never had a preview yields (false, ReasonPreviewNoop); a
// teardown whose row delete fails yields (true, ReasonPreviewOrphaned).
func (h *Handler) handlePreviewIntent(
	ctx context.Context,
	template store.Workload,
	intent *plugin.DeploymentIntent,
	branch string,
) (bool, string) {
	// Push path: deploy onto the branch's own preview workload.
	if intent.Metadata["preview_deleted"] != "1" {
		target, err := preview.MaterializeForBranch(h.store, template, branch)
		if err != nil {
			slog.Warn("webhook: preview materialize failed",
				"template", template.Name, "branch", branch, "error", err)
			return false, ReasonPreviewError
		}
		if err := h.plugins.DispatchPlugin(ctx, toPluginWorkload(target), *intent); err != nil {
			slog.Warn("webhook: preview dispatch failed",
				"template", template.Name, "preview", target.Name, "error", err)
			return false, ReasonDispatchFailed
		}
		slog.Info("webhook: triggered preview deploy",
			"template", template.Name, "branch", branch, "preview", target.Name, "reason", intent.Reason)
		return true, intent.Reason
	}

	// Delete path: find the preview for the branch and tear it down.
	stale, found, err := preview.FindPreviewForBranch(h.store, template.ID, branch)
	switch {
	case err != nil:
		slog.Warn("webhook: preview lookup failed",
			"template", template.Name, "branch", branch, "error", err)
		return false, ReasonPreviewError
	case !found:
		// Branch was deleted upstream but no preview was ever
		// materialized for it — nothing to do. Reported as a distinct
		// noop so it isn't bucketed as "no binding matched".
		return false, ReasonPreviewNoop
	}

	if err := h.plugins.DispatchTeardown(ctx, toPluginWorkload(stale)); err != nil {
		slog.Warn("webhook: preview teardown dispatch failed",
			"template", template.Name, "preview", stale.Name, "error", err)
		return false, ReasonDispatchFailed
	}
	if err := h.store.DeleteWorkload(stale.ID); err != nil {
		// Teardown succeeded but the row is still there. Use a distinct
		// reason so the partial failure stays visible instead of being
		// reported as a clean teardown; the operator can delete the row
		// from the dashboard if it sticks around.
		slog.Warn("webhook: preview row delete failed (orphaned row)",
			"template", template.Name, "preview", stale.Name, "error", err)
		return true, ReasonPreviewOrphaned
	}
	slog.Info("webhook: preview torn down",
		"template", template.Name, "branch", branch, "preview", stale.Name)
	return true, ReasonPreviewTorndown
}
+28
View File
@@ -327,6 +327,10 @@ func parseGitLabPushEvent(body []byte, headers http.Header) vendorParseResult {
Ref: probe.Ref,
CommitSHA: probe.After,
Pusher: pusher,
// GitLab does not emit `deleted: true`; the canonical signal
// is an all-zero `after` SHA. Same parser helper used for the
// GitHub / Gitea fallback so the two branches agree.
Deleted: isZeroSHA(probe.After),
},
}
if strings.HasPrefix(probe.Ref, "refs/heads/") {
@@ -346,6 +350,7 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
var probe struct {
Ref string `json:"ref"`
After string `json:"after"`
Deleted bool `json:"deleted"`
Repository struct {
FullName string `json:"full_name"`
CloneURL string `json:"clone_url"`
@@ -370,6 +375,12 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
if pusher == "" {
pusher = probe.Pusher.Username
}
// Branch / tag deletion is signalled either by the explicit
// `deleted: true` flag (GitHub / Gitea) or by an all-zero `after`
// SHA (older shapes). Both are honoured so the preview-deploy flow
// can tear down ephemeral workloads even when a vendor omits the
// boolean flag.
deleted := probe.Deleted || isZeroSHA(probe.After)
evt := plugin.InboundEvent{
Kind: "git-push",
Git: &plugin.GitEvent{
@@ -377,6 +388,7 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
Ref: probe.Ref,
CommitSHA: probe.After,
Pusher: pusher,
Deleted: deleted,
},
}
if strings.HasPrefix(probe.Ref, "refs/heads/") {
@@ -388,3 +400,19 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
}
return evt, nil
}
// isZeroSHA reports whether sha is the "no commit" sentinel that vendors
// put in the `after` field of a branch- or tag-delete push event: a
// string made up solely of '0' characters. The canonical form is 40
// zeros, but any all-zero string of at least 7 characters is accepted
// because some test fixtures truncate the SHA. Shorter strings, including
// the empty string, are never treated as the sentinel.
func isZeroSHA(sha string) bool {
	const minLen = 7
	if len(sha) < minLen {
		return false
	}
	// '0' is a single byte, so a byte-wise scan is sufficient.
	for i := 0; i < len(sha); i++ {
		if sha[i] != '0' {
			return false
		}
	}
	return true
}
+81
View File
@@ -0,0 +1,81 @@
package plugin
import (
"log/slog"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/notify"
)
// DispatchNotificationForWorkload routes `event` to the notification
// destinations of workload w. The first tier that applies wins:
//
//  1. Every workload_notifications row whose filter matches
//     `event.Type` receives the event (multi-route fan-out). A row's
//     secret is decrypted with deps.EncKey; if decryption fails the
//     event is still sent, unsigned, and a warning is logged.
//  2. If no row matched and the workload's own NotificationURL is set,
//     the event goes to that single legacy destination.
//  3. Otherwise the global settings.NotificationURL is used, when set.
//
// A failure to list the rows is logged and treated as "no rows", so the
// fallbacks still run. The function returns nothing: it is a no-op when
// deps.Notifier is nil, and delivery outcomes are left to the notifier.
//
// Callers pass an already-populated Event; only routing happens here.
func DispatchNotificationForWorkload(deps Deps, w Workload, event notify.Event) {
	if deps.Notifier == nil {
		return
	}

	routes, listErr := deps.Store.ListWorkloadNotifications(w.ID)
	if listErr != nil {
		slog.Warn("notify: list workload routes failed",
			"workload", w.ID, "error", listErr)
		// Discard whatever came back alongside the error.
		routes = nil
	}

	anyRouteMatched := false
	for _, route := range routes {
		if !route.MatchesEventType(event.Type) {
			continue
		}
		anyRouteMatched = true

		// Empty signing key means "send unsigned".
		var signingKey string
		if route.Secret != "" {
			plain, decErr := crypto.Decrypt(deps.EncKey, route.Secret)
			if decErr == nil {
				signingKey = plain
			} else {
				slog.Warn("notify: decrypt workload secret failed — sending unsigned",
					"workload", w.ID, "route", route.Name, "error", decErr)
			}
		}
		deps.Notifier.SendSigned(route.URL, signingKey, notify.TierSite, event)
	}
	if anyRouteMatched {
		return
	}

	// Legacy fallback: single per-workload destination on workloads.notification_url.
	if w.NotificationURL != "" {
		deps.Notifier.SendSigned(w.NotificationURL, w.NotificationSecret, notify.TierSite, event)
		return
	}

	// Global fallback: one destination in settings covers every workload
	// that has no per-workload configuration.
	settings, settingsErr := deps.Store.GetSettings()
	if settingsErr != nil {
		slog.Warn("notify: settings lookup for global fallback failed",
			"workload", w.ID, "error", settingsErr)
		return
	}
	if settings.NotificationURL != "" {
		deps.Notifier.SendSigned(settings.NotificationURL, settings.NotificationSecret, notify.TierSettings, event)
	}
}
@@ -32,6 +32,23 @@ type Config struct {
type source struct{}
// composeRunner is the slice of stack.Compose this plugin actually
// drives. Defined locally per the "interfaces where they are used"
// idiom so the plugin can be unit-tested without a real docker compose
// binary. `*stack.Compose` satisfies it implicitly.
type composeRunner interface {
// Up brings up the compose project projectName from the file at
// yamlPath. The returned string is the command's output; Deploy
// embeds a truncated copy of it in its error message when Up fails.
Up(ctx context.Context, projectName, yamlPath string) (string, error)
// Down takes the project projectName down. removeVolumes presumably
// asks for the project's volumes to be removed as well — confirm
// against stack.Compose. Teardown always passes true.
Down(ctx context.Context, projectName string, removeVolumes bool) (string, error)
// Ps lists the services of the project; Reconcile and syncContainers
// use the result to refresh the workload's container rows.
Ps(ctx context.Context, projectName, yamlPath string) ([]stack.Service, error)
}
// newComposeRunner returns the runner the plugin should call. Tests
// swap this var with a fake; production code never touches it. The
// indirection costs one function-pointer dereference per Deploy /
// Teardown / Reconcile call — negligible against the docker compose
// exec it gates.
//
// It is a plain package-level variable with no synchronization, so
// tests that reassign it must not run in parallel with each other.
var newComposeRunner = func() composeRunner { return stack.NewCompose("") }
// init registers the compose source with the plugin registry at package
// load time.
func init() { plugin.RegisterSource(&source{}) }
// Kind returns the source-kind identifier of this plugin, "compose".
func (*source) Kind() string { return "compose" }
@@ -82,7 +99,7 @@ func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload,
return fmt.Errorf("compose source: write yaml: %w", err)
}
compose := stack.NewCompose("")
compose := newComposeRunner()
out, err := compose.Up(ctx, projectName, yamlPath)
if err != nil {
return fmt.Errorf("compose source: docker compose up: %w (output: %s)", err, truncate(out, 1024))
@@ -105,7 +122,7 @@ func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload
cfg, _ := plugin.SourceConfigOf[Config](w)
projectName := composeProjectName(cfg.ComposeProjectName, w)
compose := stack.NewCompose("")
compose := newComposeRunner()
if _, err := compose.Down(ctx, projectName, true); err != nil {
// Log but proceed — the DB rows must not be orphaned.
slog.Warn("compose source: docker compose down", "workload", w.ID, "error", err)
@@ -139,7 +156,7 @@ func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workloa
projectName := composeProjectName(cfg.ComposeProjectName, w)
yamlPath, _ := writeYAMLIfChanged(w.ID, cfg.ComposeYAML)
compose := stack.NewCompose("")
compose := newComposeRunner()
services, err := compose.Ps(ctx, projectName, yamlPath)
if err != nil {
// Likely no compose project running for this workload. Mark
@@ -162,7 +179,7 @@ func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workloa
// syncContainers shares its body with Reconcile minus the missing-row
// fallback — Deploy expects compose ps to succeed since `up` just ran.
func syncContainers(ctx context.Context, deps plugin.Deps, compose *stack.Compose, w plugin.Workload, projectName, yamlPath string) error {
func syncContainers(ctx context.Context, deps plugin.Deps, compose composeRunner, w plugin.Workload, projectName, yamlPath string) error {
services, err := compose.Ps(ctx, projectName, yamlPath)
if err != nil {
return fmt.Errorf("compose ps: %w", err)
@@ -204,7 +221,17 @@ var projectNameSanitizer = regexp.MustCompile(`[^a-z0-9_-]`)
func composeProjectName(explicit string, w plugin.Workload) string {
if explicit != "" {
return explicit
// Apply the same sanitizer to operator-supplied names so a value
// like "--foo" cannot reach the docker CLI and be re-parsed as a
// flag. Reuses the canonical lower+[^a-z0-9_-]→"-" + trim path.
san := strings.ToLower(explicit)
san = projectNameSanitizer.ReplaceAllString(san, "-")
san = strings.Trim(san, "-")
if san != "" {
return san
}
// Fall through to the derived name if sanitization stripped
// everything (operator passed e.g. "---" — degenerate input).
}
name := strings.ToLower(w.Name)
name = projectNameSanitizer.ReplaceAllString(name, "-")
@@ -0,0 +1,512 @@
package compose
import (
"context"
"encoding/json"
"errors"
"strings"
"sync"
"testing"
"github.com/alexei/tinyforge/internal/stack"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// fakeRunner stands in for *stack.Compose. Every method records its
// inputs and returns whatever the test set on the corresponding field.
// Defaults are happy-path: empty services from Ps, no error from Up /
// Down. Fields are slice-typed so a single fakeRunner can serve a
// sequence of calls (Deploy issues Up + Ps in order).
type fakeRunner struct {
// mu is held by each method for its whole body, guarding all fields below.
mu sync.Mutex
// Up: recorded calls, then the scripted output / error for the nth call.
upCalls []runnerCall
upOuts []string
upErrs []error
// Down: recorded calls, then the scripted output / error for the nth call.
downCalls []runnerCall
downOuts []string
downErrs []error
// Ps: recorded calls, then the scripted services / error for the nth call.
psCalls []runnerCall
psResults [][]stack.Service
psErrs []error
// Per-method call counters, used as the index into the scripted
// slices above. NOTE(review): downCallI is the Down counter; its name
// does not follow the *CallIdx pattern of the other two.
upCallIdx int
psCallIdx int
downCallI int
}
// runnerCall captures the arguments of one fakeRunner method call.
// Up and Ps fill ProjectName and YAMLPath; Down fills ProjectName and
// RemoveVolumes. Fields a method does not take stay at their zero value.
type runnerCall struct {
ProjectName string
YAMLPath string
RemoveVolumes bool
}
// Up records the call and returns the output / error scripted for this
// call's position in upOuts / upErrs (zero values once the script runs
// out).
func (f *fakeRunner) Up(_ context.Context, projectName, yamlPath string) (string, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	call := runnerCall{ProjectName: projectName, YAMLPath: yamlPath}
	f.upCalls = append(f.upCalls, call)

	nth := f.upCallIdx
	f.upCallIdx = nth + 1
	return pop(f.upOuts, f.upErrs, nth)
}
// Down records the call (including removeVolumes) and returns the
// output / error scripted for this call's position in downOuts /
// downErrs (zero values once the script runs out).
func (f *fakeRunner) Down(_ context.Context, projectName string, removeVolumes bool) (string, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	call := runnerCall{ProjectName: projectName, RemoveVolumes: removeVolumes}
	f.downCalls = append(f.downCalls, call)

	nth := f.downCallI
	f.downCallI = nth + 1
	return pop(f.downOuts, f.downErrs, nth)
}
// Ps records the call and returns the services / error scripted for
// this call's position in psResults / psErrs. Past the end of either
// slice the corresponding result is nil.
func (f *fakeRunner) Ps(_ context.Context, projectName, yamlPath string) ([]stack.Service, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	f.psCalls = append(f.psCalls, runnerCall{ProjectName: projectName, YAMLPath: yamlPath})

	nth := f.psCallIdx
	f.psCallIdx = nth + 1

	var (
		services []stack.Service
		failure  error
	)
	if nth < len(f.psResults) {
		services = f.psResults[nth]
	}
	if nth < len(f.psErrs) {
		failure = f.psErrs[nth]
	}
	return services, failure
}
// pop looks up position n in two parallel "script" slices. Each result
// is the element at n when the slice is long enough and the zero value
// ("" / nil) otherwise, so a test only has to script the calls it cares
// about instead of padding both slices for every call.
func pop(outs []string, errs []error, n int) (out string, err error) {
	if n < len(outs) {
		out = outs[n]
	}
	if n < len(errs) {
		err = errs[n]
	}
	return out, err
}
// withFakeRunner makes newComposeRunner hand out f for the rest of the
// current test and registers a cleanup that puts the previous factory
// back. It returns nothing: a test that wants to inspect the recorded
// calls afterwards simply keeps its own pointer to f.
func withFakeRunner(t *testing.T, f *fakeRunner) {
	t.Helper()
	previous := newComposeRunner
	t.Cleanup(func() { newComposeRunner = previous })
	newComposeRunner = func() composeRunner { return f }
}
// testStore opens a store on ":memory:" and registers a cleanup that
// closes it when the test ends. A failure to open aborts the test.
func testStore(t *testing.T) *store.Store {
	t.Helper()
	db, openErr := store.New(":memory:")
	if openErr != nil {
		t.Fatalf("open store: %v", openErr)
	}
	t.Cleanup(func() {
		_ = db.Close() // best-effort: nothing useful to do with a close error in a test
	})
	return db
}
// seedWorkload inserts a compose workload named name whose source
// config carries yamlText, and returns the ID of the created row.
// Container rows reference this parent row, so tests seed it first.
// Any failure aborts the test.
func seedWorkload(t *testing.T, st *store.Store, name, yamlText string) string {
	t.Helper()

	encoded, marshalErr := json.Marshal(Config{ComposeYAML: yamlText})
	if marshalErr != nil {
		t.Fatalf("marshal config: %v", marshalErr)
	}

	spec := store.Workload{
		Kind:         "plugin",
		Name:         name,
		SourceKind:   "compose",
		SourceConfig: string(encoded),
	}
	created, createErr := st.CreateWorkload(spec)
	if createErr != nil {
		t.Fatalf("create workload: %v", createErr)
	}
	return created.ID
}
// TestDeploy_HappyPath runs Deploy against a fake runner that reports a
// single running "web" service. It checks that Up and Ps are each
// called once with the expected arguments and that the service ends up
// as a container row attached to the workload.
func TestDeploy_HappyPath(t *testing.T) {
	withTempDir(t) // isolates the YAML scratch dir under t.TempDir()

	const composeYAML = "services:\n web:\n image: nginx:alpine\n"
	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "myapp", composeYAML)
	w := plugin.Workload{
		ID:           workloadID,
		Name:         "myapp",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}

	runner := &fakeRunner{
		psResults: [][]stack.Service{
			{{Service: "web", State: "running", Status: "Up 5 seconds"}},
		},
	}
	withFakeRunner(t, runner)

	if err := (&source{}).Deploy(context.Background(), deps, w, plugin.DeploymentIntent{}); err != nil {
		t.Fatalf("Deploy: %v", err)
	}

	// Up called exactly once with the workload-derived project name.
	if n := len(runner.upCalls); n != 1 {
		t.Fatalf("Up called %d times, want 1", n)
	}
	up := runner.upCalls[0]
	if !strings.HasPrefix(up.ProjectName, "tf-myapp-") {
		t.Errorf("Up projectName = %q, want prefix tf-myapp-", up.ProjectName)
	}
	if !strings.HasSuffix(up.YAMLPath, "compose.yml") {
		t.Errorf("Up yamlPath = %q, want suffix compose.yml", up.YAMLPath)
	}

	// Ps follows Up to enumerate the resulting containers.
	if n := len(runner.psCalls); n != 1 {
		t.Fatalf("Ps called %d times, want 1", n)
	}

	// Service row written.
	row, err := st.GetContainerByID(workloadID + ":web")
	if err != nil {
		t.Fatalf("get container row: %v", err)
	}
	if row.WorkloadID != workloadID {
		t.Errorf("row.WorkloadID = %q, want %q", row.WorkloadID, workloadID)
	}
	if row.Role != "web" {
		t.Errorf("row.Role = %q, want %q", row.Role, "web")
	}
	if row.State != "running" {
		t.Errorf("row.State = %q, want %q", row.State, "running")
	}
}
func TestDeploy_EmptyYAMLConfig_RejectsBeforeExec(t *testing.T) {
deps := plugin.Deps{Store: testStore(t)}
wid := seedWorkload(t, deps.Store, "empty", "services:\n web:\n image: x\n")
w := plugin.Workload{
ID: wid,
Name: "empty",
SourceKind: "compose",
SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: ""}),
}
fake := &fakeRunner{}
withFakeRunner(t, fake)
src := &source{}
err := src.Deploy(context.Background(), deps, w, plugin.DeploymentIntent{})
if err == nil {
t.Fatal("Deploy accepted empty compose_yaml")
}
if !strings.Contains(err.Error(), "empty compose_yaml") {
t.Errorf("error = %v, want substring \"empty compose_yaml\"", err)
}
if len(fake.upCalls) != 0 {
t.Errorf("Up should not have been called; got %d calls", len(fake.upCalls))
}
}
// TestDeploy_UpFailure_PropagatesAndIncludesTruncatedOutput scripts Up
// to fail with more than 1024 bytes of output. Deploy must return an
// error that names the step, wraps the Up error and carries the
// truncated-output marker, and it must not go on to call Ps.
func TestDeploy_UpFailure_PropagatesAndIncludesTruncatedOutput(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: bad-image\n"
	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "fail", composeYAML)
	w := plugin.Workload{
		ID:           workloadID,
		Name:         "fail",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}

	noisyOutput := strings.Repeat("docker compose log noise ", 200) // > 1024 bytes
	runner := &fakeRunner{
		upOuts: []string{noisyOutput},
		upErrs: []error{errors.New("exit status 1")},
	}
	withFakeRunner(t, runner)

	err := (&source{}).Deploy(context.Background(), deps, w, plugin.DeploymentIntent{})
	if err == nil {
		t.Fatal("Deploy accepted Up failure")
	}
	msg := err.Error()
	if !strings.Contains(msg, "docker compose up") {
		t.Errorf("error = %v, want substring \"docker compose up\"", err)
	}
	if !strings.Contains(msg, "exit status 1") {
		t.Errorf("error = %v, want wrapped Up err", err)
	}
	if !strings.Contains(msg, "(truncated)") {
		t.Errorf("error = %v, want truncated-output marker", err)
	}

	// Ps must not be called when Up failed.
	if n := len(runner.psCalls); n != 0 {
		t.Errorf("Ps called %d times after Up failure; want 0", n)
	}
}
// TestDeploy_UpSucceedsButPsFails_SurfacesError covers the case where
// `up` succeeds but enumerating the services fails. Deploy must return
// the error so the UI does not show an empty containers index for a
// stack that is actually running.
func TestDeploy_UpSucceedsButPsFails_SurfacesError(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n"
	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "psfail", composeYAML)
	w := plugin.Workload{
		ID:           workloadID,
		Name:         "psfail",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}

	runner := &fakeRunner{
		psErrs: []error{errors.New("compose ps boom")},
	}
	withFakeRunner(t, runner)

	err := (&source{}).Deploy(context.Background(), deps, w, plugin.DeploymentIntent{})
	if err == nil {
		t.Fatal("Deploy ignored Ps failure")
	}
	if msg := err.Error(); !strings.Contains(msg, "sync container rows") {
		t.Errorf("error = %v, want substring \"sync container rows\"", err)
	}
}
// TestTeardown_DropsContainerRows_EvenWhenDownFails checks that a
// failing `docker compose down` does not orphan rows in the DB: Teardown
// still returns nil, Down was asked to remove volumes, and every seeded
// container row is gone afterwards.
func TestTeardown_DropsContainerRows_EvenWhenDownFails(t *testing.T) {
	withTempDir(t)

	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "tdown", "services:\n web:\n image: nginx\n")

	// Seed two service rows the way Deploy would.
	roles := []string{"web", "db"}
	for _, role := range roles {
		seeded := store.Container{
			ID:           workloadID + ":" + role,
			WorkloadID:   workloadID,
			WorkloadKind: "compose",
			Role:         role,
			Host:         "local",
			State:        "running",
		}
		if err := st.UpsertContainer(seeded); err != nil {
			t.Fatalf("seed container: %v", err)
		}
	}

	runner := &fakeRunner{downErrs: []error{errors.New("compose project unknown")}}
	withFakeRunner(t, runner)

	w := plugin.Workload{
		ID:           workloadID,
		Name:         "tdown",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: "services:\n web:\n image: nginx\n"}),
	}
	if err := (&source{}).Teardown(context.Background(), deps, w); err != nil {
		t.Fatalf("Teardown: %v", err)
	}

	// Down requested removeVolumes=true (matches the docstring claim).
	if n := len(runner.downCalls); n != 1 {
		t.Fatalf("Down calls = %d, want 1", n)
	}
	if down := runner.downCalls[0]; !down.RemoveVolumes {
		t.Errorf("Down removeVolumes = false, want true (workload teardown is destructive)")
	}

	// Rows gone despite the Down error.
	for _, role := range roles {
		_, err := st.GetContainerByID(workloadID + ":" + role)
		if !errors.Is(err, store.ErrNotFound) {
			t.Errorf("container row %q survived teardown: err=%v", role, err)
		}
	}
}
// TestTeardown_HappyPath checks that a clean Teardown calls Down once
// and removes the workload's container row.
func TestTeardown_HappyPath(t *testing.T) {
	withTempDir(t)

	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "tdown2", "services:\n web:\n image: nginx\n")

	seeded := store.Container{
		ID:           workloadID + ":web",
		WorkloadID:   workloadID,
		WorkloadKind: "compose",
		Role:         "web",
		Host:         "local",
		State:        "running",
	}
	if err := st.UpsertContainer(seeded); err != nil {
		t.Fatalf("seed: %v", err)
	}

	runner := &fakeRunner{}
	withFakeRunner(t, runner)

	w := plugin.Workload{
		ID:           workloadID,
		Name:         "tdown2",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: "services:\n web:\n image: nginx\n"}),
	}
	if err := (&source{}).Teardown(context.Background(), deps, w); err != nil {
		t.Fatalf("Teardown: %v", err)
	}

	if n := len(runner.downCalls); n != 1 {
		t.Errorf("Down calls = %d, want 1", n)
	}
	_, err := st.GetContainerByID(workloadID + ":web")
	if !errors.Is(err, store.ErrNotFound) {
		t.Errorf("container row survived teardown: err=%v", err)
	}
}
// TestReconcile_PsSuccess_UpsertsRows checks that Reconcile writes one
// container row per service reported by Ps, carrying the reported state.
func TestReconcile_PsSuccess_UpsertsRows(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n db:\n image: postgres\n"
	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "rec", composeYAML)

	runner := &fakeRunner{
		psResults: [][]stack.Service{{
			{Service: "web", State: "running"},
			{Service: "db", State: "running"},
		}},
	}
	withFakeRunner(t, runner)

	w := plugin.Workload{
		ID:           workloadID,
		Name:         "rec",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Reconcile(context.Background(), deps, w); err != nil {
		t.Fatalf("Reconcile: %v", err)
	}

	for _, role := range []string{"web", "db"} {
		row, err := st.GetContainerByID(workloadID + ":" + role)
		switch {
		case err != nil:
			t.Errorf("row %q missing after reconcile: %v", role, err)
		case row.State != "running":
			t.Errorf("row %q state = %q, want \"running\"", role, row.State)
		}
	}
}
// TestReconcile_PsFailure_MarksExistingRowsMissing covers compose ps
// failing (project unknown to Docker). Reconcile must return nil and
// flip the existing row to "missing" rather than deleting it, so the UI
// can surface the desync to the operator.
func TestReconcile_PsFailure_MarksExistingRowsMissing(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n"
	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "missing", composeYAML)

	seeded := store.Container{
		ID:           workloadID + ":web",
		WorkloadID:   workloadID,
		WorkloadKind: "compose",
		Role:         "web",
		Host:         "local",
		State:        "running",
	}
	if err := st.UpsertContainer(seeded); err != nil {
		t.Fatalf("seed: %v", err)
	}

	runner := &fakeRunner{psErrs: []error{errors.New("no such project")}}
	withFakeRunner(t, runner)

	w := plugin.Workload{
		ID:           workloadID,
		Name:         "missing",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Reconcile(context.Background(), deps, w); err != nil {
		t.Fatalf("Reconcile returned %v; should be nil even on Ps failure", err)
	}

	row, err := st.GetContainerByID(workloadID + ":web")
	if err != nil {
		t.Fatalf("row missing entirely (should be marked, not deleted): %v", err)
	}
	if got := row.State; got != "missing" {
		t.Errorf("row.State = %q, want \"missing\"", got)
	}
}
// TestReconcile_FallsBackToStatusWhenStateEmpty checks the row's state
// when Ps reports a service with an empty State but a populated Status
// (some compose versions do this for non-running services): the stored
// state must be the Status string.
func TestReconcile_FallsBackToStatusWhenStateEmpty(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n worker:\n image: alpine\n"
	st := testStore(t)
	deps := plugin.Deps{Store: st}
	workloadID := seedWorkload(t, st, "fallback", composeYAML)

	runner := &fakeRunner{
		psResults: [][]stack.Service{
			{{Service: "worker", State: "", Status: "Exit 0"}},
		},
	}
	withFakeRunner(t, runner)

	w := plugin.Workload{
		ID:           workloadID,
		Name:         "fallback",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Reconcile(context.Background(), deps, w); err != nil {
		t.Fatalf("Reconcile: %v", err)
	}

	row, err := st.GetContainerByID(workloadID + ":worker")
	if err != nil {
		t.Fatalf("get row: %v", err)
	}
	if got := row.State; got != "Exit 0" {
		t.Errorf("row.State = %q, want \"Exit 0\" (Status fallback)", got)
	}
}
// mustMarshalConfig JSON-encodes cfg into the raw form a workload's
// SourceConfig field takes, aborting the test if encoding fails. Going
// through Config (rather than a hand-written JSON string) keeps the test
// fixtures in step with the production decoder when a field is renamed.
func mustMarshalConfig(t *testing.T, cfg Config) json.RawMessage {
	t.Helper()
	encoded, marshalErr := json.Marshal(cfg)
	if marshalErr != nil {
		t.Fatalf("marshal config: %v", marshalErr)
	}
	return json.RawMessage(encoded)
}
// Compile-time guards: *stack.Compose must continue to satisfy
// composeRunner so the production path keeps building, and the fake
// must continue to satisfy it too so a drift in the interface shape
// fails the build here rather than at runtime.
//
// Each line assigns a typed nil pointer to the blank identifier, so
// nothing is allocated or executed; only the type check matters.
var (
_ composeRunner = (*stack.Compose)(nil)
_ composeRunner = (*fakeRunner)(nil)
)
@@ -0,0 +1,574 @@
package dockerfile
import (
"context"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"strconv"
"strings"
"time"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/events"
"github.com/alexei/tinyforge/internal/notify"
"github.com/alexei/tinyforge/internal/proxy"
"github.com/alexei/tinyforge/internal/staticsite"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// healthCheckDelay is the grace window after StartContainer before we
// probe IsContainerRunning. Mirrors the static plugin's window — short
// enough not to slow happy-path deploys, long enough to catch
// crash-on-boot failures (missing env, bad CMD, port conflict).
//
// deploy waits this long in a select that also watches ctx.Done(), so a
// cancelled deploy does not have to sit out the full delay.
const healthCheckDelay = 3 * time.Second
// deploy runs one end-to-end sync of a dockerfile workload:
//
//  1. fetch the latest commit SHA from the configured git provider
//  2. skip if SHA + container + proxy are all still healthy
//  3. clone the repo into a temp dir
//  4. resolve the build context + Dockerfile location
//  5. `docker build -t <tag> -f <dockerfile> <context>`
//  6. recreate the container with the new image
//  7. health-probe the container, surface logs on failure
//  8. reconfigure the proxy route
//  9. tear down the previous container (different ID) once we're sure
//     the new one is healthy and proxied
//
// Each step writes its own status update so the dashboard's runtime-
// state panel can show a useful intermediate state when the deploy
// stalls on the slow step (almost always the build).
//
// intent.Reason decides whether step 2 applies: an empty reason,
// "manual" and "promote" always rebuild; any other reason honours the
// unchanged-SHA short-circuit.
//
// Every failure is recorded via updateStatus (with the access token
// redacted from the message) and returned wrapped with the name of the
// failing step. A proxy configuration failure is the exception: it is
// only logged and the deploy still completes. If ctx is cancelled
// during the post-start grace window the new container is removed and
// ctx.Err() is returned.
func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
	// cancelCleanupTimeout bounds the container removal that runs after
	// ctx has already been cancelled (see the health-check wait below).
	const cancelCleanupTimeout = 30 * time.Second

	cfg, err := plugin.SourceConfigOf[Config](w)
	if err != nil {
		return fmt.Errorf("dockerfile source: decode config: %w", err)
	}

	prev, prevContainer, err := loadState(deps, w)
	if err != nil {
		return err
	}

	// Force a full rebuild on manual / promote / first-time deploys
	// (no Reason at all also implies manual). Schedule / git triggers
	// honour the unchanged-SHA short-circuit so cron polling does not
	// rebuild minute-by-minute when nothing changed.
	force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote"

	// Decrypt the access token if present. Token never escapes this
	// frame: any error message routes through sanitizeError(_, token)
	// which redacts the literal substring. A failed decrypt is logged
	// and the deploy continues with an empty token.
	token := ""
	if cfg.AccessToken != "" {
		decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken)
		if derr != nil {
			slog.Warn("dockerfile source: failed to decrypt access token",
				"workload", w.Name, "error", derr)
		} else {
			token = decrypted
		}
	}

	provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create provider: %v", err), token))
		return fmt.Errorf("create provider: %w", err)
	}

	latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token))
		return fmt.Errorf("get latest commit: %w", err)
	}

	domain := primaryDomain(deps, w)
	prevContainerID := ""
	prevProxyRouteID := ""
	if prevContainer != nil {
		prevContainerID = prevContainer.ContainerID
		prevProxyRouteID = prevContainer.ProxyRouteID
	}

	// Short-circuit: SHA unchanged AND container is still running AND
	// (if there's a public face) the proxy route still exists. Manual
	// deploys skip this entirely.
	//
	// We deliberately do NOT gate this on prev.Status == "deployed". A
	// transient failure (e.g. a one-off proxy-check error) leaves the
	// persisted status as "failed"; if we required "deployed" here, every
	// subsequent cron/git poll with the same SHA would fall through to a
	// full clone + docker build despite a perfectly healthy running
	// container — a rebuild storm that burns CPU/disk until a new commit
	// lands. Instead we trust the live container/proxy state and heal the
	// stale status via healUnchanged.
	if !force && latestSHA == prev.LastCommitSHA && prevContainerID != "" {
		// A probe error leaves running == false, which is treated the
		// same as "not running": fall through to a full redeploy.
		running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID)
		switch {
		case !running:
			slog.Info("dockerfile: container not running, forcing redeploy", "workload", w.Name)
		case domain != "":
			proxyOK, perr := deps.Proxy.RouteExists(ctx, domain)
			switch {
			case perr != nil:
				slog.Warn("dockerfile: proxy check failed, forcing redeploy",
					"workload", w.Name, "error", perr)
			case !proxyOK:
				slog.Info("dockerfile: proxy route missing, forcing redeploy", "workload", w.Name)
			default:
				return healUnchanged(deps, w, prev, latestSHA)
			}
		default:
			return healUnchanged(deps, w, prev, latestSHA)
		}
	}

	updateStatus(deps, w, "syncing", prev.LastCommitSHA, "")
	publishEvent(deps, w, "syncing")

	// Clone the repo into a temp dir. We always download the entire
	// repo tree (folderPath = ""); a ContextPath subset is applied
	// at build time, not at download time, so a Dockerfile in
	// `./docker/Dockerfile` with `ContextPath=""` still works.
	cloneDir, err := os.MkdirTemp("", "tf-build-"+idShort(w)+"-*")
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create clone dir: %v", err), token))
		return fmt.Errorf("create clone dir: %w", err)
	}
	defer os.RemoveAll(cloneDir)

	if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, "", cloneDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("download repo: %v", err), token))
		return fmt.Errorf("download repo: %w", err)
	}

	// Resolve the build context (with symlink-aware escape check) and
	// verify the Dockerfile is actually present before sending the
	// build off to the daemon.
	contextDir, err := resolveContextDir(cloneDir, cfg.ContextPath)
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("resolve context: %v", err), token))
		return fmt.Errorf("resolve context: %w", err)
	}
	if err := verifyDockerfileExists(contextDir, cfg.DockerfilePath); err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(err.Error(), token))
		return err
	}

	imageTag := imageTagFor(w)
	updateStatus(deps, w, "building", latestSHA, "")
	publishEvent(deps, w, "building")

	// Bridge per-line build output onto the event bus so /api/events
	// subscribers (the dashboard's live tail) can show progress while
	// the daemon chugs. The bus is non-blocking — slow subscribers drop
	// events rather than backpressure the build — so this is safe to
	// call from the hot scan loop.
	logFn := func(line string) {
		publishBuildLog(deps, w, line)
	}
	if err := deps.Docker.BuildImageAt(ctx, contextDir, cfg.DockerfilePath, imageTag, logFn); err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("docker build: %v", err), token))
		return fmt.Errorf("docker build: %w", err)
	}

	env := buildEnv(deps, w.ID)
	containerPort := strconv.Itoa(cfg.Port)

	settings, err := deps.Store.GetSettings()
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("get settings: %v", err), token))
		return fmt.Errorf("get settings: %w", err)
	}

	networkName := settings.Network
	networkID, err := deps.Docker.EnsureNetwork(ctx, networkName)
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("ensure network: %v", err), token))
		return fmt.Errorf("ensure network: %w", err)
	}

	containerName := containerNameFor(w)

	// Per-face proxy labels (Traefik consumes these; NPM ignores them).
	labels := map[string]string{}
	if domain != "" {
		if l := deps.Proxy.ContainerLabels(domain, cfg.Port); l != nil {
			for k, v := range l {
				labels[k] = v
			}
		}
	}

	cc := docker.ContainerConfig{
		Name:         containerName,
		Image:        imageTag,
		Env:          env,
		ExposedPorts: []string{containerPort + "/tcp"},
		NetworkName:  networkName,
		NetworkID:    networkID,
		Labels:       labels,
		WorkloadID:   w.ID,
		// Dockerfile workloads are tagged as "build" so the dashboard
		// and any filtered query can distinguish them from static sites
		// (which serve files) and image-source containers (which pull
		// pre-built images from a registry).
		WorkloadKind: string(store.WorkloadKindBuild),
		Role:         "",
	}

	containerID, err := deps.Docker.CreateContainer(ctx, cc)
	if err != nil {
		// Name conflict — best-effort cleanup of any prior container
		// (by ID first; by name as a fallback) and one retry.
		if prevContainerID != "" {
			deps.Docker.StopContainer(ctx, prevContainerID, 10)
			deps.Docker.RemoveContainer(ctx, prevContainerID, true)
		}
		removeContainerByName(ctx, deps, containerName)
		containerID, err = deps.Docker.CreateContainer(ctx, cc)
		if err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("create container: %v", err), token))
			return fmt.Errorf("create container: %w", err)
		}
	}

	if err := deps.Docker.StartContainer(ctx, containerID); err != nil {
		deps.Docker.RemoveContainer(ctx, containerID, true)
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("start container: %v", err), token))
		return fmt.Errorf("start container: %w", err)
	}

	// Brief health-check window — catch crash-on-boot. ctx-aware so a
	// cancelled deploy returns promptly. On failure surface the tail
	// of the container's logs as the error reason; that's almost
	// always what the operator needs to debug.
	select {
	case <-ctx.Done():
		// ctx is already cancelled in this branch, so a removal issued
		// with it would most likely be rejected straight away and leave
		// the container we just started running with nothing tracking
		// it. Run the cleanup on a context that keeps ctx's values but
		// not its cancellation, bounded by its own timeout.
		cleanupCtx, cancelCleanup := context.WithTimeout(context.WithoutCancel(ctx), cancelCleanupTimeout)
		deps.Docker.RemoveContainer(cleanupCtx, containerID, true)
		cancelCleanup()
		updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check")
		return ctx.Err()
	case <-time.After(healthCheckDelay):
	}

	running, runErr := deps.Docker.IsContainerRunning(ctx, containerID)
	if runErr != nil || !running {
		logMsg := "container exited immediately after start"
		if logs, logErr := deps.Docker.ContainerLogs(ctx, containerID, false, "40"); logErr == nil {
			buf, _ := io.ReadAll(logs)
			logs.Close()
			if len(buf) > 0 {
				// Pass `env` so any decrypted KEY=VALUE pair that the
				// container's startup output happens to echo (think
				// `RUN echo $DB_PASSWORD` in a debug Dockerfile) is
				// redacted before it lands in the operator-visible
				// last_error field.
				logMsg = sanitizeErrorWithSecrets(string(buf), token, env)
			}
		}
		deps.Docker.RemoveContainer(ctx, containerID, true)
		updateStatus(deps, w, "failed", latestSHA, logMsg)
		return fmt.Errorf("container not running: %s", logMsg)
	}

	// Resolve proxy target: in-network DNS by default, NPM-remote
	// override uses (settings.ServerIP, hostPort).
	forwardHost := containerName
	forwardPort := cfg.Port
	if settings.NpmRemote && settings.ProxyProvider == "npm" {
		if settings.ServerIP != "" {
			hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp")
			if hpErr != nil {
				slog.Warn("dockerfile: could not get host port for remote NPM",
					"workload", w.Name, "error", hpErr)
			} else {
				forwardHost = settings.ServerIP
				forwardPort = int(hostPort)
			}
		}
	}

	// Configure proxy if a domain is set. Any prior route is deleted
	// first and a new one configured in its place. If configuring the
	// new route fails, the failure is only logged and proxyRouteID keeps
	// the previous route's ID.
	proxyRouteID := prevProxyRouteID
	if domain != "" {
		if prevProxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, prevProxyRouteID)
		}
		routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{
			SSLCertificateID: settings.SSLCertificateID,
		})
		if rerr != nil {
			slog.Warn("dockerfile: failed to configure proxy",
				"workload", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr)
		} else {
			proxyRouteID = routeID
			slog.Info("dockerfile: proxy configured",
				"workload", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID)
		}
	}

	// Drop the previous container only after the new one is healthy
	// + routed. Different-ID-than-previous tells us we created a
	// fresh one (vs returning the same ID via UpsertContainer reuse).
	if prevContainerID != "" && prevContainerID != containerID {
		deps.Docker.StopContainer(ctx, prevContainerID, 10)
		deps.Docker.RemoveContainer(ctx, prevContainerID, true)
	}

	// Single transactional write of new state + container metadata.
	// On failure: tear down the just-created container + proxy route
	// so we don't leave orphans behind for the next deploy to trip
	// over.
	if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
		rs.LastCommitSHA = latestSHA
		rs.LastSyncAt = store.Now()
		rs.LastError = ""
		rs.Status = "deployed"
		c.ContainerID = containerID
		c.ProxyRouteID = proxyRouteID
		c.Subdomain = domain
		c.State = "running"
		c.Port = cfg.Port
		c.ImageRef = imageTag
	}); err != nil {
		slog.Error("dockerfile: failed to persist deploy state — rolling back",
			"workload", w.Name, "error", err)
		if proxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, proxyRouteID)
		}
		deps.Docker.StopContainer(ctx, containerID, 10)
		deps.Docker.RemoveContainer(ctx, containerID, true)
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token))
		return fmt.Errorf("persist deploy state: %w", err)
	}

	publishEvent(deps, w, "deployed")
	dispatchBuildNotification(deps, w, domain, "deployed", "")

	slog.Info("dockerfile deployed",
		"workload", w.Name,
		"sha", shortSHA(latestSHA),
		"image", imageTag)
	return nil
}
// updateStatus persists a status transition for the workload and then
// fires the follow-up side effects for terminal statuses.
//
// It records status and errMsg on the runtime state, overwrites the
// stored commit SHA only when commitSHA is non-empty, and maps
// "deployed"/"stopped"/"failed" onto the container row's State. Any
// other status (e.g. "syncing", "building") leaves the row's State as
// the previous deploy left it. A persistence failure is logged, not
// returned. Afterwards a "failed" status is written to the event log,
// and both "deployed" and "failed" dispatch a build notification.
func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) {
	// Container-row state implied by each terminal status; in-progress
	// statuses are deliberately absent so they never churn the row.
	rowStates := map[string]string{
		"deployed": "running",
		"stopped":  "stopped",
		"failed":   "failed",
	}
	apply := func(rs *runtimeState, c *store.Container) {
		rs.Status = status
		rs.LastError = errMsg
		if commitSHA != "" {
			rs.LastCommitSHA = commitSHA
		}
		if next, ok := rowStates[status]; ok {
			c.State = next
		}
	}
	if err := saveState(deps, w, apply); err != nil {
		slog.Error("dockerfile: failed to update status",
			"id", w.ID, "status", status, "error", err)
	}

	failed := status == "failed"
	if failed {
		publishEvent(deps, w, "failed: "+errMsg)
	}
	if failed || status == "deployed" {
		dispatchBuildNotification(deps, w, primaryDomain(deps, w), status, errMsg)
	}
}
// dispatchBuildNotification builds the notify.Event for a finished
// build and hands it to plugin.DispatchNotificationForWorkload, which
// owns the routing to the workload's notification destinations.
//
// The event type is "build_failure" when status is "failed" and
// "build_success" for every other status. URL is "https://<domain>"
// when a domain is known and empty otherwise.
func dispatchBuildNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
	evt := notify.Event{
		Type:    "build_success",
		Project: w.Name,
		Error:   errMsg,
	}
	if status == "failed" {
		evt.Type = "build_failure"
	}
	if domain != "" {
		evt.URL = "https://" + domain
	}
	plugin.DispatchNotificationForWorkload(deps, w, evt)
}
// publishEvent records a build status change twice: as a persisted
// event_log row and as an events.EventLog message on the bus.
//
// Severity is "error" when status starts with "failed", otherwise
// "info". The message has the shape `Build "<name>": <status>`. If the
// metadata cannot be marshalled the row is written with "{}" instead.
// If the row cannot be inserted the failure is logged and nothing is
// published, because the bus payload carries the row's ID and
// CreatedAt.
func publishEvent(deps plugin.Deps, w plugin.Workload, status string) {
	level := "info"
	if strings.HasPrefix(status, "failed") {
		level = "error"
	}
	text := fmt.Sprintf("Build %q: %s", w.Name, status)

	meta := "{}"
	raw, err := json.Marshal(map[string]string{
		"workload_id":   w.ID,
		"workload_name": w.Name,
		"status":        status,
	})
	if err != nil {
		slog.Error("dockerfile: marshal event metadata", "error", err)
	} else {
		meta = string(raw)
	}

	row, err := deps.Store.InsertEvent(store.EventLog{
		Source:   "dockerfile",
		Severity: level,
		Message:  text,
		Metadata: meta,
	})
	if err != nil {
		slog.Error("dockerfile: failed to persist event log", "error", err)
		return
	}

	payload := events.EventLogPayload{
		ID:        row.ID,
		Source:    "dockerfile",
		Severity:  level,
		Message:   text,
		Metadata:  meta,
		CreatedAt: row.CreatedAt,
	}
	deps.Events.Publish(events.Event{Type: events.EventLog, Payload: payload})
}
// publishBuildLog forwards one line of build output to the event bus
// as an events.EventBuildLog tagged with the workload ID and the
// "stdout" stream.
//
// Trailing '\r' and '\n' characters are stripped first; a line that is
// empty after stripping is dropped so subscribers never receive blank
// rows.
func publishBuildLog(deps plugin.Deps, w plugin.Workload, line string) {
	text := strings.TrimRight(line, "\r\n")
	if text != "" {
		payload := events.BuildLogPayload{
			WorkloadID: w.ID,
			Line:       text,
			Stream:     "stdout",
		}
		deps.Events.Publish(events.Event{Type: events.EventBuildLog, Payload: payload})
	}
}
// healUnchanged handles the "nothing to deploy" outcome: the commit is
// unchanged, so no rebuild is needed.
//
// When the previously persisted status is already "deployed" it only
// logs. Otherwise it rewrites the state to "deployed" / "running" and
// clears the last error, so a stale failure stops showing on a healthy
// workload. The repair goes through saveState rather than
// updateStatus on purpose: updateStatus would dispatch a build
// notification on every such repair. A failed repair is logged and
// swallowed; the function always returns nil.
func healUnchanged(deps plugin.Deps, w plugin.Workload, prev runtimeState, latestSHA string) error {
	slog.Info("dockerfile: no changes", "workload", w.Name, "sha", shortSHA(latestSHA))
	if prev.Status != "deployed" {
		repair := func(rs *runtimeState, c *store.Container) {
			rs.Status = "deployed"
			rs.LastError = ""
			c.State = "running"
		}
		if err := saveState(deps, w, repair); err != nil {
			slog.Warn("dockerfile: failed to heal stale status to deployed",
				"workload", w.Name, "error", err)
		}
	}
	return nil
}
// removeContainerByName stops and force-removes every container Docker
// lists under the given name. It is a best-effort cleanup: a failure to
// list containers ends the call silently, and the results of the stop
// and remove calls are not inspected.
//
// The loop intentionally keeps going after the first match so that
// more than one leftover artifact from an earlier partial deploy is
// cleared in a single pass.
func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) {
	listed, err := deps.Docker.ListContainers(ctx, nil)
	if err != nil {
		return
	}
	for _, ctr := range listed {
		if ctr.Name != name {
			continue
		}
		deps.Docker.StopContainer(ctx, ctr.ID, 10)
		deps.Docker.RemoveContainer(ctx, ctr.ID, true)
	}
}
// primaryDomain derives the workload's primary hostname from the first
// public face that has a subdomain or a domain set. Faces with neither
// are skipped.
//
//   - subdomain + domain -> "subdomain.domain"
//   - domain only        -> the domain
//   - subdomain only     -> "subdomain.<settings.Domain>", or the bare
//     subdomain when settings cannot be loaded or carry no domain
//
// Returns "" when no face qualifies.
func primaryDomain(deps plugin.Deps, w plugin.Workload) string {
	for _, face := range w.PublicFaces {
		sub, dom := face.Subdomain, face.Domain
		if sub == "" && dom == "" {
			continue
		}
		if dom != "" {
			if sub == "" {
				return dom
			}
			return sub + "." + dom
		}
		// Bare subdomain: fall back to the globally configured domain.
		cfg, err := deps.Store.GetSettings()
		if err != nil || cfg.Domain == "" {
			return sub
		}
		return sub + "." + cfg.Domain
	}
	return ""
}
// shortSHA abbreviates a commit SHA to its first 8 bytes for log
// output. Inputs of 8 bytes or fewer are returned unchanged.
func shortSHA(sha string) string {
	const keep = 8
	if len(sha) <= keep {
		return sha
	}
	return sha[:keep]
}
@@ -0,0 +1,131 @@
// Package dockerfile implements the "dockerfile" source: a git-repo-backed
// deployable that builds a Docker image from a user-supplied Dockerfile
// and runs one container. This is the "self-hosted Vercel" Source —
// users point at a Git repo containing a Dockerfile and Tinyforge
// handles clone → build → run → proxy in one shot, with no external CI
// pipeline.
//
// Architecturally the plugin sits between `static` (clones a Git repo,
// builds an image, runs one container) and `image` (richer runtime
// shape: ports, healthcheck, env, volumes). The deploy pipeline mirrors
// static — same git-fetch, same image-tag/container-name shape, same
// container-row state persistence — but the build step uses the
// operator's Dockerfile instead of generating one.
//
// The full pipeline is implemented inline in this package
// (deploy.go / teardown.go / reconcile.go) so a new dockerfile source
// kind is usable immediately on init() — no separate registration step
// in the deployer.
package dockerfile
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// Config is the per-workload source config blob. Mirrors the shape of
// the static plugin's Config so the UI wizard can largely reuse the
// existing Git-discovery + branch-picker + repo-picker components.
//
// Build-side fields:
//
//   - DockerfilePath: path to the Dockerfile *within the context*
//     directory. Defaults to "Dockerfile". Use e.g. "docker/Dockerfile"
//     when the operator's repo keeps Dockerfiles in a subfolder.
//   - ContextPath: subfolder of the cloned repo to use as the build
//     context. Defaults to "" (repo root). Use e.g. "./api" when the
//     repo's Dockerfile lives next to a backend service in a monorepo.
//
// Both paths must be relative and must not contain "..": Validate
// rejects a leading "/" or any ".." at save time.
//
// Runtime-side fields:
//
//   - Port: container port the workload listens on. Required; Validate
//     accepts 1 through 65535.
//   - Healthcheck: optional curl-style probe; empty disables.
//
// RepoOwner and RepoName are required and must not be blank.
//
// Env vars and volume mounts are handled out-of-band via the
// workload_env and workload_volumes tables, mirroring the image source.
type Config struct {
	Provider       string `json:"provider"`        // "gitea" | "github" | "gitlab"; "" = autodetect
	BaseURL        string `json:"base_url"`        // e.g. https://git.example.com
	RepoOwner      string `json:"repo_owner"`
	RepoName       string `json:"repo_name"`
	Branch         string `json:"branch"`
	ContextPath    string `json:"context_path"`    // path within repo (root by default)
	DockerfilePath string `json:"dockerfile_path"` // relative to context_path; "Dockerfile" by default
	AccessToken    string `json:"access_token"`    // encrypted; optional for public repos
	Port           int    `json:"port"`
	Healthcheck    string `json:"healthcheck,omitempty"`
}
// source is the field-less receiver type whose methods implement the
// "dockerfile" source kind; init registers a pointer to it with the
// plugin registry.
type source struct{}
// Eager registration — the deploy pipeline lives entirely inside this
// package, so the kind is usable as soon as init() fires. Importing the
// package is therefore enough to make plugin.GetSource("dockerfile")
// resolve.
func init() { plugin.RegisterSource(&source{}) }
// Kind returns the source kind identifier, "dockerfile".
func (*source) Kind() string { return "dockerfile" }
// SchemaSample returns an example Config with placeholder repository
// coordinates, the default Dockerfile path, and port 8080. The sample
// satisfies Validate once JSON-encoded.
func (*source) SchemaSample() any {
	return Config{
		Provider:       "gitea",
		BaseURL:        "https://git.example.com",
		RepoOwner:      "owner",
		RepoName:       "myservice",
		Branch:         "main",
		ContextPath:    "", // empty means the repo root is the build context
		DockerfilePath: "Dockerfile",
		Port:           8080,
	}
}
// Validate performs cheap save-time checks on a raw dockerfile source
// config so that hopeless input fails immediately instead of partway
// through a clone-and-build.
//
// It rejects, in order: an empty blob; JSON that does not decode into
// Config; a blank (after trimming) repo_owner or repo_name; a port
// outside 1..65535; and a DockerfilePath or ContextPath that starts
// with "/" or contains "..". Empty paths are allowed (defaults apply).
// The path checks are defense in depth; they give the operator a clear
// error at save time.
func (*source) Validate(cfg json.RawMessage) error {
	if len(cfg) == 0 {
		return fmt.Errorf("dockerfile source: config is required")
	}
	var parsed Config
	if err := json.Unmarshal(cfg, &parsed); err != nil {
		return fmt.Errorf("dockerfile source: invalid json: %w", err)
	}

	owner := strings.TrimSpace(parsed.RepoOwner)
	repo := strings.TrimSpace(parsed.RepoName)
	if owner == "" || repo == "" {
		return fmt.Errorf("dockerfile source: repo_owner and repo_name are required")
	}

	if port := parsed.Port; port < 1 || port > 65535 {
		return fmt.Errorf("dockerfile source: port must be between 1 and 65535 (got %d)", port)
	}

	for _, rel := range [...]string{parsed.DockerfilePath, parsed.ContextPath} {
		switch {
		case rel == "":
			// Unset: the default applies, nothing to check.
		case strings.HasPrefix(rel, "/"):
			return fmt.Errorf("dockerfile source: %q must be relative", rel)
		case strings.Contains(rel, ".."):
			return fmt.Errorf("dockerfile source: %q must not contain '..'", rel)
		}
	}
	return nil
}
// Deploy delegates to the package-level deploy function, passing all
// arguments through unchanged and returning its error.
func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
	return deploy(ctx, deps, w, intent)
}
// Teardown delegates to the package-level teardown function, passing
// all arguments through unchanged and returning its error.
func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	return teardown(ctx, deps, w)
}
// Reconcile delegates to the package-level reconcile function, passing
// all arguments through unchanged and returning its error.
func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	return reconcile(ctx, deps, w)
}
@@ -0,0 +1,288 @@
package dockerfile
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// ── Source interface plumbing ───────────────────────────────────────
// TestSource_Kind pins the kind identifier reported by the source.
func TestSource_Kind(t *testing.T) {
	kind := (&source{}).Kind()
	if kind != "dockerfile" {
		t.Fatalf("Kind = %q, want \"dockerfile\"", kind)
	}
}
// TestSource_Registered_AtInit checks that importing the package is
// enough for the registry to resolve the "dockerfile" kind.
func TestSource_Registered_AtInit(t *testing.T) {
	// The package-level init() has already run by the time any test
	// executes, so a lookup failure here points at either
	// plugin.RegisterSource or this package's init.
	src, err := plugin.GetSource("dockerfile")
	if err != nil {
		t.Fatalf("GetSource(dockerfile): %v", err)
	}
	if kind := src.Kind(); kind != "dockerfile" {
		t.Fatalf("registered source has wrong kind: %q", kind)
	}
}
// TestSource_SchemaSample_RoundTrips guarantees the advertised sample
// config is itself accepted by Validate after JSON encoding.
func TestSource_SchemaSample_RoundTrips(t *testing.T) {
	src := &source{}
	encoded, err := json.Marshal(src.SchemaSample())
	if err != nil {
		t.Fatalf("marshal sample: %v", err)
	}
	if verr := src.Validate(encoded); verr != nil {
		t.Fatalf("Validate(sample) = %v, want nil", verr)
	}
}
// ── Validate ────────────────────────────────────────────────────────
// TestValidate_RejectsEmpty checks that a nil config blob is refused.
func TestValidate_RejectsEmpty(t *testing.T) {
	err := (&source{}).Validate(nil)
	if err == nil {
		t.Fatal("expected error on empty config, got nil")
	}
}
func TestValidate_RejectsMissingRepo(t *testing.T) {
cases := []Config{
{RepoName: "x", Port: 80}, // owner missing
{RepoOwner: "y", Port: 80}, // name missing
{RepoOwner: " ", RepoName: "x", Port: 80}, // owner whitespace-only
}
for i, c := range cases {
raw, _ := json.Marshal(c)
if err := (&source{}).Validate(raw); err == nil {
t.Errorf("case %d: expected error, got nil", i)
}
}
}
func TestValidate_RejectsBadPort(t *testing.T) {
for _, port := range []int{0, -1, 70000} {
raw, _ := json.Marshal(Config{RepoOwner: "a", RepoName: "b", Port: port})
if err := (&source{}).Validate(raw); err == nil {
t.Errorf("port %d: expected error, got nil", port)
}
}
}
func TestValidate_RejectsPathEscape(t *testing.T) {
cases := []Config{
{RepoOwner: "a", RepoName: "b", Port: 80, DockerfilePath: "/etc/passwd"},
{RepoOwner: "a", RepoName: "b", Port: 80, DockerfilePath: "../../etc/passwd"},
{RepoOwner: "a", RepoName: "b", Port: 80, ContextPath: "../../"},
{RepoOwner: "a", RepoName: "b", Port: 80, ContextPath: "/etc"},
}
for i, c := range cases {
raw, _ := json.Marshal(c)
if err := (&source{}).Validate(raw); err == nil {
t.Errorf("case %d: expected path-escape rejection, got nil", i)
}
}
}
// TestValidate_AcceptsValid is the happy path: a config with nested
// relative Dockerfile and context paths must validate cleanly.
func TestValidate_AcceptsValid(t *testing.T) {
	good := Config{
		RepoOwner:      "owner",
		RepoName:       "repo",
		Port:           8080,
		DockerfilePath: "docker/Dockerfile",
		ContextPath:    "services/api",
	}
	payload, _ := json.Marshal(good)
	err := (&source{}).Validate(payload)
	if err != nil {
		t.Fatalf("Validate(valid) = %v", err)
	}
}
// ── Naming helpers ──────────────────────────────────────────────────
// TestNaming_SameNameDifferentIDs_NoCollision checks that two
// workloads sharing a name but not an ID get distinct container names
// and distinct image tags.
func TestNaming_SameNameDifferentIDs_NoCollision(t *testing.T) {
	first := plugin.Workload{ID: "aaaaaaaa-rest", Name: "svc"}
	second := plugin.Workload{ID: "bbbbbbbb-rest", Name: "svc"}

	if name := containerNameFor(first); name == containerNameFor(second) {
		t.Errorf("container names collide: %q", name)
	}
	if tag := imageTagFor(first); tag == imageTagFor(second) {
		t.Errorf("image tags collide: %q", tag)
	}
}
func TestNaming_ShortIDsPassThrough(t *testing.T) {
w := plugin.Workload{ID: "abc", Name: "tiny"}
if !strings.HasSuffix(containerNameFor(w), "-abc") {
t.Errorf("container name lost short id: %q", containerNameFor(w))
}
}
// ── Context + Dockerfile resolution ─────────────────────────────────
// TestResolveContextDir_Empty_ReturnsRoot checks that an empty context
// path resolves to the clone root itself. The root may come back in
// its symlink-resolved form, so either spelling is accepted.
func TestResolveContextDir_Empty_ReturnsRoot(t *testing.T) {
	root := t.TempDir()
	got, err := resolveContextDir(root, "")
	if err != nil {
		t.Fatalf("resolveContextDir: %v", err)
	}
	resolved, _ := filepath.EvalSymlinks(root)
	if got != resolved && got != root {
		t.Errorf("got %q, want %q (or symlink-resolved equivalent)", got, root)
	}
}
// TestResolveContextDir_Subfolder_OK checks that an existing
// subdirectory inside the clone root is accepted and returned.
func TestResolveContextDir_Subfolder_OK(t *testing.T) {
	root := t.TempDir()
	if err := os.MkdirAll(filepath.Join(root, "api"), 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	resolved, err := resolveContextDir(root, "api")
	if err != nil {
		t.Fatalf("resolveContextDir: %v", err)
	}
	if !strings.HasSuffix(resolved, "api") {
		t.Errorf("got %q, expected suffix 'api'", resolved)
	}
}
// TestResolveContextDir_NonexistentSubfolder checks that a context
// path pointing at a directory that does not exist is an error.
func TestResolveContextDir_NonexistentSubfolder(t *testing.T) {
	_, err := resolveContextDir(t.TempDir(), "missing")
	if err == nil {
		t.Fatal("expected error for missing subfolder")
	}
}
// TestResolveContextDir_RejectsEscape plants a symlink inside the
// clone root that points at a directory outside it and checks that
// resolveContextDir refuses to follow it.
func TestResolveContextDir_RejectsEscape(t *testing.T) {
	root := t.TempDir()
	// Validate is the first line of defence and resolveContextDir the
	// second. The symlink's name contains neither "/" nor "..", so it
	// would pass Validate; the resolver must still reject it (this
	// also covers configs that bypassed Validate, e.g. a direct DB
	// edit).
	outside := t.TempDir()
	trap := filepath.Join(root, "escape")
	if err := os.Symlink(outside, trap); err != nil {
		t.Skipf("symlink unsupported in this environment: %v", err)
	}
	_, err := resolveContextDir(root, "escape")
	if err == nil {
		t.Fatal("expected escape-path rejection")
	}
}
// TestVerifyDockerfileExists_Present checks that an empty
// dockerfilePath falls back to "Dockerfile" and passes when that file
// exists in the context directory.
func TestVerifyDockerfileExists_Present(t *testing.T) {
	ctxDir := t.TempDir()
	target := filepath.Join(ctxDir, "Dockerfile")
	if err := os.WriteFile(target, []byte("FROM scratch\n"), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	err := verifyDockerfileExists(ctxDir, "")
	if err != nil {
		t.Fatalf("verifyDockerfileExists(default) = %v, want nil", err)
	}
}
// TestVerifyDockerfileExists_Missing checks that an empty context
// directory (no Dockerfile at the default path) produces an error.
func TestVerifyDockerfileExists_Missing(t *testing.T) {
	err := verifyDockerfileExists(t.TempDir(), "")
	if err == nil {
		t.Fatal("expected error for missing Dockerfile")
	}
}
// TestVerifyDockerfileExists_CustomPath checks that a slash-separated
// custom path to a Dockerfile in a subfolder is honoured.
func TestVerifyDockerfileExists_CustomPath(t *testing.T) {
	ctxDir := t.TempDir()
	sub := filepath.Join(ctxDir, "docker")
	if err := os.MkdirAll(sub, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	if err := os.WriteFile(filepath.Join(sub, "Dockerfile.prod"), []byte("FROM scratch\n"), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	err := verifyDockerfileExists(ctxDir, "docker/Dockerfile.prod")
	if err != nil {
		t.Fatalf("verifyDockerfileExists(custom) = %v, want nil", err)
	}
}
// TestVerifyDockerfileExists_RejectsAbsolutePath checks that an
// absolute Dockerfile path is refused.
func TestVerifyDockerfileExists_RejectsAbsolutePath(t *testing.T) {
	err := verifyDockerfileExists(t.TempDir(), "/etc/passwd")
	if err == nil {
		t.Fatal("expected error for absolute dockerfile path")
	}
}
// ── Sanitiser ───────────────────────────────────────────────────────
// TestSanitizeError_RedactsToken checks that the access token is
// replaced by the "[REDACTED]" marker wherever it appears.
func TestSanitizeError_RedactsToken(t *testing.T) {
	secret := "ghp_supersecret"
	cleaned := sanitizeError("401 from gitea token="+secret+" ok", secret)
	if strings.Contains(cleaned, secret) {
		t.Errorf("token leaked: %q", cleaned)
	}
	if !strings.Contains(cleaned, "[REDACTED]") {
		t.Errorf("missing [REDACTED] marker: %q", cleaned)
	}
}
// TestSanitizeError_CollapsesWhitespace checks that newlines, carriage
// returns, and tabs do not survive sanitisation.
func TestSanitizeError_CollapsesWhitespace(t *testing.T) {
	cleaned := sanitizeError("a\nb\rc\td", "")
	if strings.ContainsAny(cleaned, "\n\r\t") {
		t.Errorf("did not collapse: %q", cleaned)
	}
}
// TestSanitizeError_TruncatesUTF8Safe checks that over-long messages
// are cut with a trailing ellipsis and that the cut never lands in the
// middle of a multi-byte rune.
func TestSanitizeError_TruncatesUTF8Safe(t *testing.T) {
	// "é" is two bytes, so 1000 copies is 2000 bytes — far beyond the
	// 240-byte cap, which forces the truncation path.
	input := strings.Repeat("é", 1000)
	cleaned := sanitizeError(input, "")
	if !strings.HasSuffix(cleaned, "…") {
		t.Errorf("missing ellipsis: %q", cleaned)
	}
	// The structural walk in isValidUTF8Slice is enough to spot a torn
	// rune in this fixture; utf8.ValidString would be the canonical
	// check.
	if !isValidUTF8Slice([]byte(cleaned)) {
		t.Errorf("truncation produced broken UTF-8: %q", cleaned)
	}
}
// isValidUTF8Slice is a dependency-free structural UTF-8 check used by
// the truncation test. It walks b one sequence at a time and reports
// false when:
//
//   - a sequence starts with a continuation byte (0x80..0xBF),
//   - a lead byte is 0xF8 or above (never valid in UTF-8),
//   - the sequence runs past the end of b, or
//   - any trailing byte is not a continuation byte.
//
// It does not reject overlong encodings or surrogate code points;
// utf8.Valid is the full check. An empty or nil slice is valid.
func isValidUTF8Slice(b []byte) bool {
	for i := 0; i < len(b); {
		lead := b[i]
		var size int
		switch {
		case lead < 0x80:
			size = 1
		case lead < 0xC0:
			return false // continuation byte at sequence start
		case lead < 0xE0:
			size = 2
		case lead < 0xF0:
			size = 3
		case lead < 0xF8:
			size = 4
		default:
			return false // 0xF8..0xFF never begin a UTF-8 sequence
		}
		if i+size > len(b) {
			return false // sequence torn at the end of the slice
		}
		// Every byte after the lead must look like 10xxxxxx; otherwise
		// a torn rune followed by unrelated bytes would slip through.
		for _, c := range b[i+1 : i+size] {
			if c&0xC0 != 0x80 {
				return false
			}
		}
		i += size
	}
	return true
}
// ── State row ID ────────────────────────────────────────────────────
// TestContainerRowID_Deterministic checks that the container row ID is
// stable across calls for the same workload and carries the
// ":dockerfile" suffix.
func TestContainerRowID_Deterministic(t *testing.T) {
	wl := plugin.Workload{ID: "abcd1234-rest"}
	first, second := containerRowID(wl), containerRowID(wl)
	if first != second {
		t.Errorf("containerRowID not deterministic: %q vs %q", first, second)
	}
	if !strings.HasSuffix(first, ":dockerfile") {
		t.Errorf("containerRowID missing suffix: %q", first)
	}
}
@@ -0,0 +1,37 @@
package dockerfile
import (
"log/slog"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// buildEnv turns the workload's stored env rows into "KEY=VALUE"
// strings.
//
// Failures are logged rather than propagated. If the rows cannot be
// listed the result is nil. If one encrypted value cannot be decrypted
// that single entry is skipped and the rest are still returned, so a
// single bad entry cannot block a deploy.
func buildEnv(deps plugin.Deps, workloadID string) []string {
	entries, err := deps.Store.ListWorkloadEnv(workloadID)
	if err != nil {
		slog.Warn("dockerfile source: list workload env", "workload", workloadID, "error", err)
		return nil
	}
	env := make([]string, 0, len(entries))
	for _, entry := range entries {
		if !entry.Encrypted {
			env = append(env, entry.Key+"="+entry.Value)
			continue
		}
		plain, derr := crypto.Decrypt(deps.EncKey, entry.Value)
		if derr != nil {
			slog.Warn("dockerfile source: decrypt env value",
				"workload", workloadID, "key", entry.Key, "error", derr)
			continue
		}
		env = append(env, entry.Key+"="+plain)
	}
	return env
}
@@ -0,0 +1,141 @@
package dockerfile
import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
// resolveContextDir maps the configured context path onto a directory
// inside the cloned repository and refuses anything that ends up
// outside it.
//
// cloneRoot is made absolute and, when possible, symlink-resolved. A
// ctx of "", "." or "./" selects the root itself. Any other value is
// treated as a slash-separated path relative to the root. The joined
// path is symlink-resolved before the containment check, so a symlink
// planted inside the clone cannot redirect the build context elsewhere.
// The result must exist and be a directory.
func resolveContextDir(cloneRoot, ctx string) (string, error) {
	root, err := filepath.Abs(cloneRoot)
	if err != nil {
		return "", fmt.Errorf("abs cloneRoot: %w", err)
	}
	if resolved, rerr := filepath.EvalSymlinks(root); rerr == nil {
		root = resolved
	}

	switch ctx {
	case "", ".", "./":
		return root, nil
	}

	target, err := filepath.Abs(filepath.Join(root, filepath.FromSlash(ctx)))
	if err != nil {
		return "", fmt.Errorf("abs candidate: %w", err)
	}
	// Symlinks are resolved first so the containment test below sees
	// the real destination, not the in-tree link name.
	if resolved, rerr := filepath.EvalSymlinks(target); rerr == nil {
		target = resolved
	}

	inside := target == root || strings.HasPrefix(target, root+string(filepath.Separator))
	if !inside {
		return "", fmt.Errorf("context path %q escapes clone root", ctx)
	}

	fi, err := os.Stat(target)
	if err != nil {
		return "", fmt.Errorf("stat context_path %q: %w", ctx, err)
	}
	if !fi.IsDir() {
		return "", fmt.Errorf("context_path %q is not a directory", ctx)
	}
	return target, nil
}
// verifyDockerfileExists confirms that the Dockerfile named by
// dockerfilePath is a regular (non-directory) entry under contextDir,
// so the operator gets a focused error before the build starts.
//
// An empty dockerfilePath means "Dockerfile". The path is
// slash-separated and relative to contextDir; a leading "/" or any
// ".." is rejected outright. A missing file, a path that points at a
// directory, and any other stat failure each produce a distinct error.
func verifyDockerfileExists(contextDir, dockerfilePath string) error {
	rel := dockerfilePath
	if rel == "" {
		rel = "Dockerfile"
	}
	if strings.HasPrefix(rel, "/") || strings.Contains(rel, "..") {
		return fmt.Errorf("dockerfile_path %q must be relative and contain no '..'", rel)
	}

	fi, err := os.Stat(filepath.Join(contextDir, filepath.FromSlash(rel)))
	switch {
	case err == nil:
		if fi.IsDir() {
			return fmt.Errorf("dockerfile_path %q points at a directory, not a file", rel)
		}
		return nil
	case errors.Is(err, os.ErrNotExist):
		return fmt.Errorf("Dockerfile not found at %s/%s", filepath.Base(contextDir), rel)
	default:
		return fmt.Errorf("stat Dockerfile %q: %w", rel, err)
	}
}
// sanitizeError clamps an error string before it lands in
// containers.extra_json (last_error) or echoes through an outbound
// notification webhook. Mirrors the static-plugin helper of the same
// name so both plugins agree on the surface area they expose to
// operators.
//
// It is sanitizeErrorWithSecrets with no env-var values to redact:
// only accessToken is scrubbed from msg.
func sanitizeError(msg, accessToken string) string {
	return sanitizeErrorWithSecrets(msg, accessToken, nil)
}
// sanitizeErrorWithSecrets is the dockerfile-plugin-specific extension:
// when capturing container build/runtime logs into last_error we ALSO
// need to redact decrypted env-var values, because a malicious or
// debug-laden Dockerfile can `RUN echo $SECRET` and land a runtime
// secret in operator-readable state.
//
// Parameters:
//   - msg: the raw text to clean; "" is returned unchanged.
//   - accessToken: redacted whenever non-empty, regardless of length.
//   - envKV: entries shaped "KEY=VALUE", split on the first '='.
//     Entries without '=' are ignored. Only values of 4 bytes or more
//     are redacted (shorter values produce too many false-positive
//     substring matches against words like "is" / "of").
//
// Secrets are redacted longest-first. Replacing a short secret that is
// a substring of a longer one first (e.g. "pass" before "password123")
// would break up the longer secret and leave its tail ("word123") in
// the output. After redaction, '\n', '\r' and '\t' become spaces, and
// anything beyond 240 bytes is cut at a rune boundary and suffixed
// with "…".
func sanitizeErrorWithSecrets(msg, accessToken string, envKV []string) string {
	if msg == "" {
		return ""
	}

	secrets := make([]string, 0, len(envKV)+1)
	if accessToken != "" {
		secrets = append(secrets, accessToken)
	}
	for _, kv := range envKV {
		_, value, found := strings.Cut(kv, "=")
		if !found || len(value) < 4 {
			continue
		}
		secrets = append(secrets, value)
	}
	// Stable sort keeps the original relative order among secrets of
	// equal length, so the output is deterministic.
	sort.SliceStable(secrets, func(i, j int) bool {
		return len(secrets[i]) > len(secrets[j])
	})
	for _, secret := range secrets {
		msg = strings.ReplaceAll(msg, secret, "[REDACTED]")
	}

	msg = strings.Map(func(r rune) rune {
		switch r {
		case '\n', '\r', '\t':
			return ' '
		}
		return r
	}, msg)

	const maxLen = 240
	if len(msg) > maxLen {
		// Rune-aware truncation: walk back to the previous rune
		// boundary so multi-byte chars at the cap don't tear.
		cut := maxLen
		for cut > 0 && !isRuneStart(msg[cut]) {
			cut--
		}
		msg = msg[:cut] + "…"
	}
	return msg
}

// isRuneStart reports whether b is a leading byte of a UTF-8 sequence
// (i.e. not a 10xxxxxx continuation byte). Used to walk back from a
// byte-offset cut to a rune boundary.
func isRuneStart(b byte) bool {
	return b&0xC0 != 0x80
}
@@ -0,0 +1,32 @@
package dockerfile
import (
"fmt"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// idShort returns the leading 8 bytes of the workload ID, or the whole
// ID when it is shorter than that. Workload names are not unique, so
// this suffix is what keeps the container and image names of two
// same-named workloads apart.
func idShort(w plugin.Workload) string {
	const prefixLen = 8
	if len(w.ID) >= prefixLen {
		return w.ID[:prefixLen]
	}
	return w.ID
}
// containerNameFor is the deterministic container name. Prefix `tf-build-`
// distinguishes a dockerfile-built container from `dw-site-` (static) and
// per-stage image names at a glance in `docker ps`.
//
// The result is "tf-build-<workload name>-<idShort>". The workload name
// is inserted as-is; no normalisation happens here.
func containerNameFor(w plugin.Workload) string {
	return fmt.Sprintf("tf-build-%s-%s", w.Name, idShort(w))
}
// imageTagFor is the deterministic image tag the build step emits. Same
// shape as the container name so `docker images` shows the linkage at a
// glance.
//
// The result is "tf-build-<workload name>-<idShort>:latest"; the tag is
// always "latest", so each build of a workload reuses the same tag.
func imageTagFor(w plugin.Workload) string {
	return fmt.Sprintf("tf-build-%s-%s:latest", w.Name, idShort(w))
}
@@ -0,0 +1,72 @@
package dockerfile
import (
"context"
"log/slog"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// reconcile syncs the container row's state column with Docker reality
// for this workload's single container, and marks the runtime state as
// "failed" if the container is gone or has crashed. It never rebuilds
// or restarts anything: the dashboard surfaces the failed status and
// the operator triggers a redeploy explicitly.
//
// Auto-redeploy could be added later, but it should be gated on a
// per-workload toggle: a crash loop with auto-rebuild would burn CPU
// rebuilding the same broken commit forever.
//
// Behaviour:
//   - No persisted row, or a row without a container ID: nothing to do.
//   - The Docker query fails while ctx is already cancelled or expired:
//     ctx.Err() is returned and no state is touched. The failure says
//     nothing about the container, so recording it as "missing" would
//     flip a healthy workload to failed.
//   - The Docker query fails otherwise: the row is marked "missing"; if
//     the workload was "deployed" it becomes "failed" and an event is
//     published.
//   - The container is found: the row's state is synced to "running" or
//     "stopped"; a stopped container on a "deployed" workload becomes
//     "failed" and an event is published.
//
// Store write failures are logged and do not abort the reconcile. The
// only errors returned come from loadState or from ctx.
func reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	st, prevContainer, err := loadState(deps, w)
	if err != nil {
		return err
	}
	if prevContainer == nil || prevContainer.ContainerID == "" {
		return nil
	}

	running, err := deps.Docker.IsContainerRunning(ctx, prevContainer.ContainerID)
	if err != nil {
		// A cancelled or timed-out context makes the call fail
		// regardless of the container's real state. Bail out and let
		// the next reconcile pass decide.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		// Most likely "no such container" — mark missing so the UI
		// surfaces it; runtime status moves to "failed" so the
		// dashboard and operator event triggers see the regression.
		if uerr := deps.Store.UpdateContainerState(prevContainer.ID, "missing"); uerr != nil {
			slog.Warn("dockerfile: mark missing", "workload", w.Name, "error", uerr)
		}
		if st.Status == "deployed" {
			if uerr := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
				rs.Status = "failed"
				rs.LastError = "container not found"
				c.State = "missing"
			}); uerr != nil {
				slog.Warn("dockerfile: persist missing-state", "workload", w.Name, "error", uerr)
			}
			publishEvent(deps, w, "failed: container not found")
		}
		return nil
	}

	desired := "running"
	if !running {
		desired = "stopped"
	}
	// Only write when the row disagrees with Docker, to avoid a store
	// write on every reconcile pass.
	if prevContainer.State != desired {
		if err := deps.Store.UpdateContainerState(prevContainer.ID, desired); err != nil {
			slog.Warn("dockerfile: state sync", "workload", w.Name, "error", err)
		}
	}
	// Gating on "deployed" means the transition (and its event) fires
	// once, not on every pass while the container stays down.
	if !running && st.Status == "deployed" {
		if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
			rs.Status = "failed"
			rs.LastError = "container stopped unexpectedly"
			c.State = "stopped"
		}); err != nil {
			slog.Warn("dockerfile: persist crashed-state", "workload", w.Name, "error", err)
		}
		publishEvent(deps, w, "failed: container stopped unexpectedly")
	}
	return nil
}
@@ -0,0 +1,179 @@
package dockerfile
import (
"encoding/json"
"errors"
"fmt"
"log/slog"
"sync"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// runtimeState is the per-workload state we persist inside the
// container row's extra_json blob. Mirrors the static plugin's
// runtimeState shape so anyone reading the DB can interpret the two
// kinds identically.
//
// LastImageDigest is the build's image ID — distinct from a registry
// digest (we never push) but useful for "did the build actually
// produce a different artifact?" diffing when we add caching later.
//
// Every field is tagged omitempty, so zero-valued fields are left out
// of the encoded blob.
type runtimeState struct {
	LastCommitSHA   string `json:"last_commit_sha,omitempty"`   // commit of the most recent deploy attempt that recorded one
	LastImageDigest string `json:"last_image_digest,omitempty"` // image ID of the last build (see above)
	LastSyncAt      string `json:"last_sync_at,omitempty"`      // timestamp string; the deploy path sets it from store.Now()
	LastError       string `json:"last_error,omitempty"`        // last failure message; cleared on success
	Status          string `json:"status,omitempty"`            // e.g. "deployed", "failed", "stopped", "syncing", "building"
}
// runtimeStateKeys lists every JSON field name owned by runtimeState.
// saveState strips these from the generic map before re-emitting so
// the typed values do not double-write under both their JSON tag and
// any subsequent extension's tag.
//
// Keep this list in step with the json tags on runtimeState: a tag
// added there without an entry here would not be stripped.
var runtimeStateKeys = []string{
	"last_commit_sha", "last_image_digest", "last_sync_at", "last_error", "status",
}
// containerRowID is the deterministic container row ID. Stable across
// redeploys so saveState upserts in place.
//
// The ID is the workload ID with the literal suffix ":dockerfile".
func containerRowID(w plugin.Workload) string {
	return w.ID + ":dockerfile"
}
// loadState fetches the workload's container row and decodes the
// runtime state stored in its extra_json blob.
//
// On first deploy no row exists (store.ErrNotFound): the result is a
// zero runtimeState, a nil row, and a nil error. Any other lookup
// failure is wrapped and returned. A blob that is empty or "{}" is
// not decoded. A blob that fails to decode is logged at debug level
// and the state decoded so far is returned alongside the row; a
// corrupt blob does not fail the load.
func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Container, error) {
	var state runtimeState

	row, err := deps.Store.GetContainerByID(containerRowID(w))
	switch {
	case err == nil:
		// Row found; decode below.
	case errors.Is(err, store.ErrNotFound):
		return state, nil, nil
	default:
		return state, nil, fmt.Errorf("dockerfile source: load state: %w", err)
	}

	if blob := row.ExtraJSON; blob != "" && blob != "{}" {
		if derr := json.Unmarshal([]byte(blob), &state); derr != nil {
			slog.Debug("dockerfile source: decode extra_json", "workload", w.ID, "error", derr)
		}
	}
	return state, &row, nil
}
// saveLocks serializes per-workload RMW of the container row. Same
// pattern as the static plugin — SQLite's MaxOpenConns=1 serializes
// statements but not the caller's read-then-write intent, so two
// concurrent deploys for the same workload could stomp each other's
// container_id / proxy_route_id without this mutex.
//
// Entries are reference-counted and removed only when the last holder
// releases. This bounds memory (no per-workload-ID leak) WITHOUT the
// use-after-delete hazard of deleting an entry on teardown: deleting a
// live entry while a concurrent saveState still holds (or is about to
// lock) it would let a fresh saveState mint a SECOND mutex for the same
// workload, losing the RMW serialization the lock exists to provide.
var saveLocks struct {
	mu    sync.Mutex           // guards locks and every saveLock.refs counter
	locks map[string]*saveLock // keyed by workload ID; created lazily by acquireSaveLock
}
type saveLock struct {
mu sync.Mutex
refs int
}
// acquireSaveLock returns the per-workload lock (creating it on first use),
// registers this caller as a holder, and takes the lock. Pair with
// releaseSaveLock. The outer mutex is held only for the bookkeeping; callers
// contend on the returned per-workload lock.
func acquireSaveLock(workloadID string) *saveLock {
saveLocks.mu.Lock()
if saveLocks.locks == nil {
saveLocks.locks = map[string]*saveLock{}
}
l, ok := saveLocks.locks[workloadID]
if !ok {
l = &saveLock{}
saveLocks.locks[workloadID] = l
}
l.refs++
saveLocks.mu.Unlock()
l.mu.Lock()
return l
}
// releaseSaveLock unlocks and drops the caller's reference, removing the map
// entry once no holders remain. Because refs is incremented under saveLocks.mu
// before the entry can be observed for deletion, an entry with a pending
// acquirer is never deleted.
func releaseSaveLock(workloadID string, l *saveLock) {
l.mu.Unlock()
saveLocks.mu.Lock()
l.refs--
if l.refs == 0 {
delete(saveLocks.locks, workloadID)
}
saveLocks.mu.Unlock()
}
// saveState upserts the workload's container row under the per-workload
// save lock. It loads the previous state, hands both the typed runtime
// state and the row to mutate, then merges the typed state back into the
// row's extra_json blob and writes the row.
//
// Keys in extra_json that runtimeState does not own survive the
// round-trip, so other writers can extend the blob without this struct
// having to grow. Keys that runtimeState does own are always replaced by
// the post-mutate values (and dropped when the value is now empty).
//
// Parameters:
//   - deps:   supplies the store used for the read and the upsert.
//   - w:      the workload whose row is updated; w.ID keys both the lock
//     and the row.
//   - mutate: invoked exactly once, while the lock is held, with pointers
//     to the state and row that will be persisted.
//
// Returns a wrapped error when loading, encoding, or upserting fails.
func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
	lk := acquireSaveLock(w.ID)
	defer releaseSaveLock(w.ID, lk)

	prev, prevRow, err := loadState(deps, w)
	if err != nil {
		return err
	}

	// Defaults for a first deploy; an existing row replaces them wholesale.
	row := store.Container{
		ID:           containerRowID(w),
		WorkloadID:   w.ID,
		WorkloadKind: string(store.WorkloadKindBuild),
		Host:         "local",
	}
	if prevRow != nil {
		row = *prevRow
	}

	generic := map[string]json.RawMessage{}
	if row.ExtraJSON != "" && row.ExtraJSON != "{}" {
		if err := json.Unmarshal([]byte(row.ExtraJSON), &generic); err != nil {
			slog.Debug("dockerfile source: decode extra_json (generic)", "workload", w.ID, "error", err)
		}
		// A blob of literal JSON null decodes without error but leaves the
		// map nil; writing the typed keys into it below would panic.
		if generic == nil {
			generic = map[string]json.RawMessage{}
		}
	}
	// Drop the typed keys first so an omitempty field that is now empty
	// does not linger with its previous value.
	for _, k := range runtimeStateKeys {
		delete(generic, k)
	}

	state := prev
	mutate(&state, &row)

	// Round-trip the typed state through JSON to get it as a key/value map
	// that can be overlaid on the generic one.
	typedBytes, err := json.Marshal(state)
	if err != nil {
		return fmt.Errorf("dockerfile source: marshal state: %w", err)
	}
	typedMap := map[string]json.RawMessage{}
	if err := json.Unmarshal(typedBytes, &typedMap); err != nil {
		return fmt.Errorf("dockerfile source: re-decode typed state: %w", err)
	}
	for k, v := range typedMap {
		generic[k] = v
	}

	merged, err := json.Marshal(generic)
	if err != nil {
		return fmt.Errorf("dockerfile source: marshal merged state: %w", err)
	}
	row.ExtraJSON = string(merged)
	row.LastSeenAt = store.Now()
	if err := deps.Store.UpsertContainer(row); err != nil {
		return fmt.Errorf("dockerfile source: upsert container row: %w", err)
	}
	return nil
}
@@ -0,0 +1,51 @@
package dockerfile
import (
"context"
"errors"
"log/slog"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// teardown removes what a deploy of this workload left behind, in this
// order: the proxy route, the running container, and the container index
// row. A workload with no container row is a no-op, so calling teardown
// repeatedly is safe.
//
// Only a failure to load the prior state is returned as an error. Failures
// of the individual removal steps are logged as warnings and do not stop
// the remaining steps.
//
// The built image is intentionally left alone so the next deploy of the
// same workload can reuse the docker build cache; unused images are
// expected to be pruned separately.
func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	_, row, err := loadState(deps, w)
	if err != nil || row == nil {
		// err is nil here when the workload simply never deployed.
		return err
	}

	// Route goes first so traffic stops being sent to a container that is
	// about to be removed.
	if routeID := row.ProxyRouteID; routeID != "" {
		if routeErr := deps.Proxy.DeleteRoute(ctx, routeID); routeErr != nil {
			slog.Warn("dockerfile: failed to remove proxy route", "workload", w.Name, "error", routeErr)
		}
	}

	if containerID := row.ContainerID; containerID != "" {
		if rmErr := deps.Docker.RemoveContainer(ctx, containerID, true); rmErr != nil {
			slog.Warn("dockerfile: failed to remove container", "workload", w.Name, "error", rmErr)
		}
	}

	// A row that is already gone counts as success.
	rowErr := deps.Store.DeleteContainer(row.ID)
	if rowErr != nil && !errors.Is(rowErr, store.ErrNotFound) {
		slog.Warn("dockerfile: failed to delete container row", "workload", w.Name, "error", rowErr)
	}

	// The per-workload save lock is reference-counted and removes itself
	// when its last holder releases, so nothing is deleted here — an
	// explicit delete could race a concurrent saveState and defeat the
	// serialization the lock provides.
	return nil
}
@@ -444,22 +444,12 @@ func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg
}
// dispatchSiteNotification fires a site_sync_success or
// site_sync_failure event to the configured outbound webhook.
// Resolution: per-workload URL+secret first, then fall through to
// settings.notification_url/secret. Always best-effort.
// site_sync_failure event for the workload via the shared multi-route
// dispatcher in plugin.DispatchNotificationForWorkload. Resolution
// order (workload_notifications → legacy single URL → settings global)
// is identical to the dockerfile plugin's path so receivers see
// consistent fan-out behaviour across source kinds.
func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
if deps.Notifier == nil {
return
}
settings, err := deps.Store.GetSettings()
if err != nil {
slog.Warn("static site: notify settings lookup failed", "site", w.ID, "error", err)
return
}
url, secret, tier := resolveSiteTarget(w, settings)
if url == "" {
return
}
eventType := "site_sync_success"
if status == "failed" {
eventType = "site_sync_failure"
@@ -468,7 +458,7 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
if domain != "" {
siteURL = "https://" + domain
}
deps.Notifier.SendSigned(url, secret, tier, notify.Event{
plugin.DispatchNotificationForWorkload(deps, w, notify.Event{
Type: eventType,
Project: w.Name,
URL: siteURL,
@@ -476,16 +466,6 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
})
}
// resolveSiteTarget mirrors the legacy resolveSiteTarget helper but
// reads notification config off the workload row (where it now lives
// post-refactor) rather than the static_sites row.
func resolveSiteTarget(w plugin.Workload, settings store.Settings) (string, string, notify.Tier) {
if w.NotificationURL != "" {
return w.NotificationURL, w.NotificationSecret, notify.TierSite
}
return settings.NotificationURL, settings.NotificationSecret, notify.TierSettings
}
// publishEvent emits a static_site_status event on the bus AND
// persists an event_log row so the dashboard's audit trail picks it
// up. Message format ("Static site \"%s\": %s") is preserved verbatim
@@ -165,30 +165,42 @@ func TestContainerRowID_Deterministic(t *testing.T) {
}
}
func TestLockFor_ReturnsSameLockForSameWorkload(t *testing.T) {
// Suffix by t.Name() so the package-global saveLocks map cannot
// bleed key state between tests (or between -count=N runs).
func TestSaveLock_FreedWhenIdle(t *testing.T) {
// After the last holder releases, the reference-counted entry must be
// removed from the map so the lock table cannot grow without bound.
// Suffix by t.Name() so the package-global saveLocks map cannot bleed
// key state between tests (or between -count=N runs).
key := t.Name() + "-wid"
a := lockFor(key)
b := lockFor(key)
if a != b {
t.Fatalf("lockFor returned distinct locks for same workload: %p vs %p", a, b)
lk := acquireSaveLock(key)
saveLocks.mu.Lock()
_, present := saveLocks.locks[key]
saveLocks.mu.Unlock()
if !present {
t.Fatal("acquireSaveLock did not register the entry while held")
}
releaseSaveLock(key, lk)
saveLocks.mu.Lock()
_, stillPresent := saveLocks.locks[key]
saveLocks.mu.Unlock()
if stillPresent {
t.Fatal("releaseSaveLock left the entry behind after the last holder released")
}
}
func TestLockFor_ReturnsDistinctLocksForDifferentWorkloads(t *testing.T) {
a := lockFor(t.Name() + "-a")
b := lockFor(t.Name() + "-b")
if a == b {
t.Fatalf("lockFor returned same lock for different workloads: %p", a)
}
func TestSaveLock_DistinctWorkloadsDoNotSerialize(t *testing.T) {
// Two different workloads must be lockable at the same time. If they
// shared a mutex the second acquire would block forever (deadlock).
a := acquireSaveLock(t.Name() + "-a")
b := acquireSaveLock(t.Name() + "-b")
releaseSaveLock(t.Name()+"-b", b)
releaseSaveLock(t.Name()+"-a", a)
}
func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
// Two goroutines holding the same lock must run sequentially. The
// counter would race past 2 if locking were broken; with the lock,
// the increment is observed monotonically.
lk := lockFor(t.Name() + "-wid")
func TestSaveLock_SerializesConcurrentAcquisitions(t *testing.T) {
// Goroutines acquiring the same workload's lock must run sequentially.
// The counter would race past 1 if locking were broken; with the lock,
// peak in-flight stays at 1.
key := t.Name() + "-wid"
var (
wg sync.WaitGroup
mu sync.Mutex
@@ -199,8 +211,8 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
lk.Lock()
defer lk.Unlock()
lk := acquireSaveLock(key)
defer releaseSaveLock(key, lk)
mu.Lock()
counter++
@@ -216,15 +228,15 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
}
wg.Wait()
if peak != 1 {
t.Fatalf("lockFor failed to serialize: peak in-flight = %d, want 1", peak)
t.Fatalf("acquireSaveLock failed to serialize: peak in-flight = %d, want 1", peak)
}
}
func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
// Distinct workloads acquired in parallel must not panic on map
// access — exercises the outer-mutex protection inside lockFor.
// Each iteration uses a unique key so the test stresses the
// insertion path (the common case for "first deploy" callers).
func TestSaveLock_ConcurrentMapAccessIsSafe(t *testing.T) {
// Distinct workloads acquired+released in parallel must not panic on map
// access — exercises the outer-mutex protection inside acquire/release.
// Each iteration uses a unique key so the test stresses the insertion +
// refcount-cleanup paths (the common case for "first deploy" callers).
prefix := t.Name() + "-"
var wg sync.WaitGroup
for i := 0; i < 50; i++ {
@@ -232,9 +244,9 @@ func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
lk := lockFor(prefix + strconv.Itoa(i))
lk.Lock()
lk.Unlock()
key := prefix + strconv.Itoa(i)
lk := acquireSaveLock(key)
releaseSaveLock(key, lk)
}()
}
wg.Wait()
+42 -14
View File
@@ -80,26 +80,55 @@ func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Contai
// container_id / proxy_route_id and orphaning Docker resources. The
// mutex caps the concurrency at 1 per workload; cross-workload
// parallelism is unaffected.
//
// Entries are reference-counted and removed only when the last holder
// releases. This bounds memory (no per-workload-ID leak) WITHOUT the
// use-after-delete hazard of deleting an entry on teardown: deleting a
// live entry while a concurrent saveState still holds (or is about to
// lock) it would let a fresh saveState mint a SECOND mutex for the same
// workload, losing the RMW serialization the lock exists to provide.
var saveLocks struct {
mu sync.Mutex
locks map[string]*sync.Mutex
locks map[string]*saveLock
}
// lockFor returns the per-workload mutex, creating it on first use.
// The outer mutex is held only briefly during map lookup; the returned
// per-workload lock is what callers actually contend on.
func lockFor(workloadID string) *sync.Mutex {
type saveLock struct {
mu sync.Mutex
refs int
}
// acquireSaveLock returns the per-workload lock (creating it on first use),
// registers this caller as a holder, and takes the lock. Pair with
// releaseSaveLock. The outer mutex is held only for the bookkeeping; callers
// contend on the returned per-workload lock.
func acquireSaveLock(workloadID string) *saveLock {
saveLocks.mu.Lock()
defer saveLocks.mu.Unlock()
if saveLocks.locks == nil {
saveLocks.locks = map[string]*sync.Mutex{}
saveLocks.locks = map[string]*saveLock{}
}
m, ok := saveLocks.locks[workloadID]
l, ok := saveLocks.locks[workloadID]
if !ok {
m = &sync.Mutex{}
saveLocks.locks[workloadID] = m
l = &saveLock{}
saveLocks.locks[workloadID] = l
}
return m
l.refs++
saveLocks.mu.Unlock()
l.mu.Lock()
return l
}
// releaseSaveLock unlocks and drops the caller's reference, removing the map
// entry once no holders remain. Because refs is incremented under saveLocks.mu
// before the entry can be observed for deletion, an entry with a pending
// acquirer is never deleted.
func releaseSaveLock(workloadID string, l *saveLock) {
l.mu.Unlock()
saveLocks.mu.Lock()
l.refs--
if l.refs == 0 {
delete(saveLocks.locks, workloadID)
}
saveLocks.mu.Unlock()
}
// saveState upserts the container row, calling mutate so callers can
@@ -115,9 +144,8 @@ func lockFor(workloadID string) *sync.Mutex {
// Per-workload mutex serializes concurrent callers so two parallel
// Deploys can't read the same prior state and race their writes.
func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
lk := lockFor(w.ID)
lk.Lock()
defer lk.Unlock()
lk := acquireSaveLock(w.ID)
defer releaseSaveLock(w.ID, lk)
prev, prevRow, err := loadState(deps, w)
if err != nil {
@@ -185,14 +185,23 @@ func TestSaveState_RecoversFromInvalidExtraJSON(t *testing.T) {
deps, _ := testDeps(t)
w := plugin.Workload{ID: t.Name() + "-wid", Name: "site"}
// UpsertContainer now validates extra_json at the boundary, so this
// test seeds a valid row first and corrupts it via raw SQL to
// simulate a pre-existing bad row from an upgrade / external edit.
if err := deps.Store.UpsertContainer(store.Container{
ID: containerRowID(w),
WorkloadID: w.ID,
WorkloadKind: string(store.WorkloadKindSite),
Host: "local",
ExtraJSON: `{not json`,
ExtraJSON: `{}`,
}); err != nil {
t.Fatalf("seed bad row: %v", err)
t.Fatalf("seed row: %v", err)
}
if _, err := deps.Store.DB().Exec(
`UPDATE containers SET extra_json = ? WHERE id = ?`,
`{not json`, containerRowID(w),
); err != nil {
t.Fatalf("corrupt extra_json: %v", err)
}
err := saveState(deps, w, func(state *runtimeState, _ *store.Container) {
@@ -66,5 +66,8 @@ func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
if err := deps.Store.DeleteContainer(prevContainer.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
slog.Warn("static site: failed to delete container row", "site", w.Name, "error", err)
}
// The per-workload save-mutex is reference-counted (see state.go) and
// frees itself when the last holder releases, so teardown no longer
// deletes it explicitly — doing so could race a concurrent saveState.
return nil
}
+49 -6
View File
@@ -18,11 +18,19 @@ import (
// match the event repo). Mode controls whether branch pushes or tag
// pushes fire the deploy. Branch is exact-matched when Mode=="push";
// TagPattern is glob-matched when Mode=="tag".
//
// BranchPattern is the preview-deploy escape hatch: when non-empty in
// "push" mode it overrides Branch and matches the event branch as a glob
// (`feat/*`, `release-*`, `*` for "any branch"). The trigger returns an
// intent whose Metadata["preview_branch"] holds the matched branch — the
// dispatcher uses that signal to materialize an ephemeral per-branch
// child workload rather than redeploying the parent.
type Config struct {
Repo string `json:"repo"`
Mode string `json:"mode"` // "push" | "tag"
Branch string `json:"branch"`
TagPattern string `json:"tag_pattern"`
Repo string `json:"repo"`
Mode string `json:"mode"` // "push" | "tag"
Branch string `json:"branch"`
BranchPattern string `json:"branch_pattern"`
TagPattern string `json:"tag_pattern"`
}
type trigger struct{}
@@ -49,7 +57,15 @@ func (*trigger) Validate(cfg json.RawMessage) error {
}
switch c.Mode {
case "push":
// Branch is optional ("" means any branch).
// Branch is optional ("" means any branch). BranchPattern is
// validated as a path.Match glob if present; misconfigured
// patterns are rejected at the boundary rather than letting them
// fail silently inside Match.
if c.BranchPattern != "" {
if _, err := path.Match(c.BranchPattern, "probe"); err != nil {
return fmt.Errorf("git trigger: invalid branch_pattern %q: %w", c.BranchPattern, err)
}
}
case "tag":
pattern := c.TagPattern
if pattern == "" {
@@ -90,8 +106,24 @@ func (*trigger) Match(ctx context.Context, deps plugin.Deps, w plugin.Workload,
if evt.Git.Tag != "" {
meta["tag"] = evt.Git.Tag
}
// Preview-deploy signal: when BranchPattern is set AND the matched
// branch is NOT the configured baseline Branch, flag this dispatch
// for materialization as a per-branch child workload. The dispatcher
// reads preview_branch and decides whether to spawn a preview row;
// a baseline-branch push falls through to a normal redeploy of the
// template itself.
if cfg.Mode == "push" && cfg.BranchPattern != "" && evt.Git.Branch != "" && evt.Git.Branch != cfg.Branch {
meta["preview_branch"] = evt.Git.Branch
if evt.Git.Deleted {
meta["preview_deleted"] = "1"
}
}
reason := "git-push"
if meta["preview_deleted"] == "1" {
reason = "git-branch-deleted"
}
return &plugin.DeploymentIntent{
Reason: "git-push",
Reason: reason,
Reference: evt.Git.CommitSHA,
Metadata: meta,
TriggeredAt: time.Now().UTC(),
@@ -106,6 +138,17 @@ func refMatches(cfg Config, ref string) bool {
if !ok {
return false
}
// Pattern-mode preview filter: any branch whose name matches the
// glob is in scope. The baseline `cfg.Branch` is also allowed so
// pushes to the template's primary branch keep redeploying the
// template itself.
if cfg.BranchPattern != "" {
if cfg.Branch != "" && cfg.Branch == branch {
return true
}
matched, err := path.Match(cfg.BranchPattern, branch)
return err == nil && matched
}
return cfg.Branch == "" || cfg.Branch == branch
case "tag":
tag, ok := strings.CutPrefix(ref, "refs/tags/")
+12 -5
View File
@@ -56,14 +56,21 @@ type ImagePushEvent struct {
// GitEvent covers both push (commits) and tag-create flavors. Vendor is
// "gitea" | "github" | "gitlab" | "" (autodetected).
//
// Deleted is true when the push event reports a branch / tag was deleted.
// Used by the preview-deploy flow to tear down ephemeral per-branch
// workloads when a feature branch is removed upstream. Inferred from
// GitHub-style `deleted: true` and Gitea's identical convention; GitLab
// signals deletion via after-SHA zeros (parsed at vendor level).
type GitEvent struct {
Vendor string
Repo string // owner/name
Ref string // refs/heads/main or refs/tags/v1.2.3
Branch string // populated for branch refs
Tag string // populated for tag refs
Vendor string
Repo string // owner/name
Ref string // refs/heads/main or refs/tags/v1.2.3
Branch string // populated for branch refs
Tag string // populated for tag refs
CommitSHA string
Pusher string
Deleted bool
}
// ManualEvent represents a user-initiated deploy from the UI or API.
+239
View File
@@ -0,0 +1,239 @@
// Package preview implements branch-pattern preview deploys. A "template"
// workload is one whose git trigger has a BranchPattern configured; when
// an inbound push event names a branch other than the template's primary
// Branch, the dispatcher materializes (or reuses) a child workload via
// MaterializeForBranch and dispatches the deploy against the child. The
// child is then torn down on a matching branch-delete event.
//
// The package is intentionally narrow:
// - it does not know about Docker, the proxy, or any plugin internals
// - it operates over a Store interface so the webhook handler can mock
// it in tests
// - it owns the per-branch naming + subdomain mangling so the wiring
// code (trigger fan-out) stays a pure dispatch path
package preview
import (
"encoding/json"
"fmt"
"regexp"
"strings"
"github.com/alexei/tinyforge/internal/store"
)
// Store is the slice of the persistence layer the preview package needs.
// Defined locally so tests can fake it without dragging the full Store.
type Store interface {
// GetWorkloadByID fetches a single workload row by ID.
// NOTE(review): not called from within this package's visible code;
// presumably used by callers that hold a preview.Store — confirm.
GetWorkloadByID(id string) (store.Workload, error)
// ListChildrenByParent returns every workload whose parent is parentID.
// The result may include operator-created children as well as previews;
// IsPreviewChild is what tells them apart.
ListChildrenByParent(parentID string) ([]store.Workload, error)
// CreateWorkload persists w and returns the stored row. MaterializeForBranch
// passes a workload without an ID and returns whatever comes back.
CreateWorkload(w store.Workload) (store.Workload, error)
// DeleteWorkload removes the workload row with the given ID.
// NOTE(review): not called from within this package's visible code.
DeleteWorkload(id string) error
}
// branchSlugPattern matches every run of characters that is not a
// lowercase ASCII letter, a digit, or a hyphen. Compiled once at package
// scope.
var branchSlugPattern = regexp.MustCompile(`[^a-z0-9-]+`)

// slugifyBranch turns a git branch name into a slug made only of
// lowercase letters, digits, and hyphens.
//
// Steps: lowercase the input, turn each "/" into "-", collapse every
// remaining run of disallowed characters into a single "-", and strip
// leading/trailing hyphens. Results longer than 32 bytes are cut to 32
// and trimmed of hyphens again. If nothing usable is left the literal
// "branch" is returned, so the slug is never empty.
//
// The 32-byte cap is meant to leave headroom for prefixes under the
// 63-character container-name / DNS-label limits.
func slugifyBranch(branch string) string {
	const (
		maxSlugLen = 32
		fallback   = "branch"
	)

	slug := strings.ReplaceAll(strings.ToLower(branch), "/", "-")
	slug = strings.Trim(branchSlugPattern.ReplaceAllString(slug, "-"), "-")
	if len(slug) > maxSlugLen {
		// Safe to cut by byte: only ASCII remains at this point.
		slug = strings.Trim(slug[:maxSlugLen], "-")
	}
	if slug == "" {
		return fallback
	}
	return slug
}
// findExistingPreview scans children for the first workload whose
// source_config carries a "branch" field equal to branch, returning it
// together with true. When none matches it returns the zero Workload and
// false.
//
// A child whose source_config is empty or does not decode is treated as
// having no branch. The scan is linear; the list is expected to be short
// (one entry per open branch of a single template).
func findExistingPreview(children []store.Workload, branch string) (store.Workload, bool) {
	type branchOnly struct {
		Branch string `json:"branch"`
	}

	for i := range children {
		var parsed branchOnly
		if raw := children[i].SourceConfig; raw != "" {
			// Decode errors are deliberately ignored: an unreadable config
			// simply cannot name the branch we are looking for.
			_ = json.Unmarshal([]byte(raw), &parsed)
		}
		if parsed.Branch == branch {
			return children[i], true
		}
	}
	return store.Workload{}, false
}
// patchSourceConfigBranch returns a copy of sourceConfig (a JSON object)
// with its "branch" field set to branch. All other keys are carried over
// untouched as raw JSON, so plugin-specific settings (port, dockerfile
// path, storage settings, ...) survive.
//
// A sourceConfig that is empty, "{}", malformed, not a JSON object, or
// the literal null is treated as an empty object, so the result then
// contains only the branch key.
//
// Returns an error when branch is empty or when encoding fails.
func patchSourceConfigBranch(sourceConfig, branch string) (string, error) {
	if branch == "" {
		return "", fmt.Errorf("preview: branch is empty")
	}
	m := map[string]json.RawMessage{}
	if sourceConfig != "" && sourceConfig != "{}" {
		// JSON null decodes without error but sets the map to nil; treat it
		// like malformed input so the assignment below cannot panic.
		if err := json.Unmarshal([]byte(sourceConfig), &m); err != nil || m == nil {
			m = map[string]json.RawMessage{}
		}
	}
	enc, err := json.Marshal(branch)
	if err != nil {
		return "", fmt.Errorf("preview: encode branch: %w", err)
	}
	m["branch"] = enc
	out, err := json.Marshal(m)
	if err != nil {
		return "", fmt.Errorf("preview: encode source_config: %w", err)
	}
	return string(out), nil
}
// patchPublicFacesSubdomain rewrites a JSON array of public faces so that
// every non-empty string "subdomain" becomes "<slug>-<subdomain>". This
// keeps a preview's hostnames distinct from the template's and from other
// previews'.
//
// Faces whose subdomain is missing, empty, or not a string are left as
// they are. An input of "" or "[]" is returned unchanged.
//
// Input that does not decode is an error rather than a pass-through:
// handing back the template's faces verbatim would give the preview the
// very same subdomains the prefix exists to keep apart.
func patchPublicFacesSubdomain(publicFaces, slug string) (string, error) {
	switch publicFaces {
	case "", "[]":
		return publicFaces, nil
	}

	var decoded []map[string]any
	if err := json.Unmarshal([]byte(publicFaces), &decoded); err != nil {
		return "", fmt.Errorf("preview: parse public_faces: %w", err)
	}

	prefix := slug + "-"
	for i := range decoded {
		current, isString := decoded[i]["subdomain"].(string)
		if isString && current != "" {
			decoded[i]["subdomain"] = prefix + current
		}
	}

	encoded, err := json.Marshal(decoded)
	if err != nil {
		return "", fmt.Errorf("preview: re-encode public_faces: %w", err)
	}
	return string(encoded), nil
}
// IsPreviewChild reports whether child looks like a branch preview that
// MaterializeForBranch created for template, as opposed to some other
// workload that merely shares the same parent link.
//
// It returns true only when all of the following hold:
//   - child.ParentWorkloadID equals template.ID,
//   - child's source_config names a non-empty "branch",
//   - child.Name equals template.Name + "/" + slugifyBranch(that branch),
//     which is the naming formula MaterializeForBranch uses.
//
// The name check is what keeps a hand-named child under the same parent
// from being mistaken for a preview and cascade-deleted.
func IsPreviewChild(template, child store.Workload) bool {
	if child.ParentWorkloadID != template.ID || child.SourceConfig == "" {
		return false
	}

	var parsed struct {
		Branch string `json:"branch"`
	}
	// A config that fails to decode leaves Branch empty and is rejected
	// just below.
	_ = json.Unmarshal([]byte(child.SourceConfig), &parsed)
	if parsed.Branch == "" {
		return false
	}

	expectedName := template.Name + "/" + slugifyBranch(parsed.Branch)
	return child.Name == expectedName
}
// ListPreviewChildren lists the children of template that IsPreviewChild
// recognises as branch previews. The delete path uses it to tear previews
// down together with their template so they are not left orphaned.
//
// The returned slice is non-nil even when there are no previews. A store
// failure is returned wrapped, with a nil slice.
func ListPreviewChildren(s Store, template store.Workload) ([]store.Workload, error) {
	all, err := s.ListChildrenByParent(template.ID)
	if err != nil {
		return nil, fmt.Errorf("preview: list children: %w", err)
	}

	previews := make([]store.Workload, 0, len(all))
	for i := range all {
		if !IsPreviewChild(template, all[i]) {
			continue
		}
		previews = append(previews, all[i])
	}
	return previews, nil
}
// MaterializeForBranch returns the preview workload for (template,
// branch), creating it when no child of template already names that
// branch in its source_config.
//
// A newly created preview copies the template's kind, app ID, source
// kind, trigger kind, and trigger config. Its source_config is the
// template's with "branch" replaced, its public faces are the template's
// with the branch slug prefixed onto each subdomain, and its name is
// template.Name + "/" + slug. The template becomes its parent.
//
// The call is idempotent as long as the store lists what it created: a
// second call with the same arguments finds the existing child and
// returns it without creating another.
//
// Errors: an empty branch, a store failure while listing or creating, or
// a failure to patch the source_config / public faces.
func MaterializeForBranch(s Store, template store.Workload, branch string) (store.Workload, error) {
	var none store.Workload
	if branch == "" {
		return none, fmt.Errorf("preview: branch is required")
	}

	siblings, err := s.ListChildrenByParent(template.ID)
	if err != nil {
		return none, fmt.Errorf("preview: list children: %w", err)
	}
	if preview, found := findExistingPreview(siblings, branch); found {
		return preview, nil
	}

	slug := slugifyBranch(branch)
	sourceConfig, err := patchSourceConfigBranch(template.SourceConfig, branch)
	if err != nil {
		return none, err
	}
	publicFaces, err := patchPublicFacesSubdomain(template.PublicFaces, slug)
	if err != nil {
		return none, err
	}

	// Only the fields listed here are carried over. Webhook and
	// notification secrets are deliberately left empty: previews are
	// reached through the parent's trigger binding, so they need no
	// signing secret of their own and should not appear as first-class
	// workloads in webhook routes.
	preview, err := s.CreateWorkload(store.Workload{
		Kind:             template.Kind,
		Name:             template.Name + "/" + slug,
		AppID:            template.AppID,
		SourceKind:       template.SourceKind,
		SourceConfig:     sourceConfig,
		TriggerKind:      template.TriggerKind,
		TriggerConfig:    template.TriggerConfig,
		PublicFaces:      publicFaces,
		ParentWorkloadID: template.ID,
	})
	if err != nil {
		return none, fmt.Errorf("preview: create child: %w", err)
	}
	return preview, nil
}
// FindPreviewForBranch is the read-only counterpart of
// MaterializeForBranch: it reports the child of templateID whose
// source_config names branch, and never creates anything.
//
// Returns (workload, true, nil) on a hit and (zero, false, nil) on a
// miss. An empty templateID or branch is a miss without touching the
// store. The only error is a wrapped store failure.
func FindPreviewForBranch(s Store, templateID, branch string) (store.Workload, bool, error) {
	var none store.Workload
	if templateID == "" || branch == "" {
		return none, false, nil
	}

	siblings, err := s.ListChildrenByParent(templateID)
	if err != nil {
		return none, false, fmt.Errorf("preview: list children: %w", err)
	}

	match, found := findExistingPreview(siblings, branch)
	return match, found, nil
}
+200
View File
@@ -0,0 +1,200 @@
package preview
import (
"encoding/json"
"errors"
"strings"
"testing"
"github.com/alexei/tinyforge/internal/store"
)
// fakeStore is an in-memory stand-in for the preview.Store interface,
// keyed by workload ID, so the tests exercise the preview logic without a
// database. Setting createErr makes every CreateWorkload call fail with
// that error.
type fakeStore struct {
	workloads map[string]store.Workload
	createErr error
}

// newFakeStore returns an empty fakeStore ready for use.
func newFakeStore() *fakeStore {
	return &fakeStore{workloads: make(map[string]store.Workload)}
}

// GetWorkloadByID returns the stored workload, or a plain "not found"
// error when the ID is unknown.
func (s *fakeStore) GetWorkloadByID(id string) (store.Workload, error) {
	if found, ok := s.workloads[id]; ok {
		return found, nil
	}
	return store.Workload{}, errors.New("not found")
}

// ListChildrenByParent returns every stored workload whose parent is
// parentID. The slice is never nil; its order follows map iteration and
// is therefore unspecified.
func (s *fakeStore) ListChildrenByParent(parentID string) ([]store.Workload, error) {
	children := make([]store.Workload, 0)
	for _, candidate := range s.workloads {
		if candidate.ParentWorkloadID != parentID {
			continue
		}
		children = append(children, candidate)
	}
	return children, nil
}

// CreateWorkload stores w, assigning the ID "preview-<name>" when w has
// none, and returns the stored copy. It fails with createErr when set.
func (s *fakeStore) CreateWorkload(w store.Workload) (store.Workload, error) {
	if s.createErr != nil {
		return store.Workload{}, s.createErr
	}
	if w.ID == "" {
		w.ID = "preview-" + w.Name
	}
	s.workloads[w.ID] = w
	return w, nil
}

// DeleteWorkload removes the workload with the given ID; deleting an
// unknown ID is not an error.
func (s *fakeStore) DeleteWorkload(id string) error {
	delete(s.workloads, id)
	return nil
}
// TestSlugifyBranch_StripsUnsafeChars checks slugifyBranch against a
// table covering a plain name, case folding, slash and punctuation
// replacement, the all-unsafe fallback, and the 32-character cap.
func TestSlugifyBranch_StripsUnsafeChars(t *testing.T) {
	type slugCase struct {
		in   string
		want string
	}
	table := []slugCase{
		{in: "main", want: "main"},
		{in: "Feature/User-Auth", want: "feature-user-auth"},
		{in: "PR#42", want: "pr-42"},
		{in: "release/v1.2.3", want: "release-v1-2-3"},
		{in: "___", want: "branch"},
		{in: strings.Repeat("a", 50), want: strings.Repeat("a", 32)},
	}
	for i := range table {
		tc := table[i]
		if got := slugifyBranch(tc.in); got != tc.want {
			t.Errorf("slugifyBranch(%q) = %q, want %q", tc.in, got, tc.want)
		}
	}
}
// TestPatchSourceConfigBranch_PreservesUnknownKeys verifies that patching
// the branch replaces only the "branch" field and keeps every other key
// of the template's source_config.
func TestPatchSourceConfigBranch_PreservesUnknownKeys(t *testing.T) {
	const templateConfig = `{"port":3000,"dockerfile_path":"Dockerfile","branch":"main","provider":"github"}`

	patched, err := patchSourceConfigBranch(templateConfig, "feat/x")
	if err != nil {
		t.Fatalf("patch: %v", err)
	}

	decoded := map[string]any{}
	if err := json.Unmarshal([]byte(patched), &decoded); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if decoded["branch"] != "feat/x" {
		t.Errorf("branch = %v, want feat/x", decoded["branch"])
	}
	// Report once if any of the pass-through keys went missing.
	for _, key := range []string{"port", "dockerfile_path", "provider"} {
		if decoded[key] == nil {
			t.Errorf("unknown keys dropped: %+v", decoded)
			break
		}
	}
}
// TestPatchPublicFacesSubdomain_PrefixesSubdomains verifies that a
// non-empty subdomain gets the slug prefix while an empty one is left
// untouched.
func TestPatchPublicFacesSubdomain_PrefixesSubdomains(t *testing.T) {
	faces := `[{"subdomain":"app","domain":"example.com"},{"subdomain":"","domain":"raw.example.com"}]`
	out, err := patchPublicFacesSubdomain(faces, "feat-x")
	if err != nil {
		t.Fatalf("patch: %v", err)
	}
	var got []map[string]any
	if err := json.Unmarshal([]byte(out), &got); err != nil {
		t.Fatalf("decode: %v", err)
	}
	// Guard the indexing below so a regression that drops faces fails the
	// test with a message instead of panicking.
	if len(got) != 2 {
		t.Fatalf("got %d faces, want 2: %s", len(got), out)
	}
	if got[0]["subdomain"] != "feat-x-app" {
		t.Errorf("first subdomain = %v, want feat-x-app", got[0]["subdomain"])
	}
	if got[1]["subdomain"] != "" {
		t.Errorf("empty subdomain must stay empty, got %v", got[1]["subdomain"])
	}
}
// TestMaterializeForBranch_CreatesNewWhenMissing verifies that
// materializing a branch with no existing preview creates a child that is
// parented to the template, carries the slug in its name, has the branch
// swapped into its source_config while keeping other keys, and has the
// slug prefixed onto its public face's subdomain.
func TestMaterializeForBranch_CreatesNewWhenMissing(t *testing.T) {
	fs := newFakeStore()
	template := store.Workload{
		ID:           "tmpl-1",
		Kind:         "project",
		Name:         "myapp",
		AppID:        "app-1",
		SourceKind:   "dockerfile",
		SourceConfig: `{"branch":"main","port":3000}`,
		TriggerKind:  "git",
		PublicFaces:  `[{"subdomain":"www","domain":"x.test"}]`,
	}
	fs.workloads[template.ID] = template

	child, err := MaterializeForBranch(fs, template, "feat/login")
	if err != nil {
		t.Fatalf("materialize: %v", err)
	}
	if child.ParentWorkloadID != template.ID {
		t.Errorf("parent = %q, want %q", child.ParentWorkloadID, template.ID)
	}
	if !strings.Contains(child.Name, "feat-login") {
		t.Errorf("name = %q, want it to include slug", child.Name)
	}

	var cfg map[string]any
	if err := json.Unmarshal([]byte(child.SourceConfig), &cfg); err != nil {
		t.Fatalf("decode child source_config: %v", err)
	}
	if cfg["branch"] != "feat/login" {
		t.Errorf("child branch = %v, want feat/login", cfg["branch"])
	}
	if cfg["port"] == nil {
		t.Errorf("child should inherit template port; got %+v", cfg)
	}

	var faces []map[string]any
	if err := json.Unmarshal([]byte(child.PublicFaces), &faces); err != nil {
		t.Fatalf("decode child faces: %v", err)
	}
	// Check length and type explicitly so a regression reports a failure
	// rather than panicking on the index or the type assertion.
	if len(faces) == 0 {
		t.Fatalf("child has no public faces: %s", child.PublicFaces)
	}
	sub, ok := faces[0]["subdomain"].(string)
	if !ok || !strings.HasPrefix(sub, "feat-login-") {
		t.Errorf("face subdomain = %v, want feat-login- prefix", faces[0]["subdomain"])
	}
}
// TestMaterializeForBranch_ReusesExisting verifies idempotence: two calls
// for the same template and branch return the same workload, and only one
// preview row is created alongside the template.
func TestMaterializeForBranch_ReusesExisting(t *testing.T) {
	backing := newFakeStore()
	tmpl := store.Workload{
		ID:           "tmpl-1",
		Kind:         "project",
		Name:         "myapp",
		SourceKind:   "dockerfile",
		SourceConfig: `{"branch":"main"}`,
	}
	backing.workloads[tmpl.ID] = tmpl

	initial, err := MaterializeForBranch(backing, tmpl, "feat/x")
	if err != nil {
		t.Fatalf("first materialize: %v", err)
	}
	repeat, err := MaterializeForBranch(backing, tmpl, "feat/x")
	if err != nil {
		t.Fatalf("second materialize: %v", err)
	}

	if initial.ID != repeat.ID {
		t.Errorf("expected idempotence: got %q then %q", initial.ID, repeat.ID)
	}
	// Template + exactly one preview.
	if total := len(backing.workloads); total != 2 {
		t.Errorf("expected exactly one preview created, store has %d", total)
	}
}
func TestMaterializeForBranch_RejectsEmptyBranch(t *testing.T) {
fs := newFakeStore()
_, err := MaterializeForBranch(fs, store.Workload{ID: "tmpl"}, "")
if err == nil {
t.Fatal("expected error for empty branch")
}
}
func TestFindPreviewForBranch_MissingReturnsFalse(t *testing.T) {
fs := newFakeStore()
_, ok, err := FindPreviewForBranch(fs, "tmpl", "feat/x")
if err != nil {
t.Fatalf("find: %v", err)
}
if ok {
t.Error("expected ok=false for missing preview")
}
}