Files
alexei.dolgolyov 7733e64b08 feat(gitops): config-as-code via .tinyforge.yml for repo-backed workloads
A dockerfile or static workload can opt in to reading its deploy config from a
.tinyforge.yml in its own repo. Tinyforge fetches the file, shows field-level
drift vs the live config, and an admin applies it with an explicit Sync. The
repo becomes the source of truth for the declared fields. Manual-sync only;
no auto-apply on deploy, no multi-workload reconcile, no create/delete in v1.

Scope is deliberately source-aware and source_config-resident: dockerfile
declares port/healthcheck/deploy_strategy, static declares deploy_strategy.
The file never carries repo coords or secrets (those stay in the encrypted
DB), which keeps credentials out of the repo.

Backend:
- internal/gitops: Spec/ParseSpec (KnownFields rejects unknown keys), a
  source-aware ApplyPlan/BuildPlan, MergeAndValidate (omitted-field-preserving
  deep merge + validate-the-merged-result-then-commit — never a partial
  config), declared-only Drift with normalization, and Fetch with
  ok/no_file/fetch_failed/invalid statuses and token-redacted messages.
- staticsite: DownloadFile added to GitProvider + Gitea/GitHub/GitLab impls,
  reusing each provider's SSRF-safe client; 64 KiB cap; ErrFileNotFound.
- store: 4 additive gitops_* columns + setters (disjoint from UpdateWorkload
  so the edit-form save and a sync never clobber each other).
- api: GET /workloads/{id}/gitops (status + raw + live drift + managed_fields),
  PUT /gitops (admin, enable/path, traversal-safe), POST /gitops/sync (admin,
  per-workload locked read->merge->validate->write, audited to event_log).

Frontend:
- GitOpsPanel.svelte: status pill, a purpose-built field-level drift view,
  .tinyforge.yml preview, enable ToggleSwitch, Sync via ConfirmDialog; all five
  statuses handled, admin affordances gated on the real viewer role.
- GitOps-managed badge (list + detail hero) and a read-only edit-form banner.
- api.ts fetchers + types; i18n apps.detail.gitops.* (en + ru parity).

Built phase-by-phase with an adversarial plan review (caught 5 design flaws
pre-implementation) and an independent review per phase (go / security / ts /
final) — all APPROVE, 0 CRITICAL/HIGH. docs/gitops.md documents the schema and
what's intentionally out of v1. Plan: plans/gitops/.
2026-06-21 23:32:02 +03:00

987 lines
41 KiB
Go

package store
import (
"database/sql"
"errors"
"fmt"
"log/slog"
"strings"
"time"
"github.com/google/uuid"
_ "modernc.org/sqlite"
)
// ErrNotFound is returned when a requested entity does not exist.
var ErrNotFound = errors.New("not found")
// ErrUnique is returned when a write violates a UNIQUE constraint.
// Translating the driver-specific message at the store boundary lets
// callers use errors.Is instead of fragile substring matching on
// err.Error(); the SQLite driver's wording is not part of any contract.
var ErrUnique = errors.New("unique constraint violation")
// translateSQLError maps a driver-level error onto one of the store's
// sentinel errors when possible. Returns the original error unchanged
// when no mapping applies. The returned error wraps the original via
// %w so callers that need the raw message still have it.
func translateSQLError(err error) error {
if err == nil {
return nil
}
msg := err.Error()
// modernc.org/sqlite returns text like
// "constraint failed: UNIQUE constraint failed: triggers.name (2067)"
// Match case-insensitively in case the driver wording shifts.
if strings.Contains(strings.ToUpper(msg), "UNIQUE") {
return fmt.Errorf("%w: %v", ErrUnique, err)
}
return err
}
// Store wraps the SQLite database connection and provides access to all query methods.
type Store struct {
db *sql.DB
}
// New opens a SQLite database at the given path and runs auto-migration.
func New(dbPath string) (*Store, error) {
db, err := sql.Open("sqlite", dbPath)
if err != nil {
return nil, fmt.Errorf("open database: %w", err)
}
// SQLite only allows one writer at a time. Limit connections to prevent SQLITE_BUSY.
db.SetMaxOpenConns(1)
db.SetConnMaxLifetime(0)
// Enable WAL mode and foreign keys for better concurrency and
// referential integrity. `synchronous=NORMAL` pairs with WAL to skip
// the per-write fsync — the OS still flushes on checkpoint, durability
// is preserved across clean shutdowns, and crashes lose at most the
// last few committed transactions (acceptable for a tinyforge box).
// cache_size=-20000 = 20 MiB page cache, temp_store=MEMORY keeps
// indexer scratch off disk; both are pure perf knobs.
pragmas := []string{
"PRAGMA journal_mode=WAL",
"PRAGMA synchronous=NORMAL",
"PRAGMA foreign_keys=ON",
"PRAGMA busy_timeout=5000",
"PRAGMA cache_size=-20000",
"PRAGMA temp_store=MEMORY",
}
for _, p := range pragmas {
if _, err := db.Exec(p); err != nil {
db.Close()
return nil, fmt.Errorf("exec pragma %q: %w", p, err)
}
}
s := &Store{db: db}
if err := s.migrate(); err != nil {
db.Close()
return nil, fmt.Errorf("migrate: %w", err)
}
return s, nil
}
// Close closes the underlying database connection.
func (s *Store) Close() error {
return s.db.Close()
}
// DB returns the underlying *sql.DB for advanced operations like transactions.
func (s *Store) DB() *sql.DB {
return s.db
}
// migrate creates all tables if they do not already exist, then runs
// incremental migrations for schema changes added after initial release.
func (s *Store) migrate() error {
if _, err := s.db.Exec(schema); err != nil {
return err
}
return s.runMigrations()
}
// runMigrations applies additive schema changes that cannot be expressed
// with CREATE TABLE IF NOT EXISTS, plus the hard-cutover drops that
// remove every legacy project/stage/stack/static_site/deploy table.
func (s *Store) runMigrations() error {
migrations := []string{
// Set default network for existing databases with empty network.
`UPDATE settings SET network = 'tinyforge' WHERE network = ''`,
// Settings column adds that survive the cutover. SQLite is tolerant
// of "duplicate column" errors at the apply step, so re-running on
// a fully-migrated DB is a no-op.
`ALTER TABLE settings ADD COLUMN base_volume_path TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN ssl_certificate_id INTEGER NOT NULL DEFAULT 0`,
`ALTER TABLE settings ADD COLUMN stale_threshold_days INTEGER NOT NULL DEFAULT 7`,
`ALTER TABLE settings ADD COLUMN allowed_volume_paths TEXT NOT NULL DEFAULT '[]'`,
`ALTER TABLE settings ADD COLUMN wildcard_dns INTEGER NOT NULL DEFAULT 1`,
`ALTER TABLE settings ADD COLUMN dns_provider TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN cloudflare_api_token TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN cloudflare_zone_id TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN backup_enabled INTEGER NOT NULL DEFAULT 0`,
`ALTER TABLE settings ADD COLUMN backup_interval_hours INTEGER NOT NULL DEFAULT 24`,
`ALTER TABLE settings ADD COLUMN backup_retention_count INTEGER NOT NULL DEFAULT 10`,
`ALTER TABLE settings ADD COLUMN proxy_provider TEXT NOT NULL DEFAULT 'npm'`,
`ALTER TABLE settings ADD COLUMN traefik_entrypoint TEXT NOT NULL DEFAULT 'websecure'`,
`ALTER TABLE settings ADD COLUMN traefik_cert_resolver TEXT NOT NULL DEFAULT 'letsencrypt'`,
`ALTER TABLE settings ADD COLUMN traefik_network TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN traefik_api_url TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN npm_remote INTEGER NOT NULL DEFAULT 0`,
`ALTER TABLE settings ADD COLUMN npm_access_list_id INTEGER NOT NULL DEFAULT 0`,
`ALTER TABLE settings ADD COLUMN public_ip TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN image_prune_threshold_mb INTEGER NOT NULL DEFAULT 1024`,
`ALTER TABLE settings ADD COLUMN stats_interval_seconds INTEGER NOT NULL DEFAULT 15`,
`ALTER TABLE settings ADD COLUMN stats_retention_hours INTEGER NOT NULL DEFAULT 2`,
`ALTER TABLE settings ADD COLUMN notification_secret TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE settings ADD COLUMN auto_backup_before_deploy INTEGER NOT NULL DEFAULT 0`,
// Registries — owner column.
`ALTER TABLE registries ADD COLUMN owner TEXT NOT NULL DEFAULT ''`,
// Webhook delivery audit log persists every inbound webhook
// request so operators can debug "why didn't my deploy fire?"
// without grepping daemon logs.
`CREATE TABLE IF NOT EXISTS webhook_deliveries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
target_type TEXT NOT NULL,
target_id TEXT NOT NULL DEFAULT '',
target_name TEXT NOT NULL DEFAULT '',
received_at TEXT NOT NULL DEFAULT (datetime('now')),
source_ip TEXT NOT NULL DEFAULT '',
signature_state TEXT NOT NULL DEFAULT '',
status_code INTEGER NOT NULL DEFAULT 0,
outcome TEXT NOT NULL DEFAULT '',
detail TEXT NOT NULL DEFAULT '',
body_size INTEGER NOT NULL DEFAULT 0
)`,
`CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_target ON webhook_deliveries(target_type, target_id, received_at)`,
`CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_received_at ON webhook_deliveries(received_at)`,
// Containers — stage_id is now an opaque string set by the source
// plugin (image plugin uses it for the deploy-target tag). No FK
// semantics: the legacy `stages` table this column once joined to
// is gone; the column is just a free-form discriminator the
// proxies / dashboard views read to disambiguate sibling rows.
`ALTER TABLE containers ADD COLUMN stage_id TEXT NOT NULL DEFAULT ''`,
// Workload-first refactor columns. Land additively so old databases
// (which have a bare workloads table) pick them up on the next boot.
`ALTER TABLE workloads ADD COLUMN source_kind TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE workloads ADD COLUMN source_config TEXT NOT NULL DEFAULT '{}'`,
`ALTER TABLE workloads ADD COLUMN trigger_kind TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE workloads ADD COLUMN trigger_config TEXT NOT NULL DEFAULT '{}'`,
`ALTER TABLE workloads ADD COLUMN public_faces TEXT NOT NULL DEFAULT '[]'`,
`ALTER TABLE workloads ADD COLUMN parent_workload_id TEXT NOT NULL DEFAULT ''`,
// GitOps config-as-code: a dockerfile/static workload may read its
// deploy config from a .tinyforge.yml in its own repo. Opt-in per
// workload; all four land additively so existing rows default to
// "GitOps off" and stay byte-identical.
`ALTER TABLE workloads ADD COLUMN gitops_enabled INTEGER NOT NULL DEFAULT 0`,
`ALTER TABLE workloads ADD COLUMN gitops_path TEXT NOT NULL DEFAULT '.tinyforge.yml'`,
`ALTER TABLE workloads ADD COLUMN gitops_last_sync_at TEXT NOT NULL DEFAULT ''`,
`ALTER TABLE workloads ADD COLUMN gitops_commit_sha TEXT NOT NULL DEFAULT ''`,
// Schedule trigger needs a column to remember when it last fired so
// the scheduler can compute next-fire windows across restarts.
// Empty string = never fired. Pre-trigger-split DBs land the column
// here so the scheduler can read/write it on first boot.
`ALTER TABLE triggers ADD COLUMN last_fired_at TEXT NOT NULL DEFAULT ''`,
// Per-app deploy/activity timeline: scope each event_log row to the
// workload that produced it so the dashboard can query a workload's
// deploy history. Empty string = unscoped (the existing non-deploy
// loggers don't set it). Additive ADD COLUMN — the loop below
// tolerates the "duplicate column" error on fully-migrated DBs.
`ALTER TABLE event_log ADD COLUMN workload_id TEXT NOT NULL DEFAULT ''`,
// Hard cutover: drop every legacy table. Idempotent — DROP TABLE
// IF EXISTS is a no-op once the table is gone. Operators upgrading
// from a pre-cutover build will lose any project / stack / static
// site rows; the upgrade notes call this out explicitly.
`DROP TABLE IF EXISTS deploy_logs`,
`DROP TABLE IF EXISTS deploys`,
`DROP TABLE IF EXISTS stage_env`,
`DROP TABLE IF EXISTS stages`,
`DROP TABLE IF EXISTS poll_states`,
`DROP TABLE IF EXISTS volumes`,
`DROP TABLE IF EXISTS static_site_secrets`,
`DROP TABLE IF EXISTS static_sites`,
`DROP TABLE IF EXISTS stack_deploys`,
`DROP TABLE IF EXISTS stack_revisions`,
`DROP TABLE IF EXISTS stacks`,
`DROP TABLE IF EXISTS projects`,
}
// Workload refactor tables (2026-05-09). Workload is the unifying primitive
// over Project / Stack / StaticSite; Container is the normalized index of
// every Tinyforge-managed container; Apps is an optional grouping. These
// live alongside (not inside) the schema constant so existing databases
// pick them up on restart.
workloadTables := []string{
`CREATE TABLE IF NOT EXISTS workloads (
id TEXT PRIMARY KEY,
kind TEXT NOT NULL,
ref_id TEXT NOT NULL,
name TEXT NOT NULL,
app_id TEXT NOT NULL DEFAULT '',
source_kind TEXT NOT NULL DEFAULT '',
source_config TEXT NOT NULL DEFAULT '{}',
trigger_kind TEXT NOT NULL DEFAULT '',
trigger_config TEXT NOT NULL DEFAULT '{}',
public_faces TEXT NOT NULL DEFAULT '[]',
parent_workload_id TEXT NOT NULL DEFAULT '',
notification_url TEXT NOT NULL DEFAULT '',
notification_secret TEXT NOT NULL DEFAULT '',
webhook_secret TEXT NOT NULL DEFAULT '',
webhook_signing_secret TEXT NOT NULL DEFAULT '',
webhook_require_signature INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(kind, ref_id)
)`,
`CREATE TABLE IF NOT EXISTS containers (
id TEXT PRIMARY KEY,
workload_id TEXT NOT NULL,
workload_kind TEXT NOT NULL,
role TEXT NOT NULL DEFAULT '',
stage_id TEXT NOT NULL DEFAULT '',
container_id TEXT NOT NULL DEFAULT '',
image_ref TEXT NOT NULL DEFAULT '',
image_tag TEXT NOT NULL DEFAULT '',
host TEXT NOT NULL DEFAULT 'local',
state TEXT NOT NULL DEFAULT '',
port INTEGER NOT NULL DEFAULT 0,
subdomain TEXT NOT NULL DEFAULT '',
proxy_route_id TEXT NOT NULL DEFAULT '',
npm_proxy_id INTEGER NOT NULL DEFAULT 0,
last_seen_at TEXT NOT NULL DEFAULT '',
extra_json TEXT NOT NULL DEFAULT '{}',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
`CREATE TABLE IF NOT EXISTS apps (
id TEXT PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
description TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
// workload_env: per-workload env overrides (encrypt-at-rest for
// secrets). Functional analog of stage_env. Workload deletion
// cascades through the FK so orphan rows are impossible.
`CREATE TABLE IF NOT EXISTS workload_env (
id TEXT PRIMARY KEY,
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
key TEXT NOT NULL,
value TEXT NOT NULL DEFAULT '',
encrypted INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(workload_id, key)
)`,
// workload_volumes: per-workload mount declarations. Mirrors the
// legacy `volumes` table shape (source / target / scope / name)
// but keyed on workload_id. UNIQUE on (workload_id, target) so a
// re-add overwrites instead of duplicating.
`CREATE TABLE IF NOT EXISTS workload_volumes (
id TEXT PRIMARY KEY,
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
source TEXT NOT NULL DEFAULT '',
target TEXT NOT NULL,
scope TEXT NOT NULL DEFAULT 'absolute',
name TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(workload_id, target)
)`,
// volume_snapshots: per-workload archives of host-bind data
// volumes (tar.gz). Mirrors the backups table shape but scoped to a
// workload and self-describing via the manifest column so a restore
// can re-resolve each target. ON DELETE CASCADE so deleting an app
// drops its snapshot rows (the files are pruned separately).
`CREATE TABLE IF NOT EXISTS volume_snapshots (
id TEXT PRIMARY KEY,
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
label TEXT NOT NULL DEFAULT '',
filename TEXT NOT NULL,
size_bytes INTEGER NOT NULL DEFAULT 0,
manifest TEXT NOT NULL DEFAULT '[]',
created_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
// triggers: first-class redeploy signal sources. Webhook secrets
// move from workload onto the trigger so one webhook URL can fan
// out to multiple workloads via workload_trigger_bindings.
`CREATE TABLE IF NOT EXISTS triggers (
id TEXT PRIMARY KEY,
kind TEXT NOT NULL,
name TEXT NOT NULL UNIQUE,
config TEXT NOT NULL DEFAULT '{}',
webhook_secret TEXT NOT NULL DEFAULT '',
webhook_signing_secret TEXT NOT NULL DEFAULT '',
webhook_require_signature INTEGER NOT NULL DEFAULT 0,
last_fired_at TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
// workload_notifications: per-workload notification destinations.
// Each row is one route (Slack channel, Discord webhook, generic
// receiver, ...). event_types is a comma-separated allow-list —
// empty means "all events". When zero rows exist for a workload
// the dispatcher falls back to the legacy single notification_url
// column on workloads so existing setups keep working unchanged.
`CREATE TABLE IF NOT EXISTS workload_notifications (
id TEXT PRIMARY KEY,
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
name TEXT NOT NULL,
url TEXT NOT NULL,
secret TEXT NOT NULL DEFAULT '',
event_types TEXT NOT NULL DEFAULT '',
enabled INTEGER NOT NULL DEFAULT 1,
sort_order INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
// workload_trigger_bindings: many-to-many between workloads and
// triggers. binding_config is the per-binding override applied on
// top of trigger.config (top-level JSON merge, binding wins).
`CREATE TABLE IF NOT EXISTS workload_trigger_bindings (
id TEXT PRIMARY KEY,
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
trigger_id TEXT NOT NULL REFERENCES triggers(id) ON DELETE CASCADE,
binding_config TEXT NOT NULL DEFAULT '{}',
enabled INTEGER NOT NULL DEFAULT 1,
sort_order INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(workload_id, trigger_id)
)`,
}
for _, t := range workloadTables {
if _, err := s.db.Exec(t); err != nil {
return fmt.Errorf("create workload table: %w", err)
}
}
// Additive stack tables (2026-04-16). Created here rather than in the
// schema constant so older databases pick them up on restart.
statsTables := []string{
`CREATE TABLE IF NOT EXISTS container_stats_samples (
id INTEGER PRIMARY KEY AUTOINCREMENT,
container_id TEXT NOT NULL,
owner_type TEXT NOT NULL,
owner_id TEXT NOT NULL,
ts INTEGER NOT NULL,
cpu_percent REAL NOT NULL DEFAULT 0,
memory_usage INTEGER NOT NULL DEFAULT 0,
memory_limit INTEGER NOT NULL DEFAULT 0,
network_rx INTEGER NOT NULL DEFAULT 0,
network_tx INTEGER NOT NULL DEFAULT 0,
block_read INTEGER NOT NULL DEFAULT 0,
block_write INTEGER NOT NULL DEFAULT 0
)`,
`CREATE TABLE IF NOT EXISTS system_stats_samples (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ts INTEGER NOT NULL,
ncpu INTEGER NOT NULL DEFAULT 0,
memory_total INTEGER NOT NULL DEFAULT 0,
workload_cpu_percent REAL NOT NULL DEFAULT 0,
workload_mem_usage INTEGER NOT NULL DEFAULT 0,
containers_running INTEGER NOT NULL DEFAULT 0,
disk_total_bytes INTEGER NOT NULL DEFAULT 0
)`,
}
for _, t := range statsTables {
if _, err := s.db.Exec(t); err != nil {
return fmt.Errorf("create stats table: %w", err)
}
}
// Observability: event_triggers — consume EventLog entries off the
// bus and dispatch webhook actions. Schema kept flat (comma-list
// filters, single optional regex) — see LOGSCAN_AND_TRIGGERS_TODO.md.
observabilityTables := []string{
`CREATE TABLE IF NOT EXISTS event_triggers (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
filter_severity TEXT NOT NULL DEFAULT '',
filter_source TEXT NOT NULL DEFAULT '',
filter_message_regex TEXT NOT NULL DEFAULT '',
action_type TEXT NOT NULL DEFAULT 'webhook',
action_target TEXT NOT NULL DEFAULT '',
action_secret TEXT NOT NULL DEFAULT '',
enabled INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
// log_scan_rules: regex patterns the log-scanner manager
// applies to container log lines. WorkloadID is nullable (via
// "" sentinel) so a global rule can have OverridesID = 0 and
// per-workload overrides reference the global's id.
`CREATE TABLE IF NOT EXISTS log_scan_rules (
id INTEGER PRIMARY KEY AUTOINCREMENT,
workload_id TEXT NOT NULL DEFAULT '',
overrides_id INTEGER NOT NULL DEFAULT 0,
name TEXT NOT NULL,
pattern TEXT NOT NULL,
severity TEXT NOT NULL DEFAULT 'warn',
streams TEXT NOT NULL DEFAULT 'all',
cooldown_seconds INTEGER NOT NULL DEFAULT 60,
enabled INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_workload ON log_scan_rules(workload_id)`,
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_overrides ON log_scan_rules(overrides_id)`,
// metric_alert_rules: threshold rules the metric-alert manager
// evaluates against recent container stats samples. WorkloadID is
// nullable (via "" sentinel) so a global rule applies to every
// workload; a non-empty value scopes it to one workload.
`CREATE TABLE IF NOT EXISTS metric_alert_rules (
id INTEGER PRIMARY KEY AUTOINCREMENT,
workload_id TEXT NOT NULL DEFAULT '',
name TEXT NOT NULL DEFAULT '',
metric TEXT NOT NULL,
comparator TEXT NOT NULL,
threshold REAL NOT NULL DEFAULT 0,
severity TEXT NOT NULL DEFAULT 'warn',
cooldown_seconds INTEGER NOT NULL DEFAULT 300,
enabled INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
`CREATE INDEX IF NOT EXISTS idx_metric_alert_rules_workload ON metric_alert_rules(workload_id)`,
// shared_secrets: env vars shared across workloads by scope. Scope
// "global" applies to every workload; "app" applies only to
// workloads whose app_id matches. Resolved into a workload's
// container env as a low-precedence default (see
// internal/workload/plugin/env.go).
`CREATE TABLE IF NOT EXISTS shared_secrets (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
value TEXT NOT NULL DEFAULT '',
encrypted INTEGER NOT NULL DEFAULT 1,
scope TEXT NOT NULL,
app_id TEXT NOT NULL DEFAULT '',
description TEXT NOT NULL DEFAULT '',
enabled INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)`,
`CREATE UNIQUE INDEX IF NOT EXISTS idx_shared_secrets_scope_name ON shared_secrets(scope, app_id, name)`,
`CREATE INDEX IF NOT EXISTS idx_shared_secrets_app ON shared_secrets(app_id)`,
// deploy_history: structured, version-pinned ledger of every deploy
// dispatch (success AND failure) per workload. Distinct from the
// free-text event_log — this carries the replayable `reference` the
// rollback action redeploys from. `error` holds only a generic,
// secret-free marker (the raw source error can echo registry-auth /
// compose stdout, so it goes to slog only). FK cascade is backed by
// PRAGMA foreign_keys=ON, but DeleteWorkload also deletes these rows
// explicitly (matching the containers cleanup convention).
`CREATE TABLE IF NOT EXISTS deploy_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
source_kind TEXT NOT NULL DEFAULT '',
reference TEXT NOT NULL DEFAULT '',
reason TEXT NOT NULL DEFAULT '',
triggered_by TEXT NOT NULL DEFAULT '',
note TEXT NOT NULL DEFAULT '',
outcome TEXT NOT NULL DEFAULT '',
error TEXT NOT NULL DEFAULT '',
started_at TEXT NOT NULL DEFAULT '',
finished_at TEXT NOT NULL DEFAULT ''
)`,
`CREATE INDEX IF NOT EXISTS idx_deploy_history_workload ON deploy_history(workload_id, id DESC)`,
}
for _, t := range observabilityTables {
if _, err := s.db.Exec(t); err != nil {
return fmt.Errorf("create observability table: %w", err)
}
}
for _, m := range migrations {
if _, err := s.db.Exec(m); err != nil {
// "duplicate column" / "already exists" are expected when a
// migration has already been applied. "no such table" is
// expected for obsolete ALTER TABLE migrations targeting tables
// the workload refactor dropped (e.g. instances). Anything
// else must surface — silently running against the wrong shape
// is worse than a startup failure.
msg := err.Error()
if !strings.Contains(msg, "duplicate column") &&
!strings.Contains(msg, "already exists") &&
!strings.Contains(msg, "no such table") {
return fmt.Errorf("apply migration %q: %w", m, err)
}
}
}
// Create indexes on foreign key columns for query performance. Only
// indexes targeting tables that still exist after the hard cutover.
indexes := []string{
`CREATE INDEX IF NOT EXISTS idx_event_log_severity ON event_log(severity)`,
`CREATE INDEX IF NOT EXISTS idx_event_log_source ON event_log(source)`,
`CREATE INDEX IF NOT EXISTS idx_event_log_created_at ON event_log(created_at)`,
`CREATE INDEX IF NOT EXISTS idx_event_log_workload ON event_log(workload_id, created_at)`,
`CREATE INDEX IF NOT EXISTS idx_dns_records_consumer ON dns_records(consumer_type, consumer_id)`,
`CREATE INDEX IF NOT EXISTS idx_container_stats_owner_ts ON container_stats_samples(owner_type, owner_id, ts)`,
`CREATE INDEX IF NOT EXISTS idx_container_stats_container_ts ON container_stats_samples(container_id, ts)`,
`CREATE INDEX IF NOT EXISTS idx_container_stats_ts ON container_stats_samples(ts)`,
`CREATE INDEX IF NOT EXISTS idx_system_stats_ts ON system_stats_samples(ts)`,
// Workload refactor indexes.
`CREATE INDEX IF NOT EXISTS idx_workloads_kind ON workloads(kind)`,
`CREATE INDEX IF NOT EXISTS idx_workloads_app_id ON workloads(app_id) WHERE app_id != ''`,
`CREATE INDEX IF NOT EXISTS idx_workloads_ref ON workloads(kind, ref_id)`,
`CREATE UNIQUE INDEX IF NOT EXISTS idx_workloads_webhook_secret ON workloads(webhook_secret) WHERE webhook_secret != ''`,
`CREATE INDEX IF NOT EXISTS idx_containers_workload ON containers(workload_id)`,
`CREATE INDEX IF NOT EXISTS idx_containers_state ON containers(state)`,
`CREATE INDEX IF NOT EXISTS idx_containers_container_id ON containers(container_id) WHERE container_id != ''`,
`CREATE INDEX IF NOT EXISTS idx_containers_kind ON containers(workload_kind)`,
`CREATE INDEX IF NOT EXISTS idx_containers_stage_id ON containers(stage_id) WHERE stage_id != ''`,
`CREATE INDEX IF NOT EXISTS idx_workload_env_workload ON workload_env(workload_id)`,
`CREATE INDEX IF NOT EXISTS idx_workload_volumes_workload ON workload_volumes(workload_id)`,
`CREATE INDEX IF NOT EXISTS idx_volume_snapshots_workload ON volume_snapshots(workload_id)`,
// Trigger-split indexes.
`CREATE INDEX IF NOT EXISTS idx_triggers_kind ON triggers(kind)`,
`CREATE UNIQUE INDEX IF NOT EXISTS idx_triggers_webhook_secret ON triggers(webhook_secret) WHERE webhook_secret != ''`,
`CREATE INDEX IF NOT EXISTS idx_bindings_workload ON workload_trigger_bindings(workload_id)`,
`CREATE INDEX IF NOT EXISTS idx_bindings_trigger ON workload_trigger_bindings(trigger_id)`,
`CREATE INDEX IF NOT EXISTS idx_workload_notifs_workload ON workload_notifications(workload_id)`,
}
for _, idx := range indexes {
if _, err := s.db.Exec(idx); err != nil {
return fmt.Errorf("create index: %w", err)
}
}
// schema_versions table gates one-shot data migrations like the
// trigger backfill below. Without this, the backfill scan ran on
// every boot even on fully-migrated DBs — wasted I/O and (more
// importantly) made it impossible to tell whether a "no rows
// processed" was a clean state or a missed-migration bug.
if _, err := s.db.Exec(`CREATE TABLE IF NOT EXISTS schema_versions (
version INTEGER PRIMARY KEY,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)`); err != nil {
return fmt.Errorf("create schema_versions: %w", err)
}
if err := s.runOnce(1, "trigger backfill", s.backfillTriggersFromWorkloads); err != nil {
// Backfill failure is non-fatal — we log and let the operator
// retry. The version is only recorded on success.
slog.Warn("trigger backfill", "error", err)
}
return nil
}
// runOnce executes fn at most one time per database lifetime, recording
// success in schema_versions. Useful for data migrations whose source
// table eventually disappears (so re-running becomes pointless or
// dangerous).
func (s *Store) runOnce(version int, label string, fn func() error) error {
var applied int
if err := s.db.QueryRow(`SELECT COUNT(*) FROM schema_versions WHERE version = ?`, version).Scan(&applied); err != nil {
return fmt.Errorf("check %s: %w", label, err)
}
if applied > 0 {
return nil
}
if err := fn(); err != nil {
return err
}
if _, err := s.db.Exec(`INSERT INTO schema_versions (version) VALUES (?)`, version); err != nil {
return fmt.Errorf("mark %s applied: %w", label, err)
}
slog.Info("schema migration applied", "version", version, "label", label)
return nil
}
// RunOnce is the public counterpart of runOnce, exposed so cmd/server can
// gate post-store-open migrations (e.g. crypto re-encryption that needs
// the ENCRYPTION_KEY which Store does not own) through the same
// schema_versions ledger.
func (s *Store) RunOnce(version int, label string, fn func() error) error {
return s.runOnce(version, label, fn)
}
// EnvelopeMigrator describes the contract a crypto package implements to
// rewrite legacy unprefixed-hex ciphertext as versioned envelope values.
// hasEnvelope reports whether a value already carries the new prefix.
// decrypt returns plaintext for either form; encrypt always produces the
// new envelope form. By accepting closures the store stays free of any
// import on internal/crypto, mirroring the rest of the package layout.
type EnvelopeMigrator struct {
HasEnvelope func(value string) bool
Decrypt func(ciphertext string) (string, error)
Encrypt func(plaintext string) (string, error)
}
// MigrateSecretsToEnvelope walks every column known to carry an encrypted
// secret and rewrites legacy unprefixed-hex values into the new
// envelope form using the current encryption key.
//
// Behaviour, per-row:
// - empty value → skip (no secret stored)
// - already-envelope value → skip (already migrated)
// - decrypt fails → skip (value is either plaintext from a v0 boot
// OR ciphertext from a rotated key; either way we cannot safely
// re-encrypt and leaving it alone preserves the existing read
// semantics)
// - decrypt succeeds → encrypt to envelope form + UPDATE
//
// The whole sweep runs in a single transaction so a power-loss
// mid-migration leaves the DB in either the pre- or post-migration
// state, never half. Idempotent via schema_versions version 2 — the
// next boot is a no-op.
//
// Columns covered:
// - settings.npm_password
// - settings.cloudflare_api_token
// - auth_settings.oidc_client_secret
// - registries.token
// - workload_env.value WHERE encrypted=1
func (s *Store) MigrateSecretsToEnvelope(m EnvelopeMigrator) error {
return s.runOnce(2, "secrets envelope migration", func() error {
tx, err := s.db.Begin()
if err != nil {
return fmt.Errorf("begin: %w", err)
}
defer func() { _ = tx.Rollback() }()
// Single-row tables (settings, auth_settings) — read-update inline.
singleRowColumns := []struct {
table, column string
}{
{"settings", "npm_password"},
{"settings", "cloudflare_api_token"},
{"auth_settings", "oidc_client_secret"},
}
for _, c := range singleRowColumns {
var v string
err := tx.QueryRow(
fmt.Sprintf(`SELECT %s FROM %s LIMIT 1`, c.column, c.table),
).Scan(&v)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
continue
}
// auth_settings may not exist on a brand-new DB until
// the OIDC code touches it; treat as nothing-to-migrate.
slog.Debug("envelope migration: column read skipped",
"table", c.table, "column", c.column, "error", err)
continue
}
migrated, ok := tryMigrate(m, v)
if !ok {
continue
}
if _, err := tx.Exec(
fmt.Sprintf(`UPDATE %s SET %s = ?`, c.table, c.column),
migrated,
); err != nil {
return fmt.Errorf("update %s.%s: %w", c.table, c.column, err)
}
}
// Multi-row: registries.token
if err := migrateRowColumn(tx, m,
`SELECT id, token FROM registries WHERE token != ''`,
`UPDATE registries SET token = ? WHERE id = ?`,
); err != nil {
return fmt.Errorf("registries.token: %w", err)
}
// Multi-row: workload_env.value WHERE encrypted=1
if err := migrateRowColumn(tx, m,
`SELECT id, value FROM workload_env WHERE encrypted = 1 AND value != ''`,
`UPDATE workload_env SET value = ? WHERE id = ?`,
); err != nil {
return fmt.Errorf("workload_env.value: %w", err)
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("commit: %w", err)
}
return nil
})
}
// migrateRowColumn applies the envelope rewrite to every (id, value)
// pair returned by selectQ. updateQ takes (newValue, id) as parameters.
// Each row is its own attempt; one row failing migration (decrypt fail)
// does not abort the others.
func migrateRowColumn(tx *sql.Tx, m EnvelopeMigrator, selectQ, updateQ string) error {
rows, err := tx.Query(selectQ)
if err != nil {
return err
}
defer rows.Close()
type pending struct{ id, newValue string }
var updates []pending
for rows.Next() {
var id, value string
if err := rows.Scan(&id, &value); err != nil {
return err
}
newValue, ok := tryMigrate(m, value)
if !ok {
continue
}
updates = append(updates, pending{id, newValue})
}
if err := rows.Err(); err != nil {
return err
}
for _, u := range updates {
if _, err := tx.Exec(updateQ, u.newValue, u.id); err != nil {
return err
}
}
return nil
}
// tryMigrate returns the envelope-form ciphertext + true when the input
// is a legacy unprefixed value that decrypts successfully with the
// current key. Returns ("", false) for anything else: empty, already
// envelope, plaintext, or decrypt-failed (rotated-key case).
func tryMigrate(m EnvelopeMigrator, v string) (string, bool) {
if v == "" {
return "", false
}
if m.HasEnvelope(v) {
return "", false
}
plaintext, err := m.Decrypt(v)
if err != nil {
return "", false
}
enc, err := m.Encrypt(plaintext)
if err != nil {
return "", false
}
return enc, true
}
// backfillTriggersFromWorkloads converts embedded trigger config on
// workload rows into standalone trigger + binding rows. Runs once per
// boot and is idempotent — only workloads with non-empty trigger_kind
// AND no existing binding produce a new trigger record.
//
// Each per-workload backfill runs inside a transaction so a partial
// failure (binding insert fails after trigger insert succeeds) rolls
// back cleanly; otherwise an orphan trigger row would survive forever
// because the next boot's bindings-count check sees zero bindings and
// tries to re-insert under the same UNIQUE name.
//
// Trigger names are unconditionally suffixed with the workload's id
// short-prefix to make collisions impossible across two workloads with
// identical (name, kind) — the "Foo [registry]" + "Foo [registry]" case
// would otherwise have one of them silently dropped.
//
// Why on every boot: the trigger-split refactor is a clean break (no
// formal migration). Existing dev databases have triggers embedded in
// workloads.trigger_kind / trigger_config; this lifts them into the new
// shape so URLs and behavior survive the upgrade.
func (s *Store) backfillTriggersFromWorkloads() error {
rows, err := s.db.Query(
`SELECT id, name, trigger_kind, trigger_config,
webhook_secret, webhook_signing_secret, webhook_require_signature
FROM workloads
WHERE trigger_kind != ''`,
)
if err != nil {
return fmt.Errorf("scan workloads for backfill: %w", err)
}
defer rows.Close()
type embedded struct {
id, name, kind, config string
webhookSecret, webhookSigningSecret string
requireSig int
}
var pending []embedded
for rows.Next() {
var e embedded
if err := rows.Scan(&e.id, &e.name, &e.kind, &e.config,
&e.webhookSecret, &e.webhookSigningSecret, &e.requireSig); err != nil {
return fmt.Errorf("scan workload row: %w", err)
}
pending = append(pending, e)
}
if err := rows.Err(); err != nil {
return err
}
for _, e := range pending {
if err := s.backfillOneTrigger(e.id, e.name, e.kind, e.config,
e.webhookSecret, e.webhookSigningSecret, e.requireSig); err != nil {
slog.Warn("trigger backfill: workload skipped",
"workload", e.id, "error", err)
}
}
return nil
}
// backfillOneTrigger lifts one embedded trigger into its own row + binding
// inside a single transaction. Idempotent: a workload that already has at
// least one binding is left alone.
func (s *Store) backfillOneTrigger(workloadID, workloadName, kind, config,
webhookSecret, webhookSigningSecret string, requireSig int) error {
tx, err := s.db.Begin()
if err != nil {
return fmt.Errorf("begin: %w", err)
}
defer func() { _ = tx.Rollback() }()
var existing int
if err := tx.QueryRow(
`SELECT COUNT(*) FROM workload_trigger_bindings WHERE workload_id = ?`,
workloadID,
).Scan(&existing); err != nil {
return fmt.Errorf("count bindings: %w", err)
}
if existing > 0 {
return nil
}
idShort := workloadID
if len(idShort) > 8 {
idShort = idShort[:8]
}
triggerName := fmt.Sprintf("%s [%s] %s", workloadName, kind, idShort)
triggerID := uuid.New().String()
now := Now()
if _, err := tx.Exec(
`INSERT INTO triggers (id, kind, name, config,
webhook_secret, webhook_signing_secret, webhook_require_signature,
created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
triggerID, kind, triggerName, config,
webhookSecret, webhookSigningSecret, requireSig,
now, now,
); err != nil {
return fmt.Errorf("insert trigger: %w", err)
}
bindingID := uuid.New().String()
if _, err := tx.Exec(
`INSERT INTO workload_trigger_bindings
(id, workload_id, trigger_id, binding_config, enabled, sort_order, created_at, updated_at)
VALUES (?, ?, ?, '{}', 1, 0, ?, ?)`,
bindingID, workloadID, triggerID, now, now,
); err != nil {
return fmt.Errorf("insert binding: %w", err)
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("commit: %w", err)
}
return nil
}
const schema = `
CREATE TABLE IF NOT EXISTS registries (
id TEXT PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
url TEXT NOT NULL,
type TEXT NOT NULL DEFAULT 'generic',
token TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS settings (
id INTEGER PRIMARY KEY CHECK (id = 1),
domain TEXT NOT NULL DEFAULT '',
server_ip TEXT NOT NULL DEFAULT '',
public_ip TEXT NOT NULL DEFAULT '',
network TEXT NOT NULL DEFAULT 'tinyforge',
subdomain_pattern TEXT NOT NULL DEFAULT 'stage-{stage}-{project}',
notification_url TEXT NOT NULL DEFAULT '',
notification_secret TEXT NOT NULL DEFAULT '',
npm_url TEXT NOT NULL DEFAULT '',
npm_email TEXT NOT NULL DEFAULT '',
npm_password TEXT NOT NULL DEFAULT '',
webhook_secret TEXT NOT NULL DEFAULT '',
polling_interval TEXT NOT NULL DEFAULT '5m',
base_volume_path TEXT NOT NULL DEFAULT '',
ssl_certificate_id INTEGER NOT NULL DEFAULT 0,
npm_remote INTEGER NOT NULL DEFAULT 0,
image_prune_threshold_mb INTEGER NOT NULL DEFAULT 1024,
npm_access_list_id INTEGER NOT NULL DEFAULT 0,
traefik_entrypoint TEXT NOT NULL DEFAULT 'websecure',
traefik_cert_resolver TEXT NOT NULL DEFAULT 'letsencrypt',
traefik_network TEXT NOT NULL DEFAULT '',
traefik_api_url TEXT NOT NULL DEFAULT '',
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS users (
id TEXT PRIMARY KEY,
username TEXT NOT NULL UNIQUE,
password_hash TEXT NOT NULL DEFAULT '',
email TEXT NOT NULL DEFAULT '',
role TEXT NOT NULL DEFAULT 'viewer',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS auth_settings (
id INTEGER PRIMARY KEY CHECK (id = 1),
auth_mode TEXT NOT NULL DEFAULT 'local',
oidc_client_id TEXT NOT NULL DEFAULT '',
oidc_client_secret TEXT NOT NULL DEFAULT '',
oidc_issuer_url TEXT NOT NULL DEFAULT '',
oidc_redirect_url TEXT NOT NULL DEFAULT ''
);
-- Seed the settings row if it does not exist.
INSERT OR IGNORE INTO settings (id) VALUES (1);
-- Seed the auth_settings row if it does not exist.
INSERT OR IGNORE INTO auth_settings (id) VALUES (1);
CREATE TABLE IF NOT EXISTS event_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source TEXT NOT NULL DEFAULT '',
severity TEXT NOT NULL DEFAULT 'info',
message TEXT NOT NULL DEFAULT '',
metadata TEXT NOT NULL DEFAULT '{}',
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS standalone_proxies (
id TEXT PRIMARY KEY,
domain TEXT NOT NULL UNIQUE,
destination_url TEXT NOT NULL DEFAULT '',
destination_port INTEGER NOT NULL DEFAULT 0,
ssl_certificate_id INTEGER NOT NULL DEFAULT 0,
npm_proxy_id INTEGER NOT NULL DEFAULT 0,
health_status TEXT NOT NULL DEFAULT 'unknown',
health_checked_at TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS dns_records (
id TEXT PRIMARY KEY,
fqdn TEXT NOT NULL UNIQUE,
provider_record_id TEXT NOT NULL DEFAULT '',
consumer_type TEXT NOT NULL DEFAULT '',
consumer_id TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS backups (
id TEXT PRIMARY KEY,
filename TEXT NOT NULL UNIQUE,
size_bytes INTEGER NOT NULL DEFAULT 0,
backup_type TEXT NOT NULL DEFAULT 'manual',
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
`
// Now returns the current time formatted for SQLite storage.
func Now() string {
return time.Now().UTC().Format("2006-01-02 15:04:05")
}