Files
alexei.dolgolyov 410a131cec feat(apps): stepped creation wizard, branch previews, and app-creation fixes
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
  WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
  ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
  + {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
  /apps/[id] edit form onto the same components (removes the duplication). Add
  vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
  environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
  state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
  conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
  label hints; dashboard + /apps "Total workloads" count only source_kind workloads
  (drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
  empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.

Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
2026-05-29 02:09:54 +03:00

373 lines
11 KiB
Go

package backup
import (
"database/sql"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"sync"
"time"
_ "modernc.org/sqlite" // read-only candidate inspection via PRAGMA integrity_check
"github.com/alexei/tinyforge/internal/store"
)
// Engine manages database backup operations: creating, listing, deleting,
// validating and restoring backup files kept in one backup directory, with
// their metadata records kept in the store.
type Engine struct {
// mu is held by CreateBackup for the whole duration of a backup.
mu sync.Mutex
// store provides the live database handle (DB) and the backup metadata records.
store *store.Store
// dbPath is the database path that was passed to New.
dbPath string
// backupDir is the directory holding backup files: "backups" under the
// data directory that was passed to New.
backupDir string
}
// New builds a backup Engine whose backup files live in the "backups"
// subdirectory of dataDir. That directory (and any missing parents) is
// created before the Engine is returned; a failure to create it is returned
// as an error and no Engine is produced.
func New(st *store.Store, dbPath, dataDir string) (*Engine, error) {
	dir := filepath.Join(dataDir, "backups")

	err := os.MkdirAll(dir, 0o755)
	if err != nil {
		return nil, fmt.Errorf("create backup directory: %w", err)
	}

	engine := &Engine{store: st, dbPath: dbPath, backupDir: dir}
	return engine, nil
}
// BackupDir returns the directory in which this engine keeps backup files,
// i.e. the "backups" subdirectory that New created under the data directory.
func (e *Engine) BackupDir() string {
return e.backupDir
}
// CreateBackup snapshots the live database into a new file in the backup
// directory using VACUUM INTO, then records the file in the store.
//
// backupType must be one of "manual", "auto", "pre-restore" or "pre-deploy".
// It is interpolated into the filename, so any other value is rejected
// before anything touches the filesystem. The engine mutex is held for the
// whole operation.
//
// On success the stored metadata record is returned. If the metadata insert
// fails, the freshly written file is removed on a best-effort basis and the
// error is returned.
func (e *Engine) CreateBackup(backupType string) (store.Backup, error) {
	var none store.Backup

	// Whitelist the type: it becomes part of the filename below.
	knownType := backupType == "manual" || backupType == "auto" ||
		backupType == "pre-restore" || backupType == "pre-deploy"
	if !knownType {
		return none, fmt.Errorf("invalid backup type: %q", backupType)
	}

	e.mu.Lock()
	defer e.mu.Unlock()

	stamp := time.Now().UTC().Format("20060102-150405")
	name := fmt.Sprintf("tinyforge-%s-%s.db", backupType, stamp)
	target := filepath.Join(e.backupDir, name)

	// VACUUM INTO writes a clean, standalone copy of the database and can
	// run while the database is open and in WAL mode.
	if _, err := e.store.DB().Exec(`VACUUM INTO ?`, target); err != nil {
		return none, fmt.Errorf("vacuum into %s: %w", target, err)
	}

	// The size goes into the metadata record.
	stat, err := os.Stat(target)
	if err != nil {
		return none, fmt.Errorf("stat backup file: %w", err)
	}
	size := stat.Size()

	record, err := e.store.CreateBackup(store.Backup{
		Filename:   name,
		SizeBytes:  size,
		BackupType: backupType,
	})
	if err != nil {
		// Best effort: a file without a metadata row is useless, drop it.
		os.Remove(target)
		return none, fmt.Errorf("store backup metadata: %w", err)
	}

	slog.Info("backup created", "id", record.ID, "filename", name, "size", size, "type", backupType)
	return record, nil
}
// ListBackups returns all backup metadata records from the store.
// It is a direct pass-through to the store's ListBackups and does not look
// at the backup directory itself.
func (e *Engine) ListBackups() ([]store.Backup, error) {
return e.store.ListBackups()
}
// GetBackup returns the backup metadata record with the given id.
// It is a direct pass-through to the store's GetBackup; whatever error the
// store reports for an unknown id is returned unchanged.
func (e *Engine) GetBackup(id string) (store.Backup, error) {
return e.store.GetBackup(id)
}
// FilePath returns the filesystem path of the given backup's file inside the
// backup directory. It only joins backupDir with backup.Filename: it neither
// checks that the file exists nor validates Filename. When the path feeds a
// restore, use RestorePath instead, which rejects any Filename that is not a
// plain base name and confirms the file is present.
func (e *Engine) FilePath(backup store.Backup) string {
return filepath.Join(e.backupDir, backup.Filename)
}
// DeleteBackup removes a backup's file from the backup directory and then
// deletes its metadata record.
//
// A file that is already missing is not an error; the record is still
// deleted. Any other removal failure aborts before the record is touched.
//
// Filename comes from a DB row, so it is trusted only when it is a plain
// base name. For an empty name, ".", "..", or anything carrying a path
// separator, the file removal is skipped with a warning and only the record
// is deleted. Without this check a poisoned row could make this method
// remove a file outside the backup directory, and an empty Filename would
// resolve to the backup directory itself.
func (e *Engine) DeleteBackup(id string) error {
	backup, err := e.store.GetBackup(id)
	if err != nil {
		return fmt.Errorf("get backup: %w", err)
	}

	name := backup.Filename
	if name == "" || name == "." || name == ".." || name != filepath.Base(name) {
		// Never follow an untrusted path; still let the bad row be cleaned up.
		slog.Warn("backup delete: skipping file removal for invalid filename", "id", id, "filename", name)
	} else if err := os.Remove(filepath.Join(e.backupDir, name)); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("remove backup file: %w", err)
	}

	// Remove metadata.
	if err := e.store.DeleteBackup(id); err != nil {
		return fmt.Errorf("delete backup metadata: %w", err)
	}

	slog.Info("backup deleted", "id", id, "filename", name)
	return nil
}
// RestorePath resolves a backup id to the path of its file for a restore.
// It only locates and vets the file; replacing the database is left to the
// caller.
//
// The filename is read from a DB row, so it is checked before use: a backup
// file must sit directly inside the backup directory, and any name that is
// empty, ".", "..", or not its own base name is rejected. This keeps a
// poisoned row (future import path, manual insert) from steering a restore
// at an arbitrary file. The file must also exist, otherwise an error is
// returned.
func (e *Engine) RestorePath(id string) (string, error) {
	record, err := e.store.GetBackup(id)
	if err != nil {
		return "", fmt.Errorf("get backup: %w", err)
	}

	name := record.Filename
	switch {
	case name == "", name == ".", name == "..", filepath.Base(name) != name:
		return "", fmt.Errorf("backup: invalid filename %q", name)
	}

	candidate := filepath.Join(e.backupDir, name)
	_, statErr := os.Stat(candidate)
	if statErr != nil {
		return "", fmt.Errorf("backup file not found: %w", statErr)
	}
	return candidate, nil
}
// PrepareRestore validates a backup candidate before the caller swaps it
// over the live DB. The probes run in this order and the first failure is
// returned:
//
//  1. RestorePath resolves the id to a vetted filename whose file exists.
//  2. The file is at least 100 bytes long.
//  3. Its first 16 bytes are the SQLite header magic (catches corrupted or
//     partial files).
//  4. integrityCheck, which runs `PRAGMA integrity_check` on the candidate
//     itself through a read-only connection, reports "ok" (catches page
//     corruption that the header probe cannot see).
//
// On success the candidate path is returned. On failure the wrapped error
// says which probe rejected the file, so the operator sees why the restore
// was refused instead of finding a corrupt DB at next boot.
func (e *Engine) PrepareRestore(id string) (string, error) {
	candidate, err := e.RestorePath(id)
	if err != nil {
		return "", err
	}

	stat, err := os.Stat(candidate)
	if err != nil {
		return "", fmt.Errorf("restore: stat candidate: %w", err)
	}
	if size := stat.Size(); size < 100 {
		return "", fmt.Errorf("restore: candidate %s is suspiciously small (%d bytes)", candidate, size)
	}

	// Every SQLite database file starts with this 16-byte string.
	const sqliteMagic = "SQLite format 3\x00"
	head, err := readHead(candidate, len(sqliteMagic))
	if err != nil {
		return "", fmt.Errorf("restore: read header: %w", err)
	}
	if string(head) != sqliteMagic {
		return "", fmt.Errorf("restore: candidate %s is not a SQLite database (header mismatch)", candidate)
	}

	if err = integrityCheck(candidate); err != nil {
		return "", fmt.Errorf("restore: integrity check failed: %w", err)
	}
	return candidate, nil
}
// readHead returns exactly the first n bytes of the file at path.
// It fails if the file cannot be opened or holds fewer than n bytes.
func readHead(path string, n int) ([]byte, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	// io.ReadFull keeps reading until the buffer is full, where a single
	// Read call may legally return fewer bytes. A short file therefore
	// surfaces as an error instead of a partially filled header.
	head := make([]byte, n)
	_, readErr := io.ReadFull(file, head)
	if readErr != nil {
		return nil, readErr
	}
	return head, nil
}
// integrityCheck opens the candidate read-only and runs
// `PRAGMA integrity_check`. We use immutable=1 so the driver does not
// try to create WAL/SHM sidecars or upgrade the journal mode on the
// candidate — both of which fail with "attempt to write a readonly
// database" against a backup file.
//
// Only the first result row is inspected: anything other than the single
// row `"ok"` is treated as corruption and returned as an error. A query or
// iteration failure is reported as such rather than as "no rows", so an
// unreadable candidate is distinguishable from an empty result.
func integrityCheck(path string) error {
	db, err := sql.Open("sqlite", "file:"+path+"?mode=ro&immutable=1")
	if err != nil {
		return fmt.Errorf("open candidate: %w", err)
	}
	defer db.Close()

	rows, err := db.Query("PRAGMA integrity_check")
	if err != nil {
		return fmt.Errorf("pragma integrity_check: %w", err)
	}
	defer rows.Close()

	if !rows.Next() {
		// Next returns false both at end of data and on failure; Err tells
		// the two apart so a real error is not masked.
		if err := rows.Err(); err != nil {
			return fmt.Errorf("pragma integrity_check: %w", err)
		}
		return fmt.Errorf("integrity_check returned no rows")
	}

	var result string
	if err := rows.Scan(&result); err != nil {
		return fmt.Errorf("scan integrity_check: %w", err)
	}
	if result != "ok" {
		return fmt.Errorf("integrity_check: %s", result)
	}
	return nil
}
// AtomicReplaceDB puts a backup candidate in place of the live database
// file. The expected calling sequence is:
//  1. PrepareRestore(id) to obtain candidatePath.
//  2. CreateBackup("pre-restore") to snapshot the current DB.
//  3. Close the live *sql.DB.
//  4. AtomicReplaceDB(candidatePath, livePath).
//  5. Trigger graceful shutdown; main() re-opens the DB on next start.
//
// The candidate is first copied to livePath+".restore.tmp" and then renamed
// over livePath, so the live path never holds a half-written file. The
// live DB's "-wal" and "-shm" sidecars are removed before the rename so the
// restored file is not paired with a WAL written for the old DB; a sidecar
// that cannot be removed is logged and otherwise ignored.
func (e *Engine) AtomicReplaceDB(candidatePath, livePath string) error {
	// Stage next to the destination so the final rename stays within one
	// directory.
	staged := livePath + ".restore.tmp"
	if copyErr := copyFile(candidatePath, staged); copyErr != nil {
		return fmt.Errorf("copy candidate to %s: %w", staged, copyErr)
	}

	// Best effort: a missing sidecar is fine, any other failure is only
	// worth a warning.
	dropSidecar := func(p string) {
		rmErr := os.Remove(p)
		if rmErr == nil || os.IsNotExist(rmErr) {
			return
		}
		slog.Warn("restore: remove sidecar", "path", p, "error", rmErr)
	}
	dropSidecar(livePath + "-wal")
	dropSidecar(livePath + "-shm")

	renameErr := os.Rename(staged, livePath)
	if renameErr != nil {
		// Do not leave the staged copy behind when the swap did not happen.
		_ = os.Remove(staged)
		return fmt.Errorf("rename %s → %s: %w", staged, livePath, renameErr)
	}

	slog.Info("restore: database file replaced atomically", "live", livePath)
	return nil
}
// copyFile copies the contents of src into dst, creating dst with mode 0600
// or truncating it if it already exists.
//
// The data is flushed to stable storage with Sync before dst is closed, so
// a caller that renames dst into place afterwards is not renaming a file
// whose contents are still only in the page cache. If anything fails after
// dst has been opened, dst is removed so that no partial file is left
// behind. The first error encountered is returned.
func copyFile(src, dst string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
	if err != nil {
		return err
	}
	// From here on dst exists (and was truncated); drop it on any failure.
	// Every error path below closes out before this runs.
	defer func() {
		if err != nil {
			_ = os.Remove(dst)
		}
	}()

	if _, err = io.Copy(out, in); err != nil {
		_ = out.Close()
		return err
	}
	if err = out.Sync(); err != nil {
		_ = out.Close()
		return err
	}
	return out.Close()
}
// Prune deletes the oldest backups so that at most retentionCount remain.
// A retentionCount of zero or less disables pruning. It returns how many
// backups were actually deleted; a backup that fails to delete is logged
// and skipped, it does not abort the run or produce an error.
func (e *Engine) Prune(retentionCount int) (int, error) {
	if retentionCount <= 0 {
		return 0, nil
	}

	total, err := e.store.CountBackups()
	if err != nil {
		return 0, fmt.Errorf("count backups: %w", err)
	}

	surplus := total - retentionCount
	if surplus <= 0 {
		// Already within the retention limit.
		return 0, nil
	}

	victims, err := e.store.GetOldestBackups(surplus)
	if err != nil {
		return 0, fmt.Errorf("get oldest backups: %w", err)
	}

	var removed int
	for _, old := range victims {
		delErr := e.DeleteBackup(old.ID)
		if delErr != nil {
			slog.Warn("prune: failed to delete backup", "id", old.ID, "error", delErr)
			continue
		}
		removed++
	}

	if removed > 0 {
		slog.Info("backups pruned", "pruned", removed, "retention", retentionCount)
	}
	return removed, nil
}
// CleanOrphans removes files in the backup directory that have no metadata
// record in the store, and returns how many files were removed.
// Subdirectories are left alone. A file that cannot be removed is logged
// and skipped; only a failure to read the directory or to list the records
// is returned as an error.
//
// The engine mutex is held for the whole scan. CreateBackup holds the same
// mutex from before it writes the backup file until the metadata row is
// stored, so without the lock a concurrent scan could see a backup that is
// still being created, find no record for it yet, and delete it as an
// orphan.
func (e *Engine) CleanOrphans() (int, error) {
	e.mu.Lock()
	defer e.mu.Unlock()

	entries, err := os.ReadDir(e.backupDir)
	if err != nil {
		return 0, fmt.Errorf("read backup directory: %w", err)
	}

	backups, err := e.store.ListBackups()
	if err != nil {
		return 0, fmt.Errorf("list backups: %w", err)
	}

	// Set of filenames that have a metadata record.
	tracked := make(map[string]struct{}, len(backups))
	for _, b := range backups {
		tracked[b.Filename] = struct{}{}
	}

	cleaned := 0
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if _, ok := tracked[name]; ok {
			continue
		}
		if err := os.Remove(filepath.Join(e.backupDir, name)); err != nil {
			slog.Warn("clean orphan: failed to remove file", "file", name, "error", err)
			continue
		}
		slog.Info("removed orphaned backup file", "file", name)
		cleaned++
	}
	return cleaned, nil
}