410a131cec
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
+ {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
/apps/[id] edit form onto the same components (removes the duplication). Add
vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
label hints; dashboard + /apps "Total workloads" count only source_kind workloads
(drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.
Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
373 lines
11 KiB
Go
373 lines
11 KiB
Go
package backup
|
|
|
|
import (
|
|
"database/sql"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"time"
|
|
|
|
_ "modernc.org/sqlite" // read-only candidate inspection via PRAGMA integrity_check
|
|
|
|
"github.com/alexei/tinyforge/internal/store"
|
|
)
|
|
|
|
// Engine manages database backup operations.
|
|
type Engine struct {
|
|
mu sync.Mutex
|
|
store *store.Store
|
|
dbPath string
|
|
backupDir string
|
|
}
|
|
|
|
// New creates a new backup engine. It ensures the backup directory exists.
|
|
func New(st *store.Store, dbPath, dataDir string) (*Engine, error) {
|
|
backupDir := filepath.Join(dataDir, "backups")
|
|
if err := os.MkdirAll(backupDir, 0o755); err != nil {
|
|
return nil, fmt.Errorf("create backup directory: %w", err)
|
|
}
|
|
return &Engine{
|
|
store: st,
|
|
dbPath: dbPath,
|
|
backupDir: backupDir,
|
|
}, nil
|
|
}
|
|
|
|
// BackupDir returns the path to the backup directory.
|
|
func (e *Engine) BackupDir() string {
|
|
return e.backupDir
|
|
}
|
|
|
|
// CreateBackup creates a new database backup using VACUUM INTO.
|
|
// Returns the backup metadata record.
|
|
func (e *Engine) CreateBackup(backupType string) (store.Backup, error) {
|
|
// Validate backup type to prevent path traversal via filename.
|
|
switch backupType {
|
|
case "manual", "auto", "pre-restore", "pre-deploy":
|
|
// valid
|
|
default:
|
|
return store.Backup{}, fmt.Errorf("invalid backup type: %q", backupType)
|
|
}
|
|
|
|
e.mu.Lock()
|
|
defer e.mu.Unlock()
|
|
|
|
timestamp := time.Now().UTC().Format("20060102-150405")
|
|
filename := fmt.Sprintf("tinyforge-%s-%s.db", backupType, timestamp)
|
|
destPath := filepath.Join(e.backupDir, filename)
|
|
|
|
// VACUUM INTO creates a clean, standalone copy of the database.
|
|
// It is safe to use while the database is open and in WAL mode.
|
|
_, err := e.store.DB().Exec(`VACUUM INTO ?`, destPath)
|
|
if err != nil {
|
|
return store.Backup{}, fmt.Errorf("vacuum into %s: %w", destPath, err)
|
|
}
|
|
|
|
// Get file size.
|
|
info, err := os.Stat(destPath)
|
|
if err != nil {
|
|
return store.Backup{}, fmt.Errorf("stat backup file: %w", err)
|
|
}
|
|
|
|
// Store metadata.
|
|
backup, err := e.store.CreateBackup(store.Backup{
|
|
Filename: filename,
|
|
SizeBytes: info.Size(),
|
|
BackupType: backupType,
|
|
})
|
|
if err != nil {
|
|
// Best effort: remove the file if metadata insert fails.
|
|
os.Remove(destPath)
|
|
return store.Backup{}, fmt.Errorf("store backup metadata: %w", err)
|
|
}
|
|
|
|
slog.Info("backup created", "id", backup.ID, "filename", filename, "size", info.Size(), "type", backupType)
|
|
return backup, nil
|
|
}
|
|
|
|
// ListBackups returns all backup records.
|
|
func (e *Engine) ListBackups() ([]store.Backup, error) {
|
|
return e.store.ListBackups()
|
|
}
|
|
|
|
// GetBackup returns a single backup record.
|
|
func (e *Engine) GetBackup(id string) (store.Backup, error) {
|
|
return e.store.GetBackup(id)
|
|
}
|
|
|
|
// FilePath returns the full filesystem path for a backup.
|
|
func (e *Engine) FilePath(backup store.Backup) string {
|
|
return filepath.Join(e.backupDir, backup.Filename)
|
|
}
|
|
|
|
// DeleteBackup removes a backup file and its metadata record.
|
|
func (e *Engine) DeleteBackup(id string) error {
|
|
backup, err := e.store.GetBackup(id)
|
|
if err != nil {
|
|
return fmt.Errorf("get backup: %w", err)
|
|
}
|
|
|
|
// Remove file.
|
|
filePath := filepath.Join(e.backupDir, backup.Filename)
|
|
if err := os.Remove(filePath); err != nil && !os.IsNotExist(err) {
|
|
return fmt.Errorf("remove backup file: %w", err)
|
|
}
|
|
|
|
// Remove metadata.
|
|
if err := e.store.DeleteBackup(id); err != nil {
|
|
return fmt.Errorf("delete backup metadata: %w", err)
|
|
}
|
|
|
|
slog.Info("backup deleted", "id", id, "filename", backup.Filename)
|
|
return nil
|
|
}
|
|
|
|
// RestorePath returns the path of a backup file for restore operations.
|
|
// The caller is responsible for actually replacing the database.
|
|
func (e *Engine) RestorePath(id string) (string, error) {
|
|
backup, err := e.store.GetBackup(id)
|
|
if err != nil {
|
|
return "", fmt.Errorf("get backup: %w", err)
|
|
}
|
|
|
|
// Filename comes from a DB row. Defence-in-depth: a backup file must live
|
|
// directly under backupDir, so reject any value carrying a path separator
|
|
// or traversal before joining. A poisoned row (future import path, manual
|
|
// insert) must never let restore read — and then atomically copy over the
|
|
// live DB — an arbitrary file. CreateBackup builds safe base names; this
|
|
// enforces the same invariant on read.
|
|
if backup.Filename == "" || backup.Filename == "." || backup.Filename == ".." ||
|
|
backup.Filename != filepath.Base(backup.Filename) {
|
|
return "", fmt.Errorf("backup: invalid filename %q", backup.Filename)
|
|
}
|
|
|
|
filePath := filepath.Join(e.backupDir, backup.Filename)
|
|
if _, err := os.Stat(filePath); err != nil {
|
|
return "", fmt.Errorf("backup file not found: %w", err)
|
|
}
|
|
|
|
return filePath, nil
|
|
}
|
|
|
|
// PrepareRestore validates a backup candidate before the caller swaps it
|
|
// over the live DB. Runs three checks in order:
|
|
//
|
|
// 1. The candidate file exists and is non-empty.
|
|
// 2. SQLite header magic matches (catches corrupted or partial downloads).
|
|
// 3. `PRAGMA integrity_check` against a temp copy returns "ok"
|
|
// (catches WAL/page corruption that the header check misses).
|
|
//
|
|
// On success returns the candidate path. On failure returns a wrapped
|
|
// error describing which probe rejected the file, so the operator can
|
|
// see exactly why a "restore" was refused rather than getting a corrupt
|
|
// DB at next boot.
|
|
//
|
|
// We use a *temp copy* for integrity_check because attaching the
|
|
// candidate read-only into the live process would still hold a file
|
|
// handle SQLite considers writable on Windows.
|
|
func (e *Engine) PrepareRestore(id string) (string, error) {
|
|
path, err := e.RestorePath(id)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
info, err := os.Stat(path)
|
|
if err != nil {
|
|
return "", fmt.Errorf("restore: stat candidate: %w", err)
|
|
}
|
|
if info.Size() < 100 {
|
|
return "", fmt.Errorf("restore: candidate %s is suspiciously small (%d bytes)", path, info.Size())
|
|
}
|
|
|
|
// SQLite file header: "SQLite format 3\x00" (16 bytes).
|
|
hdr, err := readHead(path, 16)
|
|
if err != nil {
|
|
return "", fmt.Errorf("restore: read header: %w", err)
|
|
}
|
|
if string(hdr) != "SQLite format 3\x00" {
|
|
return "", fmt.Errorf("restore: candidate %s is not a SQLite database (header mismatch)", path)
|
|
}
|
|
|
|
if err := integrityCheck(path); err != nil {
|
|
return "", fmt.Errorf("restore: integrity check failed: %w", err)
|
|
}
|
|
|
|
return path, nil
|
|
}
|
|
|
|
// readHead returns exactly the first n bytes of the file at path.
//
// It reads with io.ReadFull rather than a single Read: a bare Read may
// return fewer bytes than requested, which would let a short buffer reach
// the caller's header comparison. A file shorter than n bytes therefore
// yields an error (io.EOF when it is empty, io.ErrUnexpectedEOF otherwise)
// instead of a partial result.
func readHead(path string, n int) ([]byte, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	head := make([]byte, n)
	_, err = io.ReadFull(file, head)
	if err != nil {
		return nil, err
	}
	return head, nil
}
|
|
|
|
// integrityCheck opens the candidate read-only and runs
// `PRAGMA integrity_check`. We use immutable=1 so the driver does not
// try to create WAL/SHM sidecars or upgrade the journal mode on the
// candidate — both of which fail with "attempt to write a readonly
// database" against a backup file. Anything other than the single row
// `"ok"` is treated as corruption.
//
// The path is embedded in a SQLite `file:` URI, so the characters that are
// significant to URI parsing are percent-escaped first; otherwise a data
// directory containing '?', '#' or '%' would be misread as a query string,
// fragment or escape sequence. A failure while fetching the result row is
// reported as such rather than as "no rows".
func integrityCheck(path string) error {
	dsn := "file:" + escapeSQLiteURIPath(path) + "?mode=ro&immutable=1"
	db, err := sql.Open("sqlite", dsn)
	if err != nil {
		return fmt.Errorf("open candidate: %w", err)
	}
	defer db.Close()

	rows, err := db.Query("PRAGMA integrity_check")
	if err != nil {
		return fmt.Errorf("pragma integrity_check: %w", err)
	}
	defer rows.Close()

	if !rows.Next() {
		// Next reports false both for an empty result and for a read error;
		// only rows.Err distinguishes the two.
		if err := rows.Err(); err != nil {
			return fmt.Errorf("pragma integrity_check: %w", err)
		}
		return fmt.Errorf("integrity_check returned no rows")
	}
	var result string
	if err := rows.Scan(&result); err != nil {
		return fmt.Errorf("scan integrity_check: %w", err)
	}
	if result != "ok" {
		return fmt.Errorf("integrity_check: %s", result)
	}
	return nil
}

// escapeSQLiteURIPath percent-escapes the bytes of path that carry meaning in
// a SQLite URI filename: '%' (escape introducer), '?' (query separator) and
// '#' (fragment separator). Every other byte is copied through unchanged, so
// a path without these characters yields exactly the same URI as before.
func escapeSQLiteURIPath(path string) string {
	escaped := make([]byte, 0, len(path))
	for i := 0; i < len(path); i++ {
		switch c := path[i]; c {
		case '%':
			escaped = append(escaped, "%25"...)
		case '?':
			escaped = append(escaped, "%3f"...)
		case '#':
			escaped = append(escaped, "%23"...)
		default:
			escaped = append(escaped, c)
		}
	}
	return string(escaped)
}
|
|
|
|
// AtomicReplaceDB writes a backup candidate into place atomically.
|
|
// The caller is expected to:
|
|
// 1. Call PrepareRestore(id) → candidatePath.
|
|
// 2. Take a "pre-restore" backup of the current DB via CreateBackup.
|
|
// 3. Close the live *sql.DB.
|
|
// 4. Call AtomicReplaceDB(candidatePath, livePath).
|
|
// 5. Trigger graceful shutdown; main() will re-open on next start.
|
|
//
|
|
// AtomicReplaceDB also wipes WAL/SHM sidecar files so the new DB starts
|
|
// from a clean checkpoint state. Failure to remove sidecars is logged
|
|
// but non-fatal — SQLite recreates them on open.
|
|
func (e *Engine) AtomicReplaceDB(candidatePath, livePath string) error {
|
|
// Copy candidate to a tmp file next to the live DB, then rename
|
|
// atomically. On Windows os.Rename across volumes fails, so we
|
|
// keep tmp on the same dir as the destination.
|
|
tmp := livePath + ".restore.tmp"
|
|
if err := copyFile(candidatePath, tmp); err != nil {
|
|
return fmt.Errorf("copy candidate to %s: %w", tmp, err)
|
|
}
|
|
// Best-effort: remove WAL/SHM so SQLite re-checkpoints from the
|
|
// restored main file rather than a stale WAL pointing at the old
|
|
// DB's pages.
|
|
for _, sidecar := range []string{livePath + "-wal", livePath + "-shm"} {
|
|
if err := os.Remove(sidecar); err != nil && !os.IsNotExist(err) {
|
|
slog.Warn("restore: remove sidecar", "path", sidecar, "error", err)
|
|
}
|
|
}
|
|
if err := os.Rename(tmp, livePath); err != nil {
|
|
// Clean up tmp on rename failure so we don't leak a partial file.
|
|
_ = os.Remove(tmp)
|
|
return fmt.Errorf("rename %s → %s: %w", tmp, livePath, err)
|
|
}
|
|
slog.Info("restore: database file replaced atomically", "live", livePath)
|
|
return nil
|
|
}
|
|
|
|
// copyFile copies the contents of src into dst, creating dst with mode 0o600
// if it does not exist and truncating it if it does.
//
// The data is flushed to stable storage with Sync before the file is closed.
// Callers rename the result over a live database, and a rename is only a safe
// publish step when the bytes it exposes are already durable.
//
// The first error encountered (open, copy, sync or close) is returned. On a
// copy or sync failure dst is closed but not removed.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, in); err != nil {
		_ = out.Close() // the copy error is the one worth reporting
		return err
	}
	if err := out.Sync(); err != nil {
		_ = out.Close() // the sync error is the one worth reporting
		return err
	}
	return out.Close()
}
|
|
|
|
// Prune removes old backups exceeding the retention count.
|
|
// Returns the number of backups pruned.
|
|
func (e *Engine) Prune(retentionCount int) (int, error) {
|
|
if retentionCount <= 0 {
|
|
return 0, nil
|
|
}
|
|
|
|
count, err := e.store.CountBackups()
|
|
if err != nil {
|
|
return 0, fmt.Errorf("count backups: %w", err)
|
|
}
|
|
|
|
excess := count - retentionCount
|
|
if excess <= 0 {
|
|
return 0, nil
|
|
}
|
|
|
|
oldest, err := e.store.GetOldestBackups(excess)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("get oldest backups: %w", err)
|
|
}
|
|
|
|
pruned := 0
|
|
for _, b := range oldest {
|
|
if err := e.DeleteBackup(b.ID); err != nil {
|
|
slog.Warn("prune: failed to delete backup", "id", b.ID, "error", err)
|
|
continue
|
|
}
|
|
pruned++
|
|
}
|
|
|
|
if pruned > 0 {
|
|
slog.Info("backups pruned", "pruned", pruned, "retention", retentionCount)
|
|
}
|
|
return pruned, nil
|
|
}
|
|
|
|
// CleanOrphans removes backup files in the backup directory that have no metadata record.
|
|
func (e *Engine) CleanOrphans() (int, error) {
|
|
entries, err := os.ReadDir(e.backupDir)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("read backup directory: %w", err)
|
|
}
|
|
|
|
backups, err := e.store.ListBackups()
|
|
if err != nil {
|
|
return 0, fmt.Errorf("list backups: %w", err)
|
|
}
|
|
|
|
tracked := make(map[string]bool, len(backups))
|
|
for _, b := range backups {
|
|
tracked[b.Filename] = true
|
|
}
|
|
|
|
cleaned := 0
|
|
for _, entry := range entries {
|
|
if entry.IsDir() {
|
|
continue
|
|
}
|
|
if !tracked[entry.Name()] {
|
|
filePath := filepath.Join(e.backupDir, entry.Name())
|
|
if err := os.Remove(filePath); err != nil {
|
|
slog.Warn("clean orphan: failed to remove file", "file", entry.Name(), "error", err)
|
|
continue
|
|
}
|
|
slog.Info("removed orphaned backup file", "file", entry.Name())
|
|
cleaned++
|
|
}
|
|
}
|
|
return cleaned, nil
|
|
}
|