feat(apps): stepped creation wizard, branch previews, and app-creation fixes
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
+ {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
/apps/[id] edit form onto the same components (removes the duplication). Add
vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 → now served by admin-gated POST /api/discovery/image/inspect;
conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
label hints; dashboard and /apps "Total workloads" now count only source_kind workloads
(drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.
Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
This commit is contained in:
+73
-32
@@ -1,7 +1,6 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -118,7 +117,22 @@ func (s *Server) deleteBackup(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// restoreBackup handles POST /api/backups/{id}/restore.
|
||||
// This replaces the current database with the backup and triggers a graceful shutdown.
|
||||
//
|
||||
// Restore happens in three documented stages so a failure at any stage
|
||||
// leaves the live DB intact:
|
||||
//
|
||||
// 1. PRE-FLIGHT (sync, before the HTTP response): PrepareRestore opens
|
||||
// the candidate read-only and runs `PRAGMA integrity_check`. If it
|
||||
// fails the live DB is untouched and we return 400 with the reason.
|
||||
//
|
||||
// 2. SAFETY NET: a pre-restore backup of the LIVE DB is created so the
|
||||
// operator can roll back even if the candidate is later discovered
|
||||
// to be missing data.
|
||||
//
|
||||
// 3. SWAP (async, after the response is flushed): close the live DB,
|
||||
// atomic-rename the candidate over the live path, wipe WAL/SHM,
|
||||
// trigger graceful shutdown. supervisord / systemd / docker
|
||||
// restart=on-failure brings the process back with the new DB.
|
||||
func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
||||
if s.backupEngine == nil {
|
||||
respondError(w, http.StatusServiceUnavailable, "backup engine not initialized")
|
||||
@@ -126,13 +140,44 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
id := chi.URLParam(r, "id")
|
||||
restorePath, err := s.backupEngine.RestorePath(id)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusNotFound, "backup not found: "+err.Error())
|
||||
|
||||
// CSRF / accidental-fire guard: the restore endpoint is the most
|
||||
// destructive surface in the API (replaces the whole DB). Even
|
||||
// though it sits behind AdminOnly + Bearer JWT, a blind cross-site
|
||||
// POST or a misclicked button in any open admin tab can fire it.
|
||||
// Require the operator's client to echo X-Confirm-Restore: <id>
|
||||
// — matching the path param — so a CSRF post-form / image-src
|
||||
// trick can't trigger restore (browsers don't let cross-origin
|
||||
// requests set custom headers without a preflight).
|
||||
if confirm := r.Header.Get("X-Confirm-Restore"); confirm != id {
|
||||
respondError(w, http.StatusBadRequest,
|
||||
"missing or mismatched X-Confirm-Restore header (must equal backup id)")
|
||||
return
|
||||
}
|
||||
|
||||
// Create a safety backup before restore so the user can undo if needed.
|
||||
// Single-flight guard: a rapid double-click would otherwise spawn
|
||||
// two goroutines racing s.store.Close() and the candidate-over-
|
||||
// live rename. CAS to true here; if someone else won, return 409.
|
||||
if !s.restoreInFlight.CompareAndSwap(false, true) {
|
||||
respondError(w, http.StatusConflict, "a restore is already in progress")
|
||||
return
|
||||
}
|
||||
// Do NOT release the flag — the restore path triggers shutdown.
|
||||
// A failed restore is also terminal (the DB may be closed); a
|
||||
// fresh process boot is the recovery path.
|
||||
// PRE-FLIGHT: refuse before touching anything if the candidate is
|
||||
// not a valid SQLite database or fails integrity_check. This is the
|
||||
// guard the prior code lacked — a corrupt backup would silently
|
||||
// overwrite a healthy live DB.
|
||||
restorePath, err := s.backupEngine.PrepareRestore(id)
|
||||
if err != nil {
|
||||
respondError(w, http.StatusBadRequest, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// SAFETY NET: pre-restore snapshot of the live DB. A failure here
|
||||
// is logged but does not abort — the integrity-checked candidate
|
||||
// is still safer than refusing to restore.
|
||||
if _, err := s.backupEngine.CreateBackup("pre-restore"); err != nil {
|
||||
slog.Warn("failed to create pre-restore backup", "error", err)
|
||||
}
|
||||
@@ -153,41 +198,37 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
||||
go func() {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
|
||||
// Close the current database to release locks.
|
||||
// Once we begin closing the live DB the process can no longer serve
|
||||
// requests against a sane store, so EVERY exit path from here must
|
||||
// trigger shutdown. Returning early would leave the server limping
|
||||
// on a closed/half-swapped database with no path to recovery except
|
||||
// an external kill. shutdownFunc → graceful shutdown → main returns
|
||||
// → deferred releaseLock()/db.Close() run, and the supervisor reopens
|
||||
// whatever DB is on disk on the next boot.
|
||||
triggerShutdown := func() {
|
||||
if s.shutdownFunc != nil {
|
||||
s.shutdownFunc()
|
||||
}
|
||||
}
|
||||
|
||||
// Close the current database to release locks. AtomicReplaceDB
|
||||
// expects the live file to be unmapped before swap (especially
|
||||
// important on Windows where open files cannot be renamed over).
|
||||
if err := s.store.Close(); err != nil {
|
||||
slog.Error("restore: failed to close database", "error", err)
|
||||
slog.Error("restore: failed to close database, restarting", "error", err)
|
||||
triggerShutdown()
|
||||
return
|
||||
}
|
||||
|
||||
// Copy the backup file over the main database using streaming (no full read into memory).
|
||||
src, err := os.Open(restorePath)
|
||||
if err != nil {
|
||||
slog.Error("restore: failed to open backup file", "error", err)
|
||||
if err := s.backupEngine.AtomicReplaceDB(restorePath, s.dbPath); err != nil {
|
||||
slog.Error("restore: atomic replace failed, restarting", "error", err)
|
||||
triggerShutdown()
|
||||
return
|
||||
}
|
||||
defer src.Close()
|
||||
|
||||
dst, err := os.Create(s.dbPath)
|
||||
if err != nil {
|
||||
slog.Error("restore: failed to create database file", "error", err)
|
||||
return
|
||||
}
|
||||
defer dst.Close()
|
||||
|
||||
if _, err := io.Copy(dst, src); err != nil {
|
||||
slog.Error("restore: failed to copy backup to database", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Remove WAL and SHM files to ensure clean state.
|
||||
os.Remove(s.dbPath + "-wal")
|
||||
os.Remove(s.dbPath + "-shm")
|
||||
|
||||
slog.Info("restore: database replaced, triggering shutdown")
|
||||
|
||||
// Signal the server to shut down gracefully so it can be restarted.
|
||||
if s.shutdownFunc != nil {
|
||||
s.shutdownFunc()
|
||||
}
|
||||
triggerShutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user