feat(apps): stepped creation wizard, branch previews, and app-creation fixes

This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
  WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
  ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
  + {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
  /apps/[id] edit form onto the same components (removes the duplication). Add
  vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
  environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
  state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
  conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
  label hints; dashboard + /apps "Total workloads" now counts only source_kind workloads
  (drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
  empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.

Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
This commit is contained in:
2026-05-29 02:09:54 +03:00
parent 956943edbb
commit 410a131cec
112 changed files with 13285 additions and 2765 deletions
@@ -444,22 +444,12 @@ func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg
}
// dispatchSiteNotification fires a site_sync_success or
// site_sync_failure event to the configured outbound webhook.
// Resolution: per-workload URL+secret first, then fall through to
// settings.notification_url/secret. Always best-effort.
// site_sync_failure event for the workload via the shared multi-route
// dispatcher in plugin.DispatchNotificationForWorkload. Resolution
// order (workload_notifications → legacy single URL → settings global)
// is identical to the dockerfile plugin's path so receivers see
// consistent fan-out behaviour across source kinds.
func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
if deps.Notifier == nil {
return
}
settings, err := deps.Store.GetSettings()
if err != nil {
slog.Warn("static site: notify settings lookup failed", "site", w.ID, "error", err)
return
}
url, secret, tier := resolveSiteTarget(w, settings)
if url == "" {
return
}
eventType := "site_sync_success"
if status == "failed" {
eventType = "site_sync_failure"
@@ -468,7 +458,7 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
if domain != "" {
siteURL = "https://" + domain
}
deps.Notifier.SendSigned(url, secret, tier, notify.Event{
plugin.DispatchNotificationForWorkload(deps, w, notify.Event{
Type: eventType,
Project: w.Name,
URL: siteURL,
@@ -476,16 +466,6 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
})
}
// resolveSiteTarget picks the webhook target for a site notification.
// It stands in for the legacy helper of the same name, reading the
// per-workload notification config from the workload row instead of
// the static_sites row.
//
// Returns (url, secret, tier): the workload's own URL and secret with
// notify.TierSite when the workload has a URL set, otherwise the URL
// and secret from settings with notify.TierSettings.
func resolveSiteTarget(w plugin.Workload, settings store.Settings) (string, string, notify.Tier) {
	// No per-workload override: fall back to the global settings target.
	if w.NotificationURL == "" {
		return settings.NotificationURL, settings.NotificationSecret, notify.TierSettings
	}
	return w.NotificationURL, w.NotificationSecret, notify.TierSite
}
// publishEvent emits a static_site_status event on the bus AND
// persists an event_log row so the dashboard's audit trail picks it
// up. Message format ("Static site \"%s\": %s") is preserved verbatim
@@ -165,30 +165,42 @@ func TestContainerRowID_Deterministic(t *testing.T) {
}
}
func TestLockFor_ReturnsSameLockForSameWorkload(t *testing.T) {
// Suffix by t.Name() so the package-global saveLocks map cannot
// bleed key state between tests (or between -count=N runs).
func TestSaveLock_FreedWhenIdle(t *testing.T) {
// After the last holder releases, the reference-counted entry must be
// removed from the map so the lock table cannot grow without bound.
// Suffix by t.Name() so the package-global saveLocks map cannot bleed
// key state between tests (or between -count=N runs).
key := t.Name() + "-wid"
a := lockFor(key)
b := lockFor(key)
if a != b {
t.Fatalf("lockFor returned distinct locks for same workload: %p vs %p", a, b)
lk := acquireSaveLock(key)
saveLocks.mu.Lock()
_, present := saveLocks.locks[key]
saveLocks.mu.Unlock()
if !present {
t.Fatal("acquireSaveLock did not register the entry while held")
}
releaseSaveLock(key, lk)
saveLocks.mu.Lock()
_, stillPresent := saveLocks.locks[key]
saveLocks.mu.Unlock()
if stillPresent {
t.Fatal("releaseSaveLock left the entry behind after the last holder released")
}
}
func TestLockFor_ReturnsDistinctLocksForDifferentWorkloads(t *testing.T) {
a := lockFor(t.Name() + "-a")
b := lockFor(t.Name() + "-b")
if a == b {
t.Fatalf("lockFor returned same lock for different workloads: %p", a)
}
// TestSaveLock_DistinctWorkloadsDoNotSerialize holds the save locks of two
// different workloads at the same time. Were both keys backed by one mutex,
// the second acquire would never return and the test would hang.
func TestSaveLock_DistinctWorkloadsDoNotSerialize(t *testing.T) {
	keyA := t.Name() + "-a"
	keyB := t.Name() + "-b"

	lockA := acquireSaveLock(keyA)
	lockB := acquireSaveLock(keyB)

	// Release in reverse acquisition order.
	releaseSaveLock(keyB, lockB)
	releaseSaveLock(keyA, lockA)
}
func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
// Two goroutines holding the same lock must run sequentially. The
// counter would race past 2 if locking were broken; with the lock,
// the increment is observed monotonically.
lk := lockFor(t.Name() + "-wid")
func TestSaveLock_SerializesConcurrentAcquisitions(t *testing.T) {
// Goroutines acquiring the same workload's lock must run sequentially.
// The counter would race past 1 if locking were broken; with the lock,
// peak in-flight stays at 1.
key := t.Name() + "-wid"
var (
wg sync.WaitGroup
mu sync.Mutex
@@ -199,8 +211,8 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
lk.Lock()
defer lk.Unlock()
lk := acquireSaveLock(key)
defer releaseSaveLock(key, lk)
mu.Lock()
counter++
@@ -216,15 +228,15 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
}
wg.Wait()
if peak != 1 {
t.Fatalf("lockFor failed to serialize: peak in-flight = %d, want 1", peak)
t.Fatalf("acquireSaveLock failed to serialize: peak in-flight = %d, want 1", peak)
}
}
func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
// Distinct workloads acquired in parallel must not panic on map
// access — exercises the outer-mutex protection inside lockFor.
// Each iteration uses a unique key so the test stresses the
// insertion path (the common case for "first deploy" callers).
func TestSaveLock_ConcurrentMapAccessIsSafe(t *testing.T) {
// Distinct workloads acquired+released in parallel must not panic on map
// access — exercises the outer-mutex protection inside acquire/release.
// Each iteration uses a unique key so the test stresses the insertion +
// refcount-cleanup paths (the common case for "first deploy" callers).
prefix := t.Name() + "-"
var wg sync.WaitGroup
for i := 0; i < 50; i++ {
@@ -232,9 +244,9 @@ func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
lk := lockFor(prefix + strconv.Itoa(i))
lk.Lock()
lk.Unlock()
key := prefix + strconv.Itoa(i)
lk := acquireSaveLock(key)
releaseSaveLock(key, lk)
}()
}
wg.Wait()
+42 -14
View File
@@ -80,26 +80,55 @@ func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Contai
// container_id / proxy_route_id and orphaning Docker resources. The
// mutex caps the concurrency at 1 per workload; cross-workload
// parallelism is unaffected.
//
// Entries are reference-counted and removed only when the last holder
// releases. This bounds memory (no per-workload-ID leak) WITHOUT the
// use-after-delete hazard of deleting an entry on teardown: deleting a
// live entry while a concurrent saveState still holds (or is about to
// lock) it would let a fresh saveState mint a SECOND mutex for the same
// workload, losing the RMW serialization the lock exists to provide.
var saveLocks struct {
mu sync.Mutex
locks map[string]*sync.Mutex
locks map[string]*saveLock
}
// lockFor returns the per-workload mutex, creating it on first use.
// The outer mutex is held only briefly during map lookup; the returned
// per-workload lock is what callers actually contend on.
func lockFor(workloadID string) *sync.Mutex {
// saveLock is one entry in the saveLocks table: the mutex that callers for
// a single workload contend on, plus a count of outstanding holders.
type saveLock struct {
// mu is the per-workload lock; acquireSaveLock locks it and
// releaseSaveLock unlocks it.
mu sync.Mutex
// refs counts callers that have registered in acquireSaveLock and not yet
// finished releaseSaveLock (including ones still waiting on mu). It is
// read and written only while saveLocks.mu is held; the map entry is
// deleted when it drops back to zero.
refs int
}
// acquireSaveLock returns the per-workload lock (creating it on first use),
// registers this caller as a holder, and takes the lock. Pair with
// releaseSaveLock. The outer mutex is held only for the bookkeeping; callers
// contend on the returned per-workload lock.
func acquireSaveLock(workloadID string) *saveLock {
saveLocks.mu.Lock()
defer saveLocks.mu.Unlock()
if saveLocks.locks == nil {
saveLocks.locks = map[string]*sync.Mutex{}
saveLocks.locks = map[string]*saveLock{}
}
m, ok := saveLocks.locks[workloadID]
l, ok := saveLocks.locks[workloadID]
if !ok {
m = &sync.Mutex{}
saveLocks.locks[workloadID] = m
l = &saveLock{}
saveLocks.locks[workloadID] = l
}
return m
l.refs++
saveLocks.mu.Unlock()
l.mu.Lock()
return l
}
// releaseSaveLock is the counterpart of acquireSaveLock: it unlocks the
// per-workload mutex held in l, then drops the caller's reference under
// saveLocks.mu. When that was the last reference the entry for workloadID
// is removed from the table. Since acquirers bump refs while holding
// saveLocks.mu, an entry that still has a pending acquirer never reaches
// zero here and is therefore never deleted.
func releaseSaveLock(workloadID string, l *saveLock) {
	// Let the next waiter for this workload proceed before touching the table.
	l.mu.Unlock()

	saveLocks.mu.Lock()
	defer saveLocks.mu.Unlock()

	l.refs--
	if l.refs != 0 {
		// Other holders or waiters remain; keep the entry.
		return
	}
	delete(saveLocks.locks, workloadID)
}
// saveState upserts the container row, calling mutate so callers can
@@ -115,9 +144,8 @@ func lockFor(workloadID string) *sync.Mutex {
// Per-workload mutex serializes concurrent callers so two parallel
// Deploys can't read the same prior state and race their writes.
func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
lk := lockFor(w.ID)
lk.Lock()
defer lk.Unlock()
lk := acquireSaveLock(w.ID)
defer releaseSaveLock(w.ID, lk)
prev, prevRow, err := loadState(deps, w)
if err != nil {
@@ -185,14 +185,23 @@ func TestSaveState_RecoversFromInvalidExtraJSON(t *testing.T) {
deps, _ := testDeps(t)
w := plugin.Workload{ID: t.Name() + "-wid", Name: "site"}
// UpsertContainer now validates extra_json at the boundary, so this
// test seeds a valid row first and corrupts it via raw SQL to
// simulate a pre-existing bad row from an upgrade / external edit.
if err := deps.Store.UpsertContainer(store.Container{
ID: containerRowID(w),
WorkloadID: w.ID,
WorkloadKind: string(store.WorkloadKindSite),
Host: "local",
ExtraJSON: `{not json`,
ExtraJSON: `{}`,
}); err != nil {
t.Fatalf("seed bad row: %v", err)
t.Fatalf("seed row: %v", err)
}
if _, err := deps.Store.DB().Exec(
`UPDATE containers SET extra_json = ? WHERE id = ?`,
`{not json`, containerRowID(w),
); err != nil {
t.Fatalf("corrupt extra_json: %v", err)
}
err := saveState(deps, w, func(state *runtimeState, _ *store.Container) {
@@ -66,5 +66,8 @@ func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
if err := deps.Store.DeleteContainer(prevContainer.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
slog.Warn("static site: failed to delete container row", "site", w.Name, "error", err)
}
// The per-workload save-mutex is reference-counted (see state.go) and
// frees itself when the last holder releases, so teardown no longer
// deletes it explicitly — doing so could race a concurrent saveState.
return nil
}