feat(apps): stepped creation wizard, branch previews, and app-creation fixes

This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
  WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
  ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
  + {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
  /apps/[id] edit form onto the same components (removes the duplication). Add
  vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
  environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
  state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
  conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
  label hints; dashboard + /apps "Total workloads" count only source_kind workloads
  (drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
  empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.

Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
This commit is contained in:
2026-05-29 02:09:54 +03:00
parent 956943edbb
commit 410a131cec
112 changed files with 13285 additions and 2765 deletions
@@ -32,6 +32,23 @@ type Config struct {
type source struct{}
// composeRunner captures the docker compose operations this plugin
// invokes. It is declared beside its consumer (rather than in the
// stack package) so the plugin can be unit-tested against a fake
// instead of a real docker compose binary. `*stack.Compose` satisfies
// it implicitly.
type composeRunner interface {
	// Up runs the project described by yamlPath under projectName and
	// returns the command output alongside any error.
	Up(ctx context.Context, projectName string, yamlPath string) (string, error)
	// Down stops the named project; removeVolumes asks for its volumes
	// to be dropped as well.
	Down(ctx context.Context, projectName string, removeVolumes bool) (string, error)
	// Ps lists the services of the named project.
	Ps(ctx context.Context, projectName string, yamlPath string) ([]stack.Service, error)
}
// newComposeRunner is the factory the plugin calls whenever it needs a
// runner. It is a package-level variable purely as a test seam: tests
// replace it with a function returning a fake, while production code
// leaves it alone. The extra function-pointer dereference per Deploy /
// Teardown / Reconcile call is negligible next to the docker compose
// exec it guards.
var newComposeRunner func() composeRunner = func() composeRunner {
	return stack.NewCompose("")
}
// init registers the compose source with the plugin registry when the
// package is loaded.
func init() {
	plugin.RegisterSource(new(source))
}
// Kind reports the source-kind identifier of this plugin.
func (*source) Kind() string {
	const kind = "compose"
	return kind
}
@@ -82,7 +99,7 @@ func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload,
return fmt.Errorf("compose source: write yaml: %w", err)
}
compose := stack.NewCompose("")
compose := newComposeRunner()
out, err := compose.Up(ctx, projectName, yamlPath)
if err != nil {
return fmt.Errorf("compose source: docker compose up: %w (output: %s)", err, truncate(out, 1024))
@@ -105,7 +122,7 @@ func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload
cfg, _ := plugin.SourceConfigOf[Config](w)
projectName := composeProjectName(cfg.ComposeProjectName, w)
compose := stack.NewCompose("")
compose := newComposeRunner()
if _, err := compose.Down(ctx, projectName, true); err != nil {
// Log but proceed — the DB rows must not be orphaned.
slog.Warn("compose source: docker compose down", "workload", w.ID, "error", err)
@@ -139,7 +156,7 @@ func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workloa
projectName := composeProjectName(cfg.ComposeProjectName, w)
yamlPath, _ := writeYAMLIfChanged(w.ID, cfg.ComposeYAML)
compose := stack.NewCompose("")
compose := newComposeRunner()
services, err := compose.Ps(ctx, projectName, yamlPath)
if err != nil {
// Likely no compose project running for this workload. Mark
@@ -162,7 +179,7 @@ func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workloa
// syncContainers shares its body with Reconcile minus the missing-row
// fallback — Deploy expects compose ps to succeed since `up` just ran.
func syncContainers(ctx context.Context, deps plugin.Deps, compose *stack.Compose, w plugin.Workload, projectName, yamlPath string) error {
func syncContainers(ctx context.Context, deps plugin.Deps, compose composeRunner, w plugin.Workload, projectName, yamlPath string) error {
services, err := compose.Ps(ctx, projectName, yamlPath)
if err != nil {
return fmt.Errorf("compose ps: %w", err)
@@ -204,7 +221,17 @@ var projectNameSanitizer = regexp.MustCompile(`[^a-z0-9_-]`)
func composeProjectName(explicit string, w plugin.Workload) string {
if explicit != "" {
return explicit
// Apply the same sanitizer to operator-supplied names so a value
// like "--foo" cannot reach the docker CLI and be re-parsed as a
// flag. Reuses the canonical lower+[^a-z0-9_-]→"-" + trim path.
san := strings.ToLower(explicit)
san = projectNameSanitizer.ReplaceAllString(san, "-")
san = strings.Trim(san, "-")
if san != "" {
return san
}
// Fall through to the derived name if sanitization stripped
// everything (operator passed e.g. "---" — degenerate input).
}
name := strings.ToLower(w.Name)
name = projectNameSanitizer.ReplaceAllString(name, "-")
@@ -0,0 +1,512 @@
package compose
import (
"context"
"encoding/json"
"errors"
"strings"
"sync"
"testing"
"github.com/alexei/tinyforge/internal/stack"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// fakeRunner is the test double used in place of *stack.Compose. Each
// method appends its arguments to the matching *Calls slice and replies
// with the scripted value at the current call index. Anything left
// unscripted yields the happy-path zero value: no services from Ps, no
// error from Up / Down. Scripts are slices so one fakeRunner can answer
// a sequence of calls (Deploy issues Up followed by Ps).
type fakeRunner struct {
	mu sync.Mutex // guards every field below

	// Up: recorded calls, scripted replies, index of the next reply.
	upCalls   []runnerCall
	upOuts    []string
	upErrs    []error
	upCallIdx int

	// Down: recorded calls, scripted replies, index of the next reply.
	downCalls []runnerCall
	downOuts  []string
	downErrs  []error
	downCallI int

	// Ps: recorded calls, scripted replies, index of the next reply.
	psCalls   []runnerCall
	psResults [][]stack.Service
	psErrs    []error
	psCallIdx int
}
// runnerCall records the arguments of one fakeRunner invocation. Up and
// Ps fill ProjectName and YAMLPath; Down fills ProjectName and
// RemoveVolumes. Fields a method does not take stay at their zero value.
type runnerCall struct {
	ProjectName, YAMLPath string
	RemoveVolumes         bool
}
// Up records the call and replies with the scripted output/error for
// this call index (zero values once the script is exhausted).
func (f *fakeRunner) Up(_ context.Context, projectName, yamlPath string) (string, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	call := f.upCallIdx
	f.upCallIdx = call + 1
	f.upCalls = append(f.upCalls, runnerCall{
		ProjectName: projectName,
		YAMLPath:    yamlPath,
	})
	return pop(f.upOuts, f.upErrs, call)
}
// Down records the call (including the removeVolumes flag) and replies
// with the scripted output/error for this call index (zero values once
// the script is exhausted).
func (f *fakeRunner) Down(_ context.Context, projectName string, removeVolumes bool) (string, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	call := f.downCallI
	f.downCallI = call + 1
	f.downCalls = append(f.downCalls, runnerCall{
		ProjectName:   projectName,
		RemoveVolumes: removeVolumes,
	})
	return pop(f.downOuts, f.downErrs, call)
}
// Ps records the call and replies with the scripted service list and
// error for this call index. Either half defaults to nil when its
// script has no entry at that index.
func (f *fakeRunner) Ps(_ context.Context, projectName, yamlPath string) ([]stack.Service, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	f.psCalls = append(f.psCalls, runnerCall{ProjectName: projectName, YAMLPath: yamlPath})

	call := f.psCallIdx
	f.psCallIdx++

	var (
		services []stack.Service
		psErr    error
	)
	if call < len(f.psResults) {
		services = f.psResults[call]
	}
	if call < len(f.psErrs) {
		psErr = f.psErrs[call]
	}
	return services, psErr
}
// pop looks up position n in two parallel script slices. Each half is
// resolved independently: an index past the end of outs yields "", an
// index past the end of errs yields nil. This lets a test script a
// single response without padding the slices for every other call.
func pop(outs []string, errs []error, n int) (string, error) {
	switch hasOut, hasErr := n < len(outs), n < len(errs); {
	case hasOut && hasErr:
		return outs[n], errs[n]
	case hasOut:
		return outs[n], nil
	case hasErr:
		return "", errs[n]
	default:
		return "", nil
	}
}
// withFakeRunner points newComposeRunner at f for the duration of one
// test and registers a cleanup that restores the previous factory.
// Nothing is returned: a test that wants to inspect the fake afterwards
// simply keeps the pointer it passed in.
func withFakeRunner(t *testing.T, f *fakeRunner) {
	t.Helper()
	previous := newComposeRunner
	t.Cleanup(func() {
		newComposeRunner = previous
	})
	newComposeRunner = func() composeRunner {
		return f
	}
}
// testStore opens a store on ":memory:" for the calling test and
// closes it again on cleanup. Failing to open the store aborts the
// test.
func testStore(t *testing.T) *store.Store {
	t.Helper()

	db, openErr := store.New(":memory:")
	if openErr != nil {
		t.Fatalf("open store: %v", openErr)
	}
	t.Cleanup(func() {
		_ = db.Close() // best-effort: nothing useful to do with a close error in a test
	})
	return db
}
// seedWorkload inserts the parent workload row that container rows
// reference by foreign key, with yamlText stored as the compose source
// config. It returns the new workload's ID for the caller to reuse.
func seedWorkload(t *testing.T, st *store.Store, name, yamlText string) string {
	t.Helper()

	encoded, err := json.Marshal(Config{ComposeYAML: yamlText})
	if err != nil {
		t.Fatalf("marshal config: %v", err)
	}

	row := store.Workload{
		Kind:         "plugin",
		Name:         name,
		SourceKind:   "compose",
		SourceConfig: string(encoded),
	}
	created, err := st.CreateWorkload(row)
	if err != nil {
		t.Fatalf("create workload: %v", err)
	}
	return created.ID
}
// TestDeploy_HappyPath runs Deploy against a fake runner whose Ps
// reports a single running "web" service. It checks that Up and Ps are
// each called once with the expected arguments and that the matching
// container row is written to the store.
func TestDeploy_HappyPath(t *testing.T) {
	withTempDir(t) // isolates the YAML scratch dir under t.TempDir()

	const composeYAML = "services:\n web:\n image: nginx:alpine\n"

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "myapp", composeYAML)

	runner := &fakeRunner{
		psResults: [][]stack.Service{{
			{Service: "web", State: "running", Status: "Up 5 seconds"},
		}},
	}
	withFakeRunner(t, runner)

	workload := plugin.Workload{
		ID:           wid,
		Name:         "myapp",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Deploy(context.Background(), deps, workload, plugin.DeploymentIntent{}); err != nil {
		t.Fatalf("Deploy: %v", err)
	}

	// Up called exactly once with the workload-derived project name.
	if n := len(runner.upCalls); n != 1 {
		t.Fatalf("Up called %d times, want 1", n)
	}
	up := runner.upCalls[0]
	if !strings.HasPrefix(up.ProjectName, "tf-myapp-") {
		t.Errorf("Up projectName = %q, want prefix tf-myapp-", up.ProjectName)
	}
	if !strings.HasSuffix(up.YAMLPath, "compose.yml") {
		t.Errorf("Up yamlPath = %q, want suffix compose.yml", up.YAMLPath)
	}

	// Ps follows Up to enumerate the resulting containers.
	if n := len(runner.psCalls); n != 1 {
		t.Fatalf("Ps called %d times, want 1", n)
	}

	// Service row written.
	row, err := deps.Store.GetContainerByID(wid + ":web")
	if err != nil {
		t.Fatalf("get container row: %v", err)
	}
	if got := row.WorkloadID; got != wid {
		t.Errorf("row.WorkloadID = %q, want %q", got, wid)
	}
	if got := row.Role; got != "web" {
		t.Errorf("row.Role = %q, want %q", got, "web")
	}
	if got := row.State; got != "running" {
		t.Errorf("row.State = %q, want %q", got, "running")
	}
}
// TestDeploy_EmptyYAMLConfig_RejectsBeforeExec checks that a workload
// whose decoded config carries an empty compose_yaml is refused with a
// descriptive error and that the runner's Up is never reached.
func TestDeploy_EmptyYAMLConfig_RejectsBeforeExec(t *testing.T) {
	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "empty", "services:\n web:\n image: x\n")

	runner := &fakeRunner{}
	withFakeRunner(t, runner)

	// The workload handed to Deploy carries an empty ComposeYAML even
	// though the seeded row does not.
	workload := plugin.Workload{
		ID:           wid,
		Name:         "empty",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: ""}),
	}

	err := (&source{}).Deploy(context.Background(), deps, workload, plugin.DeploymentIntent{})
	if err == nil {
		t.Fatal("Deploy accepted empty compose_yaml")
	}
	if msg := err.Error(); !strings.Contains(msg, "empty compose_yaml") {
		t.Errorf("error = %v, want substring \"empty compose_yaml\"", err)
	}
	if n := len(runner.upCalls); n != 0 {
		t.Errorf("Up should not have been called; got %d calls", n)
	}
}
// TestDeploy_UpFailure_PropagatesAndIncludesTruncatedOutput scripts Up
// to fail with more than 1024 bytes of output. The Deploy error must
// name the failing step, wrap the Up error, and carry the
// truncated-output marker, and Ps must not run afterwards.
func TestDeploy_UpFailure_PropagatesAndIncludesTruncatedOutput(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: bad-image\n"

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "fail", composeYAML)
	workload := plugin.Workload{
		ID:           wid,
		Name:         "fail",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}

	noisyOutput := strings.Repeat("docker compose log noise ", 200) // > 1024 bytes
	runner := &fakeRunner{
		upOuts: []string{noisyOutput},
		upErrs: []error{errors.New("exit status 1")},
	}
	withFakeRunner(t, runner)

	err := (&source{}).Deploy(context.Background(), deps, workload, plugin.DeploymentIntent{})
	if err == nil {
		t.Fatal("Deploy accepted Up failure")
	}

	msg := err.Error()
	expectations := []struct {
		substr string
		format string
	}{
		{"docker compose up", "error = %v, want substring \"docker compose up\""},
		{"exit status 1", "error = %v, want wrapped Up err"},
		{"(truncated)", "error = %v, want truncated-output marker"},
	}
	for _, exp := range expectations {
		if !strings.Contains(msg, exp.substr) {
			t.Errorf(exp.format, err)
		}
	}

	// Ps must not be called when Up failed.
	if n := len(runner.psCalls); n != 0 {
		t.Errorf("Ps called %d times after Up failure; want 0", n)
	}
}
// TestDeploy_UpSucceedsButPsFails_SurfacesError covers the case where
// `up` succeeds but the follow-up enumeration fails. Deploy must report
// the failure so the UI doesn't show an empty containers index for a
// running stack.
func TestDeploy_UpSucceedsButPsFails_SurfacesError(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n"

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "psfail", composeYAML)
	workload := plugin.Workload{
		ID:           wid,
		Name:         "psfail",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}

	// Up is left unscripted (succeeds); only Ps errors.
	runner := &fakeRunner{psErrs: []error{errors.New("compose ps boom")}}
	withFakeRunner(t, runner)

	err := (&source{}).Deploy(context.Background(), deps, workload, plugin.DeploymentIntent{})
	if err == nil {
		t.Fatal("Deploy ignored Ps failure")
	}
	if msg := err.Error(); !strings.Contains(msg, "sync container rows") {
		t.Errorf("error = %v, want substring \"sync container rows\"", err)
	}
}
// TestTeardown_DropsContainerRows_EvenWhenDownFails verifies that a
// failing docker compose down does not leave container rows orphaned
// in the DB: Teardown still succeeds and the rows are gone.
func TestTeardown_DropsContainerRows_EvenWhenDownFails(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n"
	roles := []string{"web", "db"}

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "tdown", composeYAML)

	// Seed two service rows the way Deploy would.
	for _, role := range roles {
		seeded := store.Container{
			ID:           wid + ":" + role,
			WorkloadID:   wid,
			WorkloadKind: "compose",
			Role:         role,
			Host:         "local",
			State:        "running",
		}
		if err := deps.Store.UpsertContainer(seeded); err != nil {
			t.Fatalf("seed container: %v", err)
		}
	}

	runner := &fakeRunner{downErrs: []error{errors.New("compose project unknown")}}
	withFakeRunner(t, runner)

	workload := plugin.Workload{
		ID:           wid,
		Name:         "tdown",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Teardown(context.Background(), deps, workload); err != nil {
		t.Fatalf("Teardown: %v", err)
	}

	// Down requested removeVolumes=true (matches the docstring claim).
	if n := len(runner.downCalls); n != 1 {
		t.Fatalf("Down calls = %d, want 1", n)
	}
	if down := runner.downCalls[0]; !down.RemoveVolumes {
		t.Errorf("Down removeVolumes = false, want true (workload teardown is destructive)")
	}

	// Rows gone despite the Down error.
	for _, role := range roles {
		_, err := deps.Store.GetContainerByID(wid + ":" + role)
		if !errors.Is(err, store.ErrNotFound) {
			t.Errorf("container row %q survived teardown: err=%v", role, err)
		}
	}
}
// TestTeardown_HappyPath seeds one container row, tears the workload
// down with a runner that reports no errors, and checks that Down ran
// once and the row was removed.
func TestTeardown_HappyPath(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n"

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "tdown2", composeYAML)
	rowID := wid + ":web"

	seeded := store.Container{
		ID:           rowID,
		WorkloadID:   wid,
		WorkloadKind: "compose",
		Role:         "web",
		Host:         "local",
		State:        "running",
	}
	if err := deps.Store.UpsertContainer(seeded); err != nil {
		t.Fatalf("seed: %v", err)
	}

	runner := &fakeRunner{}
	withFakeRunner(t, runner)

	workload := plugin.Workload{
		ID:           wid,
		Name:         "tdown2",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Teardown(context.Background(), deps, workload); err != nil {
		t.Fatalf("Teardown: %v", err)
	}

	if n := len(runner.downCalls); n != 1 {
		t.Errorf("Down calls = %d, want 1", n)
	}
	_, err := deps.Store.GetContainerByID(rowID)
	if !errors.Is(err, store.ErrNotFound) {
		t.Errorf("container row survived teardown: err=%v", err)
	}
}
// TestReconcile_PsSuccess_UpsertsRows has Ps report two running
// services and checks that Reconcile leaves a "running" container row
// for each of them.
func TestReconcile_PsSuccess_UpsertsRows(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n db:\n image: postgres\n"
	roles := []string{"web", "db"}

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "rec", composeYAML)

	runner := &fakeRunner{
		psResults: [][]stack.Service{{
			{Service: "web", State: "running"},
			{Service: "db", State: "running"},
		}},
	}
	withFakeRunner(t, runner)

	workload := plugin.Workload{
		ID:           wid,
		Name:         "rec",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Reconcile(context.Background(), deps, workload); err != nil {
		t.Fatalf("Reconcile: %v", err)
	}

	for _, role := range roles {
		row, err := deps.Store.GetContainerByID(wid + ":" + role)
		if err != nil {
			// Keep going so a missing row for one role doesn't hide
			// the result for the other.
			t.Errorf("row %q missing after reconcile: %v", role, err)
			continue
		}
		if got := row.State; got != "running" {
			t.Errorf("row %q state = %q, want \"running\"", role, got)
		}
	}
}
// TestReconcile_PsFailure_MarksExistingRowsMissing covers compose ps
// failing (project unknown to Docker). Reconcile must return nil and
// flip the existing row to "missing" rather than deleting it, so the
// UI can surface the desync to the operator.
func TestReconcile_PsFailure_MarksExistingRowsMissing(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n web:\n image: nginx\n"

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "missing", composeYAML)
	rowID := wid + ":web"

	seeded := store.Container{
		ID:           rowID,
		WorkloadID:   wid,
		WorkloadKind: "compose",
		Role:         "web",
		Host:         "local",
		State:        "running",
	}
	if err := deps.Store.UpsertContainer(seeded); err != nil {
		t.Fatalf("seed: %v", err)
	}

	runner := &fakeRunner{psErrs: []error{errors.New("no such project")}}
	withFakeRunner(t, runner)

	workload := plugin.Workload{
		ID:           wid,
		Name:         "missing",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Reconcile(context.Background(), deps, workload); err != nil {
		t.Fatalf("Reconcile returned %v; should be nil even on Ps failure", err)
	}

	row, err := deps.Store.GetContainerByID(rowID)
	if err != nil {
		t.Fatalf("row missing entirely (should be marked, not deleted): %v", err)
	}
	if got := row.State; got != "missing" {
		t.Errorf("row.State = %q, want \"missing\"", got)
	}
}
// TestReconcile_FallsBackToStatusWhenStateEmpty feeds Reconcile a
// service whose State (enum) is empty but whose Status (human string)
// is set, as some compose versions report for non-running services.
// The stored row's state must then carry the Status text.
func TestReconcile_FallsBackToStatusWhenStateEmpty(t *testing.T) {
	withTempDir(t)

	const composeYAML = "services:\n worker:\n image: alpine\n"

	deps := plugin.Deps{Store: testStore(t)}
	wid := seedWorkload(t, deps.Store, "fallback", composeYAML)

	runner := &fakeRunner{
		psResults: [][]stack.Service{{
			{Service: "worker", State: "", Status: "Exit 0"},
		}},
	}
	withFakeRunner(t, runner)

	workload := plugin.Workload{
		ID:           wid,
		Name:         "fallback",
		SourceKind:   "compose",
		SourceConfig: mustMarshalConfig(t, Config{ComposeYAML: composeYAML}),
	}
	if err := (&source{}).Reconcile(context.Background(), deps, workload); err != nil {
		t.Fatalf("Reconcile: %v", err)
	}

	row, err := deps.Store.GetContainerByID(wid + ":worker")
	if err != nil {
		t.Fatalf("get row: %v", err)
	}
	if got := row.State; got != "Exit 0" {
		t.Errorf("row.State = %q, want \"Exit 0\" (Status fallback)", got)
	}
}
// mustMarshalConfig JSON-encodes cfg into the raw-message form that
// SourceConfig expects, failing the test if encoding errors. Building
// fixtures through Config, rather than hand-writing the JSON string,
// keeps them from drifting away from the production decoder when a
// Config field is renamed.
func mustMarshalConfig(t *testing.T, cfg Config) json.RawMessage {
	t.Helper()

	raw, marshalErr := json.Marshal(cfg)
	if marshalErr != nil {
		t.Fatalf("marshal config: %v", marshalErr)
	}
	return raw
}
// Compile-time guards. If the composeRunner interface changes shape,
// the build breaks here instead of the mismatch surfacing at runtime.

// The production implementation must keep satisfying the interface so
// the real code path keeps building.
var _ composeRunner = (*stack.Compose)(nil)

// The test double must keep satisfying it as well.
var _ composeRunner = (*fakeRunner)(nil)
@@ -0,0 +1,574 @@
package dockerfile
import (
"context"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"strconv"
"strings"
"time"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/events"
"github.com/alexei/tinyforge/internal/notify"
"github.com/alexei/tinyforge/internal/proxy"
"github.com/alexei/tinyforge/internal/staticsite"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// healthCheckDelay is how long deploy waits after StartContainer
// before probing IsContainerRunning. It mirrors the static plugin's
// window: short enough not to slow happy-path deploys, long enough to
// catch crash-on-boot failures (missing env, bad CMD, port conflict).
const healthCheckDelay time.Duration = 3 * time.Second
// deploy runs one end-to-end sync of a dockerfile workload:
//
//  1. fetch the latest commit SHA from the configured git provider
//  2. skip if SHA + container + proxy are all still healthy
//  3. clone the repo into a temp dir
//  4. resolve the build context + Dockerfile location
//  5. `docker build -t <tag> -f <dockerfile> <context>`
//  6. recreate the container with the new image
//  7. health-probe the container, surface logs on failure
//  8. reconfigure the proxy route
//  9. tear down the previous container (different ID) once we're sure
//     the new one is healthy and proxied
//
// Each step writes its own status update so the dashboard's runtime-
// state panel can show a useful intermediate state when the deploy
// stalls on the slow step (almost always the build).
//
// intent.Reason decides whether the unchanged-SHA short-circuit applies:
// an empty reason, "manual" and "promote" always rebuild.
//
// On failure the persisted status is set to "failed" (with a sanitized
// message) and an error is returned. Containers created by this call
// are removed again on the start / health-check failure paths; those
// removals run on a context detached from ctx's cancellation so that a
// cancelled deploy does not leave the container behind.
func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
	cfg, err := plugin.SourceConfigOf[Config](w)
	if err != nil {
		return fmt.Errorf("dockerfile source: decode config: %w", err)
	}
	prev, prevContainer, err := loadState(deps, w)
	if err != nil {
		return err
	}

	// Force a full rebuild on manual / promote / first-time deploys
	// (no Reason at all also implies manual). Schedule / git triggers
	// honour the unchanged-SHA short-circuit so cron polling does not
	// rebuild minute-by-minute when nothing changed.
	force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote"

	// Decrypt the access token if present. Token never escapes this
	// frame: any error message routes through sanitizeError(_, token)
	// which redacts the literal substring. A decrypt failure is logged
	// and the deploy continues with an empty token.
	token := ""
	if cfg.AccessToken != "" {
		decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken)
		if derr != nil {
			slog.Warn("dockerfile source: failed to decrypt access token",
				"workload", w.Name, "error", derr)
		} else {
			token = decrypted
		}
	}

	provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create provider: %v", err), token))
		return fmt.Errorf("create provider: %w", err)
	}
	latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token))
		return fmt.Errorf("get latest commit: %w", err)
	}

	domain := primaryDomain(deps, w)
	prevContainerID := ""
	prevProxyRouteID := ""
	if prevContainer != nil {
		prevContainerID = prevContainer.ContainerID
		prevProxyRouteID = prevContainer.ProxyRouteID
	}

	// Short-circuit: SHA unchanged AND container is still running AND
	// (if there's a public face) the proxy route still exists. Manual
	// deploys skip this entirely.
	//
	// We deliberately do NOT gate this on prev.Status == "deployed". A
	// transient failure (e.g. a one-off proxy-check error) leaves the
	// persisted status as "failed"; if we required "deployed" here, every
	// subsequent cron/git poll with the same SHA would fall through to a
	// full clone + docker build despite a perfectly healthy running
	// container — a rebuild storm that burns CPU/disk until a new commit
	// lands. Instead we trust the live container/proxy state and heal the
	// stale status via healUnchanged.
	if !force && latestSHA == prev.LastCommitSHA && prevContainerID != "" {
		// The probe error is dropped on purpose: a failed probe leaves
		// running == false, which falls through to a redeploy.
		running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID)
		switch {
		case !running:
			slog.Info("dockerfile: container not running, forcing redeploy", "workload", w.Name)
		case domain != "":
			proxyOK, perr := deps.Proxy.RouteExists(ctx, domain)
			switch {
			case perr != nil:
				slog.Warn("dockerfile: proxy check failed, forcing redeploy",
					"workload", w.Name, "error", perr)
			case !proxyOK:
				slog.Info("dockerfile: proxy route missing, forcing redeploy", "workload", w.Name)
			default:
				return healUnchanged(deps, w, prev, latestSHA)
			}
		default:
			return healUnchanged(deps, w, prev, latestSHA)
		}
	}

	updateStatus(deps, w, "syncing", prev.LastCommitSHA, "")
	publishEvent(deps, w, "syncing")

	// Clone the repo into a temp dir. We always download the entire
	// repo tree (folderPath = ""); a ContextPath subset is applied
	// at build time, not at download time, so a Dockerfile in
	// `./docker/Dockerfile` with `ContextPath=""` still works.
	cloneDir, err := os.MkdirTemp("", "tf-build-"+idShort(w)+"-*")
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create clone dir: %v", err), token))
		return fmt.Errorf("create clone dir: %w", err)
	}
	defer os.RemoveAll(cloneDir)
	if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, "", cloneDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("download repo: %v", err), token))
		return fmt.Errorf("download repo: %w", err)
	}

	// Resolve the build context (with symlink-aware escape check) and
	// verify the Dockerfile is actually present before sending the
	// build off to the daemon.
	contextDir, err := resolveContextDir(cloneDir, cfg.ContextPath)
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("resolve context: %v", err), token))
		return fmt.Errorf("resolve context: %w", err)
	}
	if err := verifyDockerfileExists(contextDir, cfg.DockerfilePath); err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(err.Error(), token))
		return err
	}

	imageTag := imageTagFor(w)
	updateStatus(deps, w, "building", latestSHA, "")
	publishEvent(deps, w, "building")

	// Bridge per-line build output onto the event bus so /api/events
	// subscribers (the dashboard's live tail) can show progress while
	// the daemon chugs. The bus is non-blocking — slow subscribers drop
	// events rather than backpressure the build — so this is safe to
	// call from the hot scan loop.
	logFn := func(line string) {
		publishBuildLog(deps, w, line)
	}
	if err := deps.Docker.BuildImageAt(ctx, contextDir, cfg.DockerfilePath, imageTag, logFn); err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("docker build: %v", err), token))
		return fmt.Errorf("docker build: %w", err)
	}

	env := buildEnv(deps, w.ID)
	containerPort := strconv.Itoa(cfg.Port)
	settings, err := deps.Store.GetSettings()
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("get settings: %v", err), token))
		return fmt.Errorf("get settings: %w", err)
	}
	networkName := settings.Network
	networkID, err := deps.Docker.EnsureNetwork(ctx, networkName)
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("ensure network: %v", err), token))
		return fmt.Errorf("ensure network: %w", err)
	}
	containerName := containerNameFor(w)

	// Per-face proxy labels (Traefik consumes these; NPM ignores them).
	labels := map[string]string{}
	if domain != "" {
		if l := deps.Proxy.ContainerLabels(domain, cfg.Port); l != nil {
			for k, v := range l {
				labels[k] = v
			}
		}
	}

	cc := docker.ContainerConfig{
		Name:         containerName,
		Image:        imageTag,
		Env:          env,
		ExposedPorts: []string{containerPort + "/tcp"},
		NetworkName:  networkName,
		NetworkID:    networkID,
		Labels:       labels,
		WorkloadID:   w.ID,
		// Dockerfile workloads are tagged as "build" so the dashboard
		// and any filtered query can distinguish them from static sites
		// (which serve files) and image-source containers (which pull
		// pre-built images from a registry).
		WorkloadKind: string(store.WorkloadKindBuild),
		Role:         "",
	}

	// rollbackTimeout bounds each detached cleanup call made through
	// discardContainer.
	const rollbackTimeout = 30 * time.Second
	// discardContainer force-removes a container this deploy created.
	// It runs on a context that keeps ctx's values but not its
	// cancellation: the rollback paths below are reached exactly when
	// ctx may already be cancelled, and issuing the removal on a
	// cancelled context would be expected to fail and leave the
	// container orphaned.
	discardContainer := func(id string) {
		cleanupCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), rollbackTimeout)
		defer cancel()
		deps.Docker.RemoveContainer(cleanupCtx, id, true)
	}

	containerID, err := deps.Docker.CreateContainer(ctx, cc)
	if err != nil {
		// Name conflict — best-effort cleanup of any prior container
		// (by ID first; by name as a fallback) and one retry.
		if prevContainerID != "" {
			deps.Docker.StopContainer(ctx, prevContainerID, 10)
			deps.Docker.RemoveContainer(ctx, prevContainerID, true)
		}
		removeContainerByName(ctx, deps, containerName)
		containerID, err = deps.Docker.CreateContainer(ctx, cc)
		if err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("create container: %v", err), token))
			return fmt.Errorf("create container: %w", err)
		}
	}
	if err := deps.Docker.StartContainer(ctx, containerID); err != nil {
		discardContainer(containerID)
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("start container: %v", err), token))
		return fmt.Errorf("start container: %w", err)
	}

	// Brief health-check window — catch crash-on-boot. ctx-aware so a
	// cancelled deploy returns promptly. On failure surface the tail
	// of the container's logs as the error reason; that's almost
	// always what the operator needs to debug.
	select {
	case <-ctx.Done():
		// ctx is cancelled here, so the removal must not use it.
		discardContainer(containerID)
		updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check")
		return ctx.Err()
	case <-time.After(healthCheckDelay):
	}
	running, runErr := deps.Docker.IsContainerRunning(ctx, containerID)
	if runErr != nil || !running {
		logMsg := "container exited immediately after start"
		if logs, logErr := deps.Docker.ContainerLogs(ctx, containerID, false, "40"); logErr == nil {
			buf, _ := io.ReadAll(logs)
			logs.Close()
			if len(buf) > 0 {
				// Pass `env` so any decrypted KEY=VALUE pair that the
				// container's startup output happens to echo (think
				// `RUN echo $DB_PASSWORD` in a debug Dockerfile) is
				// redacted before it lands in the operator-visible
				// last_error field.
				logMsg = sanitizeErrorWithSecrets(string(buf), token, env)
			}
		}
		// The probe may have failed because ctx was cancelled mid-call,
		// so remove on the detached context here as well.
		discardContainer(containerID)
		updateStatus(deps, w, "failed", latestSHA, logMsg)
		return fmt.Errorf("container not running: %s", logMsg)
	}

	// Resolve proxy target: in-network DNS by default, NPM-remote
	// override uses (settings.ServerIP, hostPort).
	forwardHost := containerName
	forwardPort := cfg.Port
	if settings.NpmRemote && settings.ProxyProvider == "npm" {
		if settings.ServerIP != "" {
			hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp")
			if hpErr != nil {
				slog.Warn("dockerfile: could not get host port for remote NPM",
					"workload", w.Name, "error", hpErr)
			} else {
				forwardHost = settings.ServerIP
				forwardPort = int(hostPort)
			}
		}
	}

	// Configure proxy if a domain is set. Replace any prior route
	// in-place so traffic shifts atomically over to the new container.
	proxyRouteID := prevProxyRouteID
	if domain != "" {
		if prevProxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, prevProxyRouteID)
		}
		routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{
			SSLCertificateID: settings.SSLCertificateID,
		})
		if rerr != nil {
			// A proxy failure is logged but does not fail the deploy.
			slog.Warn("dockerfile: failed to configure proxy",
				"workload", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr)
		} else {
			proxyRouteID = routeID
			slog.Info("dockerfile: proxy configured",
				"workload", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID)
		}
	}

	// Drop the previous container only after the new one is healthy
	// + routed. Different-ID-than-previous tells us we created a
	// fresh one (vs returning the same ID via UpsertContainer reuse).
	if prevContainerID != "" && prevContainerID != containerID {
		deps.Docker.StopContainer(ctx, prevContainerID, 10)
		deps.Docker.RemoveContainer(ctx, prevContainerID, true)
	}

	// Single transactional write of new state + container metadata.
	// On failure: tear down the just-created container + proxy route
	// so we don't leave orphans behind for the next deploy to trip
	// over.
	if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
		rs.LastCommitSHA = latestSHA
		rs.LastSyncAt = store.Now()
		rs.LastError = ""
		rs.Status = "deployed"
		c.ContainerID = containerID
		c.ProxyRouteID = proxyRouteID
		c.Subdomain = domain
		c.State = "running"
		c.Port = cfg.Port
		c.ImageRef = imageTag
	}); err != nil {
		slog.Error("dockerfile: failed to persist deploy state — rolling back",
			"workload", w.Name, "error", err)
		if proxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, proxyRouteID)
		}
		deps.Docker.StopContainer(ctx, containerID, 10)
		deps.Docker.RemoveContainer(ctx, containerID, true)
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token))
		return fmt.Errorf("persist deploy state: %w", err)
	}

	publishEvent(deps, w, "deployed")
	dispatchBuildNotification(deps, w, domain, "deployed", "")
	slog.Info("dockerfile deployed",
		"workload", w.Name,
		"sha", shortSHA(latestSHA),
		"image", imageTag)
	return nil
}
// updateStatus records a status transition for the workload and fires
// the side effects attached to terminal states.
//
// It persists status and errMsg into the runtime state (plus commitSHA
// when non-empty) and maps the status onto the container row's State:
// "deployed" -> "running", "stopped" -> "stopped", "failed" -> "failed".
// Any other status (e.g. "syncing", "building") leaves the row's State
// as the previous deploy left it.
//
// A persistence failure is logged, not returned. Afterwards a "failed"
// status publishes an event, and "deployed" / "failed" both dispatch a
// build notification.
func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) {
	// Statuses absent from this table must not churn the container row.
	rowStateFor := map[string]string{
		"deployed": "running",
		"stopped":  "stopped",
		"failed":   "failed",
	}
	apply := func(rs *runtimeState, c *store.Container) {
		rs.Status, rs.LastError = status, errMsg
		if commitSHA != "" {
			rs.LastCommitSHA = commitSHA
		}
		if rowState, mapped := rowStateFor[status]; mapped {
			c.State = rowState
		}
	}
	if err := saveState(deps, w, apply); err != nil {
		slog.Error("dockerfile: failed to update status",
			"id", w.ID, "status", status, "error", err)
	}

	failed := status == "failed"
	if failed {
		publishEvent(deps, w, "failed: "+errMsg)
	}
	if failed || status == "deployed" {
		dispatchBuildNotification(deps, w, primaryDomain(deps, w), status, errMsg)
	}
}
// dispatchBuildNotification builds the notify.Event for a build outcome
// and hands it to plugin.DispatchNotificationForWorkload, which owns the
// actual routing.
//
// The event type is "build_failure" when status is "failed" and
// "build_success" for every other status. URL is "https://<domain>" when
// a domain is supplied and empty otherwise.
func dispatchBuildNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
	evt := notify.Event{
		Type:    "build_success",
		Project: w.Name,
		Error:   errMsg,
	}
	if status == "failed" {
		evt.Type = "build_failure"
	}
	if domain != "" {
		evt.URL = "https://" + domain
	}
	plugin.DispatchNotificationForWorkload(deps, w, evt)
}
// publishEvent persists an event_log row for the workload and, once the
// row is stored, publishes the same record on the event bus.
//
// Severity is "error" when status starts with "failed" and "info"
// otherwise. The message has the shape `Build "<name>": <status>`.
// Metadata is a JSON object carrying workload_id, workload_name and
// status; if marshalling fails it degrades to "{}". When the store
// insert fails the error is logged and nothing is published.
func publishEvent(deps plugin.Deps, w plugin.Workload, status string) {
	level := "info"
	if strings.HasPrefix(status, "failed") {
		level = "error"
	}
	text := fmt.Sprintf("Build %q: %s", w.Name, status)

	meta := "{}"
	encoded, err := json.Marshal(map[string]string{
		"workload_id":   w.ID,
		"workload_name": w.Name,
		"status":        status,
	})
	if err != nil {
		slog.Error("dockerfile: marshal event metadata", "error", err)
	} else {
		meta = string(encoded)
	}

	stored, err := deps.Store.InsertEvent(store.EventLog{
		Source:   "dockerfile",
		Severity: level,
		Message:  text,
		Metadata: meta,
	})
	if err != nil {
		slog.Error("dockerfile: failed to persist event log", "error", err)
		return
	}

	// The bus payload reuses the ID and timestamp the store assigned.
	payload := events.EventLogPayload{
		ID:        stored.ID,
		Source:    "dockerfile",
		Severity:  level,
		Message:   text,
		Metadata:  meta,
		CreatedAt: stored.CreatedAt,
	}
	deps.Events.Publish(events.Event{Type: events.EventLog, Payload: payload})
}
// publishBuildLog publishes one EventBuildLog for a build output line.
//
// Trailing "\r" / "\n" characters are trimmed first, and a line that is
// empty after trimming is dropped without publishing. The payload is
// tagged with the workload ID and the fixed stream name "stdout".
//
// NOTE(review): the original comment states the bus drops events under
// backpressure and never blocks the build loop; that is a property of
// deps.Events and is not visible from this function.
func publishBuildLog(deps plugin.Deps, w plugin.Workload, line string) {
	text := strings.TrimRight(line, "\r\n")
	if len(text) == 0 {
		return
	}
	payload := events.BuildLogPayload{
		WorkloadID: w.ID,
		Line:       text,
		Stream:     "stdout",
	}
	deps.Events.Publish(events.Event{Type: events.EventBuildLog, Payload: payload})
}
// healUnchanged handles the "nothing to deploy" outcome. It logs that
// there are no changes and, if the previously persisted status is
// anything other than "deployed", rewrites it to "deployed" (clearing
// LastError and marking the container row "running").
//
// The repair goes through saveState rather than updateStatus on purpose:
// updateStatus would dispatch a build notification for the "deployed"
// status, which is not wanted for a mere reconciliation.
//
// Always returns nil; a failed repair is only logged.
func healUnchanged(deps plugin.Deps, w plugin.Workload, prev runtimeState, latestSHA string) error {
	slog.Info("dockerfile: no changes", "workload", w.Name, "sha", shortSHA(latestSHA))

	if prev.Status != "deployed" {
		repair := func(rs *runtimeState, c *store.Container) {
			rs.Status, rs.LastError = "deployed", ""
			c.State = "running"
		}
		if err := saveState(deps, w, repair); err != nil {
			slog.Warn("dockerfile: failed to heal stale status to deployed",
				"workload", w.Name, "error", err)
		}
	}
	return nil
}
// removeContainerByName lists the containers Docker knows about and
// stops + force-removes EVERY one whose name equals name, so a name
// conflict in CreateContainer is recoverable.
//
// The loop deliberately does not stop at the first match: this recovery
// path exists because a conflict already occurred, and a prior partial
// deploy may have left more than one matching artifact.
//
// The operation is best-effort and returns nothing. A failure to list
// containers is logged (otherwise the follow-up name-conflict error
// would be undiagnosable) and the cleanup is skipped; the results of the
// individual stop/remove calls are intentionally not inspected.
func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) {
	containers, err := deps.Docker.ListContainers(ctx, nil)
	if err != nil {
		slog.Warn("dockerfile: list containers for name-conflict cleanup",
			"name", name, "error", err)
		return
	}
	for _, c := range containers {
		if c.Name != name {
			continue
		}
		deps.Docker.StopContainer(ctx, c.ID, 10)
		deps.Docker.RemoveContainer(ctx, c.ID, true)
	}
}
// primaryDomain derives a hostname from the first of the workload's
// public faces that has a Subdomain or a Domain set; faces with neither
// are skipped.
//
//   - Subdomain and Domain both set: "<subdomain>.<domain>".
//   - Domain only: the domain as-is.
//   - Subdomain only: "<subdomain>.<settings.Domain>" when the stored
//     settings load and carry a non-empty Domain, else the bare
//     subdomain.
//
// Returns "" when no face qualifies.
//
// NOTE(review): the previous comment said "first enabled public face",
// but no enabled flag is checked here — confirm whether PublicFaces is
// pre-filtered by the caller.
func primaryDomain(deps plugin.Deps, w plugin.Workload) string {
	for _, face := range w.PublicFaces {
		sub, dom := face.Subdomain, face.Domain
		if sub == "" && dom == "" {
			continue
		}
		if sub == "" {
			return dom
		}
		if dom != "" {
			return sub + "." + dom
		}
		// Bare subdomain: fall back to the globally configured domain.
		if settings, err := deps.Store.GetSettings(); err == nil && settings.Domain != "" {
			return sub + "." + settings.Domain
		}
		return sub
	}
	return ""
}
// shortSHA abbreviates a commit SHA to its first 8 bytes for log lines.
// Inputs of 8 bytes or fewer (including "") are returned unchanged.
func shortSHA(sha string) string {
	const keep = 8
	if len(sha) <= keep {
		return sha
	}
	return sha[:keep]
}
@@ -0,0 +1,131 @@
// Package dockerfile implements the "dockerfile" source: a git-repo-backed
// deployable that builds a Docker image from a user-supplied Dockerfile
// and runs one container. This is the "self-hosted Vercel" Source —
// users point at a Git repo containing a Dockerfile and Tinyforge
// handles clone → build → run → proxy in one shot, with no external CI
// pipeline.
//
// Architecturally the plugin sits between `static` (clones a Git repo,
// builds an image, runs one container) and `image` (richer runtime
// shape: ports, healthcheck, env, volumes). The deploy pipeline mirrors
// static — same git-fetch, same image-tag/container-name shape, same
// container-row state persistence — but the build step uses the
// operator's Dockerfile instead of generating one.
//
// The full pipeline is implemented inline in this package
// (deploy.go / teardown.go / reconcile.go) so a new dockerfile source
// kind is usable immediately on init() — no separate registration step
// in the deployer.
package dockerfile
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// Config is the per-workload source config blob, decoded from the JSON
// passed to Validate. Per the original author it mirrors the shape of
// the static plugin's Config so the UI wizard can largely reuse the
// existing Git-discovery + branch-picker + repo-picker components.
//
// Build-side fields:
//
//   - DockerfilePath: path to the Dockerfile *within the context*
//     directory. Defaults to "Dockerfile". Use e.g. "docker/Dockerfile"
//     when the operator's repo keeps Dockerfiles in a subfolder.
//   - ContextPath: subfolder of the cloned repo to use as the build
//     context. Defaults to "" (repo root). Use e.g. "./api" when the
//     repo's Dockerfile lives next to a backend service in a monorepo.
//
// Both paths must be relative and free of ".."; Validate enforces this.
//
// Runtime-side fields:
//
//   - Port: container port the workload listens on. Required; Validate
//     accepts 1..65535.
//   - Healthcheck: optional curl-style probe; empty disables.
//
// Env vars and volume mounts are handled out-of-band via the
// workload_env and workload_volumes tables, mirroring the image source.
type Config struct {
	Provider       string `json:"provider"`        // "gitea" | "github" | "gitlab"; "" = autodetect
	BaseURL        string `json:"base_url"`        // e.g. https://git.example.com
	RepoOwner      string `json:"repo_owner"`      // required (non-blank)
	RepoName       string `json:"repo_name"`       // required (non-blank)
	Branch         string `json:"branch"`
	ContextPath    string `json:"context_path"`    // path within repo (root by default)
	DockerfilePath string `json:"dockerfile_path"` // relative to context_path; "Dockerfile" by default
	AccessToken    string `json:"access_token"`    // encrypted; optional for public repos
	Port           int    `json:"port"`            // required; 1..65535
	Healthcheck    string `json:"healthcheck,omitempty"`
}
// source is the stateless receiver for the "dockerfile" source kind. It
// carries no fields; its methods (Kind, SchemaSample, Validate, Deploy,
// Teardown, Reconcile) delegate to package-level functions.
type source struct{}
// init registers the dockerfile source with the plugin registry at
// package load. Registration is eager because the whole deploy pipeline
// lives inside this package, so the kind is usable as soon as the
// package is imported.
func init() {
	plugin.RegisterSource(new(source))
}
// Kind returns the source kind identifier, "dockerfile".
func (*source) Kind() string {
	const kind = "dockerfile"
	return kind
}
// SchemaSample returns an example Config with placeholder repository
// coordinates, the default Dockerfile name and port 8080. ContextPath
// is left empty (repo root) and no access token is set.
func (*source) SchemaSample() any {
	var sample Config
	sample.Provider = "gitea"
	sample.BaseURL = "https://git.example.com"
	sample.RepoOwner = "owner"
	sample.RepoName = "myservice"
	sample.Branch = "main"
	sample.ContextPath = ""
	sample.DockerfilePath = "Dockerfile"
	sample.Port = 8080
	return sample
}
// Validate rejects obviously-malformed configs before the deploy
// pipeline materializes a temp dir, downloads a repo, and burns
// minutes of build time on input that was never going to work.
//
// It returns an error when:
//   - cfg is empty or is not valid JSON for Config;
//   - repo_owner or repo_name is blank (whitespace-only counts as blank);
//   - port is outside 1..65535;
//   - dockerfile_path or context_path starts with "/" or contains "..".
//
// Path errors name the offending field so the operator can tell which
// of the two inputs to fix.
func (*source) Validate(cfg json.RawMessage) error {
	if len(cfg) == 0 {
		return fmt.Errorf("dockerfile source: config is required")
	}
	var c Config
	if err := json.Unmarshal(cfg, &c); err != nil {
		return fmt.Errorf("dockerfile source: invalid json: %w", err)
	}
	if strings.TrimSpace(c.RepoOwner) == "" || strings.TrimSpace(c.RepoName) == "" {
		return fmt.Errorf("dockerfile source: repo_owner and repo_name are required")
	}
	if c.Port <= 0 || c.Port > 65535 {
		return fmt.Errorf("dockerfile source: port must be between 1 and 65535 (got %d)", c.Port)
	}

	// Defense in depth: a leading "/" or any ".." in DockerfilePath /
	// ContextPath could escape the build context. The deploy-time helpers
	// re-check; rejecting here gives the operator a clear error at
	// save-time instead of a confusing "no such file" mid-build.
	paths := []struct{ field, value string }{
		{"dockerfile_path", c.DockerfilePath},
		{"context_path", c.ContextPath},
	}
	for _, p := range paths {
		if p.value == "" {
			continue // empty means "use the default"
		}
		if strings.HasPrefix(p.value, "/") {
			return fmt.Errorf("dockerfile source: %s %q must be relative", p.field, p.value)
		}
		if strings.Contains(p.value, "..") {
			return fmt.Errorf("dockerfile source: %s %q must not contain '..'", p.field, p.value)
		}
	}
	return nil
}
// Deploy runs the package-level deploy pipeline for the workload and
// returns its error unchanged.
func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
	err := deploy(ctx, deps, w, intent)
	return err
}
// Teardown delegates to the package-level teardown function and returns
// its error unchanged.
func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	err := teardown(ctx, deps, w)
	return err
}
// Reconcile delegates to the package-level reconcile function and
// returns its error unchanged.
func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	err := reconcile(ctx, deps, w)
	return err
}
@@ -0,0 +1,288 @@
package dockerfile
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// ── Source interface plumbing ───────────────────────────────────────
// TestSource_Kind pins the kind identifier reported by the source.
func TestSource_Kind(t *testing.T) {
	got := (&source{}).Kind()
	if got == "dockerfile" {
		return
	}
	t.Fatalf("Kind = %q, want \"dockerfile\"", got)
}
// TestSource_Registered_AtInit checks that importing the package was
// enough to make the "dockerfile" kind resolvable through the plugin
// registry. A failure points at plugin.RegisterSource or at this
// package's init.
func TestSource_Registered_AtInit(t *testing.T) {
	src, err := plugin.GetSource("dockerfile")
	switch {
	case err != nil:
		t.Fatalf("GetSource(dockerfile): %v", err)
	case src.Kind() != "dockerfile":
		t.Fatalf("registered source has wrong kind: %q", src.Kind())
	}
}
// TestSource_SchemaSample_RoundTrips ensures the advertised sample
// config marshals to JSON and passes the source's own Validate.
func TestSource_SchemaSample_RoundTrips(t *testing.T) {
	var src source
	encoded, err := json.Marshal(src.SchemaSample())
	if err != nil {
		t.Fatalf("marshal sample: %v", err)
	}
	if verr := src.Validate(encoded); verr != nil {
		t.Fatalf("Validate(sample) = %v, want nil", verr)
	}
}
// ── Validate ────────────────────────────────────────────────────────
// TestValidate_RejectsEmpty: a nil config blob must be refused.
func TestValidate_RejectsEmpty(t *testing.T) {
	err := (&source{}).Validate(nil)
	if err != nil {
		return
	}
	t.Fatal("expected error on empty config, got nil")
}
// TestValidate_RejectsMissingRepo covers a missing owner, a missing
// name, and a whitespace-only owner.
func TestValidate_RejectsMissingRepo(t *testing.T) {
	noOwner := Config{RepoName: "x", Port: 80}
	noName := Config{RepoOwner: "y", Port: 80}
	blankOwner := Config{RepoOwner: "   ", RepoName: "x", Port: 80}

	for idx, cfg := range []Config{noOwner, noName, blankOwner} {
		payload, _ := json.Marshal(cfg)
		if (&source{}).Validate(payload) == nil {
			t.Errorf("case %d: expected error, got nil", idx)
		}
	}
}
// TestValidate_RejectsBadPort: zero, negative and >65535 ports are all
// invalid.
func TestValidate_RejectsBadPort(t *testing.T) {
	badPorts := [...]int{0, -1, 70000}
	for i := range badPorts {
		port := badPorts[i]
		payload, _ := json.Marshal(Config{RepoOwner: "a", RepoName: "b", Port: port})
		if (&source{}).Validate(payload) == nil {
			t.Errorf("port %d: expected error, got nil", port)
		}
	}
}
// TestValidate_RejectsPathEscape: absolute paths and ".." traversal are
// refused for both dockerfile_path and context_path.
func TestValidate_RejectsPathEscape(t *testing.T) {
	// withPaths fills in an otherwise-valid config around the two paths.
	withPaths := func(dockerfile, context string) Config {
		return Config{
			RepoOwner:      "a",
			RepoName:       "b",
			Port:           80,
			DockerfilePath: dockerfile,
			ContextPath:    context,
		}
	}
	escapes := []Config{
		withPaths("/etc/passwd", ""),
		withPaths("../../etc/passwd", ""),
		withPaths("", "../../"),
		withPaths("", "/etc"),
	}
	for idx, cfg := range escapes {
		payload, _ := json.Marshal(cfg)
		if (&source{}).Validate(payload) == nil {
			t.Errorf("case %d: expected path-escape rejection, got nil", idx)
		}
	}
}
// TestValidate_AcceptsValid: a config with nested-but-relative paths
// and a sane port passes.
func TestValidate_AcceptsValid(t *testing.T) {
	good := Config{
		RepoOwner:      "owner",
		RepoName:       "repo",
		Port:           8080,
		DockerfilePath: "docker/Dockerfile",
		ContextPath:    "services/api",
	}
	payload, _ := json.Marshal(good)
	if err := (&source{}).Validate(payload); err != nil {
		t.Fatalf("Validate(valid) = %v", err)
	}
}
// ── Naming helpers ──────────────────────────────────────────────────
// TestNaming_SameNameDifferentIDs_NoCollision: two workloads sharing a
// name but not an ID must get distinct container names and image tags.
func TestNaming_SameNameDifferentIDs_NoCollision(t *testing.T) {
	first := plugin.Workload{ID: "aaaaaaaa-rest", Name: "svc"}
	second := plugin.Workload{ID: "bbbbbbbb-rest", Name: "svc"}

	if name := containerNameFor(first); name == containerNameFor(second) {
		t.Errorf("container names collide: %q", name)
	}
	if tag := imageTagFor(first); tag == imageTagFor(second) {
		t.Errorf("image tags collide: %q", tag)
	}
}
func TestNaming_ShortIDsPassThrough(t *testing.T) {
w := plugin.Workload{ID: "abc", Name: "tiny"}
if !strings.HasSuffix(containerNameFor(w), "-abc") {
t.Errorf("container name lost short id: %q", containerNameFor(w))
}
}
// ── Context + Dockerfile resolution ─────────────────────────────────
// TestResolveContextDir_Empty_ReturnsRoot: an empty context path
// resolves to the clone root (possibly in its symlink-resolved form).
func TestResolveContextDir_Empty_ReturnsRoot(t *testing.T) {
	root := t.TempDir()
	got, err := resolveContextDir(root, "")
	if err != nil {
		t.Fatalf("resolveContextDir: %v", err)
	}
	resolved, _ := filepath.EvalSymlinks(root)
	if got == resolved || got == root {
		return
	}
	t.Errorf("got %q, want %q (or symlink-resolved equivalent)", got, root)
}
// TestResolveContextDir_Subfolder_OK: an existing subfolder resolves to
// a path ending in that folder's name.
func TestResolveContextDir_Subfolder_OK(t *testing.T) {
	root := t.TempDir()
	if err := os.MkdirAll(filepath.Join(root, "api"), 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	resolved, err := resolveContextDir(root, "api")
	if err != nil {
		t.Fatalf("resolveContextDir: %v", err)
	}
	if ok := strings.HasSuffix(resolved, "api"); !ok {
		t.Errorf("got %q, expected suffix 'api'", resolved)
	}
}
// TestResolveContextDir_NonexistentSubfolder: a subfolder that does not
// exist yields an error.
func TestResolveContextDir_NonexistentSubfolder(t *testing.T) {
	_, err := resolveContextDir(t.TempDir(), "missing")
	if err != nil {
		return
	}
	t.Fatal("expected error for missing subfolder")
}
// TestResolveContextDir_RejectsEscape plants a symlink inside the clone
// root that points at an unrelated directory and expects it to be
// refused. resolveContextDir is the second wall behind Validate: even
// if a config bypasses Validate (e.g. by direct DB edit), the escape
// must still be rejected. Skipped where symlinks cannot be created.
func TestResolveContextDir_RejectsEscape(t *testing.T) {
	root, elsewhere := t.TempDir(), t.TempDir()
	planted := filepath.Join(root, "escape")
	if err := os.Symlink(elsewhere, planted); err != nil {
		t.Skipf("symlink unsupported in this environment: %v", err)
	}
	_, err := resolveContextDir(root, "escape")
	if err == nil {
		t.Fatal("expected escape-path rejection")
	}
}
// TestVerifyDockerfileExists_Present: with an empty path the default
// "Dockerfile" in the context dir is found.
func TestVerifyDockerfileExists_Present(t *testing.T) {
	ctxDir := t.TempDir()
	target := filepath.Join(ctxDir, "Dockerfile")
	if err := os.WriteFile(target, []byte("FROM scratch\n"), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	err := verifyDockerfileExists(ctxDir, "")
	if err != nil {
		t.Fatalf("verifyDockerfileExists(default) = %v, want nil", err)
	}
}
// TestVerifyDockerfileExists_Missing: an empty context dir has no
// Dockerfile, so verification fails.
func TestVerifyDockerfileExists_Missing(t *testing.T) {
	err := verifyDockerfileExists(t.TempDir(), "")
	if err != nil {
		return
	}
	t.Fatal("expected error for missing Dockerfile")
}
// TestVerifyDockerfileExists_CustomPath: a slash-separated relative
// path to a non-default Dockerfile name is honoured.
func TestVerifyDockerfileExists_CustomPath(t *testing.T) {
	ctxDir := t.TempDir()
	sub := filepath.Join(ctxDir, "docker")
	if err := os.MkdirAll(sub, 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}
	file := filepath.Join(ctxDir, "docker", "Dockerfile.prod")
	if err := os.WriteFile(file, []byte("FROM scratch\n"), 0o644); err != nil {
		t.Fatalf("write: %v", err)
	}
	err := verifyDockerfileExists(ctxDir, "docker/Dockerfile.prod")
	if err != nil {
		t.Fatalf("verifyDockerfileExists(custom) = %v, want nil", err)
	}
}
// TestVerifyDockerfileExists_RejectsAbsolutePath: an absolute path is
// refused even though the file may exist on the host.
func TestVerifyDockerfileExists_RejectsAbsolutePath(t *testing.T) {
	err := verifyDockerfileExists(t.TempDir(), "/etc/passwd")
	if err != nil {
		return
	}
	t.Fatal("expected error for absolute dockerfile path")
}
// ── Sanitiser ───────────────────────────────────────────────────────
// TestSanitizeError_RedactsToken: the access token must not survive in
// the sanitized message, and the redaction marker must appear.
func TestSanitizeError_RedactsToken(t *testing.T) {
	const secret = "ghp_supersecret"
	cleaned := sanitizeError("401 from gitea token="+secret+" ok", secret)

	if leaked := strings.Contains(cleaned, secret); leaked {
		t.Errorf("token leaked: %q", cleaned)
	}
	if marked := strings.Contains(cleaned, "[REDACTED]"); !marked {
		t.Errorf("missing [REDACTED] marker: %q", cleaned)
	}
}
// TestSanitizeError_CollapsesWhitespace: newlines, carriage returns and
// tabs must not appear in the sanitized output.
func TestSanitizeError_CollapsesWhitespace(t *testing.T) {
	cleaned := sanitizeError("a\nb\rc\td", "")
	if !strings.ContainsAny(cleaned, "\n\r\t") {
		return
	}
	t.Errorf("did not collapse: %q", cleaned)
}
// TestSanitizeError_TruncatesUTF8Safe feeds 1000 copies of a 2-byte
// rune (2000 bytes, far above the 240-byte cap) and checks that the
// result ends with the ellipsis and is still well-formed UTF-8, i.e.
// that the cut did not land in the middle of a rune.
func TestSanitizeError_TruncatesUTF8Safe(t *testing.T) {
	input := strings.Repeat("é", 1000)
	cleaned := sanitizeError(input, "")

	if hasEllipsis := strings.HasSuffix(cleaned, "…"); !hasEllipsis {
		t.Errorf("missing ellipsis: %q", cleaned)
	}
	// utf8.ValidString would be the canonical guard; the local helper
	// is enough for this fixture.
	if wellFormed := isValidUTF8Slice([]byte(cleaned)); !wellFormed {
		t.Errorf("truncation produced broken UTF-8: %q", cleaned)
	}
}
// isValidUTF8Slice reports whether b is structurally well-formed UTF-8:
// every sequence starts with a valid lead byte, is complete, and is
// followed by the right number of continuation bytes (10xxxxxx).
//
// It is a test helper for catching runes torn by truncation. It does
// not reject overlong encodings or surrogate code points; use
// utf8.Valid for full validation.
func isValidUTF8Slice(b []byte) bool {
	for i := 0; i < len(b); {
		var width int
		switch lead := b[i]; {
		case lead < 0x80:
			width = 1
		case lead < 0xC0:
			return false // continuation byte at sequence start
		case lead < 0xE0:
			width = 2
		case lead < 0xF0:
			width = 3
		case lead < 0xF8:
			width = 4
		default:
			return false // 0xF8..0xFF never start a UTF-8 sequence
		}
		if i+width > len(b) {
			return false // sequence cut short at end of input
		}
		// Each trailing byte must be a continuation byte; without this
		// check a torn rune followed by ASCII would pass as valid.
		for _, c := range b[i+1 : i+width] {
			if c&0xC0 != 0x80 {
				return false
			}
		}
		i += width
	}
	return true
}
// ── State row ID ────────────────────────────────────────────────────
// TestContainerRowID_Deterministic: the row ID is a pure function of
// the workload and carries the ":dockerfile" suffix.
func TestContainerRowID_Deterministic(t *testing.T) {
	w := plugin.Workload{ID: "abcd1234-rest"}
	first, second := containerRowID(w), containerRowID(w)
	if first != second {
		t.Errorf("containerRowID not deterministic: %q vs %q", first, second)
	}
	if suffixed := strings.HasSuffix(first, ":dockerfile"); !suffixed {
		t.Errorf("containerRowID missing suffix: %q", first)
	}
}
@@ -0,0 +1,37 @@
package dockerfile
import (
"log/slog"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// buildEnv turns the workload's stored env rows into the "KEY=VALUE"
// list handed to Docker.
//
// Rows flagged Encrypted are decrypted with deps.EncKey first. A row
// whose value cannot be decrypted is logged and skipped rather than
// failing the deploy: running with one variable unset and a warning in
// the log is preferred over bricking a build because a rotated key
// missed an entry.
//
// If the rows cannot be listed at all, the failure is logged and nil is
// returned.
func buildEnv(deps plugin.Deps, workloadID string) []string {
	rows, err := deps.Store.ListWorkloadEnv(workloadID)
	if err != nil {
		slog.Warn("dockerfile source: list workload env", "workload", workloadID, "error", err)
		return nil
	}

	env := make([]string, 0, len(rows))
	for _, row := range rows {
		plain := row.Value
		if row.Encrypted {
			var derr error
			if plain, derr = crypto.Decrypt(deps.EncKey, row.Value); derr != nil {
				slog.Warn("dockerfile source: decrypt env value",
					"workload", workloadID, "key", row.Key, "error", derr)
				continue
			}
		}
		env = append(env, row.Key+"="+plain)
	}
	return env
}
@@ -0,0 +1,141 @@
package dockerfile
import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
// resolveContextDir returns the absolute directory the Docker build
// context is packed from, refusing anything outside cloneRoot.
//
// ctx may be "", "." or "./" (use cloneRoot itself) or a slash-separated
// relative subpath such as "./api" or "services/api".
//
// Both cloneRoot and the candidate are made absolute and, where
// possible, symlink-resolved BEFORE the containment check, so a symlink
// planted inside the clone cannot smuggle the context outside of it.
// Validate already rejects ".." and leading "/"; this is the second
// wall.
//
// Errors: the path escapes the clone root, cannot be stat'ed (e.g. does
// not exist), or is not a directory.
func resolveContextDir(cloneRoot, ctx string) (string, error) {
	root, err := filepath.Abs(cloneRoot)
	if err != nil {
		return "", fmt.Errorf("abs cloneRoot: %w", err)
	}
	if resolved, rerr := filepath.EvalSymlinks(root); rerr == nil {
		root = resolved
	}

	switch ctx {
	case "", ".", "./":
		return root, nil
	}

	target, err := filepath.Abs(filepath.Join(root, filepath.FromSlash(ctx)))
	if err != nil {
		return "", fmt.Errorf("abs candidate: %w", err)
	}
	// A resolution failure keeps the unresolved path; the Stat below
	// then reports the underlying problem.
	if resolved, rerr := filepath.EvalSymlinks(target); rerr == nil {
		target = resolved
	}

	inside := target == root || strings.HasPrefix(target, root+string(filepath.Separator))
	if !inside {
		return "", fmt.Errorf("context path %q escapes clone root", ctx)
	}

	info, err := os.Stat(target)
	switch {
	case err != nil:
		return "", fmt.Errorf("stat context_path %q: %w", ctx, err)
	case !info.IsDir():
		return "", fmt.Errorf("context_path %q is not a directory", ctx)
	}
	return target, nil
}
// verifyDockerfileExists confirms that the configured Dockerfile is a
// regular (non-directory) entry inside contextDir, so the operator gets
// a focused error up front instead of a less obvious daemon error
// later.
//
// dockerfilePath is Config.DockerfilePath: slash-separated, relative to
// the context dir, defaulting to "Dockerfile" when empty. Paths that
// start with "/" or contain ".." are refused without touching the
// filesystem.
func verifyDockerfileExists(contextDir, dockerfilePath string) error {
	rel := dockerfilePath
	if rel == "" {
		rel = "Dockerfile"
	}
	if absolute := strings.HasPrefix(rel, "/"); absolute || strings.Contains(rel, "..") {
		return fmt.Errorf("dockerfile_path %q must be relative and contain no '..'", rel)
	}

	info, err := os.Stat(filepath.Join(contextDir, filepath.FromSlash(rel)))
	switch {
	case err == nil:
		if info.IsDir() {
			return fmt.Errorf("dockerfile_path %q points at a directory, not a file", rel)
		}
		return nil
	case errors.Is(err, os.ErrNotExist):
		// Only the context dir's base name is shown, not its full path.
		return fmt.Errorf("Dockerfile not found at %s/%s", filepath.Base(contextDir), rel)
	default:
		return fmt.Errorf("stat Dockerfile %q: %w", rel, err)
	}
}
// sanitizeError cleans an error string before it is stored as
// last_error or sent out through a notification. It is the
// access-token-only form of sanitizeErrorWithSecrets: no env-var values
// are passed for redaction.
func sanitizeError(msg, accessToken string) string {
	var noEnvSecrets []string
	return sanitizeErrorWithSecrets(msg, accessToken, noEnvSecrets)
}
// sanitizeErrorWithSecrets redacts secrets from msg, flattens it to a
// single line and clamps its length.
//
// It exists because captured build/runtime logs can contain decrypted
// env-var values (a Dockerfile can `RUN echo $SECRET`), which would
// otherwise land in operator-readable state.
//
// Parameters:
//   - msg: the raw error/log text. "" is returned as "".
//   - accessToken: replaced with "[REDACTED]" wherever it occurs, when
//     non-empty.
//   - envKV: entries shaped "KEY=VALUE" (the same list Docker receives).
//     Each entry is split on the first '='; values shorter than 4 bytes
//     are ignored because they produce too many false-positive matches
//     against ordinary words. Entries without '=' are ignored.
//
// Secrets are redacted longest-first. If one secret is a substring of
// another (e.g. "secret" and "secretlonger"), replacing the shorter one
// first would leave the tail of the longer one ("longer") in the
// output.
//
// After redaction, '\n', '\r' and '\t' become spaces, and a message
// longer than 240 bytes is cut at the nearest rune boundary at or below
// 240 bytes and suffixed with "…".
func sanitizeErrorWithSecrets(msg, accessToken string, envKV []string) string {
	if msg == "" {
		return ""
	}

	const minEnvSecretLen = 4
	secrets := make([]string, 0, len(envKV)+1)
	if accessToken != "" {
		secrets = append(secrets, accessToken)
	}
	for _, kv := range envKV {
		eq := strings.IndexByte(kv, '=')
		if eq < 0 {
			continue
		}
		if value := kv[eq+1:]; len(value) >= minEnvSecretLen {
			secrets = append(secrets, value)
		}
	}
	// Longest first, so a short secret never pre-empts a longer one
	// that contains it. Stable keeps the result deterministic.
	sort.SliceStable(secrets, func(i, j int) bool {
		return len(secrets[i]) > len(secrets[j])
	})
	for _, secret := range secrets {
		msg = strings.ReplaceAll(msg, secret, "[REDACTED]")
	}

	msg = strings.Map(func(r rune) rune {
		switch r {
		case '\n', '\r', '\t':
			return ' '
		}
		return r
	}, msg)

	const maxLen = 240
	if len(msg) > maxLen {
		// Rune-aware truncation: walk back to the previous rune
		// boundary so multi-byte chars at the cap don't tear.
		cut := maxLen
		for cut > 0 && !isRuneStart(msg[cut]) {
			cut--
		}
		msg = msg[:cut] + "…"
	}
	return msg
}

// isRuneStart reports whether b can begin a UTF-8 sequence, i.e. it is
// not a continuation byte (10xxxxxx). Used to walk back from a
// byte-offset cut to a rune boundary.
func isRuneStart(b byte) bool {
	return b&0xC0 != 0x80
}
@@ -0,0 +1,32 @@
package dockerfile
import (
"fmt"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// idShort returns the first 8 bytes of the workload ID, or the whole ID
// when it is shorter than that. Workload names are not unique, so this
// suffix is what keeps two same-named workloads from clobbering each
// other's container/image artifacts.
func idShort(w plugin.Workload) string {
	const prefixLen = 8
	id := w.ID
	if len(id) >= prefixLen {
		id = id[:prefixLen]
	}
	return id
}
// containerNameFor returns the deterministic container name for a
// workload: "tf-build-<name>-<short id>". The `tf-build-` prefix makes
// dockerfile-built containers recognisable at a glance in `docker ps`.
func containerNameFor(w plugin.Workload) string {
	return "tf-build-" + w.Name + "-" + idShort(w)
}
// imageTagFor returns the deterministic image tag the build step emits:
// "tf-build-<name>-<short id>:latest". The repository part has the same
// shape as the container name, so `docker images` shows the linkage.
func imageTagFor(w plugin.Workload) string {
	return "tf-build-" + w.Name + "-" + idShort(w) + ":latest"
}
@@ -0,0 +1,72 @@
package dockerfile
import (
"context"
"log/slog"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// reconcile brings the persisted container row in line with what Docker
// reports for the workload's single container.
//
//   - No row, or a row without a container ID: nothing to do.
//   - Docker cannot answer for the container (most likely "no such
//     container"): the row is marked "missing".
//   - Otherwise the row's State is synced to "running" / "stopped" when
//     it differs.
//
// In the missing and stopped cases, a workload whose runtime status was
// "deployed" is additionally flipped to "failed" with an explanatory
// LastError, and a failure event is published.
//
// There is deliberately no automatic rebuild: a crash loop with
// auto-rebuild would burn CPU rebuilding the same broken commit, so the
// operator redeploys explicitly. Store/persist failures are logged, not
// returned; the only error returned is a state-load failure.
func reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	st, row, err := loadState(deps, w)
	if err != nil {
		return err
	}
	if row == nil || row.ContainerID == "" {
		return nil
	}

	// markFailed demotes a previously healthy workload. It is a no-op
	// unless the last persisted status was "deployed", so repeated
	// reconciles do not re-publish the same failure.
	markFailed := func(reason, rowState, logMsg string) {
		if st.Status != "deployed" {
			return
		}
		demote := func(rs *runtimeState, c *store.Container) {
			rs.Status = "failed"
			rs.LastError = reason
			c.State = rowState
		}
		if serr := saveState(deps, w, demote); serr != nil {
			slog.Warn(logMsg, "workload", w.Name, "error", serr)
		}
		publishEvent(deps, w, "failed: "+reason)
	}

	running, err := deps.Docker.IsContainerRunning(ctx, row.ContainerID)
	if err != nil {
		if uerr := deps.Store.UpdateContainerState(row.ID, "missing"); uerr != nil {
			slog.Warn("dockerfile: mark missing", "workload", w.Name, "error", uerr)
		}
		markFailed("container not found", "missing", "dockerfile: persist missing-state")
		return nil
	}

	desired := "stopped"
	if running {
		desired = "running"
	}
	if row.State != desired {
		if uerr := deps.Store.UpdateContainerState(row.ID, desired); uerr != nil {
			slog.Warn("dockerfile: state sync", "workload", w.Name, "error", uerr)
		}
	}
	if !running {
		markFailed("container stopped unexpectedly", "stopped", "dockerfile: persist crashed-state")
	}
	return nil
}
@@ -0,0 +1,179 @@
package dockerfile
import (
"encoding/json"
"errors"
"fmt"
"log/slog"
"sync"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// runtimeState is the per-workload state persisted inside the container
// row's extra_json blob (loadState decodes it from row.ExtraJSON). Per
// the original author it mirrors the static plugin's runtimeState shape
// so anyone reading the DB can interpret the two kinds identically.
//
// Every field is omitted from the JSON when empty.
//
// LastImageDigest is the build's image ID — distinct from a registry
// digest (we never push) but useful for "did the build actually
// produce a different artifact?" diffing when we add caching later.
type runtimeState struct {
	LastCommitSHA   string `json:"last_commit_sha,omitempty"`   // commit the last recorded status refers to
	LastImageDigest string `json:"last_image_digest,omitempty"` // local image ID of the last build
	LastSyncAt      string `json:"last_sync_at,omitempty"`      // set from store.Now() on a successful deploy
	LastError       string `json:"last_error,omitempty"`        // sanitized failure text; "" when healthy
	Status          string `json:"status,omitempty"`            // e.g. "deployed", "failed", "stopped", "syncing", "building"
}
// runtimeStateKeys lists every JSON field name owned by runtimeState.
// saveState strips these from the generic map before re-emitting so
// the typed values do not double-write under both their JSON tag and
// any subsequent extension's tag.
//
// Keep this list in sync with the json tags on runtimeState: a tag
// added there but not here would not be stripped.
var runtimeStateKeys = []string{
	"last_commit_sha", "last_image_digest", "last_sync_at", "last_error", "status",
}
// containerRowID returns the deterministic ID of the workload's
// container row: "<workload id>:dockerfile". Because it depends only on
// the workload ID it is stable across redeploys, which lets saveState
// upsert in place.
func containerRowID(w plugin.Workload) string {
	const suffix = ":dockerfile"
	return w.ID + suffix
}
// loadState fetches the workload's container row and decodes the
// runtime state stored in its ExtraJSON.
//
// Returns:
//   - zero state, nil row, nil error when no row exists yet
//     (store.ErrNotFound), i.e. on first deploy;
//   - zero state, nil row and a wrapped error for any other store
//     failure;
//   - the decoded state and a pointer to the row otherwise.
//
// An ExtraJSON that is empty or "{}" is not decoded. A decode failure
// is logged at debug level and is not treated as an error.
func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Container, error) {
	var st runtimeState

	row, err := deps.Store.GetContainerByID(containerRowID(w))
	switch {
	case err == nil:
		// row found; decode below
	case errors.Is(err, store.ErrNotFound):
		return st, nil, nil
	default:
		return st, nil, fmt.Errorf("dockerfile source: load state: %w", err)
	}

	if blob := row.ExtraJSON; blob != "" && blob != "{}" {
		if derr := json.Unmarshal([]byte(blob), &st); derr != nil {
			slog.Debug("dockerfile source: decode extra_json", "workload", w.ID, "error", derr)
		}
	}
	return st, &row, nil
}
// saveLocks serializes per-workload RMW of the container row. Same
// pattern as the static plugin — SQLite's MaxOpenConns=1 serializes
// statements but not the caller's read-then-write intent, so two
// concurrent deploys for the same workload could stomp each other's
// container_id / proxy_route_id without this mutex.
//
// Entries are reference-counted and removed only when the last holder
// releases. This bounds memory (no per-workload-ID leak) WITHOUT the
// use-after-delete hazard of deleting an entry on teardown: deleting a
// live entry while a concurrent saveState still holds (or is about to
// lock) it would let a fresh saveState mint a SECOND mutex for the same
// workload, losing the RMW serialization the lock exists to provide.
var saveLocks struct {
	// mu guards the locks map and every entry's refs counter. It is held
	// only for bookkeeping in acquireSaveLock / releaseSaveLock, never
	// while waiting on a per-workload lock.
	mu sync.Mutex
	// locks maps workload ID -> its reference-counted lock. It starts nil
	// and is created lazily by acquireSaveLock on first use.
	locks map[string]*saveLock
}
// saveLock is one workload's entry in the saveLocks table: the mutex
// callers contend on plus a count of callers currently interested in it.
type saveLock struct {
	// mu is the per-workload lock taken by acquireSaveLock and dropped by
	// releaseSaveLock.
	mu sync.Mutex
	// refs counts callers that hold mu or are waiting for it. It is only
	// read and written while saveLocks.mu is held; the entry is deleted
	// from the table when it drops to zero.
	refs int
}
// acquireSaveLock looks up (or lazily creates) the lock entry for
// workloadID, counts the caller as a holder, and then blocks until the
// per-workload mutex is obtained. The returned entry must be handed back
// to releaseSaveLock exactly once.
//
// The table mutex is released before blocking on the per-workload mutex,
// so a slow holder for one workload never stalls callers for another.
func acquireSaveLock(workloadID string) *saveLock {
	entry := func() *saveLock {
		saveLocks.mu.Lock()
		defer saveLocks.mu.Unlock()

		if saveLocks.locks == nil {
			saveLocks.locks = make(map[string]*saveLock)
		}
		e, found := saveLocks.locks[workloadID]
		if !found {
			e = new(saveLock)
			saveLocks.locks[workloadID] = e
		}
		// Register interest while still under the table mutex so that a
		// concurrent release cannot see refs == 0 and delete this entry.
		e.refs++
		return e
	}()

	entry.mu.Lock()
	return entry
}
// releaseSaveLock is the counterpart of acquireSaveLock: it releases the
// per-workload mutex, drops the caller's reference, and deletes the table
// entry when that was the last reference.
//
// An entry that another goroutine is waiting on is never deleted, because
// that goroutine already bumped refs under saveLocks.mu before it started
// waiting.
func releaseSaveLock(workloadID string, l *saveLock) {
	// Free the workload lock first; the table mutex is taken only for the
	// refcount bookkeeping below.
	l.mu.Unlock()

	saveLocks.mu.Lock()
	defer saveLocks.mu.Unlock()

	l.refs--
	if l.refs != 0 {
		return
	}
	delete(saveLocks.locks, workloadID)
}
// saveState performs a read-modify-write of the workload's container row
// while holding the per-workload save lock.
//
// Steps:
//  1. Load the previous typed state and row (a missing row yields a fresh
//     row pre-filled with the workload's identity).
//  2. Decode the row's extra_json into a generic key -> raw-JSON map and
//     strip the keys owned by runtimeState.
//  3. Call mutate so the caller can adjust both the typed state and the
//     row's first-class fields.
//  4. Re-marshal the typed state, overlay it on the generic map, and
//     upsert the row with a refreshed LastSeenAt.
//
// Unknown keys in extra_json survive the round-trip so future writers can
// extend the blob without forcing runtimeState to grow. An undecodable
// blob is logged at debug level and replaced rather than treated as fatal.
//
// Returns a wrapped error if loading, (re-)marshalling, or the upsert
// fails; mutate itself cannot report an error.
func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
	lk := acquireSaveLock(w.ID)
	defer releaseSaveLock(w.ID, lk)

	prev, prevRow, err := loadState(deps, w)
	if err != nil {
		return err
	}

	// Defaults for a first deploy; replaced wholesale when a row exists.
	row := store.Container{
		ID:           containerRowID(w),
		WorkloadID:   w.ID,
		WorkloadKind: string(store.WorkloadKindBuild),
		Host:         "local",
	}
	if prevRow != nil {
		row = *prevRow
	}

	generic := map[string]json.RawMessage{}
	if row.ExtraJSON != "" && row.ExtraJSON != "{}" {
		if err := json.Unmarshal([]byte(row.ExtraJSON), &generic); err != nil {
			slog.Debug("dockerfile source: decode extra_json (generic)", "workload", w.ID, "error", err)
		}
	}
	// A stored blob that is the JSON literal `null` decodes WITHOUT error
	// but sets the map itself to nil; writing into it below would panic.
	// Fall back to an empty map so such a row is simply overwritten.
	if generic == nil {
		generic = map[string]json.RawMessage{}
	}
	// Drop the typed keys so a field that mutate clears (and omitempty then
	// omits) does not survive from the previous blob.
	for _, k := range runtimeStateKeys {
		delete(generic, k)
	}

	state := prev
	mutate(&state, &row)

	// Round-trip the typed state through JSON to obtain it as a key -> raw
	// map that can be overlaid on the pass-through keys.
	typedBytes, err := json.Marshal(state)
	if err != nil {
		return fmt.Errorf("dockerfile source: marshal state: %w", err)
	}
	typedMap := map[string]json.RawMessage{}
	if err := json.Unmarshal(typedBytes, &typedMap); err != nil {
		return fmt.Errorf("dockerfile source: re-decode typed state: %w", err)
	}
	for k, v := range typedMap {
		generic[k] = v
	}

	merged, err := json.Marshal(generic)
	if err != nil {
		return fmt.Errorf("dockerfile source: marshal merged state: %w", err)
	}
	row.ExtraJSON = string(merged)
	row.LastSeenAt = store.Now()

	if err := deps.Store.UpsertContainer(row); err != nil {
		return fmt.Errorf("dockerfile source: upsert container row: %w", err)
	}
	return nil
}
@@ -0,0 +1,51 @@
package dockerfile
import (
"context"
"errors"
"log/slog"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// teardown removes what deploy left behind for the workload, in this
// order: proxy route, running container, container index row. A workload
// with no container row (never deployed, or already torn down) is a
// no-op, which makes repeated calls safe.
//
// Only a failure to load the prior state is returned to the caller.
// Failures of the individual removal steps are logged as warnings and the
// remaining steps still run.
//
// The built image tag is intentionally kept: deleting it would throw away
// the docker build cache and force the next deploy of this workload to
// rebuild from scratch. Operators can reclaim unused images through the
// existing Settings → Prune Images path.
func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
	_, row, err := loadState(deps, w)
	switch {
	case err != nil:
		return err
	case row == nil:
		return nil
	}

	// Remove the route before the container so traffic stops being sent
	// to a container that is about to go away.
	if routeID := row.ProxyRouteID; routeID != "" {
		if rmErr := deps.Proxy.DeleteRoute(ctx, routeID); rmErr != nil {
			slog.Warn("dockerfile: failed to remove proxy route", "workload", w.Name, "error", rmErr)
		}
	}

	if containerID := row.ContainerID; containerID != "" {
		if rmErr := deps.Docker.RemoveContainer(ctx, containerID, true); rmErr != nil {
			slog.Warn("dockerfile: failed to remove container", "workload", w.Name, "error", rmErr)
		}
	}

	// A row that vanished in the meantime is fine; anything else is worth
	// a warning but does not fail the teardown.
	delErr := deps.Store.DeleteContainer(row.ID)
	if delErr != nil && !errors.Is(delErr, store.ErrNotFound) {
		slog.Warn("dockerfile: failed to delete container row", "workload", w.Name, "error", delErr)
	}

	// The per-workload save-mutex is reference-counted (see state.go) and
	// frees itself when the last holder releases, so teardown does not
	// delete it explicitly — doing so could race a concurrent saveState
	// and break the RMW serialization the lock provides.
	return nil
}
@@ -444,22 +444,12 @@ func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg
}
// dispatchSiteNotification fires a site_sync_success or
// site_sync_failure event to the configured outbound webhook.
// Resolution: per-workload URL+secret first, then fall through to
// settings.notification_url/secret. Always best-effort.
// site_sync_failure event for the workload via the shared multi-route
// dispatcher in plugin.DispatchNotificationForWorkload. Resolution
// order (workload_notifications → legacy single URL → settings global)
// is identical to the dockerfile plugin's path so receivers see
// consistent fan-out behaviour across source kinds.
func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
if deps.Notifier == nil {
return
}
settings, err := deps.Store.GetSettings()
if err != nil {
slog.Warn("static site: notify settings lookup failed", "site", w.ID, "error", err)
return
}
url, secret, tier := resolveSiteTarget(w, settings)
if url == "" {
return
}
eventType := "site_sync_success"
if status == "failed" {
eventType = "site_sync_failure"
@@ -468,7 +458,7 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
if domain != "" {
siteURL = "https://" + domain
}
deps.Notifier.SendSigned(url, secret, tier, notify.Event{
plugin.DispatchNotificationForWorkload(deps, w, notify.Event{
Type: eventType,
Project: w.Name,
URL: siteURL,
@@ -476,16 +466,6 @@ func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, statu
})
}
// resolveSiteTarget mirrors the legacy resolveSiteTarget helper but
// reads notification config off the workload row (where it now lives
// post-refactor) rather than the static_sites row.
func resolveSiteTarget(w plugin.Workload, settings store.Settings) (string, string, notify.Tier) {
if w.NotificationURL != "" {
return w.NotificationURL, w.NotificationSecret, notify.TierSite
}
return settings.NotificationURL, settings.NotificationSecret, notify.TierSettings
}
// publishEvent emits a static_site_status event on the bus AND
// persists an event_log row so the dashboard's audit trail picks it
// up. Message format ("Static site \"%s\": %s") is preserved verbatim
@@ -165,30 +165,42 @@ func TestContainerRowID_Deterministic(t *testing.T) {
}
}
func TestLockFor_ReturnsSameLockForSameWorkload(t *testing.T) {
// Suffix by t.Name() so the package-global saveLocks map cannot
// bleed key state between tests (or between -count=N runs).
func TestSaveLock_FreedWhenIdle(t *testing.T) {
// After the last holder releases, the reference-counted entry must be
// removed from the map so the lock table cannot grow without bound.
// Suffix by t.Name() so the package-global saveLocks map cannot bleed
// key state between tests (or between -count=N runs).
key := t.Name() + "-wid"
a := lockFor(key)
b := lockFor(key)
if a != b {
t.Fatalf("lockFor returned distinct locks for same workload: %p vs %p", a, b)
lk := acquireSaveLock(key)
saveLocks.mu.Lock()
_, present := saveLocks.locks[key]
saveLocks.mu.Unlock()
if !present {
t.Fatal("acquireSaveLock did not register the entry while held")
}
releaseSaveLock(key, lk)
saveLocks.mu.Lock()
_, stillPresent := saveLocks.locks[key]
saveLocks.mu.Unlock()
if stillPresent {
t.Fatal("releaseSaveLock left the entry behind after the last holder released")
}
}
func TestLockFor_ReturnsDistinctLocksForDifferentWorkloads(t *testing.T) {
a := lockFor(t.Name() + "-a")
b := lockFor(t.Name() + "-b")
if a == b {
t.Fatalf("lockFor returned same lock for different workloads: %p", a)
}
func TestSaveLock_DistinctWorkloadsDoNotSerialize(t *testing.T) {
// Two different workloads must be lockable at the same time. If they
// shared a mutex the second acquire would block forever (deadlock).
a := acquireSaveLock(t.Name() + "-a")
b := acquireSaveLock(t.Name() + "-b")
releaseSaveLock(t.Name()+"-b", b)
releaseSaveLock(t.Name()+"-a", a)
}
func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
// Two goroutines holding the same lock must run sequentially. The
// counter would race past 2 if locking were broken; with the lock,
// the increment is observed monotonically.
lk := lockFor(t.Name() + "-wid")
func TestSaveLock_SerializesConcurrentAcquisitions(t *testing.T) {
// Goroutines acquiring the same workload's lock must run sequentially.
// The counter would race past 1 if locking were broken; with the lock,
// peak in-flight stays at 1.
key := t.Name() + "-wid"
var (
wg sync.WaitGroup
mu sync.Mutex
@@ -199,8 +211,8 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
lk.Lock()
defer lk.Unlock()
lk := acquireSaveLock(key)
defer releaseSaveLock(key, lk)
mu.Lock()
counter++
@@ -216,15 +228,15 @@ func TestLockFor_SerializesConcurrentAcquisitions(t *testing.T) {
}
wg.Wait()
if peak != 1 {
t.Fatalf("lockFor failed to serialize: peak in-flight = %d, want 1", peak)
t.Fatalf("acquireSaveLock failed to serialize: peak in-flight = %d, want 1", peak)
}
}
func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
// Distinct workloads acquired in parallel must not panic on map
// access — exercises the outer-mutex protection inside lockFor.
// Each iteration uses a unique key so the test stresses the
// insertion path (the common case for "first deploy" callers).
func TestSaveLock_ConcurrentMapAccessIsSafe(t *testing.T) {
// Distinct workloads acquired+released in parallel must not panic on map
// access — exercises the outer-mutex protection inside acquire/release.
// Each iteration uses a unique key so the test stresses the insertion +
// refcount-cleanup paths (the common case for "first deploy" callers).
prefix := t.Name() + "-"
var wg sync.WaitGroup
for i := 0; i < 50; i++ {
@@ -232,9 +244,9 @@ func TestLockFor_ConcurrentMapAccessIsSafe(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
lk := lockFor(prefix + strconv.Itoa(i))
lk.Lock()
lk.Unlock()
key := prefix + strconv.Itoa(i)
lk := acquireSaveLock(key)
releaseSaveLock(key, lk)
}()
}
wg.Wait()
+42 -14
View File
@@ -80,26 +80,55 @@ func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Contai
// container_id / proxy_route_id and orphaning Docker resources. The
// mutex caps the concurrency at 1 per workload; cross-workload
// parallelism is unaffected.
//
// Entries are reference-counted and removed only when the last holder
// releases. This bounds memory (no per-workload-ID leak) WITHOUT the
// use-after-delete hazard of deleting an entry on teardown: deleting a
// live entry while a concurrent saveState still holds (or is about to
// lock) it would let a fresh saveState mint a SECOND mutex for the same
// workload, losing the RMW serialization the lock exists to provide.
var saveLocks struct {
mu sync.Mutex
locks map[string]*sync.Mutex
locks map[string]*saveLock
}
// lockFor returns the per-workload mutex, creating it on first use.
// The outer mutex is held only briefly during map lookup; the returned
// per-workload lock is what callers actually contend on.
func lockFor(workloadID string) *sync.Mutex {
type saveLock struct {
mu sync.Mutex
refs int
}
// acquireSaveLock returns the per-workload lock (creating it on first use),
// registers this caller as a holder, and takes the lock. Pair with
// releaseSaveLock. The outer mutex is held only for the bookkeeping; callers
// contend on the returned per-workload lock.
func acquireSaveLock(workloadID string) *saveLock {
saveLocks.mu.Lock()
defer saveLocks.mu.Unlock()
if saveLocks.locks == nil {
saveLocks.locks = map[string]*sync.Mutex{}
saveLocks.locks = map[string]*saveLock{}
}
m, ok := saveLocks.locks[workloadID]
l, ok := saveLocks.locks[workloadID]
if !ok {
m = &sync.Mutex{}
saveLocks.locks[workloadID] = m
l = &saveLock{}
saveLocks.locks[workloadID] = l
}
return m
l.refs++
saveLocks.mu.Unlock()
l.mu.Lock()
return l
}
// releaseSaveLock unlocks and drops the caller's reference, removing the map
// entry once no holders remain. Because refs is incremented under saveLocks.mu
// before the entry can be observed for deletion, an entry with a pending
// acquirer is never deleted.
func releaseSaveLock(workloadID string, l *saveLock) {
l.mu.Unlock()
saveLocks.mu.Lock()
l.refs--
if l.refs == 0 {
delete(saveLocks.locks, workloadID)
}
saveLocks.mu.Unlock()
}
// saveState upserts the container row, calling mutate so callers can
@@ -115,9 +144,8 @@ func lockFor(workloadID string) *sync.Mutex {
// Per-workload mutex serializes concurrent callers so two parallel
// Deploys can't read the same prior state and race their writes.
func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
lk := lockFor(w.ID)
lk.Lock()
defer lk.Unlock()
lk := acquireSaveLock(w.ID)
defer releaseSaveLock(w.ID, lk)
prev, prevRow, err := loadState(deps, w)
if err != nil {
@@ -185,14 +185,23 @@ func TestSaveState_RecoversFromInvalidExtraJSON(t *testing.T) {
deps, _ := testDeps(t)
w := plugin.Workload{ID: t.Name() + "-wid", Name: "site"}
// UpsertContainer now validates extra_json at the boundary, so this
// test seeds a valid row first and corrupts it via raw SQL to
// simulate a pre-existing bad row from an upgrade / external edit.
if err := deps.Store.UpsertContainer(store.Container{
ID: containerRowID(w),
WorkloadID: w.ID,
WorkloadKind: string(store.WorkloadKindSite),
Host: "local",
ExtraJSON: `{not json`,
ExtraJSON: `{}`,
}); err != nil {
t.Fatalf("seed bad row: %v", err)
t.Fatalf("seed row: %v", err)
}
if _, err := deps.Store.DB().Exec(
`UPDATE containers SET extra_json = ? WHERE id = ?`,
`{not json`, containerRowID(w),
); err != nil {
t.Fatalf("corrupt extra_json: %v", err)
}
err := saveState(deps, w, func(state *runtimeState, _ *store.Container) {
@@ -66,5 +66,8 @@ func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
if err := deps.Store.DeleteContainer(prevContainer.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
slog.Warn("static site: failed to delete container row", "site", w.Name, "error", err)
}
// The per-workload save-mutex is reference-counted (see state.go) and
// frees itself when the last holder releases, so teardown no longer
// deletes it explicitly — doing so could race a concurrent saveState.
return nil
}