diff --git a/cmd/server/main.go b/cmd/server/main.go index 9bce3e1..a3d20fa 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -348,11 +348,11 @@ func main() { // Initialize static site manager and health checker. staticSiteMgr := staticsite.NewManager(db, dockerClient, proxyProvider, eventBus, notifier, encKey) webhookHandler.SetSiteSyncTriggerer(staticSiteMgr) - // Wire the plugin static source's backend to the manager. After this - // call the "static" kind appears in /api/hooks/kinds and the /apps/new - // picker; before it, the source registers no kind, so the frontend - // silently omits it. - wireStaticBackend(db, staticSiteMgr) + // The plugin static source registers itself eagerly in its init() + // now that the deploy pipeline is implemented inline (see + // internal/workload/plugin/source/static). The legacy Manager kept + // here keeps the /api/sites/* HTTP routes alive during the cutover + // window. staticSiteHealth := staticsite.NewHealthChecker(db, dockerClient, staticSiteMgr) if err := staticSiteHealth.Start("2m"); err != nil { slog.Warn("failed to start static site health checker", "error", err) diff --git a/cmd/server/static_backend.go b/cmd/server/static_backend.go deleted file mode 100644 index 52a1441..0000000 --- a/cmd/server/static_backend.go +++ /dev/null @@ -1,133 +0,0 @@ -package main - -import ( - "context" - "encoding/json" - "fmt" - - "github.com/alexei/tinyforge/internal/staticsite" - "github.com/alexei/tinyforge/internal/store" - "github.com/alexei/tinyforge/internal/workload/plugin" - "github.com/alexei/tinyforge/internal/workload/plugin/source/static" -) - -// staticBackend is the bridge between the plugin static source and the -// existing staticsite.Manager. The Manager operates on store.StaticSite -// rows keyed by site ID; this adapter keeps a phantom static_sites row -// for every plugin-native static workload (row ID = workload ID) so the -// Manager's deploy pipeline runs unchanged. 
-// -// The phantom row carries no UI weight — the legacy /api/static_sites -// endpoints will still surface it during the cutover window, which is -// fine: it lets operators inspect state through the existing legacy UI -// until /apps grows the equivalent screens. When the legacy cutover -// finishes, we can rewrite the static source to operate against the -// containers table directly and drop this adapter. -type staticBackend struct { - store *store.Store - mgr *staticsite.Manager -} - -func newStaticBackend(st *store.Store, mgr *staticsite.Manager) *staticBackend { - return &staticBackend{store: st, mgr: mgr} -} - -func (b *staticBackend) Deploy(ctx context.Context, _ plugin.Deps, w plugin.Workload, _ plugin.DeploymentIntent) error { - cfg, err := plugin.SourceConfigOf[static.Config](w) - if err != nil { - return fmt.Errorf("static backend: decode config: %w", err) - } - site, err := b.syncPhantomSite(w, cfg) - if err != nil { - return err - } - return b.mgr.Deploy(ctx, site.ID, true /* force */) -} - -func (b *staticBackend) Teardown(ctx context.Context, _ plugin.Deps, w plugin.Workload) error { - // Stop best-effort (the row may not exist yet if Deploy never ran). - if _, err := b.store.GetStaticSiteByID(w.ID); err == nil { - if err := b.mgr.Stop(ctx, w.ID); err != nil { - // Log via the manager's own pipeline; we keep going so the - // phantom row is always dropped. - _ = err - } - _ = b.store.DeleteStaticSite(w.ID) - } - return nil -} - -func (b *staticBackend) Reconcile(_ context.Context, _ plugin.Deps, w plugin.Workload) error { - // The staticsite.HealthChecker already polls every site row; no - // per-tick work is needed here. Reconcile becomes a no-op until the - // inline port lands. - _ = w - return nil -} - -// syncPhantomSite upserts a store.StaticSite keyed on the workload ID, -// translating the plugin Config into the legacy shape. 
It is also where -// we shape the "single public face" expectation of the legacy table into -// a single domain string. -func (b *staticBackend) syncPhantomSite(w plugin.Workload, cfg static.Config) (store.StaticSite, error) { - domain := "" - for _, f := range w.PublicFaces { - // Pick the first enabled face. The API validator already caps - // faces at one for v1, but iterate defensively. - if f.Subdomain != "" || f.Domain != "" { - d := f.Domain - sub := f.Subdomain - switch { - case sub != "" && d != "": - domain = sub + "." + d - case sub == "" && d != "": - domain = d - case sub != "" && d == "": - // Domain falls back to settings.domain inside the - // Manager. Leave empty — Manager handles it. - domain = sub - } - break - } - } - - site := store.StaticSite{ - ID: w.ID, - Name: w.Name, - Provider: cfg.Provider, - GiteaURL: cfg.BaseURL, - RepoOwner: cfg.RepoOwner, - RepoName: cfg.RepoName, - Branch: cfg.Branch, - FolderPath: cfg.FolderPath, - AccessToken: cfg.AccessToken, - Domain: domain, - Mode: cfg.Mode, - RenderMarkdown: cfg.RenderMarkdown, - SyncTrigger: "manual", - StorageEnabled: cfg.StorageEnabled, - StorageLimitMB: cfg.StorageLimitMB, - NotificationURL: w.NotificationURL, - NotificationSecret: w.NotificationSecret, - WebhookSecret: w.WebhookSecret, - WebhookSigningSecret: w.WebhookSigningSecret, - WebhookRequireSignature: w.WebhookRequireSignature, - } - if err := b.store.UpsertStaticSiteWithID(site); err != nil { - return store.StaticSite{}, fmt.Errorf("static backend: sync phantom site: %w", err) - } - return site, nil -} - -// wireStaticBackend installs the adapter so the plugin static source -// becomes deployable. Called once from main() after the staticsite -// Manager is constructed. Safe to call multiple times only because -// static.SetBackend itself panics on the second call — keeping the -// invariant explicit. 
-func wireStaticBackend(st *store.Store, mgr *staticsite.Manager) { - static.SetBackend(newStaticBackend(st, mgr)) -} - -// Unused but kept so the json import is referenced if we ever need to -// inspect raw SourceConfig blobs here for debugging. -var _ = json.Marshal diff --git a/docs/WORKLOAD_REFACTOR_TODO.md b/docs/WORKLOAD_REFACTOR_TODO.md index af72d55..6d52a35 100644 --- a/docs/WORKLOAD_REFACTOR_TODO.md +++ b/docs/WORKLOAD_REFACTOR_TODO.md @@ -9,26 +9,30 @@ order. > ## Current focus (read this first) > -> **Triggers as first-class reusable entities — DONE** (2026-05-16). The -> trigger-split arc shipped end-to-end: `triggers` + `workload_trigger_bindings` -> tables, boot-time backfill, fan-out webhook handler at -> `/api/webhook/triggers/{secret}` with bounded concurrency, `/api/triggers` -> CRUD + `/api/bindings/{id}` + workload-side bind endpoints, full `/triggers` -> frontend (list, new, detail), workload-page bindings panel + per-binding -> override editor, i18n EN+RU. +> **Triggers as first-class reusable entities — DONE** (2026-05-16) and +> **Static source inline port — DONE** (2026-05-16). The phantom-row +> adapter (`cmd/server/static_backend.go`) is gone; the static plugin +> now operates directly on `plugin.Workload` + `containers` + +> `workload_env`, with runtime state (`last_commit_sha`, `last_sync_at`, +> `last_error`, `status`) carried in `containers.extra_json`. Provider +> downloads enforce path-traversal rejection, error strings are +> sanitized before persistence, and Docker resource names are suffixed +> with the workload ID short prefix to dodge name collisions. > -> **Next on Priority 1** is the **static source inline port** (~2150 LOC -> across 8 files; details in the section below). After that, the -> **hard legacy cutover** (drop `/api/projects`, `/api/stacks`, `/api/sites`, -> `/api/stages` + their tables and frontends) clears the deck. 
+> **Next on Priority 1** is the **hard legacy cutover** — drop +> `/api/projects`, `/api/stacks`, `/api/sites`, `/api/stages` handlers, +> drop their tables, delete `internal/stack/` + `internal/staticsite/` +> packages, delete frontend `/projects` / `/stacks` / `/sites` routes. +> The `internal/staticsite` package stays alive only for the legacy +> `/api/sites/*` HTTP routes — once those drop, it dies with them. ## Status at a glance | Item | Priority | Status | | ---- | -------- | ------ | | Triggers as first-class reusable entities | 1 | **DONE** (2026-05-16) | -| Static source inline port | 1 | **PENDING — current focus** | -| Hard legacy cutover | 1 | **PENDING** — gated by static port (volume scopes blocker is resolved) | +| Static source inline port | 1 | **DONE** (2026-05-16) | +| Hard legacy cutover | 1 | **PENDING — current focus** | | Generalized volume scopes | 2 | DONE | | Kind-aware editors (compose / image / static) | 2 | DONE | | Vendor-specific webhook parsing | 2 | DONE | @@ -148,39 +152,99 @@ replaced with `` to honor the project rule. typescript-reviewer subagents** — 0 CRITICAL; 5 HIGH and 4 MEDIUM findings addressed inline before merge. -### Static source inline port — ~2150 LOC across 8 files +### ~~Static source inline port~~ — DONE (2026-05-16) -The current `internal/workload/plugin/source/static/` delegates to -`staticsite.Manager` via a phantom-row adapter -(`cmd/server/static_backend.go`) that keeps a synthetic row in the legacy -`static_sites` table per workload. This works but blocks the hard cutover — -you can't drop `static_sites` until the adapter is gone. +The phantom-row adapter (`cmd/server/static_backend.go`) is deleted; the +static plugin now operates directly on `plugin.Workload`, the `containers` +table, and `workload_env`. The deploy pipeline body lives inline in +`internal/workload/plugin/source/static/{deploy,teardown,reconcile, +state,env,build,naming,static}.go`. 
-To port inline, the deploy pipeline body has to move into -`internal/workload/plugin/source/static/`: +**State migration:** the legacy `static_sites` columns +(`last_commit_sha`, `last_sync_at`, `last_error`, `status`, +`container_id`, `proxy_route_id`) are now persisted on the container +row keyed `<workload_id>:site` — deterministic ID, single row per +workload. First-class fields (`container_id`, `proxy_route_id`, +`subdomain`, `state`, `port`, `image_ref`) move into their dedicated +columns on the `containers` table; the rest live in +`containers.extra_json` via a typed `runtimeState` struct that +preserves unknown keys on round-trip (so future writers can extend +`extra_json` without forcing this struct to grow). `workload_env` +replaces `static_site_secrets` for plugin-native workloads. -| Source file | Lines | What to keep / port | -| --- | --- | --- | -| `internal/staticsite/manager.go` | 834 | Deploy / Stop / status pipeline. State should move to `containers` rows + `workload_env` instead of `static_sites`. | -| `internal/staticsite/gitea_content.go` | 360 | Keep as helper — Gitea content download/listing. | -| `internal/staticsite/github_provider.go` | 276 | Keep as helper. | -| `internal/staticsite/gitlab_provider.go` | 254 | Keep as helper. | -| `internal/staticsite/healthcheck.go` | 111 | Convert to plugin Reconcile body. | -| `internal/staticsite/markdown.go` | 83 | Keep as helper. | -| `internal/staticsite/provider.go` | 171 | Keep — provider abstraction. | -| `internal/staticsite/deno/` | (sub-pkg) | Keep — Dockerfile + router.ts codegen. | +**Reused helpers:** `internal/staticsite/{provider,gitea_content, +github_provider,gitlab_provider,markdown,deno}` stay alive (and +exported) as helpers — providers are still imported via +`staticsite.NewGitProvider`. The `staticsite.Manager` itself stays +alive only to service the legacy `/api/sites/*` HTTP routes; once +those drop in the cutover the package can be deleted entirely. 
-Estimated as its own dedicated turn (or two). Strategy: keep the provider -abstraction + helpers exported; rewrite only `Manager.Deploy` body into a new -`source/static/deploy.go` that operates against `plugin.Workload` directly and -writes container rows + workload_env rather than the `static_sites` table. +**Hardening landed alongside the port** (from `go-reviewer` + +`security-reviewer` subagent passes — 1 CRITICAL, 5 HIGH, 3 MEDIUM +addressed before merge): + +- **Path-traversal defense:** providers (`gitea_content.go`, + `github_provider.go`, `gitlab_provider.go`) reject any tree entry + whose resolved local path escapes `destDir`; the static plugin's + `verifyDownloadInsideRoot` walks the build dir post-download as a + second line of defense; `copyDir` uses `filepath.WalkDir` + `Lstat` + to refuse symlinks and non-regular files. +- **Error sanitization:** a `sanitizeError` helper redacts the + decrypted access token, collapses to one line, and clamps to 240 + bytes before any error string lands in `runtimeState.LastError` + (persisted in `extra_json`) or fans out to the notification + webhook. +- **Resource naming with workload-ID short suffix:** container, + image, and storage volume names all carry `idShort(w)` so two + workloads sharing a name can't clobber each other's resources + (workload `name` is not UNIQUE in the schema). +- **Per-workload mutex on `saveState`:** serializes the read-modify- + write of `containers.extra_json` so two parallel deploys for the + same workload can't race to clobber each other's + `container_id` / `proxy_route_id`. +- **`saveState` failure on the success path is fatal:** rolls back + the just-created container + proxy route and writes a "failed" + state, so we don't leak a running container with no row pointing + at it. +- **`primaryDomain` reads `settings.Domain`** to complete a bare + subdomain face into a full FQDN (matches legacy Manager behavior). 
+- **`time.Sleep` honors `ctx.Done()`** during the post-start health + window. +- **`json.Marshal` for event metadata + `strings.HasPrefix` for + failed-status detection** — replaces the prior fmt.Sprintf JSON + template + brittle slice expression. + +**Touch points (final):** + +- `internal/workload/plugin/source/static/{static,deploy,teardown, + reconcile,state,env,build,naming}.go` — the inline plugin. +- `internal/staticsite/{gitea_content,github_provider, + gitlab_provider}.go` — added the path-traversal guards. +- `cmd/server/main.go` — `wireStaticBackend(...)` call removed; the + existing blank import on `_ "internal/workload/plugin/source/ + static"` now drives `init()` registration. +- `cmd/server/static_backend.go` — deleted. + +**Behavioral notes for operators:** + +- Plugin-native static workloads no longer write to the `static_sites` + table at all — anything querying that table for plugin-native + workloads (operator dashboards, ad-hoc SQL) sees stale or absent + values. The legacy `/api/sites/*` routes still serve original rows + unchanged. +- Container labels `tinyforge.static-site` / `tinyforge.static-site-name` + are no longer set on plugin-native deploys; the canonical + `tinyforge.workload.id` / `.kind` labels (added by + `docker.ContainerConfig`) cover ownership. +- Container, image, and volume names all gained an 8-char ID suffix + (e.g. `dw-site-mysite-a1b2c3d4`). Existing legacy-deployed sites + keep their old `dw-site-mysite` shape until they're redeployed + through the plugin path. ### Hard legacy cutover -Sole remaining blocker is the static source inline port above. The -generalized-volume-scopes blocker is resolved (legacy `ResolvePath` -stays in place for legacy callers and dies with the cutover). When the -static port lands: +The static-source inline port (above) is now complete; the cutover is +unblocked. Proceeding with the cutover means: - Delete `/api/projects`, `/api/stacks`, `/api/sites`, `/api/stages` handlers. 
- Drop tables: `projects`, `stages`, `stacks`, `stack_revisions`, diff --git a/internal/staticsite/gitea_content.go b/internal/staticsite/gitea_content.go index df1b1b2..c1072e9 100644 --- a/internal/staticsite/gitea_content.go +++ b/internal/staticsite/gitea_content.go @@ -268,6 +268,17 @@ func (f *GiteaContentFetcher) DownloadFolder(ctx context.Context, owner, repo, b relativePath := strings.TrimPrefix(entry.Path, prefix) localPath := filepath.Join(destDir, filepath.FromSlash(relativePath)) + // Path-traversal defense: reject anything whose resolved + // destination escapes destDir. A hostile (or compromised) + // Gitea instance could return tree entries with `..` in + // the path; filepath.Join cleans them and would otherwise + // write outside the build context. + cleanDest := filepath.Clean(destDir) + if cleanRel := filepath.Clean(localPath); cleanRel != cleanDest && + !strings.HasPrefix(cleanRel, cleanDest+string(os.PathSeparator)) { + return fmt.Errorf("rejecting tree entry outside dest: %s", relativePath) + } + // Create parent directories. if err := os.MkdirAll(filepath.Dir(localPath), 0o755); err != nil { return fmt.Errorf("create directory for %s: %w", relativePath, err) diff --git a/internal/staticsite/github_provider.go b/internal/staticsite/github_provider.go index 7916dc0..6bed971 100644 --- a/internal/staticsite/github_provider.go +++ b/internal/staticsite/github_provider.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "net/http" + "os" "path/filepath" "strings" "time" @@ -221,6 +222,15 @@ func (g *GitHubProvider) DownloadFolder(ctx context.Context, owner, repo, branch relativePath := strings.TrimPrefix(entry.Path, prefix) localPath := filepath.Join(destDir, filepath.FromSlash(relativePath)) + // Path-traversal defense: refuse tree entries whose resolved + // path escapes destDir. A hostile/compromised GHE could + // otherwise deliver `..`-laden entries. 
+ cleanDest := filepath.Clean(destDir) + if cleanRel := filepath.Clean(localPath); cleanRel != cleanDest && + !strings.HasPrefix(cleanRel, cleanDest+string(os.PathSeparator)) { + return fmt.Errorf("rejecting tree entry outside dest: %s", relativePath) + } + // GitHub raw content URL. // For github.com: https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{path} // For GHE: {baseURL}/{owner}/{repo}/raw/{branch}/{path} diff --git a/internal/staticsite/gitlab_provider.go b/internal/staticsite/gitlab_provider.go index 8799fc8..ad0308e 100644 --- a/internal/staticsite/gitlab_provider.go +++ b/internal/staticsite/gitlab_provider.go @@ -7,6 +7,7 @@ import ( "io" "net/http" "net/url" + "os" "path/filepath" "strings" "time" @@ -208,6 +209,15 @@ func (g *GitLabProvider) DownloadFolder(ctx context.Context, owner, repo, branch relativePath := strings.TrimPrefix(entry.Path, prefix) localPath := filepath.Join(destDir, filepath.FromSlash(relativePath)) + // Path-traversal defense: reject tree entries whose resolved + // path escapes destDir (e.g. `../etc/passwd` smuggled through + // a hostile self-hosted GitLab). 
+ cleanDest := filepath.Clean(destDir) + if cleanRel := filepath.Clean(localPath); cleanRel != cleanDest && + !strings.HasPrefix(cleanRel, cleanDest+string(os.PathSeparator)) { + return fmt.Errorf("rejecting tree entry outside dest: %s", relativePath) + } + // GitLab raw file URL: {base}/{owner}/{repo}/-/raw/{branch}/{path} fileURL := fmt.Sprintf("%s/%s/%s/-/raw/%s/%s", g.rawBase, owner, repo, branch, entry.Path) diff --git a/internal/workload/plugin/source/static/build.go b/internal/workload/plugin/source/static/build.go new file mode 100644 index 0000000..9868e05 --- /dev/null +++ b/internal/workload/plugin/source/static/build.go @@ -0,0 +1,149 @@ +package static + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/alexei/tinyforge/internal/staticsite/deno" +) + +// prepareDenoBuild assembles the Deno container build context: api/ +// becomes the routes directory, every other file lands under public/, +// and a generated router.ts + Dockerfile finishes the context. +// +// Ported verbatim from internal/staticsite/manager.go so the legacy and +// plugin-native paths produce byte-identical containers from the same +// repo content. Fall back to copy when os.Rename hits EXDEV (cross- +// device) — the build dir and context dir live under the same temp +// root in production but tests may straddle filesystems. +func prepareDenoBuild(srcDir, contextDir string) error { + apiSrc := filepath.Join(srcDir, "api") + apiDst := filepath.Join(contextDir, "api") + if err := os.Rename(apiSrc, apiDst); err != nil { + return fmt.Errorf("move api dir: %w", err) + } + + publicDir := filepath.Join(contextDir, "public") + if err := os.Rename(srcDir, publicDir); err != nil { + // EXDEV (cross-device) — fall back to copy. 
+ if err := copyDir(srcDir, publicDir); err != nil { + return fmt.Errorf("copy public dir: %w", err) + } + } + + routes, err := deno.ScanRoutes(apiDst) + if err != nil { + return fmt.Errorf("scan routes: %w", err) + } + + routerSrc, err := deno.GenerateRouter(routes) + if err != nil { + return fmt.Errorf("generate router: %w", err) + } + + if err := os.WriteFile(filepath.Join(contextDir, "router.ts"), []byte(routerSrc), 0o644); err != nil { + return fmt.Errorf("write router.ts: %w", err) + } + + dockerfile := deno.GenerateDockerfile() + if err := os.WriteFile(filepath.Join(contextDir, "Dockerfile"), []byte(dockerfile), 0o644); err != nil { + return fmt.Errorf("write Dockerfile: %w", err) + } + + return nil +} + +// prepareStaticBuild assembles the nginx container build context: copy +// every file in srcDir into contextDir and emit the static Dockerfile. +func prepareStaticBuild(srcDir, contextDir string) error { + if err := copyDir(srcDir, contextDir); err != nil { + return fmt.Errorf("copy files: %w", err) + } + + dockerfile := deno.GenerateStaticDockerfile() + if err := os.WriteFile(filepath.Join(contextDir, "Dockerfile"), []byte(dockerfile), 0o644); err != nil { + return fmt.Errorf("write Dockerfile: %w", err) + } + + return nil +} + +// copyDir recursively copies a directory tree, preserving file modes. +// +// Defense in depth against attacker-controlled provider responses: +// uses Lstat (via filepath.WalkDir + d.Type()) so symlinks are +// rejected outright instead of dereferenced — a hostile repo could +// otherwise drop a symlink that copyDir would chase outside the +// build context (or through a dangling chain at build-time). Also +// rejects entries whose resolved destination escapes dst. 
func copyDir(src, dst string) error {
	cleanSrc := filepath.Clean(src)
	cleanDst := filepath.Clean(dst)
	return filepath.WalkDir(cleanSrc, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Reject anything that isn't a regular file or directory.
		// In particular: symlinks, devices, sockets, named pipes —
		// none of which belong in a static-site build context.
		// WalkDir does not follow symlinks, so d.Type() reports the
		// link itself rather than its target.
		if !d.IsDir() && !d.Type().IsRegular() {
			return fmt.Errorf("refusing to copy non-regular entry %s (mode %s)", path, d.Type())
		}

		relPath, err := filepath.Rel(cleanSrc, path)
		if err != nil {
			return err
		}
		dstPath := filepath.Join(cleanDst, relPath)

		// Belt-and-braces: filepath.Rel + Join shouldn't ever produce
		// an escaping path, but if a future refactor introduces one
		// (e.g. allowing non-cleaned roots), surface it loudly here
		// rather than silently writing outside the build context.
		if dstPath != cleanDst && !strings.HasPrefix(dstPath, cleanDst+string(os.PathSeparator)) {
			return fmt.Errorf("refusing to write outside build context: %s", dstPath)
		}

		if d.IsDir() {
			return os.MkdirAll(dstPath, 0o755)
		}

		info, err := d.Info()
		if err != nil {
			return err
		}
		// Perm() keeps only the rwx bits: setuid/setgid/sticky bits on
		// a downloaded file are not carried into the build context.
		return copyRegularFile(path, dstPath, info.Mode().Perm())
	})
}

// copyRegularFile streams srcPath into dstPath, creating or truncating
// the destination with the given permission bits. The content is
// streamed rather than loaded into memory, so large site assets do not
// inflate the process heap. A failure to close the destination is
// reported when the copy itself succeeded, because a failed close can
// mean the data never reached disk.
func copyRegularFile(srcPath, dstPath string, perm os.FileMode) (err error) {
	in, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	// (*os.File).ReadFrom copies until EOF without buffering the whole
	// file in this process.
	_, err = out.ReadFrom(in)
	return err
}

// verifyDownloadInsideRoot walks root and rejects any entry that has
// resolved to a symlink or whose resolved path escapes root. Used as a
// post-download guard against attacker-controlled tree responses from
// the Git provider — even though the providers themselves should
// never write outside their destination, this is the second line of
// defense and runs before the build context copy so a malicious
// download is contained.
+func verifyDownloadInsideRoot(root string) error { + cleanRoot := filepath.Clean(root) + return filepath.WalkDir(cleanRoot, func(path string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if !d.IsDir() && !d.Type().IsRegular() { + return fmt.Errorf("downloaded tree contains non-regular entry %s (mode %s)", + path, d.Type()) + } + clean := filepath.Clean(path) + if clean != cleanRoot && !strings.HasPrefix(clean, cleanRoot+string(os.PathSeparator)) { + return fmt.Errorf("downloaded entry escapes root: %s", clean) + } + return nil + }) +} diff --git a/internal/workload/plugin/source/static/deploy.go b/internal/workload/plugin/source/static/deploy.go new file mode 100644 index 0000000..8b0553d --- /dev/null +++ b/internal/workload/plugin/source/static/deploy.go @@ -0,0 +1,594 @@ +package static + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/moby/moby/api/types/mount" + + "github.com/alexei/tinyforge/internal/crypto" + "github.com/alexei/tinyforge/internal/docker" + "github.com/alexei/tinyforge/internal/events" + "github.com/alexei/tinyforge/internal/notify" + "github.com/alexei/tinyforge/internal/proxy" + "github.com/alexei/tinyforge/internal/staticsite" + "github.com/alexei/tinyforge/internal/store" + "github.com/alexei/tinyforge/internal/workload/plugin" +) + +// healthCheckDelay is the brief grace window after StartContainer +// before we probe IsContainerRunning. Short enough not to bog down a +// healthy deploy; long enough to catch crash-on-boot failures +// (missing env var, bad Dockerfile, port conflict). +const healthCheckDelay = 3 * time.Second + +// deploy runs one full sync of a static workload: fetch the latest +// commit, optionally rebuild the image, recreate the container, and +// reconfigure the proxy. 
Ported from internal/staticsite/manager.go to +// operate directly on plugin.Workload + container row state, without +// the legacy static_sites table. +// +// Behavior parity with the legacy path is the explicit goal — the +// log-line format ("Static site \"%s\": %s") and event payload shapes +// are preserved so log scrapers and SSE clients keep working through +// the cutover. +func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error { + cfg, err := plugin.SourceConfigOf[Config](w) + if err != nil { + return fmt.Errorf("static source: decode config: %w", err) + } + + prev, prevContainer, err := loadState(deps, w) + if err != nil { + return err + } + + // Manual / first-time deploys force a full rebuild even when the + // commit SHA is unchanged. The legacy Manager.Deploy was called + // with force=true from the adapter; preserve that semantic by + // treating any non-cron / non-git intent as forcing. + force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote" + + // Decrypt the access token if present. Kept in a local so the + // sanitizer can scrub it from any error string before persisting. 
+ token := "" + if cfg.AccessToken != "" { + decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken) + if derr != nil { + slog.Warn("static source: failed to decrypt access token", "site", w.Name, "error", derr) + } else { + token = decrypted + } + } + + provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token) + if err != nil { + updateStatus(deps, w, "failed", prev.LastCommitSHA, + sanitizeError(fmt.Sprintf("create provider: %v", err), token)) + return fmt.Errorf("create provider: %w", err) + } + + latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch) + if err != nil { + updateStatus(deps, w, "failed", prev.LastCommitSHA, + sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token)) + return fmt.Errorf("get latest commit: %w", err) + } + + // Resolve the public-facing domain from the workload's first enabled + // public face. Mirrors the synthetic-row adapter's logic so the + // proxy registration sees the same FQDN it did before. + domain := primaryDomain(deps, w) + + // Skip redeploy when nothing changed AND we have a live container + + // (if applicable) live proxy route. Manual deploys always force. 
+ prevContainerID := "" + prevProxyRouteID := "" + if prevContainer != nil { + prevContainerID = prevContainer.ContainerID + prevProxyRouteID = prevContainer.ProxyRouteID + } + if !force && latestSHA == prev.LastCommitSHA && prev.Status == "deployed" && prevContainerID != "" { + running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID) + if !running { + slog.Info("static site: container not running, forcing redeploy", "site", w.Name) + } else if domain != "" { + proxyOK, perr := deps.Proxy.RouteExists(ctx, domain) + if perr != nil { + slog.Warn("static site: proxy check failed, forcing redeploy", "site", w.Name, "error", perr) + } else if !proxyOK { + slog.Info("static site: proxy route missing, forcing redeploy", "site", w.Name) + } else { + slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA) + return nil + } + } else { + slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA) + return nil + } + } + + // Mark syncing. + updateStatus(deps, w, "syncing", prev.LastCommitSHA, "") + publishEvent(deps, w, "syncing") + + // Build context — temp dir cleaned up on every exit path. + buildDir, err := os.MkdirTemp("", "dw-site-"+idShort(w)+"-*") + if err != nil { + updateStatus(deps, w, "failed", prev.LastCommitSHA, + sanitizeError(fmt.Sprintf("create temp dir: %v", err), token)) + return fmt.Errorf("create temp dir: %w", err) + } + defer os.RemoveAll(buildDir) + + if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, cfg.FolderPath, buildDir); err != nil { + updateStatus(deps, w, "failed", prev.LastCommitSHA, + sanitizeError(fmt.Sprintf("download folder: %v", err), token)) + return fmt.Errorf("download folder: %w", err) + } + + // Defense in depth: providers should never write outside buildDir, + // but a hostile self-hosted Gitea/GitLab the operator pointed at + // could in principle return a tree entry that escapes. Verify + // before the copy step materializes the build context. 
+ if err := verifyDownloadInsideRoot(buildDir); err != nil { + updateStatus(deps, w, "failed", prev.LastCommitSHA, + sanitizeError(fmt.Sprintf("downloaded tree rejected: %v", err), token)) + return fmt.Errorf("downloaded tree rejected: %w", err) + } + + if cfg.RenderMarkdown { + if err := staticsite.RenderMarkdownFiles(buildDir); err != nil { + slog.Warn("static site: markdown rendering failed", "site", w.Name, "error", err) + } + } + + // Detect mode: deno requires an api/ folder. Fall back to static if + // the operator declared deno but the repo doesn't carry routes. + mode := cfg.Mode + apiDir := filepath.Join(buildDir, "api") + hasAPI := false + if info, err := os.Stat(apiDir); err == nil && info.IsDir() { + hasAPI = true + } + if mode == "deno" && !hasAPI { + mode = "static" + slog.Info("static site: no api/ folder found, falling back to static mode", "site", w.Name) + } + + imageTag := imageTagFor(w) + contextDir, err := os.MkdirTemp("", "dw-site-build-*") + if err != nil { + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("create build context: %v", err), token)) + return fmt.Errorf("create build context dir: %w", err) + } + defer os.RemoveAll(contextDir) + + if mode == "deno" { + if err := prepareDenoBuild(buildDir, contextDir); err != nil { + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("prepare deno build: %v", err), token)) + return fmt.Errorf("prepare deno build: %w", err) + } + } else { + if err := prepareStaticBuild(buildDir, contextDir); err != nil { + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("prepare static build: %v", err), token)) + return fmt.Errorf("prepare static build: %w", err) + } + } + + if err := deps.Docker.BuildImage(ctx, contextDir, imageTag); err != nil { + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("build image: %v", err), token)) + return fmt.Errorf("build image: %w", err) + } + + env := buildEnv(deps, w.ID) + + containerPort 
:= "80" + if mode == "deno" { + containerPort = "8000" + } + + settings, err := deps.Store.GetSettings() + if err != nil { + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("get settings: %v", err), token)) + return fmt.Errorf("get settings: %w", err) + } + + networkName := settings.Network + networkID, err := deps.Docker.EnsureNetwork(ctx, networkName) + if err != nil { + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("ensure network: %v", err), token)) + return fmt.Errorf("ensure network: %w", err) + } + + containerName := containerNameFor(w) + + var mounts []mount.Mount + if cfg.StorageEnabled && mode == "deno" { + volName, volErr := deps.Docker.EnsureSiteVolume(ctx, siteVolumeKey(w)) + if volErr != nil { + slog.Warn("static site: failed to ensure storage volume", "site", w.Name, "error", volErr) + } else { + mounts = append(mounts, mount.Mount{ + Type: mount.TypeVolume, + Source: volName, + Target: "/app/data", + }) + slog.Info("static site: storage volume attached", "site", w.Name, "volume", volName) + } + } + + // Per-face proxy labels (Traefik picks these up; NPM ignores them). + // Static workloads have at most one face today, but iterate for + // future multi-face parity with the image source. + labels := map[string]string{} + if domain != "" { + port, _ := strconv.Atoi(containerPort) + if l := deps.Proxy.ContainerLabels(domain, port); l != nil { + for k, v := range l { + labels[k] = v + } + } + } + + cc := docker.ContainerConfig{ + Name: containerName, + Image: imageTag, + Env: env, + ExposedPorts: []string{containerPort + "/tcp"}, + NetworkName: networkName, + NetworkID: networkID, + Mounts: mounts, + Labels: labels, + WorkloadID: w.ID, + WorkloadKind: string(store.WorkloadKindSite), + Role: "", + } + + containerID, err := deps.Docker.CreateContainer(ctx, cc) + if err != nil { + // Container with this name might already exist — best-effort + // cleanup of any prior container by ID and by name, then retry. 
+ if prevContainerID != "" { + deps.Docker.StopContainer(ctx, prevContainerID, 10) + deps.Docker.RemoveContainer(ctx, prevContainerID, true) + } + removeContainerByName(ctx, deps, containerName) + + containerID, err = deps.Docker.CreateContainer(ctx, cc) + if err != nil { + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("create container: %v", err), token)) + return fmt.Errorf("create container: %w", err) + } + } + + if err := deps.Docker.StartContainer(ctx, containerID); err != nil { + deps.Docker.RemoveContainer(ctx, containerID, true) + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("start container: %v", err), token)) + return fmt.Errorf("start container: %w", err) + } + + // Brief health-check window — verify the container survives a few + // seconds after start, surfacing the tail of its logs as the + // failure reason if it crashes. Honor ctx so a cancelled deploy + // returns promptly instead of waiting out the full delay. + select { + case <-ctx.Done(): + deps.Docker.RemoveContainer(ctx, containerID, true) + updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check") + return ctx.Err() + case <-time.After(healthCheckDelay): + } + running, runErr := deps.Docker.IsContainerRunning(ctx, containerID) + if runErr != nil || !running { + logMsg := "container exited immediately after start" + if logs, logErr := deps.Docker.ContainerLogs(ctx, containerID, false, "20"); logErr == nil { + buf, _ := io.ReadAll(logs) + logs.Close() + if len(buf) > 0 { + logMsg = sanitizeError(string(buf), token) + } + } + deps.Docker.RemoveContainer(ctx, containerID, true) + updateStatus(deps, w, "failed", latestSHA, logMsg) + return fmt.Errorf("container not running: %s", logMsg) + } + + // Resolve proxy target. Default to in-network DNS (containerName); + // switch to (settings.ServerIP, hostPort) under NPM remote mode. 
+ internalPort, _ := strconv.Atoi(containerPort) + forwardHost := containerName + forwardPort := internalPort + if settings.NpmRemote && settings.ProxyProvider == "npm" { + if settings.ServerIP != "" { + hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp") + if hpErr != nil { + slog.Warn("static site: could not get host port for remote NPM", "site", w.Name, "error", hpErr) + } else { + forwardHost = settings.ServerIP + forwardPort = int(hostPort) + } + } + } + + // Configure proxy if a domain is set. Replace any prior route in- + // place so traffic shifts atomically. + proxyRouteID := prevProxyRouteID + if domain != "" { + if prevProxyRouteID != "" { + deps.Proxy.DeleteRoute(ctx, prevProxyRouteID) + } + routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{ + SSLCertificateID: settings.SSLCertificateID, + }) + if rerr != nil { + slog.Warn("static site: failed to configure proxy", + "site", w.Name, "domain", domain, + "target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr) + } else { + proxyRouteID = routeID + slog.Info("static site: proxy configured", + "site", w.Name, "domain", domain, + "target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID) + } + } + + // Drop the old container if a fresh one was created (different ID). + if prevContainerID != "" && prevContainerID != containerID { + deps.Docker.StopContainer(ctx, prevContainerID, 10) + deps.Docker.RemoveContainer(ctx, prevContainerID, true) + } + + // Single transactional write of the new state + container metadata. + // On failure: tear down the just-created container and proxy route + // so we don't leave orphans behind. The next deploy would otherwise + // see no row and try to create a third container. 
+ if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) { + rs.LastCommitSHA = latestSHA + rs.LastSyncAt = store.Now() + rs.LastError = "" + rs.Status = "deployed" + + c.ContainerID = containerID + c.ProxyRouteID = proxyRouteID + c.Subdomain = domain + c.State = "running" + c.Port = internalPort + c.ImageRef = imageTag + }); err != nil { + slog.Error("static site: failed to persist deploy state — rolling back", + "site", w.Name, "error", err) + if proxyRouteID != "" { + deps.Proxy.DeleteRoute(ctx, proxyRouteID) + } + deps.Docker.StopContainer(ctx, containerID, 10) + deps.Docker.RemoveContainer(ctx, containerID, true) + // Best-effort failure-state write so the operator sees the + // deploy failed instead of a silent gap. If even this fails + // we have nothing left to log. + updateStatus(deps, w, "failed", latestSHA, + sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token)) + return fmt.Errorf("persist deploy state: %w", err) + } + + publishEvent(deps, w, "deployed") + + // updateStatus normally fires the terminal-state notification; the + // success path above wrote state via saveState directly, so dispatch + // the deployed notification explicitly here. + dispatchSiteNotification(deps, w, domain, "deployed", "") + + shaDisplay := latestSHA + if len(shaDisplay) > 8 { + shaDisplay = shaDisplay[:8] + } + slog.Info("static site deployed", "site", w.Name, "sha", shaDisplay, "mode", mode) + return nil +} + +// updateStatus writes the runtime state's status/error/commit fields +// and fires the side effects the legacy Manager.updateStatus did: +// failures land in the event log, and terminal transitions trigger an +// outbound notification. +// +// On the deploy success path saveState is called directly (with the +// full container metadata in the same write); this helper covers the +// failure / intermediate transitions where only state moves. 
+func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) {
+	// Container-row state mirrored for each status. "syncing" (like any
+	// status not listed) is deliberately absent: an in-progress sync
+	// leaves the row on whatever value the previous deploy left.
+	rowStates := map[string]string{
+		"deployed": "running",
+		"stopped":  "stopped",
+		"failed":   "failed",
+	}
+	apply := func(rs *runtimeState, c *store.Container) {
+		rs.Status, rs.LastError = status, errMsg
+		// An empty commitSHA means "keep the SHA already recorded".
+		if commitSHA != "" {
+			rs.LastCommitSHA = commitSHA
+		}
+		// Reflect status into the container row state column so the
+		// global containers index stays useful for filtered queries.
+		if rowState, known := rowStates[status]; known {
+			c.State = rowState
+		}
+	}
+	if err := saveState(deps, w, apply); err != nil {
+		// A failed write is logged rather than returned; the event and
+		// notification below still go out.
+		slog.Error("static site: failed to update status", "id", w.ID, "status", status, "error", err)
+	}
+
+	// Failures land in the event log; both terminal states notify.
+	switch status {
+	case "failed":
+		publishEvent(deps, w, "failed: "+errMsg)
+		dispatchSiteNotification(deps, w, primaryDomain(deps, w), status, errMsg)
+	case "deployed":
+		dispatchSiteNotification(deps, w, primaryDomain(deps, w), status, errMsg)
+	}
+}
+
+// dispatchSiteNotification fires a site_sync_success or
+// site_sync_failure event to the configured outbound webhook.
+// Resolution: per-workload URL+secret first, then fall through to
+// settings.notification_url/secret. Always best-effort.
+func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
+	// No notifier wired means notifications are disabled entirely.
+	if deps.Notifier == nil {
+		return
+	}
+	settings, err := deps.Store.GetSettings()
+	if err != nil {
+		slog.Warn("static site: notify settings lookup failed", "site", w.ID, "error", err)
+		return
+	}
+	target, key, tier := resolveSiteTarget(w, settings)
+	if target == "" {
+		// Neither the workload nor the global settings carry a URL.
+		return
+	}
+
+	// Start from the success shape and adjust for failure / domain.
+	evt := notify.Event{
+		Type:    "site_sync_success",
+		Project: w.Name,
+		Error:   errMsg,
+	}
+	if status == "failed" {
+		evt.Type = "site_sync_failure"
+	}
+	if domain != "" {
+		evt.URL = "https://" + domain
+	}
+	deps.Notifier.SendSigned(target, key, tier, evt)
+}
+
+// resolveSiteTarget picks the webhook URL, signing secret and tier for
+// a site notification. The workload's own notification URL wins when
+// set; otherwise the values from the global settings are returned.
+func resolveSiteTarget(w plugin.Workload, settings store.Settings) (string, string, notify.Tier) {
+	if w.NotificationURL == "" {
+		return settings.NotificationURL, settings.NotificationSecret, notify.TierSettings
+	}
+	return w.NotificationURL, w.NotificationSecret, notify.TierSite
+}
+
+// publishEvent emits a static_site_status event on the bus AND
+// persists an event_log row so the dashboard's audit trail picks it
+// up. Message format ("Static site \"%s\": %s") is preserved verbatim
+// from the legacy Manager.publishEvent so log scrapers and operator-
+// configured event triggers keep matching.
+func publishEvent(deps plugin.Deps, w plugin.Workload, status string) {
+	const source = "static_site"
+
+	// 1. Live status update on the bus.
+	statusPayload := events.StaticSiteStatusPayload{
+		SiteID: w.ID,
+		Name:   w.Name,
+		Status: status,
+	}
+	deps.Events.Publish(events.Event{
+		Type:    events.EventStaticSiteStatus,
+		Payload: statusPayload,
+	})
+
+	// 2. Persisted audit row. Any status starting with "failed" is an
+	// error; everything else is informational.
+	severity := "info"
+	if strings.HasPrefix(status, "failed") {
+		severity = "error"
+	}
+	message := fmt.Sprintf("Static site %q: %s", w.Name, status)
+
+	// Metadata goes through json.Marshal so workload names containing
+	// quotes or backslashes still yield valid JSON; an empty object is
+	// the fallback if marshalling fails.
+	metadata := "{}"
+	if raw, merr := json.Marshal(map[string]string{
+		"site_id":   w.ID,
+		"site_name": w.Name,
+		"status":    status,
+	}); merr != nil {
+		slog.Error("static site: marshal event metadata", "error", merr)
+	} else {
+		metadata = string(raw)
+	}
+
+	record := store.EventLog{
+		Source:   source,
+		Severity: severity,
+		Message:  message,
+		Metadata: metadata,
+	}
+	saved, err := deps.Store.InsertEvent(record)
+	if err != nil {
+		slog.Error("static site: failed to persist event log", "error", err)
+		return
+	}
+
+	// 3. Announce the stored row, carrying the ID and timestamp the
+	// store returned for it.
+	deps.Events.Publish(events.Event{
+		Type: events.EventLog,
+		Payload: events.EventLogPayload{
+			ID:        saved.ID,
+			Source:    source,
+			Severity:  severity,
+			Message:   message,
+			Metadata:  metadata,
+			CreatedAt: saved.CreatedAt,
+		},
+	})
+}
+
+// removeContainerByName lists Docker's containers and stops + removes
+// the first one whose name equals name, so a name conflict in
+// CreateContainer is recoverable. Best-effort: a listing failure or a
+// missing match is silently ignored, as are stop/remove errors.
+func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) {
+	listed, err := deps.Docker.ListContainers(ctx, nil)
+	if err != nil {
+		return
+	}
+	for _, candidate := range listed {
+		if candidate.Name != name {
+			continue
+		}
+		deps.Docker.StopContainer(ctx, candidate.ID, 10)
+		deps.Docker.RemoveContainer(ctx, candidate.ID, true)
+		return
+	}
+}
+
+// primaryDomain derives the public-facing FQDN from the workload's
+// first enabled public face.
Static workloads support at most one
+// face today, but iterate defensively in case the API contract
+// loosens later. An empty return means "no proxy registration"; the
+// container still runs and is reachable inside the docker network.
+//
+// For the bare-subdomain case (Domain == "" but Subdomain != "") the
+// helper appends settings.Domain to form a complete FQDN — matching
+// the legacy Manager which let settings.Domain fall through silently.
+// On a settings lookup failure the bare subdomain is returned as-is
+// so the proxy still gets *something* to register.
+func primaryDomain(deps plugin.Deps, w plugin.Workload) string {
+	for _, face := range w.PublicFaces {
+		sub, dom := face.Subdomain, face.Domain
+
+		// Explicit domain: use it, prefixed by the subdomain if any.
+		if dom != "" {
+			if sub == "" {
+				return dom
+			}
+			return sub + "." + dom
+		}
+
+		// Face carries neither field — try the next one.
+		if sub == "" {
+			continue
+		}
+
+		// Bare subdomain: complete it with the globally configured
+		// domain when one is available.
+		settings, err := deps.Store.GetSettings()
+		if err == nil && settings.Domain != "" {
+			return sub + "." + settings.Domain
+		}
+		return sub
+	}
+	return ""
+}
diff --git a/internal/workload/plugin/source/static/env.go b/internal/workload/plugin/source/static/env.go
new file mode 100644
index 0000000..74b4da8
--- /dev/null
+++ b/internal/workload/plugin/source/static/env.go
@@ -0,0 +1,42 @@
+package static
+
+import (
+	"log/slog"
+
+	"github.com/alexei/tinyforge/internal/crypto"
+	"github.com/alexei/tinyforge/internal/workload/plugin"
+)
+
+// buildEnv flattens workload_env rows into the KEY=VALUE list Docker
+// expects. Mirrors image/image.go:buildEnv but without an embedded
+// cfg.Env map — the static source only carries env via workload_env
+// today (legacy static_site_secrets has been replaced by the unified
+// workload_env table during the workload refactor).
+//
+// Encrypted rows are decrypted lazily so plaintext never lives in the
+// store output.
A decrypt failure logs and skips the entry rather than
+// failing the whole deploy: bricking a sync because one rotated key
+// missed an env entry would be worse than running with the variable
+// unset and surfacing the warning.
+func buildEnv(deps plugin.Deps, workloadID string) []string {
+	rows, err := deps.Store.ListWorkloadEnv(workloadID)
+	if err != nil {
+		// The listing failure is logged and the caller gets a nil env.
+		slog.Warn("static source: list workload env", "workload", workloadID, "error", err)
+		return nil
+	}
+
+	env := make([]string, 0, len(rows))
+	for _, entry := range rows {
+		// Plaintext rows pass straight through.
+		if !entry.Encrypted {
+			env = append(env, entry.Key+"="+entry.Value)
+			continue
+		}
+		plain, derr := crypto.Decrypt(deps.EncKey, entry.Value)
+		if derr != nil {
+			// Skip just this variable; the rest of the env still applies.
+			slog.Warn("static source: decrypt env value",
+				"workload", workloadID, "key", entry.Key, "error", derr)
+			continue
+		}
+		env = append(env, entry.Key+"="+plain)
+	}
+	return env
+}
diff --git a/internal/workload/plugin/source/static/naming.go b/internal/workload/plugin/source/static/naming.go
new file mode 100644
index 0000000..666af9e
--- /dev/null
+++ b/internal/workload/plugin/source/static/naming.go
@@ -0,0 +1,79 @@
+package static
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/alexei/tinyforge/internal/workload/plugin"
+)
+
+// idShort returns the first 8 bytes of a workload ID (or the whole ID
+// when it is shorter than that). It is the uniqueness suffix on every
+// Docker resource (container, image, volume) the static source
+// materializes. Workload names are not UNIQUE in the schema today;
+// including the ID short prevents two workloads with the same name
+// from clobbering each other's container, image, or storage volume.
+func idShort(w plugin.Workload) string {
+	const shortLen = 8
+	if len(w.ID) >= shortLen {
+		return w.ID[:shortLen]
+	}
+	return w.ID
+}
+
+// containerNameFor is the deterministic container name. Includes
+// w.Name for visual continuity in `docker ps` plus the ID short for
+// uniqueness.
+func containerNameFor(w plugin.Workload) string {
+	return fmt.Sprintf("dw-site-%s-%s", w.Name, idShort(w))
+}
+
+// imageTagFor is the deterministic image tag — same shape as the
+// container name so the linkage between an image and the workload
+// that owns it stays obvious from `docker images`.
+func imageTagFor(w plugin.Workload) string {
+	return fmt.Sprintf("dw-site-%s-%s:latest", w.Name, idShort(w))
+}
+
+// siteVolumeKey is the input to docker.SiteVolumeName / EnsureSiteVolume
+// / RemoveSiteVolume. Composing it here (instead of building the full
+// name ourselves) keeps the naming concern in one place — those docker
+// helpers wrap the value with their own `tinyforge-site-...-data`
+// envelope. Including idShort prevents two workloads sharing a name
+// from sharing one persistent volume.
+func siteVolumeKey(w plugin.Workload) string {
+	return fmt.Sprintf("%s-%s", w.Name, idShort(w))
+}
+
+// sanitizeError clamps an error string so persisting it (in
+// containers.extra_json's last_error) or echoing it (via the
+// outbound notification webhook) cannot leak a multi-line response
+// body, an HTTP header echoing the access token, or a stack trace.
+//
+// Strategy:
+//   - Redact the access token verbatim if it appears in the message
+//     (defense in depth — providers shouldn't echo tokens but a
+//     misbehaving one could). An empty accessToken disables this step.
+//   - Reduce to a single line (replace any newline / tab with space).
+//   - Cap to a short maxLen (in bytes) so a very long Gitea/GitHub
+//     error body never round-trips into operator-visible state. The
+//     cut never splits a multi-byte character; "…" marks truncation.
+//
+// An empty msg is returned unchanged.
+func sanitizeError(msg, accessToken string) string {
+	if msg == "" {
+		return ""
+	}
+	// Redact before truncating so a token straddling the cut point
+	// cannot survive as a partial prefix.
+	if accessToken != "" {
+		msg = strings.ReplaceAll(msg, accessToken, "[REDACTED]")
+	}
+	// Flatten line breaks and tabs to spaces so the message stays on
+	// one line.
+	msg = strings.Map(func(r rune) rune {
+		switch r {
+		case '\n', '\r', '\t':
+			return ' '
+		}
+		return r
+	}, msg)
+	const maxLen = 240
+	if len(msg) > maxLen {
+		// maxLen is a byte offset and may land inside a multi-byte
+		// rune; ToValidUTF8 drops the dangling partial bytes so the
+		// persisted message stays valid UTF-8.
+		msg = strings.ToValidUTF8(msg[:maxLen], "") + "…"
+	}
+	return msg
+}
diff --git a/internal/workload/plugin/source/static/reconcile.go b/internal/workload/plugin/source/static/reconcile.go
new file mode 100644
index 0000000..804f5c4
--- /dev/null
+++ b/internal/workload/plugin/source/static/reconcile.go
@@ -0,0 +1,71 @@
+package static
+
+import (
+	"context"
+	"log/slog"
+
+	"github.com/alexei/tinyforge/internal/store"
+	"github.com/alexei/tinyforge/internal/workload/plugin"
+)
+
+// reconcile syncs the container row's state column with Docker reality
+// for this workload's single container, and marks the runtime state as
+// "failed" if the container is gone or has crashed since the last
+// deploy. Intentionally minimal — the legacy HealthChecker still
+// services rows in the static_sites table, so we don't need to mirror
+// its full behavior here. Future versions can re-deploy on a missing
+// container; today we just keep the index honest.
+func reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error {
+	st, prevContainer, err := loadState(deps, w)
+	if err != nil {
+		return err
+	}
+	if prevContainer == nil || prevContainer.ContainerID == "" {
+		return nil
+	}
+
+	running, err := deps.Docker.IsContainerRunning(ctx, prevContainer.ContainerID)
+	if err != nil {
+		// Most likely "no such container" — mark the row missing so
+		// the UI surfaces it; the runtime state's status moves to
+		// "failed" so the dashboard does not falsely report deployed.
+		if uerr := deps.Store.UpdateContainerState(prevContainer.ID, "missing"); uerr != nil {
+			slog.Warn("static source: mark missing", "site", w.Name, "error", uerr)
+		}
+		if st.Status == "deployed" {
+			if uerr := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
+				rs.Status = "failed"
+				rs.LastError = "container not found"
+				c.State = "missing"
+			}); uerr != nil {
+				slog.Warn("static source: persist missing-state", "site", w.Name, "error", uerr)
+			}
+			publishEvent(deps, w, "failed: container not found")
+		}
+		return nil
+	}
+
+	desired := "running"
+	if !running {
+		desired = "stopped"
+	}
+	if prevContainer.State != desired {
+		if err := deps.Store.UpdateContainerState(prevContainer.ID, desired); err != nil {
+			slog.Warn("static source: state sync", "site", w.Name, "error", err)
+		}
+	}
+
+	// Keep runtime status honest: a deployed-then-crashed container
+	// should report failed so the dashboard / event triggers fire.
+	if !running && st.Status == "deployed" {
+		if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
+			rs.Status = "failed"
+			rs.LastError = "container stopped unexpectedly"
+			c.State = "stopped"
+		}); err != nil {
+			slog.Warn("static source: persist crashed-state", "site", w.Name, "error", err)
+		}
+		publishEvent(deps, w, "failed: container stopped unexpectedly")
+	}
+	return nil
+}
diff --git a/internal/workload/plugin/source/static/state.go b/internal/workload/plugin/source/static/state.go
new file mode 100644
index 0000000..d58cc21
--- /dev/null
+++ b/internal/workload/plugin/source/static/state.go
@@ -0,0 +1,180 @@
+package static
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log/slog"
+	"sync"
+
+	"github.com/alexei/tinyforge/internal/store"
+	"github.com/alexei/tinyforge/internal/workload/plugin"
+)
+
+// runtimeState is the per-workload state that the legacy static_sites
+// table used to track on its own row (last_commit_sha, last_sync_at,
+// status/error).
With the cutover off the synthetic-row adapter these
+// fields live inside the container row's extra_json blob keyed by the
+// deterministic row ID `<workload ID>:site` (see containerRowID).
+//
+// Unknown keys in extra_json are preserved across read+write so future
+// writers (e.g. per-face route maps) can extend the blob without
+// forcing this struct to grow. Decoding into a typed wrapper on its
+// own would silently drop them; saveState round-trips through a
+// generic map first, then merges the typed fields.
+type runtimeState struct {
+	// LastCommitSHA is the commit SHA recorded by the most recent
+	// status write that supplied one.
+	LastCommitSHA string `json:"last_commit_sha,omitempty"`
+	// LastSyncAt is the timestamp of the last successful deploy.
+	LastSyncAt string `json:"last_sync_at,omitempty"`
+	// LastError is the failure reason of the last transition; an empty
+	// value is omitted from the blob entirely (omitempty).
+	LastError string `json:"last_error,omitempty"`
+	// Status mirrors the legacy static_sites.status column ("syncing",
+	// "deployed", "failed", "stopped"). Kept in extra_json so callers
+	// can still answer "is this site healthy?" without a Docker probe.
+	Status string `json:"status,omitempty"`
+}
+
+// runtimeStateKeys lists every JSON field name owned by runtimeState.
+// saveState strips these from the generic map before re-emitting so
+// the typed values don't double-write under both their JSON tag and
+// any subsequent extension's tag — and so that clearing a field
+// (LastError → "") actually removes the key instead of being shadowed
+// by a stale carry-over.
+//
+// Must be kept in sync with the json tags on runtimeState above.
+var runtimeStateKeys = []string{"last_commit_sha", "last_sync_at", "last_error", "status"}
+
+// containerRowID is the deterministic ID for the single container row
+// owned by a static workload: the workload ID with a ":site" suffix.
+// Stable across redeploys so saveState can upsert in place.
+func containerRowID(w plugin.Workload) string {
+	return w.ID + ":site"
+}
+
+// loadState returns the persisted runtime state plus the underlying
+// container row. Both values are zero on first deploy (no row yet);
+// callers must tolerate a nil container without treating it as an
+// error.
+func loadState(deps plugin.Deps, w plugin.Workload) (runtimeState, *store.Container, error) {
+	row, err := deps.Store.GetContainerByID(containerRowID(w))
+	switch {
+	case err == nil:
+		// Row exists — decode its state blob below.
+	case errors.Is(err, store.ErrNotFound):
+		// First deploy: no row yet, which is not an error.
+		return runtimeState{}, nil, nil
+	default:
+		return runtimeState{}, nil, fmt.Errorf("static source: load state: %w", err)
+	}
+
+	var st runtimeState
+	if blob := row.ExtraJSON; blob != "" && blob != "{}" {
+		if derr := json.Unmarshal([]byte(blob), &st); derr != nil {
+			// The row itself remains the source of truth for
+			// container_id / proxy_route_id; only the optional state
+			// blob is at risk. Carry on with whatever decoded (fields
+			// that did not decode stay at their zero value) and leave
+			// a debug trace for later diagnosis.
+			slog.Debug("static source: decode extra_json", "workload", w.ID, "error", derr)
+		}
+	}
+	return st, &row, nil
+}
+
+// saveLocks serializes per-workload read-modify-write of the container
+// row inside saveState. With SQLite's MaxOpenConns=1, two parallel
+// deploys for the same workload would still race the read+write
+// against each other (the DB serializes statements but not the
+// caller's intent), letting the loser's write stomp the winner's
+// container_id / proxy_route_id and orphaning Docker resources. The
+// mutex caps the concurrency at 1 per workload; cross-workload
+// parallelism is unaffected.
+var saveLocks struct {
+	// mu guards the locks map itself.
+	mu sync.Mutex
+	// locks holds one mutex per workload ID, created lazily by lockFor.
+	locks map[string]*sync.Mutex
+}
+
+// lockFor returns the per-workload mutex, creating it on first use.
+// The outer mutex is held only briefly during map lookup; the returned
+// per-workload lock is what callers actually contend on.
+func lockFor(workloadID string) *sync.Mutex {
+	saveLocks.mu.Lock()
+	defer saveLocks.mu.Unlock()
+	// The map is allocated lazily so the zero value of saveLocks works.
+	if saveLocks.locks == nil {
+		saveLocks.locks = map[string]*sync.Mutex{}
+	}
+	m, ok := saveLocks.locks[workloadID]
+	if !ok {
+		m = &sync.Mutex{}
+		saveLocks.locks[workloadID] = m
+	}
+	return m
+}
+
+// saveState upserts the container row, calling mutate so callers can
+// adjust both the runtime state blob (extra_json) and the row's
+// first-class fields (container_id, proxy_route_id, state, etc.) in a
+// single upsert.
+//
+// The mutate callback receives a pointer to a runtimeState seeded from
+// the existing extra_json; on save the typed fields are merged back on
+// top of any unknown keys so future-writer values (e.g. per-face
+// route maps) survive the round-trip.
+//
+// Per-workload mutex serializes concurrent callers so two parallel
+// Deploys can't read the same prior state and race their writes.
+//
+// Returns an error when the existing row cannot be loaded, the merged
+// state cannot be encoded, or the upsert itself fails.
+func saveState(deps plugin.Deps, w plugin.Workload, mutate func(*runtimeState, *store.Container)) error {
+	lk := lockFor(w.ID)
+	lk.Lock()
+	defer lk.Unlock()
+
+	prev, prevRow, err := loadState(deps, w)
+	if err != nil {
+		return err
+	}
+
+	// Defaults for a first-time insert; an existing row replaces them
+	// wholesale so fields this function does not know about carry over.
+	row := store.Container{
+		ID:           containerRowID(w),
+		WorkloadID:   w.ID,
+		WorkloadKind: string(store.WorkloadKindSite),
+		Host:         "local",
+	}
+	if prevRow != nil {
+		row = *prevRow
+	}
+
+	// Round-trip extra_json through a generic map so unknown keys
+	// survive. Strip the typed-state keys before the merge so that
+	// a typed field cleared to its zero value (e.g. LastError = "")
+	// actually drops the key instead of being re-introduced by the
+	// generic decode.
+	generic := map[string]json.RawMessage{}
+	if row.ExtraJSON != "" && row.ExtraJSON != "{}" {
+		if err := json.Unmarshal([]byte(row.ExtraJSON), &generic); err != nil {
+			slog.Debug("static source: decode extra_json (generic)", "workload", w.ID, "error", err)
+		}
+	}
+	// A blob of JSON null decodes without error but resets the map to
+	// nil; re-allocate so the merge below cannot panic on assignment.
+	if generic == nil {
+		generic = map[string]json.RawMessage{}
+	}
+	for _, k := range runtimeStateKeys {
+		delete(generic, k)
+	}
+
+	state := prev
+	mutate(&state, &row)
+
+	// Re-emit typed fields into the generic map so they win over any
+	// historical key with the same name.
+	typedBytes, err := json.Marshal(state)
+	if err != nil {
+		return fmt.Errorf("static source: marshal state: %w", err)
+	}
+	typedMap := map[string]json.RawMessage{}
+	if err := json.Unmarshal(typedBytes, &typedMap); err != nil {
+		return fmt.Errorf("static source: re-decode typed state: %w", err)
+	}
+	for k, v := range typedMap {
+		generic[k] = v
+	}
+
+	merged, err := json.Marshal(generic)
+	if err != nil {
+		return fmt.Errorf("static source: marshal merged state: %w", err)
+	}
+	row.ExtraJSON = string(merged)
+	row.LastSeenAt = store.Now()
+
+	if err := deps.Store.UpsertContainer(row); err != nil {
+		return fmt.Errorf("static source: upsert container row: %w", err)
+	}
+	return nil
+}
diff --git a/internal/workload/plugin/source/static/static.go b/internal/workload/plugin/source/static/static.go
index 7cb9115..63e153e 100644
--- a/internal/workload/plugin/source/static/static.go
+++ b/internal/workload/plugin/source/static/static.go
@@ -1,16 +1,16 @@
 // Package static implements the "static" source: a git-folder-backed
-// deployable that can serve plain files or run a Deno backend. Builds an
-// image from the cloned folder and runs one container.
+// deployable that can serve plain files or run a Deno backend. Builds
+// an image from the cloned folder and runs one container.
// -// The full deploy pipeline lives in internal/staticsite (git providers, -// markdown rendering, Dockerfile codegen, Deno scaffolding, image build, -// proxy registration) and is wired in via a function variable so that -// neither this package nor staticsite has to depend on the other. +// The full deploy pipeline is implemented inline in this package +// (deploy.go / teardown.go / reconcile.go). It operates directly on +// plugin.Workload + the containers / workload_env tables — there is no +// longer a synthetic static_sites row backing each workload. // -// cmd/server/main.go (or any caller with access to both packages) -// populates DeployFn / TeardownFn / ReconcileFn at startup; until then, -// Source methods return an explicit error so misconfiguration surfaces -// loudly instead of silently failing. +// The legacy internal/staticsite package remains alive to serve the +// /api/sites/* HTTP routes and the existing static_sites table; this +// plugin does not depend on it for state, only for git-provider +// helpers and Deno scaffolding generation. package static import ( @@ -18,15 +18,13 @@ import ( "encoding/json" "fmt" "strings" - "sync" - "sync/atomic" "github.com/alexei/tinyforge/internal/workload/plugin" ) -// Config is the per-workload source config blob. Mirrors the fields that -// used to live on the static_sites table, less anything moved to Workload -// (notification config, webhook secrets, public_face). +// Config is the per-workload source config blob. Mirrors the fields +// that used to live on the static_sites table, less anything moved to +// Workload (notification config, webhook secrets, public_face). type Config struct { Provider string `json:"provider"` // "gitea" | "github" | "gitlab"; "" = autodetect BaseURL string `json:"base_url"` // e.g. https://git.example.com @@ -41,55 +39,12 @@ type Config struct { StorageLimitMB int `json:"storage_limit_mb"` } -// Backend captures the deploy lifecycle of a static site. 
main.go wires -// an implementation that adapts internal/staticsite.Manager to this -// interface; the plugin contract sees only this shape so it stays -// independent of any specific manager type. -type Backend interface { - Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error - Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error - Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error -} - -var ( - backendMu sync.RWMutex - backend Backend - backendSet atomic.Bool -) - -// SetBackend wires the staticsite-package adapter into this Source AND -// registers the source with the plugin registry. MUST be called exactly -// once from cmd/server/main.go before any plugin invocation. Subsequent -// calls panic — a swapped backend at runtime is a trust-boundary -// inversion (a future plugin loaded via blank import could replace -// deploy/teardown logic that handles git tokens). -func SetBackend(b Backend) { - if !backendSet.CompareAndSwap(false, true) { - panic("static: backend already wired (SetBackend may be called once)") - } - backendMu.Lock() - backend = b - backendMu.Unlock() - plugin.RegisterSource(&source{}) -} - -func currentBackend() (Backend, error) { - backendMu.RLock() - defer backendMu.RUnlock() - if backend == nil { - return nil, fmt.Errorf("static source: backend not wired; call static.SetBackend from main.go") - } - return backend, nil -} - type source struct{} -// Static source registers itself only after SetBackend is called from -// main.go. Eager init() registration would advertise "static" via -// /api/hooks/kinds before there is anything to dispatch to — frontends -// would render it in pickers and operators would hit "backend not wired" -// at deploy time. Lazy registration keeps the kind invisible until it's -// actually usable. 
+// Eager registration — the deploy pipeline lives entirely inside this +// package now, so the kind is usable as soon as init() fires. No more +// "backend not wired" failure mode at deploy time. +func init() { plugin.RegisterSource(&source{}) } func (*source) Kind() string { return "static" } @@ -123,25 +78,13 @@ func (*source) Validate(cfg json.RawMessage) error { } func (*source) Deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error { - b, err := currentBackend() - if err != nil { - return err - } - return b.Deploy(ctx, deps, w, intent) + return deploy(ctx, deps, w, intent) } func (*source) Teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error { - b, err := currentBackend() - if err != nil { - return err - } - return b.Teardown(ctx, deps, w) + return teardown(ctx, deps, w) } func (*source) Reconcile(ctx context.Context, deps plugin.Deps, w plugin.Workload) error { - b, err := currentBackend() - if err != nil { - return err - } - return b.Reconcile(ctx, deps, w) + return reconcile(ctx, deps, w) } diff --git a/internal/workload/plugin/source/static/teardown.go b/internal/workload/plugin/source/static/teardown.go new file mode 100644 index 0000000..3eabd76 --- /dev/null +++ b/internal/workload/plugin/source/static/teardown.go @@ -0,0 +1,70 @@ +package static + +import ( + "context" + "errors" + "fmt" + "log/slog" + + "github.com/alexei/tinyforge/internal/store" + "github.com/alexei/tinyforge/internal/workload/plugin" +) + +// teardown drops every artifact deploy created: the running container, +// the proxy route, the optional storage volume, and the container +// index row. Idempotent — a workload that never deployed is a no-op. 
+// +// Mirrors the legacy Manager.Remove + Stop combination: stop is +// implicit in RemoveContainer(force=true), and the volume removal +// happens only when storage was opted into (the named volume is +// otherwise nonexistent and best-effort delete would log a noisy +// warning). +func teardown(ctx context.Context, deps plugin.Deps, w plugin.Workload) error { + cfg, err := plugin.SourceConfigOf[Config](w) + if err != nil { + return fmt.Errorf("static source: decode config: %w", err) + } + + _, prevContainer, err := loadState(deps, w) + if err != nil { + return err + } + if prevContainer == nil { + // Nothing was ever deployed — best-effort volume cleanup in + // case storage was provisioned but the deploy crashed before + // state landed, then return. + if cfg.StorageEnabled { + if err := deps.Docker.RemoveSiteVolume(ctx, siteVolumeKey(w)); err != nil { + slog.Debug("static site: storage volume cleanup", "site", w.Name, "error", err) + } + } + return nil + } + + // Drop proxy route first so traffic stops landing on a container + // that is about to disappear. + if prevContainer.ProxyRouteID != "" { + if err := deps.Proxy.DeleteRoute(ctx, prevContainer.ProxyRouteID); err != nil { + slog.Warn("static site: failed to remove proxy route", "site", w.Name, "error", err) + } + } + + if prevContainer.ContainerID != "" { + if err := deps.Docker.RemoveContainer(ctx, prevContainer.ContainerID, true); err != nil { + slog.Warn("static site: failed to remove container", "site", w.Name, "error", err) + } + } + + if cfg.StorageEnabled { + if err := deps.Docker.RemoveSiteVolume(ctx, siteVolumeKey(w)); err != nil { + slog.Warn("static site: failed to remove storage volume", "site", w.Name, "error", err) + } + } + + // Delete the container row last so a partial failure leaves enough + // state for a retry. ErrNotFound is fine. 
+ if err := deps.Store.DeleteContainer(prevContainer.ID); err != nil && !errors.Is(err, store.ErrNotFound) { + slog.Warn("static site: failed to delete container row", "site", w.Name, "error", err) + } + return nil +}