Files
tiny-forge/internal/workload/plugin/source/static/deploy.go
T
alexei.dolgolyov 410a131cec feat(apps): stepped creation wizard, branch previews, and app-creation fixes
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
  WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
  ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
  + {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
  /apps/[id] edit form onto the same components (removes the duplication). Add
  vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
  environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
  state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
  conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
  label hints; dashboard + /apps "Total workloads" count only source_kind workloads
  (drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
  empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.

Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
2026-05-29 02:09:54 +03:00

575 lines
20 KiB
Go

package static
import (
"context"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/moby/moby/api/types/mount"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/events"
"github.com/alexei/tinyforge/internal/notify"
"github.com/alexei/tinyforge/internal/proxy"
"github.com/alexei/tinyforge/internal/staticsite"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// healthCheckDelay is the brief grace window deploy waits after
// StartContainer before it probes IsContainerRunning. It is short
// enough not to bog down a healthy deploy, yet long enough to catch
// crash-on-boot failures (missing env var, bad Dockerfile, port
// conflict). The wait in deploy is interruptible: it selects on the
// deploy context as well as on this delay.
const healthCheckDelay = 3 * time.Second
// deploy runs one full sync of a static workload: fetch the latest
// commit, optionally rebuild the image, recreate the container, and
// reconfigure the proxy. Ported from internal/staticsite/manager.go to
// operate directly on plugin.Workload + container row state, without
// the legacy static_sites table.
//
// Behavior parity with the legacy path is the explicit goal — the
// log-line format ("Static site \"%s\": %s") and event payload shapes
// are preserved so log scrapers and SSE clients keep working through
// the cutover.
//
// Parameters:
//   - ctx bounds every provider, Docker and proxy call made on the
//     forward path; cancelling it during the post-start grace window
//     aborts the deploy.
//   - deps supplies the store, Docker client, proxy, event bus and
//     encryption key.
//   - w is the workload being deployed; its source config is decoded
//     into Config.
//   - intent.Reason decides whether an unchanged commit still forces a
//     rebuild (empty, "manual" and "promote" force).
//
// Returns nil when the deploy succeeded or when nothing needed to
// change; otherwise a wrapped error naming the failed step. On every
// failure after state was loaded the workload is also marked "failed"
// through updateStatus, with the access token scrubbed from the
// persisted message.
func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
	// cleanupTimeout bounds the best-effort removal of a freshly
	// created container once the deploy has already failed.
	const cleanupTimeout = 30 * time.Second

	cfg, err := plugin.SourceConfigOf[Config](w)
	if err != nil {
		return fmt.Errorf("static source: decode config: %w", err)
	}
	prev, prevContainer, err := loadState(deps, w)
	if err != nil {
		return err
	}

	// Manual / first-time deploys force a full rebuild even when the
	// commit SHA is unchanged. The legacy Manager.Deploy was called
	// with force=true from the adapter; preserve that semantic by
	// forcing whenever the reason is empty, "manual" or "promote".
	force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote"

	// Decrypt the access token if present. Kept in a local so the
	// sanitizer can scrub it from any error string before persisting.
	// A decryption failure is logged and the deploy continues without
	// a token.
	token := ""
	if cfg.AccessToken != "" {
		decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken)
		if derr != nil {
			slog.Warn("static source: failed to decrypt access token", "site", w.Name, "error", derr)
		} else {
			token = decrypted
		}
	}

	provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create provider: %v", err), token))
		return fmt.Errorf("create provider: %w", err)
	}
	latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token))
		return fmt.Errorf("get latest commit: %w", err)
	}

	// Resolve the public-facing domain from the workload's public
	// faces. Mirrors the synthetic-row adapter's logic so the proxy
	// registration sees the same FQDN it did before.
	domain := primaryDomain(deps, w)

	// Skip redeploy when nothing changed AND we have a live container +
	// (if applicable) live proxy route. Manual deploys always force.
	prevContainerID := ""
	prevProxyRouteID := ""
	if prevContainer != nil {
		prevContainerID = prevContainer.ContainerID
		prevProxyRouteID = prevContainer.ProxyRouteID
	}
	if !force && latestSHA == prev.LastCommitSHA && prev.Status == "deployed" && prevContainerID != "" {
		// A failed liveness probe is treated the same as "not running":
		// both fall through to a redeploy.
		running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID)
		if !running {
			slog.Info("static site: container not running, forcing redeploy", "site", w.Name)
		} else if domain != "" {
			proxyOK, perr := deps.Proxy.RouteExists(ctx, domain)
			if perr != nil {
				slog.Warn("static site: proxy check failed, forcing redeploy", "site", w.Name, "error", perr)
			} else if !proxyOK {
				slog.Info("static site: proxy route missing, forcing redeploy", "site", w.Name)
			} else {
				slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA)
				return nil
			}
		} else {
			slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA)
			return nil
		}
	}

	// Mark syncing.
	updateStatus(deps, w, "syncing", prev.LastCommitSHA, "")
	publishEvent(deps, w, "syncing")

	// Download directory — temp dir cleaned up on every exit path.
	buildDir, err := os.MkdirTemp("", "dw-site-"+idShort(w)+"-*")
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create temp dir: %v", err), token))
		return fmt.Errorf("create temp dir: %w", err)
	}
	defer os.RemoveAll(buildDir)

	if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, cfg.FolderPath, buildDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("download folder: %v", err), token))
		return fmt.Errorf("download folder: %w", err)
	}

	// Defense in depth: providers should never write outside buildDir,
	// but a hostile self-hosted Gitea/GitLab the operator pointed at
	// could in principle return a tree entry that escapes. Verify
	// before the copy step materializes the build context.
	if err := verifyDownloadInsideRoot(buildDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("downloaded tree rejected: %v", err), token))
		return fmt.Errorf("downloaded tree rejected: %w", err)
	}

	// Markdown rendering is best-effort: a failure is logged and the
	// deploy proceeds with whatever is in buildDir.
	if cfg.RenderMarkdown {
		if err := staticsite.RenderMarkdownFiles(buildDir); err != nil {
			slog.Warn("static site: markdown rendering failed", "site", w.Name, "error", err)
		}
	}

	// Detect mode: deno requires an api/ folder. Fall back to static if
	// the operator declared deno but the repo doesn't carry routes.
	mode := cfg.Mode
	apiDir := filepath.Join(buildDir, "api")
	hasAPI := false
	if info, err := os.Stat(apiDir); err == nil && info.IsDir() {
		hasAPI = true
	}
	if mode == "deno" && !hasAPI {
		mode = "static"
		slog.Info("static site: no api/ folder found, falling back to static mode", "site", w.Name)
	}

	imageTag := imageTagFor(w)
	contextDir, err := os.MkdirTemp("", "dw-site-build-*")
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("create build context: %v", err), token))
		return fmt.Errorf("create build context dir: %w", err)
	}
	defer os.RemoveAll(contextDir)

	if mode == "deno" {
		if err := prepareDenoBuild(buildDir, contextDir); err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("prepare deno build: %v", err), token))
			return fmt.Errorf("prepare deno build: %w", err)
		}
	} else {
		if err := prepareStaticBuild(buildDir, contextDir); err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("prepare static build: %v", err), token))
			return fmt.Errorf("prepare static build: %w", err)
		}
	}

	if err := deps.Docker.BuildImage(ctx, contextDir, imageTag); err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("build image: %v", err), token))
		return fmt.Errorf("build image: %w", err)
	}

	env := buildEnv(deps, w.ID)

	// The in-container port depends on the mode. Keep it as an int and
	// derive the string form once, instead of parsing the string back
	// (and discarding the parse error) at each use.
	internalPort := 80
	if mode == "deno" {
		internalPort = 8000
	}
	containerPort := strconv.Itoa(internalPort)

	settings, err := deps.Store.GetSettings()
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("get settings: %v", err), token))
		return fmt.Errorf("get settings: %w", err)
	}
	networkName := settings.Network
	networkID, err := deps.Docker.EnsureNetwork(ctx, networkName)
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("ensure network: %v", err), token))
		return fmt.Errorf("ensure network: %w", err)
	}

	containerName := containerNameFor(w)

	// Persistent storage is only attached in deno mode. A volume
	// failure is logged and the container is created without it.
	var mounts []mount.Mount
	if cfg.StorageEnabled && mode == "deno" {
		volName, volErr := deps.Docker.EnsureSiteVolume(ctx, siteVolumeKey(w))
		if volErr != nil {
			slog.Warn("static site: failed to ensure storage volume", "site", w.Name, "error", volErr)
		} else {
			mounts = append(mounts, mount.Mount{
				Type:   mount.TypeVolume,
				Source: volName,
				Target: "/app/data",
			})
			slog.Info("static site: storage volume attached", "site", w.Name, "volume", volName)
		}
	}

	// Proxy labels for the primary domain (Traefik picks these up; NPM
	// ignores them). Copied into a fresh map so the container always
	// gets a non-nil label set.
	labels := map[string]string{}
	if domain != "" {
		for k, v := range deps.Proxy.ContainerLabels(domain, internalPort) {
			labels[k] = v
		}
	}

	cc := docker.ContainerConfig{
		Name:         containerName,
		Image:        imageTag,
		Env:          env,
		ExposedPorts: []string{containerPort + "/tcp"},
		NetworkName:  networkName,
		NetworkID:    networkID,
		Mounts:       mounts,
		Labels:       labels,
		WorkloadID:   w.ID,
		WorkloadKind: string(store.WorkloadKindSite),
		Role:         "",
	}
	containerID, err := deps.Docker.CreateContainer(ctx, cc)
	if err != nil {
		// Container with this name might already exist — best-effort
		// cleanup of any prior container by ID and by name, then retry.
		if prevContainerID != "" {
			deps.Docker.StopContainer(ctx, prevContainerID, 10)
			deps.Docker.RemoveContainer(ctx, prevContainerID, true)
		}
		removeContainerByName(ctx, deps, containerName)
		containerID, err = deps.Docker.CreateContainer(ctx, cc)
		if err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("create container: %v", err), token))
			return fmt.Errorf("create container: %w", err)
		}
	}

	// removeNewContainer force-removes the container created above.
	// It deliberately runs on a context detached from ctx's
	// cancellation: these cleanups fire exactly when the deploy has
	// failed or been cancelled, and a cancelled ctx would make the
	// removal fail and leave the container orphaned.
	removeNewContainer := func() {
		cleanupCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), cleanupTimeout)
		defer cancel()
		deps.Docker.RemoveContainer(cleanupCtx, containerID, true)
	}

	if err := deps.Docker.StartContainer(ctx, containerID); err != nil {
		removeNewContainer()
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("start container: %v", err), token))
		return fmt.Errorf("start container: %w", err)
	}

	// Brief health-check window — verify the container survives a few
	// seconds after start, surfacing the tail of its logs as the
	// failure reason if it crashes. Honor ctx so a cancelled deploy
	// returns promptly instead of waiting out the full delay.
	select {
	case <-ctx.Done():
		removeNewContainer()
		updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check")
		return ctx.Err()
	case <-time.After(healthCheckDelay):
	}
	running, runErr := deps.Docker.IsContainerRunning(ctx, containerID)
	if runErr != nil || !running {
		logMsg := "container exited immediately after start"
		// Prefer the last 20 log lines as the failure reason when they
		// can be read; otherwise keep the generic message.
		if logs, logErr := deps.Docker.ContainerLogs(ctx, containerID, false, "20"); logErr == nil {
			buf, _ := io.ReadAll(logs)
			logs.Close()
			if len(buf) > 0 {
				logMsg = sanitizeError(string(buf), token)
			}
		}
		removeNewContainer()
		updateStatus(deps, w, "failed", latestSHA, logMsg)
		return fmt.Errorf("container not running: %s", logMsg)
	}

	// Resolve proxy target. Default to in-network DNS (containerName);
	// switch to (settings.ServerIP, hostPort) under NPM remote mode.
	forwardHost := containerName
	forwardPort := internalPort
	if settings.NpmRemote && settings.ProxyProvider == "npm" {
		if settings.ServerIP != "" {
			hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp")
			if hpErr != nil {
				slog.Warn("static site: could not get host port for remote NPM", "site", w.Name, "error", hpErr)
			} else {
				forwardHost = settings.ServerIP
				forwardPort = int(hostPort)
			}
		}
	}

	// Configure proxy if a domain is set: drop any prior route, then
	// register the new target. A proxy failure is logged but does not
	// fail the deploy.
	//
	// NOTE(review): if ConfigureRoute fails after the prior route was
	// deleted, proxyRouteID still carries the prior (now likely
	// removed) ID into the persisted row — confirm whether that is
	// intended.
	proxyRouteID := prevProxyRouteID
	if domain != "" {
		if prevProxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, prevProxyRouteID)
		}
		routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{
			SSLCertificateID: settings.SSLCertificateID,
		})
		if rerr != nil {
			slog.Warn("static site: failed to configure proxy",
				"site", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr)
		} else {
			proxyRouteID = routeID
			slog.Info("static site: proxy configured",
				"site", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID)
		}
	}

	// Drop the old container if a fresh one was created (different ID).
	if prevContainerID != "" && prevContainerID != containerID {
		deps.Docker.StopContainer(ctx, prevContainerID, 10)
		deps.Docker.RemoveContainer(ctx, prevContainerID, true)
	}

	// Single write of the new state + container metadata via saveState.
	// On failure: tear down the just-created container and proxy route
	// so we don't leave orphans behind. The next deploy would otherwise
	// see no row and try to create a third container.
	if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
		rs.LastCommitSHA = latestSHA
		rs.LastSyncAt = store.Now()
		rs.LastError = ""
		rs.Status = "deployed"
		c.ContainerID = containerID
		c.ProxyRouteID = proxyRouteID
		c.Subdomain = domain
		c.State = "running"
		c.Port = internalPort
		c.ImageRef = imageTag
	}); err != nil {
		slog.Error("static site: failed to persist deploy state — rolling back",
			"site", w.Name, "error", err)
		if proxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, proxyRouteID)
		}
		deps.Docker.StopContainer(ctx, containerID, 10)
		deps.Docker.RemoveContainer(ctx, containerID, true)
		// Best-effort failure-state write so the operator sees the
		// deploy failed instead of a silent gap. If even this fails
		// we have nothing left to log.
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token))
		return fmt.Errorf("persist deploy state: %w", err)
	}

	publishEvent(deps, w, "deployed")
	// updateStatus normally fires the terminal-state notification; the
	// success path above wrote state via saveState directly, so dispatch
	// the deployed notification explicitly here.
	dispatchSiteNotification(deps, w, domain, "deployed", "")

	// Log only a short SHA prefix for readability.
	shaDisplay := latestSHA
	if len(shaDisplay) > 8 {
		shaDisplay = shaDisplay[:8]
	}
	slog.Info("static site deployed", "site", w.Name, "sha", shaDisplay, "mode", mode)
	return nil
}
// updateStatus persists a status transition for the workload and then
// fires the side effects the legacy Manager.updateStatus did.
//
// The write goes through saveState and touches only the runtime
// state's Status, LastError and (when commitSHA is non-empty)
// LastCommitSHA, plus the container row's State column. A failed write
// is logged and does not suppress the side effects below.
//
// Side effects:
//   - status "failed": a "failed: <errMsg>" event is published.
//   - status "deployed" or "failed": an outbound notification is
//     dispatched for the workload's primary domain.
//
// The deploy success path calls saveState directly (it writes the full
// container metadata in the same call); this helper covers the failure
// and intermediate transitions where only state moves.
func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) {
	mutate := func(state *runtimeState, row *store.Container) {
		state.Status = status
		state.LastError = errMsg
		if commitSHA != "" {
			state.LastCommitSHA = commitSHA
		}

		// Mirror the status onto the container row so the global
		// containers index stays useful for filtered queries. Any
		// other status — notably "syncing" — leaves the row on the
		// value the previous deploy wrote, to avoid churn mid-sync.
		if status == "deployed" {
			row.State = "running"
		} else if status == "stopped" {
			row.State = "stopped"
		} else if status == "failed" {
			row.State = "failed"
		}
	}

	if saveErr := saveState(deps, w, mutate); saveErr != nil {
		slog.Error("static site: failed to update status", "id", w.ID, "status", status, "error", saveErr)
	}

	isFailure := status == "failed"
	if isFailure {
		publishEvent(deps, w, "failed: "+errMsg)
	}
	if isFailure || status == "deployed" {
		dispatchSiteNotification(deps, w, primaryDomain(deps, w), status, errMsg)
	}
}
// dispatchSiteNotification sends a sync-result notification for the
// workload through plugin.DispatchNotificationForWorkload.
//
// The event type is "site_sync_failure" when status is "failed" and
// "site_sync_success" for every other status. The event carries the
// workload name as Project, errMsg as Error, and — only when domain is
// non-empty — "https://" + domain as URL.
//
// Route resolution (workload_notifications → legacy single URL →
// settings global) is performed by the shared dispatcher, so receivers
// see the same fan-out behaviour as the dockerfile plugin's path.
func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
	evt := notify.Event{
		Type:    "site_sync_success",
		Project: w.Name,
		Error:   errMsg,
	}
	if status == "failed" {
		evt.Type = "site_sync_failure"
	}
	// Without a domain there is no public URL to report.
	if domain != "" {
		evt.URL = "https://" + domain
	}
	plugin.DispatchNotificationForWorkload(deps, w, evt)
}
// publishEvent announces a status change for the workload in three
// steps:
//
//  1. publish an EventStaticSiteStatus event on the bus;
//  2. persist an event_log row so the dashboard's audit trail picks it
//     up;
//  3. publish an EventLog event mirroring the persisted row.
//
// If the row cannot be persisted the failure is logged and step 3 is
// skipped. Severity is "error" when status starts with "failed" and
// "info" otherwise.
//
// The message is rendered as "Static site %q: %s", intended to keep
// matching what log scrapers and operator-configured event triggers
// saw from the legacy Manager.publishEvent. Note that %q escapes
// quotes and backslashes inside the workload name.
func publishEvent(deps plugin.Deps, w plugin.Workload, status string) {
	const source = "static_site"

	deps.Events.Publish(events.Event{
		Type: events.EventStaticSiteStatus,
		Payload: events.StaticSiteStatusPayload{
			SiteID: w.ID,
			Name:   w.Name,
			Status: status,
		},
	})

	level := "info"
	if strings.HasPrefix(status, "failed") {
		level = "error"
	}
	text := fmt.Sprintf("Static site %q: %s", w.Name, status)

	// Metadata goes through json.Marshal so workload names containing
	// quotes or backslashes cannot produce invalid JSON for downstream
	// log-scan consumers. On the (unexpected) marshal failure fall
	// back to an empty object.
	meta := "{}"
	if raw, marshalErr := json.Marshal(map[string]string{
		"site_id":   w.ID,
		"site_name": w.Name,
		"status":    status,
	}); marshalErr != nil {
		slog.Error("static site: marshal event metadata", "error", marshalErr)
	} else {
		meta = string(raw)
	}

	saved, insertErr := deps.Store.InsertEvent(store.EventLog{
		Source:   source,
		Severity: level,
		Message:  text,
		Metadata: meta,
	})
	if insertErr != nil {
		slog.Error("static site: failed to persist event log", "error", insertErr)
		return
	}

	// Re-broadcast the stored row, carrying its ID and creation time.
	deps.Events.Publish(events.Event{
		Type: events.EventLog,
		Payload: events.EventLogPayload{
			ID:        saved.ID,
			Source:    source,
			Severity:  level,
			Message:   text,
			Metadata:  meta,
			CreatedAt: saved.CreatedAt,
		},
	})
}
// removeContainerByName stops and force-removes the first container in
// Docker's listing whose name equals name, so that a name conflict in
// CreateContainer is recoverable. It mirrors the legacy helper.
//
// Everything here is best-effort: a listing failure returns silently,
// the stop/remove results are not inspected, and a missing match is a
// no-op.
func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) {
	listed, listErr := deps.Docker.ListContainers(ctx, nil)
	if listErr != nil {
		return
	}
	for _, candidate := range listed {
		if candidate.Name != name {
			continue
		}
		// Stop with a 10-unit grace period, then force-remove; only the
		// first match is handled.
		deps.Docker.StopContainer(ctx, candidate.ID, 10)
		deps.Docker.RemoveContainer(ctx, candidate.ID, true)
		return
	}
}
// primaryDomain derives the public-facing FQDN from the first public
// face of the workload that has a subdomain or a domain set; faces
// with neither are skipped. An empty return means "no proxy
// registration"; the container still runs and is reachable inside the
// docker network.
//
// Resolution for the chosen face:
//   - subdomain and domain both set: "<subdomain>.<domain>"
//   - domain only: the domain as-is
//   - subdomain only: "<subdomain>.<settings.Domain>", matching the
//     legacy Manager which let settings.Domain fall through silently.
//     If settings cannot be loaded or carry no domain, the bare
//     subdomain is returned so the proxy still gets *something* to
//     register.
//
// Static workloads support at most one face today; the loop is
// defensive in case the API contract loosens later.
func primaryDomain(deps plugin.Deps, w plugin.Workload) string {
	for _, face := range w.PublicFaces {
		sub, dom := face.Subdomain, face.Domain

		if sub == "" {
			if dom == "" {
				// Nothing usable on this face; try the next one.
				continue
			}
			return dom
		}
		if dom != "" {
			return sub + "." + dom
		}

		// Bare subdomain: complete it with the global settings domain
		// when one is available.
		settings, err := deps.Store.GetSettings()
		if err == nil && settings.Domain != "" {
			return sub + "." + settings.Domain
		}
		return sub
	}
	return ""
}