package static import ( "context" "fmt" "io" "log/slog" "os" "path/filepath" "strconv" "time" "github.com/moby/moby/api/types/mount" "github.com/alexei/tinyforge/internal/crypto" "github.com/alexei/tinyforge/internal/docker" "github.com/alexei/tinyforge/internal/events" "github.com/alexei/tinyforge/internal/notify" "github.com/alexei/tinyforge/internal/proxy" "github.com/alexei/tinyforge/internal/staticsite" "github.com/alexei/tinyforge/internal/store" "github.com/alexei/tinyforge/internal/workload/plugin" ) // healthCheckDelay is the brief grace window after StartContainer // before we probe IsContainerRunning. Short enough not to bog down a // healthy deploy; long enough to catch crash-on-boot failures // (missing env var, bad Dockerfile, port conflict). const healthCheckDelay = 3 * time.Second // deploy runs one full sync of a static workload: fetch the latest // commit, optionally rebuild the image, recreate the container, and // reconfigure the proxy. Ported from internal/staticsite/manager.go to // operate directly on plugin.Workload + container row state, without // the legacy static_sites table. // // Behavior parity with the legacy path is the explicit goal — the // log-line format ("Static site \"%s\": %s") and event payload shapes // are preserved so log scrapers and SSE clients keep working through // the cutover. func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) (retErr error) { cfg, err := plugin.SourceConfigOf[Config](w) if err != nil { return fmt.Errorf("static source: decode config: %w", err) } prev, prevContainer, err := loadState(deps, w) if err != nil { return err } // Manual / first-time deploys force a full rebuild even when the // commit SHA is unchanged. The legacy Manager.Deploy was called // with force=true from the adapter; preserve that semantic by // treating any non-cron / non-git intent as forcing. force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote" // Decrypt the access token if present. Kept in a local so the // sanitizer can scrub it from any error string before persisting. token := "" if cfg.AccessToken != "" { decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken) if derr != nil { slog.Warn("static source: failed to decrypt access token", "site", w.Name, "error", derr) } else { token = decrypted } } provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token) if err != nil { updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("create provider: %v", err), token)) return fmt.Errorf("create provider: %w", err) } latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch) if err != nil { updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token)) return fmt.Errorf("get latest commit: %w", err) } // Resolve the public-facing domain from the workload's first enabled // public face. Mirrors the synthetic-row adapter's logic so the // proxy registration sees the same FQDN it did before. domain := primaryDomain(deps, w) // Commit-status reporter (best-effort; gated on cfg.ReportCommitStatus). // Built here once latestSHA + domain are known. A deferred terminal // report fires Success/Failure based on the deploy's outcome, but ONLY // once an actual deploy began (deployStarted) — the unchanged-SHA // short-circuit below returns before that flips, so no status is // reported when nothing was deployed. retErr is the named return the // defer inspects. reporter := newCommitStatusReporter(provider, cfg, latestSHA, statusTargetURL(domain)) deployStarted := false defer func() { if !deployStarted { return } if retErr != nil { reporter.report(ctx, w, staticsite.CommitStatusFailure, "Tinyforge: deploy failed") } else { reporter.report(ctx, w, staticsite.CommitStatusSuccess, "Tinyforge: deployed") } }() // Skip redeploy when nothing changed AND we have a live container + // (if applicable) live proxy route. Manual deploys always force. prevContainerID := "" prevProxyRouteID := "" if prevContainer != nil { prevContainerID = prevContainer.ContainerID prevProxyRouteID = prevContainer.ProxyRouteID } if !force && latestSHA == prev.LastCommitSHA && prev.Status == "deployed" && prevContainerID != "" { running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID) if !running { slog.Info("static site: container not running, forcing redeploy", "site", w.Name) } else if domain != "" { proxyOK, perr := deps.Proxy.RouteExists(ctx, domain) if perr != nil { slog.Warn("static site: proxy check failed, forcing redeploy", "site", w.Name, "error", perr) } else if !proxyOK { slog.Info("static site: proxy route missing, forcing redeploy", "site", w.Name) } else { slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA) return nil } } else { slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA) return nil } } // Mark syncing. From here on a deploy is genuinely underway, so the // deferred terminal status report should fire. Push a "pending" commit // status (best-effort) and arm the deferred Success/Failure report. updateStatus(deps, w, "syncing", prev.LastCommitSHA, "") publishEvent(deps, w, "syncing") deployStarted = true reporter.report(ctx, w, staticsite.CommitStatusPending, "Tinyforge: deploying") // Build context — temp dir cleaned up on every exit path. buildDir, err := os.MkdirTemp("", "dw-site-"+idShort(w)+"-*") if err != nil { updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("create temp dir: %v", err), token)) return fmt.Errorf("create temp dir: %w", err) } defer os.RemoveAll(buildDir) if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, cfg.FolderPath, buildDir); err != nil { updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("download folder: %v", err), token)) return fmt.Errorf("download folder: %w", err) } // Defense in depth: providers should never write outside buildDir, // but a hostile self-hosted Gitea/GitLab the operator pointed at // could in principle return a tree entry that escapes. Verify // before the copy step materializes the build context. if err := verifyDownloadInsideRoot(buildDir); err != nil { updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("downloaded tree rejected: %v", err), token)) return fmt.Errorf("downloaded tree rejected: %w", err) } if cfg.RenderMarkdown { if err := staticsite.RenderMarkdownFiles(buildDir); err != nil { slog.Warn("static site: markdown rendering failed", "site", w.Name, "error", err) } } // Detect mode: deno requires an api/ folder. Fall back to static if // the operator declared deno but the repo doesn't carry routes. mode := cfg.Mode apiDir := filepath.Join(buildDir, "api") hasAPI := false if info, err := os.Stat(apiDir); err == nil && info.IsDir() { hasAPI = true } if mode == "deno" && !hasAPI { mode = "static" slog.Info("static site: no api/ folder found, falling back to static mode", "site", w.Name) } imageTag := imageTagFor(w) contextDir, err := os.MkdirTemp("", "dw-site-build-*") if err != nil { updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("create build context: %v", err), token)) return fmt.Errorf("create build context dir: %w", err) } defer os.RemoveAll(contextDir) if mode == "deno" { if err := prepareDenoBuild(buildDir, contextDir); err != nil { updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("prepare deno build: %v", err), token)) return fmt.Errorf("prepare deno build: %w", err) } } else { if err := prepareStaticBuild(buildDir, contextDir); err != nil { updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("prepare static build: %v", err), token)) return fmt.Errorf("prepare static build: %w", err) } } if err := deps.Docker.BuildImage(ctx, contextDir, imageTag); err != nil { updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("build image: %v", err), token)) return fmt.Errorf("build image: %w", err) } env := buildEnv(deps, w.ID) containerPort := "80" if mode == "deno" { containerPort = "8000" } settings, err := deps.Store.GetSettings() if err != nil { updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("get settings: %v", err), token)) return fmt.Errorf("get settings: %w", err) } networkName := settings.Network networkID, err := deps.Docker.EnsureNetwork(ctx, networkName) if err != nil { updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("ensure network: %v", err), token)) return fmt.Errorf("ensure network: %w", err) } containerName := containerNameFor(w) var mounts []mount.Mount if cfg.StorageEnabled && mode == "deno" { volName, volErr := deps.Docker.EnsureSiteVolume(ctx, siteVolumeKey(w)) if volErr != nil { slog.Warn("static site: failed to ensure storage volume", "site", w.Name, "error", volErr) } else { mounts = append(mounts, mount.Mount{ Type: mount.TypeVolume, Source: volName, Target: "/app/data", }) slog.Info("static site: storage volume attached", "site", w.Name, "volume", volName) } } // Per-face proxy labels (Traefik picks these up; NPM ignores them). // Static workloads have at most one face today, but iterate for // future multi-face parity with the image source. labels := map[string]string{} if domain != "" { port, _ := strconv.Atoi(containerPort) if l := deps.Proxy.ContainerLabels(domain, port); l != nil { for k, v := range l { labels[k] = v } } } cc := docker.ContainerConfig{ Name: containerName, Image: imageTag, Env: env, ExposedPorts: []string{containerPort + "/tcp"}, NetworkName: networkName, NetworkID: networkID, Mounts: mounts, Labels: labels, WorkloadID: w.ID, WorkloadKind: string(store.WorkloadKindSite), Role: "", } containerID, err := deps.Docker.CreateContainer(ctx, cc) if err != nil { // Container with this name might already exist — best-effort // cleanup of any prior container by ID and by name, then retry. if prevContainerID != "" { deps.Docker.StopContainer(ctx, prevContainerID, 10) deps.Docker.RemoveContainer(ctx, prevContainerID, true) } removeContainerByName(ctx, deps, containerName) containerID, err = deps.Docker.CreateContainer(ctx, cc) if err != nil { updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("create container: %v", err), token)) return fmt.Errorf("create container: %w", err) } } if err := deps.Docker.StartContainer(ctx, containerID); err != nil { deps.Docker.RemoveContainer(ctx, containerID, true) updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("start container: %v", err), token)) return fmt.Errorf("start container: %w", err) } // Brief health-check window — verify the container survives a few // seconds after start, surfacing the tail of its logs as the // failure reason if it crashes. Honor ctx so a cancelled deploy // returns promptly instead of waiting out the full delay. select { case <-ctx.Done(): deps.Docker.RemoveContainer(ctx, containerID, true) updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check") return ctx.Err() case <-time.After(healthCheckDelay): } running, runErr := deps.Docker.IsContainerRunning(ctx, containerID) if runErr != nil || !running { logMsg := "container exited immediately after start" if logs, logErr := deps.Docker.ContainerLogs(ctx, containerID, false, "20"); logErr == nil { buf, _ := io.ReadAll(logs) logs.Close() if len(buf) > 0 { logMsg = sanitizeError(string(buf), token) } } deps.Docker.RemoveContainer(ctx, containerID, true) updateStatus(deps, w, "failed", latestSHA, logMsg) return fmt.Errorf("container not running: %s", logMsg) } // Resolve proxy target. Default to in-network DNS (containerName); // switch to (settings.ServerIP, hostPort) under NPM remote mode. internalPort, _ := strconv.Atoi(containerPort) forwardHost := containerName forwardPort := internalPort if settings.NpmRemote && settings.ProxyProvider == "npm" { if settings.ServerIP != "" { hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp") if hpErr != nil { slog.Warn("static site: could not get host port for remote NPM", "site", w.Name, "error", hpErr) } else { forwardHost = settings.ServerIP forwardPort = int(hostPort) } } } // Configure proxy if a domain is set. Replace any prior route in- // place so traffic shifts atomically. proxyRouteID := prevProxyRouteID if domain != "" { if prevProxyRouteID != "" { deps.Proxy.DeleteRoute(ctx, prevProxyRouteID) } routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{ SSLCertificateID: settings.SSLCertificateID, }) if rerr != nil { slog.Warn("static site: failed to configure proxy", "site", w.Name, "domain", domain, "target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr) } else { proxyRouteID = routeID slog.Info("static site: proxy configured", "site", w.Name, "domain", domain, "target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID) } } // Drop the old container if a fresh one was created (different ID). if prevContainerID != "" && prevContainerID != containerID { deps.Docker.StopContainer(ctx, prevContainerID, 10) deps.Docker.RemoveContainer(ctx, prevContainerID, true) } // Single transactional write of the new state + container metadata. // On failure: tear down the just-created container and proxy route // so we don't leave orphans behind. The next deploy would otherwise // see no row and try to create a third container. if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) { rs.LastCommitSHA = latestSHA rs.LastSyncAt = store.Now() rs.LastError = "" rs.Status = "deployed" c.ContainerID = containerID c.ProxyRouteID = proxyRouteID c.Subdomain = domain c.State = "running" c.Port = internalPort c.ImageRef = imageTag }); err != nil { slog.Error("static site: failed to persist deploy state — rolling back", "site", w.Name, "error", err) if proxyRouteID != "" { deps.Proxy.DeleteRoute(ctx, proxyRouteID) } deps.Docker.StopContainer(ctx, containerID, 10) deps.Docker.RemoveContainer(ctx, containerID, true) // Best-effort failure-state write so the operator sees the // deploy failed instead of a silent gap. If even this fails // we have nothing left to log. updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token)) return fmt.Errorf("persist deploy state: %w", err) } publishEvent(deps, w, "deployed") // updateStatus normally fires the terminal-state notification; the // success path above wrote state via saveState directly, so dispatch // the deployed notification explicitly here. dispatchSiteNotification(deps, w, domain, "deployed", "") shaDisplay := latestSHA if len(shaDisplay) > 8 { shaDisplay = shaDisplay[:8] } slog.Info("static site deployed", "site", w.Name, "sha", shaDisplay, "mode", mode) return nil } // commitStatusReporter pushes deploy outcomes back to the git provider as // a commit status, gated on the per-workload report_commit_status flag. // It is strictly best-effort: every call is wrapped so a reporting failure // is logged at Warn and NEVER propagates to fail or block the deploy. // // The provider + identifiers are captured once at deploy start so the hot // transition points (pending/success/failure) read as one-liners. A nil // receiver (reporting disabled) makes report a no-op, so callers don't have // to guard each site. type commitStatusReporter struct { provider staticsite.GitProvider owner string repo string sha string targetURL string enabled bool } // newCommitStatusReporter builds a reporter from the decoded config. When // report_commit_status is off (or the SHA is empty) the returned reporter's // report method is inert. func newCommitStatusReporter(provider staticsite.GitProvider, cfg Config, sha, targetURL string) *commitStatusReporter { return &commitStatusReporter{ provider: provider, owner: cfg.RepoOwner, repo: cfg.RepoName, sha: sha, targetURL: targetURL, enabled: cfg.ReportCommitStatus, } } // report sends one commit status, swallowing (and logging) any error. Safe // to call on a disabled reporter or with a nil provider/empty SHA. func (r *commitStatusReporter) report(ctx context.Context, w plugin.Workload, status staticsite.CommitStatus, description string) { if r == nil || !r.enabled || r.provider == nil || r.sha == "" { return } if err := r.provider.SetCommitStatus(ctx, r.owner, r.repo, r.sha, status, r.targetURL, description); err != nil { slog.Warn("static site: commit-status report failed (ignored)", "site", w.Name, "status", string(status), "error", err) } } // statusTargetURL derives the https URL the commit status links back to — // the workload's primary public face, or "" when it has none. func statusTargetURL(domain string) string { if domain == "" { return "" } return "https://" + domain } // updateStatus writes the runtime state's status/error/commit fields // and fires the side effects the legacy Manager.updateStatus did: // failures land in the event log, and terminal transitions trigger an // outbound notification. // // On the deploy success path saveState is called directly (with the // full container metadata in the same write); this helper covers the // failure / intermediate transitions where only state moves. func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) { if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) { rs.Status = status rs.LastError = errMsg if commitSHA != "" { rs.LastCommitSHA = commitSHA } // Reflect status into the container row state column so the // global containers index stays useful for filtered queries. switch status { case "deployed": c.State = "running" case "stopped": c.State = "stopped" case "failed": c.State = "failed" case "syncing": // Don't churn the container row's state during in-progress // syncs — leave it on whatever value the previous deploy left. } }); err != nil { slog.Error("static site: failed to update status", "id", w.ID, "status", status, "error", err) } if status == "failed" { publishEvent(deps, w, "failed: "+errMsg) } if status == "deployed" || status == "failed" { dispatchSiteNotification(deps, w, primaryDomain(deps, w), status, errMsg) } } // dispatchSiteNotification fires a site_sync_success or // site_sync_failure event for the workload via the shared multi-route // dispatcher in plugin.DispatchNotificationForWorkload. Resolution // order (workload_notifications → legacy single URL → settings global) // is identical to the dockerfile plugin's path so receivers see // consistent fan-out behaviour across source kinds. func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) { eventType := "site_sync_success" if status == "failed" { eventType = "site_sync_failure" } siteURL := "" if domain != "" { siteURL = "https://" + domain } plugin.DispatchNotificationForWorkload(deps, w, notify.Event{ Type: eventType, Project: w.Name, URL: siteURL, Error: errMsg, }) } // publishEvent emits a static_site_status event on the bus (drives the // dashboard's per-site status pill) AND records a workload-scoped deploy // event in the audit log. The audit InsertEvent + bus publish is // centralised in plugin.EmitDeployEvent so the message/metadata shape and // per-workload timeline are identical across all source kinds. This // standardises the metadata key from the legacy "site_id" to "workload_id"; // no consumer reads the old key (verified repo-wide). func publishEvent(deps plugin.Deps, w plugin.Workload, status string) { deps.Events.Publish(events.Event{ Type: events.EventStaticSiteStatus, Payload: events.StaticSiteStatusPayload{ SiteID: w.ID, Name: w.Name, Status: status, }, }) plugin.EmitDeployEvent(deps, w, "static_site", status) } // removeContainerByName mirrors the legacy helper: enumerate Docker's // view and best-effort drop the matching container so a name conflict // in CreateContainer is recoverable. Best-effort. func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) { containers, err := deps.Docker.ListContainers(ctx, nil) if err != nil { return } for _, c := range containers { if c.Name == name { deps.Docker.StopContainer(ctx, c.ID, 10) deps.Docker.RemoveContainer(ctx, c.ID, true) return } } } // primaryDomain derives the public-facing FQDN from the workload's // first enabled public face. Static workloads support at most one // face today, but iterate defensively in case the API contract // loosens later. An empty return means "no proxy registration"; the // container still runs and is reachable inside the docker network. // // For the bare-subdomain case (Domain == "" but Subdomain != "") the // helper appends settings.Domain to form a complete FQDN — matching // the legacy Manager which let settings.Domain fall through silently. // On a settings lookup failure the bare subdomain is returned as-is // so the proxy still gets *something* to register. func primaryDomain(deps plugin.Deps, w plugin.Workload) string { for _, f := range w.PublicFaces { if f.Subdomain == "" && f.Domain == "" { continue } switch { case f.Subdomain != "" && f.Domain != "": return f.Subdomain + "." + f.Domain case f.Subdomain == "" && f.Domain != "": return f.Domain case f.Subdomain != "" && f.Domain == "": settings, err := deps.Store.GetSettings() if err != nil || settings.Domain == "" { return f.Subdomain } return f.Subdomain + "." + settings.Domain } } return "" }