package static

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/moby/moby/api/types/mount"

	"github.com/alexei/tinyforge/internal/crypto"
	"github.com/alexei/tinyforge/internal/docker"
	"github.com/alexei/tinyforge/internal/events"
	"github.com/alexei/tinyforge/internal/notify"
	"github.com/alexei/tinyforge/internal/proxy"
	"github.com/alexei/tinyforge/internal/staticsite"
	"github.com/alexei/tinyforge/internal/store"
	"github.com/alexei/tinyforge/internal/workload/plugin"
)

// healthCheckDelay is the brief grace window after StartContainer
// before we probe IsContainerRunning. Short enough not to bog down a
// healthy deploy; long enough to catch crash-on-boot failures
// (missing env var, bad Dockerfile, port conflict).
const healthCheckDelay = 3 * time.Second

// deploy runs one full sync of a static workload: fetch the latest
// commit, optionally rebuild the image, recreate the container, and
// reconfigure the proxy. Ported from internal/staticsite/manager.go to
// operate directly on plugin.Workload + container row state, without
// the legacy static_sites table.
//
// Behavior parity with the legacy path is the explicit goal — the
// log-line format ("Static site \"%s\": %s") and event payload shapes
// are preserved so log scrapers and SSE clients keep working through
// the cutover.
//
// Parameters:
//   - ctx bounds every provider, Docker and proxy call made here.
//   - deps supplies the store, Docker, proxy and encryption key.
//   - w is the workload being deployed.
//   - intent.Reason decides whether an unchanged commit is rebuilt:
//     "", "manual" and "promote" force a rebuild.
//
// Returns nil when the site was deployed or when nothing needed to
// change. Every failure after state is loaded is also recorded through
// updateStatus with the access token scrubbed from the message.
func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) error {
	// teardownTimeout bounds best-effort removal of a container this
	// deploy created but could not bring up.
	const teardownTimeout = 30 * time.Second

	cfg, err := plugin.SourceConfigOf[Config](w)
	if err != nil {
		return fmt.Errorf("static source: decode config: %w", err)
	}

	prev, prevContainer, err := loadState(deps, w)
	if err != nil {
		return err
	}

	// Manual / first-time deploys force a full rebuild even when the
	// commit SHA is unchanged. The legacy Manager.Deploy was called
	// with force=true from the adapter; preserve that semantic by
	// treating any non-cron / non-git intent as forcing.
	force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote"

	// Decrypt the access token if present. Kept in a local so the
	// sanitizer can scrub it from any error string before persisting.
	// A decrypt failure is logged and the deploy continues without a
	// token.
	token := ""
	if cfg.AccessToken != "" {
		decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken)
		if derr != nil {
			slog.Warn("static source: failed to decrypt access token", "site", w.Name, "error", derr)
		} else {
			token = decrypted
		}
	}

	provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("create provider: %v", err), token))
		return fmt.Errorf("create provider: %w", err)
	}

	latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token))
		return fmt.Errorf("get latest commit: %w", err)
	}

	// Resolve the public-facing domain from the workload's first enabled
	// public face. Mirrors the synthetic-row adapter's logic so the
	// proxy registration sees the same FQDN it did before.
	domain := primaryDomain(deps, w)

	// Skip redeploy when nothing changed AND we have a live container +
	// (if applicable) live proxy route. Manual deploys always force.
	prevContainerID := ""
	prevProxyRouteID := ""
	if prevContainer != nil {
		prevContainerID = prevContainer.ContainerID
		prevProxyRouteID = prevContainer.ProxyRouteID
	}
	if !force && latestSHA == prev.LastCommitSHA && prev.Status == "deployed" && prevContainerID != "" {
		// The error is deliberately dropped: running is then left at
		// whatever IsContainerRunning returned alongside it, and a
		// false value simply falls through to a redeploy.
		running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID)
		if !running {
			slog.Info("static site: container not running, forcing redeploy", "site", w.Name)
		} else if domain != "" {
			proxyOK, perr := deps.Proxy.RouteExists(ctx, domain)
			if perr != nil {
				slog.Warn("static site: proxy check failed, forcing redeploy", "site", w.Name, "error", perr)
			} else if !proxyOK {
				slog.Info("static site: proxy route missing, forcing redeploy", "site", w.Name)
			} else {
				slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA)
				return nil
			}
		} else {
			slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA)
			return nil
		}
	}

	// Mark syncing.
	updateStatus(deps, w, "syncing", prev.LastCommitSHA, "")
	publishEvent(deps, w, "syncing")

	// Build context — temp dir cleaned up on every exit path.
	buildDir, err := os.MkdirTemp("", "dw-site-"+idShort(w)+"-*")
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("create temp dir: %v", err), token))
		return fmt.Errorf("create temp dir: %w", err)
	}
	defer os.RemoveAll(buildDir)

	if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, cfg.FolderPath, buildDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("download folder: %v", err), token))
		return fmt.Errorf("download folder: %w", err)
	}

	// Defense in depth: providers should never write outside buildDir,
	// but a hostile self-hosted Gitea/GitLab the operator pointed at
	// could in principle return a tree entry that escapes. Verify
	// before the copy step materializes the build context.
	if err := verifyDownloadInsideRoot(buildDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA, sanitizeError(fmt.Sprintf("downloaded tree rejected: %v", err), token))
		return fmt.Errorf("downloaded tree rejected: %w", err)
	}

	// Markdown rendering is optional polish: a failure is logged and
	// the deploy carries on with the files as downloaded.
	if cfg.RenderMarkdown {
		if err := staticsite.RenderMarkdownFiles(buildDir); err != nil {
			slog.Warn("static site: markdown rendering failed", "site", w.Name, "error", err)
		}
	}

	// Detect mode: deno requires an api/ folder. Fall back to static if
	// the operator declared deno but the repo doesn't carry routes.
	mode := cfg.Mode
	apiDir := filepath.Join(buildDir, "api")
	hasAPI := false
	if info, err := os.Stat(apiDir); err == nil && info.IsDir() {
		hasAPI = true
	}
	if mode == "deno" && !hasAPI {
		mode = "static"
		slog.Info("static site: no api/ folder found, falling back to static mode", "site", w.Name)
	}

	imageTag := imageTagFor(w)

	// From here on failures record latestSHA rather than the previous
	// commit: the new tree has been fetched and is what failed.
	contextDir, err := os.MkdirTemp("", "dw-site-build-*")
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("create build context: %v", err), token))
		return fmt.Errorf("create build context dir: %w", err)
	}
	defer os.RemoveAll(contextDir)

	if mode == "deno" {
		if err := prepareDenoBuild(buildDir, contextDir); err != nil {
			updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("prepare deno build: %v", err), token))
			return fmt.Errorf("prepare deno build: %w", err)
		}
	} else {
		if err := prepareStaticBuild(buildDir, contextDir); err != nil {
			updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("prepare static build: %v", err), token))
			return fmt.Errorf("prepare static build: %w", err)
		}
	}

	if err := deps.Docker.BuildImage(ctx, contextDir, imageTag); err != nil {
		updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("build image: %v", err), token))
		return fmt.Errorf("build image: %w", err)
	}

	env := buildEnv(deps, w.ID)

	// Port the container listens on: 80 for static mode, 8000 for deno.
	containerPort := "80"
	if mode == "deno" {
		containerPort = "8000"
	}

	settings, err := deps.Store.GetSettings()
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("get settings: %v", err), token))
		return fmt.Errorf("get settings: %w", err)
	}

	networkName := settings.Network
	networkID, err := deps.Docker.EnsureNetwork(ctx, networkName)
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("ensure network: %v", err), token))
		return fmt.Errorf("ensure network: %w", err)
	}

	containerName := containerNameFor(w)

	// Persistent storage is only attached in deno mode. A volume
	// failure is logged and the container is created without it.
	var mounts []mount.Mount
	if cfg.StorageEnabled && mode == "deno" {
		volName, volErr := deps.Docker.EnsureSiteVolume(ctx, siteVolumeKey(w))
		if volErr != nil {
			slog.Warn("static site: failed to ensure storage volume", "site", w.Name, "error", volErr)
		} else {
			mounts = append(mounts, mount.Mount{
				Type:   mount.TypeVolume,
				Source: volName,
				Target: "/app/data",
			})
			slog.Info("static site: storage volume attached", "site", w.Name, "volume", volName)
		}
	}

	// Per-face proxy labels (Traefik picks these up; NPM ignores them).
	// Static workloads have at most one face today, but iterate for
	// future multi-face parity with the image source.
	labels := map[string]string{}
	if domain != "" {
		// containerPort is one of the two numeric literals above, so
		// Atoi cannot fail here.
		port, _ := strconv.Atoi(containerPort)
		if l := deps.Proxy.ContainerLabels(domain, port); l != nil {
			for k, v := range l {
				labels[k] = v
			}
		}
	}

	cc := docker.ContainerConfig{
		Name:         containerName,
		Image:        imageTag,
		Env:          env,
		ExposedPorts: []string{containerPort + "/tcp"},
		NetworkName:  networkName,
		NetworkID:    networkID,
		Mounts:       mounts,
		Labels:       labels,
		WorkloadID:   w.ID,
		WorkloadKind: string(store.WorkloadKindSite),
		Role:         "",
	}

	containerID, err := deps.Docker.CreateContainer(ctx, cc)
	if err != nil {
		// Container with this name might already exist — best-effort
		// cleanup of any prior container by ID and by name, then retry.
		if prevContainerID != "" {
			deps.Docker.StopContainer(ctx, prevContainerID, 10)
			deps.Docker.RemoveContainer(ctx, prevContainerID, true)
		}
		removeContainerByName(ctx, deps, containerName)
		containerID, err = deps.Docker.CreateContainer(ctx, cc)
		if err != nil {
			updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("create container: %v", err), token))
			return fmt.Errorf("create container: %w", err)
		}
	}

	// discardNew force-removes the container created by this deploy.
	// It runs on a context detached from ctx's cancellation, with its
	// own deadline: these teardown paths are reached precisely when the
	// deploy was cancelled or failed, and a Docker call made with an
	// already-cancelled ctx would be expected to abort and leave the
	// container orphaned.
	discardNew := func() {
		cleanupCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), teardownTimeout)
		defer cancel()
		deps.Docker.RemoveContainer(cleanupCtx, containerID, true)
	}

	if err := deps.Docker.StartContainer(ctx, containerID); err != nil {
		discardNew()
		updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("start container: %v", err), token))
		return fmt.Errorf("start container: %w", err)
	}

	// Brief health-check window — verify the container survives a few
	// seconds after start, surfacing the tail of its logs as the
	// failure reason if it crashes. Honor ctx so a cancelled deploy
	// returns promptly instead of waiting out the full delay.
	select {
	case <-ctx.Done():
		discardNew()
		updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check")
		return ctx.Err()
	case <-time.After(healthCheckDelay):
	}

	running, runErr := deps.Docker.IsContainerRunning(ctx, containerID)
	if runErr != nil || !running {
		logMsg := "container exited immediately after start"
		if logs, logErr := deps.Docker.ContainerLogs(ctx, containerID, false, "20"); logErr == nil {
			// Best-effort read: a partial or empty buffer just keeps
			// the generic message above.
			buf, _ := io.ReadAll(logs)
			logs.Close()
			if len(buf) > 0 {
				logMsg = sanitizeError(string(buf), token)
			}
		}
		discardNew()
		updateStatus(deps, w, "failed", latestSHA, logMsg)
		return fmt.Errorf("container not running: %s", logMsg)
	}

	// Resolve proxy target. Default to in-network DNS (containerName);
	// switch to (settings.ServerIP, hostPort) under NPM remote mode.
	internalPort, _ := strconv.Atoi(containerPort)
	forwardHost := containerName
	forwardPort := internalPort
	if settings.NpmRemote && settings.ProxyProvider == "npm" {
		if settings.ServerIP != "" {
			hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp")
			if hpErr != nil {
				slog.Warn("static site: could not get host port for remote NPM", "site", w.Name, "error", hpErr)
			} else {
				forwardHost = settings.ServerIP
				forwardPort = int(hostPort)
			}
		}
	}

	// Configure proxy if a domain is set. Any prior route is deleted
	// first and the new one configured right after; a configure
	// failure is logged and does not fail the deploy.
	// NOTE(review): when ConfigureRoute fails after the delete,
	// proxyRouteID still holds the previous ID and is persisted below —
	// confirm whether that ID can be stale at that point.
	proxyRouteID := prevProxyRouteID
	if domain != "" {
		if prevProxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, prevProxyRouteID)
		}
		routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{
			SSLCertificateID: settings.SSLCertificateID,
		})
		if rerr != nil {
			slog.Warn("static site: failed to configure proxy", "site", w.Name, "domain", domain, "target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr)
		} else {
			proxyRouteID = routeID
			slog.Info("static site: proxy configured", "site", w.Name, "domain", domain, "target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID)
		}
	}

	// Drop the old container if a fresh one was created (different ID).
	if prevContainerID != "" && prevContainerID != containerID {
		deps.Docker.StopContainer(ctx, prevContainerID, 10)
		deps.Docker.RemoveContainer(ctx, prevContainerID, true)
	}

	// Single transactional write of the new state + container metadata.
	// On failure: tear down the just-created container and proxy route
	// so we don't leave orphans behind. The next deploy would otherwise
	// see no row and try to create a third container.
	if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
		rs.LastCommitSHA = latestSHA
		rs.LastSyncAt = store.Now()
		rs.LastError = ""
		rs.Status = "deployed"
		c.ContainerID = containerID
		c.ProxyRouteID = proxyRouteID
		c.Subdomain = domain
		c.State = "running"
		c.Port = internalPort
		c.ImageRef = imageTag
	}); err != nil {
		slog.Error("static site: failed to persist deploy state — rolling back", "site", w.Name, "error", err)
		if proxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, proxyRouteID)
		}
		deps.Docker.StopContainer(ctx, containerID, 10)
		deps.Docker.RemoveContainer(ctx, containerID, true)
		// Best-effort failure-state write so the operator sees the
		// deploy failed instead of a silent gap. If even this fails
		// we have nothing left to log.
		updateStatus(deps, w, "failed", latestSHA, sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token))
		return fmt.Errorf("persist deploy state: %w", err)
	}

	publishEvent(deps, w, "deployed")
	// updateStatus normally fires the terminal-state notification; the
	// success path above wrote state via saveState directly, so dispatch
	// the deployed notification explicitly here.
	dispatchSiteNotification(deps, w, domain, "deployed", "")

	// Log only a short SHA prefix to keep the line readable.
	shaDisplay := latestSHA
	if len(shaDisplay) > 8 {
		shaDisplay = shaDisplay[:8]
	}
	slog.Info("static site deployed", "site", w.Name, "sha", shaDisplay, "mode", mode)
	return nil
}

// updateStatus writes the runtime state's status/error/commit fields
// and fires the side effects the legacy Manager.updateStatus did:
// failures land in the event log, and terminal transitions trigger an
// outbound notification.
//
// On the deploy success path saveState is called directly (with the
// full container metadata in the same write); this helper covers the
// failure / intermediate transitions where only state moves.
func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) { if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) { rs.Status = status rs.LastError = errMsg if commitSHA != "" { rs.LastCommitSHA = commitSHA } // Reflect status into the container row state column so the // global containers index stays useful for filtered queries. switch status { case "deployed": c.State = "running" case "stopped": c.State = "stopped" case "failed": c.State = "failed" case "syncing": // Don't churn the container row's state during in-progress // syncs — leave it on whatever value the previous deploy left. } }); err != nil { slog.Error("static site: failed to update status", "id", w.ID, "status", status, "error", err) } if status == "failed" { publishEvent(deps, w, "failed: "+errMsg) } if status == "deployed" || status == "failed" { dispatchSiteNotification(deps, w, primaryDomain(deps, w), status, errMsg) } } // dispatchSiteNotification fires a site_sync_success or // site_sync_failure event to the configured outbound webhook. // Resolution: per-workload URL+secret first, then fall through to // settings.notification_url/secret. Always best-effort. 
func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) { if deps.Notifier == nil { return } settings, err := deps.Store.GetSettings() if err != nil { slog.Warn("static site: notify settings lookup failed", "site", w.ID, "error", err) return } url, secret, tier := resolveSiteTarget(w, settings) if url == "" { return } eventType := "site_sync_success" if status == "failed" { eventType = "site_sync_failure" } siteURL := "" if domain != "" { siteURL = "https://" + domain } deps.Notifier.SendSigned(url, secret, tier, notify.Event{ Type: eventType, Project: w.Name, URL: siteURL, Error: errMsg, }) } // resolveSiteTarget mirrors the legacy resolveSiteTarget helper but // reads notification config off the workload row (where it now lives // post-refactor) rather than the static_sites row. func resolveSiteTarget(w plugin.Workload, settings store.Settings) (string, string, notify.Tier) { if w.NotificationURL != "" { return w.NotificationURL, w.NotificationSecret, notify.TierSite } return settings.NotificationURL, settings.NotificationSecret, notify.TierSettings } // publishEvent emits a static_site_status event on the bus AND // persists an event_log row so the dashboard's audit trail picks it // up. Message format ("Static site \"%s\": %s") is preserved verbatim // from the legacy Manager.publishEvent so log scrapers and operator- // configured event triggers keep matching. func publishEvent(deps plugin.Deps, w plugin.Workload, status string) { deps.Events.Publish(events.Event{ Type: events.EventStaticSiteStatus, Payload: events.StaticSiteStatusPayload{ SiteID: w.ID, Name: w.Name, Status: status, }, }) severity := "info" if strings.HasPrefix(status, "failed") { severity = "error" } message := fmt.Sprintf("Static site %q: %s", w.Name, status) // Build metadata via json.Marshal so workload names containing // quotes or backslashes don't produce invalid JSON for downstream // log-scan consumers. 
metaBytes, err := json.Marshal(map[string]string{ "site_id": w.ID, "site_name": w.Name, "status": status, }) if err != nil { slog.Error("static site: marshal event metadata", "error", err) metaBytes = []byte("{}") } metadata := string(metaBytes) evt, err := deps.Store.InsertEvent(store.EventLog{ Source: "static_site", Severity: severity, Message: message, Metadata: metadata, }) if err != nil { slog.Error("static site: failed to persist event log", "error", err) return } deps.Events.Publish(events.Event{ Type: events.EventLog, Payload: events.EventLogPayload{ ID: evt.ID, Source: "static_site", Severity: severity, Message: message, Metadata: metadata, CreatedAt: evt.CreatedAt, }, }) } // removeContainerByName mirrors the legacy helper: enumerate Docker's // view and best-effort drop the matching container so a name conflict // in CreateContainer is recoverable. Best-effort. func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) { containers, err := deps.Docker.ListContainers(ctx, nil) if err != nil { return } for _, c := range containers { if c.Name == name { deps.Docker.StopContainer(ctx, c.ID, 10) deps.Docker.RemoveContainer(ctx, c.ID, true) return } } } // primaryDomain derives the public-facing FQDN from the workload's // first enabled public face. Static workloads support at most one // face today, but iterate defensively in case the API contract // loosens later. An empty return means "no proxy registration"; the // container still runs and is reachable inside the docker network. // // For the bare-subdomain case (Domain == "" but Subdomain != "") the // helper appends settings.Domain to form a complete FQDN — matching // the legacy Manager which let settings.Domain fall through silently. // On a settings lookup failure the bare subdomain is returned as-is // so the proxy still gets *something* to register. 
func primaryDomain(deps plugin.Deps, w plugin.Workload) string { for _, f := range w.PublicFaces { if f.Subdomain == "" && f.Domain == "" { continue } switch { case f.Subdomain != "" && f.Domain != "": return f.Subdomain + "." + f.Domain case f.Subdomain == "" && f.Domain != "": return f.Domain case f.Subdomain != "" && f.Domain == "": settings, err := deps.Store.GetSettings() if err != nil || settings.Domain == "" { return f.Subdomain } return f.Subdomain + "." + settings.Domain } } return "" }