Files
tiny-forge/internal/workload/plugin/source/static/deploy.go
T
alexei.dolgolyov 93b6911b34 feat(apps): per-app deploy/activity timeline
Every deploy across all four source kinds now writes a workload-scoped
event via a shared plugin.EmitDeployEvent helper (replacing the inline
emit duplicated in static/dockerfile, standardizing static's metadata
key site_id->workload_id, and adding emission to image+compose which
were silent). New indexed event_log.workload_id column, EventLogFilter
.WorkloadID, and GET /api/workloads/{id}/events (id pinned from path).

Frontend: a forge "Activity" panel on /apps/[id] reusing EventLogEntry,
live SSE prepend filtered by workload_id, load-more pagination, an
All/Errors severity filter, and a shared toEventLogEntry mapper. en/ru
i18n parity.

Security: compose's failure status emits a generic reason instead of raw
`docker compose up` output, which can echo app secrets and egresses to
operator webhooks (NotificationURL + event-trigger actions); full detail
stays only in the returned error. Rune-safe 256-rune status cap.

Reviewed: go + typescript APPROVE; security HIGH fixed.
2026-05-29 13:51:17 +03:00

612 lines
22 KiB
Go

package static
import (
"context"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strconv"
"time"
"github.com/moby/moby/api/types/mount"
"github.com/alexei/tinyforge/internal/crypto"
"github.com/alexei/tinyforge/internal/docker"
"github.com/alexei/tinyforge/internal/events"
"github.com/alexei/tinyforge/internal/notify"
"github.com/alexei/tinyforge/internal/proxy"
"github.com/alexei/tinyforge/internal/staticsite"
"github.com/alexei/tinyforge/internal/store"
"github.com/alexei/tinyforge/internal/workload/plugin"
)
// healthCheckDelay is how long deploy waits after StartContainer before
// it asks Docker whether the new container is still running. The window
// is kept short so a healthy deploy is not slowed noticeably, yet long
// enough for crash-on-boot failures (missing env var, bad Dockerfile,
// port conflict) to surface.
const healthCheckDelay time.Duration = 3 * time.Second
// deploy runs one full sync of a static workload. In order it:
//
//  1. decodes the source config and loads the previous runtime state;
//  2. builds a git provider and resolves the latest commit SHA;
//  3. short-circuits (returns nil) when the deploy is not forced, the SHA
//     is unchanged, the previous container is running and — if a domain
//     is configured — its proxy route still exists;
//  4. downloads the configured folder, verifies it stayed inside the
//     build dir and optionally renders markdown;
//  5. prepares a static or deno build context and builds the image;
//  6. creates and starts the container, then waits healthCheckDelay and
//     confirms it is still running;
//  7. (re)configures the proxy route and drops the previous container;
//  8. persists the new state, rolling the container and route back if
//     that write fails.
//
// Every failure after step 1 is recorded through updateStatus (with the
// access token scrubbed via sanitizeError) and returned as a wrapped
// error. retErr is a named result so the deferred commit-status reporter
// can tell success from failure.
//
// Rollback and the terminal commit-status report run on a context
// detached from ctx's cancellation (bounded by cleanupTimeout): a
// cancelled deploy must still be able to remove the container it just
// started and must not leave the commit stuck on "pending".
//
// Ported from internal/staticsite/manager.go to operate directly on
// plugin.Workload + container row state, without the legacy
// static_sites table.
func deploy(ctx context.Context, deps plugin.Deps, w plugin.Workload, intent plugin.DeploymentIntent) (retErr error) {
	// cleanupTimeout bounds each detached rollback / reporting sequence
	// so a wedged Docker daemon or git host cannot hang the deploy
	// goroutine forever once cancellation is no longer honoured.
	const cleanupTimeout = 30 * time.Second

	// detached returns a context that keeps ctx's values but ignores its
	// cancellation, limited to cleanupTimeout. Callers must invoke the
	// returned cancel func.
	detached := func() (context.Context, context.CancelFunc) {
		return context.WithTimeout(context.WithoutCancel(ctx), cleanupTimeout)
	}

	// discardContainer force-removes a container created by this deploy.
	// Best-effort: the result of the removal is intentionally ignored.
	discardContainer := func(id string) {
		cctx, cancel := detached()
		defer cancel()
		deps.Docker.RemoveContainer(cctx, id, true)
	}

	cfg, err := plugin.SourceConfigOf[Config](w)
	if err != nil {
		return fmt.Errorf("static source: decode config: %w", err)
	}
	prev, prevContainer, err := loadState(deps, w)
	if err != nil {
		return err
	}

	// Manual / first-time deploys force a full rebuild even when the
	// commit SHA is unchanged. The legacy Manager.Deploy was called
	// with force=true from the adapter; preserve that semantic by
	// treating an empty, "manual" or "promote" reason as forcing.
	force := intent.Reason == "" || intent.Reason == "manual" || intent.Reason == "promote"

	// Decrypt the access token if present. Kept in a local so the
	// sanitizer can scrub it from any error string before persisting.
	// A decrypt failure is only logged; the deploy continues with an
	// empty token.
	token := ""
	if cfg.AccessToken != "" {
		decrypted, derr := crypto.Decrypt(deps.EncKey, cfg.AccessToken)
		if derr != nil {
			slog.Warn("static source: failed to decrypt access token", "site", w.Name, "error", derr)
		} else {
			token = decrypted
		}
	}

	provider, err := staticsite.NewGitProvider(staticsite.ProviderType(cfg.Provider), cfg.BaseURL, token)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create provider: %v", err), token))
		return fmt.Errorf("create provider: %w", err)
	}
	latestSHA, err := provider.GetLatestCommitSHA(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch)
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("fetch commit SHA: %v", err), token))
		return fmt.Errorf("get latest commit: %w", err)
	}

	// Public-facing domain for proxy registration; "" means the site is
	// deployed without a proxy route.
	domain := primaryDomain(deps, w)

	// Commit-status reporter (best-effort; gated on cfg.ReportCommitStatus).
	// Built here once latestSHA + domain are known. A deferred terminal
	// report fires Success/Failure based on the deploy's outcome, but ONLY
	// once an actual deploy began (deployStarted) — the unchanged-SHA
	// short-circuit below returns before that flips, so no status is
	// reported when nothing was deployed. retErr is the named return the
	// defer inspects.
	reporter := newCommitStatusReporter(provider, cfg, latestSHA, statusTargetURL(domain))
	deployStarted := false
	defer func() {
		if !deployStarted {
			return
		}
		// Report on a detached context: if ctx was cancelled the
		// failure status must still reach the git host, otherwise the
		// commit would stay on "pending".
		reportCtx, cancel := detached()
		defer cancel()
		if retErr != nil {
			reporter.report(reportCtx, w, staticsite.CommitStatusFailure, "Tinyforge: deploy failed")
		} else {
			reporter.report(reportCtx, w, staticsite.CommitStatusSuccess, "Tinyforge: deployed")
		}
	}()

	// Skip redeploy when nothing changed AND we have a live container +
	// (if applicable) live proxy route. Manual deploys always force.
	prevContainerID := ""
	prevProxyRouteID := ""
	if prevContainer != nil {
		prevContainerID = prevContainer.ContainerID
		prevProxyRouteID = prevContainer.ProxyRouteID
	}
	if !force && latestSHA == prev.LastCommitSHA && prev.Status == "deployed" && prevContainerID != "" {
		// An inspect error is treated the same as "not running": the
		// deploy proceeds rather than trusting stale state.
		running, _ := deps.Docker.IsContainerRunning(ctx, prevContainerID)
		if !running {
			slog.Info("static site: container not running, forcing redeploy", "site", w.Name)
		} else if domain != "" {
			proxyOK, perr := deps.Proxy.RouteExists(ctx, domain)
			if perr != nil {
				slog.Warn("static site: proxy check failed, forcing redeploy", "site", w.Name, "error", perr)
			} else if !proxyOK {
				slog.Info("static site: proxy route missing, forcing redeploy", "site", w.Name)
			} else {
				slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA)
				return nil
			}
		} else {
			slog.Info("static site: no changes", "site", w.Name, "sha", latestSHA)
			return nil
		}
	}

	// Mark syncing. From here on a deploy is genuinely underway, so the
	// deferred terminal status report should fire. Push a "pending" commit
	// status (best-effort) and arm the deferred Success/Failure report.
	updateStatus(deps, w, "syncing", prev.LastCommitSHA, "")
	publishEvent(deps, w, "syncing")
	deployStarted = true
	reporter.report(ctx, w, staticsite.CommitStatusPending, "Tinyforge: deploying")

	// Download dir — temp dir cleaned up on every exit path.
	buildDir, err := os.MkdirTemp("", "dw-site-"+idShort(w)+"-*")
	if err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("create temp dir: %v", err), token))
		return fmt.Errorf("create temp dir: %w", err)
	}
	defer os.RemoveAll(buildDir)
	if err := provider.DownloadFolder(ctx, cfg.RepoOwner, cfg.RepoName, cfg.Branch, cfg.FolderPath, buildDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("download folder: %v", err), token))
		return fmt.Errorf("download folder: %w", err)
	}

	// Defense in depth: providers should never write outside buildDir,
	// but a hostile self-hosted Gitea/GitLab the operator pointed at
	// could in principle return a tree entry that escapes. Verify
	// before the copy step materializes the build context.
	if err := verifyDownloadInsideRoot(buildDir); err != nil {
		updateStatus(deps, w, "failed", prev.LastCommitSHA,
			sanitizeError(fmt.Sprintf("downloaded tree rejected: %v", err), token))
		return fmt.Errorf("downloaded tree rejected: %w", err)
	}

	// Markdown rendering is optional polish: a failure is logged and the
	// deploy carries on with the unrendered files.
	if cfg.RenderMarkdown {
		if err := staticsite.RenderMarkdownFiles(buildDir); err != nil {
			slog.Warn("static site: markdown rendering failed", "site", w.Name, "error", err)
		}
	}

	// Detect mode: deno requires an api/ folder. Fall back to static if
	// the operator declared deno but the repo doesn't carry routes.
	mode := cfg.Mode
	apiDir := filepath.Join(buildDir, "api")
	hasAPI := false
	if info, err := os.Stat(apiDir); err == nil && info.IsDir() {
		hasAPI = true
	}
	if mode == "deno" && !hasAPI {
		mode = "static"
		slog.Info("static site: no api/ folder found, falling back to static mode", "site", w.Name)
	}

	// From this point failures record latestSHA (not the previous SHA)
	// in the runtime state.
	imageTag := imageTagFor(w)
	contextDir, err := os.MkdirTemp("", "dw-site-build-*")
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("create build context: %v", err), token))
		return fmt.Errorf("create build context dir: %w", err)
	}
	defer os.RemoveAll(contextDir)
	if mode == "deno" {
		if err := prepareDenoBuild(buildDir, contextDir); err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("prepare deno build: %v", err), token))
			return fmt.Errorf("prepare deno build: %w", err)
		}
	} else {
		if err := prepareStaticBuild(buildDir, contextDir); err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("prepare static build: %v", err), token))
			return fmt.Errorf("prepare static build: %w", err)
		}
	}
	if err := deps.Docker.BuildImage(ctx, contextDir, imageTag); err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("build image: %v", err), token))
		return fmt.Errorf("build image: %w", err)
	}

	env := buildEnv(deps, w.ID)
	// Container port the proxy forwards to: 80 for static builds, 8000
	// for deno builds.
	containerPort := "80"
	if mode == "deno" {
		containerPort = "8000"
	}
	settings, err := deps.Store.GetSettings()
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("get settings: %v", err), token))
		return fmt.Errorf("get settings: %w", err)
	}
	networkName := settings.Network
	networkID, err := deps.Docker.EnsureNetwork(ctx, networkName)
	if err != nil {
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("ensure network: %v", err), token))
		return fmt.Errorf("ensure network: %w", err)
	}
	containerName := containerNameFor(w)

	// Persistent storage is only attached in deno mode. A volume failure
	// is logged and the container is created without the mount.
	var mounts []mount.Mount
	if cfg.StorageEnabled && mode == "deno" {
		volName, volErr := deps.Docker.EnsureSiteVolume(ctx, siteVolumeKey(w))
		if volErr != nil {
			slog.Warn("static site: failed to ensure storage volume", "site", w.Name, "error", volErr)
		} else {
			mounts = append(mounts, mount.Mount{
				Type:   mount.TypeVolume,
				Source: volName,
				Target: "/app/data",
			})
			slog.Info("static site: storage volume attached", "site", w.Name, "volume", volName)
		}
	}

	// Proxy labels for the primary domain (Traefik picks these up; NPM
	// ignores them). The provider's labels are copied into a fresh map
	// so the container config never aliases provider-owned state.
	labels := map[string]string{}
	if domain != "" {
		port, _ := strconv.Atoi(containerPort)
		if l := deps.Proxy.ContainerLabels(domain, port); l != nil {
			for k, v := range l {
				labels[k] = v
			}
		}
	}
	cc := docker.ContainerConfig{
		Name:         containerName,
		Image:        imageTag,
		Env:          env,
		ExposedPorts: []string{containerPort + "/tcp"},
		NetworkName:  networkName,
		NetworkID:    networkID,
		Mounts:       mounts,
		Labels:       labels,
		WorkloadID:   w.ID,
		WorkloadKind: string(store.WorkloadKindSite),
		Role:         "",
	}
	containerID, err := deps.Docker.CreateContainer(ctx, cc)
	if err != nil {
		// Container with this name might already exist — best-effort
		// cleanup of any prior container by ID and by name, then retry.
		if prevContainerID != "" {
			deps.Docker.StopContainer(ctx, prevContainerID, 10)
			deps.Docker.RemoveContainer(ctx, prevContainerID, true)
		}
		removeContainerByName(ctx, deps, containerName)
		containerID, err = deps.Docker.CreateContainer(ctx, cc)
		if err != nil {
			updateStatus(deps, w, "failed", latestSHA,
				sanitizeError(fmt.Sprintf("create container: %v", err), token))
			return fmt.Errorf("create container: %w", err)
		}
	}
	if err := deps.Docker.StartContainer(ctx, containerID); err != nil {
		// The start may have failed because ctx was cancelled, so the
		// removal must not depend on ctx.
		discardContainer(containerID)
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("start container: %v", err), token))
		return fmt.Errorf("start container: %w", err)
	}

	// Brief health-check window — verify the container survives a few
	// seconds after start, surfacing the tail of its logs as the
	// failure reason if it crashes. Honor ctx so a cancelled deploy
	// returns promptly instead of waiting out the full delay.
	select {
	case <-ctx.Done():
		// ctx is already cancelled here; removing the just-started
		// container on ctx would fail and leave it running.
		discardContainer(containerID)
		updateStatus(deps, w, "failed", latestSHA, "deploy cancelled before health check")
		return ctx.Err()
	case <-time.After(healthCheckDelay):
	}
	running, runErr := deps.Docker.IsContainerRunning(ctx, containerID)
	if runErr != nil || !running {
		// Use a detached context for log capture and removal: runErr
		// may itself be a cancellation of ctx.
		cctx, cancel := detached()
		defer cancel()
		logMsg := "container exited immediately after start"
		if logs, logErr := deps.Docker.ContainerLogs(cctx, containerID, false, "20"); logErr == nil {
			buf, _ := io.ReadAll(logs)
			logs.Close()
			if len(buf) > 0 {
				logMsg = sanitizeError(string(buf), token)
			}
		}
		deps.Docker.RemoveContainer(cctx, containerID, true)
		updateStatus(deps, w, "failed", latestSHA, logMsg)
		return fmt.Errorf("container not running: %s", logMsg)
	}

	// Resolve proxy target. Default to in-network DNS (containerName);
	// switch to (settings.ServerIP, hostPort) under NPM remote mode.
	internalPort, _ := strconv.Atoi(containerPort)
	forwardHost := containerName
	forwardPort := internalPort
	if settings.NpmRemote && settings.ProxyProvider == "npm" {
		if settings.ServerIP != "" {
			hostPort, hpErr := deps.Docker.InspectContainerPort(ctx, containerID, containerPort+"/tcp")
			if hpErr != nil {
				slog.Warn("static site: could not get host port for remote NPM", "site", w.Name, "error", hpErr)
			} else {
				forwardHost = settings.ServerIP
				forwardPort = int(hostPort)
			}
		}
	}

	// Configure proxy if a domain is set: delete the prior route (if
	// any), then register the new target. A configure failure is logged
	// and does not fail the deploy.
	// NOTE(review): when ConfigureRoute fails after the prior route was
	// deleted, proxyRouteID still holds the deleted route's ID and is
	// persisted below — confirm whether that stale ID is intended.
	proxyRouteID := prevProxyRouteID
	if domain != "" {
		if prevProxyRouteID != "" {
			deps.Proxy.DeleteRoute(ctx, prevProxyRouteID)
		}
		routeID, rerr := deps.Proxy.ConfigureRoute(ctx, domain, forwardHost, forwardPort, proxy.RouteOptions{
			SSLCertificateID: settings.SSLCertificateID,
		})
		if rerr != nil {
			slog.Warn("static site: failed to configure proxy",
				"site", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "error", rerr)
		} else {
			proxyRouteID = routeID
			slog.Info("static site: proxy configured",
				"site", w.Name, "domain", domain,
				"target", fmt.Sprintf("%s:%d", forwardHost, forwardPort), "routeID", routeID)
		}
	}

	// Drop the old container if a fresh one was created (different ID).
	if prevContainerID != "" && prevContainerID != containerID {
		deps.Docker.StopContainer(ctx, prevContainerID, 10)
		deps.Docker.RemoveContainer(ctx, prevContainerID, true)
	}

	// Single write of the new state + container metadata via saveState.
	// On failure: tear down the just-created container and proxy route
	// so we don't leave orphans behind.
	if err := saveState(deps, w, func(rs *runtimeState, c *store.Container) {
		rs.LastCommitSHA = latestSHA
		rs.LastSyncAt = store.Now()
		rs.LastError = ""
		rs.Status = "deployed"
		c.ContainerID = containerID
		c.ProxyRouteID = proxyRouteID
		c.Subdomain = domain
		c.State = "running"
		c.Port = internalPort
		c.ImageRef = imageTag
	}); err != nil {
		slog.Error("static site: failed to persist deploy state — rolling back",
			"site", w.Name, "error", err)
		// Roll back on a detached context so a cancellation that
		// arrived late in the deploy cannot strand the container or
		// the route.
		rollbackCtx, cancel := detached()
		defer cancel()
		if proxyRouteID != "" {
			deps.Proxy.DeleteRoute(rollbackCtx, proxyRouteID)
		}
		deps.Docker.StopContainer(rollbackCtx, containerID, 10)
		deps.Docker.RemoveContainer(rollbackCtx, containerID, true)
		// Best-effort failure-state write so the operator sees the
		// deploy failed instead of a silent gap.
		updateStatus(deps, w, "failed", latestSHA,
			sanitizeError(fmt.Sprintf("persist deploy state: %v", err), token))
		return fmt.Errorf("persist deploy state: %w", err)
	}
	publishEvent(deps, w, "deployed")

	// updateStatus normally fires the terminal-state notification; the
	// success path above wrote state via saveState directly, so dispatch
	// the deployed notification explicitly here.
	dispatchSiteNotification(deps, w, domain, "deployed", "")

	// Log an abbreviated SHA (first 8 bytes) for readability.
	shaDisplay := latestSHA
	if len(shaDisplay) > 8 {
		shaDisplay = shaDisplay[:8]
	}
	slog.Info("static site deployed", "site", w.Name, "sha", shaDisplay, "mode", mode)
	return nil
}
// commitStatusReporter publishes deploy outcomes to the git provider as
// commit statuses. Reporting is opt-in per workload (the enabled flag)
// and strictly best-effort: report logs a failed push at Warn and never
// returns an error, so reporting can neither fail nor block a deploy.
//
// Provider and commit identifiers are captured once at construction so
// each transition (pending / success / failure) is a one-line call.
// report is a no-op on a nil receiver, a disabled reporter, a nil
// provider or an empty SHA, so call sites need no guards of their own.
type commitStatusReporter struct {
	// provider is the git host client the status is pushed through.
	provider staticsite.GitProvider

	// owner, repo and sha identify the commit the status attaches to.
	owner, repo, sha string

	// targetURL is the link shown next to the status; may be empty.
	targetURL string

	// enabled gates every report call.
	enabled bool
}
// newCommitStatusReporter captures everything a later report call needs:
// the provider, the repository coordinates from cfg, the commit SHA and
// the link-back URL. The result is never nil; when cfg.ReportCommitStatus
// is false (or sha is empty) its report method does nothing.
func newCommitStatusReporter(provider staticsite.GitProvider, cfg Config, sha, targetURL string) *commitStatusReporter {
	r := new(commitStatusReporter)
	r.provider = provider
	r.owner, r.repo = cfg.RepoOwner, cfg.RepoName
	r.sha = sha
	r.targetURL = targetURL
	r.enabled = cfg.ReportCommitStatus
	return r
}
// report pushes a single commit status with the given description. It is
// inert for a nil receiver, a disabled reporter, a nil provider or an
// empty SHA. A provider error is logged at Warn (tagged with the
// workload name and status) and otherwise ignored — nothing is returned.
func (r *commitStatusReporter) report(ctx context.Context, w plugin.Workload, status staticsite.CommitStatus, description string) {
	// The nil check comes first so the field reads below are safe.
	if r == nil {
		return
	}
	if !r.enabled || r.provider == nil || r.sha == "" {
		return
	}
	err := r.provider.SetCommitStatus(ctx, r.owner, r.repo, r.sha, status, r.targetURL, description)
	if err == nil {
		return
	}
	slog.Warn("static site: commit-status report failed (ignored)",
		"site", w.Name, "status", string(status), "error", err)
}
// statusTargetURL turns a domain into the https link attached to a
// commit status. An empty domain yields an empty URL.
func statusTargetURL(domain string) string {
	if domain != "" {
		return "https://" + domain
	}
	return ""
}
// updateStatus persists a status transition for the workload and fires
// the side effects that go with it.
//
// The state write sets Status and LastError unconditionally, overwrites
// LastCommitSHA only when commitSHA is non-empty, and mirrors the status
// onto the container row: "deployed" becomes "running", "stopped" and
// "failed" are copied as-is, and any other status (notably "syncing")
// leaves the row's previous State untouched. A failed write is logged
// and does not suppress the side effects below.
//
// Side effects: a "failed" status publishes a "failed: <errMsg>" event,
// and both "failed" and "deployed" dispatch an outbound notification.
//
// The deploy success path calls saveState directly (it writes container
// metadata in the same call); this helper serves the failure and
// intermediate transitions where only status fields change.
func updateStatus(deps plugin.Deps, w plugin.Workload, status, commitSHA, errMsg string) {
	mutate := func(rs *runtimeState, c *store.Container) {
		rs.Status, rs.LastError = status, errMsg
		if commitSHA != "" {
			rs.LastCommitSHA = commitSHA
		}
		// Keep the container row's state column in step with terminal
		// statuses only; in-progress syncs must not churn it.
		if status == "deployed" {
			c.State = "running"
		} else if status == "stopped" || status == "failed" {
			c.State = status
		}
	}
	if err := saveState(deps, w, mutate); err != nil {
		slog.Error("static site: failed to update status", "id", w.ID, "status", status, "error", err)
	}

	failed := status == "failed"
	if failed {
		publishEvent(deps, w, "failed: "+errMsg)
	}
	if failed || status == "deployed" {
		dispatchSiteNotification(deps, w, primaryDomain(deps, w), status, errMsg)
	}
}
// dispatchSiteNotification sends one notification for the workload
// through plugin.DispatchNotificationForWorkload. The event type is
// "site_sync_failure" when status is "failed" and "site_sync_success"
// for every other status. The event carries the workload name, errMsg,
// and an https URL built from domain (empty when domain is empty).
// Choosing which receivers get the event is left entirely to the shared
// dispatcher.
func dispatchSiteNotification(deps plugin.Deps, w plugin.Workload, domain, status, errMsg string) {
	var kind, link string
	switch status {
	case "failed":
		kind = "site_sync_failure"
	default:
		kind = "site_sync_success"
	}
	if domain != "" {
		link = "https://" + domain
	}

	event := notify.Event{
		Type:    kind,
		Project: w.Name,
		URL:     link,
		Error:   errMsg,
	}
	plugin.DispatchNotificationForWorkload(deps, w, event)
}
// publishEvent announces a status change for the workload in two steps,
// in this order:
//
//  1. it publishes an events.EventStaticSiteStatus message on deps.Events
//     carrying the workload ID, name and status;
//  2. it calls plugin.EmitDeployEvent with the "static_site" kind and the
//     same status, the shared helper responsible for the workload-scoped
//     deploy event.
func publishEvent(deps plugin.Deps, w plugin.Workload, status string) {
	payload := events.StaticSiteStatusPayload{
		SiteID: w.ID,
		Name:   w.Name,
		Status: status,
	}
	busEvent := events.Event{
		Type:    events.EventStaticSiteStatus,
		Payload: payload,
	}
	deps.Events.Publish(busEvent)

	plugin.EmitDeployEvent(deps, w, "static_site", status)
}
// removeContainerByName stops (10s grace) and force-removes the first
// container Docker lists under exactly this name, so a name conflict in
// CreateContainer can be recovered from. Entirely best-effort: a listing
// error ends the call silently, and the stop/remove results are not
// inspected.
func removeContainerByName(ctx context.Context, deps plugin.Deps, name string) {
	listed, err := deps.Docker.ListContainers(ctx, nil)
	if err != nil {
		return
	}
	for _, ctr := range listed {
		if ctr.Name != name {
			continue
		}
		deps.Docker.StopContainer(ctx, ctr.ID, 10)
		deps.Docker.RemoveContainer(ctx, ctr.ID, true)
		// Only the first match is handled.
		break
	}
}
// primaryDomain returns the FQDN of the first public face that carries a
// subdomain and/or a domain; faces with neither are skipped. An empty
// result means no face qualified, so the caller registers no proxy route.
//
// Resolution per face:
//   - subdomain and domain set: "<subdomain>.<domain>";
//   - only domain set: the domain as-is;
//   - only subdomain set: "<subdomain>.<settings.Domain>", or the bare
//     subdomain when settings cannot be loaded or settings.Domain is
//     empty, so the proxy still has a name to register.
func primaryDomain(deps plugin.Deps, w plugin.Workload) string {
	for _, face := range w.PublicFaces {
		sub, dom := face.Subdomain, face.Domain

		if sub == "" {
			if dom == "" {
				continue
			}
			return dom
		}
		if dom != "" {
			return sub + "." + dom
		}

		// Bare subdomain: complete it with the globally configured domain.
		settings, err := deps.Store.GetSettings()
		if err == nil && settings.Domain != "" {
			return sub + "." + settings.Domain
		}
		return sub
	}
	return ""
}