fix: harden security, fix concurrency bugs, and address review findings
Build / build (push) Successful in 11m42s
Build / build (push) Successful in 11m42s
Security:
- rate limit /api/webhook routes per-IP and cap concurrent site syncs
- global SSE connection cap (256) with new sse_gate
- validate ?tail= and cap JSON log responses at 4 MiB
- strip ANSI/CSI/OSC and control bytes from streamed log lines
- redact webhook secret from request log middleware
- scrub host details from /api/health for non-admin viewers
- drop container_id from /api/system/stats/top for non-admins
- generate webhook secrets via crypto/rand; require >=32 chars on insert
- verify iid path consistency in streamContainerLogs
- LimitReader on site webhook body; reject malformed non-empty bodies

Concurrency / correctness:
- stats collector: Stop() no longer hangs without Start(), semaphore acquired in parent loop so ctx cancellation short-circuits the queue, in-flight tick cancellable via shared base context, zero-ts guard
- webhook handler: replace fire-and-forget goroutine with WaitGroup-tracked workers + Drain() wired into graceful shutdown
- $derived(() => ...) mis-idiom fixed in ContainerStats / InstanceCard / ProjectCard (returned function instead of value)
- SystemResourcesCard: rename `window` and `t` locals to avoid shadowing globalThis.window and the i18n `t` import

Quality / performance:
- replace O(n^2) insertion sort with sort.Slice in stats top
- runMigrations only swallows duplicate-column / already-exists errors
- PruneStatsSamplesBefore wrapped in a transaction
- collapse N+1 in unusedImageStats / pruneImages to one ListAllInstances pass; surface DB errors instead of silently treating them as inactive
- run Docker Info + DiskUsage in parallel via errgroup
- container log SSE emits `: ping` heartbeat every 20 s
- imageMatches case-insensitive on registry host (RFC behaviour)
- log warning on invalid stage tag pattern instead of silent skip
- reject malformed non-empty site webhook payloads

Frontend / i18n:
- shared formatBytes utility replaces three local copies
- statsInterval store drives dynamic "no samples / collection disabled" copy across ContainerStats and SystemResourcesCard
- top consumers row now shows owner_name (project/stage or site name)
- drop seven `as any` casts on the Settings type; add cloudflare_api_token write-only field
- move "Service status", "Docker daemon", "Docker unreachable", "Proxy unreachable", "reachable", and "Docker daemon is not reachable." strings into en/ru i18n bundles
This commit is contained in:
@@ -5,15 +5,27 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"github.com/alexei/tinyforge/internal/store"
|
||||
)
|
||||
|
||||
// maxSiteConcurrentSyncs caps fan-out of background site syncs triggered by
|
||||
// webhooks. Above this limit, requests are rejected with 503.
|
||||
const maxSiteConcurrentSyncs = 4
|
||||
|
||||
// maxWebhookBodyBytes caps the request body size for webhook payloads. The
|
||||
// /api routes already wrap the body with MaxBytesReader, but the webhook
|
||||
// router relies on its own limit so changes to the parent middleware can't
|
||||
// silently increase the cap.
|
||||
const maxWebhookBodyBytes = 256 * 1024 // 256 KiB
|
||||
|
||||
// DeployTriggerer is called when a webhook determines a deploy should happen.
|
||||
// Same interface as registry.DeployTriggerer — kept separate to avoid import cycles.
|
||||
type DeployTriggerer interface {
|
||||
@@ -114,12 +126,28 @@ type Handler struct {
|
||||
store *store.Store
|
||||
deployer DeployTriggerer
|
||||
sites SiteSyncTriggerer
|
||||
|
||||
// Site sync coordination — webhooks fire syncs in the background; Drain
|
||||
// cancels siteSyncCtx and then blocks until those goroutines return, so a
|
||||
// graceful shutdown never exits with a git fetch + container rebuild still running.
|
||||
siteSyncCtx context.Context
|
||||
siteSyncCancel context.CancelFunc
|
||||
siteSyncWG sync.WaitGroup
|
||||
siteSyncSem chan struct{}
|
||||
}
|
||||
|
||||
// NewHandler creates a new webhook Handler. The sites triggerer is optional
|
||||
// and may be nil (site webhooks will return 404).
|
||||
func NewHandler(st *store.Store, deployer DeployTriggerer, sites SiteSyncTriggerer) *Handler {
|
||||
return &Handler{store: st, deployer: deployer, sites: sites}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
return &Handler{
|
||||
store: st,
|
||||
deployer: deployer,
|
||||
sites: sites,
|
||||
siteSyncCtx: ctx,
|
||||
siteSyncCancel: cancel,
|
||||
siteSyncSem: make(chan struct{}, maxSiteConcurrentSyncs),
|
||||
}
|
||||
}
|
||||
|
||||
// SetSiteSyncTriggerer injects the static-site manager after construction.
|
||||
@@ -130,6 +158,13 @@ func (h *Handler) SetSiteSyncTriggerer(s SiteSyncTriggerer) {
|
||||
h.sites = s
|
||||
}
|
||||
|
||||
// Drain cancels in-flight site syncs and waits for their goroutines to exit.
|
||||
// Safe to call from a graceful-shutdown path.
|
||||
func (h *Handler) Drain() {
|
||||
h.siteSyncCancel()
|
||||
h.siteSyncWG.Wait()
|
||||
}
|
||||
|
||||
// Route returns a chi router with the webhook endpoints mounted.
|
||||
//
|
||||
// Routes:
|
||||
@@ -183,7 +218,8 @@ func (h *Handler) handleWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
var payload Payload
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
dec := json.NewDecoder(io.LimitReader(r.Body, maxWebhookBodyBytes))
|
||||
if err := dec.Decode(&payload); err != nil {
|
||||
respondWebhookError(w, http.StatusBadRequest, "invalid JSON payload")
|
||||
return
|
||||
}
|
||||
@@ -302,10 +338,20 @@ func (h *Handler) handleSiteWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Body is optional — decode best-effort.
|
||||
// Body is optional. We attempt to decode but accept an empty body (no Ref
|
||||
// filter); a malformed non-empty body is treated as bad-request to avoid
|
||||
// silently bypassing the branch/tag filter.
|
||||
var payload SitePayload
|
||||
if r.ContentLength > 0 {
|
||||
_ = json.NewDecoder(r.Body).Decode(&payload)
|
||||
body, err := io.ReadAll(io.LimitReader(r.Body, maxWebhookBodyBytes))
|
||||
if err != nil {
|
||||
respondWebhookError(w, http.StatusBadRequest, "failed to read request body")
|
||||
return
|
||||
}
|
||||
if len(body) > 0 {
|
||||
if err := json.Unmarshal(body, &payload); err != nil {
|
||||
respondWebhookError(w, http.StatusBadRequest, "invalid JSON payload")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if payload.Ref != "" && !siteRefMatches(site, payload.Ref) {
|
||||
@@ -320,9 +366,20 @@ func (h *Handler) handleSiteWebhook(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Fire and forget — sync may take a while (git fetch + container rebuild).
|
||||
// Cap concurrent syncs so a runaway CI cannot fan out unbounded
|
||||
// git-clone goroutines.
|
||||
select {
|
||||
case h.siteSyncSem <- struct{}{}:
|
||||
default:
|
||||
respondWebhookError(w, http.StatusServiceUnavailable, "site sync queue full")
|
||||
return
|
||||
}
|
||||
|
||||
h.siteSyncWG.Add(1)
|
||||
go func(siteID, siteName string) {
|
||||
if err := h.sites.Deploy(context.Background(), siteID, false); err != nil {
|
||||
defer h.siteSyncWG.Done()
|
||||
defer func() { <-h.siteSyncSem }()
|
||||
if err := h.sites.Deploy(h.siteSyncCtx, siteID, false); err != nil {
|
||||
slog.Error("webhook: site sync failed", "site", siteName, "error", err)
|
||||
}
|
||||
}(site.ID, site.Name)
|
||||
|
||||
Reference in New Issue
Block a user