Files
tiny-forge/internal/notify/notifier.go
T
alexei.dolgolyov 410a131cec feat(apps): stepped creation wizard, branch previews, and app-creation fixes
This session (frontend focus):
- Rebuild /apps/new as a 4-step wizard (Basics → Configure → Trigger → Review):
  WizardRail, SourceKindPicker card grid, AppManifest review, per-step validation,
  ConfirmDialog-based unsaved-changes guard.
- Extract lib/workload/sourceForms.ts (single source of truth for source_config)
  + {Image,Compose,Static,Dockerfile}SourceForm + StaticDiscoveryWizard; fold the
  /apps/[id] edit form onto the same components (removes the duplication). Add
  vitest + sourceForms unit tests.
- Branch preview environments UI: /chain is_preview/preview_branch + a Preview
  environments panel on /apps/[id] (per-branch URLs, ConfirmDialog teardown, armed
  state); RegistryImagePicker on the registry trigger and the image source.
- Fixes: image-inspect 404 -> admin-gated POST /api/discovery/image/inspect;
  conflict-panel blur flicker; friendly localized discovery errors; CPU/Memory
  label hints; dashboard + /apps "Total workloads" count only source_kind workloads
  (drop stale trigger_kind gate); NPM cert/access-list name cache; EntityPicker
  empty-list guard.
- Update CLAUDE.md frontend conventions + add a Build & Test section.

Also captures pre-existing in-progress platform work (not from this session):
workload notifications, Prometheus metrics export, store lockfile, health probes,
backup hardening, and related store/webhook/scheduler changes.
2026-05-29 02:09:54 +03:00

430 lines
15 KiB
Go

package notify
import (
"bytes"
"context"
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"net/url"
"sync"
"time"
"github.com/google/uuid"
"github.com/alexei/tinyforge/internal/metrics"
)
// Event represents a deployment / site-sync notification payload. It is
// marshalled to JSON and used as the webhook request body.
//
// Field naming preserves backwards compatibility with the original
// deploy_success/deploy_failure events; site events reuse Project for the
// site name and leave Stage/ImageTag empty.
type Event struct {
	// Type is the event kind: deploy_success, deploy_failure,
	// site_sync_success, site_sync_failure or test. It is also sent in the
	// X-Tinyforge-Event request header.
	Type string `json:"type"`
	// Project is the project name (or the site name for site events).
	Project string `json:"project"`
	// Stage and ImageTag are left empty for site events.
	Stage string `json:"stage"`
	ImageTag string `json:"image_tag"`
	Subdomain string `json:"subdomain"`
	// URL and Error are omitted from the JSON when empty.
	URL string `json:"url,omitempty"`
	Error string `json:"error,omitempty"`
	// Timestamp is filled in with the current UTC time (RFC 3339) by
	// SendSigned and SendSyncForTest when the caller leaves it empty.
	Timestamp string `json:"timestamp"`
}
// Tier identifies which configuration layer supplied the URL+secret used for
// a particular dispatch. It is sent to the receiver in the X-Tinyforge-Tier
// header, recorded in logs, and echoed in the test-endpoint response so
// operators can debug fall-through behaviour.
type Tier string

// Known tiers. Within this file, Send always reports TierSettings and
// SendPayload always reports TierEventTrigger; the remaining values are
// supplied by callers.
const (
	TierSettings Tier = "settings"
	TierProject Tier = "project"
	TierStage Tier = "stage"
	TierSite Tier = "site"
	TierEventTrigger Tier = "event_trigger"
)
// Header names for outgoing webhooks. The signature header name matches
// GitHub/Gitea/Forgejo so receivers built for those providers (and the
// service-to-notification-bridge generic webhook provider) verify out of the
// box. The X-Tinyforge-* headers are advisory and not covered by the HMAC,
// which is computed over the request body only.
const (
	// HeaderSignature carries "sha256=" followed by the hex HMAC-SHA256 of
	// the body; it is only set when a secret is configured.
	HeaderSignature = "X-Hub-Signature-256"
	// HeaderEvent carries the event type so receivers can route without
	// parsing the body.
	HeaderEvent = "X-Tinyforge-Event"
	// HeaderDelivery carries the per-dispatch delivery ID (a UUID).
	HeaderDelivery = "X-Tinyforge-Delivery"
	// HeaderTimestamp carries the dispatch timestamp string.
	HeaderTimestamp = "X-Tinyforge-Timestamp"
	// HeaderTier carries the Tier that supplied the URL+secret.
	HeaderTier = "X-Tinyforge-Tier"
)
// userAgent is sent as the User-Agent header on every outgoing webhook
// request so operators can filter their access logs by source. A versioned
// tag may be added later if/when build-time variables are wired in; for now
// a static identifier is enough.
const userAgent = "Tinyforge-Webhook/1"
// TestResult is what /api/.../notification-test returns to the UI: the
// receiver's status code, latency, a short response preview, and whether a
// signature was sent (so the operator can tell at a glance if signing is
// configured for this tier).
type TestResult struct {
	// URL is the webhook URL the test was sent to, exactly as passed in.
	URL string `json:"url"`
	// Tier is the configuration layer the caller resolved the URL from.
	Tier Tier `json:"tier"`
	// StatusCode is the receiver's HTTP status; it stays 0 when no response
	// was received.
	StatusCode int `json:"status_code"`
	// LatencyMs is the wall-clock duration of the send, in milliseconds.
	LatencyMs int64 `json:"latency_ms"`
	// SignatureSent is true when a non-empty secret was supplied.
	SignatureSent bool `json:"signature_sent"`
	// DeliveryID is the UUID generated for this test dispatch.
	DeliveryID string `json:"delivery_id"`
	// ResponseSnippet holds at most the first 4096 bytes of the response body.
	ResponseSnippet string `json:"response_snippet"`
	// Error is set when no URL is configured, the send failed, or the
	// receiver answered with a non-2xx status; omitted from JSON otherwise.
	Error string `json:"error,omitempty"`
}
// Notifier sends webhook notifications for deploy and site-sync events.
// Notifications are fire-and-forget by default — failures are logged but do
// not propagate. SendSyncForTest and SendSyncForTestPayload are the
// exceptions, used only by the manual test endpoints.
//
// outboundSem caps the number of in-flight outbound notifications. Without
// it a single burst (e.g. 1000 event triggers firing on a noisy log scan)
// would spawn 1000 simultaneous TCP connections, which both DoSes the
// receiver and exhausts local FDs.
//
// Construct with New; the zero value has a nil httpClient and a nil
// semaphore channel.
type Notifier struct {
	// httpClient is shared by every send so connections can be pooled.
	httpClient *http.Client
	// wg tracks background send goroutines so Drain can wait for them.
	wg sync.WaitGroup
	// outboundSem is a counting semaphore: one buffered element per
	// in-flight background send.
	outboundSem chan struct{}
}
// maxOutboundNotifications bounds the in-flight outbound webhook fan-out.
// It is used both as the capacity of Notifier.outboundSem and as the
// transport's MaxConnsPerHost in New.
// Sized to keep small bursts non-blocking while preventing a runaway storm
// from starving the rest of the process. Tunable later via settings if any
// operator legitimately needs more concurrency.
const maxOutboundNotifications = 32
// New returns a ready-to-use Notifier: a shared HTTP client with a 10-second
// overall timeout and a semaphore sized to maxOutboundNotifications.
func New() *Notifier {
	// The transport bounds per-host pooling so a slow receiver cannot pin an
	// arbitrary number of sockets. MaxConnsPerHost matches the semaphore
	// size, and idle connections are pruned after 90 seconds so a long-lived
	// process does not accumulate stale TCP entries.
	//
	// NOTE: the staticsite SSRF dialer is intentionally NOT installed here.
	// Notification URLs are configured by an admin, who already has
	// Docker-socket (host-root-equivalent) access, so restricting targets
	// would not remove anything they can reach anyway. Blocking
	// loopback/private addresses would instead break the common self-hosted
	// setup of notifying a same-host sidecar/bridge (e.g.
	// service-to-notification-bridge on 127.0.0.1). See the security review
	// (rated LOW / out of trust boundary).
	transport := &http.Transport{
		MaxIdleConns:        64,
		MaxIdleConnsPerHost: 8,
		MaxConnsPerHost:     maxOutboundNotifications,
		IdleConnTimeout:     90 * time.Second,
	}

	client := &http.Client{
		Timeout:   10 * time.Second,
		Transport: transport,
	}

	slots := make(chan struct{}, maxOutboundNotifications)

	return &Notifier{
		httpClient:  client,
		outboundSem: slots,
	}
}
// acquireSlot blocks until an outbound slot is free or ctx is done,
// whichever happens first. It reports true when a slot was taken (the caller
// must then call releaseSlot) and false when ctx ended first, so a backed-up
// queue cannot hold a caller past its own deadline.
func (n *Notifier) acquireSlot(ctx context.Context) bool {
	var token struct{}
	select {
	case <-ctx.Done():
		return false
	case n.outboundSem <- token:
		return true
	}
}
// releaseSlot hands back a slot previously taken with acquireSlot. The
// receive is non-blocking: if the semaphore is already empty (for example
// because it was drained during shutdown) the call is a no-op rather than a
// hang.
func (n *Notifier) releaseSlot() {
	select {
	default:
		// Nothing to release — never block.
	case <-n.outboundSem:
	}
}
// Drain blocks until every background notification goroutine started by
// SendSigned or SendPayload has finished (delivered, failed, or dropped).
// The synchronous test sends are not tracked and are therefore not waited on.
func (n *Notifier) Drain() {
	n.wg.Wait()
}
// Send queues an unsigned background delivery of event to webhookURL,
// attributed to TierSettings. It exists for callsites that have no signing
// secret available; new code should call SendSigned, which also records the
// tier the URL was resolved from.
func (n *Notifier) Send(webhookURL string, event Event) {
	const noSecret = "" // empty secret => no signature header
	n.SendSigned(webhookURL, noSecret, TierSettings, event)
}
// SendSigned queues a background delivery of event to webhookURL. When
// secret is non-empty the request is signed with HMAC-SHA256 over the exact
// JSON bytes sent on the wire, so receivers must verify the raw body rather
// than a re-serialised copy.
//
// An empty secret yields an unsigned request (no X-Hub-Signature-256
// header), which keeps receivers that pre-date HMAC support working. An
// empty webhookURL makes the call a no-op. An empty event.Timestamp is
// replaced with the current UTC time in RFC 3339 form.
//
// The call returns immediately; the outcome is only visible through logs and
// the OutboundNotifyTotal metric (dropped / failure / success).
func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event) {
	if webhookURL == "" {
		return
	}
	if event.Timestamp == "" {
		event.Timestamp = time.Now().UTC().Format(time.RFC3339)
	}
	deliveryID := uuid.NewString()

	n.wg.Add(1)
	go func() {
		defer n.wg.Done()

		// One 15-second budget covers both waiting for a slot and the send.
		ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
		defer cancel()

		// Only the URL host is logged — never the secret or a full URL that
		// may carry user-info.
		host := safeHost(webhookURL)
		signed := secret != ""

		if acquired := n.acquireSlot(ctx); !acquired {
			slog.Warn("notify: dropped — outbound queue saturated",
				"tier", tier, "host", host, "delivery", deliveryID, "event", event.Type)
			metrics.OutboundNotifyTotal.Inc("dropped")
			return
		}
		defer n.releaseSlot()

		if _, err := n.doSend(ctx, webhookURL, secret, tier, deliveryID, event); err != nil {
			slog.Warn("notify: webhook send failed",
				"tier", tier, "host", host, "delivery", deliveryID,
				"event", event.Type, "signed", signed, "error", err)
			metrics.OutboundNotifyTotal.Inc("failure")
			return
		}

		slog.Info("notify: webhook dispatched",
			"tier", tier, "host", host, "delivery", deliveryID,
			"event", event.Type, "signed", signed)
		metrics.OutboundNotifyTotal.Inc("success")
	}()
}
// SendPayload queues a background delivery of an arbitrary JSON-encodable
// payload to webhookURL, signed with HMAC-SHA256 when secret is non-empty.
// It serves the event-trigger dispatcher (event-log → trigger filter →
// webhook delivery) and always reports TierEventTrigger. eventType travels in
// the X-Tinyforge-Event header so receivers can route on it without parsing
// the body. An empty webhookURL makes the call a no-op.
//
// Fire-and-forget: failures are logged at warn level and never propagate.
// Trigger reliability is observed through webhook_deliveries (the audit
// trail), and because the dispatcher stays bus-driven, delivery hiccups
// cannot back-pressure event publishing.
func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any) {
	if webhookURL == "" {
		return
	}
	// Both values are fixed at call time, not when the goroutine gets to run.
	deliveryID := uuid.NewString()
	sentAt := time.Now().UTC().Format(time.RFC3339)

	n.wg.Add(1)
	go func() {
		defer n.wg.Done()

		// One 15-second budget covers both waiting for a slot and the send.
		ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
		defer cancel()

		// Only the URL host is ever logged.
		host := safeHost(webhookURL)
		signed := secret != ""

		if acquired := n.acquireSlot(ctx); !acquired {
			slog.Warn("notify: dropped trigger payload — outbound queue saturated",
				"tier", TierEventTrigger, "host", host, "delivery", deliveryID, "event", eventType)
			metrics.OutboundNotifyTotal.Inc("dropped")
			return
		}
		defer n.releaseSlot()

		if _, err := n.doSendRaw(ctx, webhookURL, secret, TierEventTrigger, deliveryID, eventType, sentAt, payload); err != nil {
			slog.Warn("notify: trigger webhook send failed",
				"tier", TierEventTrigger, "host", host, "delivery", deliveryID,
				"event", eventType, "signed", signed, "error", err)
			metrics.OutboundNotifyTotal.Inc("failure")
			return
		}

		slog.Info("notify: trigger webhook dispatched",
			"tier", TierEventTrigger, "host", host, "delivery", deliveryID,
			"event", eventType, "signed", signed)
		metrics.OutboundNotifyTotal.Inc("success")
	}()
}
// SendSyncForTest sends event once, synchronously, on behalf of the "Send
// test" UI button and describes what the receiver answered so the operator
// can confirm wiring without reading server logs.
//
// Failures are reported through TestResult.Error instead of a Go error to
// keep the handler simple. When the receiver did answer (for example with a
// non-2xx status) the status code and body preview are still filled in
// alongside Error. An empty event.Timestamp is replaced with the current UTC
// time. The send runs inline under ctx and does not take an outbound slot.
func (n *Notifier) SendSyncForTest(ctx context.Context, webhookURL, secret string, tier Tier, event Event) TestResult {
	if event.Timestamp == "" {
		event.Timestamp = time.Now().UTC().Format(time.RFC3339)
	}

	var result TestResult
	result.URL = webhookURL
	result.Tier = tier
	result.SignatureSent = secret != ""
	result.DeliveryID = uuid.NewString()

	if webhookURL == "" {
		result.Error = "no webhook URL configured for this tier"
		return result
	}

	began := time.Now()
	resp, err := n.doSend(ctx, webhookURL, secret, tier, result.DeliveryID, event)
	result.LatencyMs = time.Since(began).Milliseconds()

	if err != nil {
		result.Error = err.Error()
	}
	// On success a response is always present; on failure only copy it when
	// the receiver actually answered.
	if err == nil || resp != nil {
		result.StatusCode = resp.StatusCode
		result.ResponseSnippet = resp.BodyPreview
	}
	return result
}
// SendSyncForTestPayload is the arbitrary-payload sibling of
// SendSyncForTest. It returns the same TestResult shape but pushes a
// caller-supplied payload + event type through the shared HTTP+HMAC core.
// The event-trigger /test endpoint uses it so the operator's receiver sees
// the same envelope it will get during normal dispatch — testing with a
// different payload would defeat the purpose of the test.
//
// Failures are reported through TestResult.Error; when the receiver did
// answer, its status code and body preview are filled in as well.
func (n *Notifier) SendSyncForTestPayload(ctx context.Context, webhookURL, secret string, tier Tier, eventType string, payload any) TestResult {
	deliveryID := uuid.NewString()
	sentAt := time.Now().UTC().Format(time.RFC3339)

	var result TestResult
	result.URL = webhookURL
	result.Tier = tier
	result.SignatureSent = secret != ""
	result.DeliveryID = deliveryID

	if webhookURL == "" {
		result.Error = "no webhook URL configured for this tier"
		return result
	}

	began := time.Now()
	resp, err := n.doSendRaw(ctx, webhookURL, secret, tier, deliveryID, eventType, sentAt, payload)
	result.LatencyMs = time.Since(began).Milliseconds()

	if err != nil {
		result.Error = err.Error()
	}
	// On success a response is always present; on failure only copy it when
	// the receiver actually answered.
	if err == nil || resp != nil {
		result.StatusCode = resp.StatusCode
		result.ResponseSnippet = resp.BodyPreview
	}
	return result
}
// sendResponse captures the small subset of the receiver's response we want
// to surface back to the operator (status + a body preview). Distinct from
// http.Response so callers don't accidentally hold an unread body.
type sendResponse struct {
	// StatusCode is the receiver's HTTP status code.
	StatusCode int
	// BodyPreview is the leading portion of the response body (the sender
	// reads at most 4096 bytes), converted to a string as-is.
	BodyPreview string
}
// doSend adapts an Event to the shared doSendRaw core: the event itself is
// the JSON payload, while event.Type and event.Timestamp populate the
// X-Tinyforge-Event and X-Tinyforge-Timestamp headers.
//
// It returns whatever doSendRaw returns: a sendResponse (used by the test
// path) and/or an error. Marshalling, signing and the HTTP POST all happen
// in doSendRaw.
func (n *Notifier) doSend(ctx context.Context, webhookURL, secret string, tier Tier, delivery string, event Event) (*sendResponse, error) {
	eventType, timestamp := event.Type, event.Timestamp
	return n.doSendRaw(ctx, webhookURL, secret, tier, delivery, eventType, timestamp, event)
}
// doSendRaw is the shared HTTP+HMAC core. It serializes any payload to
// JSON, signs the resulting bytes (if a secret is configured) and
// dispatches with the same Tinyforge headers as the legacy deploy-event
// path. Separated out so SendPayload can reuse it without forcing the
// caller to fit into the Event shape.
//
// The body bytes are marshalled exactly once, so the signature always
// matches what travels on the wire; receivers must verify the raw body.
//
// Returns:
//   - (nil, err) when marshalling, request construction or the HTTP round
//     trip fails;
//   - (resp, err) when the receiver answered with a non-2xx status — the
//     response is still returned so the test path can show it;
//   - (resp, nil) on a 2xx answer.
func (n *Notifier) doSendRaw(ctx context.Context, webhookURL, secret string, tier Tier, delivery, eventType, timestamp string, payload any) (*sendResponse, error) {
	const (
		// previewLimit caps how much of the response body is kept for the
		// operator-facing preview.
		previewLimit = 4096
		// drainLimit caps how much extra body is read and discarded so the
		// connection can be reused without letting a hostile or chatty
		// receiver make us read unbounded data.
		drainLimit = 64 << 10
	)

	body, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("marshal notification: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, webhookURL, bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("create notification request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("User-Agent", userAgent)
	req.Header.Set(HeaderEvent, eventType)
	req.Header.Set(HeaderDelivery, delivery)
	req.Header.Set(HeaderTimestamp, timestamp)
	req.Header.Set(HeaderTier, string(tier))
	if secret != "" {
		// Sign the exact bytes placed in the request body.
		req.Header.Set(HeaderSignature, "sha256="+sign(secret, body))
	}

	resp, err := n.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("send notification: %w", err)
	}
	defer func() {
		// The transport can only reuse a keep-alive connection when the body
		// has been read to EOF before Close. The preview below stops at
		// previewLimit, so discard a bounded remainder here. Errors are
		// irrelevant at this point: the outcome has already been decided.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, drainLimit))
		_ = resp.Body.Close()
	}()

	// Best effort: a read error simply yields a shorter (possibly empty)
	// preview; the status code is already known and decides the outcome.
	preview, _ := io.ReadAll(io.LimitReader(resp.Body, previewLimit))
	out := &sendResponse{
		StatusCode:  resp.StatusCode,
		BodyPreview: string(preview),
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return out, fmt.Errorf("notification webhook returned status %d", resp.StatusCode)
	}
	return out, nil
}
// sign computes HMAC-SHA256 over body keyed by secret and returns the digest
// as lowercase hexadecimal. The caller prepends "sha256=" to produce
// GitHub's X-Hub-Signature-256 wire format.
func sign(secret string, body []byte) string {
	hasher := hmac.New(sha256.New, []byte(secret))
	hasher.Write(body) // hash.Hash writes never return an error
	digest := hasher.Sum(nil)

	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest)
	return string(encoded)
}
// VerifySignature is the receiver-side mirror of sign(). It is exported so
// this repo's tests (and any future incoming-webhook receiver here) can
// reuse the exact construction instead of duplicating the HMAC code.
//
// signatureHeader may be either the bare hex digest or the GitHub-style
// "sha256=<hex>" envelope. The comparison is against the lowercase hex
// digest produced by sign. An empty secret or an empty header never
// verifies.
func VerifySignature(secret string, body []byte, signatureHeader string) bool {
	const envelope = "sha256="

	if secret == "" || signatureHeader == "" {
		return false
	}

	candidate := signatureHeader
	if len(candidate) > len(envelope) && candidate[:len(envelope)] == envelope {
		candidate = candidate[len(envelope):]
	}

	// hmac.Equal compares in constant time; a plain byte comparison would
	// leak timing information about the first differing byte.
	expected := sign(secret, body)
	return hmac.Equal([]byte(candidate), []byte(expected))
}
// safeHost reduces a webhook URL to its host (plus port, when present) so it
// can be logged without exposing user-info, path or query. When the URL
// cannot be parsed, or parses without a host, the fixed placeholder
// "(unparseable)" is returned instead — the raw input is never echoed back.
func safeHost(raw string) string {
	const placeholder = "(unparseable)"

	parsed, err := url.Parse(raw)
	switch {
	case err != nil:
		return placeholder
	case parsed.Host == "":
		return placeholder
	}
	return parsed.Host
}