Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 80868e0f7a | |||
| 6492944c8f | |||
| c2ca6c0b73 | |||
| ec8c0cd891 | |||
| 192204a51c | |||
| 6b45ed62bb | |||
| 2ba49b9bb6 | |||
| 00503b4c0a | |||
| 97f338fba3 | |||
| 15e5b186cd | |||
| fa6d5bd3ba | |||
| bd7a11d4e7 | |||
| 7576f54e76 | |||
| 2e26f555c5 | |||
| cdb9fd57d1 | |||
| 5c17885197 | |||
| 93b6911b34 | |||
| 3071cda512 | |||
| 410a131cec | |||
| 956943edbb | |||
| 279f373f80 |
+45
-7
@@ -1,9 +1,47 @@
|
|||||||
|
# VCS / tooling
|
||||||
.git
|
.git
|
||||||
node_modules
|
.gitignore
|
||||||
web/node_modules
|
|
||||||
web/build
|
|
||||||
data
|
|
||||||
*.md
|
|
||||||
plans/
|
|
||||||
.claude/
|
|
||||||
.dockerignore
|
.dockerignore
|
||||||
|
.gitea/
|
||||||
|
.github/
|
||||||
|
.claude/
|
||||||
|
.code-review-graph/
|
||||||
|
.vex.toml
|
||||||
|
.facts-sync.json
|
||||||
|
.facts-suggestions.md
|
||||||
|
|
||||||
|
# Node / frontend build artifacts (frontend stage rebuilds web/build)
|
||||||
|
node_modules/
|
||||||
|
web/node_modules/
|
||||||
|
web/build/
|
||||||
|
web/.svelte-kit/
|
||||||
|
|
||||||
|
# Runtime / local data
|
||||||
|
data/
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Compiled binaries (rebuilt inside the image)
|
||||||
|
tinyforge
|
||||||
|
tinyforge.exe
|
||||||
|
tinyforge-server.exe
|
||||||
|
server.exe
|
||||||
|
docker-watcher
|
||||||
|
docker-watcher.exe
|
||||||
|
docker-watcher.exe~
|
||||||
|
/cli
|
||||||
|
/cli.exe
|
||||||
|
|
||||||
|
# Build/orchestration files not needed inside the image
|
||||||
|
Dockerfile
|
||||||
|
docker-compose.yml
|
||||||
|
Makefile
|
||||||
|
*.example.yaml
|
||||||
|
|
||||||
|
# Docs / planning / design (not needed at runtime)
|
||||||
|
*.md
|
||||||
|
docs/
|
||||||
|
plans/
|
||||||
|
design-mockups/
|
||||||
|
test-data/
|
||||||
|
|||||||
+54
-18
@@ -5,34 +5,70 @@ on:
|
|||||||
branches: [main]
|
branches: [main]
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [main]
|
branches: [main]
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
frontend:
|
||||||
|
# Skip the build on release-bump commits — the tag push runs release.yml.
|
||||||
|
if: "${{ !startsWith(gitea.event.head_commit.message, 'chore: release v') }}"
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '20'
|
||||||
|
cache: npm
|
||||||
|
cache-dependency-path: web/package-lock.json
|
||||||
|
|
||||||
|
- name: Install frontend dependencies
|
||||||
|
working-directory: web
|
||||||
|
run: npm ci --no-audit
|
||||||
|
|
||||||
|
- name: Svelte check
|
||||||
|
working-directory: web
|
||||||
|
run: npm run check
|
||||||
|
|
||||||
|
- name: Unit tests (vitest)
|
||||||
|
working-directory: web
|
||||||
|
run: npm run test
|
||||||
|
|
||||||
|
- name: Build frontend
|
||||||
|
working-directory: web
|
||||||
|
run: npm run build
|
||||||
|
|
||||||
|
backend:
|
||||||
|
if: "${{ !startsWith(gitea.event.head_commit.message, 'chore: release v') }}"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.24'
|
go-version: '1.25'
|
||||||
|
cache-dependency-path: go.sum
|
||||||
- uses: actions/setup-node@v4
|
|
||||||
with:
|
|
||||||
node-version: '20'
|
|
||||||
|
|
||||||
- name: Install frontend dependencies
|
|
||||||
working-directory: web
|
|
||||||
run: npm ci --no-audit
|
|
||||||
|
|
||||||
- name: Build frontend
|
|
||||||
working-directory: web
|
|
||||||
run: npm run build
|
|
||||||
|
|
||||||
- name: Vet Go code
|
- name: Vet Go code
|
||||||
run: go vet ./...
|
run: go vet ./...
|
||||||
|
|
||||||
- name: Build Go binary
|
- name: Run Go tests
|
||||||
run: CGO_ENABLED=0 go build -ldflags="-s -w" -o tinyforge ./cmd/server
|
run: go test ./internal/... -count=1
|
||||||
|
|
||||||
- name: Build Docker image
|
build-image:
|
||||||
run: docker build -t tinyforge:dev .
|
if: "${{ !startsWith(gitea.event.head_commit.message, 'chore: release v') }}"
|
||||||
|
needs: [frontend, backend]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Build Docker image (no push)
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: false
|
||||||
|
tags: tinyforge:ci-${{ gitea.sha }}
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
|||||||
+133
-68
@@ -10,19 +10,109 @@ env:
|
|||||||
REGISTRY: git.dolgolyov-family.by/alexei.dolgolyov/tiny-forge
|
REGISTRY: git.dolgolyov-family.by/alexei.dolgolyov/tiny-forge
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
create-release:
|
# ───────────────────────────────────────────────────────────────────────
|
||||||
|
# Gate the release on a passing test suite. A tagged release must never
|
||||||
|
# ship code that fails `go vet` / `go test`.
|
||||||
|
# ───────────────────────────────────────────────────────────────────────
|
||||||
|
test:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
|
||||||
release_id: ${{ steps.create.outputs.release_id }}
|
|
||||||
steps:
|
steps:
|
||||||
- name: Fetch RELEASE_NOTES.md only
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.25'
|
||||||
|
cache-dependency-path: go.sum
|
||||||
|
|
||||||
|
- name: Vet Go code
|
||||||
|
run: go vet ./...
|
||||||
|
|
||||||
|
- name: Run Go tests
|
||||||
|
run: go test ./internal/... -count=1
|
||||||
|
|
||||||
|
# ───────────────────────────────────────────────────────────────────────
|
||||||
|
# Build + push the image FIRST. If this fails, no release is created
|
||||||
|
# (create-release depends on it) — so we never leave an orphan release
|
||||||
|
# pointing at a tag with no published image.
|
||||||
|
# ───────────────────────────────────────────────────────────────────────
|
||||||
|
build-docker:
|
||||||
|
needs: test
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Compute tags
|
||||||
|
id: meta
|
||||||
|
run: |
|
||||||
|
TAG="${{ gitea.ref_name }}"
|
||||||
|
VERSION="${TAG#v}"
|
||||||
|
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||||
|
# Detect pre-release (alpha/beta/rc) — these do NOT get :latest.
|
||||||
|
if echo "$TAG" | grep -qE '(alpha|beta|rc)'; then
|
||||||
|
echo "is_pre=true" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "is_pre=false" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Login to Gitea Container Registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.SERVER_HOST }}
|
||||||
|
username: ${{ gitea.actor }}
|
||||||
|
password: ${{ secrets.DEPLOY_TOKEN }}
|
||||||
|
|
||||||
|
- name: Build and push image
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
tags: |
|
||||||
|
${{ env.REGISTRY }}:${{ steps.meta.outputs.tag }}
|
||||||
|
${{ env.REGISTRY }}:${{ steps.meta.outputs.version }}
|
||||||
|
${{ env.REGISTRY }}:sha-${{ gitea.sha }}
|
||||||
|
${{ steps.meta.outputs.is_pre == 'false' && format('{0}:latest', env.REGISTRY) || '' }}
|
||||||
|
cache-from: type=registry,ref=${{ env.REGISTRY }}:buildcache
|
||||||
|
cache-to: type=registry,ref=${{ env.REGISTRY }}:buildcache,mode=max
|
||||||
|
|
||||||
|
- name: Trigger redeploy webhook
|
||||||
|
if: steps.meta.outputs.is_pre == 'false'
|
||||||
|
continue-on-error: true
|
||||||
|
run: |
|
||||||
|
if [ -n "${{ secrets.DOCKER_REDEPLOY_WEBHOOK_URL }}" ]; then
|
||||||
|
echo "Triggering redeploy webhook..."
|
||||||
|
curl -sf -X POST "${{ secrets.DOCKER_REDEPLOY_WEBHOOK_URL }}" \
|
||||||
|
--max-time 30 || echo "::warning::Redeploy webhook failed"
|
||||||
|
else
|
||||||
|
echo "DOCKER_REDEPLOY_WEBHOOK_URL not set — skipping auto-deploy"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ───────────────────────────────────────────────────────────────────────
|
||||||
|
# Create the Gitea release LAST — body = RELEASE_NOTES.md + auto-changelog.
|
||||||
|
# ───────────────────────────────────────────────────────────────────────
|
||||||
|
create-release:
|
||||||
|
needs: build-docker
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout (full history for changelog)
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
sparse-checkout: RELEASE_NOTES.md
|
fetch-depth: 0
|
||||||
sparse-checkout-cone-mode: false
|
|
||||||
|
- name: Generate changelog
|
||||||
|
id: changelog
|
||||||
|
run: |
|
||||||
|
PREV_TAG=$(git tag --sort=-v:refname | head -2 | tail -1)
|
||||||
|
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "${{ gitea.ref_name }}" ]; then
|
||||||
|
git log --oneline --no-decorate -n 20 > /tmp/changelog.txt
|
||||||
|
else
|
||||||
|
git log --oneline --no-decorate "${PREV_TAG}..HEAD" > /tmp/changelog.txt
|
||||||
|
fi
|
||||||
|
|
||||||
- name: Create Gitea release
|
- name: Create Gitea release
|
||||||
id: create
|
|
||||||
env:
|
env:
|
||||||
DEPLOY_TOKEN: ${{ secrets.DEPLOY_TOKEN }}
|
DEPLOY_TOKEN: ${{ secrets.DEPLOY_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
@@ -42,74 +132,49 @@ jobs:
|
|||||||
echo "Found RELEASE_NOTES.md"
|
echo "Found RELEASE_NOTES.md"
|
||||||
else
|
else
|
||||||
export RELEASE_NOTES=""
|
export RELEASE_NOTES=""
|
||||||
echo "No RELEASE_NOTES.md found — release will have no body"
|
echo "No RELEASE_NOTES.md found — release body = changelog only"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
BODY_JSON=$(python3 -c "
|
# Build release body (notes + changelog) via Python to avoid shell
|
||||||
|
# escaping and CLI length limits.
|
||||||
|
export TAG VERSION IS_PRE
|
||||||
|
python3 <<'PY'
|
||||||
import json, os
|
import json, os
|
||||||
notes = os.environ.get('RELEASE_NOTES', '')
|
|
||||||
print(json.dumps(notes.strip()))
|
|
||||||
")
|
|
||||||
|
|
||||||
# Create release via Gitea API
|
notes = os.environ.get('RELEASE_NOTES', '')
|
||||||
RELEASE=$(curl -s -X POST "$BASE_URL/releases" \
|
changelog = open('/tmp/changelog.txt').read().strip()
|
||||||
|
|
||||||
|
sections = []
|
||||||
|
if notes.strip():
|
||||||
|
sections.append(notes.strip())
|
||||||
|
if changelog:
|
||||||
|
sections.append('## Changelog\n\n' + changelog)
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
'tag_name': os.environ['TAG'],
|
||||||
|
'name': os.environ['VERSION'],
|
||||||
|
'body': '\n\n'.join(sections),
|
||||||
|
'draft': False,
|
||||||
|
'prerelease': os.environ['IS_PRE'] == 'true',
|
||||||
|
}
|
||||||
|
with open('/tmp/release-payload.json', 'w') as f:
|
||||||
|
json.dump(payload, f)
|
||||||
|
PY
|
||||||
|
|
||||||
|
HTTP=$(curl -s -o /tmp/release-resp.json -w "%{http_code}" \
|
||||||
|
-X POST "$BASE_URL/releases" \
|
||||||
-H "Authorization: token $DEPLOY_TOKEN" \
|
-H "Authorization: token $DEPLOY_TOKEN" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d "{
|
--data-binary @/tmp/release-payload.json)
|
||||||
\"tag_name\": \"$TAG\",
|
|
||||||
\"name\": \"$VERSION\",
|
|
||||||
\"body\": $BODY_JSON,
|
|
||||||
\"draft\": false,
|
|
||||||
\"prerelease\": $IS_PRE
|
|
||||||
}")
|
|
||||||
|
|
||||||
# Fallback: if release already exists for this tag, reuse it
|
echo "POST /releases → HTTP $HTTP"
|
||||||
RELEASE_ID=$(echo "$RELEASE" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])" 2>/dev/null)
|
if [ "$HTTP" = "201" ]; then
|
||||||
if [ -z "$RELEASE_ID" ]; then
|
RELEASE_ID=$(python3 -c "import json; print(json.load(open('/tmp/release-resp.json'))['id'])")
|
||||||
echo "::warning::Release already exists for tag $TAG — reusing existing release"
|
|
||||||
RELEASE=$(curl -s "$BASE_URL/releases/tags/$TAG" \
|
|
||||||
-H "Authorization: token $DEPLOY_TOKEN")
|
|
||||||
RELEASE_ID=$(echo "$RELEASE" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")
|
|
||||||
fi
|
|
||||||
echo "release_id=$RELEASE_ID" >> "$GITHUB_OUTPUT"
|
|
||||||
echo "Created release $RELEASE_ID for $TAG"
|
echo "Created release $RELEASE_ID for $TAG"
|
||||||
|
elif [ "$HTTP" = "409" ] || grep -q "already exists" /tmp/release-resp.json; then
|
||||||
build-docker:
|
echo "::warning::Release already exists for tag $TAG — reusing"
|
||||||
needs: create-release
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Login to Gitea Container Registry
|
|
||||||
id: docker-login
|
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
|
||||||
echo "${{ secrets.DEPLOY_TOKEN }}" | docker login \
|
|
||||||
"$SERVER_HOST" -u "${{ gitea.actor }}" --password-stdin
|
|
||||||
|
|
||||||
- name: Build and tag
|
|
||||||
if: steps.docker-login.outcome == 'success'
|
|
||||||
run: |
|
|
||||||
TAG="${{ gitea.ref_name }}"
|
|
||||||
VERSION="${TAG#v}"
|
|
||||||
docker build -t "$REGISTRY:$TAG" -t "$REGISTRY:$VERSION" .
|
|
||||||
# Tag as 'latest' only for stable releases
|
|
||||||
if ! echo "$TAG" | grep -qE '(alpha|beta|rc)'; then
|
|
||||||
docker tag "$REGISTRY:$TAG" "$REGISTRY:latest"
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Push
|
|
||||||
if: steps.docker-login.outcome == 'success'
|
|
||||||
run: docker push "$REGISTRY" --all-tags
|
|
||||||
|
|
||||||
- name: Trigger Portainer redeploy
|
|
||||||
if: steps.docker-login.outcome == 'success'
|
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
|
||||||
if [ -n "${{ secrets.DOCKER_REDEPLOY_WEBHOOK_URL }}" ]; then
|
|
||||||
echo "Triggering Portainer redeploy..."
|
|
||||||
curl -sf -X POST "${{ secrets.DOCKER_REDEPLOY_WEBHOOK_URL }}" \
|
|
||||||
--max-time 30 || echo "::warning::Portainer webhook failed"
|
|
||||||
else
|
else
|
||||||
echo "DOCKER_REDEPLOY_WEBHOOK_URL not set — skipping auto-deploy"
|
echo "::error::Failed to create release for $TAG (HTTP $HTTP)"
|
||||||
|
head -c 2000 /tmp/release-resp.json; echo
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -6,7 +6,10 @@ data/
|
|||||||
.env
|
.env
|
||||||
tinyforge
|
tinyforge
|
||||||
tinyforge.exe
|
tinyforge.exe
|
||||||
|
/cli
|
||||||
|
/cli.exe
|
||||||
server.exe
|
server.exe
|
||||||
|
tinyforge-server.exe
|
||||||
docker-watcher
|
docker-watcher
|
||||||
docker-watcher.exe
|
docker-watcher.exe
|
||||||
docker-watcher.exe~
|
docker-watcher.exe~
|
||||||
|
|||||||
@@ -0,0 +1,57 @@
|
|||||||
|
# vex configuration — https://github.com/tenatarika/vex
|
||||||
|
#
|
||||||
|
# Place this file in your project root as .vex.toml
|
||||||
|
|
||||||
|
# Glob patterns to exclude from indexing (gitignore syntax, on top of .gitignore)
|
||||||
|
# exclude = [
|
||||||
|
# "vendor/**",
|
||||||
|
# "node_modules/**",
|
||||||
|
# "*.generated.go",
|
||||||
|
# "dist/**",
|
||||||
|
# ]
|
||||||
|
|
||||||
|
# Default output format: "text", "json", or "compact"
|
||||||
|
# format = "text"
|
||||||
|
|
||||||
|
# Enable semantic embeddings by default (slower indexing, enables meaning-based search)
|
||||||
|
semantic = true
|
||||||
|
|
||||||
|
# Automatically run `vex update` before search if the index is stale
|
||||||
|
auto_update = true
|
||||||
|
|
||||||
|
# Embedder used for semantic indexing. Known IDs: minilm-l6-v2 (default).
|
||||||
|
# Changing the embedder requires a full reindex.
|
||||||
|
# embedder = "minilm-l6-v2"
|
||||||
|
|
||||||
|
# Cache directory override. Defaults to the platform cache location.
|
||||||
|
# macOS: ~/Library/Caches/vex
|
||||||
|
# Linux: $XDG_CACHE_HOME/vex (fallback: ~/.cache/vex)
|
||||||
|
# Windows: %LOCALAPPDATA%\vex (fallback: %USERPROFILE%\AppData\Local\vex)
|
||||||
|
# Accepts absolute paths, "~/..." or paths relative to this file (e.g. "./.vex/cache").
|
||||||
|
# Can also be overridden per-invocation with --cache-dir or $VEX_CACHE_DIR.
|
||||||
|
# cache_dir = "./.vex/cache"
|
||||||
|
|
||||||
|
# Store the index inside the project as `<project>/.vex_cache/`. Useful when
|
||||||
|
# the cache should travel with the project (e.g. on a moved or renamed
|
||||||
|
# directory). vex writes a `.gitignore` inside it so contents are not
|
||||||
|
# committed. Overridden by `cache_dir`, `--cache-dir`, or $VEX_CACHE_DIR.
|
||||||
|
# local_cache = false
|
||||||
|
|
||||||
|
# Thread count for parallel indexing (index/update/watch).
|
||||||
|
# * unset — 80% of available cores, rounded up (default, leaves headroom)
|
||||||
|
# * 0 — use all cores (explicit opt-in to max throughput)
|
||||||
|
# * N — exactly N workers
|
||||||
|
# Overridable per-invocation with `-j/--jobs` or $VEX_JOBS.
|
||||||
|
# jobs = 4
|
||||||
|
|
||||||
|
# Build the persistent call-graph section. Disabling falls back to live-scan
|
||||||
|
# for `vex callers`/`vex callees` (slower per-query, but saves indexing
|
||||||
|
# time on large monorepos). The opt-out is persisted in the manifest so
|
||||||
|
# `vex update` does not silently re-add the section.
|
||||||
|
# Per-invocation override: `vex index --no-call-graph`.
|
||||||
|
# call_graph = true
|
||||||
|
|
||||||
|
# Build the BM25 channel. Disabling drops the third RRF channel and keeps
|
||||||
|
# only structural (+ semantic). Same persistence rules as `call_graph`.
|
||||||
|
# Per-invocation override: `vex index --no-bm25`.
|
||||||
|
# bm25 = true
|
||||||
@@ -12,3 +12,13 @@ Start/restart with: `./scripts/dev-server.sh`
|
|||||||
## Frontend
|
## Frontend
|
||||||
|
|
||||||
- **Boolean inputs use `ToggleSwitch`** (`$lib/components/ToggleSwitch.svelte`) — the slide-style switch is the unified control across the WebUI. Do not introduce raw `<input type="checkbox">` elements; place a `<ToggleSwitch>` next to a label/help block instead.
|
- **Boolean inputs use `ToggleSwitch`** (`$lib/components/ToggleSwitch.svelte`) — the slide-style switch is the unified control across the WebUI. Do not introduce raw `<input type="checkbox">` elements; place a `<ToggleSwitch>` next to a label/help block instead.
|
||||||
|
- **Confirmations & destructive actions use `ConfirmDialog`** (`$lib/components/ConfirmDialog.svelte`) — never native `window.confirm` / `alert`. For navigation guards (e.g. the unsaved-changes prompt on `/apps/new`), `cancel()` the navigation in `beforeNavigate`, open `ConfirmDialog`, and re-issue the navigation with a bypass flag on confirm. Native `beforeunload` is acceptable only for hard tab-close/reload, where the browser forbids custom UI.
|
||||||
|
- **Source-config shape: `$lib/workload/sourceForms.ts`** is the single source of truth (seed/serialize/validity for image/compose/static/dockerfile), consumed by both `/apps/new` and `/apps/[id]`. Don't re-inline seed/serialize logic.
|
||||||
|
- **"App" = workload with `source_kind !== ''`.** Triggers are first-class bindings (`workload_trigger_bindings`), NOT on the workload row — never gate app lists/counts on `trigger_kind` (it's empty for plugin workloads). Legacy pre-cutover `kind:project/stack/site` rows have an empty `source_kind` and must be excluded everywhere.
|
||||||
|
- **i18n parity is mandatory** — every key in BOTH `web/src/lib/i18n/{en,ru}.json`. A missing key is NOT a build error (`$t` returns the key string), so verify parity manually.
|
||||||
|
|
||||||
|
## Build & Test
|
||||||
|
|
||||||
|
- Frontend (from `web/`): `npm run check` (svelte-check — expect 0 errors), `npm run build`, `npm run test` (vitest; pure-logic units like `sourceForms.test.ts`).
|
||||||
|
- Backend (repo root): `go build ./...`, `go vet ./internal/...`, `go test ./internal/...`.
|
||||||
|
- `./scripts/dev-server.sh` rebuilds the SPA + restarts the Go server on :8090; it kills the prior process, so a previous background dev-server task reporting **exit 1 is expected**, not a failure.
|
||||||
|
|||||||
+19
-4
@@ -1,3 +1,4 @@
|
|||||||
|
# syntax=docker/dockerfile:1.7
|
||||||
# Stage 1: Build frontend
|
# Stage 1: Build frontend
|
||||||
FROM node:20-alpine AS frontend-builder
|
FROM node:20-alpine AS frontend-builder
|
||||||
|
|
||||||
@@ -9,25 +10,33 @@ COPY web/ ./
|
|||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
# Stage 2: Build Go binary
|
# Stage 2: Build Go binary
|
||||||
FROM golang:1.24-alpine AS backend-builder
|
FROM golang:1.25-alpine AS backend-builder
|
||||||
|
|
||||||
RUN apk add --no-cache git ca-certificates
|
RUN apk add --no-cache git ca-certificates
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
COPY go.mod go.sum ./
|
COPY go.mod go.sum ./
|
||||||
ENV GOTOOLCHAIN=auto
|
ENV GOTOOLCHAIN=auto
|
||||||
RUN go mod download
|
# Cache mounts persist the module + build caches across rebuilds (BuildKit).
|
||||||
|
RUN --mount=type=cache,target=/go/pkg/mod \
|
||||||
|
go mod download
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
# Copy built frontend into the expected embed location.
|
# Copy built frontend into the expected embed location.
|
||||||
COPY --from=frontend-builder /build/web/build ./web/build
|
COPY --from=frontend-builder /build/web/build ./web/build
|
||||||
|
|
||||||
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /tinyforge ./cmd/server
|
RUN --mount=type=cache,target=/go/pkg/mod \
|
||||||
|
--mount=type=cache,target=/root/.cache/go-build \
|
||||||
|
CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /tinyforge ./cmd/server
|
||||||
|
|
||||||
# Stage 3: Minimal runtime image
|
# Stage 3: Minimal runtime image
|
||||||
FROM alpine:3.19
|
FROM alpine:3.19
|
||||||
|
|
||||||
RUN apk add --no-cache ca-certificates tzdata
|
LABEL org.opencontainers.image.source="https://git.dolgolyov-family.by/alexei.dolgolyov/tiny-forge"
|
||||||
|
LABEL org.opencontainers.image.title="Tinyforge"
|
||||||
|
LABEL org.opencontainers.image.description="Self-hosted Docker deployment + mini-CI platform"
|
||||||
|
|
||||||
|
RUN apk add --no-cache ca-certificates tzdata wget
|
||||||
|
|
||||||
# Create non-root user.
|
# Create non-root user.
|
||||||
RUN addgroup -g 1000 -S app && adduser -u 1000 -S app -G app
|
RUN addgroup -g 1000 -S app && adduser -u 1000 -S app -G app
|
||||||
@@ -46,4 +55,10 @@ EXPOSE 8080
|
|||||||
ENV DATA_DIR=/app/data
|
ENV DATA_DIR=/app/data
|
||||||
ENV LISTEN_ADDR=:8080
|
ENV LISTEN_ADDR=:8080
|
||||||
|
|
||||||
|
VOLUME /app/data
|
||||||
|
|
||||||
|
# /readyz is the public readiness probe (pings the DB); /livez is liveness.
|
||||||
|
HEALTHCHECK --interval=30s --timeout=5s --retries=3 --start-period=10s \
|
||||||
|
CMD wget --no-verbose --tries=1 --spider http://localhost:8080/readyz || exit 1
|
||||||
|
|
||||||
ENTRYPOINT ["/app/tinyforge"]
|
ENTRYPOINT ["/app/tinyforge"]
|
||||||
|
|||||||
@@ -11,6 +11,15 @@ Self-hosted deployment platform with a web dashboard. Deploy Docker containers f
|
|||||||
- **Multi-stage projects** (dev, staging, prod) with tag pattern matching
|
- **Multi-stage projects** (dev, staging, prod) with tag pattern matching
|
||||||
- **Real-time deploy logs** via SSE streaming
|
- **Real-time deploy logs** via SSE streaming
|
||||||
|
|
||||||
|
### Branch Preview Environments
|
||||||
|
|
||||||
|
Get an isolated, throwaway deploy for every feature branch:
|
||||||
|
|
||||||
|
- Add a **branch pattern** (e.g. `feat/*`) to a workload's **git trigger** (Triggers panel → git trigger → *Branch pattern*).
|
||||||
|
- Pushing to any branch matching the pattern deploys an **isolated per-branch preview** — a child workload that inherits the source config, served at a **slug-prefixed subdomain** (`feat-login-app.example.com`) so previews never collide with each other or the main deploy.
|
||||||
|
- Previews are **automatically torn down** when the branch is deleted upstream.
|
||||||
|
- Manage live previews from the app's **Preview environments** panel (`/apps/[id]`): open each branch's URL or tear it down manually. A torn-down preview is recreated on the next push to its branch.
|
||||||
|
|
||||||
### Static Sites
|
### Static Sites
|
||||||
|
|
||||||
Deploy static sites and Deno-powered APIs directly from Git repositories:
|
Deploy static sites and Deno-powered APIs directly from Git repositories:
|
||||||
@@ -106,6 +115,46 @@ curl -X POST https://your-domain/api/webhook/<secret> \
|
|||||||
3. Enter your provider's Issuer URL, Client ID, and Client Secret
|
3. Enter your provider's Issuer URL, Client ID, and Client Secret
|
||||||
4. Set the Redirect URL to `https://your-domain/api/auth/oidc/callback`
|
4. Set the Redirect URL to `https://your-domain/api/auth/oidc/callback`
|
||||||
|
|
||||||
|
## CLI
|
||||||
|
|
||||||
|
`tinyforge` is a terminal client for driving a Tinyforge server from the shell; it is built on the same HTTP API as the web UI.
|
||||||
|
|
||||||
|
### Build
|
||||||
|
|
||||||
|
```bash
|
||||||
|
go build -o tinyforge ./cmd/cli # ./tinyforge (tinyforge.exe on Windows)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Log in once — caches a 24h token in ~/.tinyforge/config.json (mode 0600)
|
||||||
|
tinyforge login --base-url http://localhost:8090
|
||||||
|
# ...or non-interactively (no password echo / shell-history leak):
|
||||||
|
TINYFORGE_PASSWORD=… tinyforge login --base-url http://localhost:8090 --user admin
|
||||||
|
|
||||||
|
tinyforge apps # list apps + container state
|
||||||
|
tinyforge deploy my-app # deploy and wait for completion
|
||||||
|
tinyforge deploy my-app --ref v1.2.3 --note "hotfix"
|
||||||
|
tinyforge logs my-app -f # follow logs (Ctrl-C to stop)
|
||||||
|
tinyforge status # server health + current user
|
||||||
|
tinyforge status my-app # one app's containers
|
||||||
|
tinyforge logout # revoke + clear the cached token
|
||||||
|
```
|
||||||
|
|
||||||
|
### Server & token resolution
|
||||||
|
|
||||||
|
| Setting | Flag | Env | Default |
|
||||||
|
| -------- | ------------ | ----------------- | ------------------------ |
|
||||||
|
| Base URL | `--base-url` | `TINYFORGE_URL` | `http://localhost:8080` |
|
||||||
|
| Token | `--token` | `TINYFORGE_TOKEN` | cached by `login` |
|
||||||
|
| Config | `--config` | `TINYFORGE_CONFIG` | `~/.tinyforge/config.json` |
|
||||||
|
|
||||||
|
### Notes
|
||||||
|
|
||||||
|
- Login returns a **24h JWT** — there is no long-lived API token yet, so unattended use must log in again once the token expires. `deploy` / `stop` / `start` require an **admin** account.
|
||||||
|
- The token is sent as an `Authorization: Bearer` header (never placed in the URL) and the config file is written with `0600` permissions.
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
+149
@@ -0,0 +1,149 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"text/tabwriter"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func runApps(args []string) error {
|
||||||
|
// Accept an optional "list" subcommand: `tinyforge apps` == `tinyforge apps list`.
|
||||||
|
if len(args) > 0 && args[0] == "list" {
|
||||||
|
args = args[1:]
|
||||||
|
}
|
||||||
|
fs := flag.NewFlagSet("apps", flag.ExitOnError)
|
||||||
|
g := addGlobalFlags(fs)
|
||||||
|
fs.Usage = func() {
|
||||||
|
fmt.Fprint(os.Stderr, "Usage: tinyforge apps [list] [--base-url URL]\n\nList apps (workloads with a source) and their container state.\n")
|
||||||
|
fs.PrintDefaults()
|
||||||
|
}
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sess, err := newSession(g)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
var workloads []Workload
|
||||||
|
if err := sess.client.doJSON(ctx, "GET", "/api/workloads", nil, &workloads); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// One extra call fetches every container so state can be shown without an
|
||||||
|
// N+1 per-app request.
|
||||||
|
var containers []Container
|
||||||
|
if err := sess.client.doJSON(ctx, "GET", "/api/containers", nil, &containers); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
byWorkload := map[string][]Container{}
|
||||||
|
for _, c := range containers {
|
||||||
|
byWorkload[c.WorkloadID] = append(byWorkload[c.WorkloadID], c)
|
||||||
|
}
|
||||||
|
|
||||||
|
apps := make([]Workload, 0, len(workloads))
|
||||||
|
for _, w := range workloads {
|
||||||
|
if w.isApp() {
|
||||||
|
apps = append(apps, w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Slice(apps, func(i, j int) bool { return apps[i].Name < apps[j].Name })
|
||||||
|
|
||||||
|
if len(apps) == 0 {
|
||||||
|
fmt.Println("No apps yet. Create one in the web UI, then deploy with 'tinyforge deploy <app>'.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
tw := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
|
||||||
|
fmt.Fprintln(tw, "NAME\tSOURCE\tSTATE\tID")
|
||||||
|
for _, w := range apps {
|
||||||
|
fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n", w.Name, w.SourceKind, stateSummary(byWorkload[w.ID]), idShort(w.ID))
|
||||||
|
}
|
||||||
|
return tw.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// stateSummary condenses a workload's containers into one status word.
|
||||||
|
func stateSummary(cs []Container) string {
|
||||||
|
if len(cs) == 0 {
|
||||||
|
return "—"
|
||||||
|
}
|
||||||
|
running := 0
|
||||||
|
for _, c := range cs {
|
||||||
|
if c.State == "running" {
|
||||||
|
running++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case running == len(cs):
|
||||||
|
return "running"
|
||||||
|
case running == 0:
|
||||||
|
return cs[0].State // e.g. stopped / failed / missing
|
||||||
|
default:
|
||||||
|
return fmt.Sprintf("%d/%d running", running, len(cs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveApp maps a user-supplied reference (name, full id, or id prefix) to a
|
||||||
|
// single app workload. Exact id wins, then exact name, then a unique id prefix.
|
||||||
|
func resolveApp(ctx context.Context, c *Client, ref string) (Workload, error) {
|
||||||
|
var workloads []Workload
|
||||||
|
if err := c.doJSON(ctx, "GET", "/api/workloads", nil, &workloads); err != nil {
|
||||||
|
return Workload{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var byID, byName, byPrefix []Workload
|
||||||
|
for _, w := range workloads {
|
||||||
|
if !w.isApp() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case w.ID == ref:
|
||||||
|
byID = append(byID, w)
|
||||||
|
case strings.EqualFold(w.Name, ref):
|
||||||
|
byName = append(byName, w)
|
||||||
|
case len(ref) >= 6 && strings.HasPrefix(w.ID, ref):
|
||||||
|
byPrefix = append(byPrefix, w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(byID) == 1 {
|
||||||
|
return byID[0], nil
|
||||||
|
}
|
||||||
|
if len(byName) == 1 {
|
||||||
|
return byName[0], nil
|
||||||
|
}
|
||||||
|
if len(byName) > 1 {
|
||||||
|
return Workload{}, ambiguousErr(ref, byName)
|
||||||
|
}
|
||||||
|
if len(byPrefix) == 1 {
|
||||||
|
return byPrefix[0], nil
|
||||||
|
}
|
||||||
|
if len(byPrefix) > 1 {
|
||||||
|
return Workload{}, ambiguousErr(ref, byPrefix)
|
||||||
|
}
|
||||||
|
return Workload{}, fmt.Errorf("no app matching %q (try 'tinyforge apps list')", ref)
|
||||||
|
}
|
||||||
|
|
||||||
|
func ambiguousErr(ref string, matches []Workload) error {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "%q matches multiple apps; use the id:\n", ref)
|
||||||
|
for _, w := range matches {
|
||||||
|
fmt.Fprintf(&b, " %s %s\n", idShort(w.ID), w.Name)
|
||||||
|
}
|
||||||
|
return fmt.Errorf("%s", strings.TrimRight(b.String(), "\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// idShort abbreviates an id to its first eight bytes for display; ids of
// eight bytes or fewer are returned unchanged.
func idShort(id string) string {
	const width = 8
	if len(id) <= width {
		return id
	}
	return id[:width]
}
|
||||||
@@ -0,0 +1,232 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// apiError is the error value produced for failed API calls. It keeps the HTTP
// status next to the message so callers can tell an authentication failure
// (401) apart from other errors while still showing the server's own wording
// (e.g. "invalid credentials" vs "invalid or expired token").
type apiError struct {
	status int    // HTTP status code of the failed response
	msg    string // text returned by Error
}

// Error implements the error interface by returning the stored message.
func (e *apiError) Error() string {
	return e.msg
}
|
||||||
|
|
||||||
|
// isAuthError reports whether err is a 401 from the API.
|
||||||
|
func isAuthError(err error) bool {
|
||||||
|
var ae *apiError
|
||||||
|
return errors.As(err, &ae) && ae.status == http.StatusUnauthorized
|
||||||
|
}
|
||||||
|
|
||||||
|
// Client is the HTTP client for the Tinyforge API. No client-wide timeout is
// configured, so long synchronous deploys and follow-mode streams are not cut
// short; each caller bounds its request through the context it passes in.
type Client struct {
	baseURL string       // server root that request paths are appended to
	token   string       // bearer token; empty means unauthenticated
	http    *http.Client // underlying transport used for every request
}
|
||||||
|
|
||||||
|
func newClient(baseURL, token string) *Client {
|
||||||
|
return &Client{
|
||||||
|
baseURL: strings.TrimRight(baseURL, "/"),
|
||||||
|
token: token,
|
||||||
|
http: &http.Client{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// apiEnvelope is the CLI's own copy of the wrapper the server puts around
// every JSON response (the server-side struct is unexported). Data is kept as
// raw JSON so that one decode path can serve all endpoints and the payload is
// only unmarshalled once the caller's target type is known.
type apiEnvelope struct {
	Success bool            `json:"success"` // false when the request failed
	Data    json.RawMessage `json:"data"`    // endpoint-specific payload, decoded later
	Error   string          `json:"error"`   // server-provided failure message
}
|
||||||
|
|
||||||
|
// SessionToken is what POST /api/auth/login returns in the envelope's data
// field: the issued token and its expiry timestamp as sent by the server.
type SessionToken struct {
	Token     string `json:"token"`
	ExpiresAt string `json:"expires_at"`
}
|
||||||
|
|
||||||
|
// User is what GET /api/auth/me returns in the envelope's data field.
type User struct {
	ID       string `json:"id"`
	Username string `json:"username"`
	Email    string `json:"email"`
	Role     string `json:"role"`
}
|
||||||
|
|
||||||
|
// Workload holds the fields of a workload row that the CLI uses. The CLI
// treats a workload as an "app" when its SourceKind is non-empty (see isApp).
type Workload struct {
	ID         string `json:"id"`
	Name       string `json:"name"`
	Kind       string `json:"kind"`
	AppID      string `json:"app_id"`
	SourceKind string `json:"source_kind"`
	CreatedAt  string `json:"created_at"`
}

// isApp reports whether the workload has a source kind set.
func (w Workload) isApp() bool {
	return len(w.SourceKind) > 0
}
|
||||||
|
|
||||||
|
// Container holds the fields of a container row that the CLI uses. State is
// a free-form string from the server, one of
// running|stopped|failed|missing|starting|created|restarting|paused|...
type Container struct {
	ID          string `json:"id"`
	WorkloadID  string `json:"workload_id"`
	Role        string `json:"role"`
	ContainerID string `json:"container_id"`
	ImageRef    string `json:"image_ref"`
	State       string `json:"state"`
	Port        int    `json:"port"`
	Subdomain   string `json:"subdomain"`
	CreatedAt   string `json:"created_at"`
}
|
||||||
|
|
||||||
|
// DeployResult is what POST /api/workloads/{id}/deploy returns in the
// envelope's data field.
type DeployResult struct {
	WorkloadID  string `json:"workload_id"`
	Reference   string `json:"reference"`
	TriggeredBy string `json:"triggered_by"`
}
|
||||||
|
|
||||||
|
// doJSON performs a JSON request and unwraps the response envelope. body may be
|
||||||
|
// nil. out may be nil when the caller does not need the data payload. A 401
|
||||||
|
// maps to errNotAuthenticated; any other non-success surfaces the server's
|
||||||
|
// error message.
|
||||||
|
func (c *Client) doJSON(ctx context.Context, method, path string, body, out any) error {
|
||||||
|
var reqBody io.Reader
|
||||||
|
if body != nil {
|
||||||
|
buf, err := json.Marshal(body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("encode request: %w", err)
|
||||||
|
}
|
||||||
|
reqBody = bytes.NewReader(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, reqBody)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("build request: %w", err)
|
||||||
|
}
|
||||||
|
if body != nil {
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
}
|
||||||
|
c.authorize(req)
|
||||||
|
|
||||||
|
resp, err := c.http.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("%s %s: %w", method, path, err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
raw, err := io.ReadAll(io.LimitReader(resp.Body, 8<<20))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var env apiEnvelope
|
||||||
|
if jsonErr := json.Unmarshal(raw, &env); jsonErr != nil {
|
||||||
|
// Non-JSON body (e.g. a proxy error page). Surface status + a snippet,
|
||||||
|
// preserving auth-error typing for 401s with an unparseable body.
|
||||||
|
if resp.StatusCode >= 400 {
|
||||||
|
return &apiError{status: resp.StatusCode, msg: fmt.Sprintf(
|
||||||
|
"%s %s: unexpected response (status %d): %s", method, path, resp.StatusCode, snippet(raw))}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("%s %s: decode response: %w", method, path, jsonErr)
|
||||||
|
}
|
||||||
|
if resp.StatusCode >= 400 || !env.Success {
|
||||||
|
msg := env.Error
|
||||||
|
if msg == "" {
|
||||||
|
msg = fmt.Sprintf("%s %s: request failed (status %d)", method, path, resp.StatusCode)
|
||||||
|
}
|
||||||
|
return &apiError{status: resp.StatusCode, msg: msg}
|
||||||
|
}
|
||||||
|
if out != nil && len(env.Data) > 0 {
|
||||||
|
if err := json.Unmarshal(env.Data, out); err != nil {
|
||||||
|
return fmt.Errorf("decode response data: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// authorize attaches the bearer token. Using the Authorization header (rather
|
||||||
|
// than a ?token= query param) keeps the JWT out of server and proxy logs.
|
||||||
|
func (c *Client) authorize(req *http.Request) {
|
||||||
|
if c.token != "" {
|
||||||
|
req.Header.Set("Authorization", "Bearer "+c.token)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// streamSSE opens an SSE stream and invokes onData for each `data:` payload.
|
||||||
|
// Comment lines (heartbeats, beginning with ':') and blanks are skipped. The
|
||||||
|
// stream ends on EOF, context cancellation, or when onData returns an error.
|
||||||
|
func (c *Client) streamSSE(ctx context.Context, path string, onData func(payload []byte) error) error {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("build request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Accept", "text/event-stream")
|
||||||
|
c.authorize(req)
|
||||||
|
|
||||||
|
resp, err := c.http.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("GET %s: %w", path, err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
raw, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||||
|
var env apiEnvelope
|
||||||
|
msg := fmt.Sprintf("GET %s: stream failed (status %d)", path, resp.StatusCode)
|
||||||
|
if json.Unmarshal(raw, &env) == nil && env.Error != "" {
|
||||||
|
msg = env.Error
|
||||||
|
}
|
||||||
|
return &apiError{status: resp.StatusCode, msg: msg}
|
||||||
|
}
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(resp.Body)
|
||||||
|
scanner.Buffer(make([]byte, 0, 64<<10), 2<<20) // tolerate long log lines
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
if line == "" || strings.HasPrefix(line, ":") {
|
||||||
|
continue // blank separator or SSE comment/heartbeat
|
||||||
|
}
|
||||||
|
data, ok := strings.CutPrefix(line, "data:")
|
||||||
|
if !ok {
|
||||||
|
continue // ignore event:/id: fields — the API uses default events
|
||||||
|
}
|
||||||
|
if err := onData([]byte(strings.TrimPrefix(data, " "))); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil && !errors.Is(err, context.Canceled) {
|
||||||
|
return fmt.Errorf("read stream: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// snippet returns a short, single-line view of an unexpected response body
// for use in error messages. Surrounding whitespace is trimmed, newlines
// become spaces, and anything beyond roughly 200 bytes is replaced by "…".
// The cut is made on a rune boundary so a multi-byte UTF-8 character is never
// split. An empty body is rendered as "(empty body)".
func snippet(b []byte) string {
	const limit = 200 // maximum number of bytes kept before the ellipsis
	s := strings.TrimSpace(string(b))
	s = strings.ReplaceAll(s, "\n", " ")
	if len(s) > limit {
		// Ranging over a string yields the byte offset of each rune start;
		// keep the last offset that does not exceed the limit.
		cut := 0
		for i := range s {
			if i > limit {
				break
			}
			cut = i
		}
		s = s[:cut] + "…"
	}
	if s == "" {
		s = "(empty body)"
	}
	return s
}
|
||||||
@@ -0,0 +1,148 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultBaseURL is the server address used when neither a flag, an
// environment variable, nor the saved config provides one. It corresponds to
// the server's default LISTEN_ADDR (:8080); the dev server listens on :8090
// and must be selected with --base-url or $TINYFORGE_URL.
const defaultBaseURL = "http://localhost:8080"
|
||||||
|
|
||||||
|
// Config is the CLI state saved between runs, stored by default at
// ~/.tinyforge/config.json.
type Config struct {
	BaseURL   string `json:"base_url"`   // server the token was issued by
	Token     string `json:"token"`      // cached bearer token
	ExpiresAt string `json:"expires_at"` // token expiry as reported by the server
}
|
||||||
|
|
||||||
|
// globals collects the values of the flags shared by every command. Each
// field points at the variable the flag package fills in during Parse.
type globals struct {
	baseURL    *string // --base-url
	token      *string // --token
	configPath *string // --config
}
|
||||||
|
|
||||||
|
// addGlobalFlags registers the shared flags on a command's flag set.
|
||||||
|
func addGlobalFlags(fs *flag.FlagSet) *globals {
|
||||||
|
return &globals{
|
||||||
|
baseURL: fs.String("base-url", "", "Tinyforge server URL (default $TINYFORGE_URL or "+defaultBaseURL+")"),
|
||||||
|
token: fs.String("token", "", "auth token (default $TINYFORGE_TOKEN or cached config)"),
|
||||||
|
configPath: fs.String("config", "", "config file path (default $TINYFORGE_CONFIG or ~/.tinyforge/config.json)"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// configFilePath resolves the config file location with precedence:
|
||||||
|
// --config flag > $TINYFORGE_CONFIG > ~/.tinyforge/config.json.
|
||||||
|
func configFilePath(g *globals) (string, error) {
|
||||||
|
if g != nil && *g.configPath != "" {
|
||||||
|
return *g.configPath, nil
|
||||||
|
}
|
||||||
|
if env := os.Getenv("TINYFORGE_CONFIG"); env != "" {
|
||||||
|
return env, nil
|
||||||
|
}
|
||||||
|
home, err := os.UserHomeDir()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("locate home directory: %w", err)
|
||||||
|
}
|
||||||
|
return filepath.Join(home, ".tinyforge", "config.json"), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadConfig reads the config file. A missing file yields a zero Config and no
|
||||||
|
// error — first run is not a failure.
|
||||||
|
func loadConfig(path string) (Config, error) {
|
||||||
|
var cfg Config
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return cfg, nil
|
||||||
|
}
|
||||||
|
return cfg, fmt.Errorf("read config %s: %w", path, err)
|
||||||
|
}
|
||||||
|
// An empty or whitespace-only file (e.g. freshly touched) is treated as
|
||||||
|
// "no config yet" rather than a parse error.
|
||||||
|
if len(bytes.TrimSpace(data)) == 0 {
|
||||||
|
return cfg, nil
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||||
|
return cfg, fmt.Errorf("parse config %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return cfg, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// saveConfig writes the config file with 0600 permissions, since it holds a
|
||||||
|
// bearer token. The parent directory is created if absent.
|
||||||
|
func saveConfig(path string, cfg Config) error {
|
||||||
|
if dir := filepath.Dir(path); dir != "" {
|
||||||
|
if err := os.MkdirAll(dir, 0o700); err != nil {
|
||||||
|
return fmt.Errorf("create config dir: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
data, err := json.MarshalIndent(cfg, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("encode config: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(path, append(data, '\n'), 0o600); err != nil {
|
||||||
|
return fmt.Errorf("write config %s: %w", path, err)
|
||||||
|
}
|
||||||
|
// os.WriteFile only applies the mode when creating the file; Chmod ensures
|
||||||
|
// 0600 even when overwriting a pre-existing, looser-permissioned config.
|
||||||
|
if err := os.Chmod(path, 0o600); err != nil {
|
||||||
|
return fmt.Errorf("secure config %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveBaseURL applies precedence: --base-url > $TINYFORGE_URL > config > default.
|
||||||
|
func resolveBaseURL(g *globals, cfg Config) string {
|
||||||
|
if g != nil && *g.baseURL != "" {
|
||||||
|
return *g.baseURL
|
||||||
|
}
|
||||||
|
if env := os.Getenv("TINYFORGE_URL"); env != "" {
|
||||||
|
return env
|
||||||
|
}
|
||||||
|
if cfg.BaseURL != "" {
|
||||||
|
return cfg.BaseURL
|
||||||
|
}
|
||||||
|
return defaultBaseURL
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveToken applies precedence: --token > $TINYFORGE_TOKEN > config.
|
||||||
|
func resolveToken(g *globals, cfg Config) string {
|
||||||
|
if g != nil && *g.token != "" {
|
||||||
|
return *g.token
|
||||||
|
}
|
||||||
|
if env := os.Getenv("TINYFORGE_TOKEN"); env != "" {
|
||||||
|
return env
|
||||||
|
}
|
||||||
|
return cfg.Token
|
||||||
|
}
|
||||||
|
|
||||||
|
// session bundles the resolved client with the loaded config and its path, so
|
||||||
|
// commands can both make requests and persist updates (e.g. login).
|
||||||
|
type session struct {
|
||||||
|
client *Client
|
||||||
|
cfg Config
|
||||||
|
configPath string
|
||||||
|
}
|
||||||
|
|
||||||
|
// newSession loads config and builds a client with resolved base URL + token.
|
||||||
|
func newSession(g *globals) (*session, error) {
|
||||||
|
path, err := configFilePath(g)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
cfg, err := loadConfig(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &session{
|
||||||
|
client: newClient(resolveBaseURL(g, cfg), resolveToken(g, cfg)),
|
||||||
|
cfg: cfg,
|
||||||
|
configPath: path,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func runDeploy(args []string) error {
|
||||||
|
fs := flag.NewFlagSet("deploy", flag.ExitOnError)
|
||||||
|
g := addGlobalFlags(fs)
|
||||||
|
ref := fs.String("ref", "", "image tag / git ref / source-specific deploy target")
|
||||||
|
note := fs.String("note", "", "free-text note recorded with the deploy")
|
||||||
|
timeout := fs.Duration("timeout", 15*time.Minute, "max time to wait for the deploy to finish")
|
||||||
|
fs.Usage = func() {
|
||||||
|
fmt.Fprint(os.Stderr, "Usage: tinyforge deploy <app> [--ref TAG] [--note TEXT] [--timeout DUR]\n\n"+
|
||||||
|
"Trigger a deploy and wait for it to finish. Requires an admin token.\n")
|
||||||
|
fs.PrintDefaults()
|
||||||
|
}
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if fs.NArg() != 1 {
|
||||||
|
fs.Usage()
|
||||||
|
return fmt.Errorf("expected exactly one app (name or id)")
|
||||||
|
}
|
||||||
|
|
||||||
|
sess, err := newSession(g)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve the app on a short deadline; the deploy itself gets the full one.
|
||||||
|
resolveCtx, cancelResolve := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancelResolve()
|
||||||
|
app, err := resolveApp(resolveCtx, sess.client, fs.Arg(0))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
body := map[string]string{}
|
||||||
|
if *ref != "" {
|
||||||
|
body["reference"] = *ref
|
||||||
|
}
|
||||||
|
if *note != "" {
|
||||||
|
body["note"] = *note
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Deploying %s%s…\n", app.Name, refSuffix(*ref))
|
||||||
|
|
||||||
|
// The endpoint returns 202 but blocks until the deploy completes, so a
|
||||||
|
// success here means it finished; allow plenty of time for pull/build.
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), *timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
var result DeployResult
|
||||||
|
if err := sess.client.doJSON(ctx, "POST", "/api/workloads/"+app.ID+"/deploy", body, &result); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Deploy of %s completed (triggered by %s).\n", app.Name, result.TriggeredBy)
|
||||||
|
fmt.Printf("Follow with: tinyforge logs %s -f\n", app.Name)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// refSuffix renders the " @ <ref>" tail appended to the "Deploying …" line,
// or nothing when no ref was given.
func refSuffix(ref string) string {
	if ref != "" {
		return " @ " + ref
	}
	return ""
}
|
||||||
@@ -0,0 +1,136 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func runLogin(args []string) error {
|
||||||
|
fs := flag.NewFlagSet("login", flag.ExitOnError)
|
||||||
|
g := addGlobalFlags(fs)
|
||||||
|
user := fs.String("user", "", "username (prompted if omitted)")
|
||||||
|
pass := fs.String("password", "", "password (insecure; prefer $TINYFORGE_PASSWORD or the prompt)")
|
||||||
|
fs.Usage = func() {
|
||||||
|
fmt.Fprint(os.Stderr, "Usage: tinyforge login [--user U] [--password P] [--base-url URL]\n\n"+
|
||||||
|
"Authenticate against the server and cache the token.\n")
|
||||||
|
fs.PrintDefaults()
|
||||||
|
}
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sess, err := newSession(g)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
username := *user
|
||||||
|
if username == "" {
|
||||||
|
username, err = promptLine("Username: ")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
password := *pass
|
||||||
|
if password == "" {
|
||||||
|
password = os.Getenv("TINYFORGE_PASSWORD")
|
||||||
|
}
|
||||||
|
if password == "" {
|
||||||
|
password, err = promptPassword("Password: ")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if username == "" || password == "" {
|
||||||
|
return fmt.Errorf("username and password are required")
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
var tok SessionToken
|
||||||
|
body := map[string]string{"username": username, "password": password}
|
||||||
|
if err := sess.client.doJSON(ctx, "POST", "/api/auth/login", body, &tok); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Persist the resolved base URL alongside the token so later commands need
|
||||||
|
// no flags. The token file is written 0600 by saveConfig.
|
||||||
|
sess.cfg.BaseURL = sess.client.baseURL
|
||||||
|
sess.cfg.Token = tok.Token
|
||||||
|
sess.cfg.ExpiresAt = tok.ExpiresAt
|
||||||
|
if err := saveConfig(sess.configPath, sess.cfg); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Logged in to %s as %s.\n", sess.client.baseURL, username)
|
||||||
|
if exp := friendlyExpiry(tok.ExpiresAt); exp != "" {
|
||||||
|
fmt.Printf("Token valid until %s.\n", exp)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runLogout(args []string) error {
|
||||||
|
fs := flag.NewFlagSet("logout", flag.ExitOnError)
|
||||||
|
g := addGlobalFlags(fs)
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sess, err := newSession(g)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if sess.client.token == "" {
|
||||||
|
fmt.Println("Not logged in.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// Best-effort server-side revocation; clear the local token regardless.
|
||||||
|
revokeErr := sess.client.doJSON(ctx, "POST", "/api/auth/logout", nil, nil)
|
||||||
|
|
||||||
|
sess.cfg.Token = ""
|
||||||
|
sess.cfg.ExpiresAt = ""
|
||||||
|
if err := saveConfig(sess.configPath, sess.cfg); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if revokeErr != nil {
|
||||||
|
fmt.Printf("Cleared local token (server revocation skipped: %v).\n", revokeErr)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
fmt.Println("Logged out.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// stdinByteReader hands its file to a bufio.Reader one byte per Read call, so
// the buffered reader cannot consume input beyond the line it returns.
type stdinByteReader struct{ f *os.File }

// Read fills at most one byte of p from the underlying file.
func (r stdinByteReader) Read(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}
	return r.f.Read(p[:1])
}

// promptLine prints label to stderr and reads a single line from stdin,
// returning it with surrounding whitespace trimmed. A final line without a
// trailing newline is accepted; an error is returned only when nothing could
// be read.
//
// Input is pulled one byte at a time. A plain bufio.Reader would read ahead
// and swallow later lines of piped input (for example the password that
// follows the username), leaving nothing for the next prompt.
func promptLine(label string) (string, error) {
	fmt.Fprint(os.Stderr, label)
	r := bufio.NewReader(stdinByteReader{os.Stdin})
	line, err := r.ReadString('\n')
	if err != nil && line == "" {
		return "", fmt.Errorf("read input: %w", err)
	}
	return strings.TrimSpace(line), nil
}
|
||||||
|
|
||||||
|
// friendlyExpiry renders an RFC 3339 timestamp in the local time zone as
// "2006-01-02 15:04 MST". An empty input yields an empty string, and a value
// that does not parse is returned unchanged.
func friendlyExpiry(s string) string {
	if s == "" {
		return ""
	}
	if parsed, err := time.Parse(time.RFC3339, s); err == nil {
		return parsed.Local().Format("2006-01-02 15:04 MST")
	}
	return s
}
|
||||||
+143
@@ -0,0 +1,143 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func runLogs(args []string) error {
|
||||||
|
fs := flag.NewFlagSet("logs", flag.ExitOnError)
|
||||||
|
g := addGlobalFlags(fs)
|
||||||
|
follow := fs.Bool("f", false, "follow the log stream (Ctrl-C to stop)")
|
||||||
|
tail := fs.Int("tail", 200, "number of trailing lines to show (max 5000)")
|
||||||
|
container := fs.String("container", "", "container row id/prefix or role (when an app has several)")
|
||||||
|
fs.Usage = func() {
|
||||||
|
fmt.Fprint(os.Stderr, "Usage: tinyforge logs <app> [-f] [--tail N] [--container CID]\n\nPrint or follow a container's logs.\n")
|
||||||
|
fs.PrintDefaults()
|
||||||
|
}
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if fs.NArg() != 1 {
|
||||||
|
fs.Usage()
|
||||||
|
return fmt.Errorf("expected exactly one app (name or id)")
|
||||||
|
}
|
||||||
|
|
||||||
|
sess, err := newSession(g)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
resolveCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
app, err := resolveApp(resolveCtx, sess.client, fs.Arg(0))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
var containers []Container
|
||||||
|
if err := sess.client.doJSON(resolveCtx, "GET", "/api/workloads/"+app.ID+"/containers", nil, &containers); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
target, err := chooseContainer(containers, *container)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
q := url.Values{}
|
||||||
|
q.Set("tail", fmt.Sprintf("%d", *tail))
|
||||||
|
base := "/api/workloads/" + app.ID + "/containers/" + target.ID + "/logs"
|
||||||
|
|
||||||
|
if !*follow {
|
||||||
|
var lines []string
|
||||||
|
if err := sess.client.doJSON(resolveCtx, "GET", base+"?"+q.Encode(), nil, &lines); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, line := range lines {
|
||||||
|
fmt.Println(line)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Follow: stream until EOF or Ctrl-C.
|
||||||
|
q.Set("follow", "true")
|
||||||
|
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
|
||||||
|
defer stop()
|
||||||
|
|
||||||
|
err = sess.client.streamSSE(ctx, base+"?"+q.Encode(), func(payload []byte) error {
|
||||||
|
var frame struct {
|
||||||
|
Line string `json:"line"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(payload, &frame) != nil {
|
||||||
|
return nil // ignore frames we can't parse
|
||||||
|
}
|
||||||
|
fmt.Println(frame.Line)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if ctx.Err() != nil { // user interrupted — clean exit
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// chooseContainer selects which container to read. With an explicit selector,
|
||||||
|
// it matches the row id (exact or prefix) or the role. Otherwise it uses the
|
||||||
|
// sole container, or the sole running one, and errors with a list when the
|
||||||
|
// choice is ambiguous.
|
||||||
|
func chooseContainer(cs []Container, selector string) (Container, error) {
|
||||||
|
if len(cs) == 0 {
|
||||||
|
return Container{}, fmt.Errorf("app has no containers yet — deploy it first")
|
||||||
|
}
|
||||||
|
|
||||||
|
if selector != "" {
|
||||||
|
var matches []Container
|
||||||
|
for _, c := range cs {
|
||||||
|
if c.ID == selector || strings.EqualFold(c.Role, selector) ||
|
||||||
|
(len(selector) >= 6 && strings.HasPrefix(c.ID, selector)) {
|
||||||
|
matches = append(matches, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch len(matches) {
|
||||||
|
case 1:
|
||||||
|
return matches[0], nil
|
||||||
|
case 0:
|
||||||
|
return Container{}, fmt.Errorf("no container matching %q\n%s", selector, containerList(cs))
|
||||||
|
default:
|
||||||
|
return Container{}, fmt.Errorf("%q matches multiple containers\n%s", selector, containerList(cs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(cs) == 1 {
|
||||||
|
return cs[0], nil
|
||||||
|
}
|
||||||
|
var running []Container
|
||||||
|
for _, c := range cs {
|
||||||
|
if c.State == "running" {
|
||||||
|
running = append(running, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(running) == 1 {
|
||||||
|
return running[0], nil
|
||||||
|
}
|
||||||
|
return Container{}, fmt.Errorf("app has %d containers; pick one with --container:\n%s", len(cs), containerList(cs))
|
||||||
|
}
|
||||||
|
|
||||||
|
func containerList(cs []Container) string {
|
||||||
|
var b strings.Builder
|
||||||
|
for _, c := range cs {
|
||||||
|
role := c.Role
|
||||||
|
if role == "" {
|
||||||
|
role = "(default)"
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, " %s %-12s %s\n", idShort(c.ID), role, c.State)
|
||||||
|
}
|
||||||
|
return strings.TrimRight(b.String(), "\n")
|
||||||
|
}
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
// Command tinyforge is a terminal client for a Tinyforge server.
|
||||||
|
//
|
||||||
|
// It drives the existing HTTP API: log in to obtain a 24h JWT, then list
|
||||||
|
// apps, trigger deploys, stream logs, and check status. The token is cached
|
||||||
|
// in ~/.tinyforge/config.json (mode 0600) so subsequent commands reuse it.
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
//
|
||||||
|
// tinyforge login [--user U] [--password P]
|
||||||
|
// tinyforge apps [list]
|
||||||
|
// tinyforge deploy <app> [--ref TAG] [--note TEXT]
|
||||||
|
// tinyforge logs <app> [-f] [--tail N] [--container CID]
|
||||||
|
// tinyforge status [<app>]
|
||||||
|
// tinyforge logout
|
||||||
|
// tinyforge version
|
||||||
|
//
|
||||||
|
// The target server is resolved from --base-url, then $TINYFORGE_URL, then the
|
||||||
|
// saved config, then http://localhost:8080.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// version is the string printed by "tinyforge version". It defaults to "dev"
// and can be replaced at build time with
// -ldflags "-X main.version=...".
var version = "dev"
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if len(os.Args) < 2 {
|
||||||
|
usage(os.Stderr)
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd, args := os.Args[1], os.Args[2:]
|
||||||
|
|
||||||
|
var err error
|
||||||
|
switch cmd {
|
||||||
|
case "login":
|
||||||
|
err = runLogin(args)
|
||||||
|
case "logout":
|
||||||
|
err = runLogout(args)
|
||||||
|
case "apps":
|
||||||
|
err = runApps(args)
|
||||||
|
case "deploy":
|
||||||
|
err = runDeploy(args)
|
||||||
|
case "logs":
|
||||||
|
err = runLogs(args)
|
||||||
|
case "status":
|
||||||
|
err = runStatus(args)
|
||||||
|
case "version", "--version", "-v":
|
||||||
|
fmt.Printf("tinyforge %s\n", version)
|
||||||
|
case "help", "-h", "--help":
|
||||||
|
usage(os.Stdout)
|
||||||
|
default:
|
||||||
|
fmt.Fprintf(os.Stderr, "tinyforge: unknown command %q\n\n", cmd)
|
||||||
|
usage(os.Stderr)
|
||||||
|
os.Exit(2)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
// Authenticated commands that hit a 401 get a re-login hint; the login
|
||||||
|
// command itself surfaces the server message ("invalid credentials").
|
||||||
|
if cmd != "login" && isAuthError(err) {
|
||||||
|
err = fmt.Errorf("%w — run 'tinyforge login'", err)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "tinyforge: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// usage writes the top-level help text (commands and global flags) to w.
func usage(w *os.File) {
	const helpText = `tinyforge — terminal client for a Tinyforge server

Usage:
tinyforge <command> [flags]

Commands:
login Authenticate and cache a token
logout Revoke the cached token and clear it
apps [list] List your apps (workloads with a source)
deploy <app> Trigger a deploy (waits for completion)
logs <app> Print container logs (use -f to follow)
status [<app>] Show server health, or one app's containers
version Print the CLI version

Global flags (accepted by any command):
--base-url URL Server URL (default $TINYFORGE_URL or http://localhost:8080)
--token TOKEN Auth token (default $TINYFORGE_TOKEN or cached config)
--config PATH Config file (default $TINYFORGE_CONFIG or ~/.tinyforge/config.json)

Run "tinyforge <command> -h" for command-specific flags.
`
	fmt.Fprint(w, helpText)
}
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
//go:build !windows
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// promptPassword reads a password from stdin with echo disabled via stty. If
|
||||||
|
// stty is unavailable (no tty, missing binary), it falls back to an echoed
|
||||||
|
// read so the command still works in pipes/CI.
|
||||||
|
func promptPassword(label string) (string, error) {
|
||||||
|
fmt.Fprint(os.Stderr, label)
|
||||||
|
|
||||||
|
echoDisabled := stty("-echo") == nil
|
||||||
|
if echoDisabled {
|
||||||
|
defer func() {
|
||||||
|
_ = stty("echo")
|
||||||
|
fmt.Fprintln(os.Stderr) // the Enter keystroke was not echoed
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
line, err := bufio.NewReader(os.Stdin).ReadString('\n')
|
||||||
|
if err != nil && line == "" {
|
||||||
|
return "", fmt.Errorf("read password: %w", err)
|
||||||
|
}
|
||||||
|
return strings.TrimRight(line, "\r\n"), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// stty runs the external stty program with the single argument arg, handing
// it this process's stdin. It returns whatever error running the command
// produced, or nil on success.
func stty(arg string) error {
	proc := exec.Command("stty", arg)
	proc.Stdin = os.Stdin
	if runErr := proc.Run(); runErr != nil {
		return runErr
	}
	return nil
}
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
//go:build windows
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// enableEchoInput is the Windows console mode bit that echoes typed input.
|
||||||
|
const enableEchoInput = 0x0004
|
||||||
|
|
||||||
|
// promptPassword reads a password from the console with echo disabled, using
|
||||||
|
// kernel32 directly so no third-party dependency is needed. If the console
|
||||||
|
// mode cannot be changed (e.g. piped stdin), it falls back to an echoed read.
|
||||||
|
func promptPassword(label string) (string, error) {
|
||||||
|
fmt.Fprint(os.Stderr, label)
|
||||||
|
|
||||||
|
kernel32 := syscall.NewLazyDLL("kernel32.dll")
|
||||||
|
getConsoleMode := kernel32.NewProc("GetConsoleMode")
|
||||||
|
setConsoleMode := kernel32.NewProc("SetConsoleMode")
|
||||||
|
handle := syscall.Handle(os.Stdin.Fd())
|
||||||
|
|
||||||
|
var mode uint32
|
||||||
|
echoDisabled := false
|
||||||
|
if r, _, _ := getConsoleMode.Call(uintptr(handle), uintptr(unsafe.Pointer(&mode))); r != 0 {
|
||||||
|
if ret, _, _ := setConsoleMode.Call(uintptr(handle), uintptr(mode&^enableEchoInput)); ret != 0 {
|
||||||
|
echoDisabled = true
|
||||||
|
defer setConsoleMode.Call(uintptr(handle), uintptr(mode))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
line, err := bufio.NewReader(os.Stdin).ReadString('\n')
|
||||||
|
if echoDisabled {
|
||||||
|
fmt.Fprintln(os.Stderr) // the Enter keystroke was not echoed
|
||||||
|
}
|
||||||
|
if err != nil && line == "" {
|
||||||
|
return "", fmt.Errorf("read password: %w", err)
|
||||||
|
}
|
||||||
|
return strings.TrimRight(line, "\r\n"), nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,122 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"text/tabwriter"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func runStatus(args []string) error {
|
||||||
|
fs := flag.NewFlagSet("status", flag.ExitOnError)
|
||||||
|
g := addGlobalFlags(fs)
|
||||||
|
fs.Usage = func() {
|
||||||
|
fmt.Fprint(os.Stderr, "Usage: tinyforge status [<app>]\n\nWith no app: server health and the logged-in user.\nWith an app: that app's containers.\n")
|
||||||
|
fs.PrintDefaults()
|
||||||
|
}
|
||||||
|
if err := fs.Parse(args); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sess, err := newSession(g)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
if fs.NArg() == 0 {
|
||||||
|
return serverStatus(ctx, sess)
|
||||||
|
}
|
||||||
|
return appStatus(ctx, sess.client, fs.Arg(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
func serverStatus(ctx context.Context, sess *session) error {
|
||||||
|
fmt.Printf("Server: %s\n", sess.client.baseURL)
|
||||||
|
|
||||||
|
var me User
|
||||||
|
if err := sess.client.doJSON(ctx, "GET", "/api/auth/me", nil, &me); err != nil {
|
||||||
|
fmt.Printf("User: not logged in (%v)\n", err)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("User: %s (%s)\n", me.Username, me.Role)
|
||||||
|
}
|
||||||
|
if exp := friendlyExpiry(sess.cfg.ExpiresAt); exp != "" {
|
||||||
|
fmt.Printf("Token: valid until %s\n", exp)
|
||||||
|
}
|
||||||
|
|
||||||
|
var health map[string]any
|
||||||
|
if err := sess.client.doJSON(ctx, "GET", "/api/health", nil, &health); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fmt.Printf("DB: %s\n", connState(health, "database"))
|
||||||
|
docker := connState(health, "docker")
|
||||||
|
if v := nestedString(health, "docker", "version"); v != "" {
|
||||||
|
docker += " (v" + v + ")"
|
||||||
|
}
|
||||||
|
fmt.Printf("Docker: %s\n", docker)
|
||||||
|
if _, ok := health["proxy"]; ok {
|
||||||
|
fmt.Printf("Proxy: %s\n", connState(health, "proxy"))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func appStatus(ctx context.Context, c *Client, ref string) error {
|
||||||
|
app, err := resolveApp(ctx, c, ref)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var containers []Container
|
||||||
|
if err := c.doJSON(ctx, "GET", "/api/workloads/"+app.ID+"/containers", nil, &containers); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("%s (%s, %s)\n", app.Name, app.SourceKind, idShort(app.ID))
|
||||||
|
if len(containers) == 0 {
|
||||||
|
fmt.Println("No containers — not deployed yet.")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
tw := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
|
||||||
|
fmt.Fprintln(tw, "ROLE\tSTATE\tIMAGE\tPORT\tSUBDOMAIN\tCONTAINER")
|
||||||
|
for _, c := range containers {
|
||||||
|
role := c.Role
|
||||||
|
if role == "" {
|
||||||
|
role = "(default)"
|
||||||
|
}
|
||||||
|
port := ""
|
||||||
|
if c.Port != 0 {
|
||||||
|
port = fmt.Sprintf("%d", c.Port)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\t%s\n",
|
||||||
|
role, c.State, c.ImageRef, port, c.Subdomain, idShort(c.ID))
|
||||||
|
}
|
||||||
|
return tw.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// connState reads health[section].connected and renders connected/disconnected,
|
||||||
|
// appending the section's error string when present.
|
||||||
|
// connState describes the connection state recorded under health[section].
//
// It returns "unknown" when the section is absent or is not a JSON object,
// "connected" when the section's "connected" field is the boolean true, and
// otherwise "disconnected", with the section's "error" string appended in
// parentheses when that field is a non-empty string.
func connState(health map[string]any, section string) string {
	entry, isObject := health[section].(map[string]any)
	if !isObject {
		return "unknown"
	}
	if up, _ := entry["connected"].(bool); up {
		return "connected"
	}
	// A missing, non-string, or empty "error" leaves the bare state.
	reason, _ := entry["error"].(string)
	if reason == "" {
		return "disconnected"
	}
	return "disconnected (" + reason + ")"
}
|
||||||
|
|
||||||
|
// nestedString returns m[section][key] when m[section] is a JSON object and
// the value under key is a string. In every other case (missing section,
// section of another type, missing key, non-string value) it returns "".
func nestedString(m map[string]any, section, key string) string {
	if inner, isObject := m[section].(map[string]any); isObject {
		if text, isString := inner[key].(string); isString {
			return text
		}
	}
	return ""
}
|
||||||
+57
-1
@@ -28,6 +28,7 @@ import (
|
|||||||
"github.com/alexei/tinyforge/internal/health"
|
"github.com/alexei/tinyforge/internal/health"
|
||||||
"github.com/alexei/tinyforge/internal/logging"
|
"github.com/alexei/tinyforge/internal/logging"
|
||||||
"github.com/alexei/tinyforge/internal/logscanner"
|
"github.com/alexei/tinyforge/internal/logscanner"
|
||||||
|
"github.com/alexei/tinyforge/internal/metricalert"
|
||||||
"github.com/alexei/tinyforge/internal/notify"
|
"github.com/alexei/tinyforge/internal/notify"
|
||||||
"github.com/alexei/tinyforge/internal/npm"
|
"github.com/alexei/tinyforge/internal/npm"
|
||||||
"github.com/alexei/tinyforge/internal/proxy"
|
"github.com/alexei/tinyforge/internal/proxy"
|
||||||
@@ -36,6 +37,7 @@ import (
|
|||||||
"github.com/alexei/tinyforge/internal/stale"
|
"github.com/alexei/tinyforge/internal/stale"
|
||||||
"github.com/alexei/tinyforge/internal/stats"
|
"github.com/alexei/tinyforge/internal/stats"
|
||||||
"github.com/alexei/tinyforge/internal/store"
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
"github.com/alexei/tinyforge/internal/volsnap"
|
||||||
"github.com/alexei/tinyforge/internal/webhook"
|
"github.com/alexei/tinyforge/internal/webhook"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
|
|
||||||
@@ -43,6 +45,7 @@ import (
|
|||||||
// itself with internal/workload/plugin. Adding a new Source or Trigger
|
// itself with internal/workload/plugin. Adding a new Source or Trigger
|
||||||
// is a matter of dropping a new package and adding it to this list.
|
// is a matter of dropping a new package and adding it to this list.
|
||||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/compose"
|
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/compose"
|
||||||
|
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/dockerfile"
|
||||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/image"
|
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/image"
|
||||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/static"
|
_ "github.com/alexei/tinyforge/internal/workload/plugin/source/static"
|
||||||
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/git"
|
_ "github.com/alexei/tinyforge/internal/workload/plugin/trigger/git"
|
||||||
@@ -62,6 +65,20 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Acquire single-instance lockfile BEFORE opening the DB. SQLite +
|
||||||
|
// SetMaxOpenConns(1) does not protect against two Tinyforge processes
|
||||||
|
// sharing a data directory; without this guard a misconfigured
|
||||||
|
// systemd unit, container restart race, or `tinyforge` shell typo can
|
||||||
|
// silently double-fire schedulers, double-poll registries, and
|
||||||
|
// corrupt `extra_json` RMW. The lockfile is a PID file under
|
||||||
|
// $DATA_DIR/tinyforge.lock — collisions with dead PIDs are reclaimed.
|
||||||
|
releaseLock, err := store.AcquireLockfile(dataDir)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("could not acquire data-dir lock", "data_dir", dataDir, "error", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer releaseLock()
|
||||||
|
|
||||||
// Open database.
|
// Open database.
|
||||||
dbPath := filepath.Join(dataDir, "tinyforge.db")
|
dbPath := filepath.Join(dataDir, "tinyforge.db")
|
||||||
db, err := store.New(dbPath)
|
db, err := store.New(dbPath)
|
||||||
@@ -78,6 +95,21 @@ func main() {
|
|||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// One-shot migration: rewrite every legacy unprefixed-hex secret
|
||||||
|
// in the DB into the new tf1: envelope form. Idempotent (gated by
|
||||||
|
// schema_versions version 2). Lets the rest of the codebase treat
|
||||||
|
// envelope-presence as a stable invariant for future key rotations.
|
||||||
|
// Failures here are logged but non-fatal: a partial migration just
|
||||||
|
// means some columns keep working through Decrypt's legacy
|
||||||
|
// fallback until the next manual save re-encrypts them.
|
||||||
|
if err := db.MigrateSecretsToEnvelope(store.EnvelopeMigrator{
|
||||||
|
HasEnvelope: crypto.HasEnvelope,
|
||||||
|
Decrypt: func(v string) (string, error) { return crypto.Decrypt(encKey, v) },
|
||||||
|
Encrypt: func(v string) (string, error) { return crypto.Encrypt(encKey, v) },
|
||||||
|
}); err != nil {
|
||||||
|
slog.Warn("secrets envelope migration", "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Import seed config on first launch (idempotent).
|
// Import seed config on first launch (idempotent).
|
||||||
seedPath := envOrDefault("SEED_FILE", "./tinyforge.yaml")
|
seedPath := envOrDefault("SEED_FILE", "./tinyforge.yaml")
|
||||||
if err := config.ImportSeed(db, seedPath); err != nil {
|
if err := config.ImportSeed(db, seedPath); err != nil {
|
||||||
@@ -197,7 +229,8 @@ func main() {
|
|||||||
switch {
|
switch {
|
||||||
case r.Deployed:
|
case r.Deployed:
|
||||||
deployed++
|
deployed++
|
||||||
case r.Reason == webhook.ReasonBindingDisabled, r.Reason == webhook.ReasonNoMatch:
|
case r.Reason == webhook.ReasonBindingDisabled, r.Reason == webhook.ReasonNoMatch,
|
||||||
|
r.Reason == webhook.ReasonPreviewNoop:
|
||||||
// not a failure — silent
|
// not a failure — silent
|
||||||
default:
|
default:
|
||||||
errored++
|
errored++
|
||||||
@@ -291,6 +324,19 @@ func main() {
|
|||||||
}
|
}
|
||||||
dep.SetPreDeployBackuper(backupEngine)
|
dep.SetPreDeployBackuper(backupEngine)
|
||||||
|
|
||||||
|
// Initialize volume-snapshot engine (per-workload data-volume archives).
|
||||||
|
snapshotEngine, err := volsnap.New(db, dataDir)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("create snapshot engine", "error", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
// Reclaim snapshot files orphaned by workload deletes (rows CASCADE, files don't).
|
||||||
|
if cleaned, err := snapshotEngine.CleanOrphans(); err != nil {
|
||||||
|
slog.Warn("snapshots: clean orphans on startup", "error", err)
|
||||||
|
} else if cleaned > 0 {
|
||||||
|
slog.Info("snapshots: cleaned orphan files on startup", "count", cleaned)
|
||||||
|
}
|
||||||
|
|
||||||
// Clean orphaned backup files and prune on startup.
|
// Clean orphaned backup files and prune on startup.
|
||||||
if cleaned, err := backupEngine.CleanOrphans(); err != nil {
|
if cleaned, err := backupEngine.CleanOrphans(); err != nil {
|
||||||
slog.Warn("backup: clean orphans on startup", "error", err)
|
slog.Warn("backup: clean orphans on startup", "error", err)
|
||||||
@@ -359,11 +405,20 @@ func main() {
|
|||||||
}
|
}
|
||||||
defer logScanMgr.Stop()
|
defer logScanMgr.Stop()
|
||||||
|
|
||||||
|
// Metric-alert manager: evaluates threshold rules against recent
|
||||||
|
// container stats samples and emits event_log entries on breach.
|
||||||
|
// The store satisfies RuleSource/SampleSource/EventSink; the event
|
||||||
|
// bus is the Publisher.
|
||||||
|
metricAlertMgr := metricalert.New(db, db, db, eventBus)
|
||||||
|
metricAlertMgr.Start()
|
||||||
|
defer metricAlertMgr.Stop()
|
||||||
|
|
||||||
// Build API server.
|
// Build API server.
|
||||||
apiServer := api.NewServer(db, dockerClient, npmClient, proxyProvider, dep, notifier, webhookHandler, eventBus, encKey)
|
apiServer := api.NewServer(db, dockerClient, npmClient, proxyProvider, dep, notifier, webhookHandler, eventBus, encKey)
|
||||||
apiServer.SetStaleScanner(staleScanner)
|
apiServer.SetStaleScanner(staleScanner)
|
||||||
apiServer.SetLogScanReloader(logScanMgr)
|
apiServer.SetLogScanReloader(logScanMgr)
|
||||||
apiServer.SetBackupEngine(backupEngine)
|
apiServer.SetBackupEngine(backupEngine)
|
||||||
|
apiServer.SetSnapshotEngine(snapshotEngine)
|
||||||
apiServer.SetDBPath(dbPath)
|
apiServer.SetDBPath(dbPath)
|
||||||
apiServer.SetBackupSettingsChangedCallback(scheduleAutobackup)
|
apiServer.SetBackupSettingsChangedCallback(scheduleAutobackup)
|
||||||
apiServer.SetDNSProvider(dnsProvider)
|
apiServer.SetDNSProvider(dnsProvider)
|
||||||
@@ -420,6 +475,7 @@ func main() {
|
|||||||
eventBus.Unsubscribe(notifySub)
|
eventBus.Unsubscribe(notifySub)
|
||||||
staleScanner.Stop()
|
staleScanner.Stop()
|
||||||
statsCollector.Stop()
|
statsCollector.Stop()
|
||||||
|
metricAlertMgr.Stop()
|
||||||
|
|
||||||
// Drain in-progress deploys and notifications.
|
// Drain in-progress deploys and notifications.
|
||||||
dep.Drain()
|
dep.Drain()
|
||||||
|
|||||||
+11
-2
@@ -1,7 +1,13 @@
|
|||||||
services:
|
services:
|
||||||
tinyforge:
|
tinyforge:
|
||||||
|
# Default: build from source so a fresh clone works out of the box.
|
||||||
build: .
|
build: .
|
||||||
image: tinyforge:latest
|
# Image name doubles as the Gitea registry tag. To DEPLOY the pre-built
|
||||||
|
# image instead of building (e.g. Portainer pulling on a webhook), comment
|
||||||
|
# out `build:` above — compose will then pull this tag. `:latest` is pushed
|
||||||
|
# only for stable (non pre-release) releases, and the registry may require
|
||||||
|
# `docker login git.dolgolyov-family.by` first if the package is private.
|
||||||
|
image: git.dolgolyov-family.by/alexei.dolgolyov/tiny-forge:latest
|
||||||
container_name: tinyforge
|
container_name: tinyforge
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
@@ -31,7 +37,10 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- staging-net
|
- staging-net
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/api/auth/login"]
|
# /readyz is the public readiness probe (pings the DB, rate-limited).
|
||||||
|
# The previous target (/api/auth/login) is POST-only, so a GET/spider
|
||||||
|
# request returned 405 and the container was always reported unhealthy.
|
||||||
|
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/readyz"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# Tinyforge Codemaps — Index
|
# Tinyforge Codemaps — Index
|
||||||
|
|
||||||
**Last Updated:** 2026-05-16
|
**Last Updated:** 2026-05-16 (added `container-extra-json` policy doc)
|
||||||
|
|
||||||
This directory contains architectural maps of key Tinyforge subsystems. Each codemap focuses on one major area: core data types, contract surfaces, integration points, and recipes for extending the system.
|
This directory contains architectural maps of key Tinyforge subsystems. Each codemap focuses on one major area: core data types, contract surfaces, integration points, and recipes for extending the system.
|
||||||
|
|
||||||
@@ -8,6 +8,7 @@ This directory contains architectural maps of key Tinyforge subsystems. Each cod
|
|||||||
|
|
||||||
- **[Workload Plugin](./workload-plugin.md)** — Source × Trigger plugin contracts; registry lookups; webhook fan-out; how to add new kinds.
|
- **[Workload Plugin](./workload-plugin.md)** — Source × Trigger plugin contracts; registry lookups; webhook fan-out; how to add new kinds.
|
||||||
- **[Discovery & Runtime API](./discovery-and-runtime.md)** — `/api/discovery/*` helpers (Git provider probe, repo/branch/tree pickers, image conflicts); `/api/workloads/{id}/runtime-state` + `/storage` + `/stop` + `/start`; SSRF-safe HTTP client in `internal/staticsite`.
|
- **[Discovery & Runtime API](./discovery-and-runtime.md)** — `/api/discovery/*` helpers (Git provider probe, repo/branch/tree pickers, image conflicts); `/api/workloads/{id}/runtime-state` + `/storage` + `/stop` + `/start`; SSRF-safe HTTP client in `internal/staticsite`.
|
||||||
|
- **[`containers.extra_json` Evolution Policy](./container-extra-json.md)** — Ownership model, reader/writer rules, wholesale-overwrite vs preserve-unknown-keys patterns, concurrency invariants; checklist for adding a new field without breaking older deployers.
|
||||||
|
|
||||||
## Cross-References
|
## Cross-References
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,105 @@
|
|||||||
|
# `containers.extra_json` — Evolution Policy
|
||||||
|
|
||||||
|
**Last Updated:** 2026-05-16
|
||||||
|
|
||||||
|
`extra_json` is a TEXT column on the `containers` table that source plugins use to persist source-specific runtime state that hasn't been promoted to a first-class column. It is the single forward-compatibility seam between the canonical container row and per-source needs that arise after a schema is in production.
|
||||||
|
|
||||||
|
This doc captures the rules every reader and writer must follow so new sources can extend the blob without breaking older ones.
|
||||||
|
|
||||||
|
## Schema position
|
||||||
|
|
||||||
|
- Column: `containers.extra_json TEXT NOT NULL DEFAULT '{}'` ([`internal/store/store.go:233`](../../internal/store/store.go#L233)).
|
||||||
|
- All four write paths (`CreateContainer`, `UpsertContainer`, `ReconcileContainer`, `UpdateContainer`) normalize `""` → `'{}'` before the SQL exec — readers can assume a non-empty JSON object string and never need to handle SQL `NULL` or the empty-string edge.
|
||||||
|
- Defined on the `Container` model: [`internal/store/models.go:342-347`](../../internal/store/models.go#L342-L347).
|
||||||
|
|
||||||
|
## Ownership model
|
||||||
|
|
||||||
|
**One container row → one owning source.** Sources never write to a row that belongs to another source. In practice:
|
||||||
|
|
||||||
|
| Source kind | Row key | Number of rows per workload | Writes `extra_json` today? |
|
||||||
|
| ----------- | -------------------------------------- | --------------------------- | --------------------------- |
|
||||||
|
| `static` | deterministic `<workloadID>:site` | exactly 1 | yes (preserve-unknown-keys) |
|
||||||
|
| `image` | UUID per deployed container | 1 + N (blue-green rolls) | yes (wholesale-overwrite) |
|
||||||
|
| `compose` | deterministic `<workloadID>:<service>` | N (one per compose service) | no — left at `'{}'` default |
|
||||||
|
|
||||||
|
Two sources cannot contend on the same row, so the policy below is concerned with **forward compatibility across versions of the same source**, not cross-source contention. When compose (or any future source) starts writing `extra_json`, the same rules apply.
|
||||||
|
|
||||||
|
## Reader rules — ALL readers
|
||||||
|
|
||||||
|
1. **Tolerate unknown keys.** Decode into a typed struct using `encoding/json`; Go's default unmarshaller silently drops unknown keys, which is the desired behaviour. Never use `json.Decoder.DisallowUnknownFields()` on `extra_json`.
|
||||||
|
2. **Tolerate decode failure as non-fatal where the row's first-class columns are useful.** A corrupted `extra_json` is debug-logged and the reader falls back to zero state — see `workload_runtime.go:118-133` for the canonical pattern. The container's `ContainerID`, `State`, `ProxyRouteID`, etc. live in their own columns and are still trustworthy.
|
||||||
|
3. **Tolerate `''` and `'{}'`.** Both are equivalent to "no extras yet". The store's write paths normalize `''` to `'{}'`, but readers should still short-circuit defensively before `json.Unmarshal`, which fails with `unexpected end of JSON input` on an empty string.
|
||||||
|
|
||||||
|
## Writer rules — by mutation style
|
||||||
|
|
||||||
|
Two distinct write patterns live in the codebase today. Pick the one that matches your source's needs.
|
||||||
|
|
||||||
|
### Wholesale-overwrite (image source pattern)
|
||||||
|
|
||||||
|
When the writer owns 100% of the blob's shape and discards old contents on every write:
|
||||||
|
|
||||||
|
```go
|
||||||
|
// internal/workload/plugin/source/image/image.go:341-343
|
||||||
|
extra := containerExtra{ProxyRoutes: faceRoutes}
|
||||||
|
if b, err := json.Marshal(extra); err == nil {
|
||||||
|
created.ExtraJSON = string(b)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
- Cheap and simple.
|
||||||
|
- **Loses unknown keys written by future versions of the same source.** Only use when you are certain no other writer (including a future version of this code) needs to round-trip an unknown key.
|
||||||
|
- The `containerExtra` struct must be **additive-only**: never rename or remove a field once shipped, and never change its JSON type. Mark new fields with `omitempty` so that an older binary reading the blob (for example, after a rollback to a previous release) doesn't see surprise nulls.
|
||||||
|
|
||||||
|
### Preserve-unknown-keys (static source pattern)
|
||||||
|
|
||||||
|
When future versions of the source (or sibling writers) may add fields and the current writer must round-trip them:
|
||||||
|
|
||||||
|
```go
|
||||||
|
// internal/workload/plugin/source/static/state.go saveState
|
||||||
|
// 1. Decode existing blob into map[string]json.RawMessage.
|
||||||
|
// 2. Strip every key the current typed-state struct owns
|
||||||
|
// (runtimeStateKeys) so a cleared field actually drops.
|
||||||
|
// 3. Apply caller's mutate() to the typed state.
|
||||||
|
// 4. Re-marshal typed state, splice its keys back into the
|
||||||
|
// generic map (overwriting any historical sibling).
|
||||||
|
// 5. Marshal the merged map back into extra_json.
|
||||||
|
```
|
||||||
|
|
||||||
|
- Slightly more expensive (two round-trips through `json`).
|
||||||
|
- Preserves keys the current writer doesn't know about — required for safe rolling deploys where a newer instance writes a new key, an older instance then reads, mutates, and writes back.
|
||||||
|
- Must declare the typed key set explicitly (`runtimeStateKeys`) so step 2 can strip them. This invariant is fenced by `TestRuntimeState_JSONTagsRoundTrip` in [`state_integration_test.go`](../../internal/workload/plugin/source/static/state_integration_test.go).
|
||||||
|
|
||||||
|
**Default to preserve-unknown-keys for any new source.** Wholesale-overwrite is acceptable for the image source today because the row's lifetime is short (replaced on every blue-green roll) and only one writer touches it. Sources whose container rows are long-lived (static, future compose-with-stateful-services) should preserve unknown keys.
|
||||||
|
|
||||||
|
## Concurrency
|
||||||
|
|
||||||
|
`UpsertContainer` is atomic at the SQL layer — SQLite serializes statements through one connection ([`internal/store/store.go:55`](../../internal/store/store.go#L55) `SetMaxOpenConns(1)`) with WAL mode enabled ([`store.go:60`](../../internal/store/store.go#L60)). That guarantees no torn write on a single row, and concurrent readers see a consistent snapshot — they read either the pre- or post-write state, never a half-applied one.
|
||||||
|
|
||||||
|
What that does **not** guarantee is atomic read-modify-write across two Go goroutines. The static source serializes its RMW through a per-workload `sync.Mutex` keyed by workload ID (`internal/workload/plugin/source/static/state.go` `lockFor` + `saveState`). Any source that does its own read-modify-write on `extra_json` must do the same — verified in `TestSaveState_ConcurrentWritesDoNotLoseUpdates` (which loses 15+ markers per 20-writer run when the mutex is disabled, as confirmed in commit `ef62a41`).
|
||||||
|
|
||||||
|
If a future source is purely wholesale-overwrite from a single writer, no lock is needed.
|
||||||
|
|
||||||
|
## What `extra_json` is NOT for
|
||||||
|
|
||||||
|
- **Workload-level config.** Workload config goes in `workloads.source_config` and is the operator's surface.
|
||||||
|
- **Cross-source state.** If two sources need the same data, promote it to a column.
|
||||||
|
- **Anything queryable.** SQLite can JSON-path `extra_json` but no index supports it; readers always pull the column wholesale and parse in Go.
|
||||||
|
- **Secrets.** Anything sensitive lives in `workload_env` (per-entry encrypt flag) or another encrypted table.
|
||||||
|
|
||||||
|
## Adding a new field — checklist
|
||||||
|
|
||||||
|
1. Add the field to your source's typed struct with `omitempty` and a stable `json:"snake_case"` tag.
|
||||||
|
2. If you use the **preserve-unknown-keys** pattern, add the JSON key to your `*Keys` slice (the equivalent of `runtimeStateKeys`).
|
||||||
|
3. Confirm older readers (older deploys of the same binary) still parse the blob — `encoding/json` should drop the unknown key silently. Add a regression test if there's any doubt.
|
||||||
|
4. Document the new field in this codemap if it's load-bearing for cross-source code (e.g., the proxy_routes map drives `ListProxyRoutes`).
|
||||||
|
|
||||||
|
## Pointers
|
||||||
|
|
||||||
|
- Container model + `ExtraJSON` comment: [`internal/store/models.go:342-347`](../../internal/store/models.go#L342-L347)
|
||||||
|
- Schema declaration: [`internal/store/store.go:233`](../../internal/store/store.go#L233)
|
||||||
|
- Store-level normalization (`'{}'` default) across all four write paths: [`internal/store/containers.go:42-43`](../../internal/store/containers.go#L42-L43) (CreateContainer), `:77-78` (UpsertContainer), `:129-130` (ReconcileContainer), `:321-322` (UpdateContainer).
|
||||||
|
- Wholesale-overwrite writer + struct: [`image.go:341-343`](../../internal/workload/plugin/source/image/image.go#L341-L343) writes; [`image.go:481-487`](../../internal/workload/plugin/source/image/image.go#L481-L487) defines `containerExtra`; [`image.go:449-456`](../../internal/workload/plugin/source/image/image.go#L449-L456) reads it back in Teardown.
|
||||||
|
- Preserve-unknown-keys example + concurrency lock: [`internal/workload/plugin/source/static/state.go`](../../internal/workload/plugin/source/static/state.go).
|
||||||
|
- Canonical "decode-and-tolerate" consumer (the only cross-source reader in tree today): [`internal/api/workload_runtime.go:118-133`](../../internal/api/workload_runtime.go#L118-L133) decodes the static-only typed fields and falls back to first-class columns when the blob is empty, missing keys, or malformed.
|
||||||
|
|
||||||
|
Note: no cross-source consumer reads `extra_json` in `internal/store/`. The proxy/route data exposed by `ListProxyRoutes` ([`containers.go:196`](../../internal/store/containers.go#L196)) comes from first-class columns (`proxy_route_id`, `subdomain`, `port`); the `proxy_routes` map inside `extra_json` is read only by the image source's own Teardown for cleanup.
|
||||||
@@ -500,13 +500,15 @@ covers the use case — `promote-from` works, the UI shows the relationship.
|
|||||||
Probably can leave the legacy `stages` table dropped entirely once cutover
|
Probably can leave the legacy `stages` table dropped entirely once cutover
|
||||||
proceeds.
|
proceeds.
|
||||||
|
|
||||||
### `Container.extra_json` evolution
|
### ~~`Container.extra_json` evolution~~ — DONE (2026-05-16)
|
||||||
|
|
||||||
Currently only the image source uses it (per-face proxy route IDs). If
|
Both writer patterns now have an active example in-tree (image source
|
||||||
other sources gain similar needs (compose service health metadata, static
|
clobbers, static source preserves) and the policy is documented in
|
||||||
build SHAs), the schema there should stay versionless and additive — every
|
[`docs/CODEMAPS/container-extra-json.md`](CODEMAPS/container-extra-json.md):
|
||||||
reader must tolerate unknown keys. Document this in the source plugin
|
ownership model, wholesale-overwrite vs preserve-unknown-keys, reader
|
||||||
guide alongside the codemap entry.
|
tolerance for unknown keys + decode failure, the per-workload mutex
|
||||||
|
requirement for any read-modify-write writer, and a checklist for adding
|
||||||
|
a new field without breaking older deployers.
|
||||||
|
|
||||||
## File pointers for the next session
|
## File pointers for the next session
|
||||||
|
|
||||||
|
|||||||
@@ -16,13 +16,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// rateLimitedLogin wraps the login handler with per-IP rate limiting.
|
// rateLimitedLogin wraps the login handler with per-IP rate limiting.
|
||||||
|
// Uses clientIP() so X-Forwarded-For is honored only when the request
|
||||||
|
// arrives from a configured trusted-proxy CIDR — preventing remote
|
||||||
|
// attackers from spoofing the header to bypass the per-IP login limiter.
|
||||||
func (s *Server) rateLimitedLogin(rl *rateLimiter) http.HandlerFunc {
|
func (s *Server) rateLimitedLogin(rl *rateLimiter) http.HandlerFunc {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
ip := r.RemoteAddr
|
if !rl.allow(clientIP(r)) {
|
||||||
if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
|
|
||||||
ip = fwd
|
|
||||||
}
|
|
||||||
if !rl.allow(ip) {
|
|
||||||
respondError(w, http.StatusTooManyRequests, "too many login attempts, try again later")
|
respondError(w, http.StatusTooManyRequests, "too many login attempts, try again later")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
+73
-32
@@ -1,7 +1,6 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
@@ -118,7 +117,22 @@ func (s *Server) deleteBackup(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// restoreBackup handles POST /api/backups/{id}/restore.
|
// restoreBackup handles POST /api/backups/{id}/restore.
|
||||||
// This replaces the current database with the backup and triggers a graceful shutdown.
|
//
|
||||||
|
// Restore happens in three documented stages so a failure at any stage
|
||||||
|
// leaves the live DB intact:
|
||||||
|
//
|
||||||
|
// 1. PRE-FLIGHT (sync, before the HTTP response): PrepareRestore opens
|
||||||
|
// the candidate read-only and runs `PRAGMA integrity_check`. If it
|
||||||
|
// fails the live DB is untouched and we return 400 with the reason.
|
||||||
|
//
|
||||||
|
// 2. SAFETY NET: a pre-restore backup of the LIVE DB is created so the
|
||||||
|
// operator can roll back even if the candidate is later discovered
|
||||||
|
// to be missing data.
|
||||||
|
//
|
||||||
|
// 3. SWAP (async, after the response is flushed): close the live DB,
|
||||||
|
// atomic-rename the candidate over the live path, wipe WAL/SHM,
|
||||||
|
// trigger graceful shutdown. supervisord / systemd / docker
|
||||||
|
// restart=on-failure brings the process back with the new DB.
|
||||||
func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
||||||
if s.backupEngine == nil {
|
if s.backupEngine == nil {
|
||||||
respondError(w, http.StatusServiceUnavailable, "backup engine not initialized")
|
respondError(w, http.StatusServiceUnavailable, "backup engine not initialized")
|
||||||
@@ -126,13 +140,44 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
id := chi.URLParam(r, "id")
|
id := chi.URLParam(r, "id")
|
||||||
restorePath, err := s.backupEngine.RestorePath(id)
|
|
||||||
if err != nil {
|
// CSRF / accidental-fire guard: the restore endpoint is the most
|
||||||
respondError(w, http.StatusNotFound, "backup not found: "+err.Error())
|
// destructive surface in the API (replaces the whole DB). Even
|
||||||
|
// though it sits behind AdminOnly + Bearer JWT, a blind cross-site
|
||||||
|
// POST or a misclicked button in any open admin tab can fire it.
|
||||||
|
// Require the operator's client to echo X-Confirm-Restore: <id>
|
||||||
|
// — matching the path param — so a CSRF post-form / image-src
|
||||||
|
// trick can't trigger restore (browsers don't let cross-origin
|
||||||
|
// requests set custom headers without a preflight).
|
||||||
|
if confirm := r.Header.Get("X-Confirm-Restore"); confirm != id {
|
||||||
|
respondError(w, http.StatusBadRequest,
|
||||||
|
"missing or mismatched X-Confirm-Restore header (must equal backup id)")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a safety backup before restore so the user can undo if needed.
|
// Single-flight guard: a rapid double-click would otherwise spawn
|
||||||
|
// two goroutines racing s.store.Close() and the candidate-over-
|
||||||
|
// live rename. CAS to true here; if someone else won, return 409.
|
||||||
|
if !s.restoreInFlight.CompareAndSwap(false, true) {
|
||||||
|
respondError(w, http.StatusConflict, "a restore is already in progress")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Do NOT release the flag — the restore path triggers shutdown.
|
||||||
|
// A failed restore is also terminal (the DB may be closed); a
|
||||||
|
// fresh process boot is the recovery path.
|
||||||
|
// PRE-FLIGHT: refuse before touching anything if the candidate is
|
||||||
|
// not a valid SQLite database or fails integrity_check. This is the
|
||||||
|
// guard the prior code lacked — a corrupt backup would silently
|
||||||
|
// overwrite a healthy live DB.
|
||||||
|
restorePath, err := s.backupEngine.PrepareRestore(id)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusBadRequest, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY NET: pre-restore snapshot of the live DB. A failure here
|
||||||
|
// is logged but does not abort — the integrity-checked candidate
|
||||||
|
// is still safer than refusing to restore.
|
||||||
if _, err := s.backupEngine.CreateBackup("pre-restore"); err != nil {
|
if _, err := s.backupEngine.CreateBackup("pre-restore"); err != nil {
|
||||||
slog.Warn("failed to create pre-restore backup", "error", err)
|
slog.Warn("failed to create pre-restore backup", "error", err)
|
||||||
}
|
}
|
||||||
@@ -153,41 +198,37 @@ func (s *Server) restoreBackup(w http.ResponseWriter, r *http.Request) {
|
|||||||
go func() {
|
go func() {
|
||||||
time.Sleep(500 * time.Millisecond)
|
time.Sleep(500 * time.Millisecond)
|
||||||
|
|
||||||
// Close the current database to release locks.
|
// Once we begin closing the live DB the process can no longer serve
|
||||||
|
// requests against a sane store, so EVERY exit path from here must
|
||||||
|
// trigger shutdown. Returning early would leave the server limping
|
||||||
|
// on a closed/half-swapped database with no path to recovery except
|
||||||
|
// an external kill. shutdownFunc → graceful shutdown → main returns
|
||||||
|
// → deferred releaseLock()/db.Close() run, and the supervisor reopens
|
||||||
|
// whatever DB is on disk on the next boot.
|
||||||
|
triggerShutdown := func() {
|
||||||
|
if s.shutdownFunc != nil {
|
||||||
|
s.shutdownFunc()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the current database to release locks. AtomicReplaceDB
|
||||||
|
// expects the live file to be unmapped before swap (especially
|
||||||
|
// important on Windows where open files cannot be renamed over).
|
||||||
if err := s.store.Close(); err != nil {
|
if err := s.store.Close(); err != nil {
|
||||||
slog.Error("restore: failed to close database", "error", err)
|
slog.Error("restore: failed to close database, restarting", "error", err)
|
||||||
|
triggerShutdown()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy the backup file over the main database using streaming (no full read into memory).
|
if err := s.backupEngine.AtomicReplaceDB(restorePath, s.dbPath); err != nil {
|
||||||
src, err := os.Open(restorePath)
|
slog.Error("restore: atomic replace failed, restarting", "error", err)
|
||||||
if err != nil {
|
triggerShutdown()
|
||||||
slog.Error("restore: failed to open backup file", "error", err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer src.Close()
|
|
||||||
|
|
||||||
dst, err := os.Create(s.dbPath)
|
|
||||||
if err != nil {
|
|
||||||
slog.Error("restore: failed to create database file", "error", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer dst.Close()
|
|
||||||
|
|
||||||
if _, err := io.Copy(dst, src); err != nil {
|
|
||||||
slog.Error("restore: failed to copy backup to database", "error", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove WAL and SHM files to ensure clean state.
|
|
||||||
os.Remove(s.dbPath + "-wal")
|
|
||||||
os.Remove(s.dbPath + "-shm")
|
|
||||||
|
|
||||||
slog.Info("restore: database replaced, triggering shutdown")
|
slog.Info("restore: database replaced, triggering shutdown")
|
||||||
|
|
||||||
// Signal the server to shut down gracefully so it can be restarted.
|
// Signal the server to shut down gracefully so it can be restarted.
|
||||||
if s.shutdownFunc != nil {
|
triggerShutdown()
|
||||||
s.shutdownFunc()
|
|
||||||
}
|
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/docker"
|
||||||
"github.com/alexei/tinyforge/internal/staticsite"
|
"github.com/alexei/tinyforge/internal/staticsite"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -350,6 +351,54 @@ func (s *Server) listImageConflicts(w http.ResponseWriter, r *http.Request) {
|
|||||||
respondJSON(w, http.StatusOK, conflicts)
|
respondJSON(w, http.StatusOK, conflicts)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// inspectImageRequest is the JSON body accepted by
// POST /api/discovery/image/inspect.
type inspectImageRequest struct {
	// Image is the image reference to inspect. The handler trims
	// surrounding whitespace and rejects an empty value.
	Image string `json:"image"`
}
|
||||||
|
|
||||||
|
// inspectImageResponse is the JSON payload returned by
// POST /api/discovery/image/inspect. It mirrors the frontend
// InspectResult shape the new-app wizard pre-fills from.
type inspectImageResponse struct {
	// Port is the first exposed port parsed to an int; 0 when the image
	// exposes none.
	Port int `json:"port"`
	// Healthcheck is the image's HEALTHCHECK command string.
	Healthcheck string `json:"healthcheck"`
}
|
||||||
|
|
||||||
|
// inspectImageMetadata inspects a LOCAL image and returns its first
|
||||||
|
// exposed port + healthcheck so the wizard can pre-fill those fields.
|
||||||
|
// POST /api/discovery/image/inspect.
|
||||||
|
//
|
||||||
|
// This inspects local images only — it does not pull. When the image is
|
||||||
|
// not present locally the docker call fails; we return a generic,
|
||||||
|
// non-leaky 400 rather than the git-specific upstreamError so a raw
|
||||||
|
// docker daemon string (which may echo the ref) never reaches the client.
|
||||||
|
func (s *Server) inspectImageMetadata(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req inspectImageRequest
|
||||||
|
if !decodeJSON(w, r, &req) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
image := strings.TrimSpace(req.Image)
|
||||||
|
if image == "" {
|
||||||
|
respondError(w, http.StatusBadRequest, "image is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(r.Context(), discoveryTimeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
info, err := s.docker.InspectImage(ctx, image)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("inspect image metadata failed", "error", err)
|
||||||
|
respondError(w, http.StatusBadRequest, "could not inspect image — make sure it is pulled locally and the reference is correct")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
respondJSON(w, http.StatusOK, inspectImageResponse{
|
||||||
|
Port: docker.ExtractPort(info.ExposedPorts),
|
||||||
|
Healthcheck: info.Healthcheck,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// stripImageTag returns the image reference with the trailing :tag
|
// stripImageTag returns the image reference with the trailing :tag
|
||||||
// removed, taking care to leave a registry port (e.g. registry:5000/foo)
|
// removed, taking care to leave a registry port (e.g. registry:5000/foo)
|
||||||
// intact. Digest references (image@sha256:...) are returned unchanged.
|
// intact. Digest references (image@sha256:...) are returned unchanged.
|
||||||
|
|||||||
@@ -348,3 +348,32 @@ func (s *Server) pruneImages(w http.ResponseWriter, r *http.Request) {
|
|||||||
"space_reclaimed_mb": reclaimedBytes / (1024 * 1024),
|
"space_reclaimed_mb": reclaimedBytes / (1024 * 1024),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pruneBuildCache handles POST /api/docker/prune-build-cache. It removes
|
||||||
|
// unused Docker build-cache records daemon-wide (all=false), so an app's next
|
||||||
|
// rebuild still hits its warm cache. The build cache is regenerable by
|
||||||
|
// definition — pruning only forces slower rebuilds, never data loss — and the
|
||||||
|
// dockerfile/static deploy paths never reclaim it on teardown, so it grows
|
||||||
|
// monotonically until pruned here.
|
||||||
|
func (s *Server) pruneBuildCache(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if s.docker == nil {
|
||||||
|
respondError(w, http.StatusServiceUnavailable, "Docker is not available")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := s.docker.PruneBuildCache(r.Context(), false)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("prune: build cache", "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Info("prune: build cache",
|
||||||
|
"caches_deleted", result.CachesDeleted,
|
||||||
|
"space_reclaimed_mb", result.SpaceReclaimed/(1024*1024))
|
||||||
|
|
||||||
|
respondJSON(w, http.StatusOK, map[string]any{
|
||||||
|
"caches_deleted": result.CachesDeleted,
|
||||||
|
"space_reclaimed_mb": result.SpaceReclaimed / (1024 * 1024),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -37,6 +37,36 @@ func (s *Server) listEventLog(w http.ResponseWriter, r *http.Request) {
|
|||||||
respondJSON(w, http.StatusOK, events)
|
respondJSON(w, http.StatusOK, events)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// listWorkloadEvents handles GET /api/workloads/{id}/events — the per-app
|
||||||
|
// activity/deploy timeline. The workload id is pinned from the path, so a
|
||||||
|
// client cannot widen the scope to other workloads or the global feed.
|
||||||
|
// Supports the same severity/limit/offset query params as listEventLog.
|
||||||
|
func (s *Server) listWorkloadEvents(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
if id == "" {
|
||||||
|
respondError(w, http.StatusBadRequest, "workload id is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
q := r.URL.Query()
|
||||||
|
limit, _ := strconv.Atoi(q.Get("limit"))
|
||||||
|
offset, _ := strconv.Atoi(q.Get("offset"))
|
||||||
|
|
||||||
|
events, err := s.store.ListEvents(store.EventLogFilter{
|
||||||
|
WorkloadID: id,
|
||||||
|
Severity: q.Get("severity"),
|
||||||
|
Limit: limit,
|
||||||
|
Offset: offset,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("failed to list workload events", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "failed to list events")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
respondJSON(w, http.StatusOK, events)
|
||||||
|
}
|
||||||
|
|
||||||
// getEventLogStats handles GET /api/events/log/stats.
|
// getEventLogStats handles GET /api/events/log/stats.
|
||||||
func (s *Server) getEventLogStats(w http.ResponseWriter, r *http.Request) {
|
func (s *Server) getEventLogStats(w http.ResponseWriter, r *http.Request) {
|
||||||
stats, err := s.store.GetEventStats()
|
stats, err := s.store.GetEventStats()
|
||||||
|
|||||||
@@ -0,0 +1,64 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
// livez always returns 200 if the process is up. Used by container
|
||||||
|
// orchestrators / load balancers / Docker HEALTHCHECK as the "is the
|
||||||
|
// binary alive" probe. Intentionally does NOT touch the DB or Docker —
|
||||||
|
// a slow DB must not cause restart loops.
|
||||||
|
func (s *Server) livez(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||||
|
_, _ = w.Write([]byte("ok\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// readyz returns 200 only when the process can actually serve traffic:
|
||||||
|
// SQLite is reachable, the encryption key is loaded, the deployer is
|
||||||
|
// not draining. The response body is intentionally minimal — the
|
||||||
|
// specific failing probe name is recorded in slog (operator-visible)
|
||||||
|
// rather than returned to unauthenticated callers. This avoids handing
|
||||||
|
// reconnaissance to an attacker who can hit /readyz during an outage
|
||||||
|
// ("DB down" vs "encryption key missing" leaks operational state).
|
||||||
|
func (s *Server) readyz(w http.ResponseWriter, r *http.Request) {
|
||||||
|
ctx, cancel := context.WithTimeout(r.Context(), 2*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// DB ping: cheap and exact — exercises the connection pool, file
|
||||||
|
// lock, and busy-timeout. A failing ping means SQLite WAL is wedged
|
||||||
|
// or the data dir is gone.
|
||||||
|
if err := s.store.DB().PingContext(ctx); err != nil {
|
||||||
|
slog.Warn("readyz: db ping failed", "error", err)
|
||||||
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
_, _ = w.Write([]byte("not ready\n"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encryption key sanity: if it's zero we cannot decrypt any stored
|
||||||
|
// secret, so the deployer paths will all explode at first use.
|
||||||
|
if s.encKey == ([32]byte{}) {
|
||||||
|
slog.Warn("readyz: encryption key not loaded")
|
||||||
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
_, _ = w.Write([]byte("not ready\n"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||||
|
_, _ = w.Write([]byte("ready\n"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// metricsExport writes the process-wide metrics registry in Prometheus
|
||||||
|
// text format. Admin-only by router placement; surface is intentionally
|
||||||
|
// thin (no histograms / quantiles, only counters) to keep the binary
|
||||||
|
// dependency-free.
|
||||||
|
func (s *Server) metricsExport(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
|
||||||
|
_ = metrics.DefaultRegistry.WritePrometheus(w)
|
||||||
|
}
|
||||||
@@ -0,0 +1,235 @@
|
|||||||
|
// Package api: metric-alert rule HTTP handlers. The evaluator lives in
|
||||||
|
// internal/metricalert; this file is the REST surface that lets
|
||||||
|
// operators create, edit, and delete threshold rules. Mirrors the
|
||||||
|
// log-scan rule handlers.
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// metricAlertRuleInput is the JSON shape accepted by POST and PATCH on
// /api/metric-alert-rules. Every field is a pointer so the handlers can
// tell "absent" (nil) from an explicit empty/zero value. WorkloadID is
// immutable on update (per store.UpdateMetricAlertRule), so it only takes
// effect on create.
type metricAlertRuleInput struct {
	WorkloadID      *string  `json:"workload_id"`
	Name            *string  `json:"name"`
	Metric          *string  `json:"metric"`
	Comparator      *string  `json:"comparator"`
	Threshold       *float64 `json:"threshold"`
	Severity        *string  `json:"severity"`
	CooldownSeconds *int     `json:"cooldown_seconds"`
	Enabled         *bool    `json:"enabled"`
}
|
||||||
|
|
||||||
|
// listMetricAlertRules handles GET /api/metric-alert-rules. Optional
|
||||||
|
// query filter `workload_id=...` returns rules applying to that workload
|
||||||
|
// (its own rows plus globals).
|
||||||
|
func (s *Server) listMetricAlertRules(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if wlID := r.URL.Query().Get("workload_id"); wlID != "" {
|
||||||
|
out, err := s.store.ListMetricAlertRulesByWorkload(wlID)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "list metric alert rules")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, out)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out, err := s.store.ListMetricAlertRules()
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "list metric alert rules")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getMetricAlertRule handles GET /api/metric-alert-rules/{id}.
|
||||||
|
func (s *Server) getMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id, ok := parseMetricAlertRuleID(w, r)
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rule, err := s.store.GetMetricAlertRule(id)
|
||||||
|
if err != nil {
|
||||||
|
mapStoreError(w, err, "metric alert rule")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, rule)
|
||||||
|
}
|
||||||
|
|
||||||
|
// createMetricAlertRule handles POST /api/metric-alert-rules.
|
||||||
|
func (s *Server) createMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var in metricAlertRuleInput
|
||||||
|
if !decodeJSON(w, r, &in) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rule := store.MetricAlertRule{
|
||||||
|
WorkloadID: derefString(in.WorkloadID),
|
||||||
|
Name: derefString(in.Name),
|
||||||
|
Metric: derefString(in.Metric),
|
||||||
|
Comparator: derefString(in.Comparator),
|
||||||
|
Threshold: derefFloat64(in.Threshold),
|
||||||
|
Severity: firstNonEmpty(derefString(in.Severity), store.LogScanSeverityWarn),
|
||||||
|
CooldownSeconds: derefIntDefault(in.CooldownSeconds, 300),
|
||||||
|
Enabled: in.Enabled == nil || *in.Enabled,
|
||||||
|
}
|
||||||
|
if msg := validateMetricAlertInput(rule); msg != "" {
|
||||||
|
respondError(w, http.StatusBadRequest, msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out, err := s.store.CreateMetricAlertRule(rule)
|
||||||
|
if err != nil {
|
||||||
|
if isMetricAlertValidationErr(err) {
|
||||||
|
respondError(w, http.StatusBadRequest, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "create metric alert rule")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusCreated, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateMetricAlertRule handles PATCH /api/metric-alert-rules/{id}.
|
||||||
|
// workload_id is immutable; name/metric/comparator/threshold/severity/
|
||||||
|
// cooldown/enabled are individually overridable.
|
||||||
|
func (s *Server) updateMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id, ok := parseMetricAlertRuleID(w, r)
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
existing, err := s.store.GetMetricAlertRule(id)
|
||||||
|
if err != nil {
|
||||||
|
mapStoreError(w, err, "metric alert rule")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var in metricAlertRuleInput
|
||||||
|
if !decodeJSON(w, r, &in) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if in.Name != nil {
|
||||||
|
existing.Name = *in.Name
|
||||||
|
}
|
||||||
|
if in.Metric != nil && *in.Metric != "" {
|
||||||
|
existing.Metric = *in.Metric
|
||||||
|
}
|
||||||
|
if in.Comparator != nil && *in.Comparator != "" {
|
||||||
|
existing.Comparator = *in.Comparator
|
||||||
|
}
|
||||||
|
if in.Threshold != nil {
|
||||||
|
existing.Threshold = *in.Threshold
|
||||||
|
}
|
||||||
|
if in.Severity != nil && *in.Severity != "" {
|
||||||
|
existing.Severity = *in.Severity
|
||||||
|
}
|
||||||
|
if in.CooldownSeconds != nil {
|
||||||
|
existing.CooldownSeconds = *in.CooldownSeconds
|
||||||
|
}
|
||||||
|
if in.Enabled != nil {
|
||||||
|
existing.Enabled = *in.Enabled
|
||||||
|
}
|
||||||
|
if msg := validateMetricAlertInput(existing); msg != "" {
|
||||||
|
respondError(w, http.StatusBadRequest, msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out, err := s.store.UpdateMetricAlertRule(existing)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "metric alert rule")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if isMetricAlertValidationErr(err) {
|
||||||
|
respondError(w, http.StatusBadRequest, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "update metric alert rule")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteMetricAlertRule handles DELETE /api/metric-alert-rules/{id}.
|
||||||
|
func (s *Server) deleteMetricAlertRule(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id, ok := parseMetricAlertRuleID(w, r)
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := s.store.DeleteMetricAlertRule(id); err != nil {
|
||||||
|
mapStoreError(w, err, "metric alert rule")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateMetricAlertInput does boundary validation so we return a
|
||||||
|
// clear 400 before hitting the store. The store re-validates the same
|
||||||
|
// invariants as a backstop.
|
||||||
|
func validateMetricAlertInput(rule store.MetricAlertRule) string {
|
||||||
|
if strings.TrimSpace(rule.Name) == "" {
|
||||||
|
return "name is required"
|
||||||
|
}
|
||||||
|
switch rule.Metric {
|
||||||
|
case store.MetricCPUPercent, store.MetricMemoryPercent, store.MetricMemoryBytes:
|
||||||
|
default:
|
||||||
|
return "invalid metric: must be cpu_percent, memory_percent, or memory_bytes"
|
||||||
|
}
|
||||||
|
switch rule.Comparator {
|
||||||
|
case store.MetricComparatorGT, store.MetricComparatorLT:
|
||||||
|
default:
|
||||||
|
return "invalid comparator: must be gt or lt"
|
||||||
|
}
|
||||||
|
switch rule.Severity {
|
||||||
|
case store.LogScanSeverityInfo, store.LogScanSeverityWarn, store.LogScanSeverityError, "":
|
||||||
|
default:
|
||||||
|
return "invalid severity: must be info, warn, or error"
|
||||||
|
}
|
||||||
|
if rule.CooldownSeconds < 0 {
|
||||||
|
return "cooldown_seconds must be >= 0"
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// isMetricAlertValidationErr reports whether err is one of the store's
// validation errors, so the handlers can map it to 400 rather than 500
// without leaking driver text for anything else.
//
// Matching is by substring against the known validation messages; a nil
// error is never a validation error.
func isMetricAlertValidationErr(err error) bool {
	if err == nil {
		return false
	}

	msg := err.Error()
	for _, needle := range []string{
		"name is required",
		"invalid metric",
		"invalid comparator",
		"invalid severity",
		"cooldown_seconds must be",
	} {
		if strings.Contains(msg, needle) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
func parseMetricAlertRuleID(w http.ResponseWriter, r *http.Request) (int64, bool) {
|
||||||
|
raw := chi.URLParam(r, "id")
|
||||||
|
id, err := strconv.ParseInt(raw, 10, 64)
|
||||||
|
if err != nil || id <= 0 {
|
||||||
|
respondError(w, http.StatusBadRequest, "invalid rule id")
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return id, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// derefFloat64 returns the value p points to, or 0 when p is nil.
func derefFloat64(p *float64) float64 {
	if p == nil {
		return 0
	}
	return *p
}
|
||||||
+318
-7
@@ -1,14 +1,119 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"encoding/hex"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// requestIDKeyType is the unexported type of the request-ID context key.
// Using a dedicated struct type guarantees the key cannot collide with a
// key defined by any other package.
type requestIDKeyType struct{}

// requestIDKey is the context key under which the generated/forwarded
// X-Request-ID is stored. It is exposed indirectly via
// RequestIDFromContext so handlers and services downstream of the API
// layer can thread the ID into their own slog calls without re-extracting
// it from headers.
var requestIDKey = requestIDKeyType{}

// RequestIDFromContext returns the correlation ID for the request, or
// "" when called outside the API request path (no value stored, or the
// stored value is not a string).
func RequestIDFromContext(ctx context.Context) string {
	if v, ok := ctx.Value(requestIDKey).(string); ok {
		return v
	}
	return ""
}
|
||||||
|
|
||||||
|
// requestID middleware ensures every request has a stable correlation
|
||||||
|
// ID. Honors a caller-supplied X-Request-ID when the request comes from
|
||||||
|
// a trusted proxy AND the value matches a safe character set; otherwise
|
||||||
|
// generates a fresh 128-bit ID. The ID is echoed back as X-Request-ID
|
||||||
|
// and stitched into every subsequent slog call via the context value
|
||||||
|
// the `logging` middleware reads.
|
||||||
|
//
|
||||||
|
// Format clamp: a compromised reverse proxy (or one that mis-parses an
|
||||||
|
// untrusted header) could forward an ID containing newlines, semicolons,
|
||||||
|
// or other separator characters. Those would corrupt structured log
|
||||||
|
// parsers that assume one record per line / key-value. Restricting to
|
||||||
|
// `[A-Za-z0-9._-]{1,64}` covers UUIDs, hex IDs, and trace-context IDs
|
||||||
|
// without any sharp edges.
|
||||||
|
func requestID(next http.Handler) http.Handler {
|
||||||
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
rid := r.Header.Get("X-Request-ID")
|
||||||
|
if rid == "" || !isTrustedPeer(r) || !isValidRequestID(rid) {
|
||||||
|
rid = newRequestID()
|
||||||
|
}
|
||||||
|
w.Header().Set("X-Request-ID", rid)
|
||||||
|
ctx := context.WithValue(r.Context(), requestIDKey, rid)
|
||||||
|
next.ServeHTTP(w, r.WithContext(ctx))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// isValidRequestID reports whether s matches `[A-Za-z0-9._-]{1,64}`. It
// is a single linear byte scan rather than a regex, so nothing is
// compiled or allocated on the request path. Any non-ASCII byte fails
// the check.
func isValidRequestID(s string) bool {
	if len(s) == 0 || len(s) > 64 {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c >= 'A' && c <= 'Z':
		case c >= 'a' && c <= 'z':
		case c >= '0' && c <= '9':
		case c == '.' || c == '_' || c == '-':
		default:
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// isTrustedPeer is a thin wrapper around the TRUSTED_PROXY_CIDRS allow-
|
||||||
|
// list — we honor a forwarded request-id only from upstreams we already
|
||||||
|
// trust for X-Forwarded-For. Otherwise an internet client could spam
|
||||||
|
// log files with attacker-chosen IDs.
|
||||||
|
func isTrustedPeer(r *http.Request) bool {
|
||||||
|
peer := r.RemoteAddr
|
||||||
|
if host, _, err := net.SplitHostPort(peer); err == nil {
|
||||||
|
peer = host
|
||||||
|
}
|
||||||
|
if len(trustedProxyCIDRs) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
ip := net.ParseIP(peer)
|
||||||
|
if ip == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, n := range trustedProxyCIDRs {
|
||||||
|
if n.Contains(ip) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// newRequestID returns a fresh correlation ID: 16 bytes from crypto/rand
// rendered as 32 lowercase hex characters.
//
// If crypto/rand is unavailable it falls back to "ts-" plus a UTC
// nanosecond timestamp. That is extremely unlikely outside of broken
// environments, and because the ID is for tracing rather than security a
// predictable fallback is preferable to a panic.
func newRequestID() string {
	var b [16]byte
	if _, err := rand.Read(b[:]); err != nil {
		return "ts-" + time.Now().UTC().Format("20060102T150405.000000000")
	}
	return hex.EncodeToString(b[:])
}
|
||||||
|
|
||||||
// logging is an HTTP middleware that logs every request with method, path,
|
// logging is an HTTP middleware that logs every request with method, path,
|
||||||
// status code, and duration. Webhook URLs are redacted before being logged
|
// status code, and duration. Webhook URLs are redacted before being logged
|
||||||
// because the secret is the only authenticator — leaking it to log
|
// because the secret is the only authenticator — leaking it to log
|
||||||
@@ -20,15 +125,58 @@ func logging(next http.Handler) http.Handler {
|
|||||||
|
|
||||||
next.ServeHTTP(wrapped, r)
|
next.ServeHTTP(wrapped, r)
|
||||||
|
|
||||||
slog.Info("http request",
|
fields := []any{
|
||||||
"method", r.Method,
|
"method", r.Method,
|
||||||
"path", redactPath(r.URL.Path),
|
"path", redactPath(r.URL.Path),
|
||||||
"status", wrapped.status,
|
"status", wrapped.status,
|
||||||
"duration", time.Since(start).String(),
|
"duration", time.Since(start).String(),
|
||||||
)
|
}
|
||||||
|
if rq := redactQuery(r.URL.RawQuery); rq != "" {
|
||||||
|
fields = append(fields, "query", rq)
|
||||||
|
}
|
||||||
|
if rid := RequestIDFromContext(r.Context()); rid != "" {
|
||||||
|
fields = append(fields, "request_id", rid)
|
||||||
|
}
|
||||||
|
slog.Info("http request", fields...)
|
||||||
|
|
||||||
|
// Lightweight per-request counter. Bucket by status class so
|
||||||
|
// the cardinality stays at 5 × #methods regardless of how many
|
||||||
|
// distinct response codes we emit.
|
||||||
|
metrics.HTTPRequestsTotal.Inc(bucketMethod(r.Method), statusClass(wrapped.status))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bucketMethod maps an HTTP method onto a bounded label set for metrics.
// The nine standard methods are returned unchanged; every other token
// (RFC 7230 allows arbitrary ones) collapses to "other", so a malicious
// client cannot inflate the metrics map with made-up methods. Matching is
// exact and case-sensitive.
func bucketMethod(m string) string {
	for _, known := range [...]string{
		"GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS", "CONNECT", "TRACE",
	} {
		if m == known {
			return known
		}
	}
	return "other"
}
|
||||||
|
|
||||||
|
// statusClass reduces an HTTP status code to its class label: "1xx" through
// "5xx" for codes 100-599, and "other" for anything outside that range.
// Bucketing keeps metrics cardinality bounded, so a chatty endpoint cannot
// create one series per distinct response code.
func statusClass(code int) string {
	if code < 100 || code >= 600 {
		return "other"
	}
	labels := [...]string{"1xx", "2xx", "3xx", "4xx", "5xx"}
	// code is within 100..599 here, so code/100 is 1..5.
	return labels[code/100-1]
}
|
||||||
|
|
||||||
// redactPath strips secrets from URL paths that carry them in segments.
|
// redactPath strips secrets from URL paths that carry them in segments.
|
||||||
// Only the canonical /api/webhook/triggers/{secret} surface remains after
|
// Only the canonical /api/webhook/triggers/{secret} surface remains after
|
||||||
// the hard cutover.
|
// the hard cutover.
|
||||||
@@ -40,6 +188,45 @@ func redactPath(path string) string {
|
|||||||
return path
|
return path
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// redactQueryKeys holds, in lowercase, the query-parameter names whose values
// are masked before a URL lands in the request log. `token` is used by
// SSE/EventSource when a custom header can't be set; the remaining names are
// defence-in-depth against sensitive values ever appearing in a query string.
var redactQueryKeys = map[string]struct{}{
	"token":         {},
	"secret":        {},
	"password":      {},
	"passwd":        {},
	"api_key":       {},
	"apikey":        {},
	"access_token":  {},
	"client_secret": {},
	"sig":           {},
	"signature":     {},
}

// redactQuery returns rawQuery with the value of every sensitive parameter
// (see redactQueryKeys) replaced by "***". The query is split on "&"; a
// segment without "=" is left alone, and the key (the text before the first
// "=") is lowercased before lookup, so matching is case-insensitive. Segments
// that are not redacted are passed through verbatim, so a malformed query
// surfaces naturally in the log.
//
// NOTE(review): keys are compared as raw text without percent-decoding, so an
// encoded key such as "%74oken" is not masked.
func redactQuery(rawQuery string) string {
	if rawQuery == "" {
		return ""
	}

	pairs := strings.Split(rawQuery, "&")
	for idx := range pairs {
		name, _, hasValue := strings.Cut(pairs[idx], "=")
		if !hasValue {
			continue
		}
		if _, sensitive := redactQueryKeys[strings.ToLower(name)]; !sensitive {
			continue
		}
		pairs[idx] = name + "=***"
	}
	return strings.Join(pairs, "&")
}
|
||||||
|
|
||||||
// recovery is an HTTP middleware that catches panics and returns a 500 response.
|
// recovery is an HTTP middleware that catches panics and returns a 500 response.
|
||||||
func recovery(next http.Handler) http.Handler {
|
func recovery(next http.Handler) http.Handler {
|
||||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -54,16 +241,49 @@ func recovery(next http.Handler) http.Handler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// securityHeaders sets standard security headers on all responses.
|
// securityHeaders sets standard security headers on all responses.
|
||||||
|
//
|
||||||
|
// Strict-Transport-Security is emitted only when the request arrived
|
||||||
|
// over HTTPS (direct TLS or forwarded). Emitting HSTS over plain HTTP
|
||||||
|
// is harmless to compliant browsers but flags as an issue in scanners
|
||||||
|
// and confuses some reverse proxies.
|
||||||
|
//
|
||||||
|
// The CSP keeps `'unsafe-inline'` for now because SvelteKit injects
|
||||||
|
// inline boot scripts and styles; removing it requires a nonce-based
|
||||||
|
// strategy threaded through the SvelteKit handle hook. Tracked as a
|
||||||
|
// follow-up; documented in the security report.
|
||||||
func securityHeaders(next http.Handler) http.Handler {
|
func securityHeaders(next http.Handler) http.Handler {
|
||||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
w.Header().Set("X-Content-Type-Options", "nosniff")
|
w.Header().Set("X-Content-Type-Options", "nosniff")
|
||||||
w.Header().Set("X-Frame-Options", "DENY")
|
w.Header().Set("X-Frame-Options", "DENY")
|
||||||
w.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
|
w.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
|
||||||
w.Header().Set("Content-Security-Policy", "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; connect-src 'self'; font-src 'self'")
|
w.Header().Set("Permissions-Policy", "camera=(), microphone=(), geolocation=(), payment=()")
|
||||||
|
w.Header().Set("Content-Security-Policy",
|
||||||
|
"default-src 'self'; "+
|
||||||
|
"script-src 'self' 'unsafe-inline'; "+
|
||||||
|
"style-src 'self' 'unsafe-inline'; "+
|
||||||
|
"img-src 'self' data:; "+
|
||||||
|
"connect-src 'self'; "+
|
||||||
|
"font-src 'self'; "+
|
||||||
|
"frame-ancestors 'none'; "+
|
||||||
|
"base-uri 'self'; "+
|
||||||
|
"form-action 'self'")
|
||||||
|
if isHTTPS(r) {
|
||||||
|
w.Header().Set("Strict-Transport-Security", "max-age=31536000; includeSubDomains")
|
||||||
|
}
|
||||||
next.ServeHTTP(w, r)
|
next.ServeHTTP(w, r)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isHTTPS reports whether the request reached the server over HTTPS: either
// the connection itself is TLS (r.TLS is set) or the X-Forwarded-Proto header
// names https.
//
// URI schemes are case-insensitive, so the header is compared with
// strings.EqualFold. When the header carries a comma-separated list (one
// entry per hop), only the first entry is considered; by convention that is
// the scheme the original client used.
//
// NOTE(review): the header is honored from any peer, not only from trusted
// proxies.
func isHTTPS(r *http.Request) bool {
	if r.TLS != nil {
		return true
	}
	proto, _, _ := strings.Cut(r.Header.Get("X-Forwarded-Proto"), ",")
	return strings.EqualFold(strings.TrimSpace(proto), "https")
}
|
||||||
|
|
||||||
// cors is an HTTP middleware that handles CORS for same-origin requests.
|
// cors is an HTTP middleware that handles CORS for same-origin requests.
|
||||||
// The frontend is served from the same origin, so cross-origin requests are not expected.
|
// The frontend is served from the same origin, so cross-origin requests are not expected.
|
||||||
func cors(next http.Handler) http.Handler {
|
func cors(next http.Handler) http.Handler {
|
||||||
@@ -164,10 +384,7 @@ func jsonContentType(next http.Handler) http.Handler {
|
|||||||
func rateLimitMiddleware(rl *rateLimiter) func(http.Handler) http.Handler {
|
func rateLimitMiddleware(rl *rateLimiter) func(http.Handler) http.Handler {
|
||||||
return func(next http.Handler) http.Handler {
|
return func(next http.Handler) http.Handler {
|
||||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
ip := r.RemoteAddr
|
ip := clientIP(r)
|
||||||
if fwd := r.Header.Get("X-Forwarded-For"); fwd != "" {
|
|
||||||
ip = fwd
|
|
||||||
}
|
|
||||||
if !rl.allow(ip) {
|
if !rl.allow(ip) {
|
||||||
respondError(w, http.StatusTooManyRequests, "rate limit exceeded")
|
respondError(w, http.StatusTooManyRequests, "rate limit exceeded")
|
||||||
return
|
return
|
||||||
@@ -177,6 +394,100 @@ func rateLimitMiddleware(rl *rateLimiter) func(http.Handler) http.Handler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// trustedProxyCIDRs is the parsed allow-list of upstream proxy networks
// whose X-Forwarded-For header we honor. Set TRUSTED_PROXY_CIDRS to a
// comma-separated list of CIDRs or bare IPs (e.g. "127.0.0.1/32,10.0.0.0/8")
// to enable. When unset (the default) X-Forwarded-For is ignored entirely
// and rate limiting + audit logging use r.RemoteAddr — preventing a
// remote attacker from spoofing the header to bypass per-IP limiters.
var trustedProxyCIDRs = parseTrustedProxyCIDRs(os.Getenv("TRUSTED_PROXY_CIDRS"))

// parseTrustedProxyCIDRs converts a comma-separated list of CIDRs and bare IP
// addresses into networks. Whitespace around entries is trimmed and empty
// entries are skipped. An entry that is neither a valid IP nor a valid CIDR
// is logged with slog.Warn and ignored. It returns nil when raw is blank or
// contains no valid entry.
//
// A bare IP becomes a single-host network: /32 for anything representable as
// IPv4, /128 otherwise. The network is built directly from the parsed address
// rather than by appending "/32" and re-parsing, because an IPv4-mapped IPv6
// literal such as "::ffff:10.0.0.1" would then be read as the IPv6 network
// "::/32", which never matches the intended proxy.
func parseTrustedProxyCIDRs(raw string) []*net.IPNet {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil
	}

	var nets []*net.IPNet
	for _, p := range strings.Split(raw, ",") {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}

		// Allow bare IPs as /32 (IPv4) or /128 (IPv6).
		if !strings.Contains(p, "/") {
			if ip := net.ParseIP(p); ip != nil {
				if ip4 := ip.To4(); ip4 != nil {
					nets = append(nets, &net.IPNet{IP: ip4, Mask: net.CIDRMask(32, 32)})
				} else {
					nets = append(nets, &net.IPNet{IP: ip, Mask: net.CIDRMask(128, 128)})
				}
				continue
			}
			// Not an IP: fall through so ParseCIDR reports the error below.
		}

		_, n, err := net.ParseCIDR(p)
		if err != nil {
			slog.Warn("ignoring invalid TRUSTED_PROXY_CIDRS entry", "value", p, "error", err)
			continue
		}
		nets = append(nets, n)
	}
	return nets
}
|
||||||
|
|
||||||
|
// clientIP returns the per-request "client" address used for rate-limit
|
||||||
|
// keying and audit attribution. X-Forwarded-For is honored ONLY when the
|
||||||
|
// direct peer (r.RemoteAddr) belongs to a configured trusted-proxy CIDR;
|
||||||
|
// otherwise the header is ignored to prevent header-spoofing bypasses.
|
||||||
|
func clientIP(r *http.Request) string {
|
||||||
|
peer := r.RemoteAddr
|
||||||
|
if host, _, err := net.SplitHostPort(peer); err == nil {
|
||||||
|
peer = host
|
||||||
|
}
|
||||||
|
if len(trustedProxyCIDRs) == 0 {
|
||||||
|
return peer
|
||||||
|
}
|
||||||
|
peerIP := net.ParseIP(peer)
|
||||||
|
if peerIP == nil || !isTrustedProxy(peerIP) {
|
||||||
|
return peer
|
||||||
|
}
|
||||||
|
fwd := r.Header.Get("X-Forwarded-For")
|
||||||
|
if fwd == "" {
|
||||||
|
return peer
|
||||||
|
}
|
||||||
|
// Walk X-Forwarded-For from the RIGHTMOST entry (the address closest to
|
||||||
|
// us, appended by our trusted peer) leftward, skipping entries that are
|
||||||
|
// themselves trusted proxies, and return the first untrusted address.
|
||||||
|
// The LEFTMOST entry is fully client-controlled — trusting it (as a
|
||||||
|
// naive `fwd[:firstComma]` does) lets an attacker spoof their rate-limit
|
||||||
|
// and audit identity by prepending a forged value, defeating the per-IP
|
||||||
|
// login limiter.
|
||||||
|
parts := strings.Split(fwd, ",")
|
||||||
|
for i := len(parts) - 1; i >= 0; i-- {
|
||||||
|
candidate := strings.TrimSpace(parts[i])
|
||||||
|
ip := net.ParseIP(candidate)
|
||||||
|
if ip == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if isTrustedProxy(ip) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return candidate
|
||||||
|
}
|
||||||
|
// Every forwarded entry was a trusted proxy (or unparseable) — fall back
|
||||||
|
// to the direct peer.
|
||||||
|
return peer
|
||||||
|
}
|
||||||
|
|
||||||
|
// isTrustedProxy reports whether ip falls within a configured
|
||||||
|
// trusted-proxy CIDR.
|
||||||
|
func isTrustedProxy(ip net.IP) bool {
|
||||||
|
for _, n := range trustedProxyCIDRs {
|
||||||
|
if n.Contains(ip) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// statusRecorder wraps http.ResponseWriter to capture the status code.
|
// statusRecorder wraps http.ResponseWriter to capture the status code.
|
||||||
type statusRecorder struct {
|
type statusRecorder struct {
|
||||||
http.ResponseWriter
|
http.ResponseWriter
|
||||||
|
|||||||
+115
-8
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
"github.com/go-chi/chi/v5"
|
||||||
|
|
||||||
@@ -18,6 +19,7 @@ import (
|
|||||||
"github.com/alexei/tinyforge/internal/proxy"
|
"github.com/alexei/tinyforge/internal/proxy"
|
||||||
"github.com/alexei/tinyforge/internal/stale"
|
"github.com/alexei/tinyforge/internal/stale"
|
||||||
"github.com/alexei/tinyforge/internal/store"
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
"github.com/alexei/tinyforge/internal/volsnap"
|
||||||
"github.com/alexei/tinyforge/internal/webhook"
|
"github.com/alexei/tinyforge/internal/webhook"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
)
|
)
|
||||||
@@ -55,12 +57,20 @@ type Server struct {
|
|||||||
onDNSProviderChanged DNSProviderChangedFunc
|
onDNSProviderChanged DNSProviderChangedFunc
|
||||||
|
|
||||||
backupEngine *backup.Engine
|
backupEngine *backup.Engine
|
||||||
|
snapshotEngine *volsnap.Engine
|
||||||
sseGate *sseGate
|
sseGate *sseGate
|
||||||
logScanReloader LogScanReloader
|
logScanReloader LogScanReloader
|
||||||
dbPath string
|
dbPath string
|
||||||
shutdownFunc func() // called after restore to trigger graceful shutdown
|
shutdownFunc func() // called after restore to trigger graceful shutdown
|
||||||
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
|
onBackupSettingsChanged func(enabled bool, intervalHours int) // called when backup settings change
|
||||||
onProxyProviderChanged func(provider proxy.Provider) // called when proxy provider changes
|
onProxyProviderChanged func(provider proxy.Provider) // called when proxy provider changes
|
||||||
|
|
||||||
|
// restoreInFlight is a process-wide guard against double-firing
|
||||||
|
// the restore endpoint. A rapid double-click would otherwise
|
||||||
|
// schedule two goroutines racing s.store.Close() and the
|
||||||
|
// candidate-over-live rename. CAS to true at the entry point;
|
||||||
|
// reject the second caller with 409 Conflict.
|
||||||
|
restoreInFlight atomic.Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewServer creates a new API Server with all required dependencies.
|
// NewServer creates a new API Server with all required dependencies.
|
||||||
@@ -111,6 +121,11 @@ func (s *Server) SetBackupEngine(engine *backup.Engine) {
|
|||||||
s.backupEngine = engine
|
s.backupEngine = engine
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetSnapshotEngine installs the volume-snapshot engine on the server by
// assigning it to s.snapshotEngine, replacing any engine set earlier. The
// assignment is a plain field write with no locking.
// NOTE(review): presumably called once during startup wiring, before the
// router serves requests — confirm against the caller.
|
||||||
|
func (s *Server) SetSnapshotEngine(engine *volsnap.Engine) {
|
||||||
|
s.snapshotEngine = engine
|
||||||
|
}
|
||||||
|
|
||||||
// SetDBPath sets the database file path (needed for restore).
|
// SetDBPath sets the database file path (needed for restore).
|
||||||
func (s *Server) SetDBPath(path string) {
|
func (s *Server) SetDBPath(path string) {
|
||||||
s.dbPath = path
|
s.dbPath = path
|
||||||
@@ -157,13 +172,32 @@ func (s *Server) SetDNSProviderChangedCallback(fn DNSProviderChangedFunc) {
|
|||||||
|
|
||||||
// initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal.
|
// initOIDCProvider creates an OIDC provider from settings. Errors are logged, not fatal.
|
||||||
func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
|
func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
|
||||||
// Decrypt the OIDC client secret if it's encrypted.
|
// Decrypt the OIDC client secret. The prior code did a try-decrypt
|
||||||
|
// and silently treated failures as plaintext — under a rotated key
|
||||||
|
// that sent ciphertext upstream to the OP. Now:
|
||||||
|
// - If the value carries the tf1: envelope → fail loud on
|
||||||
|
// decrypt failure (rotated key / corrupted ciphertext).
|
||||||
|
// - If the value is unprefixed (legacy ciphertext from v0 or true
|
||||||
|
// plaintext from an old migration) → try decrypt; on failure
|
||||||
|
// accept as plaintext (the only safe legacy interpretation).
|
||||||
clientSecret := as.OIDCClientSecret
|
clientSecret := as.OIDCClientSecret
|
||||||
if clientSecret != "" {
|
if clientSecret != "" {
|
||||||
|
switch {
|
||||||
|
case crypto.HasEnvelope(clientSecret):
|
||||||
|
decrypted, err := crypto.Decrypt(s.encKey, clientSecret)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("OIDC client secret could not be decrypted — refusing to initialize provider",
|
||||||
|
"error", err,
|
||||||
|
"hint", "rotate ENCRYPTION_KEY back, OR re-save OIDC settings to re-encrypt with the current key")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
clientSecret = decrypted
|
||||||
|
default:
|
||||||
|
// Legacy v0 value: try decrypt; on failure assume plaintext.
|
||||||
if decrypted, err := crypto.Decrypt(s.encKey, clientSecret); err == nil {
|
if decrypted, err := crypto.Decrypt(s.encKey, clientSecret); err == nil {
|
||||||
clientSecret = decrypted
|
clientSecret = decrypted
|
||||||
}
|
}
|
||||||
// If decrypt fails, assume it's already plaintext (migration scenario).
|
}
|
||||||
}
|
}
|
||||||
provider, err := auth.NewOIDCProvider(ctx, auth.OIDCConfig{
|
provider, err := auth.NewOIDCProvider(ctx, auth.OIDCConfig{
|
||||||
IssuerURL: as.OIDCIssuerURL,
|
IssuerURL: as.OIDCIssuerURL,
|
||||||
@@ -183,12 +217,29 @@ func (s *Server) initOIDCProvider(ctx context.Context, as store.AuthSettings) {
|
|||||||
func (s *Server) Router() chi.Router {
|
func (s *Server) Router() chi.Router {
|
||||||
r := chi.NewRouter()
|
r := chi.NewRouter()
|
||||||
|
|
||||||
// Global middleware.
|
// Global middleware. requestID runs first so every downstream log
|
||||||
|
// line (and the access log emitted by `logging`) carries the same
|
||||||
|
// correlation id, plus the response carries it back on the
|
||||||
|
// X-Request-ID header for the operator to grep across services.
|
||||||
|
r.Use(requestID)
|
||||||
r.Use(recovery)
|
r.Use(recovery)
|
||||||
r.Use(securityHeaders)
|
r.Use(securityHeaders)
|
||||||
r.Use(logging)
|
r.Use(logging)
|
||||||
r.Use(cors)
|
r.Use(cors)
|
||||||
|
|
||||||
|
// Unauthenticated health probes — mounted at the root so container
|
||||||
|
// orchestrators / load balancers can hit them without knowing about
|
||||||
|
// the /api prefix. /livez intentionally does no work and stays
|
||||||
|
// unbounded; /readyz pings the DB and is rate-limited to keep an
|
||||||
|
// unauthenticated flood from serialising behind SQLite's single
|
||||||
|
// writer connection (busy-timeout = 5s) and log-amplifying every
|
||||||
|
// request via the structured access log. The 10-per-minute budget
|
||||||
|
// is the existing rateLimiter default — generous for k8s readiness
|
||||||
|
// probes (typically every 5-10s), restrictive for an attacker.
|
||||||
|
r.Get("/livez", s.livez)
|
||||||
|
readyLimiter := newRateLimiter()
|
||||||
|
r.With(rateLimitMiddleware(readyLimiter)).Get("/readyz", s.readyz)
|
||||||
|
|
||||||
loginLimiter := newRateLimiter()
|
loginLimiter := newRateLimiter()
|
||||||
webhookLimiter := newRateLimiter()
|
webhookLimiter := newRateLimiter()
|
||||||
|
|
||||||
@@ -232,6 +283,7 @@ func (s *Server) Router() chi.Router {
|
|||||||
r.Post("/discovery/git/branches", s.listGitBranches)
|
r.Post("/discovery/git/branches", s.listGitBranches)
|
||||||
r.Post("/discovery/git/tree", s.listGitTree)
|
r.Post("/discovery/git/tree", s.listGitTree)
|
||||||
r.Get("/discovery/image/conflicts", s.listImageConflicts)
|
r.Get("/discovery/image/conflicts", s.listImageConflicts)
|
||||||
|
r.Post("/discovery/image/inspect", s.inspectImageMetadata)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Read-only endpoints (any authenticated user).
|
// Read-only endpoints (any authenticated user).
|
||||||
@@ -245,17 +297,19 @@ func (s *Server) Router() chi.Router {
|
|||||||
r.Get("/events/log/stats", s.getEventLogStats)
|
r.Get("/events/log/stats", s.getEventLogStats)
|
||||||
r.Get("/registries", s.listRegistries)
|
r.Get("/registries", s.listRegistries)
|
||||||
r.Route("/registries/{id}", func(r chi.Router) {
|
r.Route("/registries/{id}", func(r chi.Router) {
|
||||||
|
// All registry probes are admin-gated. The /tags and
|
||||||
|
// /images endpoints used to be open to any authenticated
|
||||||
|
// user, but they make outbound requests using the
|
||||||
|
// admin-encrypted registry token — a viewer could
|
||||||
|
// effectively drive arbitrary requests against a private
|
||||||
|
// registry under admin credentials.
|
||||||
|
r.Use(auth.AdminOnly)
|
||||||
r.Get("/tags/*", s.listRegistryTags)
|
r.Get("/tags/*", s.listRegistryTags)
|
||||||
r.Get("/images", s.listRegistryImages)
|
r.Get("/images", s.listRegistryImages)
|
||||||
|
|
||||||
// Admin-only registry mutations.
|
|
||||||
r.Group(func(r chi.Router) {
|
|
||||||
r.Use(auth.AdminOnly)
|
|
||||||
r.Put("/", s.updateRegistry)
|
r.Put("/", s.updateRegistry)
|
||||||
r.Delete("/", s.deleteRegistry)
|
r.Delete("/", s.deleteRegistry)
|
||||||
r.Post("/test", s.testRegistry)
|
r.Post("/test", s.testRegistry)
|
||||||
})
|
})
|
||||||
})
|
|
||||||
r.Get("/settings", s.getSettings)
|
r.Get("/settings", s.getSettings)
|
||||||
r.Get("/settings/npm-certificates", s.listNpmCertificates)
|
r.Get("/settings/npm-certificates", s.listNpmCertificates)
|
||||||
r.Get("/settings/npm-access-lists", s.listNpmAccessLists)
|
r.Get("/settings/npm-access-lists", s.listNpmAccessLists)
|
||||||
@@ -282,11 +336,23 @@ func (s *Server) Router() chi.Router {
|
|||||||
r.With(auth.AdminOnly).Post("/start", s.startPluginWorkload)
|
r.With(auth.AdminOnly).Post("/start", s.startPluginWorkload)
|
||||||
r.With(auth.AdminOnly).Delete("/", s.deletePluginWorkload)
|
r.With(auth.AdminOnly).Delete("/", s.deletePluginWorkload)
|
||||||
|
|
||||||
|
// Volume snapshots (admin-only). Capture/list a workload's
|
||||||
|
// host-bind data volumes; {sid}-scoped download/delete live
|
||||||
|
// in the global admin group alongside backups.
|
||||||
|
r.With(auth.AdminOnly).Get("/snapshots", s.listWorkloadSnapshots)
|
||||||
|
r.With(auth.AdminOnly).Get("/snapshotable", s.getWorkloadSnapshotable)
|
||||||
|
r.With(auth.AdminOnly).Post("/snapshots", s.createWorkloadSnapshot)
|
||||||
|
|
||||||
// Runtime view: per-source persisted state + storage usage.
|
// Runtime view: per-source persisted state + storage usage.
|
||||||
// Read-only; safe for any authenticated user.
|
// Read-only; safe for any authenticated user.
|
||||||
r.Get("/runtime-state", s.getWorkloadRuntimeState)
|
r.Get("/runtime-state", s.getWorkloadRuntimeState)
|
||||||
r.Get("/storage", s.getWorkloadStorage)
|
r.Get("/storage", s.getWorkloadStorage)
|
||||||
|
|
||||||
|
// Per-workload activity / deploy timeline (read-only). Scoped
|
||||||
|
// to this workload's event-log rows; the global feed lives at
|
||||||
|
// /events/log.
|
||||||
|
r.Get("/events", s.listWorkloadEvents)
|
||||||
|
|
||||||
// Per-workload env vars. Listing open to authenticated readers;
|
// Per-workload env vars. Listing open to authenticated readers;
|
||||||
// mutations admin-gated. Encrypted values are write-only after store.
|
// mutations admin-gated. Encrypted values are write-only after store.
|
||||||
r.Get("/env", s.listWorkloadEnv)
|
r.Get("/env", s.listWorkloadEnv)
|
||||||
@@ -312,6 +378,15 @@ func (s *Server) Router() chi.Router {
|
|||||||
// of /triggers/{id}/bindings keyed on the workload side.
|
// of /triggers/{id}/bindings keyed on the workload side.
|
||||||
r.Get("/triggers", s.listBindingsForWorkload)
|
r.Get("/triggers", s.listBindingsForWorkload)
|
||||||
r.With(auth.AdminOnly).Post("/triggers", s.bindTriggerToWorkload)
|
r.With(auth.AdminOnly).Post("/triggers", s.bindTriggerToWorkload)
|
||||||
|
|
||||||
|
// Per-workload notification routes — multi-destination
|
||||||
|
// fan-out (Slack channel + Discord webhook + ...). When
|
||||||
|
// zero rows are configured the dispatcher falls back to
|
||||||
|
// the legacy single-URL columns on the workload row.
|
||||||
|
r.Get("/notifications", s.listWorkloadNotifications)
|
||||||
|
r.With(auth.AdminOnly).Post("/notifications", s.createWorkloadNotification)
|
||||||
|
r.With(auth.AdminOnly).Put("/notifications/{nid}", s.updateWorkloadNotification)
|
||||||
|
r.With(auth.AdminOnly).Delete("/notifications/{nid}", s.deleteWorkloadNotification)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Global container index, joined to workload + app names.
|
// Global container index, joined to workload + app names.
|
||||||
@@ -370,6 +445,26 @@ func (s *Server) Router() chi.Router {
|
|||||||
r.Post("/log-scan-rules/{id}/test", s.testLogScanRule)
|
r.Post("/log-scan-rules/{id}/test", s.testLogScanRule)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Metric-alert rules.
|
||||||
|
r.Get("/metric-alert-rules", s.listMetricAlertRules)
|
||||||
|
r.Get("/metric-alert-rules/{id}", s.getMetricAlertRule)
|
||||||
|
r.Group(func(r chi.Router) {
|
||||||
|
r.Use(auth.AdminOnly)
|
||||||
|
r.Post("/metric-alert-rules", s.createMetricAlertRule)
|
||||||
|
r.Patch("/metric-alert-rules/{id}", s.updateMetricAlertRule)
|
||||||
|
r.Delete("/metric-alert-rules/{id}", s.deleteMetricAlertRule)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Shared secrets (env vars shared across workloads by scope).
|
||||||
|
r.Get("/shared-secrets", s.listSharedSecrets)
|
||||||
|
r.Get("/shared-secrets/{id}", s.getSharedSecret)
|
||||||
|
r.Group(func(r chi.Router) {
|
||||||
|
r.Use(auth.AdminOnly)
|
||||||
|
r.Post("/shared-secrets", s.createSharedSecret)
|
||||||
|
r.Patch("/shared-secrets/{id}", s.updateSharedSecret)
|
||||||
|
r.Delete("/shared-secrets/{id}", s.deleteSharedSecret)
|
||||||
|
})
|
||||||
|
|
||||||
// System resources (read-only).
|
// System resources (read-only).
|
||||||
r.Get("/system/stats", s.getSystemStats)
|
r.Get("/system/stats", s.getSystemStats)
|
||||||
r.Get("/system/stats/history", s.getSystemStatsHistory)
|
r.Get("/system/stats/history", s.getSystemStatsHistory)
|
||||||
@@ -379,6 +474,12 @@ func (s *Server) Router() chi.Router {
|
|||||||
r.Group(func(r chi.Router) {
|
r.Group(func(r chi.Router) {
|
||||||
r.Use(auth.AdminOnly)
|
r.Use(auth.AdminOnly)
|
||||||
|
|
||||||
|
// Prometheus-format metrics export. Admin-only so the
|
||||||
|
// counter cardinality cannot be enumerated by a low-trust
|
||||||
|
// viewer to map internal endpoints / sources / outcomes.
|
||||||
|
// Scrape with bearer auth from your Prometheus job.
|
||||||
|
r.Get("/metrics", s.metricsExport)
|
||||||
|
|
||||||
// Config export (reveals registry/global details).
|
// Config export (reveals registry/global details).
|
||||||
r.Get("/config/export", s.exportConfig)
|
r.Get("/config/export", s.exportConfig)
|
||||||
|
|
||||||
@@ -414,6 +515,7 @@ func (s *Server) Router() chi.Router {
|
|||||||
|
|
||||||
// Docker management.
|
// Docker management.
|
||||||
r.Post("/docker/prune-images", s.pruneImages)
|
r.Post("/docker/prune-images", s.pruneImages)
|
||||||
|
r.Post("/docker/prune-build-cache", s.pruneBuildCache)
|
||||||
|
|
||||||
// NPM connection test.
|
// NPM connection test.
|
||||||
r.Post("/settings/npm/test", s.testNpmConnection)
|
r.Post("/settings/npm/test", s.testNpmConnection)
|
||||||
@@ -431,6 +533,11 @@ func (s *Server) Router() chi.Router {
|
|||||||
r.Get("/backups/{id}/download", s.downloadBackup)
|
r.Get("/backups/{id}/download", s.downloadBackup)
|
||||||
r.Delete("/backups/{id}", s.deleteBackup)
|
r.Delete("/backups/{id}", s.deleteBackup)
|
||||||
r.Post("/backups/{id}/restore", s.restoreBackup)
|
r.Post("/backups/{id}/restore", s.restoreBackup)
|
||||||
|
|
||||||
|
// Volume-snapshot download/delete (workload-scoped capture +
|
||||||
|
// list live under /workloads/{id}/snapshots).
|
||||||
|
r.Get("/snapshots/{sid}/download", s.downloadSnapshot)
|
||||||
|
r.Delete("/snapshots/{sid}", s.deleteSnapshot)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -0,0 +1,272 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"log/slog"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/crypto"
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// sharedSecretRow is the JSON shape returned to clients. The secret value is
|
||||||
|
// NEVER returned — once stored it is write-only (mirroring workload_env). The
|
||||||
|
// has_value flag lets the UI show whether a value is set without exposing it;
|
||||||
|
// to rotate, the operator submits a new value.
// Rows are produced from store.SharedSecret by toSharedSecretRow.
|
||||||
|
type sharedSecretRow struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
HasValue bool `json:"has_value"` // true when the stored value is non-empty
|
||||||
|
Encrypted bool `json:"encrypted"` // copied from the stored record; presumably "value is encrypted at rest" — confirm
|
||||||
|
Scope string `json:"scope"` // the create request documents "global | app"
|
||||||
|
AppID string `json:"app_id"` // the create request requires it when scope == app
|
||||||
|
Description string `json:"description"`
|
||||||
|
Enabled bool `json:"enabled"`
|
||||||
|
CreatedAt string `json:"created_at"` // passed through from the store as a string; format not visible here
|
||||||
|
UpdatedAt string `json:"updated_at"` // passed through from the store as a string; format not visible here
|
||||||
|
}
|
||||||
|
|
||||||
|
func toSharedSecretRow(sec store.SharedSecret) sharedSecretRow {
|
||||||
|
return sharedSecretRow{
|
||||||
|
ID: sec.ID,
|
||||||
|
Name: sec.Name,
|
||||||
|
HasValue: sec.Value != "",
|
||||||
|
Encrypted: sec.Encrypted,
|
||||||
|
Scope: sec.Scope,
|
||||||
|
AppID: sec.AppID,
|
||||||
|
Description: sec.Description,
|
||||||
|
Enabled: sec.Enabled,
|
||||||
|
CreatedAt: sec.CreatedAt,
|
||||||
|
UpdatedAt: sec.UpdatedAt,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// listSharedSecrets handles GET /api/shared-secrets. Values are redacted.
|
||||||
|
func (s *Server) listSharedSecrets(w http.ResponseWriter, r *http.Request) {
|
||||||
|
rows, err := s.store.ListSharedSecrets()
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "list shared secrets")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out := make([]sharedSecretRow, 0, len(rows))
|
||||||
|
for _, sec := range rows {
|
||||||
|
out = append(out, toSharedSecretRow(sec))
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getSharedSecret handles GET /api/shared-secrets/{id}. Value is redacted.
|
||||||
|
func (s *Server) getSharedSecret(w http.ResponseWriter, r *http.Request) {
|
||||||
|
sec, err := s.store.GetSharedSecret(chi.URLParam(r, "id"))
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "get shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, toSharedSecretRow(sec))
|
||||||
|
}
|
||||||
|
|
||||||
|
// createSharedSecretRequest is the POST body. Encrypted=true (the default for
// a non-empty value) causes the value to be encrypted at rest with the global
// key before it ever reaches the store.
//
// Encrypted and Enabled are pointers so that an omitted field (nil) can be
// told apart from an explicit false; the create handler substitutes true for
// nil.
type createSharedSecretRequest struct {
	Name        string `json:"name"`        // trimmed and checked with validEnvKey by the handler
	Value       string `json:"value"`       // secret value as submitted by the client
	Encrypted   *bool  `json:"encrypted"`   // defaults true
	Scope       string `json:"scope"`       // global | app
	AppID       string `json:"app_id"`      // required when scope == app
	Description string `json:"description"` // stored as given
	Enabled     *bool  `json:"enabled"`     // defaults true
}
|
||||||
|
|
||||||
|
func (s *Server) createSharedSecret(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req createSharedSecretRequest
|
||||||
|
if !decodeJSONStrict(w, r, &req) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
req.Name = strings.TrimSpace(req.Name)
|
||||||
|
if !validEnvKey(req.Name) {
|
||||||
|
respondError(w, http.StatusBadRequest, "name must be a valid env key [A-Za-z_][A-Za-z0-9_]*")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if msg := validateSharedSecretScope(req.Scope, req.AppID); msg != "" {
|
||||||
|
respondError(w, http.StatusBadRequest, msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
encrypted := true
|
||||||
|
if req.Encrypted != nil {
|
||||||
|
encrypted = *req.Encrypted
|
||||||
|
}
|
||||||
|
enabled := true
|
||||||
|
if req.Enabled != nil {
|
||||||
|
enabled = *req.Enabled
|
||||||
|
}
|
||||||
|
|
||||||
|
value, err := s.encryptSecretValue(req.Value, encrypted)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "encrypt value")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
sec, err := s.store.CreateSharedSecret(store.SharedSecret{
|
||||||
|
Name: req.Name,
|
||||||
|
Value: value,
|
||||||
|
Encrypted: encrypted,
|
||||||
|
Scope: req.Scope,
|
||||||
|
AppID: strings.TrimSpace(req.AppID),
|
||||||
|
Description: req.Description,
|
||||||
|
Enabled: enabled,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, store.ErrUnique) {
|
||||||
|
respondError(w, http.StatusConflict, "a shared secret with this scope and name already exists")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "create shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusCreated, toSharedSecretRow(sec))
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateSharedSecretRequest is the PATCH body. Every field is optional; nil
// means "leave unchanged". A nil Value preserves the stored ciphertext (so a
// metadata-only edit can't accidentally blank a secret); a non-nil Value
// rotates it (re-encrypted under the effective Encrypted flag).
//
// The update handler rejects a change of Encrypted that is not accompanied by
// a new Value.
type updateSharedSecretRequest struct {
	Name        *string `json:"name"`        // trimmed and re-validated as an env key when set
	Value       *string `json:"value"`       // nil keeps the stored value untouched
	Encrypted   *bool   `json:"encrypted"`   // changing it requires Value to be resubmitted
	Scope       *string `json:"scope"`       // re-validated together with AppID after merging
	AppID       *string `json:"app_id"`      // trimmed when set
	Description *string `json:"description"` // stored as given when set
	Enabled     *bool   `json:"enabled"`     // stored as given when set
}
|
||||||
|
|
||||||
|
func (s *Server) updateSharedSecret(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
existing, err := s.store.GetSharedSecret(id)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "get shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req updateSharedSecretRequest
|
||||||
|
if !decodeJSONStrict(w, r, &req) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
merged := existing
|
||||||
|
if req.Name != nil {
|
||||||
|
merged.Name = strings.TrimSpace(*req.Name)
|
||||||
|
if !validEnvKey(merged.Name) {
|
||||||
|
respondError(w, http.StatusBadRequest, "name must be a valid env key [A-Za-z_][A-Za-z0-9_]*")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if req.Encrypted != nil {
|
||||||
|
merged.Encrypted = *req.Encrypted
|
||||||
|
}
|
||||||
|
if req.Scope != nil {
|
||||||
|
merged.Scope = *req.Scope
|
||||||
|
}
|
||||||
|
if req.AppID != nil {
|
||||||
|
merged.AppID = strings.TrimSpace(*req.AppID)
|
||||||
|
}
|
||||||
|
if req.Description != nil {
|
||||||
|
merged.Description = *req.Description
|
||||||
|
}
|
||||||
|
if req.Enabled != nil {
|
||||||
|
merged.Enabled = *req.Enabled
|
||||||
|
}
|
||||||
|
if msg := validateSharedSecretScope(merged.Scope, merged.AppID); msg != "" {
|
||||||
|
respondError(w, http.StatusBadRequest, msg)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Value handling: only (re)encrypt when the caller supplied a new value.
|
||||||
|
// Otherwise keep the stored ciphertext untouched — but if the Encrypted
|
||||||
|
// flag flipped without a new value we cannot transcode the opaque stored
|
||||||
|
// bytes, so reject that ambiguous request rather than corrupting the row.
|
||||||
|
if req.Value != nil {
|
||||||
|
v, encErr := s.encryptSecretValue(*req.Value, merged.Encrypted)
|
||||||
|
if encErr != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "encrypt value")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
merged.Value = v
|
||||||
|
} else if req.Encrypted != nil && *req.Encrypted != existing.Encrypted {
|
||||||
|
respondError(w, http.StatusBadRequest, "changing 'encrypted' requires resubmitting 'value'")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
sec, err := s.store.UpdateSharedSecret(merged)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if errors.Is(err, store.ErrUnique) {
|
||||||
|
respondError(w, http.StatusConflict, "a shared secret with this scope and name already exists")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "update shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, toSharedSecretRow(sec))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) deleteSharedSecret(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
if err := s.store.DeleteSharedSecret(id); err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "delete shared secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, map[string]string{"deleted": id})
|
||||||
|
}
|
||||||
|
|
||||||
|
// encryptSecretValue encrypts value with the global key when encrypted is set
|
||||||
|
// and the value is non-empty; otherwise it returns the value unchanged. An
|
||||||
|
// empty value stays empty (no value set) regardless of the flag.
|
||||||
|
func (s *Server) encryptSecretValue(value string, encrypted bool) (string, error) {
|
||||||
|
if !encrypted || value == "" {
|
||||||
|
return value, nil
|
||||||
|
}
|
||||||
|
enc, err := crypto.Encrypt(s.encKey, value)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("encrypt shared secret value", "error", err)
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return enc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateSharedSecretScope returns a non-empty 400 message when the scope /
|
||||||
|
// app_id pairing is invalid; "" when valid. Mirrors the store-side invariant
|
||||||
|
// so the API rejects with a clear message before hitting the store.
|
||||||
|
func validateSharedSecretScope(scope, appID string) string {
|
||||||
|
switch scope {
|
||||||
|
case store.SharedSecretScopeGlobal:
|
||||||
|
return ""
|
||||||
|
case store.SharedSecretScopeApp:
|
||||||
|
if strings.TrimSpace(appID) == "" {
|
||||||
|
return "app_id is required when scope is 'app'"
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
default:
|
||||||
|
return "scope must be 'global' or 'app'"
|
||||||
|
}
|
||||||
|
}
|
||||||
+19
-2
@@ -32,9 +32,26 @@ func (s *Server) streamEvents(w http.ResponseWriter, r *http.Request) {
|
|||||||
w.WriteHeader(http.StatusOK)
|
w.WriteHeader(http.StatusOK)
|
||||||
flusher.Flush()
|
flusher.Flush()
|
||||||
|
|
||||||
// Subscribe to instance status, deploy status, and persistent event log events.
|
// Build logs are high-volume: a single verbose `docker build` can emit
|
||||||
|
// thousands of lines. Streaming them to EVERY connection would flood each
|
||||||
|
// subscriber's bounded bus buffer and evict status/log events for ALL
|
||||||
|
// clients. So build logs are delivered ONLY to connections that opt in
|
||||||
|
// with ?workload_id=<id>, and only for that workload. Connections without
|
||||||
|
// the param (e.g. the global dashboard) never receive build-log frames.
|
||||||
|
buildLogWorkloadID := r.URL.Query().Get("workload_id")
|
||||||
sub := s.eventBus.Subscribe(func(evt events.Event) bool {
|
sub := s.eventBus.Subscribe(func(evt events.Event) bool {
|
||||||
return evt.Type == events.EventInstanceStatus || evt.Type == events.EventDeployStatus || evt.Type == events.EventLog
|
switch evt.Type {
|
||||||
|
case events.EventInstanceStatus, events.EventDeployStatus, events.EventLog:
|
||||||
|
return true
|
||||||
|
case events.EventBuildLog:
|
||||||
|
if buildLogWorkloadID == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
p, ok := evt.Payload.(events.BuildLogPayload)
|
||||||
|
return ok && p.WorkloadID == buildLogWorkloadID
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
})
|
})
|
||||||
defer s.eventBus.Unsubscribe(sub)
|
defer s.eventBus.Unsubscribe(sub)
|
||||||
|
|
||||||
|
|||||||
@@ -89,12 +89,16 @@ func toTriggerViewWithCount(row store.TriggerWithBindingCount) triggerView {
|
|||||||
// triggerRequest is the create/update body. Config is opaque per kind.
|
// triggerRequest is the create/update body. Config is opaque per kind.
|
||||||
// Auto-generates a webhook secret on create when WebhookEnabled is true;
|
// Auto-generates a webhook secret on create when WebhookEnabled is true;
|
||||||
// the secret is exposed only via the /webhook subresource.
|
// the secret is exposed only via the /webhook subresource.
|
||||||
|
//
|
||||||
|
// WebhookRequireSignature is a *bool so we can distinguish "field omitted
|
||||||
|
// by client" (nil → apply secure default of true when webhook is enabled)
|
||||||
|
// from an explicit opt-out (false → respected).
|
||||||
type triggerRequest struct {
|
type triggerRequest struct {
|
||||||
Kind string `json:"kind"`
|
Kind string `json:"kind"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Config json.RawMessage `json:"config"`
|
Config json.RawMessage `json:"config"`
|
||||||
WebhookEnabled bool `json:"webhook_enabled"`
|
WebhookEnabled bool `json:"webhook_enabled"`
|
||||||
WebhookRequireSignature bool `json:"webhook_require_signature"`
|
WebhookRequireSignature *bool `json:"webhook_require_signature,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Same per-blob caps used on the workload pluginWorkloadRequest path —
|
// Same per-blob caps used on the workload pluginWorkloadRequest path —
|
||||||
@@ -134,12 +138,26 @@ func (s *Server) getTrigger(w http.ResponseWriter, r *http.Request) {
|
|||||||
// buildTriggerFromRequest assembles a store.Trigger ready for insert.
|
// buildTriggerFromRequest assembles a store.Trigger ready for insert.
|
||||||
// Centralized so the standalone create endpoint and the inline-bind
|
// Centralized so the standalone create endpoint and the inline-bind
|
||||||
// endpoint cannot drift on secret-generation defaults.
|
// endpoint cannot drift on secret-generation defaults.
|
||||||
|
//
|
||||||
|
// SECURITY: a new trigger with webhook enabled defaults to require_signature
|
||||||
|
// = true. Operators can opt out at create time for receivers that do not
|
||||||
|
// support HMAC, but the safer default avoids the "freshly-created trigger
|
||||||
|
// accepts unsigned posts to its URL" footgun.
|
||||||
func buildTriggerFromRequest(req triggerRequest) store.Trigger {
|
func buildTriggerFromRequest(req triggerRequest) store.Trigger {
|
||||||
|
// Secure default: if webhook is enabled and the operator did NOT
|
||||||
|
// explicitly set require_signature, force it on. Explicit false is
|
||||||
|
// preserved (legacy receivers without HMAC support still work).
|
||||||
|
requireSig := false
|
||||||
|
if req.WebhookRequireSignature != nil {
|
||||||
|
requireSig = *req.WebhookRequireSignature
|
||||||
|
} else if req.WebhookEnabled {
|
||||||
|
requireSig = true
|
||||||
|
}
|
||||||
t := store.Trigger{
|
t := store.Trigger{
|
||||||
Kind: req.Kind,
|
Kind: req.Kind,
|
||||||
Name: strings.TrimSpace(req.Name),
|
Name: strings.TrimSpace(req.Name),
|
||||||
Config: string(req.Config),
|
Config: string(req.Config),
|
||||||
WebhookRequireSignature: req.WebhookRequireSignature,
|
WebhookRequireSignature: requireSig,
|
||||||
}
|
}
|
||||||
if req.WebhookEnabled {
|
if req.WebhookEnabled {
|
||||||
t.WebhookSecret = generateWebhookSecret()
|
t.WebhookSecret = generateWebhookSecret()
|
||||||
@@ -199,7 +217,13 @@ func (s *Server) updateTrigger(w http.ResponseWriter, r *http.Request) {
|
|||||||
if len(req.Config) > 0 {
|
if len(req.Config) > 0 {
|
||||||
existing.Config = string(req.Config)
|
existing.Config = string(req.Config)
|
||||||
}
|
}
|
||||||
existing.WebhookRequireSignature = req.WebhookRequireSignature
|
if req.WebhookRequireSignature != nil {
|
||||||
|
existing.WebhookRequireSignature = *req.WebhookRequireSignature
|
||||||
|
} else if req.WebhookEnabled && !existing.WebhookRequireSignature {
|
||||||
|
// Re-enabling webhook without specifying the signature flag —
|
||||||
|
// take the secure default.
|
||||||
|
existing.WebhookRequireSignature = true
|
||||||
|
}
|
||||||
wasEnabled := existing.WebhookSecret != ""
|
wasEnabled := existing.WebhookSecret != ""
|
||||||
if req.WebhookEnabled && !wasEnabled {
|
if req.WebhookEnabled && !wasEnabled {
|
||||||
// false→true transition: rotate both secrets so re-enabling
|
// false→true transition: rotate both secrets so re-enabling
|
||||||
|
|||||||
@@ -0,0 +1,177 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
"github.com/alexei/tinyforge/internal/volsnap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// listWorkloadSnapshots handles GET /api/workloads/{id}/snapshots.
|
||||||
|
func (s *Server) listWorkloadSnapshots(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if s.snapshotEngine == nil {
|
||||||
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
snaps, err := s.snapshotEngine.List(id)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("snapshots: list", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, snaps)
|
||||||
|
}
|
||||||
|
|
||||||
|
// snapshotableVolume is the sanitized view of a volume in the snapshotable
// response — it omits the resolved host path so internal layout is not leaked.
// The three fields are copied from the corresponding volume reference fields
// by getWorkloadSnapshotable.
type snapshotableVolume struct {
	Target string `json:"target"` // copied from the reference's Target
	Scope  string `json:"scope"`  // copied from the reference's Scope
	Source string `json:"source"` // copied from the reference's Source
}
|
||||||
|
|
||||||
|
// getWorkloadSnapshotable handles GET /api/workloads/{id}/snapshotable. It
|
||||||
|
// tells the UI which volumes can be snapshotted and which are skipped (and
|
||||||
|
// why), so users are never misled about coverage.
|
||||||
|
func (s *Server) getWorkloadSnapshotable(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if s.snapshotEngine == nil {
|
||||||
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
workload, err := s.store.GetWorkloadByID(id)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusNotFound, "workload not found")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
settings, err := s.store.GetSettings()
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
refs, skipped, err := volsnap.SnapshotableVolumes(s.store, workload, settings)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("snapshots: enumerate", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
volumes := make([]snapshotableVolume, 0, len(refs))
|
||||||
|
for _, ref := range refs {
|
||||||
|
volumes = append(volumes, snapshotableVolume{Target: ref.Target, Scope: ref.Scope, Source: ref.Source})
|
||||||
|
}
|
||||||
|
if skipped == nil {
|
||||||
|
skipped = []volsnap.SkippedVolume{}
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, map[string]any{
|
||||||
|
"volumes": volumes,
|
||||||
|
"skipped": skipped,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// createWorkloadSnapshot handles POST /api/workloads/{id}/snapshots.
|
||||||
|
func (s *Server) createWorkloadSnapshot(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if s.snapshotEngine == nil {
|
||||||
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
workload, err := s.store.GetWorkloadByID(id)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusNotFound, "workload not found")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
settings, err := s.store.GetSettings()
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var body struct {
|
||||||
|
Label string `json:"label"`
|
||||||
|
}
|
||||||
|
if r.ContentLength != 0 {
|
||||||
|
if err := json.NewDecoder(io.LimitReader(r.Body, 1<<20)).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||||
|
respondError(w, http.StatusBadRequest, "invalid JSON body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
snap, err := s.snapshotEngine.Create(workload, settings, body.Label)
|
||||||
|
if err != nil {
|
||||||
|
// "no snapshottable volume data" is client-actionable (400, safe to
|
||||||
|
// echo). Any other error is server-side: log the detail, return a
|
||||||
|
// generic 500 so internal paths / DB text never reach the client.
|
||||||
|
if errors.Is(err, volsnap.ErrNoSnapshotData) {
|
||||||
|
respondError(w, http.StatusBadRequest, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
slog.Error("snapshots: create", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "internal server error")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusCreated, snap)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteSnapshot handles DELETE /api/snapshots/{sid}.
|
||||||
|
func (s *Server) deleteSnapshot(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if s.snapshotEngine == nil {
|
||||||
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
sid := chi.URLParam(r, "sid")
|
||||||
|
if err := s.snapshotEngine.Delete(sid); err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondError(w, http.StatusNotFound, "snapshot not found")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "failed to delete snapshot")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, map[string]string{"status": "deleted"})
|
||||||
|
}
|
||||||
|
|
||||||
|
// downloadSnapshot handles GET /api/snapshots/{sid}/download, streaming the
|
||||||
|
// tar.gz archive. The resolved path is containment-checked against the
|
||||||
|
// snapshot directory.
|
||||||
|
func (s *Server) downloadSnapshot(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if s.snapshotEngine == nil {
|
||||||
|
respondError(w, http.StatusServiceUnavailable, "snapshot engine not initialized")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
sid := chi.URLParam(r, "sid")
|
||||||
|
snap, err := s.snapshotEngine.Get(sid)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusNotFound, "snapshot not found")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
path, err := s.snapshotEngine.FilePath(snap)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusForbidden, "access denied")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusNotFound, "snapshot file not found on disk")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
stat, err := f.Stat()
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "failed to read snapshot file")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
name := filepath.Base(snap.Filename)
|
||||||
|
w.Header().Set("Content-Type", "application/gzip")
|
||||||
|
w.Header().Set("Content-Disposition", "attachment; filename=\""+name+"\"")
|
||||||
|
http.ServeContent(w, r, name, stat.ModTime(), f)
|
||||||
|
}
|
||||||
@@ -0,0 +1,178 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/auth"
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
"github.com/alexei/tinyforge/internal/volsnap"
|
||||||
|
"github.com/alexei/tinyforge/internal/webhook"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newSnapshotEnv builds an API test env with the volume-snapshot engine wired
|
||||||
|
// (the shared newAPITestEnv does not wire it). dataDir holds the snapshot
|
||||||
|
// archives; baseVol is where host-bind volume directories resolve.
|
||||||
|
func newSnapshotEnv(t *testing.T) (*apiTestEnv, string) {
|
||||||
|
t.Helper()
|
||||||
|
st, err := store.New(":memory:")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { st.Close() })
|
||||||
|
|
||||||
|
encKey := [32]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
|
||||||
|
dispatcher := &fakeAPIDispatcher{}
|
||||||
|
wh := webhook.NewHandler(st)
|
||||||
|
wh.SetPluginDispatcher(dispatcher)
|
||||||
|
srv := NewServer(st, nil, nil, nil, dispatcher, nil, wh, nil, encKey)
|
||||||
|
|
||||||
|
snapEng, err := volsnap.New(st, t.TempDir())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("snapshot engine: %v", err)
|
||||||
|
}
|
||||||
|
srv.SetSnapshotEngine(snapEng)
|
||||||
|
|
||||||
|
httpsrv := httptest.NewServer(srv.Router())
|
||||||
|
t.Cleanup(httpsrv.Close)
|
||||||
|
|
||||||
|
la := auth.NewLocalAuth(encKey)
|
||||||
|
tok, err := la.GenerateToken(auth.Claims{UserID: "u-admin", Username: "admin", Role: "admin"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("mint token: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
baseVol := t.TempDir()
|
||||||
|
settings, _ := st.GetSettings()
|
||||||
|
settings.BaseVolumePath = baseVol
|
||||||
|
if err := st.UpdateSettings(settings); err != nil {
|
||||||
|
t.Fatalf("update settings: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &apiTestEnv{srv: httpsrv, store: st, dispatcher: dispatcher, adminToken: tok.Token, encKey: encKey}, baseVol
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVolumeSnapshots_EndToEnd(t *testing.T) {
|
||||||
|
e, baseVol := newSnapshotEnv(t)
|
||||||
|
|
||||||
|
w, err := e.store.CreateWorkload(store.Workload{
|
||||||
|
Name: "data-app",
|
||||||
|
Kind: "project",
|
||||||
|
SourceKind: "image",
|
||||||
|
SourceConfig: `{"image":"registry.example.com/owner/app","port":8080}`,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create workload: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := e.store.SetWorkloadVolume(store.WorkloadVolume{
|
||||||
|
WorkloadID: w.ID, Target: "/data", Source: "data", Scope: "project",
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("set volume: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Materialize the resolved host-bind dir with a file so there is data to
|
||||||
|
// capture. Layout mirrors ResolveWorkloadPath for project scope:
|
||||||
|
// <baseVol>/<name>-<id8>/<source>.
|
||||||
|
id8 := w.ID
|
||||||
|
if len(id8) > 8 {
|
||||||
|
id8 = id8[:8]
|
||||||
|
}
|
||||||
|
hostDir := filepath.Join(baseVol, "data-app-"+id8, "data")
|
||||||
|
if err := os.MkdirAll(hostDir, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(hostDir, "payload.txt"), []byte("important"), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// snapshotable lists the one host-bind volume.
|
||||||
|
resp := e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshotable", nil)
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("snapshotable status = %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
var snapable struct {
|
||||||
|
Volumes []map[string]string `json:"volumes"`
|
||||||
|
Skipped []map[string]string `json:"skipped"`
|
||||||
|
}
|
||||||
|
decodeEnvelope(t, resp, &snapable)
|
||||||
|
if len(snapable.Volumes) != 1 || snapable.Volumes[0]["target"] != "/data" {
|
||||||
|
t.Fatalf("expected 1 snapshotable volume /data, got %+v", snapable)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a snapshot.
|
||||||
|
resp = e.do(t, http.MethodPost, "/api/workloads/"+w.ID+"/snapshots", map[string]string{"label": "before upgrade"})
|
||||||
|
if resp.StatusCode != http.StatusCreated {
|
||||||
|
t.Fatalf("create snapshot status = %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
var snap store.VolumeSnapshot
|
||||||
|
decodeEnvelope(t, resp, &snap)
|
||||||
|
if snap.ID == "" || snap.SizeBytes == 0 || snap.Label != "before upgrade" {
|
||||||
|
t.Fatalf("unexpected snapshot: %+v", snap)
|
||||||
|
}
|
||||||
|
|
||||||
|
// It appears in the list.
|
||||||
|
resp = e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshots", nil)
|
||||||
|
var list []store.VolumeSnapshot
|
||||||
|
decodeEnvelope(t, resp, &list)
|
||||||
|
if len(list) != 1 || list[0].ID != snap.ID {
|
||||||
|
t.Fatalf("expected 1 snapshot in list, got %+v", list)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Download streams a non-empty gzip archive (not the JSON envelope).
|
||||||
|
resp = e.do(t, http.MethodGet, "/api/snapshots/"+snap.ID+"/download", nil)
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("download status = %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
if ct := resp.Header.Get("Content-Type"); ct != "application/gzip" {
|
||||||
|
t.Errorf("download content-type = %q, want application/gzip", ct)
|
||||||
|
}
|
||||||
|
data, _ := io.ReadAll(resp.Body)
|
||||||
|
resp.Body.Close()
|
||||||
|
if len(data) == 0 {
|
||||||
|
t.Error("download body is empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes it.
|
||||||
|
resp = e.do(t, http.MethodDelete, "/api/snapshots/"+snap.ID, nil)
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("delete status = %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
resp = e.do(t, http.MethodGet, "/api/workloads/"+w.ID+"/snapshots", nil)
|
||||||
|
var after []store.VolumeSnapshot
|
||||||
|
decodeEnvelope(t, resp, &after)
|
||||||
|
if len(after) != 0 {
|
||||||
|
t.Fatalf("expected 0 snapshots after delete, got %d", len(after))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateSnapshot_NoVolumeData_Returns400(t *testing.T) {
|
||||||
|
e, _ := newSnapshotEnv(t)
|
||||||
|
w, err := e.store.CreateWorkload(store.Workload{
|
||||||
|
Name: "no-vol-app",
|
||||||
|
Kind: "project",
|
||||||
|
SourceKind: "image",
|
||||||
|
SourceConfig: `{"image":"x","port":80}`,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create workload: %v", err)
|
||||||
|
}
|
||||||
|
resp := e.do(t, http.MethodPost, "/api/workloads/"+w.ID+"/snapshots", nil)
|
||||||
|
if resp.StatusCode != http.StatusBadRequest {
|
||||||
|
t.Fatalf("expected 400 for an app with no snapshottable volumes, got %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSnapshotEndpoints_RequireWorkload(t *testing.T) {
|
||||||
|
e, _ := newSnapshotEnv(t)
|
||||||
|
// snapshotable on an unknown workload → 404.
|
||||||
|
resp := e.do(t, http.MethodGet, "/api/workloads/does-not-exist/snapshotable", nil)
|
||||||
|
if resp.StatusCode != http.StatusNotFound {
|
||||||
|
t.Fatalf("snapshotable unknown workload = %d, want 404", resp.StatusCode)
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
}
|
||||||
@@ -13,16 +13,27 @@ import (
|
|||||||
"github.com/alexei/tinyforge/internal/auth"
|
"github.com/alexei/tinyforge/internal/auth"
|
||||||
"github.com/alexei/tinyforge/internal/store"
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
|
"github.com/alexei/tinyforge/internal/workload/preview"
|
||||||
)
|
)
|
||||||
|
|
||||||
// chainNode is the lightweight shape returned by /chain — we deliberately
|
// chainNode is the lightweight shape returned by /chain — we deliberately
|
||||||
// don't return full plugin.Workload values for ancestor/descendant rows
|
// don't return full plugin.Workload values for ancestor/descendant rows
|
||||||
// because the secret fields don't belong in a chain-traversal response.
|
// because the secret fields don't belong in a chain-traversal response.
|
||||||
|
//
|
||||||
|
// IsPreview / PreviewBranch surface branch-preview children to the UI so it
|
||||||
|
// can render them in a dedicated "Preview environments" panel rather than as
|
||||||
|
// undistinguished stage children. They are computed against the chain's
|
||||||
|
// `self` workload via preview.IsPreviewChild — the canonical "this child is a
|
||||||
|
// branch preview" test that reverses the MaterializeForBranch naming formula.
|
||||||
|
// Both are zero-valued (false / "") for the parent and self nodes and for
|
||||||
|
// operator-created stage children.
|
||||||
type chainNode struct {
|
type chainNode struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
SourceKind string `json:"source_kind"`
|
SourceKind string `json:"source_kind"`
|
||||||
TriggerKind string `json:"trigger_kind"`
|
TriggerKind string `json:"trigger_kind"`
|
||||||
|
IsPreview bool `json:"is_preview"`
|
||||||
|
PreviewBranch string `json:"preview_branch,omitempty"`
|
||||||
CreatedAt string `json:"created_at"`
|
CreatedAt string `json:"created_at"`
|
||||||
UpdatedAt string `json:"updated_at"`
|
UpdatedAt string `json:"updated_at"`
|
||||||
}
|
}
|
||||||
@@ -38,6 +49,32 @@ func chainNodeOf(w store.Workload) chainNode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// previewBranchOf extracts the branch a preview child was materialized for
|
||||||
|
// from its source_config (the `branch` key MaterializeForBranch wrote).
|
||||||
|
// Returns "" on a missing/malformed config — the caller only calls this for
|
||||||
|
// rows preview.IsPreviewChild already confirmed, so a blank result just means
|
||||||
|
// the JSON couldn't be decoded.
|
||||||
|
func previewBranchOf(w store.Workload) string {
|
||||||
|
var cfg struct {
|
||||||
|
Branch string `json:"branch"`
|
||||||
|
}
|
||||||
|
if w.SourceConfig != "" {
|
||||||
|
_ = json.Unmarshal([]byte(w.SourceConfig), &cfg)
|
||||||
|
}
|
||||||
|
return cfg.Branch
|
||||||
|
}
|
||||||
|
|
||||||
|
// childChainNode builds a chainNode for a child row, marking it as a branch
|
||||||
|
// preview (and attaching its branch) when it was materialized from `self`.
|
||||||
|
func childChainNode(self, child store.Workload) chainNode {
|
||||||
|
node := chainNodeOf(child)
|
||||||
|
if preview.IsPreviewChild(self, child) {
|
||||||
|
node.IsPreview = true
|
||||||
|
node.PreviewBranch = previewBranchOf(child)
|
||||||
|
}
|
||||||
|
return node
|
||||||
|
}
|
||||||
|
|
||||||
// getWorkloadChain handles GET /api/workloads/{id}/chain.
|
// getWorkloadChain handles GET /api/workloads/{id}/chain.
|
||||||
//
|
//
|
||||||
// Returns the workload's parent (or nil), itself, and its direct children
|
// Returns the workload's parent (or nil), itself, and its direct children
|
||||||
@@ -76,7 +113,7 @@ func (s *Server) getWorkloadChain(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
children := make([]chainNode, 0, len(childRows))
|
children := make([]chainNode, 0, len(childRows))
|
||||||
for _, c := range childRows {
|
for _, c := range childRows {
|
||||||
children = append(children, chainNodeOf(c))
|
children = append(children, childChainNode(self, c))
|
||||||
}
|
}
|
||||||
|
|
||||||
respondJSON(w, http.StatusOK, map[string]any{
|
respondJSON(w, http.StatusOK, map[string]any{
|
||||||
|
|||||||
@@ -0,0 +1,147 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestChildChainNode_MarksPreviewChildren verifies the /chain DTO builder
|
||||||
|
// distinguishes branch-preview children (materialized by the preview package)
|
||||||
|
// from operator-created stage children that merely share the parent link.
|
||||||
|
// The discriminator is preview.IsPreviewChild, which reverses the
|
||||||
|
// MaterializeForBranch naming formula: name == template.Name + "/" + slug.
|
||||||
|
func TestChildChainNode_MarksPreviewChildren(t *testing.T) {
|
||||||
|
template := store.Workload{
|
||||||
|
ID: "tmpl-1",
|
||||||
|
Name: "myapp",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
child store.Workload
|
||||||
|
wantPrev bool
|
||||||
|
wantBranch string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "preview child is marked with its branch",
|
||||||
|
child: store.Workload{
|
||||||
|
ID: "child-prev",
|
||||||
|
Name: "myapp/feat-login",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
SourceConfig: `{"branch":"feat/login","port":3000}`,
|
||||||
|
ParentWorkloadID: "tmpl-1",
|
||||||
|
},
|
||||||
|
wantPrev: true,
|
||||||
|
wantBranch: "feat/login",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "operator-named stage child sharing the parent is not a preview",
|
||||||
|
child: store.Workload{
|
||||||
|
ID: "child-stage",
|
||||||
|
Name: "myapp-staging",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
SourceConfig: `{"branch":"main"}`,
|
||||||
|
ParentWorkloadID: "tmpl-1",
|
||||||
|
},
|
||||||
|
wantPrev: false,
|
||||||
|
wantBranch: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "child of a different parent is not a preview of self",
|
||||||
|
child: store.Workload{
|
||||||
|
ID: "child-other",
|
||||||
|
Name: "myapp/feat-login",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
SourceConfig: `{"branch":"feat/login"}`,
|
||||||
|
ParentWorkloadID: "some-other-template",
|
||||||
|
},
|
||||||
|
wantPrev: false,
|
||||||
|
wantBranch: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "child with no branch in source_config is not a preview",
|
||||||
|
child: store.Workload{
|
||||||
|
ID: "child-nobranch",
|
||||||
|
Name: "myapp/feat-login",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
SourceConfig: `{}`,
|
||||||
|
ParentWorkloadID: "tmpl-1",
|
||||||
|
},
|
||||||
|
wantPrev: false,
|
||||||
|
wantBranch: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Same parent + a valid branch, but the name carries an extra
|
||||||
|
// suffix so it fails ONLY the slug-equality check (expected
|
||||||
|
// "myapp/feat-login", got "myapp/feat-login-staging"). The
|
||||||
|
// branch alone must not be enough to mark a preview.
|
||||||
|
name: "valid branch but name fails the slug match is not a preview",
|
||||||
|
child: store.Workload{
|
||||||
|
ID: "child-slugmiss",
|
||||||
|
Name: "myapp/feat-login-staging",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
SourceConfig: `{"branch":"feat/login","port":3000}`,
|
||||||
|
ParentWorkloadID: "tmpl-1",
|
||||||
|
},
|
||||||
|
wantPrev: false,
|
||||||
|
wantBranch: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Uppercase + slash branch: slugifyBranch lowercases and maps
|
||||||
|
// "/" -> "-", so "Feature/Login" -> "feature-login" and the name
|
||||||
|
// "myapp/feature-login" matches. PreviewBranch must echo the RAW
|
||||||
|
// branch from source_config ("Feature/Login"), not the slug.
|
||||||
|
name: "uppercase slash branch matches and keeps raw branch",
|
||||||
|
child: store.Workload{
|
||||||
|
ID: "child-upper",
|
||||||
|
Name: "myapp/feature-login",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
SourceConfig: `{"branch":"Feature/Login","port":8080}`,
|
||||||
|
ParentWorkloadID: "tmpl-1",
|
||||||
|
},
|
||||||
|
wantPrev: true,
|
||||||
|
wantBranch: "Feature/Login",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
node := childChainNode(template, tc.child)
|
||||||
|
if node.IsPreview != tc.wantPrev {
|
||||||
|
t.Errorf("IsPreview = %v, want %v", node.IsPreview, tc.wantPrev)
|
||||||
|
}
|
||||||
|
if node.PreviewBranch != tc.wantBranch {
|
||||||
|
t.Errorf("PreviewBranch = %q, want %q", node.PreviewBranch, tc.wantBranch)
|
||||||
|
}
|
||||||
|
// Base fields must always round-trip regardless of preview status.
|
||||||
|
if node.ID != tc.child.ID || node.Name != tc.child.Name {
|
||||||
|
t.Errorf("base fields mangled: got id=%q name=%q", node.ID, node.Name)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestPreviewBranchOf_ToleratesMalformedConfig confirms the branch extractor
|
||||||
|
// returns "" rather than panicking on a missing or invalid source_config.
|
||||||
|
func TestPreviewBranchOf_ToleratesMalformedConfig(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
cfg string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"valid branch", `{"branch":"release/v1"}`, "release/v1"},
|
||||||
|
{"empty config", ``, ""},
|
||||||
|
{"empty object", `{}`, ""},
|
||||||
|
{"malformed json", `{not-json`, ""},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
got := previewBranchOf(store.Workload{SourceConfig: c.cfg})
|
||||||
|
if got != c.want {
|
||||||
|
t.Errorf("previewBranchOf(%q) = %q, want %q", c.cfg, got, c.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,231 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"log/slog"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/crypto"
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// workloadNotificationRow is the client-facing JSON form of a workload
// notification. The stored secret never leaves the server: clients only see
// `secret_set`, a boolean saying whether one exists. Secrets are therefore
// write-only once saved (the same approach workload_env takes for encrypted
// values), and rotating one means submitting a replacement.
type workloadNotificationRow struct {
	ID         string `json:"id"`
	WorkloadID string `json:"workload_id"`

	Name string `json:"name"`
	URL  string `json:"url"`

	// SecretSet stands in for the ciphertext itself.
	SecretSet bool `json:"secret_set"`

	EventTypes string `json:"event_types"`
	Enabled    bool   `json:"enabled"`
	SortOrder  int    `json:"sort_order"`

	CreatedAt string `json:"created_at"`
	UpdatedAt string `json:"updated_at"`
}
|
||||||
|
|
||||||
|
func toWorkloadNotificationRow(n store.WorkloadNotification) workloadNotificationRow {
|
||||||
|
return workloadNotificationRow{
|
||||||
|
ID: n.ID,
|
||||||
|
WorkloadID: n.WorkloadID,
|
||||||
|
Name: n.Name,
|
||||||
|
URL: n.URL,
|
||||||
|
SecretSet: n.Secret != "",
|
||||||
|
EventTypes: n.EventTypes,
|
||||||
|
Enabled: n.Enabled,
|
||||||
|
SortOrder: n.SortOrder,
|
||||||
|
CreatedAt: n.CreatedAt,
|
||||||
|
UpdatedAt: n.UpdatedAt,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) listWorkloadNotifications(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
if _, err := s.store.GetWorkloadByID(id); err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "workload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "get workload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rows, err := s.store.ListWorkloadNotifications(id)
|
||||||
|
if err != nil {
|
||||||
|
respondError(w, http.StatusInternalServerError, "list workload notifications")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out := make([]workloadNotificationRow, 0, len(rows))
|
||||||
|
for _, n := range rows {
|
||||||
|
out = append(out, toWorkloadNotificationRow(n))
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// workloadNotificationRequest is the body accepted when creating or
// updating a workload notification (POST/PUT).
//
// Secret carries the plaintext webhook signing key; the handlers encrypt it
// with the server's encryption key before it is stored. On update, leaving
// Secret empty keeps whatever secret is already stored, so an operator can
// change the URL or event filter without typing the secret again.
type workloadNotificationRequest struct {
	Name string `json:"name"`
	URL  string `json:"url"`

	Secret string `json:"secret"`

	EventTypes string `json:"event_types"`

	// Enabled is a pointer so an omitted field can be told apart from an
	// explicit false.
	Enabled *bool `json:"enabled"`

	SortOrder int `json:"sort_order"`
}
|
||||||
|
|
||||||
|
func (s *Server) createWorkloadNotification(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
if _, err := s.store.GetWorkloadByID(id); err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "workload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "get workload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var req workloadNotificationRequest
|
||||||
|
if !decodeJSONStrict(w, r, &req) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
req.URL = strings.TrimSpace(req.URL)
|
||||||
|
req.Name = strings.TrimSpace(req.Name)
|
||||||
|
if req.URL == "" {
|
||||||
|
respondError(w, http.StatusBadRequest, "url is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
encSecret := ""
|
||||||
|
if req.Secret != "" {
|
||||||
|
v, err := crypto.Encrypt(s.encKey, req.Secret)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("workload notifications: encrypt secret", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "encrypt secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
encSecret = v
|
||||||
|
}
|
||||||
|
enabled := true
|
||||||
|
if req.Enabled != nil {
|
||||||
|
enabled = *req.Enabled
|
||||||
|
}
|
||||||
|
created, err := s.store.CreateWorkloadNotification(store.WorkloadNotification{
|
||||||
|
WorkloadID: id,
|
||||||
|
Name: req.Name,
|
||||||
|
URL: req.URL,
|
||||||
|
Secret: encSecret,
|
||||||
|
EventTypes: req.EventTypes,
|
||||||
|
Enabled: enabled,
|
||||||
|
SortOrder: req.SortOrder,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("workload notifications: create", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "create workload notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusCreated, toWorkloadNotificationRow(created))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) updateWorkloadNotification(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
nid := chi.URLParam(r, "nid")
|
||||||
|
if _, err := s.store.GetWorkloadByID(id); err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "workload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "get workload")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
existing, err := s.store.GetWorkloadNotification(nid)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "get workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if existing.WorkloadID != id {
|
||||||
|
// Route mismatch — the row exists but under a different workload.
|
||||||
|
// Return 404 rather than 403 so we don't leak the existence of
|
||||||
|
// foreign rows to an unauthorised caller.
|
||||||
|
respondNotFound(w, "workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var req workloadNotificationRequest
|
||||||
|
if !decodeJSONStrict(w, r, &req) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
req.URL = strings.TrimSpace(req.URL)
|
||||||
|
req.Name = strings.TrimSpace(req.Name)
|
||||||
|
if req.URL == "" {
|
||||||
|
respondError(w, http.StatusBadRequest, "url is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
existing.Name = req.Name
|
||||||
|
existing.URL = req.URL
|
||||||
|
existing.EventTypes = req.EventTypes
|
||||||
|
existing.SortOrder = req.SortOrder
|
||||||
|
if req.Enabled != nil {
|
||||||
|
existing.Enabled = *req.Enabled
|
||||||
|
}
|
||||||
|
// Empty Secret on UPDATE preserves the stored ciphertext — explicit
|
||||||
|
// rotation requires sending the new plaintext. This avoids forcing
|
||||||
|
// the operator to re-enter their secret on every URL edit.
|
||||||
|
if req.Secret != "" {
|
||||||
|
v, err := crypto.Encrypt(s.encKey, req.Secret)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("workload notifications: encrypt secret", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "encrypt secret")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
existing.Secret = v
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.store.UpdateWorkloadNotification(existing); err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
slog.Error("workload notifications: update", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "update workload notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, toWorkloadNotificationRow(existing))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) deleteWorkloadNotification(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := chi.URLParam(r, "id")
|
||||||
|
nid := chi.URLParam(r, "nid")
|
||||||
|
existing, err := s.store.GetWorkloadNotification(nid)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondError(w, http.StatusInternalServerError, "get workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if existing.WorkloadID != id {
|
||||||
|
respondNotFound(w, "workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := s.store.DeleteWorkloadNotification(nid); err != nil {
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
respondNotFound(w, "workload_notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
slog.Error("workload notifications: delete", "workload", id, "error", err)
|
||||||
|
respondError(w, http.StatusInternalServerError, "delete workload notification")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
respondJSON(w, http.StatusOK, map[string]any{"success": true})
|
||||||
|
}
|
||||||
@@ -82,16 +82,27 @@ func (s *Server) getWorkloadRuntimeState(w http.ResponseWriter, r *http.Request)
|
|||||||
|
|
||||||
payload := runtimeStatePayload{SourceKind: workload.SourceKind}
|
payload := runtimeStatePayload{SourceKind: workload.SourceKind}
|
||||||
|
|
||||||
if workload.SourceKind != "static" {
|
// Both static and dockerfile sources persist their runtime state into
|
||||||
|
// containers.extra_json under a deterministic row id. The shapes
|
||||||
|
// match (status / last_commit_sha / last_sync_at / last_error) so the
|
||||||
|
// handler can decode them identically. The suffix differs per source
|
||||||
|
// kind: static uses ":site", dockerfile uses ":dockerfile".
|
||||||
|
var rowSuffix string
|
||||||
|
switch workload.SourceKind {
|
||||||
|
case "static":
|
||||||
|
rowSuffix = ":site"
|
||||||
|
case "dockerfile":
|
||||||
|
rowSuffix = ":dockerfile"
|
||||||
|
default:
|
||||||
respondJSON(w, http.StatusOK, payload)
|
respondJSON(w, http.StatusOK, payload)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// The static plugin owns one container row per workload at the
|
// The owning plugin maintains one container row per workload at the
|
||||||
// deterministic ID <workloadID>:site. A missing row means the
|
// deterministic ID. A missing row means the workload has never been
|
||||||
// workload has never been deployed — return HasState=false so the
|
// deployed — return HasState=false so the UI can prompt the operator
|
||||||
// UI can prompt the operator to deploy.
|
// to deploy.
|
||||||
row, err := s.store.GetContainerByID(id + ":site")
|
row, err := s.store.GetContainerByID(id + rowSuffix)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, store.ErrNotFound) {
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
respondJSON(w, http.StatusOK, payload)
|
respondJSON(w, http.StatusOK, payload)
|
||||||
|
|||||||
@@ -130,6 +130,13 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
|
|||||||
SourceKind: "static",
|
SourceKind: "static",
|
||||||
SourceConfig: `{"provider":"gitea"}`,
|
SourceConfig: `{"provider":"gitea"}`,
|
||||||
})
|
})
|
||||||
|
// Seed a row with a valid extra_json first, then corrupt it via raw
|
||||||
|
// SQL. Prior to the write-side validateExtraJSON guard this test
|
||||||
|
// could pass a malformed string straight to UpsertContainer; the
|
||||||
|
// guard now rejects that at the boundary, which is the correct
|
||||||
|
// behaviour. The reader resilience this test verifies remains
|
||||||
|
// relevant for pre-existing bad rows from upgrades or external
|
||||||
|
// manipulation, so we still produce one via direct SQL.
|
||||||
if err := e.store.UpsertContainer(store.Container{
|
if err := e.store.UpsertContainer(store.Container{
|
||||||
ID: wl.ID + ":site",
|
ID: wl.ID + ":site",
|
||||||
WorkloadID: wl.ID,
|
WorkloadID: wl.ID,
|
||||||
@@ -137,10 +144,16 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
|
|||||||
Host: "local",
|
Host: "local",
|
||||||
ContainerID: "abc",
|
ContainerID: "abc",
|
||||||
State: "running",
|
State: "running",
|
||||||
ExtraJSON: `{this is not json`,
|
ExtraJSON: `{}`,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
t.Fatalf("seed: %v", err)
|
t.Fatalf("seed: %v", err)
|
||||||
}
|
}
|
||||||
|
if _, err := e.store.DB().Exec(
|
||||||
|
`UPDATE containers SET extra_json = ? WHERE id = ?`,
|
||||||
|
`{this is not json`, wl.ID+":site",
|
||||||
|
); err != nil {
|
||||||
|
t.Fatalf("corrupt extra_json: %v", err)
|
||||||
|
}
|
||||||
resp := e.do(t, http.MethodGet, "/api/workloads/"+wl.ID+"/runtime-state", nil)
|
resp := e.do(t, http.MethodGet, "/api/workloads/"+wl.ID+"/runtime-state", nil)
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
t.Fatalf("status = %d, want 200 (decode is non-fatal)", resp.StatusCode)
|
t.Fatalf("status = %d, want 200 (decode is non-fatal)", resp.StatusCode)
|
||||||
@@ -155,6 +168,57 @@ func TestGetWorkloadRuntimeState_MalformedExtraJSON_ReturnsContainerFieldsOnly(t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetWorkloadRuntimeState_DockerfileSourceDeployed_DecodesExtraJSON(t *testing.T) {
|
||||||
|
e := newAPITestEnv(t)
|
||||||
|
wl, err := e.store.CreateWorkload(store.Workload{
|
||||||
|
Kind: string(store.WorkloadKindProject),
|
||||||
|
Name: "build-app",
|
||||||
|
SourceKind: "dockerfile",
|
||||||
|
SourceConfig: `{"provider":"gitea","port":3000}`,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("seed workload: %v", err)
|
||||||
|
}
|
||||||
|
extra, _ := json.Marshal(map[string]any{
|
||||||
|
"status": "deployed",
|
||||||
|
"last_commit_sha": "deadbeef",
|
||||||
|
"last_sync_at": "2026-05-23T10:00:00Z",
|
||||||
|
"last_error": "",
|
||||||
|
})
|
||||||
|
if err := e.store.UpsertContainer(store.Container{
|
||||||
|
ID: wl.ID + ":dockerfile",
|
||||||
|
WorkloadID: wl.ID,
|
||||||
|
WorkloadKind: string(store.WorkloadKindBuild),
|
||||||
|
Host: "local",
|
||||||
|
ContainerID: "ffeeddcc",
|
||||||
|
State: "running",
|
||||||
|
ExtraJSON: string(extra),
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed container: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp := e.do(t, http.MethodGet, "/api/workloads/"+wl.ID+"/runtime-state", nil)
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("status = %d, want 200", resp.StatusCode)
|
||||||
|
}
|
||||||
|
var got runtimeStatePayload
|
||||||
|
if errMsg := decodeEnvelope(t, resp, &got); errMsg != "" {
|
||||||
|
t.Fatalf("envelope error: %q", errMsg)
|
||||||
|
}
|
||||||
|
if !got.HasState {
|
||||||
|
t.Fatalf("HasState = false, want true")
|
||||||
|
}
|
||||||
|
if got.SourceKind != "dockerfile" {
|
||||||
|
t.Errorf("SourceKind = %q, want dockerfile", got.SourceKind)
|
||||||
|
}
|
||||||
|
if got.ContainerID != "ffeeddcc" || got.State != "running" {
|
||||||
|
t.Errorf("container fields = (%q,%q), want (ffeeddcc, running)", got.ContainerID, got.State)
|
||||||
|
}
|
||||||
|
if got.Status != "deployed" || got.LastCommitSHA != "deadbeef" {
|
||||||
|
t.Errorf("runtime fields = %+v, want deployed/deadbeef", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// GET /api/workloads/{id}/storage
|
// GET /api/workloads/{id}/storage
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
"github.com/alexei/tinyforge/internal/auth"
|
"github.com/alexei/tinyforge/internal/auth"
|
||||||
"github.com/alexei/tinyforge/internal/store"
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
|
"github.com/alexei/tinyforge/internal/workload/preview"
|
||||||
)
|
)
|
||||||
|
|
||||||
// pluginWorkloadRequest is the JSON body accepted by create + update.
|
// pluginWorkloadRequest is the JSON body accepted by create + update.
|
||||||
@@ -227,6 +228,28 @@ func (s *Server) deletePluginWorkload(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cascade-teardown any branch previews materialized from this workload
|
||||||
|
// so deleting a template does not orphan their containers, proxy routes,
|
||||||
|
// and rows. Operator-managed stage-chain children (which share the same
|
||||||
|
// parent link) are deliberately left alone — only previews are auto-owned
|
||||||
|
// by the template (see preview.IsPreviewChild).
|
||||||
|
if previews, err := preview.ListPreviewChildren(s.store, row); err != nil {
|
||||||
|
slog.Warn("delete workload: list preview children", "workload", id, "error", err)
|
||||||
|
} else {
|
||||||
|
for _, child := range previews {
|
||||||
|
if child.SourceKind != "" {
|
||||||
|
if err := s.deployer.DispatchTeardown(r.Context(), toPluginWorkload(child)); err != nil {
|
||||||
|
slog.Warn("delete workload: preview child teardown error",
|
||||||
|
"workload", id, "child", child.ID, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := s.store.DeleteWorkload(child.ID); err != nil && !errors.Is(err, store.ErrNotFound) {
|
||||||
|
slog.Warn("delete workload: preview child delete error",
|
||||||
|
"workload", id, "child", child.ID, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if row.SourceKind != "" {
|
if row.SourceKind != "" {
|
||||||
if err := s.deployer.DispatchTeardown(r.Context(), toPluginWorkload(row)); err != nil {
|
if err := s.deployer.DispatchTeardown(r.Context(), toPluginWorkload(row)); err != nil {
|
||||||
slog.Warn("delete workload: teardown error",
|
slog.Warn("delete workload: teardown error",
|
||||||
|
|||||||
@@ -85,9 +85,15 @@ func (la *LocalAuth) cleanBlacklist() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bcryptCost is the work factor used for new password hashes. Bumped from
|
||||||
|
// the library default (10) to 12 to keep pace with faster hardware. Existing
|
||||||
|
// hashes at lower costs still verify — bcrypt encodes the cost in the
|
||||||
|
// stored hash itself.
|
||||||
|
const bcryptCost = 12
|
||||||
|
|
||||||
// HashPassword hashes a plaintext password using bcrypt.
|
// HashPassword hashes a plaintext password using bcrypt.
|
||||||
func HashPassword(password string) (string, error) {
|
func HashPassword(password string) (string, error) {
|
||||||
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
|
hash, err := bcrypt.GenerateFromPassword([]byte(password), bcryptCost)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("hash password: %w", err)
|
return "", fmt.Errorf("hash password: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,13 +1,17 @@
|
|||||||
package backup
|
package backup
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"database/sql"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
_ "modernc.org/sqlite" // read-only candidate inspection via PRAGMA integrity_check
|
||||||
|
|
||||||
"github.com/alexei/tinyforge/internal/store"
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -129,6 +133,17 @@ func (e *Engine) RestorePath(id string) (string, error) {
|
|||||||
return "", fmt.Errorf("get backup: %w", err)
|
return "", fmt.Errorf("get backup: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filename comes from a DB row. Defence-in-depth: a backup file must live
|
||||||
|
// directly under backupDir, so reject any value carrying a path separator
|
||||||
|
// or traversal before joining. A poisoned row (future import path, manual
|
||||||
|
// insert) must never let restore read — and then atomically copy over the
|
||||||
|
// live DB — an arbitrary file. CreateBackup builds safe base names; this
|
||||||
|
// enforces the same invariant on read.
|
||||||
|
if backup.Filename == "" || backup.Filename == "." || backup.Filename == ".." ||
|
||||||
|
backup.Filename != filepath.Base(backup.Filename) {
|
||||||
|
return "", fmt.Errorf("backup: invalid filename %q", backup.Filename)
|
||||||
|
}
|
||||||
|
|
||||||
filePath := filepath.Join(e.backupDir, backup.Filename)
|
filePath := filepath.Join(e.backupDir, backup.Filename)
|
||||||
if _, err := os.Stat(filePath); err != nil {
|
if _, err := os.Stat(filePath); err != nil {
|
||||||
return "", fmt.Errorf("backup file not found: %w", err)
|
return "", fmt.Errorf("backup file not found: %w", err)
|
||||||
@@ -137,6 +152,153 @@ func (e *Engine) RestorePath(id string) (string, error) {
|
|||||||
return filePath, nil
|
return filePath, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PrepareRestore validates a backup candidate before the caller swaps it
|
||||||
|
// over the live DB. Runs three checks in order:
|
||||||
|
//
|
||||||
|
// 1. The candidate file exists and is non-empty.
|
||||||
|
// 2. SQLite header magic matches (catches corrupted or partial downloads).
|
||||||
|
// 3. `PRAGMA integrity_check` against a temp copy returns "ok"
|
||||||
|
// (catches WAL/page corruption that the header check misses).
|
||||||
|
//
|
||||||
|
// On success returns the candidate path. On failure returns a wrapped
|
||||||
|
// error describing which probe rejected the file, so the operator can
|
||||||
|
// see exactly why a "restore" was refused rather than getting a corrupt
|
||||||
|
// DB at next boot.
|
||||||
|
//
|
||||||
|
// We use a *temp copy* for integrity_check because attaching the
|
||||||
|
// candidate read-only into the live process would still hold a file
|
||||||
|
// handle SQLite considers writable on Windows.
|
||||||
|
func (e *Engine) PrepareRestore(id string) (string, error) {
|
||||||
|
path, err := e.RestorePath(id)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
info, err := os.Stat(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("restore: stat candidate: %w", err)
|
||||||
|
}
|
||||||
|
if info.Size() < 100 {
|
||||||
|
return "", fmt.Errorf("restore: candidate %s is suspiciously small (%d bytes)", path, info.Size())
|
||||||
|
}
|
||||||
|
|
||||||
|
// SQLite file header: "SQLite format 3\x00" (16 bytes).
|
||||||
|
hdr, err := readHead(path, 16)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("restore: read header: %w", err)
|
||||||
|
}
|
||||||
|
if string(hdr) != "SQLite format 3\x00" {
|
||||||
|
return "", fmt.Errorf("restore: candidate %s is not a SQLite database (header mismatch)", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := integrityCheck(path); err != nil {
|
||||||
|
return "", fmt.Errorf("restore: integrity check failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return path, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readHead returns exactly the first n bytes of the file at path.
//
// It fails if the file cannot be opened or holds fewer than n bytes; in the
// latter case the error comes from io.ReadFull (io.EOF for an empty file,
// io.ErrUnexpectedEOF for a short one).
func readHead(path string, n int) ([]byte, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	// io.ReadFull, unlike a single Read call, keeps reading until the
	// buffer is full, so a short read cannot skew the caller's
	// header comparison.
	head := make([]byte, n)
	_, err = io.ReadFull(file, head)
	if err != nil {
		return nil, err
	}
	return head, nil
}
|
||||||
|
|
||||||
|
// integrityCheck opens the SQLite file at path read-only and runs
// `PRAGMA integrity_check` against it.
//
// The connection uses mode=ro together with immutable=1 so the driver does
// not try to create WAL/SHM sidecars or upgrade the journal mode on the
// candidate — both of which fail with "attempt to write a readonly
// database" against a backup file.
//
// It returns nil only when the first result row is exactly "ok". Any other
// outcome is an error: the database could not be opened or queried, the
// result set could not be read, no row came back, or the row carries a
// corruption message (which is included in the returned error).
func integrityCheck(path string) error {
	db, err := sql.Open("sqlite", "file:"+path+"?mode=ro&immutable=1")
	if err != nil {
		return fmt.Errorf("open candidate: %w", err)
	}
	defer db.Close()

	rows, err := db.Query("PRAGMA integrity_check")
	if err != nil {
		return fmt.Errorf("pragma integrity_check: %w", err)
	}
	defer rows.Close()

	if !rows.Next() {
		// Next returns false both for an empty result set and for a
		// failure while fetching; surface the real cause when there is
		// one instead of misreporting it as "no rows".
		if err := rows.Err(); err != nil {
			return fmt.Errorf("read integrity_check: %w", err)
		}
		return fmt.Errorf("integrity_check returned no rows")
	}

	var result string
	if err := rows.Scan(&result); err != nil {
		return fmt.Errorf("scan integrity_check: %w", err)
	}
	if result != "ok" {
		return fmt.Errorf("integrity_check: %s", result)
	}
	return nil
}
|
||||||
|
|
||||||
|
// AtomicReplaceDB writes a backup candidate into place atomically.
|
||||||
|
// The caller is expected to:
|
||||||
|
// 1. Call PrepareRestore(id) → candidatePath.
|
||||||
|
// 2. Take a "pre-restore" backup of the current DB via CreateBackup.
|
||||||
|
// 3. Close the live *sql.DB.
|
||||||
|
// 4. Call AtomicReplaceDB(candidatePath, livePath).
|
||||||
|
// 5. Trigger graceful shutdown; main() will re-open on next start.
|
||||||
|
//
|
||||||
|
// AtomicReplaceDB also wipes WAL/SHM sidecar files so the new DB starts
|
||||||
|
// from a clean checkpoint state. Failure to remove sidecars is logged
|
||||||
|
// but non-fatal — SQLite recreates them on open.
|
||||||
|
func (e *Engine) AtomicReplaceDB(candidatePath, livePath string) error {
|
||||||
|
// Copy candidate to a tmp file next to the live DB, then rename
|
||||||
|
// atomically. On Windows os.Rename across volumes fails, so we
|
||||||
|
// keep tmp on the same dir as the destination.
|
||||||
|
tmp := livePath + ".restore.tmp"
|
||||||
|
if err := copyFile(candidatePath, tmp); err != nil {
|
||||||
|
return fmt.Errorf("copy candidate to %s: %w", tmp, err)
|
||||||
|
}
|
||||||
|
// Best-effort: remove WAL/SHM so SQLite re-checkpoints from the
|
||||||
|
// restored main file rather than a stale WAL pointing at the old
|
||||||
|
// DB's pages.
|
||||||
|
for _, sidecar := range []string{livePath + "-wal", livePath + "-shm"} {
|
||||||
|
if err := os.Remove(sidecar); err != nil && !os.IsNotExist(err) {
|
||||||
|
slog.Warn("restore: remove sidecar", "path", sidecar, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := os.Rename(tmp, livePath); err != nil {
|
||||||
|
// Clean up tmp on rename failure so we don't leak a partial file.
|
||||||
|
_ = os.Remove(tmp)
|
||||||
|
return fmt.Errorf("rename %s → %s: %w", tmp, livePath, err)
|
||||||
|
}
|
||||||
|
slog.Info("restore: database file replaced atomically", "live", livePath)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// copyFile copies the contents of src into dst, creating dst with mode
// 0o600 or truncating it if it already exists.
//
// The destination is fsynced before it is closed, so a successful
// return means the data was handed to stable storage rather than only
// to the OS cache. This matters because the result is renamed over the
// live database afterwards.
//
// It returns the first error from opening either file, copying,
// syncing or closing. On error dst may exist with partial content.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		_ = out.Close() // the copy error is the one worth reporting
		return err
	}
	// Flush file contents to disk before the caller renames dst into
	// place; otherwise a crash can leave a truncated file under the
	// final name.
	if err := out.Sync(); err != nil {
		_ = out.Close() // the sync error is the one worth reporting
		return err
	}
	return out.Close()
}
|
||||||
|
|
||||||
// Prune removes old backups exceeding the retention count.
|
// Prune removes old backups exceeding the retention count.
|
||||||
// Returns the number of backups pruned.
|
// Returns the number of backups pruned.
|
||||||
func (e *Engine) Prune(retentionCount int) (int, error) {
|
func (e *Engine) Prune(retentionCount int) (int, error) {
|
||||||
|
|||||||
@@ -0,0 +1,113 @@
|
|||||||
|
package backup
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newTestEngine spins up an isolated store + engine pair for tests.
|
||||||
|
// Each test gets its own tempdir so backup files do not collide.
|
||||||
|
func newTestEngine(t *testing.T) (*Engine, *store.Store, string) {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
dbPath := filepath.Join(dir, "tinyforge.db")
|
||||||
|
st, err := store.New(dbPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("store.New: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { _ = st.Close() })
|
||||||
|
|
||||||
|
eng, err := New(st, dbPath, dir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("backup.New: %v", err)
|
||||||
|
}
|
||||||
|
return eng, st, dbPath
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrepareRestore_RejectsTinyFile(t *testing.T) {
|
||||||
|
eng, st, _ := newTestEngine(t)
|
||||||
|
|
||||||
|
// Plant a backup row with a tiny file masquerading as a backup.
|
||||||
|
tinyPath := filepath.Join(eng.BackupDir(), "tinyforge-manual-junk.db")
|
||||||
|
if err := os.WriteFile(tinyPath, []byte("hi"), 0o600); err != nil {
|
||||||
|
t.Fatalf("write tiny: %v", err)
|
||||||
|
}
|
||||||
|
bk, err := st.CreateBackup(store.Backup{
|
||||||
|
Filename: "tinyforge-manual-junk.db",
|
||||||
|
SizeBytes: 2,
|
||||||
|
BackupType: "manual",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateBackup row: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := eng.PrepareRestore(bk.ID); err == nil {
|
||||||
|
t.Fatal("expected PrepareRestore to reject tiny file, got nil")
|
||||||
|
} else if !strings.Contains(err.Error(), "suspiciously small") {
|
||||||
|
t.Errorf("error = %v, want 'suspiciously small'", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrepareRestore_RejectsNonSQLite(t *testing.T) {
|
||||||
|
eng, st, _ := newTestEngine(t)
|
||||||
|
|
||||||
|
// 200 bytes of non-SQLite garbage: passes the size check, fails
|
||||||
|
// the header magic check.
|
||||||
|
garbagePath := filepath.Join(eng.BackupDir(), "tinyforge-manual-bogus.db")
|
||||||
|
junk := make([]byte, 200)
|
||||||
|
for i := range junk {
|
||||||
|
junk[i] = byte('x')
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(garbagePath, junk, 0o600); err != nil {
|
||||||
|
t.Fatalf("write junk: %v", err)
|
||||||
|
}
|
||||||
|
bk, err := st.CreateBackup(store.Backup{
|
||||||
|
Filename: "tinyforge-manual-bogus.db",
|
||||||
|
SizeBytes: int64(len(junk)),
|
||||||
|
BackupType: "manual",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateBackup row: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := eng.PrepareRestore(bk.ID); err == nil {
|
||||||
|
t.Fatal("expected PrepareRestore to reject non-SQLite blob, got nil")
|
||||||
|
} else if !strings.Contains(err.Error(), "header") {
|
||||||
|
t.Errorf("error = %v, want header mismatch", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrepareRestore_AcceptsValidVacuumInto(t *testing.T) {
|
||||||
|
eng, _, _ := newTestEngine(t)
|
||||||
|
|
||||||
|
// A fresh CreateBackup from the engine itself is, by construction,
|
||||||
|
// a valid SQLite database — VACUUM INTO produces a clean copy.
|
||||||
|
bk, err := eng.CreateBackup("manual")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateBackup: %v", err)
|
||||||
|
}
|
||||||
|
path, err := eng.PrepareRestore(bk.ID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("PrepareRestore on valid backup: %v", err)
|
||||||
|
}
|
||||||
|
if path == "" {
|
||||||
|
t.Errorf("PrepareRestore returned empty path")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPrepareRestore_UnknownID(t *testing.T) {
|
||||||
|
eng, _, _ := newTestEngine(t)
|
||||||
|
|
||||||
|
_, err := eng.PrepareRestore("nonexistent-id")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for unknown id, got nil")
|
||||||
|
}
|
||||||
|
if errors.Is(err, store.ErrNotFound) {
|
||||||
|
// fine — wrapped through RestorePath
|
||||||
|
}
|
||||||
|
}
|
||||||
+46
-10
@@ -10,11 +10,26 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ErrNoKey is returned when ENCRYPTION_KEY is not set.
|
// ErrNoKey is returned when ENCRYPTION_KEY is not set.
|
||||||
var ErrNoKey = errors.New("ENCRYPTION_KEY environment variable is not set")
|
var ErrNoKey = errors.New("ENCRYPTION_KEY environment variable is not set")
|
||||||
|
|
||||||
|
// ErrDecryptFailed wraps any cipher.Open / decoder failure. Callers
|
||||||
|
// upgrading from the silent-fallback pattern (treat-as-plaintext when
|
||||||
|
// decrypt errored) MUST instead surface this — a rotated key would
|
||||||
|
// otherwise silently leak ciphertext to upstream services as if it
|
||||||
|
// were plaintext.
|
||||||
|
var ErrDecryptFailed = errors.New("crypto: decrypt failed (wrong key, corrupted ciphertext, or unversioned legacy value)")
|
||||||
|
|
||||||
|
// envelopeV1Prefix tags ciphertext produced by Encrypt going forward.
|
||||||
|
// Older databases may carry unprefixed hex blobs from the v0 era; those
|
||||||
|
// are still readable via Decrypt for backward compatibility, but every
|
||||||
|
// new write goes through EncryptV1 and emits the prefix so a future key
|
||||||
|
// rotation has a clean fail-loud signal.
|
||||||
|
const envelopeV1Prefix = "tf1:"
|
||||||
|
|
||||||
// DeriveKey computes a 32-byte AES-256 key from the given passphrase using SHA-256.
|
// DeriveKey computes a 32-byte AES-256 key from the given passphrase using SHA-256.
|
||||||
// This is acceptable when ENCRYPTION_KEY is a high-entropy random string (e.g., 32+ hex chars).
|
// This is acceptable when ENCRYPTION_KEY is a high-entropy random string (e.g., 32+ hex chars).
|
||||||
// For human-chosen passphrases, consider Argon2id or PBKDF2 with a salt instead.
|
// For human-chosen passphrases, consider Argon2id or PBKDF2 with a salt instead.
|
||||||
@@ -35,7 +50,8 @@ func KeyFromEnv() ([32]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Encrypt encrypts plaintext using AES-256-GCM with a random nonce.
|
// Encrypt encrypts plaintext using AES-256-GCM with a random nonce.
|
||||||
// The returned ciphertext is hex-encoded: nonce || ciphertext+tag.
|
// Returns a versioned envelope (tf1:<hex>) so downstream readers can
|
||||||
|
// distinguish ciphertext from accidentally-stored plaintext.
|
||||||
func Encrypt(key [32]byte, plaintext string) (string, error) {
|
func Encrypt(key [32]byte, plaintext string) (string, error) {
|
||||||
block, err := aes.NewCipher(key[:])
|
block, err := aes.NewCipher(key[:])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -53,14 +69,34 @@ func Encrypt(key [32]byte, plaintext string) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sealed := gcm.Seal(nonce, nonce, []byte(plaintext), nil)
|
sealed := gcm.Seal(nonce, nonce, []byte(plaintext), nil)
|
||||||
return hex.EncodeToString(sealed), nil
|
return envelopeV1Prefix + hex.EncodeToString(sealed), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decrypt decrypts a hex-encoded ciphertext produced by Encrypt.
|
// HasEnvelope reports whether the value is a v1-prefixed ciphertext.
|
||||||
func Decrypt(key [32]byte, ciphertextHex string) (string, error) {
|
// Useful for router-level "decrypt only if encrypted" decision points
|
||||||
data, err := hex.DecodeString(ciphertextHex)
|
// that previously relied on `err == nil` from a try-decrypt — that
|
||||||
|
// pattern silently masked rotated-key failures.
|
||||||
|
func HasEnvelope(value string) bool {
|
||||||
|
return strings.HasPrefix(value, envelopeV1Prefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decrypt decrypts an envelope (tf1:<hex>). For backward compatibility
|
||||||
|
// it also accepts unprefixed hex from the v0 era — but only when the
|
||||||
|
// resulting plaintext is valid; a wrong key for legacy data now returns
|
||||||
|
// ErrDecryptFailed instead of silently treating ciphertext as
|
||||||
|
// plaintext.
|
||||||
|
//
|
||||||
|
// Callers MUST NOT swallow the error and fall back to "use as-is".
|
||||||
|
// That pattern is the exact footgun the envelope versioning removes.
|
||||||
|
func Decrypt(key [32]byte, ciphertext string) (string, error) {
|
||||||
|
hexBlob := ciphertext
|
||||||
|
if strings.HasPrefix(hexBlob, envelopeV1Prefix) {
|
||||||
|
hexBlob = hexBlob[len(envelopeV1Prefix):]
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := hex.DecodeString(hexBlob)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("decode hex: %w", err)
|
return "", fmt.Errorf("%w: decode hex: %v", ErrDecryptFailed, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
block, err := aes.NewCipher(key[:])
|
block, err := aes.NewCipher(key[:])
|
||||||
@@ -75,15 +111,15 @@ func Decrypt(key [32]byte, ciphertextHex string) (string, error) {
|
|||||||
|
|
||||||
nonceSize := gcm.NonceSize()
|
nonceSize := gcm.NonceSize()
|
||||||
if len(data) < nonceSize {
|
if len(data) < nonceSize {
|
||||||
return "", errors.New("ciphertext too short")
|
return "", fmt.Errorf("%w: ciphertext too short", ErrDecryptFailed)
|
||||||
}
|
}
|
||||||
|
|
||||||
nonce := data[:nonceSize]
|
nonce := data[:nonceSize]
|
||||||
ciphertext := data[nonceSize:]
|
body := data[nonceSize:]
|
||||||
|
|
||||||
plaintext, err := gcm.Open(nil, nonce, ciphertext, nil)
|
plaintext, err := gcm.Open(nil, nonce, body, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("decrypt: %w", err)
|
return "", fmt.Errorf("%w: %v", ErrDecryptFailed, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return string(plaintext), nil
|
return string(plaintext), nil
|
||||||
|
|||||||
@@ -34,7 +34,19 @@ type Deployer struct {
|
|||||||
dnsMu sync.RWMutex
|
dnsMu sync.RWMutex
|
||||||
dns dns.Provider // nil when wildcard DNS is active
|
dns dns.Provider // nil when wildcard DNS is active
|
||||||
|
|
||||||
|
// proxyMu protects hot-swap of d.proxy from runtime settings updates
|
||||||
|
// (SetProxyProvider) racing with PluginDeps() reads on the deploy path.
|
||||||
|
proxyMu sync.RWMutex
|
||||||
|
|
||||||
// Graceful shutdown: tracks in-progress deploys.
|
// Graceful shutdown: tracks in-progress deploys.
|
||||||
|
//
|
||||||
|
// drainMu serializes the "is-draining check + activeWg.Add(1)" in
|
||||||
|
// beginDispatch against the "set shuttingDown + Wait()" in Drain. Without
|
||||||
|
// it, a dispatch could pass the draining check, Drain could then flip the
|
||||||
|
// flag and start Wait() with a zero counter, and the dispatch could call
|
||||||
|
// Add(1) concurrently with Wait — a documented sync.WaitGroup misuse
|
||||||
|
// (panic risk) that also lets a deploy slip past the drain barrier.
|
||||||
|
drainMu sync.Mutex
|
||||||
activeWg sync.WaitGroup
|
activeWg sync.WaitGroup
|
||||||
shuttingDown atomic.Bool
|
shuttingDown atomic.Bool
|
||||||
}
|
}
|
||||||
@@ -73,7 +85,11 @@ func New(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// SetProxyProvider updates the proxy provider at runtime (e.g., when settings change).
|
// SetProxyProvider updates the proxy provider at runtime (e.g., when settings change).
|
||||||
|
// Guarded by proxyMu so concurrent deploys that read d.proxy via PluginDeps()
|
||||||
|
// observe a coherent value (previously a torn-pointer race under -race).
|
||||||
func (d *Deployer) SetProxyProvider(provider proxy.Provider) {
|
func (d *Deployer) SetProxyProvider(provider proxy.Provider) {
|
||||||
|
d.proxyMu.Lock()
|
||||||
|
defer d.proxyMu.Unlock()
|
||||||
d.proxy = provider
|
d.proxy = provider
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,20 +100,34 @@ func (d *Deployer) SetPreDeployBackuper(b PreDeployBackuper) {
|
|||||||
d.backuper = b
|
d.backuper = b
|
||||||
}
|
}
|
||||||
|
|
||||||
// MaybeBackupBeforeDeploy creates a "pre-deploy" Tinyforge DB snapshot when
|
// maybeBackupBeforeDeploy takes a "pre-deploy" Tinyforge DB snapshot before a
|
||||||
// the setting is enabled. Failures are logged but do not abort the deploy:
|
// deploy when the operator enabled auto_backup_before_deploy. It is called on
|
||||||
// missing a backup is preferable to refusing to ship a fix. Exposed so
|
// the unified deploy path (DispatchPlugin) so the setting actually fires — its
|
||||||
// Source plugins can opt into the same behaviour.
|
// predecessor was orphaned when the legacy executeDeploy pipeline (its only
|
||||||
func (d *Deployer) MaybeBackupBeforeDeploy(deployID string, settings store.Settings) {
|
// caller) was removed in the workload-first cutover, silently disabling the
|
||||||
if !settings.AutoBackupBeforeDeploy || d.backuper == nil {
|
// setting.
|
||||||
|
//
|
||||||
|
// Fail-open: a nil backuper, a settings-load error, or a backup failure all
|
||||||
|
// skip the snapshot without blocking the deploy — missing a backup is
|
||||||
|
// preferable to refusing to ship a fix.
|
||||||
|
func (d *Deployer) maybeBackupBeforeDeploy(workloadID string) {
|
||||||
|
if d.backuper == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
settings, err := d.store.GetSettings()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("pre-deploy backup: load settings", "workload", workloadID, "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !settings.AutoBackupBeforeDeploy {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
backup, err := d.backuper.CreateBackup("pre-deploy")
|
backup, err := d.backuper.CreateBackup("pre-deploy")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("pre-deploy backup failed", "deploy_id", deployID, "error", err)
|
slog.Warn("pre-deploy backup failed", "workload", workloadID, "error", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
slog.Info("pre-deploy backup created", "deploy_id", deployID, "backup_id", backup.ID, "filename", backup.Filename)
|
slog.Info("pre-deploy backup created", "workload", workloadID, "backup_id", backup.ID, "filename", backup.Filename)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetDNSProvider sets the DNS provider for managing DNS records during deployments.
|
// SetDNSProvider sets the DNS provider for managing DNS records during deployments.
|
||||||
@@ -110,8 +140,11 @@ func (d *Deployer) SetDNSProvider(provider dns.Provider) {
|
|||||||
|
|
||||||
// Drain waits for all in-progress deploys to complete. Call this during graceful shutdown.
|
// Drain waits for all in-progress deploys to complete. Call this during graceful shutdown.
|
||||||
func (d *Deployer) Drain() {
|
func (d *Deployer) Drain() {
|
||||||
if !d.shuttingDown.CompareAndSwap(false, true) {
|
d.drainMu.Lock()
|
||||||
// Already draining.
|
already := d.shuttingDown.Swap(true)
|
||||||
|
d.drainMu.Unlock()
|
||||||
|
if already {
|
||||||
|
slog.Info("deployer: drain already in progress")
|
||||||
}
|
}
|
||||||
slog.Info("deployer: draining in-progress deploys")
|
slog.Info("deployer: draining in-progress deploys")
|
||||||
d.activeWg.Wait()
|
d.activeWg.Wait()
|
||||||
@@ -121,11 +154,17 @@ func (d *Deployer) Drain() {
|
|||||||
// ShuttingDown reports whether Drain() has been called.
|
// ShuttingDown reports whether Drain() has been called.
|
||||||
func (d *Deployer) ShuttingDown() bool { return d.shuttingDown.Load() }
|
func (d *Deployer) ShuttingDown() bool { return d.shuttingDown.Load() }
|
||||||
|
|
||||||
// rejectIfDraining is exposed in case any plugin wants the same hard-stop
|
// beginDispatch atomically rejects when draining and otherwise registers the
|
||||||
// behaviour the legacy pipeline used.
|
// in-flight unit on activeWg. The shuttingDown check and the Add(1) MUST be
|
||||||
func (d *Deployer) rejectIfDraining() error {
|
// done together under drainMu (see the field comment): Drain sets the flag
|
||||||
|
// under the same mutex before Wait(), so once Wait() observes a zero counter
|
||||||
|
// no further Add can race it. Callers must defer d.activeWg.Done() on success.
|
||||||
|
func (d *Deployer) beginDispatch() error {
|
||||||
|
d.drainMu.Lock()
|
||||||
|
defer d.drainMu.Unlock()
|
||||||
if d.shuttingDown.Load() {
|
if d.shuttingDown.Load() {
|
||||||
return fmt.Errorf("deployer is shutting down, rejecting new deploy")
|
return fmt.Errorf("deployer is shutting down, rejecting new deploy")
|
||||||
}
|
}
|
||||||
|
d.activeWg.Add(1)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,26 +4,52 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/metrics"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
)
|
)
|
||||||
|
|
||||||
// DispatchPlugin routes a DeploymentIntent for w to the matching Source
|
// DispatchPlugin routes a DeploymentIntent for w to the matching Source
|
||||||
// plugin. This is the new unified deploy path; the legacy executeDeploy
|
// plugin. This is the unified deploy path for every source kind (the legacy
|
||||||
// remains in place until Phase 6 ports image-deploy logic into
|
// executeDeploy pipeline was removed in the workload-first cutover). When the
|
||||||
// source/image. While both exist, callers must pick: webhook/registry
|
// operator enables auto_backup_before_deploy, a pre-deploy Tinyforge DB
|
||||||
// triggers + image deploys still go through the legacy path, while
|
// snapshot is taken here, after the source resolves and before it runs.
|
||||||
// /api/hooks/generic + the unified webhook ingress go through here.
|
|
||||||
func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
func (d *Deployer) DispatchPlugin(ctx context.Context, w plugin.Workload, intent plugin.DeploymentIntent) error {
|
||||||
|
if err := d.beginDispatch(); err != nil {
|
||||||
|
metrics.DeploysTotal.Inc(w.SourceKind, "rejected_draining")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer d.activeWg.Done()
|
||||||
src, err := plugin.GetSource(w.SourceKind)
|
src, err := plugin.GetSource(w.SourceKind)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Unknown source: use the constant "unknown" sentinel for the
|
||||||
|
// label so a typo-spam attack can't grow the metrics map with
|
||||||
|
// one series per bogus source_kind. The actual user-supplied
|
||||||
|
// value still surfaces via the wrapped error / event log.
|
||||||
|
metrics.DeploysTotal.Inc("unknown", "unknown_source")
|
||||||
return fmt.Errorf("dispatch %s: %w", w.Name, err)
|
return fmt.Errorf("dispatch %s: %w", w.Name, err)
|
||||||
}
|
}
|
||||||
return src.Deploy(ctx, d.PluginDeps(), w, intent)
|
// Optional operator-enabled pre-deploy DB snapshot. Fail-open: never
|
||||||
|
// blocks shipping a deploy. Runs before any source-internal idempotency
|
||||||
|
// check (e.g. the image source's same-tag short-circuit), so a same-tag
|
||||||
|
// redeploy still snapshots — "backup before every deploy attempt".
|
||||||
|
d.maybeBackupBeforeDeploy(w.ID)
|
||||||
|
err = src.Deploy(ctx, d.PluginDeps(), w, intent)
|
||||||
|
outcome := "success"
|
||||||
|
if err != nil {
|
||||||
|
outcome = "failure"
|
||||||
|
}
|
||||||
|
metrics.DeploysTotal.Inc(w.SourceKind, outcome)
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// DispatchTeardown routes a teardown call to the matching Source plugin.
|
// DispatchTeardown routes a teardown call to the matching Source plugin.
|
||||||
// Used when a workload is deleted.
|
// Used when a workload is deleted. Tracked via activeWg so Drain() honours
|
||||||
|
// in-progress teardowns just like deploys.
|
||||||
func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) error {
|
func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) error {
|
||||||
|
if err := d.beginDispatch(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer d.activeWg.Done()
|
||||||
src, err := plugin.GetSource(w.SourceKind)
|
src, err := plugin.GetSource(w.SourceKind)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("dispatch teardown %s: %w", w.Name, err)
|
return fmt.Errorf("dispatch teardown %s: %w", w.Name, err)
|
||||||
@@ -33,8 +59,17 @@ func (d *Deployer) DispatchTeardown(ctx context.Context, w plugin.Workload) erro
|
|||||||
|
|
||||||
// DispatchReconcile routes a Reconcile call. Periodic reconciler iterates
|
// DispatchReconcile routes a Reconcile call. Periodic reconciler iterates
|
||||||
// every Workload and calls this; idle Sources should make it a cheap
|
// every Workload and calls this; idle Sources should make it a cheap
|
||||||
// no-op.
|
// no-op. Tracked via activeWg so a long-running reconcile blocks Drain().
|
||||||
func (d *Deployer) DispatchReconcile(ctx context.Context, w plugin.Workload) error {
|
func (d *Deployer) DispatchReconcile(ctx context.Context, w plugin.Workload) error {
|
||||||
|
if err := d.beginDispatch(); err != nil {
|
||||||
|
// Silent skip — reconcile is a periodic tick, not a user-initiated
|
||||||
|
// action, so we don't want to surface "draining" errors back to the
|
||||||
|
// reconciler loop. The next tick after restart will catch up. Routing
|
||||||
|
// through beginDispatch keeps the activeWg.Add atomic with the drain
|
||||||
|
// check (see Drain) instead of a bare shuttingDown.Load + Add race.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
defer d.activeWg.Done()
|
||||||
src, err := plugin.GetSource(w.SourceKind)
|
src, err := plugin.GetSource(w.SourceKind)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("dispatch reconcile %s: %w", w.Name, err)
|
return fmt.Errorf("dispatch reconcile %s: %w", w.Name, err)
|
||||||
@@ -52,10 +87,13 @@ func (d *Deployer) PluginDeps() plugin.Deps {
|
|||||||
d.dnsMu.RLock()
|
d.dnsMu.RLock()
|
||||||
dnsProvider := d.dns
|
dnsProvider := d.dns
|
||||||
d.dnsMu.RUnlock()
|
d.dnsMu.RUnlock()
|
||||||
|
d.proxyMu.RLock()
|
||||||
|
proxyProvider := d.proxy
|
||||||
|
d.proxyMu.RUnlock()
|
||||||
return plugin.Deps{
|
return plugin.Deps{
|
||||||
Store: d.store,
|
Store: d.store,
|
||||||
Docker: d.docker,
|
Docker: d.docker,
|
||||||
Proxy: d.proxy,
|
Proxy: proxyProvider,
|
||||||
DNS: dnsProvider,
|
DNS: dnsProvider,
|
||||||
Health: d.health,
|
Health: d.health,
|
||||||
Notifier: d.notifier,
|
Notifier: d.notifier,
|
||||||
|
|||||||
@@ -0,0 +1,107 @@
|
|||||||
|
package deployer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeBackuper records pre-deploy backup calls so the dispatch wiring can be
|
||||||
|
// asserted. err (when set) simulates a backup failure.
|
||||||
|
type fakeBackuper struct {
|
||||||
|
count atomic.Int32
|
||||||
|
lastType atomic.Value // string
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeBackuper) CreateBackup(backupType string) (store.Backup, error) {
|
||||||
|
f.count.Add(1)
|
||||||
|
f.lastType.Store(backupType)
|
||||||
|
if f.err != nil {
|
||||||
|
return store.Backup{}, f.err
|
||||||
|
}
|
||||||
|
return store.Backup{ID: "b1", Filename: "tinyforge-pre-deploy.db"}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func setAutoBackup(t *testing.T, d *Deployer, enabled bool) {
|
||||||
|
t.Helper()
|
||||||
|
s, err := d.store.GetSettings()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get settings: %v", err)
|
||||||
|
}
|
||||||
|
s.AutoBackupBeforeDeploy = enabled
|
||||||
|
if err := d.store.UpdateSettings(s); err != nil {
|
||||||
|
t.Fatalf("update settings: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regression: the pre-deploy backup hook was orphaned after the cutover (no
|
||||||
|
// caller on DispatchPlugin), making auto_backup_before_deploy a silent no-op.
|
||||||
|
func TestDispatchPlugin_PreDeployBackup_FiresWhenEnabled(t *testing.T) {
|
||||||
|
resetFake(t)
|
||||||
|
d := newTestDeployer(t)
|
||||||
|
b := &fakeBackuper{}
|
||||||
|
d.SetPreDeployBackuper(b)
|
||||||
|
setAutoBackup(t, d, true)
|
||||||
|
|
||||||
|
if err := d.DispatchPlugin(context.Background(), sampleWorkload(), plugin.DeploymentIntent{}); err != nil {
|
||||||
|
t.Fatalf("dispatch: %v", err)
|
||||||
|
}
|
||||||
|
if got := b.count.Load(); got != 1 {
|
||||||
|
t.Fatalf("CreateBackup called %d times, want 1", got)
|
||||||
|
}
|
||||||
|
if bt, _ := b.lastType.Load().(string); bt != "pre-deploy" {
|
||||||
|
t.Fatalf("backup type = %q, want pre-deploy", bt)
|
||||||
|
}
|
||||||
|
if got := dispatchTestSource.deployCount.Load(); got != 1 {
|
||||||
|
t.Fatalf("Deploy ran %d times, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDispatchPlugin_PreDeployBackup_SkippedWhenDisabled(t *testing.T) {
|
||||||
|
resetFake(t)
|
||||||
|
d := newTestDeployer(t)
|
||||||
|
b := &fakeBackuper{}
|
||||||
|
d.SetPreDeployBackuper(b)
|
||||||
|
setAutoBackup(t, d, false)
|
||||||
|
|
||||||
|
if err := d.DispatchPlugin(context.Background(), sampleWorkload(), plugin.DeploymentIntent{}); err != nil {
|
||||||
|
t.Fatalf("dispatch: %v", err)
|
||||||
|
}
|
||||||
|
if got := b.count.Load(); got != 0 {
|
||||||
|
t.Fatalf("CreateBackup called %d times, want 0 (setting off)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDispatchPlugin_PreDeployBackup_NilBackuperNoPanic(t *testing.T) {
|
||||||
|
resetFake(t)
|
||||||
|
d := newTestDeployer(t)
|
||||||
|
setAutoBackup(t, d, true) // enabled, but no backuper wired
|
||||||
|
|
||||||
|
if err := d.DispatchPlugin(context.Background(), sampleWorkload(), plugin.DeploymentIntent{}); err != nil {
|
||||||
|
t.Fatalf("dispatch must not panic/fail with a nil backuper: %v", err)
|
||||||
|
}
|
||||||
|
if got := dispatchTestSource.deployCount.Load(); got != 1 {
|
||||||
|
t.Fatalf("Deploy ran %d times, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDispatchPlugin_PreDeployBackup_FailOpen(t *testing.T) {
|
||||||
|
resetFake(t)
|
||||||
|
d := newTestDeployer(t)
|
||||||
|
b := &fakeBackuper{err: errors.New("disk full")}
|
||||||
|
d.SetPreDeployBackuper(b)
|
||||||
|
setAutoBackup(t, d, true)
|
||||||
|
|
||||||
|
// A failed backup is logged but must NOT block the deploy.
|
||||||
|
if err := d.DispatchPlugin(context.Background(), sampleWorkload(), plugin.DeploymentIntent{}); err != nil {
|
||||||
|
t.Fatalf("deploy must succeed when backup fails (fail-open): %v", err)
|
||||||
|
}
|
||||||
|
if got := dispatchTestSource.deployCount.Load(); got != 1 {
|
||||||
|
t.Fatalf("Deploy ran %d times, want 1 (despite backup failure)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
+119
-20
@@ -2,20 +2,58 @@ package docker
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"archive/tar"
|
"archive/tar"
|
||||||
|
"bufio"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/moby/moby/api/types/build"
|
||||||
"github.com/moby/moby/client"
|
"github.com/moby/moby/client"
|
||||||
)
|
)
|
||||||
|
|
||||||
// BuildImage builds a Docker image from a directory containing a Dockerfile.
|
// BuildImage builds a Docker image from a directory containing a Dockerfile
|
||||||
// The directory is packaged as a tar archive and sent to the Docker daemon.
|
// at the context root. Kept as a thin wrapper around BuildImageAt for the
|
||||||
// The tag parameter is the image name:tag to apply (e.g., "dw-site-myapp:latest").
|
// static-site plugin which always emits its generated Dockerfile at the
|
||||||
|
// context root. New code should prefer BuildImageAt so the Dockerfile path
|
||||||
|
// is explicit.
|
||||||
func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
||||||
|
return c.BuildImageAt(ctx, contextDir, "Dockerfile", tag, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildImageAt builds a Docker image from a tar of contextDir, using the
|
||||||
|
// Dockerfile at `dockerfile` *inside* the context (typically "Dockerfile"
|
||||||
|
// but may be e.g. "docker/Dockerfile" when the user-supplied repo layout
|
||||||
|
// keeps Dockerfiles in a subfolder).
|
||||||
|
//
|
||||||
|
// The dockerfile argument is the path *relative to contextDir*. Empty
|
||||||
|
// strings are normalised to "Dockerfile" so callers can pass through a
|
||||||
|
// user config value without sanitising twice.
|
||||||
|
//
|
||||||
|
// logFn, if non-nil, is invoked for every non-empty `stream` line the
|
||||||
|
// daemon emits during the build. Callers use this to forward live build
|
||||||
|
// progress (e.g. SSE bus). Errors from the daemon are NOT delivered via
|
||||||
|
// logFn — they surface as the returned error so the caller's failure
|
||||||
|
// path stays the single source of truth.
|
||||||
|
func (c *Client) BuildImageAt(ctx context.Context, contextDir, dockerfile, tag string, logFn func(line string)) error {
|
||||||
|
if dockerfile == "" {
|
||||||
|
dockerfile = "Dockerfile"
|
||||||
|
}
|
||||||
|
// Normalise to forward slashes — the tar entry names use them and the
|
||||||
|
// Docker daemon expects the same.
|
||||||
|
dockerfile = filepath.ToSlash(dockerfile)
|
||||||
|
// Defence-in-depth: the dockerfile path is relative to contextDir and
|
||||||
|
// is increasingly user/config-supplied (subfolder Dockerfiles). Reject
|
||||||
|
// absolute paths and any `..` traversal at the boundary so a value like
|
||||||
|
// "../../etc/passwd" can never be handed to the daemon's build options,
|
||||||
|
// regardless of which builder backend resolves it.
|
||||||
|
if filepath.IsAbs(dockerfile) || strings.HasPrefix(dockerfile, "/") ||
|
||||||
|
dockerfile == ".." || strings.HasPrefix(dockerfile, "../") || strings.Contains(dockerfile, "/../") {
|
||||||
|
return fmt.Errorf("docker build: invalid dockerfile path %q (must be relative to the build context, no traversal)", dockerfile)
|
||||||
|
}
|
||||||
// Create tar archive of the build context.
|
// Create tar archive of the build context.
|
||||||
pr, pw := io.Pipe()
|
pr, pw := io.Pipe()
|
||||||
|
|
||||||
@@ -50,16 +88,14 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
file, err := os.Open(path)
|
// Per-file close, NOT defer. `defer file.Close()` inside the
|
||||||
if err != nil {
|
// WalkFunc only runs when the outer goroutine returns — for a
|
||||||
return fmt.Errorf("open %s: %w", path, err)
|
// build context with thousands of files (node_modules-heavy
|
||||||
|
// repo) that leaks one fd per file until the walk completes
|
||||||
|
// and trips EMFILE on default ulimit=1024 systems.
|
||||||
|
if err := streamFileIntoTar(tw, path, relPath); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
defer file.Close()
|
|
||||||
|
|
||||||
if _, err := io.Copy(tw, file); err != nil {
|
|
||||||
return fmt.Errorf("copy %s to tar: %w", relPath, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -69,8 +105,16 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
|||||||
pw.CloseWithError(err)
|
pw.CloseWithError(err)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
// Pin the legacy builder explicitly. On Docker Engine 23+ BuildKit
|
||||||
|
// is the default for the CLI, but the daemon honours the explicit
|
||||||
|
// Version field on ImageBuildOptions. Legacy builder does NOT support
|
||||||
|
// `RUN --mount=type=bind,source=/host` so a malicious Dockerfile
|
||||||
|
// cannot mount host paths into the build context. Switching to
|
||||||
|
// BuildKit later requires (a) Dockerfile-content validation to
|
||||||
|
// reject bind-mount hints, or (b) an explicit per-workload opt-in.
|
||||||
resp, err := c.api.ImageBuild(ctx, pr, client.ImageBuildOptions{
|
resp, err := c.api.ImageBuild(ctx, pr, client.ImageBuildOptions{
|
||||||
Dockerfile: "Dockerfile",
|
Version: build.BuilderV1,
|
||||||
|
Dockerfile: dockerfile,
|
||||||
Tags: []string{tag},
|
Tags: []string{tag},
|
||||||
Remove: true,
|
Remove: true,
|
||||||
ForceRemove: true,
|
ForceRemove: true,
|
||||||
@@ -80,16 +124,71 @@ func (c *Client) BuildImage(ctx context.Context, contextDir, tag string) error {
|
|||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
// Read the build output to completion (required for the build to finish).
|
// Drain the daemon's NDJSON stream to completion. The stream MUST
|
||||||
output, err := io.ReadAll(resp.Body)
|
// be read for the build to finish — closing the body early aborts
|
||||||
if err != nil {
|
// the build. We parse line-by-line into the {Stream, Error} shape
|
||||||
|
// the daemon emits so an honest `{"error":"..."}` line surfaces
|
||||||
|
// without false positives from informational `{"stream":"error
|
||||||
|
// handling: retrying..."}` chatter that the old strings.Contains
|
||||||
|
// path would have flagged.
|
||||||
|
type buildLine struct {
|
||||||
|
Stream string `json:"stream,omitempty"`
|
||||||
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
scanner := bufio.NewScanner(resp.Body)
|
||||||
|
// Some build steps emit single lines exceeding the default 64 KiB
|
||||||
|
// (e.g. a fat go-mod-download dump). Bump to 1 MiB so we don't
|
||||||
|
// silently truncate and miss the trailing error line.
|
||||||
|
scanner.Buffer(make([]byte, 64*1024), 1024*1024)
|
||||||
|
var firstErr string
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Bytes()
|
||||||
|
if len(line) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var bl buildLine
|
||||||
|
if err := json.Unmarshal(line, &bl); err != nil {
|
||||||
|
// Non-JSON line — daemon shouldn't produce these, but
|
||||||
|
// don't fail the build over a parse hiccup.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if bl.Error != "" && firstErr == "" {
|
||||||
|
firstErr = bl.Error
|
||||||
|
}
|
||||||
|
if logFn != nil && bl.Stream != "" {
|
||||||
|
logFn(bl.Stream)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
return fmt.Errorf("read build output for %s: %w", tag, err)
|
return fmt.Errorf("read build output for %s: %w", tag, err)
|
||||||
}
|
}
|
||||||
|
if firstErr != "" {
|
||||||
// Check for error in build output.
|
return fmt.Errorf("build image %s: %s", tag, firstErr)
|
||||||
if strings.Contains(string(output), `"error"`) {
|
|
||||||
return fmt.Errorf("build image %s: build errors in output", tag)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// streamFileIntoTar copies the contents of the file at path into tw and
// closes that file before returning, so every call holds its file
// descriptor only for its own duration rather than for a whole directory
// walk.
//
// The function writes file data only; it does not emit a tar header.
// Presumably the caller has already written the header for relPath —
// relPath is used here purely to label copy errors.
//
// Errors:
//   - the file cannot be opened: "open <path>: ..."
//   - the copy fails, or the copy succeeds but Close fails:
//     "copy <relPath> to tar: ..." (a copy error takes precedence over a
//     close error).
func streamFileIntoTar(tw *tar.Writer, path, relPath string) error {
	src, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("open %s: %w", path, err)
	}

	_, err = io.Copy(tw, src)
	// Close unconditionally so the descriptor is released even when the
	// copy failed; only report the close error if the copy itself was fine.
	closeErr := src.Close()
	if err == nil {
		err = closeErr
	}
	if err == nil {
		return nil
	}
	return fmt.Errorf("copy %s to tar: %w", relPath, err)
}
|
||||||
|
|||||||
@@ -108,3 +108,29 @@ func (c *Client) GetSystemStats(ctx context.Context) (SystemStats, error) {
|
|||||||
|
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BuildCachePruneResult summarises what a build-cache prune removed.
// It serialises to JSON with the keys "caches_deleted" and
// "space_reclaimed".
type BuildCachePruneResult struct {
	// CachesDeleted is the count of build-cache records that were removed.
	CachesDeleted int `json:"caches_deleted"`
	// SpaceReclaimed is the amount of disk space freed, in bytes.
	SpaceReclaimed int64 `json:"space_reclaimed"`
}
|
||||||
|
|
||||||
|
// PruneBuildCache deletes unused Docker build-cache records and returns the
|
||||||
|
// number of records removed and bytes reclaimed. Docker's build-cache API is
|
||||||
|
// prune-by-filter only — there is no surgical per-record eviction — so this
|
||||||
|
// is the daemon-wide "prune unused" operation.
|
||||||
|
//
|
||||||
|
// When all is false (the default), only build cache not currently in use is
|
||||||
|
// removed, so an app's next rebuild still hits its warm cache. When all is
|
||||||
|
// true, every build-cache record is removed regardless of use, forcing a cold
|
||||||
|
// rebuild for every app.
|
||||||
|
func (c *Client) PruneBuildCache(ctx context.Context, all bool) (BuildCachePruneResult, error) {
|
||||||
|
res, err := c.api.BuildCachePrune(ctx, client.BuildCachePruneOptions{All: all})
|
||||||
|
if err != nil {
|
||||||
|
return BuildCachePruneResult{}, fmt.Errorf("prune build cache: %w", err)
|
||||||
|
}
|
||||||
|
return BuildCachePruneResult{
|
||||||
|
CachesDeleted: len(res.Report.CachesDeleted),
|
||||||
|
SpaceReclaimed: int64(res.Report.SpaceReclaimed),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -27,6 +27,13 @@ const (
|
|||||||
|
|
||||||
// EventStackStatus is emitted when a compose stack status changes.
|
// EventStackStatus is emitted when a compose stack status changes.
|
||||||
EventStackStatus EventType = "stack_status"
|
EventStackStatus EventType = "stack_status"
|
||||||
|
|
||||||
|
// EventBuildLog is emitted for each line of a streaming image build.
|
||||||
|
// Per-line events are ephemeral (not persisted to the event_log) — they
|
||||||
|
// exist to drive a live tail UI during the slow "building" phase of a
|
||||||
|
// dockerfile-source deploy. Subscribers should filter by WorkloadID
|
||||||
|
// because every dockerfile deploy on the box publishes on the same bus.
|
||||||
|
EventBuildLog EventType = "build_log"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Event is a single event published on the bus.
|
// Event is a single event published on the bus.
|
||||||
@@ -64,6 +71,7 @@ type DeployStatusPayload struct {
|
|||||||
type EventLogPayload struct {
|
type EventLogPayload struct {
|
||||||
ID int64 `json:"id"`
|
ID int64 `json:"id"`
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
|
WorkloadID string `json:"workload_id"`
|
||||||
Severity string `json:"severity"`
|
Severity string `json:"severity"`
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
Metadata string `json:"metadata"`
|
Metadata string `json:"metadata"`
|
||||||
@@ -77,6 +85,14 @@ type StaticSiteStatusPayload struct {
|
|||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BuildLogPayload is the payload carried by EventBuildLog events: a single
// line of build output attributed to a workload.
type BuildLogPayload struct {
	// WorkloadID identifies the workload whose build produced the line.
	WorkloadID string `json:"workload_id"`
	// Line is the build output line itself.
	Line string `json:"line"`
	// Stream is an optional stream label; it is omitted from the JSON
	// encoding when empty.
	Stream string `json:"stream,omitempty"`
}
|
||||||
|
|
||||||
// StackStatusPayload is the payload for EventStackStatus events.
|
// StackStatusPayload is the payload for EventStackStatus events.
|
||||||
type StackStatusPayload struct {
|
type StackStatusPayload struct {
|
||||||
StackID string `json:"stack_id"`
|
StackID string `json:"stack_id"`
|
||||||
|
|||||||
@@ -0,0 +1,349 @@
|
|||||||
|
// Package metricalert implements a background goroutine that
|
||||||
|
// periodically evaluates operator-configured metric-threshold rules
|
||||||
|
// against recent container stats samples. On breach (subject to a
|
||||||
|
// per-rule-per-workload cooldown) it emits an event into the existing
|
||||||
|
// event_log + event-bus pipeline — the same fan-out used by the
|
||||||
|
// log-scanner — instead of building any new notification plumbing.
|
||||||
|
package metricalert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/events"
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// EvalInterval is how often the evaluator tick fires.
|
||||||
|
const EvalInterval = 30 * time.Second
|
||||||
|
|
||||||
|
// lookbackSeconds bounds how far back we pull samples each tick. Stats
|
||||||
|
// are collected at most every few seconds (see internal/stats), so a
|
||||||
|
// 120s window comfortably captures the latest reading per container
|
||||||
|
// even if collection briefly stalls.
|
||||||
|
const lookbackSeconds = 120
|
||||||
|
|
||||||
|
// RuleSource is the read-side seam for fetching the current rule rows.
|
||||||
|
// Real callers pass *store.Store; tests pass a fake.
|
||||||
|
type RuleSource interface {
|
||||||
|
ListMetricAlertRules() ([]store.MetricAlertRule, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SampleSource fetches the recent container stats samples to evaluate.
|
||||||
|
type SampleSource interface {
|
||||||
|
ListAllRecentContainerStatsSamples(sinceTS int64) ([]store.ContainerStatsSample, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EventSink writes a breach into event_log.
|
||||||
|
type EventSink interface {
|
||||||
|
InsertEvent(store.EventLog) (store.EventLog, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Publisher fans the breach out on the event bus. Matches *events.Bus.
|
||||||
|
type Publisher interface {
|
||||||
|
Publish(events.Event)
|
||||||
|
}
|
||||||
|
|
||||||
|
// eventSource identifies metric-alert events in event_log and on the bus.
|
||||||
|
const eventSource = "metric_alert"
|
||||||
|
|
||||||
|
// Manager owns the evaluation loop lifecycle. It mirrors
|
||||||
|
// stats.Collector: a once-guarded Start/Stop pair with stop/done
|
||||||
|
// channels and a single-goroutine run loop.
|
||||||
|
type Manager struct {
|
||||||
|
rules RuleSource
|
||||||
|
samples SampleSource
|
||||||
|
sink EventSink
|
||||||
|
pub Publisher
|
||||||
|
|
||||||
|
// now is swappable in tests so cooldown windows can be exercised
|
||||||
|
// deterministically. Defaults to time.Now.
|
||||||
|
now func() time.Time
|
||||||
|
|
||||||
|
// mu guards lastFired. The run loop is single-goroutine today, but
|
||||||
|
// Start/Stop and a future ReloadRules may touch shared state; the
|
||||||
|
// mutex is cheap insurance.
|
||||||
|
mu sync.Mutex
|
||||||
|
lastFired map[string]time.Time // "ruleID:ownerID" -> last emit time
|
||||||
|
|
||||||
|
startOnce sync.Once
|
||||||
|
stopOnce sync.Once
|
||||||
|
started bool
|
||||||
|
stop chan struct{}
|
||||||
|
done chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// New wires a manager with the supplied dependencies. Call Start to
|
||||||
|
// begin evaluating.
|
||||||
|
func New(rules RuleSource, samples SampleSource, sink EventSink, pub Publisher) *Manager {
|
||||||
|
return &Manager{
|
||||||
|
rules: rules,
|
||||||
|
samples: samples,
|
||||||
|
sink: sink,
|
||||||
|
pub: pub,
|
||||||
|
now: time.Now,
|
||||||
|
lastFired: map[string]time.Time{},
|
||||||
|
stop: make(chan struct{}),
|
||||||
|
done: make(chan struct{}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start launches the background loop. Returns immediately. The loop
|
||||||
|
// exits when Stop is called. Safe to call multiple times — only the
|
||||||
|
// first call has an effect.
|
||||||
|
func (m *Manager) Start() {
|
||||||
|
m.startOnce.Do(func() {
|
||||||
|
m.started = true
|
||||||
|
go m.run()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop signals the loop to exit and blocks until it has finished the
|
||||||
|
// in-flight tick. If Start was never called, Stop returns immediately.
|
||||||
|
func (m *Manager) Stop() {
|
||||||
|
m.stopOnce.Do(func() {
|
||||||
|
close(m.stop)
|
||||||
|
if !m.started {
|
||||||
|
close(m.done)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
<-m.done
|
||||||
|
}
|
||||||
|
|
||||||
|
// run is the main loop. It evaluates once shortly after start, then on
|
||||||
|
// every EvalInterval tick, until Stop is called.
|
||||||
|
func (m *Manager) run() {
|
||||||
|
defer close(m.done)
|
||||||
|
|
||||||
|
// Settle delay so the app + first stats samples exist before the
|
||||||
|
// first evaluation.
|
||||||
|
select {
|
||||||
|
case <-time.After(3 * time.Second):
|
||||||
|
case <-m.stop:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ticker := time.NewTicker(EvalInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
m.evaluate(m.now())
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-m.stop:
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
m.evaluate(m.now())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// evaluate runs one pass: load rules + recent samples, reduce to the
|
||||||
|
// freshest sample per (owner, container), and emit on breach subject to
|
||||||
|
// cooldown. Best-effort throughout — a bad rule or sample never crashes
|
||||||
|
// the loop.
|
||||||
|
func (m *Manager) evaluate(now time.Time) {
|
||||||
|
rules, err := m.rules.ListMetricAlertRules()
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("metricalert: list rules", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(rules) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
since := now.Unix() - lookbackSeconds
|
||||||
|
samples, err := m.samples.ListAllRecentContainerStatsSamples(since)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("metricalert: list samples", "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
latest := latestPerContainer(samples)
|
||||||
|
if len(latest) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, rule := range rules {
|
||||||
|
if !rule.Enabled {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, sample := range latest {
|
||||||
|
// Per-workload rules only match their workload; "" matches all.
|
||||||
|
if rule.WorkloadID != "" && rule.WorkloadID != sample.OwnerID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, ok := metricValue(rule.Metric, sample)
|
||||||
|
if !ok {
|
||||||
|
continue // e.g. memory_percent with a zero limit
|
||||||
|
}
|
||||||
|
if !breached(rule.Comparator, value, rule.Threshold) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if m.coolingDown(rule, sample.OwnerID, now) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m.emit(rule, sample, value)
|
||||||
|
m.recordFire(rule, sample.OwnerID, now)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// latestPerContainer keeps only the most recent sample per
|
||||||
|
// (OwnerID, ContainerID), so each container is judged on its freshest
|
||||||
|
// reading rather than every historical row in the window.
|
||||||
|
func latestPerContainer(samples []store.ContainerStatsSample) []store.ContainerStatsSample {
|
||||||
|
newest := map[string]store.ContainerStatsSample{}
|
||||||
|
for _, s := range samples {
|
||||||
|
key := s.OwnerID + "\x00" + s.ContainerID
|
||||||
|
if prev, ok := newest[key]; !ok || s.TS > prev.TS {
|
||||||
|
newest[key] = s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out := make([]store.ContainerStatsSample, 0, len(newest))
|
||||||
|
for _, s := range newest {
|
||||||
|
out = append(out, s)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// metricValue resolves a rule's metric against a sample. The bool is
|
||||||
|
// false when the sample can't be judged for that metric (memory_percent
|
||||||
|
// with a zero/unknown limit) so the caller skips it instead of dividing
|
||||||
|
// by zero.
|
||||||
|
func metricValue(metric string, s store.ContainerStatsSample) (float64, bool) {
|
||||||
|
switch metric {
|
||||||
|
case store.MetricCPUPercent:
|
||||||
|
return s.CPUPercent, true
|
||||||
|
case store.MetricMemoryPercent:
|
||||||
|
if s.MemoryLimit <= 0 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return float64(s.MemoryUsage) / float64(s.MemoryLimit) * 100, true
|
||||||
|
case store.MetricMemoryBytes:
|
||||||
|
return float64(s.MemoryUsage), true
|
||||||
|
default:
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// breached returns whether value crosses threshold per the comparator.
|
||||||
|
func breached(comparator string, value, threshold float64) bool {
|
||||||
|
switch comparator {
|
||||||
|
case store.MetricComparatorGT:
|
||||||
|
return value > threshold
|
||||||
|
case store.MetricComparatorLT:
|
||||||
|
return value < threshold
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// cooldownKey builds the cooldown-table key for a rule/workload pair in
// the form "<ruleID>:<ownerID>", with ruleID rendered in decimal.
func cooldownKey(ruleID int64, ownerID string) string {
	return fmt.Sprint(ruleID) + ":" + ownerID
}
|
||||||
|
|
||||||
|
func (m *Manager) coolingDown(rule store.MetricAlertRule, ownerID string, now time.Time) bool {
|
||||||
|
if rule.CooldownSeconds <= 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
last, ok := m.lastFired[cooldownKey(rule.ID, ownerID)]
|
||||||
|
if !ok {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return now.Sub(last) < time.Duration(rule.CooldownSeconds)*time.Second
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Manager) recordFire(rule store.MetricAlertRule, ownerID string, now time.Time) {
|
||||||
|
m.mu.Lock()
|
||||||
|
m.lastFired[cooldownKey(rule.ID, ownerID)] = now
|
||||||
|
m.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// emit persists the breach as an event_log row and publishes it on the
|
||||||
|
// bus. WorkloadID routes the alert to that app's activity timeline.
|
||||||
|
// Metadata is JSON-marshalled (never string-concatenated). Any
|
||||||
|
// marshal/insert failure is logged and skipped — emitting must never
|
||||||
|
// crash the loop.
|
||||||
|
func (m *Manager) emit(rule store.MetricAlertRule, sample store.ContainerStatsSample, value float64) {
|
||||||
|
message := formatMessage(rule, value)
|
||||||
|
meta := map[string]any{
|
||||||
|
"workload_id": sample.OwnerID,
|
||||||
|
"rule": rule.Name,
|
||||||
|
"metric": rule.Metric,
|
||||||
|
"value": value,
|
||||||
|
"threshold": rule.Threshold,
|
||||||
|
"comparator": rule.Comparator,
|
||||||
|
}
|
||||||
|
metaJSON, err := json.Marshal(meta)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("metricalert: marshal metadata", "rule", rule.Name, "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
severity := rule.Severity
|
||||||
|
if severity == "" {
|
||||||
|
severity = store.LogScanSeverityWarn
|
||||||
|
}
|
||||||
|
evt, err := m.sink.InsertEvent(store.EventLog{
|
||||||
|
Source: eventSource,
|
||||||
|
Severity: severity,
|
||||||
|
Message: message,
|
||||||
|
WorkloadID: sample.OwnerID,
|
||||||
|
Metadata: string(metaJSON),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("metricalert: persist event", "rule", rule.Name, "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if m.pub != nil {
|
||||||
|
m.pub.Publish(events.Event{
|
||||||
|
Type: events.EventLog,
|
||||||
|
Payload: events.EventLogPayload{
|
||||||
|
ID: evt.ID,
|
||||||
|
Source: eventSource,
|
||||||
|
WorkloadID: sample.OwnerID,
|
||||||
|
Severity: severity,
|
||||||
|
Message: message,
|
||||||
|
Metadata: string(metaJSON),
|
||||||
|
CreatedAt: evt.CreatedAt,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatMessage builds a concise, human, secret-free breach line. The
|
||||||
|
// only operator-supplied text is rule.Name; the rest are numbers and
|
||||||
|
// fixed labels.
|
||||||
|
func formatMessage(rule store.MetricAlertRule, value float64) string {
|
||||||
|
label, unit := metricLabelUnit(rule.Metric)
|
||||||
|
word := comparatorWord(rule.Comparator)
|
||||||
|
return fmt.Sprintf("%s: %s is %.0f%s (threshold %s %.0f%s)",
|
||||||
|
rule.Name, label, value, unit, word, rule.Threshold, unit)
|
||||||
|
}
|
||||||
|
|
||||||
|
func metricLabelUnit(metric string) (label, unit string) {
|
||||||
|
switch metric {
|
||||||
|
case store.MetricCPUPercent:
|
||||||
|
return "CPU", "%"
|
||||||
|
case store.MetricMemoryPercent:
|
||||||
|
return "Memory", "%"
|
||||||
|
case store.MetricMemoryBytes:
|
||||||
|
return "Memory", " bytes"
|
||||||
|
default:
|
||||||
|
return metric, ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func comparatorWord(comparator string) string {
|
||||||
|
switch comparator {
|
||||||
|
case store.MetricComparatorGT:
|
||||||
|
return ">"
|
||||||
|
case store.MetricComparatorLT:
|
||||||
|
return "<"
|
||||||
|
default:
|
||||||
|
return comparator
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,284 @@
|
|||||||
|
package metricalert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/events"
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// --- fakes -----------------------------------------------------------
|
||||||
|
|
||||||
|
type fakeRules struct {
|
||||||
|
rules []store.MetricAlertRule
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeRules) ListMetricAlertRules() ([]store.MetricAlertRule, error) {
|
||||||
|
return f.rules, f.err
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeSamples struct {
|
||||||
|
samples []store.ContainerStatsSample
|
||||||
|
err error
|
||||||
|
since int64 // captured arg of the last call
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeSamples) ListAllRecentContainerStatsSamples(sinceTS int64) ([]store.ContainerStatsSample, error) {
|
||||||
|
f.since = sinceTS
|
||||||
|
return f.samples, f.err
|
||||||
|
}
|
||||||
|
|
||||||
|
type recordedEvent struct {
|
||||||
|
evt store.EventLog
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeSink struct {
|
||||||
|
events []recordedEvent
|
||||||
|
err error
|
||||||
|
nextID int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeSink) InsertEvent(e store.EventLog) (store.EventLog, error) {
|
||||||
|
if f.err != nil {
|
||||||
|
return store.EventLog{}, f.err
|
||||||
|
}
|
||||||
|
f.nextID++
|
||||||
|
e.ID = f.nextID
|
||||||
|
e.CreatedAt = "2026-05-29T00:00:00Z"
|
||||||
|
f.events = append(f.events, recordedEvent{evt: e})
|
||||||
|
return e, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakePublisher struct {
|
||||||
|
published []events.Event
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakePublisher) Publish(e events.Event) {
|
||||||
|
f.published = append(f.published, e)
|
||||||
|
}
|
||||||
|
|
||||||
|
func newManager(rules []store.MetricAlertRule, samples []store.ContainerStatsSample) (*Manager, *fakeSink, *fakePublisher) {
|
||||||
|
sink := &fakeSink{}
|
||||||
|
pub := &fakePublisher{}
|
||||||
|
m := New(&fakeRules{rules: rules}, &fakeSamples{samples: samples}, sink, pub)
|
||||||
|
return m, sink, pub
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- tests -----------------------------------------------------------
|
||||||
|
|
||||||
|
func TestEvaluate_BreachEmits(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "cpu-hot", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 80, Severity: "error",
|
||||||
|
CooldownSeconds: 300, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{{
|
||||||
|
ContainerID: "c1", OwnerID: "w1", OwnerType: "instance", TS: 100, CPUPercent: 95,
|
||||||
|
}}
|
||||||
|
m, sink, pub := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 1 {
|
||||||
|
t.Fatalf("expected 1 event, got %d", len(sink.events))
|
||||||
|
}
|
||||||
|
got := sink.events[0].evt
|
||||||
|
if got.Source != "metric_alert" {
|
||||||
|
t.Errorf("source = %q, want metric_alert", got.Source)
|
||||||
|
}
|
||||||
|
if got.Severity != "error" {
|
||||||
|
t.Errorf("severity = %q, want error", got.Severity)
|
||||||
|
}
|
||||||
|
if got.WorkloadID != "w1" {
|
||||||
|
t.Errorf("workload_id = %q, want w1", got.WorkloadID)
|
||||||
|
}
|
||||||
|
if got.Metadata == "" || got.Metadata == "{}" {
|
||||||
|
t.Errorf("metadata should be populated JSON, got %q", got.Metadata)
|
||||||
|
}
|
||||||
|
if len(pub.published) != 1 {
|
||||||
|
t.Fatalf("expected 1 published event, got %d", len(pub.published))
|
||||||
|
}
|
||||||
|
payload, ok := pub.published[0].Payload.(events.EventLogPayload)
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("published payload is not EventLogPayload")
|
||||||
|
}
|
||||||
|
if payload.WorkloadID != "w1" || payload.Source != "metric_alert" {
|
||||||
|
t.Errorf("payload workload/source mismatch: %+v", payload)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_NoBreachNoEmit(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "cpu-hot", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 80, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{{
|
||||||
|
ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 10,
|
||||||
|
}}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 0 {
|
||||||
|
t.Fatalf("expected no events for non-breach, got %d", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_DisabledRuleSkipped(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "cpu-hot", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 80, Enabled: false,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95}}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 0 {
|
||||||
|
t.Fatalf("disabled rule should not emit, got %d", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_PerWorkloadScoping(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "w2-only", WorkloadID: "w2", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 80, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{
|
||||||
|
{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95}, // breach but wrong workload
|
||||||
|
{ContainerID: "c2", OwnerID: "w2", TS: 100, CPUPercent: 95}, // breach, correct workload
|
||||||
|
}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 1 {
|
||||||
|
t.Fatalf("expected 1 event (only w2), got %d", len(sink.events))
|
||||||
|
}
|
||||||
|
if sink.events[0].evt.WorkloadID != "w2" {
|
||||||
|
t.Errorf("event should be scoped to w2, got %q", sink.events[0].evt.WorkloadID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_GlobalRuleMatchesAll(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "global", WorkloadID: "", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 80, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{
|
||||||
|
{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95},
|
||||||
|
{ContainerID: "c2", OwnerID: "w2", TS: 100, CPUPercent: 95},
|
||||||
|
}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 2 {
|
||||||
|
t.Fatalf("global rule should fire for both workloads, got %d", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_MemoryPercentDivByZeroSkip(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "mem", Metric: store.MetricMemoryPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 50, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{{
|
||||||
|
ContainerID: "c1", OwnerID: "w1", TS: 100, MemoryUsage: 1000, MemoryLimit: 0,
|
||||||
|
}}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 0 {
|
||||||
|
t.Fatalf("zero memory limit should be skipped for percent rule, got %d", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_MemoryPercentBreaches(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "mem", Metric: store.MetricMemoryPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 90, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{{
|
||||||
|
ContainerID: "c1", OwnerID: "w1", TS: 100, MemoryUsage: 950, MemoryLimit: 1000, // 95%
|
||||||
|
}}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 1 {
|
||||||
|
t.Fatalf("95%% should breach 90%% threshold, got %d events", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_CooldownSuppressesSecondEmit(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "cpu-hot", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 80, CooldownSeconds: 300, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 95}}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
base := time.Unix(1000, 0)
|
||||||
|
m.evaluate(base)
|
||||||
|
// 10s later — still inside the 300s cooldown window.
|
||||||
|
m.evaluate(base.Add(10 * time.Second))
|
||||||
|
|
||||||
|
if len(sink.events) != 1 {
|
||||||
|
t.Fatalf("cooldown should suppress second emit, got %d events", len(sink.events))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Past the window — should fire again.
|
||||||
|
m.evaluate(base.Add(301 * time.Second))
|
||||||
|
if len(sink.events) != 2 {
|
||||||
|
t.Fatalf("should re-fire after cooldown elapses, got %d events", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_LatestSamplePerContainer(t *testing.T) {
|
||||||
|
// Two samples for the same container: an old non-breaching reading
|
||||||
|
// and a newer breaching one. Only the freshest should be judged.
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "cpu-hot", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorGT, Threshold: 80, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{
|
||||||
|
{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 10},
|
||||||
|
{ContainerID: "c1", OwnerID: "w1", TS: 150, CPUPercent: 95},
|
||||||
|
}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 1 {
|
||||||
|
t.Fatalf("expected exactly 1 event from freshest sample, got %d", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_LessThanComparator(t *testing.T) {
|
||||||
|
rules := []store.MetricAlertRule{{
|
||||||
|
ID: 1, Name: "cpu-idle", Metric: store.MetricCPUPercent,
|
||||||
|
Comparator: store.MetricComparatorLT, Threshold: 5, Enabled: true,
|
||||||
|
}}
|
||||||
|
samples := []store.ContainerStatsSample{{ContainerID: "c1", OwnerID: "w1", TS: 100, CPUPercent: 1}}
|
||||||
|
m, sink, _ := newManager(rules, samples)
|
||||||
|
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
|
||||||
|
if len(sink.events) != 1 {
|
||||||
|
t.Fatalf("1%% < 5%% threshold should breach lt rule, got %d events", len(sink.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEvaluate_NoRulesNoFetch(t *testing.T) {
|
||||||
|
// With no rules there's nothing to do; we shouldn't even query samples.
|
||||||
|
samplesSrc := &fakeSamples{samples: nil}
|
||||||
|
m := New(&fakeRules{rules: nil}, samplesSrc, &fakeSink{}, &fakePublisher{})
|
||||||
|
m.evaluate(time.Unix(200, 0))
|
||||||
|
if samplesSrc.since != 0 {
|
||||||
|
t.Errorf("samples should not be queried when there are no rules")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,250 @@
|
|||||||
|
// Package metrics provides a minimal Prometheus text-format exposition
|
||||||
|
// of Tinyforge's operational counters. We deliberately do NOT import the
|
||||||
|
// official client_golang library: the metrics set here is small, the text
|
||||||
|
// format is simple, and avoiding the dependency keeps `tinyforge` a fast
|
||||||
|
// single-binary install.
|
||||||
|
//
|
||||||
|
// Every counter is a sync/atomic.Int64 — cheap, lock-free, and safe to
|
||||||
|
// touch from any goroutine. Histograms / gauges aren't modeled yet; the
|
||||||
|
// few we need (request latency p50/p99) live downstream of slog and can
|
||||||
|
// be added when the operator actually wants them.
|
||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Registry holds a set of counters indexed by metric name. mu guards the
// counters map; the per-series values live on each counter and carry their
// own synchronization. See DefaultRegistry below for the recommended way
// to grab the global handle.
type Registry struct {
	mu       sync.RWMutex
	counters map[string]*counter
}
|
||||||
|
|
||||||
|
// counter is the registry-side state behind a Counter handle: the
// exposition metadata (name, help, label names) plus one atomic cell per
// distinct label-value tuple.
type counter struct {
	name   string
	help   string
	labels []string // label names, ordered as declared at registration
	// series maps an encoded label-value tuple (see encodeKey) to its cell.
	series map[string]*atomic.Int64
	// seriesMu guards every access to the series map itself: the
	// lookup-or-insert in Add and the snapshot in writeCounter. The value
	// update and read (atomic Add / Load) happen after the lock is
	// released.
	seriesMu sync.Mutex
}
|
||||||
|
|
||||||
|
// DefaultRegistry is the process-wide registry. All Tinyforge metrics
|
||||||
|
// register against it. Tests can instantiate their own Registry.
|
||||||
|
var DefaultRegistry = newRegistry()
|
||||||
|
|
||||||
|
func newRegistry() *Registry {
|
||||||
|
return &Registry{counters: make(map[string]*counter)}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCounter declares a counter on the default registry. Call once at
|
||||||
|
// package init or during NewServer; subsequent calls with the same name
|
||||||
|
// return the existing counter so re-registration is safe.
|
||||||
|
//
|
||||||
|
// label names define the dimensions; calls to Inc must pass values in
|
||||||
|
// the same order. Use the empty slice for label-less counters.
|
||||||
|
func NewCounter(name, help string, labels ...string) *Counter {
|
||||||
|
return DefaultRegistry.NewCounter(name, help, labels...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCounter on a specific Registry — useful in tests.
|
||||||
|
func (r *Registry) NewCounter(name, help string, labels ...string) *Counter {
|
||||||
|
r.mu.Lock()
|
||||||
|
defer r.mu.Unlock()
|
||||||
|
if c, ok := r.counters[name]; ok {
|
||||||
|
return &Counter{c: c}
|
||||||
|
}
|
||||||
|
c := &counter{
|
||||||
|
name: name,
|
||||||
|
help: help,
|
||||||
|
labels: append([]string(nil), labels...),
|
||||||
|
series: make(map[string]*atomic.Int64),
|
||||||
|
}
|
||||||
|
r.counters[name] = c
|
||||||
|
return &Counter{c: c}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Counter is the public handle returned by NewCounter. It wraps a single
// pointer to the registry-owned state, so copies of a Counter all refer to
// the same underlying series and the handle can be passed around as a
// value.
type Counter struct {
	c *counter // shared registry-side state; set by NewCounter
}
|
||||||
|
|
||||||
|
// Inc atomically increments the counter for the given label values by one.
// It is shorthand for Add(1, labelValues...). Passing the wrong number of
// values is a programmer error; Add logs it and drops the sample rather
// than aggregating into a bogus series.
func (c Counter) Inc(labelValues ...string) {
	c.Add(1, labelValues...)
}
|
||||||
|
|
||||||
|
// Add atomically adds delta. Negative delta is rejected (counters are
|
||||||
|
// monotonic by definition).
|
||||||
|
func (c Counter) Add(delta int64, labelValues ...string) {
|
||||||
|
if delta < 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if len(labelValues) != len(c.c.labels) {
|
||||||
|
// Programmer error. This used to panic to surface the bug, but Add
|
||||||
|
// runs on hot paths (HTTP middleware, deploy dispatch) and several
|
||||||
|
// callers are off the request goroutine, where a panic would take
|
||||||
|
// down the whole process rather than a single request. Log loudly
|
||||||
|
// and drop the sample so a mislabeled call site can never crash the
|
||||||
|
// server; the bug still shows up immediately in the logs and in
|
||||||
|
// tests via the error output.
|
||||||
|
slog.Error("metrics: label count mismatch — dropping sample",
|
||||||
|
"counter", c.c.name, "want", len(c.c.labels), "got", len(labelValues))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
key := encodeKey(labelValues)
|
||||||
|
c.c.seriesMu.Lock()
|
||||||
|
v, ok := c.c.series[key]
|
||||||
|
if !ok {
|
||||||
|
v = new(atomic.Int64)
|
||||||
|
c.c.series[key] = v
|
||||||
|
}
|
||||||
|
c.c.seriesMu.Unlock()
|
||||||
|
v.Add(delta)
|
||||||
|
}
|
||||||
|
|
||||||
|
// encodeKey joins label values with a 0x1f (unit separator) byte to form
// the map index for one series. Prometheus label values may contain
// anything except `"` and `\n`, which are escaped on exposition only — the
// key here is just a map index.
//
// Because 0x1f is the field separator, a value that itself contains 0x1f
// would make two different tuples encode to the same key and would change
// the number of fields seen when the key is split again. Any 0x1f inside a
// value is therefore replaced with U+FFFD before joining, so the result
// always contains exactly len(values)-1 separators. The caller's slice is
// never modified; the sanitized copy is only allocated when needed.
func encodeKey(values []string) string {
	const sep = "\x1f"
	for i, v := range values {
		if !strings.Contains(v, sep) {
			continue
		}
		// Slow path: at least one value carries the separator. Everything
		// before index i is already known to be clean.
		clean := make([]string, len(values))
		copy(clean, values[:i])
		for j := i; j < len(values); j++ {
			clean[j] = strings.ReplaceAll(values[j], sep, "\uFFFD")
		}
		return strings.Join(clean, sep)
	}
	return strings.Join(values, sep)
}
|
||||||
|
|
||||||
|
// WritePrometheus dumps the registry in the text exposition format
|
||||||
|
// Prometheus / VictoriaMetrics / OpenMetrics understands. Stable
|
||||||
|
// ordering: counters alphabetical by name; series alphabetical by
|
||||||
|
// encoded label tuple.
|
||||||
|
func (r *Registry) WritePrometheus(w io.Writer) error {
|
||||||
|
r.mu.RLock()
|
||||||
|
names := make([]string, 0, len(r.counters))
|
||||||
|
for n := range r.counters {
|
||||||
|
names = append(names, n)
|
||||||
|
}
|
||||||
|
r.mu.RUnlock()
|
||||||
|
sort.Strings(names)
|
||||||
|
|
||||||
|
for _, name := range names {
|
||||||
|
r.mu.RLock()
|
||||||
|
c := r.counters[name]
|
||||||
|
r.mu.RUnlock()
|
||||||
|
if err := writeCounter(w, c); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeCounter(w io.Writer, c *counter) error {
|
||||||
|
if _, err := fmt.Fprintf(w, "# HELP %s %s\n# TYPE %s counter\n", c.name, escapeHelp(c.help), c.name); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Snapshot the series map under a SINGLE lock acquisition. The
|
||||||
|
// previous shape acquired+released seriesMu twice per emitted
|
||||||
|
// series (once for the key list, once per Load), contending with
|
||||||
|
// every hot-path Inc on the HTTP request path. The *atomic.Int64
|
||||||
|
// pointers are stable for the lifetime of the registry (we never
|
||||||
|
// delete entries), so reading them after the unlock is safe.
|
||||||
|
type sample struct {
|
||||||
|
key string
|
||||||
|
val *atomic.Int64
|
||||||
|
}
|
||||||
|
c.seriesMu.Lock()
|
||||||
|
samples := make([]sample, 0, len(c.series))
|
||||||
|
for k, v := range c.series {
|
||||||
|
samples = append(samples, sample{k, v})
|
||||||
|
}
|
||||||
|
c.seriesMu.Unlock()
|
||||||
|
|
||||||
|
sort.Slice(samples, func(i, j int) bool { return samples[i].key < samples[j].key })
|
||||||
|
|
||||||
|
for _, s := range samples {
|
||||||
|
val := s.val.Load()
|
||||||
|
labels := decodeKey(s.key, c.labels)
|
||||||
|
if labels == "" {
|
||||||
|
if _, err := fmt.Fprintf(w, "%s %d\n", c.name, val); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, err := fmt.Fprintf(w, "%s{%s} %d\n", c.name, labels, val); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func decodeKey(key string, names []string) string {
|
||||||
|
if key == "" || len(names) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
values := strings.Split(key, "\x1f")
|
||||||
|
if len(values) != len(names) {
|
||||||
|
// Should not happen — encodeKey/decode are symmetric.
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
parts := make([]string, len(names))
|
||||||
|
for i, n := range names {
|
||||||
|
parts[i] = fmt.Sprintf(`%s="%s"`, n, escapeLabelValue(values[i]))
|
||||||
|
}
|
||||||
|
return strings.Join(parts, ",")
|
||||||
|
}
|
||||||
|
|
||||||
|
// helpEscaper rewrites the two characters that must be escaped in a
// "# HELP" line. It is built once at package init rather than on every
// call; a strings.Replacer is safe for concurrent use.
var helpEscaper = strings.NewReplacer("\\", "\\\\", "\n", "\\n")

// escapeHelp escapes s for use as the docstring of a "# HELP" line:
// backslash becomes `\\` and newline becomes `\n`. Double quotes are left
// untouched.
func escapeHelp(s string) string {
	return helpEscaper.Replace(s)
}
|
||||||
|
|
||||||
|
// labelValueEscaper rewrites the three characters that must be escaped
// inside a quoted label value. It is built once at package init rather
// than once per label per series per scrape; a strings.Replacer is safe
// for concurrent use.
var labelValueEscaper = strings.NewReplacer("\\", "\\\\", "\n", "\\n", `"`, `\"`)

// escapeLabelValue escapes s for use inside the double quotes of a label
// value: backslash becomes `\\`, newline becomes `\n`, and a double quote
// becomes `\"`.
func escapeLabelValue(s string) string {
	return labelValueEscaper.Replace(s)
}
|
||||||
|
|
||||||
|
// ── Pre-declared counters ────────────────────────────────────────────
//
// These are the counters Tinyforge surfaces to operators. Adding more is
// a one-line NewCounter call at the call site — no central catalogue,
// just keep names lowercase_snake with the `tinyforge_` prefix.
//
// All of them are registered on DefaultRegistry during package
// initialization. Callers must pass label values in the order the label
// names are listed here.

var (
	// HTTPRequestsTotal counts handled HTTP requests.
	// Label order: method, status_class.
	HTTPRequestsTotal = NewCounter(
		"tinyforge_http_requests_total",
		"Total HTTP requests handled, partitioned by method and outcome class.",
		"method", "status_class",
	)
	// DeploysTotal counts dispatched deploys.
	// Label order: source_kind, outcome.
	DeploysTotal = NewCounter(
		"tinyforge_deploys_total",
		"Total deploys dispatched, partitioned by source kind and outcome.",
		"source_kind", "outcome",
	)
	// WebhookDeliveriesTotal counts inbound webhook deliveries.
	// Label order: outcome.
	WebhookDeliveriesTotal = NewCounter(
		"tinyforge_webhook_deliveries_total",
		"Total inbound webhook deliveries, partitioned by outcome.",
		"outcome",
	)
	// SchedulerTicksTotal counts scheduler ticks. It has no labels.
	SchedulerTicksTotal = NewCounter(
		"tinyforge_scheduler_ticks_total",
		"Total scheduler ticks. The dispatched counter is the success measure.",
	)
	// SchedulerDispatchedTotal counts triggers the scheduler actually
	// dispatched. It has no labels.
	SchedulerDispatchedTotal = NewCounter(
		"tinyforge_scheduler_dispatched_total",
		"Triggers actually dispatched by the scheduler.",
	)
	// OutboundNotifyTotal counts outbound notification attempts.
	// Label order: outcome. The notifier records the outcome values
	// "success", "failure", and "dropped".
	OutboundNotifyTotal = NewCounter(
		"tinyforge_outbound_notify_total",
		"Outbound notification dispatch attempts, partitioned by outcome.",
		"outcome",
	)
)
|
||||||
@@ -16,6 +16,8 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Event represents a deployment / site-sync notification payload.
|
// Event represents a deployment / site-sync notification payload.
|
||||||
@@ -83,17 +85,68 @@ type TestResult struct {
|
|||||||
// Notifications are fire-and-forget by default — failures are logged but do
|
// Notifications are fire-and-forget by default — failures are logged but do
|
||||||
// not propagate. SendSyncForTest is the exception, used only by the manual
|
// not propagate. SendSyncForTest is the exception, used only by the manual
|
||||||
// test endpoint.
|
// test endpoint.
|
||||||
|
//
|
||||||
|
// outboundSem caps the number of in-flight outbound notifications. Without
|
||||||
|
// it a single burst (e.g. 1000 event triggers firing on a noisy log scan)
|
||||||
|
// would spawn 1000 simultaneous TCP connections, which both DoSes the
|
||||||
|
// receiver and exhausts local FDs.
|
||||||
type Notifier struct {
|
type Notifier struct {
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
|
outboundSem chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maxOutboundNotifications bounds the in-flight outbound webhook fan-out.
|
||||||
|
// Sized to keep small bursts non-blocking while preventing a runaway storm
|
||||||
|
// from starving the rest of the process. Tunable later via settings if any
|
||||||
|
// operator legitimately needs more concurrency.
|
||||||
|
const maxOutboundNotifications = 32
|
||||||
|
|
||||||
// New creates a Notifier with sensible defaults.
|
// New creates a Notifier with sensible defaults.
|
||||||
func New() *Notifier {
|
func New() *Notifier {
|
||||||
|
// Transport with bounded host pooling so a slow receiver cannot pin
|
||||||
|
// arbitrarily many sockets open. MaxConnsPerHost mirrors the worker
|
||||||
|
// pool size; idle pruning keeps long-lived processes from holding
|
||||||
|
// stale TCP entries indefinitely.
|
||||||
|
//
|
||||||
|
// NOTE: we deliberately do NOT apply the staticsite SSRF dialer here.
|
||||||
|
// Notification URLs are admin-configured, and an admin already has
|
||||||
|
// Docker-socket (host-root-equivalent) access, so the SSRF surface adds
|
||||||
|
// nothing they couldn't already reach. Blocking loopback/private targets
|
||||||
|
// would instead break the common self-hosted pattern of notifying a
|
||||||
|
// same-host sidecar/bridge (e.g. service-to-notification-bridge on
|
||||||
|
// 127.0.0.1). See the security review (rated LOW / out of trust boundary).
|
||||||
|
tr := &http.Transport{
|
||||||
|
MaxIdleConns: 64,
|
||||||
|
MaxIdleConnsPerHost: 8,
|
||||||
|
MaxConnsPerHost: maxOutboundNotifications,
|
||||||
|
IdleConnTimeout: 90 * time.Second,
|
||||||
|
}
|
||||||
return &Notifier{
|
return &Notifier{
|
||||||
httpClient: &http.Client{
|
httpClient: &http.Client{
|
||||||
Timeout: 10 * time.Second,
|
Timeout: 10 * time.Second,
|
||||||
|
Transport: tr,
|
||||||
},
|
},
|
||||||
|
outboundSem: make(chan struct{}, maxOutboundNotifications),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// acquireSlot reserves an outbound slot, respecting ctx so a backed-up
|
||||||
|
// queue cannot starve a request that already has its own deadline.
|
||||||
|
func (n *Notifier) acquireSlot(ctx context.Context) bool {
|
||||||
|
select {
|
||||||
|
case n.outboundSem <- struct{}{}:
|
||||||
|
return true
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Notifier) releaseSlot() {
|
||||||
|
select {
|
||||||
|
case <-n.outboundSem:
|
||||||
|
default:
|
||||||
|
// Drained during shutdown — never block.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -128,8 +181,15 @@ func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event)
|
|||||||
n.wg.Add(1)
|
n.wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer n.wg.Done()
|
defer n.wg.Done()
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
if !n.acquireSlot(ctx) {
|
||||||
|
slog.Warn("notify: dropped — outbound queue saturated",
|
||||||
|
"tier", tier, "host", safeHost(webhookURL), "delivery", delivery, "event", event.Type)
|
||||||
|
metrics.OutboundNotifyTotal.Inc("dropped")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer n.releaseSlot()
|
||||||
|
|
||||||
_, err := n.doSend(ctx, webhookURL, secret, tier, delivery, event)
|
_, err := n.doSend(ctx, webhookURL, secret, tier, delivery, event)
|
||||||
// URL host only — never log the secret or full URL with user-info.
|
// URL host only — never log the secret or full URL with user-info.
|
||||||
@@ -138,11 +198,13 @@ func (n *Notifier) SendSigned(webhookURL, secret string, tier Tier, event Event)
|
|||||||
slog.Warn("notify: webhook send failed",
|
slog.Warn("notify: webhook send failed",
|
||||||
"tier", tier, "host", host, "delivery", delivery,
|
"tier", tier, "host", host, "delivery", delivery,
|
||||||
"event", event.Type, "signed", secret != "", "error", err)
|
"event", event.Type, "signed", secret != "", "error", err)
|
||||||
|
metrics.OutboundNotifyTotal.Inc("failure")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
slog.Info("notify: webhook dispatched",
|
slog.Info("notify: webhook dispatched",
|
||||||
"tier", tier, "host", host, "delivery", delivery,
|
"tier", tier, "host", host, "delivery", delivery,
|
||||||
"event", event.Type, "signed", secret != "")
|
"event", event.Type, "signed", secret != "")
|
||||||
|
metrics.OutboundNotifyTotal.Inc("success")
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,8 +228,15 @@ func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any
|
|||||||
n.wg.Add(1)
|
n.wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer n.wg.Done()
|
defer n.wg.Done()
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
if !n.acquireSlot(ctx) {
|
||||||
|
slog.Warn("notify: dropped trigger payload — outbound queue saturated",
|
||||||
|
"tier", TierEventTrigger, "host", safeHost(webhookURL), "delivery", delivery, "event", eventType)
|
||||||
|
metrics.OutboundNotifyTotal.Inc("dropped")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer n.releaseSlot()
|
||||||
|
|
||||||
_, err := n.doSendRaw(ctx, webhookURL, secret, TierEventTrigger, delivery, eventType, timestamp, payload)
|
_, err := n.doSendRaw(ctx, webhookURL, secret, TierEventTrigger, delivery, eventType, timestamp, payload)
|
||||||
host := safeHost(webhookURL)
|
host := safeHost(webhookURL)
|
||||||
@@ -175,11 +244,13 @@ func (n *Notifier) SendPayload(webhookURL, secret, eventType string, payload any
|
|||||||
slog.Warn("notify: trigger webhook send failed",
|
slog.Warn("notify: trigger webhook send failed",
|
||||||
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
||||||
"event", eventType, "signed", secret != "", "error", err)
|
"event", eventType, "signed", secret != "", "error", err)
|
||||||
|
metrics.OutboundNotifyTotal.Inc("failure")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
slog.Info("notify: trigger webhook dispatched",
|
slog.Info("notify: trigger webhook dispatched",
|
||||||
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
"tier", TierEventTrigger, "host", host, "delivery", delivery,
|
||||||
"event", eventType, "signed", secret != "")
|
"event", eventType, "signed", secret != "")
|
||||||
|
metrics.OutboundNotifyTotal.Inc("success")
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -110,17 +111,37 @@ func (r *Reconciler) ReconcileOnce(ctx context.Context) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Load every workload ONCE per tick and index by ID. This replaces both
|
||||||
|
// the former N+1 GetWorkloadByID (one DB read per container) in the
|
||||||
|
// upsert loop and the second ListWorkloads("") in the plugin pass: net 1
|
||||||
|
// query per tick, 0 GetWorkloadByID.
|
||||||
|
//
|
||||||
|
// On error we return BEFORE the upsert loop and leave state untouched
|
||||||
|
// this tick (the next tick retries). We must NOT proceed with an empty
|
||||||
|
// map and fall through to markMissingRows: with no container resolving,
|
||||||
|
// `seen` would be empty and markMissingRows would flip EVERY live row to
|
||||||
|
// 'missing'. Aborting early is the safe choice.
|
||||||
|
rows, err := r.store.ListWorkloads("")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("reconciler: list workloads: %w", err)
|
||||||
|
}
|
||||||
|
byID := make(map[string]store.Workload, len(rows))
|
||||||
|
for _, w := range rows {
|
||||||
|
byID[w.ID] = w
|
||||||
|
}
|
||||||
|
|
||||||
seen := make(map[string]struct{}, len(items)) // container row IDs we touched
|
seen := make(map[string]struct{}, len(items)) // container row IDs we touched
|
||||||
|
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
rowID := r.upsertFromItem(item)
|
rowID := r.upsertFromItem(item, byID)
|
||||||
if rowID != "" {
|
if rowID != "" {
|
||||||
seen[rowID] = struct{}{}
|
seen[rowID] = struct{}{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r.markMissingRows(seen)
|
r.markMissingRows(seen)
|
||||||
r.reconcilePluginWorkloads(ctx)
|
r.reconcilePluginWorkloads(ctx, rows)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -137,15 +158,13 @@ func (r *Reconciler) ReconcileOnce(ctx context.Context) error {
|
|||||||
//
|
//
|
||||||
// No-op when the plugin dispatcher hasn't been wired (boot-time race,
|
// No-op when the plugin dispatcher hasn't been wired (boot-time race,
|
||||||
// disabled deployments, tests).
|
// disabled deployments, tests).
|
||||||
func (r *Reconciler) reconcilePluginWorkloads(ctx context.Context) {
|
//
|
||||||
|
// rows is the workload set already loaded once by ReconcileOnce — passed
|
||||||
|
// through rather than re-queried so a tick costs a single ListWorkloads.
|
||||||
|
func (r *Reconciler) reconcilePluginWorkloads(ctx context.Context, rows []store.Workload) {
|
||||||
if r.plugins == nil {
|
if r.plugins == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
rows, err := r.store.ListWorkloads("")
|
|
||||||
if err != nil {
|
|
||||||
slog.Warn("reconciler: list workloads for plugin pass", "error", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
for _, w := range rows {
|
for _, w := range rows {
|
||||||
if w.SourceKind == "" {
|
if w.SourceKind == "" {
|
||||||
continue
|
continue
|
||||||
@@ -214,9 +233,9 @@ func (r *Reconciler) loop(ctx context.Context) {
|
|||||||
// After the hard cutover only the canonical tinyforge.workload.id label
|
// After the hard cutover only the canonical tinyforge.workload.id label
|
||||||
// path is honored — every Source plugin labels its containers with the
|
// path is honored — every Source plugin labels its containers with the
|
||||||
// workload identity at create time.
|
// workload identity at create time.
|
||||||
func (r *Reconciler) upsertFromItem(item docker.ReconcileItem) string {
|
func (r *Reconciler) upsertFromItem(item docker.ReconcileItem, byID map[string]store.Workload) string {
|
||||||
if id := item.Labels[docker.LabelWorkloadID]; id != "" {
|
if id := item.Labels[docker.LabelWorkloadID]; id != "" {
|
||||||
return r.upsertByWorkloadLabel(item, id)
|
return r.upsertByWorkloadLabel(item, id, byID)
|
||||||
}
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
@@ -233,9 +252,9 @@ func (r *Reconciler) upsertFromItem(item docker.ReconcileItem) string {
|
|||||||
// known workload row is silently ignored. Anyone with Docker socket access
|
// known workload row is silently ignored. Anyone with Docker socket access
|
||||||
// could otherwise spawn a container with a forged label and steal the
|
// could otherwise spawn a container with a forged label and steal the
|
||||||
// canonical slot for an existing workload.
|
// canonical slot for an existing workload.
|
||||||
func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string) string {
|
func (r *Reconciler) upsertByWorkloadLabel(item docker.ReconcileItem, workloadID string, byID map[string]store.Workload) string {
|
||||||
w, err := r.store.GetWorkloadByID(workloadID)
|
w, ok := byID[workloadID]
|
||||||
if err != nil {
|
if !ok {
|
||||||
// Forged or stale label — log once at debug; tick rate keeps logs quiet.
|
// Forged or stale label — log once at debug; tick rate keeps logs quiet.
|
||||||
slog.Debug("reconciler: unknown workload_id label", "workload_id", workloadID, "container_id", item.ID)
|
slog.Debug("reconciler: unknown workload_id label", "workload_id", workloadID, "container_id", item.ID)
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@@ -257,6 +257,138 @@ func TestReconcileSkipsProjectInsertWithoutDeployerRow(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestReconcileBatchingPreservesBehavior locks Fix A: loading all workloads
|
||||||
|
// once per tick (and resolving labels from that in-memory map instead of an
|
||||||
|
// N+1 GetWorkloadByID) must produce the same outcome as the per-container
|
||||||
|
// lookup did. With multiple containers across multiple workloads plus a forged
|
||||||
|
// label and a stale row, after one ReconcileOnce: known-workload containers
|
||||||
|
// are upserted with the snapshot State, the forged-label container is skipped,
|
||||||
|
// and the absent stale row is flipped to missing.
|
||||||
|
func TestReconcileBatchingPreservesBehavior(t *testing.T) {
|
||||||
|
st := newTestStore(t)
|
||||||
|
|
||||||
|
w1 := makeWorkload(t, st, "batch-a", "stack")
|
||||||
|
w2 := makeWorkload(t, st, "batch-b", "stack")
|
||||||
|
|
||||||
|
// A stale row for w2 whose container is gone — must be marked missing.
|
||||||
|
if err := st.UpsertContainer(store.Container{
|
||||||
|
ID: w2.ID + ":old", WorkloadID: w2.ID, WorkloadKind: "stack",
|
||||||
|
Role: "old", ContainerID: "docker-vanished", State: "running",
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed stale row: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fake := &fakeDocker{items: []docker.ReconcileItem{
|
||||||
|
{
|
||||||
|
ID: "docker-a1", Name: "batch-a-web-1", Image: "nginx:1.27", State: "running",
|
||||||
|
Labels: map[string]string{
|
||||||
|
docker.LabelManaged: "true",
|
||||||
|
docker.LabelWorkloadID: w1.ID,
|
||||||
|
docker.LabelWorkloadKind: "stack",
|
||||||
|
docker.LabelRole: "web",
|
||||||
|
},
|
||||||
|
Ports: []uint16{8080},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "docker-b1", Name: "batch-b-api-1", Image: "redis:7", State: "exited",
|
||||||
|
Labels: map[string]string{
|
||||||
|
docker.LabelManaged: "true",
|
||||||
|
docker.LabelWorkloadID: w2.ID,
|
||||||
|
docker.LabelWorkloadKind: "stack",
|
||||||
|
docker.LabelRole: "api",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Forged label — no such workload. Must be skipped entirely.
|
||||||
|
ID: "docker-evil", Name: "evil", Image: "nginx", State: "running",
|
||||||
|
Labels: map[string]string{
|
||||||
|
docker.LabelManaged: "true",
|
||||||
|
docker.LabelWorkloadID: "wl-forged",
|
||||||
|
docker.LabelWorkloadKind: "stack",
|
||||||
|
docker.LabelRole: "web",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
|
||||||
|
r := New(st, fake, 0)
|
||||||
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
||||||
|
t.Fatalf("ReconcileOnce: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// w1: one row, bound to docker-a1, running.
|
||||||
|
w1Rows, _ := st.ListContainersByWorkload(w1.ID)
|
||||||
|
if len(w1Rows) != 1 {
|
||||||
|
t.Fatalf("w1: expected 1 row, got %d", len(w1Rows))
|
||||||
|
}
|
||||||
|
if w1Rows[0].ContainerID != "docker-a1" || w1Rows[0].State != "running" || w1Rows[0].Role != "web" {
|
||||||
|
t.Fatalf("w1 row wrong: %+v", w1Rows[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
// w2: the new api container is present (exited→stopped); the stale row is missing.
|
||||||
|
api, _ := st.GetContainerByID(w2.ID + ":api")
|
||||||
|
if api.ContainerID != "docker-b1" || api.State != "stopped" {
|
||||||
|
t.Fatalf("w2 api row wrong: %+v", api)
|
||||||
|
}
|
||||||
|
old, _ := st.GetContainerByID(w2.ID + ":old")
|
||||||
|
if old.State != "missing" {
|
||||||
|
t.Fatalf("w2 stale row should be missing, got %q", old.State)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forged label produced no row anywhere.
|
||||||
|
all, _ := st.ListContainers(store.ContainerFilter{})
|
||||||
|
for _, c := range all {
|
||||||
|
if c.ContainerID == "docker-evil" {
|
||||||
|
t.Fatalf("forged-label container was adopted: %+v", c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestReconcileSyncsImageContainerState locks the Fix B coupling: the generic
|
||||||
|
// reconciler upsert pass — NOT image.Reconcile — is what syncs an image
|
||||||
|
// container's State from the snapshot. An image container carries the
|
||||||
|
// workload_id / kind=image / role=image labels at create time, so a present
|
||||||
|
// container's row gets its State written here, proving the per-container
|
||||||
|
// inspect formerly in image.Reconcile is redundant.
|
||||||
|
func TestReconcileSyncsImageContainerState(t *testing.T) {
|
||||||
|
st := newTestStore(t)
|
||||||
|
w := makeWorkload(t, st, "img", "image")
|
||||||
|
|
||||||
|
// Deployer pre-created the image container row (running). Docker now
|
||||||
|
// reports it exited — the generic pass must sync it to stopped.
|
||||||
|
if err := st.UpsertContainer(store.Container{
|
||||||
|
ID: "img-deploy-uuid", WorkloadID: w.ID, WorkloadKind: "image",
|
||||||
|
Role: "image", ContainerID: "docker-img", State: "running",
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("seed image row: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fake := &fakeDocker{items: []docker.ReconcileItem{{
|
||||||
|
ID: "docker-img", Image: "ghcr.io/owner/app:v1", State: "exited",
|
||||||
|
Labels: map[string]string{
|
||||||
|
docker.LabelManaged: "true",
|
||||||
|
docker.LabelWorkloadID: w.ID,
|
||||||
|
docker.LabelWorkloadKind: "image",
|
||||||
|
docker.LabelRole: "image",
|
||||||
|
},
|
||||||
|
Ports: []uint16{3000},
|
||||||
|
}}}
|
||||||
|
|
||||||
|
// No plugin reconciler wired — proves the state sync comes from the
|
||||||
|
// generic upsert pass, not from image.Reconcile.
|
||||||
|
r := New(st, fake, 0)
|
||||||
|
if err := r.ReconcileOnce(context.Background()); err != nil {
|
||||||
|
t.Fatalf("ReconcileOnce: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
got, _ := st.GetContainerByID("img-deploy-uuid")
|
||||||
|
if got.State != "stopped" {
|
||||||
|
t.Fatalf("image container state not synced by generic pass: got %q want stopped", got.State)
|
||||||
|
}
|
||||||
|
if got.Port != 3000 || got.ImageRef != "ghcr.io/owner/app:v1" {
|
||||||
|
t.Fatalf("image container docker fields not synced: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestReconcileNormalizesState(t *testing.T) {
|
func TestReconcileNormalizesState(t *testing.T) {
|
||||||
st := newTestStore(t)
|
st := newTestStore(t)
|
||||||
w := makeWorkload(t, st, "norm", "stack")
|
w := makeWorkload(t, st, "norm", "stack")
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/metrics"
|
||||||
"github.com/alexei/tinyforge/internal/store"
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin/trigger/schedule"
|
"github.com/alexei/tinyforge/internal/workload/plugin/trigger/schedule"
|
||||||
@@ -124,6 +125,7 @@ func (s *Scheduler) loop(ctx context.Context) {
|
|||||||
// TickOnce runs a single sweep. Exposed for tests and for the boot
|
// TickOnce runs a single sweep. Exposed for tests and for the boot
|
||||||
// kick. On error per-trigger the loop continues with the next row.
|
// kick. On error per-trigger the loop continues with the next row.
|
||||||
func (s *Scheduler) TickOnce(ctx context.Context) {
|
func (s *Scheduler) TickOnce(ctx context.Context) {
|
||||||
|
metrics.SchedulerTicksTotal.Inc()
|
||||||
rows, err := s.store.ListTriggers("schedule")
|
rows, err := s.store.ListTriggers("schedule")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("scheduler: list triggers", "error", err)
|
slog.Warn("scheduler: list triggers", "error", err)
|
||||||
@@ -226,5 +228,6 @@ func (s *Scheduler) fire(ctx context.Context, t store.Trigger, now time.Time) {
|
|||||||
slog.Warn("scheduler: dispatch", "trigger", t.Name, "error", err)
|
slog.Warn("scheduler: dispatch", "trigger", t.Name, "error", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
metrics.SchedulerDispatchedTotal.Inc()
|
||||||
slog.Info("scheduler: fired", "trigger", t.Name, "kind", t.Kind, "at", ts)
|
slog.Info("scheduler: fired", "trigger", t.Name, "kind", t.Kind, "at", ts)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -92,17 +92,27 @@ func (c *Compose) Ps(ctx context.Context, projectName, yamlPath string) ([]Servi
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Logs runs `docker compose -p <projectName> logs --no-color --tail=<n> <service>`.
|
// Logs runs `docker compose -p <projectName> logs --no-color --tail=<n> <service>`.
|
||||||
// If service is empty, logs for all services are returned.
|
// If service is empty, logs for all services are returned. The service arg
|
||||||
|
// is preceded by `--` so a service name that begins with `-` cannot be
|
||||||
|
// re-parsed as a flag by the docker CLI (flag-injection guard).
|
||||||
func (c *Compose) Logs(ctx context.Context, projectName, service string, tail int) (string, error) {
|
func (c *Compose) Logs(ctx context.Context, projectName, service string, tail int) (string, error) {
|
||||||
args := []string{"logs", "--no-color", fmt.Sprintf("--tail=%d", tail)}
|
args := []string{"logs", "--no-color", fmt.Sprintf("--tail=%d", tail)}
|
||||||
if service != "" {
|
if service != "" {
|
||||||
args = append(args, service)
|
args = append(args, "--", service)
|
||||||
}
|
}
|
||||||
return c.run(ctx, projectName, args...)
|
return c.run(ctx, projectName, args...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// run executes `docker compose -p <projectName> <args...>` and returns combined output.
|
// run executes `docker compose -p <projectName> <args...>` and returns
|
||||||
|
// combined output. projectName is verified not to begin with `-` because
|
||||||
|
// `docker compose -p '--foo'` would otherwise be re-parsed as a flag —
|
||||||
|
// the callers already sanitize project names through projectNameSanitizer,
|
||||||
|
// but a belt-and-braces refusal here means any future caller cannot
|
||||||
|
// accidentally bypass the sanitizer.
|
||||||
func (c *Compose) run(ctx context.Context, projectName string, args ...string) (string, error) {
|
func (c *Compose) run(ctx context.Context, projectName string, args ...string) (string, error) {
|
||||||
|
if projectName == "" || strings.HasPrefix(projectName, "-") {
|
||||||
|
return "", fmt.Errorf("docker compose: refusing project name %q", projectName)
|
||||||
|
}
|
||||||
full := append([]string{"compose", "-p", projectName}, args...)
|
full := append([]string{"compose", "-p", projectName}, args...)
|
||||||
cmd := exec.CommandContext(ctx, c.binary, full...)
|
cmd := exec.CommandContext(ctx, c.binary, full...)
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
|
|||||||
+142
-2
@@ -2,6 +2,7 @@ package stack
|
|||||||
|
|
||||||
import (
	"fmt"
	"path"
	"strings"

	"gopkg.in/yaml.v3"
)
|
||||||
@@ -15,11 +16,25 @@ type ComposeSpec struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ServiceSpec captures the subset of compose service fields we inspect.
|
// ServiceSpec captures the subset of compose service fields we inspect.
|
||||||
|
//
|
||||||
|
// All host-escape-adjacent fields are decoded here even though Tinyforge
|
||||||
|
// itself never reads them at runtime — surfacing them to Validate() is the
|
||||||
|
// only way to *reject* them. Add new fields here when blocking a new
|
||||||
|
// escape vector.
|
||||||
type ServiceSpec struct {
|
type ServiceSpec struct {
|
||||||
Image string `yaml:"image,omitempty"`
|
Image string `yaml:"image,omitempty"`
|
||||||
|
Build any `yaml:"build,omitempty"` // banned — see Validate
|
||||||
Ports []any `yaml:"ports,omitempty"`
|
Ports []any `yaml:"ports,omitempty"`
|
||||||
Labels map[string]string `yaml:"labels,omitempty"`
|
Labels map[string]string `yaml:"labels,omitempty"`
|
||||||
Privileged bool `yaml:"privileged,omitempty"`
|
Privileged bool `yaml:"privileged,omitempty"`
|
||||||
|
Volumes []any `yaml:"volumes,omitempty"`
|
||||||
|
NetworkMode string `yaml:"network_mode,omitempty"`
|
||||||
|
Pid string `yaml:"pid,omitempty"`
|
||||||
|
Ipc string `yaml:"ipc,omitempty"`
|
||||||
|
UsernsMode string `yaml:"userns_mode,omitempty"`
|
||||||
|
CapAdd []string `yaml:"cap_add,omitempty"`
|
||||||
|
Devices []any `yaml:"devices,omitempty"`
|
||||||
|
SecurityOpt []string `yaml:"security_opt,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse decodes YAML into a ComposeSpec. Returns a descriptive error on failure.
|
// Parse decodes YAML into a ComposeSpec. Returns a descriptive error on failure.
|
||||||
@@ -35,10 +50,20 @@ func Parse(yamlText string) (ComposeSpec, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validate enforces Tinyforge-level constraints beyond compose schema validity.
|
// Validate enforces Tinyforge-level constraints beyond compose schema validity.
|
||||||
|
// All blocked fields below are documented host-escape vectors: any one of
|
||||||
|
// them on its own gives the container root on the host. Tinyforge already
|
||||||
|
// owns the docker socket, so the threat model is "any admin == host root,"
|
||||||
|
// and these blocks raise the bar for any *future* viewer-to-admin
|
||||||
|
// escalation as well as honest-mistake guardrails.
|
||||||
|
//
|
||||||
// Current rules:
|
// Current rules:
|
||||||
// - No service may set `privileged: true`.
|
// - No service may set `privileged: true`.
|
||||||
// - Every service must declare an image (compose supports build: too, but
|
// - Every service must declare an image (build contexts disallowed).
|
||||||
// Tinyforge v1 disallows building from context to avoid arbitrary-code exec).
|
// - No host-IPC / host-PID / host-userns / host networking.
|
||||||
|
// - No `cap_add`, `security_opt`, `devices`.
|
||||||
|
// - `volumes` may not bind-mount the docker socket, /, /etc, /var, /proc,
|
||||||
|
// /sys, /root, /home, /boot, or /dev, nor any relative or ~-prefixed source — list is conservative; operators with real
|
||||||
|
// bind-mount needs should ship a Source plugin or a dedicated wizard.
|
||||||
func Validate(spec ComposeSpec) error {
|
func Validate(spec ComposeSpec) error {
|
||||||
for name, svc := range spec.Services {
|
for name, svc := range spec.Services {
|
||||||
if svc.Privileged {
|
if svc.Privileged {
|
||||||
@@ -47,6 +72,121 @@ func Validate(spec ComposeSpec) error {
|
|||||||
if svc.Image == "" {
|
if svc.Image == "" {
|
||||||
return fmt.Errorf("service %q: image is required (build contexts not supported)", name)
|
return fmt.Errorf("service %q: image is required (build contexts not supported)", name)
|
||||||
}
|
}
|
||||||
|
if svc.Build != nil {
|
||||||
|
return fmt.Errorf("service %q: build: is not supported (use image:)", name)
|
||||||
|
}
|
||||||
|
if isBlockedNamespaceMode(svc.NetworkMode) {
|
||||||
|
return fmt.Errorf("service %q: network_mode %q is not allowed", name, svc.NetworkMode)
|
||||||
|
}
|
||||||
|
if isBlockedNamespaceMode(svc.Pid) {
|
||||||
|
return fmt.Errorf("service %q: pid: %q is not allowed", name, svc.Pid)
|
||||||
|
}
|
||||||
|
if isBlockedNamespaceMode(svc.Ipc) {
|
||||||
|
return fmt.Errorf("service %q: ipc: %q is not allowed", name, svc.Ipc)
|
||||||
|
}
|
||||||
|
if isHostMode(svc.UsernsMode) {
|
||||||
|
return fmt.Errorf("service %q: userns_mode %q is not allowed", name, svc.UsernsMode)
|
||||||
|
}
|
||||||
|
if len(svc.CapAdd) > 0 {
|
||||||
|
return fmt.Errorf("service %q: cap_add is not allowed", name)
|
||||||
|
}
|
||||||
|
if len(svc.SecurityOpt) > 0 {
|
||||||
|
return fmt.Errorf("service %q: security_opt is not allowed", name)
|
||||||
|
}
|
||||||
|
if len(svc.Devices) > 0 {
|
||||||
|
return fmt.Errorf("service %q: devices is not allowed", name)
|
||||||
|
}
|
||||||
|
for _, v := range svc.Volumes {
|
||||||
|
if host, ok := bindMountHostPath(v); ok {
|
||||||
|
if isBlockedBindMount(host) {
|
||||||
|
return fmt.Errorf("service %q: bind-mounting %q is not allowed", name, host)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isHostMode reports whether v is exactly the "host" namespace mode, the value
// that makes network_mode / pid / ipc / userns_mode share the host namespace.
// The comparison is exact on purpose: "host-gateway" is an extra_hosts value,
// not a namespace mode, and must not be matched here.
func isHostMode(v string) bool {
	const hostNamespace = "host"
	return v == hostNamespace
}
|
||||||
|
|
||||||
|
// isBlockedNamespaceMode reports a namespace mode that must be rejected for
|
||||||
|
// network_mode / pid / ipc: either host sharing ("host") or joining another
|
||||||
|
// container's / compose service's namespace ("container:<id>",
|
||||||
|
// "service:<name>"). The container/service joins are a lateral-movement and
|
||||||
|
// sandbox-escape vector — a malicious service could attach to a victim
|
||||||
|
// container's network or PID namespace.
|
||||||
|
func isBlockedNamespaceMode(v string) bool {
|
||||||
|
return isHostMode(v) ||
|
||||||
|
strings.HasPrefix(v, "container:") ||
|
||||||
|
strings.HasPrefix(v, "service:")
|
||||||
|
}
|
||||||
|
|
||||||
|
// bindMountHostPath extracts the host-side path from a compose volume
// declaration. Compose accepts two shapes:
//
//   - short string syntax "[SOURCE:]TARGET[:MODE]", and
//   - long form, a map with "type" and "source" keys.
//
// It returns (source, true) only when the entry names a host path, i.e. the
// source starts with "/", "." or "~". It returns ("", false) for named
// volumes, for non-bind long-form entries (type set to anything but "bind"),
// for values of any other Go type, and for short-form entries without a ':'.
//
// The last case matters: a short-form entry such as "/var/lib/mysql" has no
// SOURCE at all — it is an anonymous volume whose single field is the
// container path. Treating it as a host path made Validate reject perfectly
// ordinary compose files.
func bindMountHostPath(v any) (string, bool) {
	// looksLikeHostPath distinguishes a filesystem path from a volume name.
	looksLikeHostPath := func(s string) bool {
		return strings.HasPrefix(s, "/") || strings.HasPrefix(s, ".") || strings.HasPrefix(s, "~")
	}

	switch t := v.(type) {
	case string:
		src, _, hasTarget := strings.Cut(t, ":")
		if !hasTarget {
			// Empty string or a lone container path (anonymous volume):
			// nothing is mounted from the host.
			return "", false
		}
		// "named:/in/container" has no '/', '.' or '~' prefix on the source.
		if looksLikeHostPath(src) {
			return src, true
		}
		return "", false
	case map[string]any:
		// An explicit non-bind type (volume, tmpfs, ...) has no host source.
		if typ, _ := t["type"].(string); typ != "" && typ != "bind" {
			return "", false
		}
		if src, ok := t["source"].(string); ok && looksLikeHostPath(src) {
			return src, true
		}
	}
	return "", false
}
|
||||||
|
|
||||||
|
// isBlockedBindMount reports whether host is a bind-mount source that
// obviously escapes the container's intended sandbox. It is a conservative
// deny-list — operators with legitimate bind-mount needs should write a
// dedicated Source plugin rather than tunnel them through compose.
//
// Rejected sources:
//   - the empty string and the filesystem root;
//   - anything starting with "." or "~" (relative / home-relative);
//   - the docker socket (/var/run/docker.sock, /run/docker.sock);
//   - /etc, /var, /proc, /sys, /root, /home, /boot, /dev and their sub-paths.
//
// The path is matched both as written (trailing slashes trimmed) and after
// lexical normalization with path.Clean. Matching only the raw spelling let
// "/tmp/../etc", "//etc/passwd" and "/." (the root) slip past the prefix
// checks; keeping the raw match as well guarantees nothing that used to be
// rejected is now accepted.
//
// The check is purely lexical: it cannot see through symlinks on the host.
func isBlockedBindMount(host string) bool {
	// Relative ("./x", "../x", ".") and home-relative ("~/...") sources are
	// resolved against the compose working directory or left unexpanded, and
	// "../" can climb out of that directory entirely. The absolute-prefix
	// deny-list below cannot see these, so reject them outright. This must
	// happen before path.Clean, which would strip a leading "./".
	if host == "" || strings.HasPrefix(host, ".") || strings.HasPrefix(host, "~") {
		return true
	}

	// denied applies the deny-list to one spelling of the path.
	denied := func(p string) bool {
		if p == "" || p == "/" {
			return true
		}
		// Specific blocked files / sockets.
		switch p {
		case "/var/run/docker.sock", "/run/docker.sock":
			return true
		}
		// Blocked prefixes (cover sub-paths too, but not look-alikes such as
		// "/etcetera").
		for _, prefix := range []string{"/etc", "/var", "/proc", "/sys", "/root", "/home", "/boot", "/dev"} {
			if p == prefix || strings.HasPrefix(p, prefix+"/") {
				return true
			}
		}
		return false
	}

	// path (not path/filepath) is used deliberately: these are slash-separated
	// paths from a compose file, independent of the OS Tinyforge is built for.
	return denied(strings.TrimRight(host, "/")) || denied(path.Clean(host))
}
|
||||||
|
|||||||
@@ -0,0 +1,55 @@
|
|||||||
|
package staticsite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CommitStatusReporter pushes deploy outcomes back to the git provider as a
|
||||||
|
// commit status, gated on the per-workload report_commit_status flag. It is
|
||||||
|
// strictly best-effort: every call is wrapped so a reporting failure is logged
|
||||||
|
// at Warn and NEVER propagates to fail or block the deploy.
|
||||||
|
//
|
||||||
|
// The provider + identifiers are captured once at deploy start so the hot
|
||||||
|
// transition points (pending/success/failure) read as one-liners. A nil
|
||||||
|
// receiver (reporting disabled) makes Report a no-op, so callers don't have to
|
||||||
|
// guard each transition.
|
||||||
|
//
|
||||||
|
// It lives in the staticsite package (alongside GitProvider / CommitStatus)
|
||||||
|
// rather than the plugin package so the source plugins can share it without
|
||||||
|
// staticsite taking a dependency on plugin. It is parameterized on primitives
|
||||||
|
// (not plugin.Workload) for the same reason.
|
||||||
|
type CommitStatusReporter struct {
|
||||||
|
provider GitProvider
|
||||||
|
owner string
|
||||||
|
repo string
|
||||||
|
sha string
|
||||||
|
targetURL string
|
||||||
|
enabled bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCommitStatusReporter builds a reporter from the resolved deploy inputs.
|
||||||
|
// When enabled is false (report_commit_status off) or the SHA is empty, the
|
||||||
|
// returned reporter's Report method is inert.
|
||||||
|
func NewCommitStatusReporter(provider GitProvider, owner, repo, sha, targetURL string, enabled bool) *CommitStatusReporter {
|
||||||
|
return &CommitStatusReporter{
|
||||||
|
provider: provider,
|
||||||
|
owner: owner,
|
||||||
|
repo: repo,
|
||||||
|
sha: sha,
|
||||||
|
targetURL: targetURL,
|
||||||
|
enabled: enabled,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report sends one commit status, swallowing (and logging) any error. Safe to
|
||||||
|
// call on a nil/disabled reporter or with a nil provider/empty SHA.
|
||||||
|
func (r *CommitStatusReporter) Report(ctx context.Context, workloadName, workloadID string, status CommitStatus, description string) {
|
||||||
|
if r == nil || !r.enabled || r.provider == nil || r.sha == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := r.provider.SetCommitStatus(ctx, r.owner, r.repo, r.sha, status, r.targetURL, description); err != nil {
|
||||||
|
slog.Warn("commit-status report failed (ignored)",
|
||||||
|
"workload", workloadName, "workload_id", workloadID, "status", string(status), "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,122 @@
|
|||||||
|
package staticsite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeReporterProvider is a stub GitProvider that records SetCommitStatus
|
||||||
|
// calls. Only the methods the reporter exercises are meaningful; the rest
|
||||||
|
// satisfy the interface and panic if ever hit so a mis-wired test is loud.
|
||||||
|
type fakeReporterProvider struct {
|
||||||
|
calls []reporterStatusCall
|
||||||
|
failErr error // when set, SetCommitStatus returns it (best-effort path)
|
||||||
|
}
|
||||||
|
|
||||||
|
type reporterStatusCall struct {
|
||||||
|
owner, repo, sha string
|
||||||
|
status CommitStatus
|
||||||
|
targetURL, descr string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeReporterProvider) SetCommitStatus(_ context.Context, owner, repo, sha string, status CommitStatus, targetURL, description string) error {
|
||||||
|
f.calls = append(f.calls, reporterStatusCall{owner, repo, sha, status, targetURL, description})
|
||||||
|
return f.failErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*fakeReporterProvider) Name() string { return "fake" }
|
||||||
|
func (*fakeReporterProvider) TestConnection(context.Context, string, string) error {
|
||||||
|
panic("unused")
|
||||||
|
}
|
||||||
|
func (*fakeReporterProvider) ListRepos(context.Context, string) ([]RepoInfo, error) {
|
||||||
|
panic("unused")
|
||||||
|
}
|
||||||
|
func (*fakeReporterProvider) ListBranches(context.Context, string, string) ([]string, error) {
|
||||||
|
panic("unused")
|
||||||
|
}
|
||||||
|
func (*fakeReporterProvider) GetLatestCommitSHA(context.Context, string, string, string) (string, error) {
|
||||||
|
panic("unused")
|
||||||
|
}
|
||||||
|
func (*fakeReporterProvider) ListTree(context.Context, string, string, string) ([]FolderEntry, error) {
|
||||||
|
panic("unused")
|
||||||
|
}
|
||||||
|
func (*fakeReporterProvider) DownloadFolder(context.Context, string, string, string, string, string) error {
|
||||||
|
panic("unused")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enabled: forwards to the provider with the captured identifiers + target.
|
||||||
|
func TestCommitStatusReporter_Enabled_Calls(t *testing.T) {
|
||||||
|
fp := &fakeReporterProvider{}
|
||||||
|
r := NewCommitStatusReporter(fp, "owner", "pages", "abc123", "https://app.example.com", true)
|
||||||
|
|
||||||
|
r.Report(context.Background(), "site", "wid-1", CommitStatusPending, "Tinyforge: deploying")
|
||||||
|
r.Report(context.Background(), "site", "wid-1", CommitStatusSuccess, "Tinyforge: deployed")
|
||||||
|
|
||||||
|
if len(fp.calls) != 2 {
|
||||||
|
t.Fatalf("calls = %d, want 2", len(fp.calls))
|
||||||
|
}
|
||||||
|
first := fp.calls[0]
|
||||||
|
if first.owner != "owner" || first.repo != "pages" || first.sha != "abc123" {
|
||||||
|
t.Errorf("identifiers wrong: %+v", first)
|
||||||
|
}
|
||||||
|
if first.status != CommitStatusPending {
|
||||||
|
t.Errorf("first status = %q, want pending", first.status)
|
||||||
|
}
|
||||||
|
if first.targetURL != "https://app.example.com" {
|
||||||
|
t.Errorf("targetURL = %q", first.targetURL)
|
||||||
|
}
|
||||||
|
if fp.calls[1].status != CommitStatusSuccess {
|
||||||
|
t.Errorf("second status = %q, want success", fp.calls[1].status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disabled: the reporter is inert.
|
||||||
|
func TestCommitStatusReporter_Disabled_NoCalls(t *testing.T) {
|
||||||
|
fp := &fakeReporterProvider{}
|
||||||
|
r := NewCommitStatusReporter(fp, "owner", "pages", "abc123", "", false)
|
||||||
|
|
||||||
|
r.Report(context.Background(), "site", "wid-1", CommitStatusSuccess, "x")
|
||||||
|
if len(fp.calls) != 0 {
|
||||||
|
t.Fatalf("expected no calls when disabled, got %d", len(fp.calls))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// An empty SHA (e.g. a provider that couldn't resolve the branch) must not
|
||||||
|
// produce a status call even when reporting is enabled.
|
||||||
|
func TestCommitStatusReporter_EmptySHA_NoCalls(t *testing.T) {
|
||||||
|
fp := &fakeReporterProvider{}
|
||||||
|
r := NewCommitStatusReporter(fp, "owner", "pages", "", "", true)
|
||||||
|
|
||||||
|
r.Report(context.Background(), "site", "wid-1", CommitStatusPending, "x")
|
||||||
|
if len(fp.calls) != 0 {
|
||||||
|
t.Fatalf("expected no calls with empty SHA, got %d", len(fp.calls))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A provider error must be swallowed (best-effort) — Report never panics or
|
||||||
|
// propagates. We assert it returns normally after a failing provider call.
|
||||||
|
func TestCommitStatusReporter_ProviderError_Swallowed(t *testing.T) {
|
||||||
|
fp := &fakeReporterProvider{failErr: errors.New("boom")}
|
||||||
|
r := NewCommitStatusReporter(fp, "owner", "pages", "abc123", "", true)
|
||||||
|
|
||||||
|
// Should not panic / propagate.
|
||||||
|
r.Report(context.Background(), "site", "wid-1", CommitStatusFailure, "Tinyforge: deploy failed")
|
||||||
|
if len(fp.calls) != 1 {
|
||||||
|
t.Fatalf("expected the failing call to still be recorded, got %d", len(fp.calls))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A nil reporter (constructed only when needed in some call paths) is safe.
|
||||||
|
func TestCommitStatusReporter_NilSafe(t *testing.T) {
|
||||||
|
var r *CommitStatusReporter
|
||||||
|
// Must not panic.
|
||||||
|
r.Report(context.Background(), "site", "wid-1", CommitStatusSuccess, "x")
|
||||||
|
}
|
||||||
|
|
||||||
|
// A nil provider on an enabled reporter is also a no-op (defensive guard).
|
||||||
|
func TestCommitStatusReporter_NilProvider_NoPanic(t *testing.T) {
|
||||||
|
r := NewCommitStatusReporter(nil, "owner", "pages", "abc123", "", true)
|
||||||
|
// Must not panic.
|
||||||
|
r.Report(context.Background(), "site", "wid-1", CommitStatusSuccess, "x")
|
||||||
|
}
|
||||||
@@ -0,0 +1,331 @@
|
|||||||
|
package staticsite
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ── State mapping (pure) ────────────────────────────────────────────
|
||||||
|
//
|
||||||
|
// Each provider maps the provider-agnostic CommitStatus onto its own API
|
||||||
|
// vocabulary. Gitea/GitHub accept the same four words; GitLab collapses
|
||||||
|
// failure+error into "failed".
|
||||||
|
|
||||||
|
func TestGiteaState_Mapping(t *testing.T) {
|
||||||
|
cases := map[CommitStatus]string{
|
||||||
|
CommitStatusPending: "pending",
|
||||||
|
CommitStatusSuccess: "success",
|
||||||
|
CommitStatusFailure: "failure",
|
||||||
|
CommitStatusError: "error",
|
||||||
|
CommitStatus("???"): "pending", // unknown -> pending fallback
|
||||||
|
}
|
||||||
|
for in, want := range cases {
|
||||||
|
if got := giteaState(in); got != want {
|
||||||
|
t.Errorf("giteaState(%q) = %q, want %q", in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHubState_Mapping(t *testing.T) {
|
||||||
|
cases := map[CommitStatus]string{
|
||||||
|
CommitStatusPending: "pending",
|
||||||
|
CommitStatusSuccess: "success",
|
||||||
|
CommitStatusFailure: "failure",
|
||||||
|
CommitStatusError: "error",
|
||||||
|
CommitStatus("???"): "pending",
|
||||||
|
}
|
||||||
|
for in, want := range cases {
|
||||||
|
if got := githubState(in); got != want {
|
||||||
|
t.Errorf("githubState(%q) = %q, want %q", in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitLabState_Mapping(t *testing.T) {
|
||||||
|
cases := map[CommitStatus]string{
|
||||||
|
CommitStatusPending: "pending",
|
||||||
|
CommitStatusSuccess: "success",
|
||||||
|
CommitStatusFailure: "failed", // GitLab has no "failure"
|
||||||
|
CommitStatusError: "failed", // error also collapses to "failed"
|
||||||
|
CommitStatus("???"): "pending",
|
||||||
|
}
|
||||||
|
for in, want := range cases {
|
||||||
|
if got := gitlabState(in); got != want {
|
||||||
|
t.Errorf("gitlabState(%q) = %q, want %q", in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTruncateDescription(t *testing.T) {
|
||||||
|
short := "Tinyforge: deploying"
|
||||||
|
if got := truncateDescription(short); got != short {
|
||||||
|
t.Errorf("short description mutated: %q", got)
|
||||||
|
}
|
||||||
|
long := strings.Repeat("x", 200)
|
||||||
|
got := truncateDescription(long)
|
||||||
|
if len([]rune(got)) > maxCommitStatusDescription {
|
||||||
|
t.Errorf("truncated length = %d runes, want <= %d", len([]rune(got)), maxCommitStatusDescription)
|
||||||
|
}
|
||||||
|
if !strings.HasSuffix(got, "…") {
|
||||||
|
t.Errorf("missing ellipsis on truncation: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Endpoint + body construction (httptest) ─────────────────────────
|
||||||
|
//
|
||||||
|
// The SSRF-safe client refuses loopback, so for these tests we swap the
|
||||||
|
// provider's httpClient for a plain one pointed at httptest. This still
|
||||||
|
// exercises the real URL/body construction inside each SetCommitStatus.
|
||||||
|
|
||||||
|
type capturedRequest struct {
|
||||||
|
method string
|
||||||
|
path string // r.URL.EscapedPath() — preserves %2F so GitLab's encoded project path is observable
|
||||||
|
rawQ string
|
||||||
|
body map[string]string
|
||||||
|
auth string
|
||||||
|
token string // PRIVATE-TOKEN (GitLab)
|
||||||
|
}
|
||||||
|
|
||||||
|
func newCaptureServer(t *testing.T, capture *capturedRequest) *httptest.Server {
|
||||||
|
t.Helper()
|
||||||
|
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
capture.method = r.Method
|
||||||
|
capture.path = r.URL.EscapedPath()
|
||||||
|
capture.rawQ = r.URL.RawQuery
|
||||||
|
capture.auth = r.Header.Get("Authorization")
|
||||||
|
capture.token = r.Header.Get("PRIVATE-TOKEN")
|
||||||
|
raw, _ := io.ReadAll(r.Body)
|
||||||
|
if len(raw) > 0 {
|
||||||
|
_ = json.Unmarshal(raw, &capture.body)
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusCreated)
|
||||||
|
_, _ = w.Write([]byte(`{}`))
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitea_SetCommitStatus_Request(t *testing.T) {
|
||||||
|
var cap capturedRequest
|
||||||
|
srv := newCaptureServer(t, &cap)
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
f := NewGiteaContentFetcher(srv.URL, "tok123")
|
||||||
|
f.httpClient = srv.Client() // bypass SSRF guard for loopback test server
|
||||||
|
|
||||||
|
err := f.SetCommitStatus(context.Background(), "owner", "repo", "abc123",
|
||||||
|
CommitStatusSuccess, "https://app.example.com", "deployed")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("SetCommitStatus: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cap.method != http.MethodPost {
|
||||||
|
t.Errorf("method = %q, want POST", cap.method)
|
||||||
|
}
|
||||||
|
if want := "/api/v1/repos/owner/repo/statuses/abc123"; cap.path != want {
|
||||||
|
t.Errorf("path = %q, want %q", cap.path, want)
|
||||||
|
}
|
||||||
|
if cap.body["state"] != "success" {
|
||||||
|
t.Errorf("state = %q, want success", cap.body["state"])
|
||||||
|
}
|
||||||
|
if cap.body["context"] != "tinyforge" {
|
||||||
|
t.Errorf("context = %q, want tinyforge", cap.body["context"])
|
||||||
|
}
|
||||||
|
if cap.body["target_url"] != "https://app.example.com" {
|
||||||
|
t.Errorf("target_url = %q", cap.body["target_url"])
|
||||||
|
}
|
||||||
|
if cap.body["description"] != "deployed" {
|
||||||
|
t.Errorf("description = %q, want deployed", cap.body["description"])
|
||||||
|
}
|
||||||
|
if cap.auth != "token tok123" {
|
||||||
|
t.Errorf("auth = %q, want 'token tok123'", cap.auth)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHub_SetCommitStatus_Request(t *testing.T) {
|
||||||
|
var cap capturedRequest
|
||||||
|
srv := newCaptureServer(t, &cap)
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
// Force GHE-style apiBase so we hit the server's path; the github.com
|
||||||
|
// branch hard-codes api.github.com which the SSRF client would block.
|
||||||
|
g := NewGitHubProvider(srv.URL, "ghp_tok")
|
||||||
|
g.apiBase = srv.URL + "/api/v3"
|
||||||
|
g.httpClient = srv.Client()
|
||||||
|
|
||||||
|
err := g.SetCommitStatus(context.Background(), "octo", "cat", "deadbeef",
|
||||||
|
CommitStatusFailure, "", "failed")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("SetCommitStatus: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if want := "/api/v3/repos/octo/cat/statuses/deadbeef"; cap.path != want {
|
||||||
|
t.Errorf("path = %q, want %q", cap.path, want)
|
||||||
|
}
|
||||||
|
if cap.body["state"] != "failure" {
|
||||||
|
t.Errorf("state = %q, want failure", cap.body["state"])
|
||||||
|
}
|
||||||
|
if cap.body["context"] != "tinyforge" {
|
||||||
|
t.Errorf("context = %q, want tinyforge", cap.body["context"])
|
||||||
|
}
|
||||||
|
if cap.auth != "Bearer ghp_tok" {
|
||||||
|
t.Errorf("auth = %q, want 'Bearer ghp_tok'", cap.auth)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitLab_SetCommitStatus_Request(t *testing.T) {
|
||||||
|
var cap capturedRequest
|
||||||
|
srv := newCaptureServer(t, &cap)
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
g := NewGitLabProvider(srv.URL, "glpat-xyz")
|
||||||
|
g.httpClient = srv.Client()
|
||||||
|
|
||||||
|
err := g.SetCommitStatus(context.Background(), "grp", "proj", "cafe01",
|
||||||
|
CommitStatusError, "https://app.example.com", "boom")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("SetCommitStatus: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GitLab uses the URL-encoded project path + sha in the path, and the
|
||||||
|
// status metadata as query params.
|
||||||
|
if want := "/api/v4/projects/grp%2Fproj/statuses/cafe01"; cap.path != want {
|
||||||
|
t.Errorf("path = %q, want %q", cap.path, want)
|
||||||
|
}
|
||||||
|
q, err := parseQuery(cap.rawQ)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse query %q: %v", cap.rawQ, err)
|
||||||
|
}
|
||||||
|
if q["state"] != "failed" { // error -> failed
|
||||||
|
t.Errorf("state = %q, want failed", q["state"])
|
||||||
|
}
|
||||||
|
if q["name"] != "tinyforge" {
|
||||||
|
t.Errorf("name = %q, want tinyforge", q["name"])
|
||||||
|
}
|
||||||
|
if q["target_url"] != "https://app.example.com" {
|
||||||
|
t.Errorf("target_url = %q", q["target_url"])
|
||||||
|
}
|
||||||
|
if q["description"] != "boom" {
|
||||||
|
t.Errorf("description = %q, want boom", q["description"])
|
||||||
|
}
|
||||||
|
if cap.token != "glpat-xyz" {
|
||||||
|
t.Errorf("PRIVATE-TOKEN = %q, want glpat-xyz", cap.token)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseQuery is a tiny wrapper so the test reads the first value of each
|
||||||
|
// query key without dragging net/url into every assertion.
|
||||||
|
func parseQuery(raw string) (map[string]string, error) {
|
||||||
|
out := map[string]string{}
|
||||||
|
if raw == "" {
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
for _, pair := range strings.Split(raw, "&") {
|
||||||
|
kv := strings.SplitN(pair, "=", 2)
|
||||||
|
k := urlDecode(kv[0])
|
||||||
|
v := ""
|
||||||
|
if len(kv) == 2 {
|
||||||
|
v = urlDecode(kv[1])
|
||||||
|
}
|
||||||
|
if _, ok := out[k]; !ok {
|
||||||
|
out[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func urlDecode(s string) string {
|
||||||
|
dec, err := decodeQueryComponent(s)
|
||||||
|
if err != nil {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return dec
|
||||||
|
}
|
||||||
|
|
||||||
|
// decodeQueryComponent decodes one application/x-www-form-urlencoded
|
||||||
|
// component (handles %XX and '+'-as-space) without importing net/url here.
|
||||||
|
func decodeQueryComponent(s string) (string, error) {
|
||||||
|
var b strings.Builder
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
switch s[i] {
|
||||||
|
case '+':
|
||||||
|
b.WriteByte(' ')
|
||||||
|
case '%':
|
||||||
|
if i+2 >= len(s) {
|
||||||
|
return s, errPercent
|
||||||
|
}
|
||||||
|
hi, lo := fromHex(s[i+1]), fromHex(s[i+2])
|
||||||
|
if hi < 0 || lo < 0 {
|
||||||
|
return s, errPercent
|
||||||
|
}
|
||||||
|
b.WriteByte(byte(hi<<4 | lo))
|
||||||
|
i += 2
|
||||||
|
default:
|
||||||
|
b.WriteByte(s[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// errPercent is the sentinel error decodeQueryComponent returns when a '%'
// is truncated or is not followed by two hex digits.
var errPercent = &decodeErr{}

// decodeErr is the empty concrete type behind errPercent.
type decodeErr struct{}

// Error implements the error interface with a fixed message.
func (*decodeErr) Error() string { return "bad percent-encoding" }
|
||||||
|
|
||||||
|
// fromHex returns the numeric value (0-15) of a single hexadecimal digit,
// accepting both upper- and lower-case letters, or -1 when c is not a hex
// digit.
func fromHex(c byte) int {
	if '0' <= c && c <= '9' {
		return int(c - '0')
	}
	// Setting bit 0x20 folds ASCII 'A'-'F' onto 'a'-'f'; no other byte
	// lands in that range, so one comparison covers both cases.
	if lower := c | 0x20; 'a' <= lower && lower <= 'f' {
		return int(lower-'a') + 10
	}
	return -1
}
|
||||||
|
|
||||||
|
// TestSetCommitStatus_NonOK_ReturnsError verifies a non-2xx provider
|
||||||
|
// response surfaces as an error (the deploy hook logs + swallows it, but
|
||||||
|
// the provider method itself must report it).
|
||||||
|
func TestSetCommitStatus_NonOK_ReturnsError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
|
_, _ = w.Write([]byte(`{"message":"bad token"}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
f := NewGiteaContentFetcher(srv.URL, "tok")
|
||||||
|
f.httpClient = srv.Client()
|
||||||
|
|
||||||
|
err := f.SetCommitStatus(context.Background(), "o", "r", "sha", CommitStatusPending, "", "x")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error on 401, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "401") {
|
||||||
|
t.Errorf("error missing status code: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSetCommitStatus_RespectsContext ensures the call honours context
|
||||||
|
// cancellation (defensive — the deploy hook passes the deploy ctx).
|
||||||
|
func TestSetCommitStatus_RespectsContext(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
time.Sleep(200 * time.Millisecond)
|
||||||
|
w.WriteHeader(http.StatusCreated)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
f := NewGiteaContentFetcher(srv.URL, "")
|
||||||
|
f.httpClient = srv.Client()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
if err := f.SetCommitStatus(ctx, "o", "r", "sha", CommitStatusPending, "", "x"); err == nil {
|
||||||
|
t.Fatal("expected context-deadline error, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -304,6 +305,54 @@ func (f *GiteaContentFetcher) TestConnection(ctx context.Context, owner, repo st
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetCommitStatus reports a deploy status on a commit via Gitea's commit-
|
||||||
|
// status API (also serves Forgejo/Gogs). The "context" field is fixed to
|
||||||
|
// "tinyforge" so repeated deploys update one status row.
|
||||||
|
func (f *GiteaContentFetcher) SetCommitStatus(ctx context.Context, owner, repo, sha string, status CommitStatus, targetURL, description string) error {
|
||||||
|
state := giteaState(status)
|
||||||
|
body, err := json.Marshal(map[string]string{
|
||||||
|
"state": state,
|
||||||
|
"target_url": targetURL,
|
||||||
|
"description": truncateDescription(description),
|
||||||
|
"context": commitStatusContext,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal status: %w", err)
|
||||||
|
}
|
||||||
|
// Path-escape each identifier so the URL shape matches the other
|
||||||
|
// provider methods and a hostile owner/repo/sha can't break out of
|
||||||
|
// the intended path. The SSRF-safe client guards the host.
|
||||||
|
apiURL := fmt.Sprintf("%s/api/v1/repos/%s/%s/statuses/%s",
|
||||||
|
f.baseURL, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(sha))
|
||||||
|
if err := postJSON(ctx, f.httpClient, apiURL, body, f.setAuth); err != nil {
|
||||||
|
return fmt.Errorf("set commit status: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// setAuth applies the Gitea token header (no-op when the token is empty).
|
||||||
|
func (f *GiteaContentFetcher) setAuth(req *http.Request) {
|
||||||
|
if f.token != "" {
|
||||||
|
req.Header.Set("Authorization", "token "+f.token)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// giteaState maps a provider-agnostic CommitStatus onto Gitea's API
|
||||||
|
// vocabulary. Gitea accepts the same four words Tinyforge uses, so this is
|
||||||
|
// a 1:1 mapping with a "pending" fallback for any unknown value.
|
||||||
|
func giteaState(status CommitStatus) string {
|
||||||
|
switch status {
|
||||||
|
case CommitStatusSuccess:
|
||||||
|
return "success"
|
||||||
|
case CommitStatusFailure:
|
||||||
|
return "failure"
|
||||||
|
case CommitStatusError:
|
||||||
|
return "error"
|
||||||
|
default:
|
||||||
|
return "pending"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// doGet performs an authenticated GET request and returns the response body.
|
// doGet performs an authenticated GET request and returns the response body.
|
||||||
func (f *GiteaContentFetcher) doGet(ctx context.Context, url string) ([]byte, error) {
|
func (f *GiteaContentFetcher) doGet(ctx context.Context, url string) ([]byte, error) {
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -115,6 +116,43 @@ func (g *GitHubProvider) TestConnection(ctx context.Context, owner, repo string)
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetCommitStatus reports a deploy status on a commit via GitHub's commit-
|
||||||
|
// status API (works for github.com and GitHub Enterprise — apiBase already
|
||||||
|
// carries the /api/v3 suffix for GHE). The "context" field is fixed to
|
||||||
|
// "tinyforge" so repeated deploys update one status row.
|
||||||
|
func (g *GitHubProvider) SetCommitStatus(ctx context.Context, owner, repo, sha string, status CommitStatus, targetURL, description string) error {
|
||||||
|
body, err := json.Marshal(map[string]string{
|
||||||
|
"state": githubState(status),
|
||||||
|
"target_url": targetURL,
|
||||||
|
"description": truncateDescription(description),
|
||||||
|
"context": commitStatusContext,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal status: %w", err)
|
||||||
|
}
|
||||||
|
apiURL := fmt.Sprintf("%s/repos/%s/%s/statuses/%s",
|
||||||
|
g.apiBase, url.PathEscape(owner), url.PathEscape(repo), url.PathEscape(sha))
|
||||||
|
if err := postJSON(ctx, g.httpClient, apiURL, body, g.setAuth); err != nil {
|
||||||
|
return fmt.Errorf("set commit status: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// githubState maps a provider-agnostic CommitStatus onto GitHub's API
|
||||||
|
// vocabulary. GitHub accepts the same four words Tinyforge uses.
|
||||||
|
func githubState(status CommitStatus) string {
|
||||||
|
switch status {
|
||||||
|
case CommitStatusSuccess:
|
||||||
|
return "success"
|
||||||
|
case CommitStatusFailure:
|
||||||
|
return "failure"
|
||||||
|
case CommitStatusError:
|
||||||
|
return "error"
|
||||||
|
default:
|
||||||
|
return "pending"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (g *GitHubProvider) ListBranches(ctx context.Context, owner, repo string) ([]string, error) {
|
func (g *GitHubProvider) ListBranches(ctx context.Context, owner, repo string) ([]string, error) {
|
||||||
var allBranches []string
|
var allBranches []string
|
||||||
page := 1
|
page := 1
|
||||||
|
|||||||
@@ -95,6 +95,45 @@ func (g *GitLabProvider) TestConnection(ctx context.Context, owner, repo string)
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetCommitStatus reports a deploy status on a commit via GitLab's commit-
|
||||||
|
// status API. GitLab's state vocabulary differs (pending/running/success/
|
||||||
|
// failed/canceled), so failure AND error both map to "failed". The status
|
||||||
|
// metadata (name/target_url/description) is passed as query parameters,
|
||||||
|
// which is how GitLab's POST .../statuses/{sha} endpoint accepts them.
|
||||||
|
func (g *GitLabProvider) SetCommitStatus(ctx context.Context, owner, repo, sha string, status CommitStatus, targetURL, description string) error {
|
||||||
|
q := url.Values{}
|
||||||
|
q.Set("state", gitlabState(status))
|
||||||
|
q.Set("name", commitStatusContext)
|
||||||
|
if targetURL != "" {
|
||||||
|
q.Set("target_url", targetURL)
|
||||||
|
}
|
||||||
|
if description != "" {
|
||||||
|
q.Set("description", truncateDescription(description))
|
||||||
|
}
|
||||||
|
apiURL := fmt.Sprintf("%s/projects/%s/statuses/%s?%s",
|
||||||
|
g.apiBase, projectPath(owner, repo), url.PathEscape(sha), q.Encode())
|
||||||
|
// No JSON body — all fields ride as query params. Reuse postJSON for
|
||||||
|
// the SSRF-safe POST + 2xx handling; an empty body is valid here.
|
||||||
|
if err := postJSON(ctx, g.httpClient, apiURL, nil, g.setAuth); err != nil {
|
||||||
|
return fmt.Errorf("set commit status: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// gitlabState maps a provider-agnostic CommitStatus onto GitLab's API
|
||||||
|
// vocabulary. GitLab has no "failure"/"error" split — both map to
|
||||||
|
// "failed".
|
||||||
|
func gitlabState(status CommitStatus) string {
|
||||||
|
switch status {
|
||||||
|
case CommitStatusSuccess:
|
||||||
|
return "success"
|
||||||
|
case CommitStatusFailure, CommitStatusError:
|
||||||
|
return "failed"
|
||||||
|
default:
|
||||||
|
return "pending"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (g *GitLabProvider) ListBranches(ctx context.Context, owner, repo string) ([]string, error) {
|
func (g *GitLabProvider) ListBranches(ctx context.Context, owner, repo string) ([]string, error) {
|
||||||
var allBranches []string
|
var allBranches []string
|
||||||
page := 1
|
page := 1
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package staticsite
|
package staticsite
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
@@ -21,6 +22,40 @@ type RepoInfo struct {
|
|||||||
HTMLURL string `json:"html_url"`
|
HTMLURL string `json:"html_url"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CommitStatus is the deploy outcome reported back to the git provider as
// a commit status. The values are provider-agnostic; each implementation
// maps them onto its own API vocabulary (Gitea/GitHub use the same four
// words, GitLab collapses failure/error into "failed").
type CommitStatus string

const (
	// CommitStatusPending is also the fallback the provider state mappers
	// use for any value they do not recognise.
	CommitStatusPending CommitStatus = "pending"
	// CommitStatusSuccess is sent as "success" by all three providers.
	CommitStatusSuccess CommitStatus = "success"
	// CommitStatusFailure is sent as "failure" to Gitea/GitHub and as
	// "failed" to GitLab.
	CommitStatusFailure CommitStatus = "failure"
	// CommitStatusError is sent as "error" to Gitea/GitHub and as "failed"
	// to GitLab.
	CommitStatusError CommitStatus = "error"
)
|
||||||
|
|
||||||
|
// commitStatusContext is the status "context"/"name" key reported to every
// provider so repeated deploys update the same status row rather than
// piling up new ones. Gitea and GitHub receive it as the JSON "context"
// field; GitLab receives it as the "name" query parameter.
const commitStatusContext = "tinyforge"
|
||||||
|
|
||||||
|
// maxCommitStatusDescription caps the human-readable description so a
// provider can't reject the request for an over-long field.
const maxCommitStatusDescription = 140

// truncateDescription clamps a status description to the provider-safe
// length, appending an ellipsis when it had to cut.
//
// A string of at most maxCommitStatusDescription bytes is returned
// unchanged. A longer string keeps at most maxCommitStatusDescription-1
// leading bytes followed by "…". For ASCII input that is exactly
// maxCommitStatusDescription characters; the ellipsis itself is three bytes
// in UTF-8, so the byte length of the result can exceed the cap slightly.
//
// The cut is moved back to the nearest rune boundary so a multi-byte UTF-8
// character is never split. Splitting one would produce invalid UTF-8.
func truncateDescription(s string) string {
	if len(s) <= maxCommitStatusDescription {
		return s
	}
	cut := maxCommitStatusDescription - 1
	// s[cut] is the first byte that gets dropped. UTF-8 continuation bytes
	// have the bit pattern 10xxxxxx; if the first dropped byte is one, the
	// cut sits inside a rune, so step back to that rune's first byte.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "…"
}
|
||||||
|
|
||||||
// GitProvider abstracts Git hosting API operations.
|
// GitProvider abstracts Git hosting API operations.
|
||||||
// Implementations exist for Gitea/Forgejo/Gogs, GitHub, and GitLab.
|
// Implementations exist for Gitea/Forgejo/Gogs, GitHub, and GitLab.
|
||||||
type GitProvider interface {
|
type GitProvider interface {
|
||||||
@@ -45,6 +80,12 @@ type GitProvider interface {
|
|||||||
|
|
||||||
// DownloadFolder downloads all files from a folder path to a local directory.
|
// DownloadFolder downloads all files from a folder path to a local directory.
|
||||||
DownloadFolder(ctx context.Context, owner, repo, branch, folderPath, destDir string) error
|
DownloadFolder(ctx context.Context, owner, repo, branch, folderPath, destDir string) error
|
||||||
|
|
||||||
|
// SetCommitStatus reports a deploy status on a commit. Best-effort;
|
||||||
|
// callers ignore errors beyond logging. targetURL and description are
|
||||||
|
// optional (pass "" to omit); description is truncated to a provider-
|
||||||
|
// safe length by the implementation.
|
||||||
|
SetCommitStatus(ctx context.Context, owner, repo, sha string, status CommitStatus, targetURL, description string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProviderType identifies a Git hosting provider.
|
// ProviderType identifies a Git hosting provider.
|
||||||
@@ -135,6 +176,36 @@ func httpGet(ctx context.Context, client *http.Client, url string) (int, error)
|
|||||||
return resp.StatusCode, nil
|
return resp.StatusCode, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// postJSON is a shared helper for POSTing a JSON body to a provider API
// endpoint with the caller's auth applied. It accepts any 2xx as success
// (status APIs return 201 Created on Gitea/GitHub, 200/201 on GitLab) and
// returns a status-code-only error on non-2xx — it must NOT echo the
// response body: the deploy hook logs this error best-effort, and a
// hostile/misconfigured provider could reflect the request's auth token
// back in its body. The body bytes must already be marshalled by the caller.
//
// Parameters:
//   - ctx bounds the request; cancellation aborts it.
//   - client performs the request.
//   - url is the fully-built endpoint, including any query string.
//   - body is the request payload; nil sends an empty body.
//   - authHeader, when non-nil, is called to set auth headers on the request.
//
// The response body is never inspected, but a bounded amount of it is read
// and discarded before closing so the transport can reuse the keep-alive
// connection for the next call.
func postJSON(ctx context.Context, client *http.Client, url string, body []byte, authHeader func(r *http.Request)) error {
	// Upper bound on how much of an ignored response body is drained. Large
	// enough for any status-API reply, small enough that a hostile server
	// cannot make the caller read an unbounded stream.
	const maxDrainBytes = 64 << 10

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("create request: %w", err)
	}
	if authHeader != nil {
		authHeader(req)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Accept", "application/json")

	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("execute request: %w", err)
	}
	defer func() {
		// Best-effort drain then close; errors here cannot change the
		// outcome already determined by the status code.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxDrainBytes))
		_ = resp.Body.Close()
	}()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return fmt.Errorf("unexpected status %d", resp.StatusCode)
	}
	return nil
}
|
||||||
|
|
||||||
// downloadFileHTTP is a shared helper for downloading a file from a URL.
|
// downloadFileHTTP is a shared helper for downloading a file from a URL.
|
||||||
func downloadFileHTTP(ctx context.Context, client *http.Client, url, localPath string, authHeader func(r *http.Request)) error {
|
func downloadFileHTTP(ctx context.Context, client *http.Client, url, localPath string, authHeader func(r *http.Request)) error {
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||||
|
|||||||
@@ -50,7 +50,22 @@ func ValidateBaseURL(raw string) error {
|
|||||||
func NewSafeHTTPClient(timeout time.Duration) *http.Client {
|
func NewSafeHTTPClient(timeout time.Duration) *http.Client {
|
||||||
dialer := &net.Dialer{Timeout: 10 * time.Second, KeepAlive: 30 * time.Second}
|
dialer := &net.Dialer{Timeout: 10 * time.Second, KeepAlive: 30 * time.Second}
|
||||||
transport := &http.Transport{
|
transport := &http.Transport{
|
||||||
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
|
DialContext: SafeDialContext(dialer),
|
||||||
|
MaxIdleConns: 16,
|
||||||
|
IdleConnTimeout: 30 * time.Second,
|
||||||
|
TLSHandshakeTimeout: 10 * time.Second,
|
||||||
|
}
|
||||||
|
return &http.Client{Timeout: timeout, Transport: transport}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SafeDialContext returns a DialContext that rejects loopback, link-local,
|
||||||
|
// multicast, unspecified, and cloud-metadata addresses at connect time,
|
||||||
|
// re-resolving and binding to the resolved IP so a DNS rebind between
|
||||||
|
// resolution and connect cannot slip through. Exposed so other transports
|
||||||
|
// (e.g. the outbound notification client) can apply the same SSRF policy
|
||||||
|
// without duplicating it or losing their own connection-pool tuning.
|
||||||
|
func SafeDialContext(dialer *net.Dialer) func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||||
|
return func(ctx context.Context, network, addr string) (net.Conn, error) {
|
||||||
host, port, err := net.SplitHostPort(addr)
|
host, port, err := net.SplitHostPort(addr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -77,12 +92,7 @@ func NewSafeHTTPClient(timeout time.Duration) *http.Client {
|
|||||||
// Bind to the first resolved IP so a rebind between resolution
|
// Bind to the first resolved IP so a rebind between resolution
|
||||||
// and connect cannot redirect the request to a blocked address.
|
// and connect cannot redirect the request to a blocked address.
|
||||||
return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].IP.String(), port))
|
return dialer.DialContext(ctx, network, net.JoinHostPort(ips[0].IP.String(), port))
|
||||||
},
|
|
||||||
MaxIdleConns: 16,
|
|
||||||
IdleConnTimeout: 30 * time.Second,
|
|
||||||
TLSHandshakeTimeout: 10 * time.Second,
|
|
||||||
}
|
}
|
||||||
return &http.Client{Timeout: timeout, Transport: transport}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// blockReason returns a human label for why an IP is rejected, or ""
|
// blockReason returns a human label for why an IP is rejected, or ""
|
||||||
@@ -92,6 +102,13 @@ func blockReason(ip net.IP) string {
|
|||||||
if ip == nil {
|
if ip == nil {
|
||||||
return "nil address"
|
return "nil address"
|
||||||
}
|
}
|
||||||
|
// Normalize IPv4-mapped IPv6 (::ffff:x.x.x.x) so the loopback / link-local
|
||||||
|
// classifiers below catch them. net.IP.To4() returns the 4-byte form for
|
||||||
|
// IPv4-mapped addresses; net's IsLoopback already handles this, but pin
|
||||||
|
// the conversion to avoid future surprises if the std-lib semantics drift.
|
||||||
|
if v4 := ip.To4(); v4 != nil {
|
||||||
|
ip = v4
|
||||||
|
}
|
||||||
switch {
|
switch {
|
||||||
case ip.IsLoopback():
|
case ip.IsLoopback():
|
||||||
return "loopback"
|
return "loopback"
|
||||||
@@ -104,5 +121,22 @@ func blockReason(ip net.IP) string {
|
|||||||
case ip.IsMulticast():
|
case ip.IsMulticast():
|
||||||
return "multicast"
|
return "multicast"
|
||||||
}
|
}
|
||||||
|
// Cloud metadata endpoints — AWS / GCP / Azure are covered by the
|
||||||
|
// link-local block (169.254.169.254). The rest must be enumerated.
|
||||||
|
if metadataIPSet[ip.String()] {
|
||||||
|
return "cloud metadata endpoint"
|
||||||
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// metadataIPSet enumerates well-known cloud metadata IPs that are NOT
// covered by net.IP.IsLinkLocalUnicast. Updating this set is the lightest
// way to keep up with new providers without changing the policy shape.
//
// blockReason looks addresses up by ip.String() after collapsing
// IPv4-mapped IPv6 addresses to their 4-byte form, so every key must be
// written in the textual form net.IP.String() produces.
var metadataIPSet = map[string]bool{
	// Alibaba Cloud ECS metadata.
	"100.100.100.200": true,
	// Oracle Cloud Infrastructure metadata.
	"192.0.0.192": true,
	// AWS IMDS over IPv6 (ULA — not link-local, must be listed).
	"fd00:ec2::254": true,
}
|
||||||
|
|||||||
@@ -234,17 +234,17 @@ func (c *Collector) sampleAll(ctx context.Context, targets []target) []store.Con
|
|||||||
found := make([]bool, len(targets))
|
found := make([]bool, len(targets))
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
loop:
|
||||||
for i, t := range targets {
|
for i, t := range targets {
|
||||||
// Acquire the semaphore in the parent loop so ctx cancellation
|
// Acquire the semaphore in the parent loop so ctx cancellation
|
||||||
// short-circuits the queue rather than spawning goroutines that
|
// short-circuits the queue rather than spawning goroutines that
|
||||||
// block on an unreachable slot.
|
// block on an unreachable slot. The labelled break exits the for
|
||||||
|
// loop directly; a bare `break` inside `select` would only break
|
||||||
|
// the select and let the loop continue.
|
||||||
select {
|
select {
|
||||||
case sem <- struct{}{}:
|
case sem <- struct{}{}:
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
break
|
break loop
|
||||||
}
|
|
||||||
if ctx.Err() != nil {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(i int, t target) {
|
go func(i int, t target) {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package store
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"database/sql"
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -9,6 +10,22 @@ import (
|
|||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// validateExtraJSON guards the extra_json column against malformed
// documents. Per the codemap (docs/CODEMAPS/container-extra-json.md) readers
// tolerate unknown keys, but only when the stored value parses as JSON at
// all; a plugin that writes unparseable text would break every reader with
// no schema-level check to catch it. Rejecting it at the store boundary
// keeps that invariant cheap and obvious.
//
// The empty string is accepted. The check is purely syntactic: any text
// that json.Valid accepts passes. On failure the error reports only the
// length of the rejected value, not its content.
func validateExtraJSON(v string) error {
	if v == "" || json.Valid([]byte(v)) {
		return nil
	}
	return fmt.Errorf("extra_json: not valid JSON (%d bytes)", len(v))
}
|
||||||
|
|
||||||
// containerColumns is the canonical column list for `containers` queries.
|
// containerColumns is the canonical column list for `containers` queries.
|
||||||
// stage_id is populated by the deployer for project containers (so ListProxyRoutes
|
// stage_id is populated by the deployer for project containers (so ListProxyRoutes
|
||||||
// survives stage renames) and left empty for stacks and sites.
|
// survives stage renames) and left empty for stacks and sites.
|
||||||
@@ -42,6 +59,9 @@ func (s *Store) CreateContainer(c Container) (Container, error) {
|
|||||||
if c.ExtraJSON == "" {
|
if c.ExtraJSON == "" {
|
||||||
c.ExtraJSON = "{}"
|
c.ExtraJSON = "{}"
|
||||||
}
|
}
|
||||||
|
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||||
|
return Container{}, err
|
||||||
|
}
|
||||||
|
|
||||||
_, err := s.db.Exec(
|
_, err := s.db.Exec(
|
||||||
`INSERT INTO containers (`+containerColumns+`)
|
`INSERT INTO containers (`+containerColumns+`)
|
||||||
@@ -77,6 +97,9 @@ func (s *Store) UpsertContainer(c Container) error {
|
|||||||
if c.ExtraJSON == "" {
|
if c.ExtraJSON == "" {
|
||||||
c.ExtraJSON = "{}"
|
c.ExtraJSON = "{}"
|
||||||
}
|
}
|
||||||
|
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// SQLite UPSERT — INSERT...ON CONFLICT(id) DO UPDATE.
|
// SQLite UPSERT — INSERT...ON CONFLICT(id) DO UPDATE.
|
||||||
_, err := s.db.Exec(
|
_, err := s.db.Exec(
|
||||||
@@ -129,6 +152,9 @@ func (s *Store) ReconcileContainer(c Container) error {
|
|||||||
if c.ExtraJSON == "" {
|
if c.ExtraJSON == "" {
|
||||||
c.ExtraJSON = "{}"
|
c.ExtraJSON = "{}"
|
||||||
}
|
}
|
||||||
|
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// extra_json is deliberately NOT in the ON CONFLICT SET clause: the
|
// extra_json is deliberately NOT in the ON CONFLICT SET clause: the
|
||||||
// reconciler can't observe per-face route IDs from Docker, and
|
// reconciler can't observe per-face route IDs from Docker, and
|
||||||
@@ -321,6 +347,9 @@ func (s *Store) UpdateContainer(c Container) error {
|
|||||||
if c.ExtraJSON == "" {
|
if c.ExtraJSON == "" {
|
||||||
c.ExtraJSON = "{}"
|
c.ExtraJSON = "{}"
|
||||||
}
|
}
|
||||||
|
if err := validateExtraJSON(c.ExtraJSON); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
result, err := s.db.Exec(
|
result, err := s.db.Exec(
|
||||||
`UPDATE containers SET workload_id=?, workload_kind=?, role=?, stage_id=?, container_id=?,
|
`UPDATE containers SET workload_id=?, workload_kind=?, role=?, stage_id=?, container_id=?,
|
||||||
image_ref=?, image_tag=?, host=?, state=?, port=?,
|
image_ref=?, image_tag=?, host=?, state=?, port=?,
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
type EventLogFilter struct {
|
type EventLogFilter struct {
|
||||||
Severity string // Filter by severity (info, warn, error).
|
Severity string // Filter by severity (info, warn, error).
|
||||||
Source string // Filter by source.
|
Source string // Filter by source.
|
||||||
|
WorkloadID string // Filter by owning workload (exact match).
|
||||||
Since string // Only events created at or after this timestamp.
|
Since string // Only events created at or after this timestamp.
|
||||||
Until string // Only events created at or before this timestamp.
|
Until string // Only events created at or before this timestamp.
|
||||||
Limit int // Maximum number of results (default 50).
|
Limit int // Maximum number of results (default 50).
|
||||||
@@ -31,9 +32,9 @@ func (s *Store) InsertEvent(evt EventLog) (EventLog, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
result, err := s.db.Exec(
|
result, err := s.db.Exec(
|
||||||
`INSERT INTO event_log (source, severity, message, metadata, created_at)
|
`INSERT INTO event_log (source, workload_id, severity, message, metadata, created_at)
|
||||||
VALUES (?, ?, ?, ?, ?)`,
|
VALUES (?, ?, ?, ?, ?, ?)`,
|
||||||
evt.Source, evt.Severity, evt.Message, evt.Metadata, evt.CreatedAt,
|
evt.Source, evt.WorkloadID, evt.Severity, evt.Message, evt.Metadata, evt.CreatedAt,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return EventLog{}, fmt.Errorf("insert event: %w", err)
|
return EventLog{}, fmt.Errorf("insert event: %w", err)
|
||||||
@@ -81,6 +82,10 @@ func (s *Store) ListEvents(filter EventLogFilter) ([]EventLog, error) {
|
|||||||
conditions = append(conditions, "source IN ("+strings.Join(placeholders, ",")+")")
|
conditions = append(conditions, "source IN ("+strings.Join(placeholders, ",")+")")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if filter.WorkloadID != "" {
|
||||||
|
conditions = append(conditions, "workload_id = ?")
|
||||||
|
args = append(args, filter.WorkloadID)
|
||||||
|
}
|
||||||
if filter.Since != "" {
|
if filter.Since != "" {
|
||||||
conditions = append(conditions, "created_at >= ?")
|
conditions = append(conditions, "created_at >= ?")
|
||||||
args = append(args, filter.Since)
|
args = append(args, filter.Since)
|
||||||
@@ -90,7 +95,7 @@ func (s *Store) ListEvents(filter EventLogFilter) ([]EventLog, error) {
|
|||||||
args = append(args, filter.Until)
|
args = append(args, filter.Until)
|
||||||
}
|
}
|
||||||
|
|
||||||
query := "SELECT id, source, severity, message, metadata, created_at FROM event_log"
|
query := "SELECT id, source, workload_id, severity, message, metadata, created_at FROM event_log"
|
||||||
if len(conditions) > 0 {
|
if len(conditions) > 0 {
|
||||||
query += " WHERE " + strings.Join(conditions, " AND ")
|
query += " WHERE " + strings.Join(conditions, " AND ")
|
||||||
}
|
}
|
||||||
@@ -114,7 +119,7 @@ func (s *Store) ListEvents(filter EventLogFilter) ([]EventLog, error) {
|
|||||||
events := []EventLog{}
|
events := []EventLog{}
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var evt EventLog
|
var evt EventLog
|
||||||
if err := rows.Scan(&evt.ID, &evt.Source, &evt.Severity, &evt.Message, &evt.Metadata, &evt.CreatedAt); err != nil {
|
if err := rows.Scan(&evt.ID, &evt.Source, &evt.WorkloadID, &evt.Severity, &evt.Message, &evt.Metadata, &evt.CreatedAt); err != nil {
|
||||||
return nil, fmt.Errorf("scan event: %w", err)
|
return nil, fmt.Errorf("scan event: %w", err)
|
||||||
}
|
}
|
||||||
events = append(events, evt)
|
events = append(events, evt)
|
||||||
|
|||||||
@@ -0,0 +1,120 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestInsertEvent_RoundTripsWorkloadID(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
|
||||||
|
in := EventLog{
|
||||||
|
Source: "image",
|
||||||
|
WorkloadID: "wl-abc",
|
||||||
|
Severity: "info",
|
||||||
|
Message: "my-app: deployed",
|
||||||
|
Metadata: `{"workload_id":"wl-abc"}`,
|
||||||
|
}
|
||||||
|
saved, err := s.InsertEvent(in)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("InsertEvent: %v", err)
|
||||||
|
}
|
||||||
|
if saved.ID == 0 {
|
||||||
|
t.Fatal("expected non-zero ID after insert")
|
||||||
|
}
|
||||||
|
if saved.WorkloadID != "wl-abc" {
|
||||||
|
t.Fatalf("returned WorkloadID = %q, want %q", saved.WorkloadID, "wl-abc")
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := s.ListEvents(EventLogFilter{WorkloadID: "wl-abc"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 1 {
|
||||||
|
t.Fatalf("got %d rows, want 1", len(rows))
|
||||||
|
}
|
||||||
|
got := rows[0]
|
||||||
|
if got.WorkloadID != "wl-abc" {
|
||||||
|
t.Errorf("WorkloadID = %q, want %q", got.WorkloadID, "wl-abc")
|
||||||
|
}
|
||||||
|
if got.Source != "image" || got.Severity != "info" || got.Message != "my-app: deployed" {
|
||||||
|
t.Errorf("round-trip mismatch: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInsertEvent_DefaultsWorkloadIDToEmpty(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
|
||||||
|
// Non-deploy callers leave WorkloadID at its zero value; the column
|
||||||
|
// must accept "" (NOT NULL DEFAULT '').
|
||||||
|
saved, err := s.InsertEvent(EventLog{Source: "stale", Severity: "warn", Message: "x"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("InsertEvent: %v", err)
|
||||||
|
}
|
||||||
|
if saved.WorkloadID != "" {
|
||||||
|
t.Fatalf("WorkloadID = %q, want empty", saved.WorkloadID)
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := s.ListEvents(EventLogFilter{Source: "stale"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 1 || rows[0].WorkloadID != "" {
|
||||||
|
t.Fatalf("expected one unscoped row, got %+v", rows)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestListEvents_FilterByWorkloadID(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
|
||||||
|
for _, e := range []EventLog{
|
||||||
|
{Source: "image", WorkloadID: "wl-1", Severity: "info", Message: "a"},
|
||||||
|
{Source: "image", WorkloadID: "wl-1", Severity: "error", Message: "b"},
|
||||||
|
{Source: "compose", WorkloadID: "wl-2", Severity: "info", Message: "c"},
|
||||||
|
{Source: "stale", WorkloadID: "", Severity: "warn", Message: "d"},
|
||||||
|
} {
|
||||||
|
if _, err := s.InsertEvent(e); err != nil {
|
||||||
|
t.Fatalf("InsertEvent %q: %v", e.Message, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filtering by wl-1 returns only its two rows.
|
||||||
|
rows, err := s.ListEvents(EventLogFilter{WorkloadID: "wl-1"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents wl-1: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 2 {
|
||||||
|
t.Fatalf("wl-1: got %d rows, want 2", len(rows))
|
||||||
|
}
|
||||||
|
for _, r := range rows {
|
||||||
|
if r.WorkloadID != "wl-1" {
|
||||||
|
t.Errorf("wl-1 filter leaked row with workload_id %q", r.WorkloadID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// wl-2 returns exactly one row.
|
||||||
|
rows, err = s.ListEvents(EventLogFilter{WorkloadID: "wl-2"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents wl-2: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 1 || rows[0].Message != "c" {
|
||||||
|
t.Fatalf("wl-2: got %+v, want single row 'c'", rows)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combined workload + severity filter still narrows correctly.
|
||||||
|
rows, err = s.ListEvents(EventLogFilter{WorkloadID: "wl-1", Severity: "error"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents wl-1+error: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 1 || rows[0].Message != "b" {
|
||||||
|
t.Fatalf("wl-1+error: got %+v, want single row 'b'", rows)
|
||||||
|
}
|
||||||
|
|
||||||
|
// No filter returns all four rows (back-compat: unscoped query intact).
|
||||||
|
rows, err = s.ListEvents(EventLogFilter{})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents all: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 4 {
|
||||||
|
t.Fatalf("unfiltered: got %d rows, want 4", len(rows))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ErrLockHeld is the sentinel reported when the data directory's lockfile
// appears to belong to another running Tinyforge process. It is always
// returned wrapped with extra detail, so match it with errors.Is.
//
// Two instances sharing one data directory would otherwise collide silently;
// the original note cites double-fired schedulers, double-polled registries
// and `extra_json` read-modify-write corruption as the consequences.
var (
	ErrLockHeld = errors.New("data directory is locked by another tinyforge process")
)
|
||||||
|
|
||||||
|
// Lockfile describes the portable PID-file lock guarding a data directory.
// The acquisition protocol implemented by AcquireLockfile is:
//
//   - The file is created with O_CREATE|O_EXCL so that creation is exclusive.
//   - When the file already exists, the PID stored in it is read. A live PID
//     yields ErrLockHeld; a dead PID marks the lock as stale (the owner exited
//     without cleaning up) and the lock is reclaimed.
//   - flock is deliberately not used: identical behaviour on every platform
//     was preferred over advisory-lock semantics for this single-instance use.
//
// NOTE(review): the visible AcquireLockfile returns a release func rather
// than a Lockfile value — confirm whether this type is still referenced.
type Lockfile struct {
	// path is the location of the lockfile on disk.
	path string
}
|
||||||
|
|
||||||
|
// AcquireLockfile creates a PID-file lock under dataDir. Returns a
|
||||||
|
// Release function the caller must defer. If another live process holds
|
||||||
|
// the lock, returns ErrLockHeld with a hint pointing at the lockfile.
|
||||||
|
//
|
||||||
|
// Reclaim atomicity: when the existing lockfile names a dead PID, the
|
||||||
|
// replacement is serialized through an auxiliary reclaim lock (see
|
||||||
|
// reclaimStaleLock) so that, of N processes booting concurrently against
|
||||||
|
// the same stale lockfile, EXACTLY ONE reclaims it and the rest get
|
||||||
|
// ErrLockHeld. A bare `os.Remove`+`O_EXCL` retry — or a rename, which is
|
||||||
|
// "last-writer-wins" — cannot guarantee this: multiple reclaimers can each
|
||||||
|
// end up believing they own the lock, defeating the single-instance guard.
|
||||||
|
func AcquireLockfile(dataDir string) (release func(), err error) {
|
||||||
|
path := filepath.Join(dataDir, "tinyforge.lock")
|
||||||
|
|
||||||
|
// First try: clean acquire.
|
||||||
|
if rel, ok, err := tryCreateExclusive(path); ok {
|
||||||
|
return rel, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Existing lockfile — read PID and decide whether to reclaim.
|
||||||
|
pid, readErr := readLockPID(path)
|
||||||
|
if readErr == nil && processAlive(pid) {
|
||||||
|
return nil, fmt.Errorf("%w (held by pid %d, lockfile=%s)", ErrLockHeld, pid, path)
|
||||||
|
}
|
||||||
|
// Stale lock (dead pid) or malformed file — reclaim under serialization.
|
||||||
|
reason := "malformed existing lockfile"
|
||||||
|
if readErr == nil {
|
||||||
|
reason = fmt.Sprintf("stale lockfile (dead pid %d)", pid)
|
||||||
|
}
|
||||||
|
return reclaimStaleLock(path, reason)
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryCreateExclusive creates path with O_CREATE|O_EXCL and writes the current
// PID followed by a newline into it.
//
// Results:
//   - (release, true, nil): the file was created; release removes it.
//   - (nil, false, nil): the file already exists.
//   - (nil, false, err): opening, writing or closing failed. A file that was
//     created but could not be written or closed is removed again.
func tryCreateExclusive(path string) (func(), bool, error) {
	file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
	switch {
	case err == nil:
		// Created; fall through to write the PID.
	case os.IsExist(err):
		return nil, false, nil
	default:
		return nil, false, fmt.Errorf("open lockfile: %w", err)
	}

	// discard deletes the lockfile; it doubles as the release function.
	discard := func() { _ = os.Remove(path) }

	if _, werr := fmt.Fprintf(file, "%d\n", os.Getpid()); werr != nil {
		_ = file.Close()
		discard()
		return nil, false, fmt.Errorf("write lockfile: %w", werr)
	}
	if cerr := file.Close(); cerr != nil {
		discard()
		return nil, false, fmt.Errorf("close lockfile: %w", cerr)
	}
	return discard, true, nil
}
|
||||||
|
|
||||||
|
// reclaimStaleLock replaces a stale/malformed lockfile with one holding our
|
||||||
|
// PID, serialized by an auxiliary reclaim lock. Holding the reclaim lock
|
||||||
|
// (O_EXCL) guarantees that only one process performs the remove-and-recreate
|
||||||
|
// of the main lockfile at a time, so concurrent reclaimers cannot each end
|
||||||
|
// up "owning" the lock the way a rename or unguarded remove+create would
|
||||||
|
// allow. The reclaim lock is itself liveness-checked so a reclaimer that
|
||||||
|
// crashed mid-reclaim cannot wedge startup forever.
|
||||||
|
func reclaimStaleLock(lockPath, reason string) (func(), error) {
|
||||||
|
reclaimPath := lockPath + ".reclaim"
|
||||||
|
if err := acquireReclaimLock(reclaimPath); err != nil {
|
||||||
|
return nil, fmt.Errorf("%w (%v; %s)", ErrLockHeld, err, reason)
|
||||||
|
}
|
||||||
|
defer func() { _ = os.Remove(reclaimPath) }()
|
||||||
|
|
||||||
|
// Serialized now. Re-check the main lock: another process may have fully
|
||||||
|
// reclaimed it between our liveness probe and our taking the reclaim lock.
|
||||||
|
if pid, perr := readLockPID(lockPath); perr == nil && processAlive(pid) {
|
||||||
|
return nil, fmt.Errorf("%w (reclaimed by pid %d while we waited; %s)",
|
||||||
|
ErrLockHeld, pid, reason)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safe to replace: remove the stale file, then create a fresh exclusive
|
||||||
|
// one. Both run while we hold the reclaim lock, so no other reclaimer can
|
||||||
|
// observe the gap.
|
||||||
|
if err := os.Remove(lockPath); err != nil && !os.IsNotExist(err) {
|
||||||
|
return nil, fmt.Errorf("%w (could not remove stale lockfile %s: %v; %s)",
|
||||||
|
ErrLockHeld, lockPath, err, reason)
|
||||||
|
}
|
||||||
|
rel, ok, err := tryCreateExclusive(lockPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
// Should be impossible while we hold the reclaim lock; fail safe.
|
||||||
|
return nil, fmt.Errorf("%w (lockfile reappeared during reclaim of %s; %s)",
|
||||||
|
ErrLockHeld, lockPath, reason)
|
||||||
|
}
|
||||||
|
return rel, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// acquireReclaimLock takes the auxiliary reclaim lock with O_EXCL. An
|
||||||
|
// existing reclaim lock is honoured only while its recorded PID is alive (a
|
||||||
|
// genuine concurrent reclaim); a stale one (dead/foreign PID) is removed once
|
||||||
|
// and re-attempted so a crashed reclaimer cannot block boot indefinitely. Of
|
||||||
|
// concurrent callers, O_EXCL ensures at most one acquires it; the rest fail
|
||||||
|
// and back off to ErrLockHeld.
|
||||||
|
func acquireReclaimLock(reclaimPath string) error {
|
||||||
|
for attempt := 0; attempt < 2; attempt++ {
|
||||||
|
f, err := os.OpenFile(reclaimPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
|
||||||
|
if err == nil {
|
||||||
|
if _, werr := fmt.Fprintf(f, "%d\n", os.Getpid()); werr != nil {
|
||||||
|
_ = f.Close()
|
||||||
|
_ = os.Remove(reclaimPath)
|
||||||
|
return fmt.Errorf("write reclaim lock %s: %v", reclaimPath, werr)
|
||||||
|
}
|
||||||
|
return f.Close()
|
||||||
|
}
|
||||||
|
if !os.IsExist(err) {
|
||||||
|
return fmt.Errorf("create reclaim lock %s: %v", reclaimPath, err)
|
||||||
|
}
|
||||||
|
// Reclaim lock present. A live owner means a real concurrent reclaim.
|
||||||
|
if pid, perr := readLockPID(reclaimPath); perr == nil && processAlive(pid) {
|
||||||
|
return fmt.Errorf("concurrent reclaim in progress (pid %d)", pid)
|
||||||
|
}
|
||||||
|
// Stale reclaim lock — clear it and retry the exclusive create once.
|
||||||
|
if rerr := os.Remove(reclaimPath); rerr != nil && !os.IsNotExist(rerr) {
|
||||||
|
return fmt.Errorf("remove stale reclaim lock %s: %v", reclaimPath, rerr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("could not acquire reclaim lock %s after retry", reclaimPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// readLockPID reads the file at path and parses its whitespace-trimmed
// content as a decimal PID. It returns an error when the file cannot be
// read, is empty after trimming, or does not parse as an integer.
func readLockPID(path string) (int, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return 0, readErr
	}
	trimmed := strings.TrimSpace(string(raw))
	if len(trimmed) == 0 {
		return 0, errors.New("empty lockfile")
	}
	pid, convErr := strconv.Atoi(trimmed)
	return pid, convErr
}
|
||||||
@@ -0,0 +1,137 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAcquireLockfile_FreshDir(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
release, err := AcquireLockfile(dir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("AcquireLockfile: %v", err)
|
||||||
|
}
|
||||||
|
defer release()
|
||||||
|
|
||||||
|
// Lockfile should exist with our PID.
|
||||||
|
data, err := os.ReadFile(filepath.Join(dir, "tinyforge.lock"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read lockfile: %v", err)
|
||||||
|
}
|
||||||
|
want := fmt.Sprintf("%d\n", os.Getpid())
|
||||||
|
if string(data) != want {
|
||||||
|
t.Errorf("lockfile content = %q, want %q", data, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAcquireLockfile_HeldByLivePID_Refused(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// Plant a lockfile holding the current PID (which is obviously alive).
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "tinyforge.lock"),
|
||||||
|
[]byte(fmt.Sprintf("%d\n", os.Getpid())), 0o600); err != nil {
|
||||||
|
t.Fatalf("plant lockfile: %v", err)
|
||||||
|
}
|
||||||
|
release, err := AcquireLockfile(dir)
|
||||||
|
if err == nil {
|
||||||
|
release()
|
||||||
|
t.Fatal("expected ErrLockHeld, got nil")
|
||||||
|
}
|
||||||
|
if !errors.Is(err, ErrLockHeld) {
|
||||||
|
t.Errorf("error = %v, want wrap of ErrLockHeld", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAcquireLockfile_StalePID_Reclaimed(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// PID 1 is init/launchd/systemd on POSIX and the System Idle Process
|
||||||
|
// on Windows — never our process, and very unlikely to be dead. We
|
||||||
|
// use a deliberately-impossible PID instead: a 31-bit value far
|
||||||
|
// above any plausible system maximum.
|
||||||
|
stalePID := 2147483640
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "tinyforge.lock"),
|
||||||
|
[]byte(fmt.Sprintf("%d\n", stalePID)), 0o600); err != nil {
|
||||||
|
t.Fatalf("plant stale lockfile: %v", err)
|
||||||
|
}
|
||||||
|
release, err := AcquireLockfile(dir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected reclaim of stale lock, got: %v", err)
|
||||||
|
}
|
||||||
|
defer release()
|
||||||
|
|
||||||
|
// Verify it now holds OUR pid, not the stale one.
|
||||||
|
data, err := os.ReadFile(filepath.Join(dir, "tinyforge.lock"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read lockfile after reclaim: %v", err)
|
||||||
|
}
|
||||||
|
want := fmt.Sprintf("%d\n", os.Getpid())
|
||||||
|
if string(data) != want {
|
||||||
|
t.Errorf("lockfile content after reclaim = %q, want %q", data, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAcquireLockfile_ConcurrentReclaim_SingleWinner(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// Plant a stale lockfile (impossibly high, certainly-dead PID), then have
|
||||||
|
// many goroutines race to reclaim it. Exactly one must win; the rest must
|
||||||
|
// be refused with ErrLockHeld. A "last-writer-wins" reclaim would let
|
||||||
|
// several goroutines all believe they own the lock.
|
||||||
|
stalePID := 2147483640
|
||||||
|
if err := os.WriteFile(filepath.Join(dir, "tinyforge.lock"),
|
||||||
|
[]byte(fmt.Sprintf("%d\n", stalePID)), 0o600); err != nil {
|
||||||
|
t.Fatalf("plant stale lockfile: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
const n = 16
|
||||||
|
var (
|
||||||
|
wg sync.WaitGroup
|
||||||
|
mu sync.Mutex
|
||||||
|
winners int
|
||||||
|
releases []func()
|
||||||
|
)
|
||||||
|
start := make(chan struct{})
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
<-start
|
||||||
|
release, err := AcquireLockfile(dir)
|
||||||
|
if err != nil {
|
||||||
|
if !errors.Is(err, ErrLockHeld) {
|
||||||
|
t.Errorf("loser error = %v, want wrap of ErrLockHeld", err)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
mu.Lock()
|
||||||
|
winners++
|
||||||
|
releases = append(releases, release)
|
||||||
|
mu.Unlock()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
close(start)
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
for _, r := range releases {
|
||||||
|
r()
|
||||||
|
}
|
||||||
|
if winners != 1 {
|
||||||
|
t.Fatalf("concurrent reclaim winners = %d, want exactly 1", winners)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAcquireLockfile_ReleaseRemovesFile(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
release, err := AcquireLockfile(dir)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("AcquireLockfile: %v", err)
|
||||||
|
}
|
||||||
|
release()
|
||||||
|
|
||||||
|
path := filepath.Join(dir, "tinyforge.lock")
|
||||||
|
if _, err := os.Stat(path); !os.IsNotExist(err) {
|
||||||
|
t.Errorf("lockfile still present after release: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
//go:build !windows
|
||||||
|
|
||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// processAlive reports whether pid appears to belong to a running process.
//
// The probe sends signal 0 through the *os.Process handle: nothing is
// delivered, only the outcome is inspected. A nil error means the process
// exists and is ours to signal; a permission error (os.ErrPermission or
// EPERM) means it exists but belongs to someone else, which still counts as
// alive. Every other error is treated as "not running".
//
// The error from os.FindProcess is intentionally ignored; the call is only
// needed to obtain the handle, and a nil handle is reported as not alive.
// Non-positive PIDs are never alive.
func processAlive(pid int) bool {
	if pid < 1 {
		return false
	}
	handle, _ := os.FindProcess(pid)
	if handle == nil {
		return false
	}
	switch probeErr := handle.Signal(syscall.Signal(0)); {
	case probeErr == nil:
		return true
	case errors.Is(probeErr, os.ErrPermission), errors.Is(probeErr, syscall.EPERM):
		// Exists, but owned by another user.
		return true
	default:
		return false
	}
}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
//go:build windows
|
||||||
|
|
||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"golang.org/x/sys/windows"
|
||||||
|
)
|
||||||
|
|
||||||
|
// processAlive returns true when the given PID is currently held by a
|
||||||
|
// running Windows process. OpenProcess with PROCESS_QUERY_LIMITED_INFORMATION
|
||||||
|
// is the supported way to check liveness without elevation.
|
||||||
|
func processAlive(pid int) bool {
|
||||||
|
if pid <= 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
h, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer windows.CloseHandle(h)
|
||||||
|
var exitCode uint32
|
||||||
|
if err := windows.GetExitCodeProcess(h, &exitCode); err != nil {
|
||||||
|
// Conservative: if we can't ask, assume alive so we don't reclaim
|
||||||
|
// an active lock. Worst case the operator sees ErrLockHeld and
|
||||||
|
// removes the lockfile by hand.
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
const stillActive = 259 // STILL_ACTIVE
|
||||||
|
return exitCode == stillActive
|
||||||
|
}
|
||||||
@@ -0,0 +1,191 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CreateMetricAlertRule inserts a new rule row after validating its
|
||||||
|
// metric/comparator/severity enums and rejecting negative cooldowns.
|
||||||
|
func (s *Store) CreateMetricAlertRule(r MetricAlertRule) (MetricAlertRule, error) {
|
||||||
|
if err := validateMetricAlertRule(r); err != nil {
|
||||||
|
return MetricAlertRule{}, err
|
||||||
|
}
|
||||||
|
now := Now()
|
||||||
|
r.CreatedAt = now
|
||||||
|
r.UpdatedAt = now
|
||||||
|
res, err := s.db.Exec(
|
||||||
|
`INSERT INTO metric_alert_rules
|
||||||
|
(workload_id, name, metric, comparator, threshold, severity,
|
||||||
|
cooldown_seconds, enabled, created_at, updated_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
|
r.WorkloadID, r.Name, r.Metric, r.Comparator, r.Threshold, r.Severity,
|
||||||
|
r.CooldownSeconds, boolToInt(r.Enabled), r.CreatedAt, r.UpdatedAt,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("insert metric alert rule: %w", err)
|
||||||
|
}
|
||||||
|
id, err := res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("get metric alert rule id: %w", err)
|
||||||
|
}
|
||||||
|
r.ID = id
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListMetricAlertRules returns every rule, ordered by id for stable UI
|
||||||
|
// rendering.
|
||||||
|
func (s *Store) ListMetricAlertRules() ([]MetricAlertRule, error) {
|
||||||
|
return s.queryMetricAlertRules(
|
||||||
|
`SELECT id, workload_id, name, metric, comparator, threshold, severity,
|
||||||
|
cooldown_seconds, enabled, created_at, updated_at
|
||||||
|
FROM metric_alert_rules ORDER BY id`,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListMetricAlertRulesByWorkload returns rules that apply to the given
|
||||||
|
// workload: rows explicitly scoped to it plus global rows (workload_id
|
||||||
|
// = ""). Useful for the workload detail page.
|
||||||
|
func (s *Store) ListMetricAlertRulesByWorkload(workloadID string) ([]MetricAlertRule, error) {
|
||||||
|
return s.queryMetricAlertRules(
|
||||||
|
`SELECT id, workload_id, name, metric, comparator, threshold, severity,
|
||||||
|
cooldown_seconds, enabled, created_at, updated_at
|
||||||
|
FROM metric_alert_rules WHERE workload_id = ? OR workload_id = '' ORDER BY id`,
|
||||||
|
workloadID,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetMetricAlertRule fetches one rule by id or returns ErrNotFound.
|
||||||
|
func (s *Store) GetMetricAlertRule(id int64) (MetricAlertRule, error) {
|
||||||
|
row := s.db.QueryRow(
|
||||||
|
`SELECT id, workload_id, name, metric, comparator, threshold, severity,
|
||||||
|
cooldown_seconds, enabled, created_at, updated_at
|
||||||
|
FROM metric_alert_rules WHERE id = ?`, id,
|
||||||
|
)
|
||||||
|
r, err := scanMetricAlertRuleRow(row)
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("metric alert rule %d: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("query metric alert rule: %w", err)
|
||||||
|
}
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateMetricAlertRule overwrites the editable columns of a rule row.
|
||||||
|
// id and workload_id are immutable on update — change the scope of a
|
||||||
|
// rule by deleting + recreating, mirroring the log-scan store.
|
||||||
|
func (s *Store) UpdateMetricAlertRule(r MetricAlertRule) (MetricAlertRule, error) {
|
||||||
|
if r.ID == 0 {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("metric alert rule: id is required for update")
|
||||||
|
}
|
||||||
|
if err := validateMetricAlertRule(r); err != nil {
|
||||||
|
return MetricAlertRule{}, err
|
||||||
|
}
|
||||||
|
r.UpdatedAt = Now()
|
||||||
|
res, err := s.db.Exec(
|
||||||
|
`UPDATE metric_alert_rules
|
||||||
|
SET name = ?, metric = ?, comparator = ?, threshold = ?, severity = ?,
|
||||||
|
cooldown_seconds = ?, enabled = ?, updated_at = ?
|
||||||
|
WHERE id = ?`,
|
||||||
|
r.Name, r.Metric, r.Comparator, r.Threshold, r.Severity,
|
||||||
|
r.CooldownSeconds, boolToInt(r.Enabled), r.UpdatedAt, r.ID,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("update metric alert rule: %w", err)
|
||||||
|
}
|
||||||
|
n, _ := res.RowsAffected()
|
||||||
|
if n == 0 {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("metric alert rule %d: %w", r.ID, ErrNotFound)
|
||||||
|
}
|
||||||
|
return s.GetMetricAlertRule(r.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteMetricAlertRule removes a rule by id, returning ErrNotFound when
|
||||||
|
// no row matched.
|
||||||
|
func (s *Store) DeleteMetricAlertRule(id int64) error {
|
||||||
|
res, err := s.db.Exec(`DELETE FROM metric_alert_rules WHERE id = ?`, id)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("delete metric alert rule: %w", err)
|
||||||
|
}
|
||||||
|
n, _ := res.RowsAffected()
|
||||||
|
if n == 0 {
|
||||||
|
return fmt.Errorf("metric alert rule %d: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Store) queryMetricAlertRules(query string, args ...any) ([]MetricAlertRule, error) {
|
||||||
|
rows, err := s.db.Query(query, args...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query metric alert rules: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
out := []MetricAlertRule{}
|
||||||
|
for rows.Next() {
|
||||||
|
r, err := scanMetricAlertRuleRows(rows)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out = append(out, r)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
func scanMetricAlertRuleRows(rows *sql.Rows) (MetricAlertRule, error) {
|
||||||
|
var r MetricAlertRule
|
||||||
|
var enabled int
|
||||||
|
if err := rows.Scan(
|
||||||
|
&r.ID, &r.WorkloadID, &r.Name, &r.Metric, &r.Comparator, &r.Threshold, &r.Severity,
|
||||||
|
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
||||||
|
); err != nil {
|
||||||
|
return MetricAlertRule{}, fmt.Errorf("scan metric alert rule: %w", err)
|
||||||
|
}
|
||||||
|
r.Enabled = enabled != 0
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func scanMetricAlertRuleRow(row *sql.Row) (MetricAlertRule, error) {
|
||||||
|
var r MetricAlertRule
|
||||||
|
var enabled int
|
||||||
|
if err := row.Scan(
|
||||||
|
&r.ID, &r.WorkloadID, &r.Name, &r.Metric, &r.Comparator, &r.Threshold, &r.Severity,
|
||||||
|
&r.CooldownSeconds, &enabled, &r.CreatedAt, &r.UpdatedAt,
|
||||||
|
); err != nil {
|
||||||
|
return MetricAlertRule{}, err
|
||||||
|
}
|
||||||
|
r.Enabled = enabled != 0
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateMetricAlertRule enforces the per-row invariants: a non-empty
|
||||||
|
// name, a known metric/comparator, a valid severity (blank allowed so
|
||||||
|
// the caller can default it), and a non-negative cooldown.
|
||||||
|
func validateMetricAlertRule(r MetricAlertRule) error {
|
||||||
|
if strings.TrimSpace(r.Name) == "" {
|
||||||
|
return fmt.Errorf("metric alert rule: name is required")
|
||||||
|
}
|
||||||
|
switch r.Metric {
|
||||||
|
case MetricCPUPercent, MetricMemoryPercent, MetricMemoryBytes:
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("metric alert rule: invalid metric %q", r.Metric)
|
||||||
|
}
|
||||||
|
switch r.Comparator {
|
||||||
|
case MetricComparatorGT, MetricComparatorLT:
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("metric alert rule: invalid comparator %q", r.Comparator)
|
||||||
|
}
|
||||||
|
switch r.Severity {
|
||||||
|
case LogScanSeverityInfo, LogScanSeverityWarn, LogScanSeverityError:
|
||||||
|
case "":
|
||||||
|
// Default applied at the caller; allow blank.
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("metric alert rule: invalid severity %q", r.Severity)
|
||||||
|
}
|
||||||
|
if r.CooldownSeconds < 0 {
|
||||||
|
return fmt.Errorf("metric alert rule: cooldown_seconds must be >= 0")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,167 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCreateMetricAlertRule_Validates(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
in MetricAlertRule
|
||||||
|
wantErr string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "missing name",
|
||||||
|
in: MetricAlertRule{Metric: MetricCPUPercent, Comparator: MetricComparatorGT},
|
||||||
|
wantErr: "name is required",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "bad metric",
|
||||||
|
in: MetricAlertRule{Name: "n", Metric: "load_avg", Comparator: MetricComparatorGT},
|
||||||
|
wantErr: "invalid metric",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "bad comparator",
|
||||||
|
in: MetricAlertRule{Name: "n", Metric: MetricCPUPercent, Comparator: "eq"},
|
||||||
|
wantErr: "invalid comparator",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "bad severity",
|
||||||
|
in: MetricAlertRule{Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT, Severity: "loud"},
|
||||||
|
wantErr: "invalid severity",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "negative cooldown",
|
||||||
|
in: MetricAlertRule{Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT, CooldownSeconds: -1},
|
||||||
|
wantErr: "cooldown_seconds must be",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
_, err := s.CreateMetricAlertRule(c.in)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected error containing %q, got nil", c.wantErr)
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), c.wantErr) {
|
||||||
|
t.Fatalf("error mismatch: got %q want substring %q", err.Error(), c.wantErr)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateAndGetMetricAlertRule(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
r, err := s.CreateMetricAlertRule(MetricAlertRule{
|
||||||
|
Name: "cpu-hot", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
||||||
|
Threshold: 80, Severity: "warn", CooldownSeconds: 300, Enabled: true,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create: %v", err)
|
||||||
|
}
|
||||||
|
if r.ID == 0 {
|
||||||
|
t.Fatal("id should be set")
|
||||||
|
}
|
||||||
|
got, err := s.GetMetricAlertRule(r.ID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get: %v", err)
|
||||||
|
}
|
||||||
|
if got.Metric != MetricCPUPercent || got.Comparator != MetricComparatorGT {
|
||||||
|
t.Errorf("metric/comparator mismatch: %q %q", got.Metric, got.Comparator)
|
||||||
|
}
|
||||||
|
if got.Threshold != 80 {
|
||||||
|
t.Errorf("threshold mismatch: %v", got.Threshold)
|
||||||
|
}
|
||||||
|
if !got.Enabled {
|
||||||
|
t.Error("enabled lost on round-trip")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetMetricAlertRule_NotFound(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
if _, err := s.GetMetricAlertRule(999); err == nil {
|
||||||
|
t.Fatal("expected ErrNotFound for missing rule")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestListMetricAlertRulesByWorkload(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
_, _ = s.CreateMetricAlertRule(MetricAlertRule{
|
||||||
|
Name: "global", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
||||||
|
Threshold: 90, Severity: "warn", Enabled: true,
|
||||||
|
})
|
||||||
|
_, _ = s.CreateMetricAlertRule(MetricAlertRule{
|
||||||
|
Name: "w1-mem", WorkloadID: "w1", Metric: MetricMemoryPercent, Comparator: MetricComparatorGT,
|
||||||
|
Threshold: 85, Severity: "error", Enabled: true,
|
||||||
|
})
|
||||||
|
_, _ = s.CreateMetricAlertRule(MetricAlertRule{
|
||||||
|
Name: "w2-mem", WorkloadID: "w2", Metric: MetricMemoryBytes, Comparator: MetricComparatorGT,
|
||||||
|
Threshold: 1000, Severity: "info", Enabled: true,
|
||||||
|
})
|
||||||
|
|
||||||
|
w1, err := s.ListMetricAlertRulesByWorkload("w1")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("by workload: %v", err)
|
||||||
|
}
|
||||||
|
// w1 sees its own rule + the global, but NOT w2's rule.
|
||||||
|
if len(w1) != 2 {
|
||||||
|
t.Fatalf("w1 should see 2 rules (own + global), got %d", len(w1))
|
||||||
|
}
|
||||||
|
for _, r := range w1 {
|
||||||
|
if r.WorkloadID == "w2" {
|
||||||
|
t.Errorf("w1 should not see w2's rule")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateMetricAlertRule(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
r, _ := s.CreateMetricAlertRule(MetricAlertRule{
|
||||||
|
Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
||||||
|
Threshold: 80, Severity: "warn", Enabled: true,
|
||||||
|
})
|
||||||
|
r.Threshold = 95
|
||||||
|
r.Comparator = MetricComparatorLT
|
||||||
|
r.Enabled = false
|
||||||
|
got, err := s.UpdateMetricAlertRule(r)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("update: %v", err)
|
||||||
|
}
|
||||||
|
if got.Threshold != 95 {
|
||||||
|
t.Errorf("threshold not updated: %v", got.Threshold)
|
||||||
|
}
|
||||||
|
if got.Comparator != MetricComparatorLT {
|
||||||
|
t.Errorf("comparator not updated: %q", got.Comparator)
|
||||||
|
}
|
||||||
|
if got.Enabled {
|
||||||
|
t.Error("enabled=false not applied")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateMetricAlertRule_NotFound(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
_, err := s.UpdateMetricAlertRule(MetricAlertRule{
|
||||||
|
ID: 999, Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
||||||
|
})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected ErrNotFound updating missing rule")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteMetricAlertRule(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
r, _ := s.CreateMetricAlertRule(MetricAlertRule{
|
||||||
|
Name: "n", Metric: MetricCPUPercent, Comparator: MetricComparatorGT,
|
||||||
|
Threshold: 80, Severity: "warn", Enabled: true,
|
||||||
|
})
|
||||||
|
if err := s.DeleteMetricAlertRule(r.ID); err != nil {
|
||||||
|
t.Fatalf("delete: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := s.GetMetricAlertRule(r.ID); err == nil {
|
||||||
|
t.Error("rule should be gone after delete")
|
||||||
|
}
|
||||||
|
if err := s.DeleteMetricAlertRule(r.ID); err == nil {
|
||||||
|
t.Error("expected ErrNotFound deleting already-deleted rule")
|
||||||
|
}
|
||||||
|
}
|
||||||
+107
-1
@@ -91,6 +91,21 @@ type Backup struct {
|
|||||||
CreatedAt string `json:"created_at"`
|
CreatedAt string `json:"created_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VolumeSnapshot records a single captured archive of one workload's
// host-bind data volumes.
//
// It differs from Backup, which is global and SQLite-specific: a snapshot is
// scoped to a workload, and its archive is a tar.gz of the resolved volume
// directories. The Manifest field holds a JSON-encoded []SnapshotVolume
// describing what the archive covers, so that a future restore can re-resolve
// each target even if the volume settings have drifted since capture.
type VolumeSnapshot struct {
	ID         string `json:"id"`
	WorkloadID string `json:"workload_id"`
	Label      string `json:"label"`
	Filename   string `json:"filename"`
	SizeBytes  int64  `json:"size_bytes"`
	Manifest   string `json:"manifest"` // JSON-encoded []SnapshotVolume
	CreatedAt  string `json:"created_at"`
}
|
||||||
|
|
||||||
// DNSRecord tracks a DNS record managed by the application.
|
// DNSRecord tracks a DNS record managed by the application.
|
||||||
type DNSRecord struct {
|
type DNSRecord struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
@@ -106,10 +121,12 @@ type DNSRecord struct {
|
|||||||
// page. The legacy field names (ProjectID, ProjectName, StageID,
|
// page. The legacy field names (ProjectID, ProjectName, StageID,
|
||||||
// StageName, InstanceID) are retained verbatim for the existing
|
// StageName, InstanceID) are retained verbatim for the existing
|
||||||
// frontend contract — after the workload-first cutover they map to:
|
// frontend contract — after the workload-first cutover they map to:
|
||||||
|
//
|
||||||
// ProjectID/Name → workload id / workload name
|
// ProjectID/Name → workload id / workload name
|
||||||
// StageID/Name → containers.stage_id / containers.role
|
// StageID/Name → containers.stage_id / containers.role
|
||||||
// InstanceID → container row id
|
// InstanceID → container row id
|
||||||
// Source → "instance" for image/compose, "static_site" for static
|
// Source → "instance" for image/compose, "static_site" for static
|
||||||
|
//
|
||||||
// Renaming would require a coordinated frontend change; deferred.
|
// Renaming would require a coordinated frontend change; deferred.
|
||||||
type ProxyRoute struct {
|
type ProxyRoute struct {
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
@@ -157,6 +174,29 @@ type WorkloadEnv struct {
|
|||||||
UpdatedAt string `json:"updated_at"`
|
UpdatedAt string `json:"updated_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SharedSecret is an environment variable that is shared across workloads
// according to its scope.
//
// When resolved into a workload's container environment it acts as a
// low-precedence default: values from the image cfg.Env and from
// workload_env override it.
type SharedSecret struct {
	ID          string `json:"id"`
	Name        string `json:"name"`  // the environment variable KEY
	Value       string `json:"value"` // ciphertext when Encrypted is set; the API never returns it decrypted
	Encrypted   bool   `json:"encrypted"`
	Scope       string `json:"scope"`  // "global" or "app"
	AppID       string `json:"app_id"` // populated when scope is "app"; "" for global secrets
	Description string `json:"description"`
	Enabled     bool   `json:"enabled"`
	CreatedAt   string `json:"created_at"`
	UpdatedAt   string `json:"updated_at"`
}
|
||||||
|
|
||||||
|
// Valid values for SharedSecret.Scope.
const (
	// SharedSecretScopeGlobal marks a secret that is applied to every workload.
	SharedSecretScopeGlobal = "global"
	// SharedSecretScopeApp marks a secret that is applied only to workloads
	// whose app_id matches the secret's AppID.
	SharedSecretScopeApp = "app"
)
|
||||||
|
|
||||||
// VolumeScope defines the sharing scope for a volume mount.
|
// VolumeScope defines the sharing scope for a volume mount.
|
||||||
// Valid scopes: instance, stage, project, project_named, named, ephemeral.
|
// Valid scopes: instance, stage, project, project_named, named, ephemeral.
|
||||||
type VolumeScope string
|
type VolumeScope string
|
||||||
@@ -192,6 +232,7 @@ func IsValidVolumeScope(s string) bool {
|
|||||||
type EventLog struct {
|
type EventLog struct {
|
||||||
ID int64 `json:"id"`
|
ID int64 `json:"id"`
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
|
WorkloadID string `json:"workload_id"` // "" = unscoped (non-deploy events)
|
||||||
Severity string `json:"severity"` // info, warn, error
|
Severity string `json:"severity"` // info, warn, error
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
Metadata string `json:"metadata"` // JSON-encoded structured data
|
Metadata string `json:"metadata"` // JSON-encoded structured data
|
||||||
@@ -274,16 +315,57 @@ const (
|
|||||||
LogScanSeverityError = "error"
|
LogScanSeverityError = "error"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// MetricAlertRule describes a threshold alert on a container metric: when the
// metric breaches the threshold, an event is fired.
//
// It is the metrics counterpart of LogScanRule, evaluated against
// stats_samples rather than log lines.
type MetricAlertRule struct {
	ID              int64   `json:"id"`
	WorkloadID      string  `json:"workload_id"` // "" means the rule applies to all workloads
	Name            string  `json:"name"`
	Metric          string  `json:"metric"`     // cpu_percent | memory_percent | memory_bytes
	Comparator      string  `json:"comparator"` // gt | lt
	Threshold       float64 `json:"threshold"`
	Severity        string  `json:"severity"`         // info | warn | error
	CooldownSeconds int     `json:"cooldown_seconds"` // minimum seconds between fires per (rule, workload)
	Enabled         bool    `json:"enabled"`
	CreatedAt       string  `json:"created_at"`
	UpdatedAt       string  `json:"updated_at"`
}
|
||||||
|
|
||||||
|
// Metric identifiers accepted in MetricAlertRule.Metric. The store validates
// them on create and update.
const (
	// MetricCPUPercent is CPU usage expressed as a 0–100 ratio.
	MetricCPUPercent = "cpu_percent"
	// MetricMemoryPercent is memory usage expressed as a 0–100 ratio.
	MetricMemoryPercent = "memory_percent"
	// MetricMemoryBytes is memory usage as an absolute figure.
	MetricMemoryBytes = "memory_bytes"
)
|
||||||
|
|
||||||
|
// Comparators accepted in MetricAlertRule.Comparator.
const (
	// MetricComparatorGT fires when the value exceeds the threshold.
	MetricComparatorGT = "gt"
	// MetricComparatorLT fires when the value falls below the threshold.
	MetricComparatorLT = "lt"
)
|
||||||
|
|
||||||
// WorkloadKind enumerates the legacy discriminator values written into
|
// WorkloadKind enumerates the legacy discriminator values written into
|
||||||
// containers.workload_kind and workloads.kind. After the hard cutover the
|
// containers.workload_kind and workloads.kind. After the hard cutover the
|
||||||
// backing project / stack / static_site tables are gone — these constants
|
// backing project / stack / static_site tables are gone — these constants
|
||||||
// are just strings used to filter the unified containers index in the UI.
|
// are just strings used to filter the unified containers index in the UI.
|
||||||
|
//
|
||||||
|
// `build` is the dockerfile-source kind: a container built from a
|
||||||
|
// Dockerfile in a Git repo. Operationally it looks like a site (one
|
||||||
|
// container, one optional public face) but its origin is the build
|
||||||
|
// pipeline, not a static-asset extract. Dashboard filters that need to
|
||||||
|
// distinguish "I built this from source" from "I served files from a
|
||||||
|
// repo" should key on this value.
|
||||||
type WorkloadKind string
|
type WorkloadKind string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
WorkloadKindProject WorkloadKind = "project"
|
WorkloadKindProject WorkloadKind = "project"
|
||||||
WorkloadKindStack WorkloadKind = "stack"
|
WorkloadKindStack WorkloadKind = "stack"
|
||||||
WorkloadKindSite WorkloadKind = "site"
|
WorkloadKindSite WorkloadKind = "site"
|
||||||
|
WorkloadKindBuild WorkloadKind = "build"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Workload is the unifying primitive that abstracts Project, Stack, and StaticSite.
|
// Workload is the unifying primitive that abstracts Project, Stack, and StaticSite.
|
||||||
@@ -316,6 +398,31 @@ type Workload struct {
|
|||||||
UpdatedAt string `json:"updated_at"`
|
UpdatedAt string `json:"updated_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WorkloadNotification is a single outbound notification route configured for
// a workload. A workload may have several rows, which allows routing such as
// "one Slack channel for failures, one Discord webhook for successes" that
// the legacy single notification_url column could not express.
//
// EventTypes is a comma-separated allow-list, for example "build_failure" or
// "deploy_success,deploy_failure". When it is empty the row fires for every
// event type, which keeps the old single-destination behaviour expressible
// in the new shape.
//
// Secret goes through the same crypto envelope as other stored secrets, and
// the API layer strips it from responses.
type WorkloadNotification struct {
	ID         string `json:"id"`
	WorkloadID string `json:"workload_id"`
	Name       string `json:"name"`
	URL        string `json:"url"`
	Secret     string `json:"-"`
	EventTypes string `json:"event_types"`
	Enabled    bool   `json:"enabled"`
	SortOrder  int    `json:"sort_order"`
	CreatedAt  string `json:"created_at"`
	UpdatedAt  string `json:"updated_at"`
}
|
||||||
|
|
||||||
// Container is the normalized index of every Tinyforge-managed container.
|
// Container is the normalized index of every Tinyforge-managed container.
|
||||||
// Replaces the project-specific Instance table after migration. Subdomain/
|
// Replaces the project-specific Instance table after migration. Subdomain/
|
||||||
// proxy fields are hoisted as first-class columns because ListProxyRoutes,
|
// proxy fields are hoisted as first-class columns because ListProxyRoutes,
|
||||||
@@ -400,4 +507,3 @@ type App struct {
|
|||||||
CreatedAt string `json:"created_at"`
|
CreatedAt string `json:"created_at"`
|
||||||
UpdatedAt string `json:"updated_at"`
|
UpdatedAt string `json:"updated_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,186 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CreateSharedSecret inserts a new shared-secret row after validating its
|
||||||
|
// scope/app_id pairing and non-empty name. The caller is responsible for
|
||||||
|
// encrypting Value when Encrypted is set (mirroring workload_env) — the
|
||||||
|
// store treats Value as opaque bytes.
|
||||||
|
func (s *Store) CreateSharedSecret(sec SharedSecret) (SharedSecret, error) {
|
||||||
|
if err := validateSharedSecret(&sec); err != nil {
|
||||||
|
return SharedSecret{}, err
|
||||||
|
}
|
||||||
|
now := Now()
|
||||||
|
if sec.ID == "" {
|
||||||
|
sec.ID = uuid.New().String()
|
||||||
|
}
|
||||||
|
sec.CreatedAt = now
|
||||||
|
sec.UpdatedAt = now
|
||||||
|
_, err := s.db.Exec(
|
||||||
|
`INSERT INTO shared_secrets
|
||||||
|
(id, name, value, encrypted, scope, app_id, description, enabled, created_at, updated_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
|
sec.ID, sec.Name, sec.Value, BoolToInt(sec.Encrypted), sec.Scope, sec.AppID,
|
||||||
|
sec.Description, BoolToInt(sec.Enabled), sec.CreatedAt, sec.UpdatedAt,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return SharedSecret{}, fmt.Errorf("insert shared secret: %w", translateSQLError(err))
|
||||||
|
}
|
||||||
|
return sec, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListSharedSecrets returns every shared secret, ordered by scope then
|
||||||
|
// name for stable UI rendering (globals first).
|
||||||
|
func (s *Store) ListSharedSecrets() ([]SharedSecret, error) {
|
||||||
|
return s.querySharedSecrets(
|
||||||
|
`SELECT id, name, value, encrypted, scope, app_id, description, enabled, created_at, updated_at
|
||||||
|
FROM shared_secrets
|
||||||
|
ORDER BY CASE scope WHEN 'global' THEN 0 ELSE 1 END, app_id, name`,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetSharedSecret fetches one shared secret by id or returns ErrNotFound.
|
||||||
|
func (s *Store) GetSharedSecret(id string) (SharedSecret, error) {
|
||||||
|
row := s.db.QueryRow(
|
||||||
|
`SELECT id, name, value, encrypted, scope, app_id, description, enabled, created_at, updated_at
|
||||||
|
FROM shared_secrets WHERE id = ?`, id,
|
||||||
|
)
|
||||||
|
sec, err := scanSharedSecretRow(row)
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
return SharedSecret{}, fmt.Errorf("shared secret %s: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return SharedSecret{}, fmt.Errorf("query shared secret: %w", err)
|
||||||
|
}
|
||||||
|
return sec, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateSharedSecret overwrites the editable columns of a shared-secret
|
||||||
|
// row. id is immutable; name/value/encrypted/scope/app_id/description/
|
||||||
|
// enabled are overwritten wholesale (the API layer is responsible for
|
||||||
|
// merging partial PATCH input onto the existing row first).
|
||||||
|
func (s *Store) UpdateSharedSecret(sec SharedSecret) (SharedSecret, error) {
|
||||||
|
if sec.ID == "" {
|
||||||
|
return SharedSecret{}, fmt.Errorf("shared secret: id is required for update")
|
||||||
|
}
|
||||||
|
if err := validateSharedSecret(&sec); err != nil {
|
||||||
|
return SharedSecret{}, err
|
||||||
|
}
|
||||||
|
sec.UpdatedAt = Now()
|
||||||
|
res, err := s.db.Exec(
|
||||||
|
`UPDATE shared_secrets
|
||||||
|
SET name = ?, value = ?, encrypted = ?, scope = ?, app_id = ?,
|
||||||
|
description = ?, enabled = ?, updated_at = ?
|
||||||
|
WHERE id = ?`,
|
||||||
|
sec.Name, sec.Value, BoolToInt(sec.Encrypted), sec.Scope, sec.AppID,
|
||||||
|
sec.Description, BoolToInt(sec.Enabled), sec.UpdatedAt, sec.ID,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return SharedSecret{}, fmt.Errorf("update shared secret: %w", translateSQLError(err))
|
||||||
|
}
|
||||||
|
n, _ := res.RowsAffected()
|
||||||
|
if n == 0 {
|
||||||
|
return SharedSecret{}, fmt.Errorf("shared secret %s: %w", sec.ID, ErrNotFound)
|
||||||
|
}
|
||||||
|
return s.GetSharedSecret(sec.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteSharedSecret removes a shared secret by id, returning ErrNotFound
|
||||||
|
// when no row matched.
|
||||||
|
func (s *Store) DeleteSharedSecret(id string) error {
|
||||||
|
res, err := s.db.Exec(`DELETE FROM shared_secrets WHERE id = ?`, id)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("delete shared secret: %w", err)
|
||||||
|
}
|
||||||
|
n, _ := res.RowsAffected()
|
||||||
|
if n == 0 {
|
||||||
|
return fmt.Errorf("shared secret %s: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListApplicableSharedSecrets returns ENABLED secrets that apply to a
|
||||||
|
// workload in the given app: all global secrets plus the app's own.
|
||||||
|
// Ordered global-first so callers can overlay app on top of global.
|
||||||
|
func (s *Store) ListApplicableSharedSecrets(appID string) ([]SharedSecret, error) {
|
||||||
|
return s.querySharedSecrets(
|
||||||
|
`SELECT id, name, value, encrypted, scope, app_id, description, enabled, created_at, updated_at
|
||||||
|
FROM shared_secrets
|
||||||
|
WHERE enabled = 1 AND (scope = 'global' OR (scope = 'app' AND app_id = ?))
|
||||||
|
ORDER BY CASE scope WHEN 'global' THEN 0 ELSE 1 END, name`,
|
||||||
|
appID,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Store) querySharedSecrets(query string, args ...any) ([]SharedSecret, error) {
|
||||||
|
rows, err := s.db.Query(query, args...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query shared secrets: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
out := []SharedSecret{}
|
||||||
|
for rows.Next() {
|
||||||
|
sec, err := scanSharedSecretRows(rows)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out = append(out, sec)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
func scanSharedSecretRows(rows *sql.Rows) (SharedSecret, error) {
|
||||||
|
var sec SharedSecret
|
||||||
|
var enc, enabled int
|
||||||
|
if err := rows.Scan(
|
||||||
|
&sec.ID, &sec.Name, &sec.Value, &enc, &sec.Scope, &sec.AppID,
|
||||||
|
&sec.Description, &enabled, &sec.CreatedAt, &sec.UpdatedAt,
|
||||||
|
); err != nil {
|
||||||
|
return SharedSecret{}, fmt.Errorf("scan shared secret: %w", err)
|
||||||
|
}
|
||||||
|
sec.Encrypted = enc != 0
|
||||||
|
sec.Enabled = enabled != 0
|
||||||
|
return sec, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func scanSharedSecretRow(row *sql.Row) (SharedSecret, error) {
|
||||||
|
var sec SharedSecret
|
||||||
|
var enc, enabled int
|
||||||
|
if err := row.Scan(
|
||||||
|
&sec.ID, &sec.Name, &sec.Value, &enc, &sec.Scope, &sec.AppID,
|
||||||
|
&sec.Description, &enabled, &sec.CreatedAt, &sec.UpdatedAt,
|
||||||
|
); err != nil {
|
||||||
|
return SharedSecret{}, err
|
||||||
|
}
|
||||||
|
sec.Encrypted = enc != 0
|
||||||
|
sec.Enabled = enabled != 0
|
||||||
|
return sec, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateSharedSecret enforces the per-row invariants: a non-empty name,
|
||||||
|
// a valid scope, and a coherent scope/app_id pairing. When scope==app an
|
||||||
|
// app_id is required; when scope==global the app_id is forced blank so the
|
||||||
|
// unique index (scope, app_id, name) stays consistent for globals.
|
||||||
|
func validateSharedSecret(sec *SharedSecret) error {
|
||||||
|
if strings.TrimSpace(sec.Name) == "" {
|
||||||
|
return fmt.Errorf("shared secret: name is required")
|
||||||
|
}
|
||||||
|
switch sec.Scope {
|
||||||
|
case SharedSecretScopeGlobal:
|
||||||
|
sec.AppID = ""
|
||||||
|
case SharedSecretScopeApp:
|
||||||
|
if strings.TrimSpace(sec.AppID) == "" {
|
||||||
|
return fmt.Errorf("shared secret: app_id is required when scope is app")
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("shared secret: invalid scope %q", sec.Scope)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,218 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCreateSharedSecret_Validates(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
in SharedSecret
|
||||||
|
wantErr string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "missing name",
|
||||||
|
in: SharedSecret{Scope: SharedSecretScopeGlobal},
|
||||||
|
wantErr: "name is required",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid scope",
|
||||||
|
in: SharedSecret{Name: "FOO", Scope: "team"},
|
||||||
|
wantErr: "invalid scope",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "app scope without app_id",
|
||||||
|
in: SharedSecret{Name: "FOO", Scope: SharedSecretScopeApp},
|
||||||
|
wantErr: "app_id is required",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
_, err := s.CreateSharedSecret(c.in)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected error containing %q, got nil", c.wantErr)
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), c.wantErr) {
|
||||||
|
t.Fatalf("error mismatch: got %q want substring %q", err.Error(), c.wantErr)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateSharedSecret_GlobalForcesBlankAppID(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
got, err := s.CreateSharedSecret(SharedSecret{
|
||||||
|
Name: "GLOBAL_KEY", Value: "v", Scope: SharedSecretScopeGlobal,
|
||||||
|
AppID: "should-be-cleared", Enabled: true,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create: %v", err)
|
||||||
|
}
|
||||||
|
if got.AppID != "" {
|
||||||
|
t.Errorf("global secret AppID = %q, want empty", got.AppID)
|
||||||
|
}
|
||||||
|
if got.ID == "" {
|
||||||
|
t.Error("id should be set")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateAndGetSharedSecret(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
created, err := s.CreateSharedSecret(SharedSecret{
|
||||||
|
Name: "API_KEY", Value: "ciphertext", Encrypted: true,
|
||||||
|
Scope: SharedSecretScopeApp, AppID: "app1", Description: "d", Enabled: true,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create: %v", err)
|
||||||
|
}
|
||||||
|
got, err := s.GetSharedSecret(created.ID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("get: %v", err)
|
||||||
|
}
|
||||||
|
if got.Name != "API_KEY" || got.Value != "ciphertext" || !got.Encrypted {
|
||||||
|
t.Errorf("round-trip mismatch: %+v", got)
|
||||||
|
}
|
||||||
|
if got.Scope != SharedSecretScopeApp || got.AppID != "app1" {
|
||||||
|
t.Errorf("scope/app mismatch: %+v", got)
|
||||||
|
}
|
||||||
|
if !got.Enabled {
|
||||||
|
t.Error("enabled lost on round-trip")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetSharedSecret_NotFound(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
if _, err := s.GetSharedSecret("nope"); !errors.Is(err, ErrNotFound) {
|
||||||
|
t.Fatalf("expected ErrNotFound, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateSharedSecret(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
created, _ := s.CreateSharedSecret(SharedSecret{
|
||||||
|
Name: "K", Value: "v1", Scope: SharedSecretScopeGlobal, Enabled: true,
|
||||||
|
})
|
||||||
|
created.Value = "v2"
|
||||||
|
created.Description = "updated"
|
||||||
|
created.Enabled = false
|
||||||
|
got, err := s.UpdateSharedSecret(created)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("update: %v", err)
|
||||||
|
}
|
||||||
|
if got.Value != "v2" {
|
||||||
|
t.Errorf("value not updated: %q", got.Value)
|
||||||
|
}
|
||||||
|
if got.Description != "updated" {
|
||||||
|
t.Errorf("description not updated: %q", got.Description)
|
||||||
|
}
|
||||||
|
if got.Enabled {
|
||||||
|
t.Error("enabled=false not applied")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateSharedSecret_NotFound(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
_, err := s.UpdateSharedSecret(SharedSecret{
|
||||||
|
ID: "missing", Name: "K", Scope: SharedSecretScopeGlobal,
|
||||||
|
})
|
||||||
|
if !errors.Is(err, ErrNotFound) {
|
||||||
|
t.Fatalf("expected ErrNotFound updating missing secret, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteSharedSecret(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
created, _ := s.CreateSharedSecret(SharedSecret{
|
||||||
|
Name: "K", Value: "v", Scope: SharedSecretScopeGlobal, Enabled: true,
|
||||||
|
})
|
||||||
|
if err := s.DeleteSharedSecret(created.ID); err != nil {
|
||||||
|
t.Fatalf("delete: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := s.GetSharedSecret(created.ID); !errors.Is(err, ErrNotFound) {
|
||||||
|
t.Fatalf("expected ErrNotFound after delete, got %v", err)
|
||||||
|
}
|
||||||
|
if err := s.DeleteSharedSecret(created.ID); !errors.Is(err, ErrNotFound) {
|
||||||
|
t.Fatalf("expected ErrNotFound deleting twice, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSharedSecret_UniquePerScopeAppName(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
if _, err := s.CreateSharedSecret(SharedSecret{
|
||||||
|
Name: "DUP", Value: "a", Scope: SharedSecretScopeGlobal, Enabled: true,
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("first create: %v", err)
|
||||||
|
}
|
||||||
|
// Same scope+name collides on the unique index.
|
||||||
|
if _, err := s.CreateSharedSecret(SharedSecret{
|
||||||
|
Name: "DUP", Value: "b", Scope: SharedSecretScopeGlobal, Enabled: true,
|
||||||
|
}); err == nil {
|
||||||
|
t.Fatal("expected unique-index violation for duplicate global key")
|
||||||
|
}
|
||||||
|
// Same name under an app scope is a distinct row.
|
||||||
|
if _, err := s.CreateSharedSecret(SharedSecret{
|
||||||
|
Name: "DUP", Value: "c", Scope: SharedSecretScopeApp, AppID: "app1", Enabled: true,
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("app-scoped same name should be allowed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestListApplicableSharedSecrets(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
// Two globals (one disabled), one app1 secret, one app2 secret.
|
||||||
|
mustCreate(t, s, SharedSecret{Name: "G_ONE", Value: "g1", Scope: SharedSecretScopeGlobal, Enabled: true})
|
||||||
|
mustCreate(t, s, SharedSecret{Name: "G_OFF", Value: "off", Scope: SharedSecretScopeGlobal, Enabled: false})
|
||||||
|
mustCreate(t, s, SharedSecret{Name: "A_ONE", Value: "a1", Scope: SharedSecretScopeApp, AppID: "app1", Enabled: true})
|
||||||
|
mustCreate(t, s, SharedSecret{Name: "A_TWO", Value: "a2", Scope: SharedSecretScopeApp, AppID: "app2", Enabled: true})
|
||||||
|
|
||||||
|
got, err := s.ListApplicableSharedSecrets("app1")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("applicable: %v", err)
|
||||||
|
}
|
||||||
|
// app1 sees the enabled global + its own; not the disabled global, not app2's.
|
||||||
|
if len(got) != 2 {
|
||||||
|
t.Fatalf("want 2 applicable secrets, got %d: %+v", len(got), got)
|
||||||
|
}
|
||||||
|
// Global must come first so callers can overlay app on top.
|
||||||
|
if got[0].Name != "G_ONE" {
|
||||||
|
t.Errorf("expected global first, got %q", got[0].Name)
|
||||||
|
}
|
||||||
|
if got[1].Name != "A_ONE" {
|
||||||
|
t.Errorf("expected app1 secret second, got %q", got[1].Name)
|
||||||
|
}
|
||||||
|
for _, sec := range got {
|
||||||
|
if sec.AppID == "app2" {
|
||||||
|
t.Errorf("app1 must not see app2's secret: %+v", sec)
|
||||||
|
}
|
||||||
|
if !sec.Enabled {
|
||||||
|
t.Errorf("disabled secret leaked into applicable set: %+v", sec)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestListApplicableSharedSecrets_NoAppOnlyGlobals(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
mustCreate(t, s, SharedSecret{Name: "G", Value: "g", Scope: SharedSecretScopeGlobal, Enabled: true})
|
||||||
|
mustCreate(t, s, SharedSecret{Name: "A", Value: "a", Scope: SharedSecretScopeApp, AppID: "app1", Enabled: true})
|
||||||
|
|
||||||
|
// An ungrouped workload (appID == "") sees only globals.
|
||||||
|
got, err := s.ListApplicableSharedSecrets("")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("applicable: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != 1 || got[0].Name != "G" {
|
||||||
|
t.Fatalf("ungrouped workload should see only the global, got %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mustCreate(t *testing.T, s *Store, sec SharedSecret) SharedSecret {
|
||||||
|
t.Helper()
|
||||||
|
out, err := s.CreateSharedSecret(sec)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create shared secret %q: %v", sec.Name, err)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
+291
-2
@@ -55,11 +55,20 @@ func New(dbPath string) (*Store, error) {
|
|||||||
db.SetMaxOpenConns(1)
|
db.SetMaxOpenConns(1)
|
||||||
db.SetConnMaxLifetime(0)
|
db.SetConnMaxLifetime(0)
|
||||||
|
|
||||||
// Enable WAL mode and foreign keys for better concurrency and referential integrity.
|
// Enable WAL mode and foreign keys for better concurrency and
|
||||||
|
// referential integrity. `synchronous=NORMAL` pairs with WAL to skip
|
||||||
|
// the per-write fsync — the OS still flushes on checkpoint, durability
|
||||||
|
// is preserved across clean shutdowns, and crashes lose at most the
|
||||||
|
// last few committed transactions (acceptable for a tinyforge box).
|
||||||
|
// cache_size=-20000 = 20 MiB page cache, temp_store=MEMORY keeps
|
||||||
|
// indexer scratch off disk; both are pure perf knobs.
|
||||||
pragmas := []string{
|
pragmas := []string{
|
||||||
"PRAGMA journal_mode=WAL",
|
"PRAGMA journal_mode=WAL",
|
||||||
|
"PRAGMA synchronous=NORMAL",
|
||||||
"PRAGMA foreign_keys=ON",
|
"PRAGMA foreign_keys=ON",
|
||||||
"PRAGMA busy_timeout=5000",
|
"PRAGMA busy_timeout=5000",
|
||||||
|
"PRAGMA cache_size=-20000",
|
||||||
|
"PRAGMA temp_store=MEMORY",
|
||||||
}
|
}
|
||||||
for _, p := range pragmas {
|
for _, p := range pragmas {
|
||||||
if _, err := db.Exec(p); err != nil {
|
if _, err := db.Exec(p); err != nil {
|
||||||
@@ -169,6 +178,12 @@ func (s *Store) runMigrations() error {
|
|||||||
// Empty string = never fired. Pre-trigger-split DBs land the column
|
// Empty string = never fired. Pre-trigger-split DBs land the column
|
||||||
// here so the scheduler can read/write it on first boot.
|
// here so the scheduler can read/write it on first boot.
|
||||||
`ALTER TABLE triggers ADD COLUMN last_fired_at TEXT NOT NULL DEFAULT ''`,
|
`ALTER TABLE triggers ADD COLUMN last_fired_at TEXT NOT NULL DEFAULT ''`,
|
||||||
|
// Per-app deploy/activity timeline: scope each event_log row to the
|
||||||
|
// workload that produced it so the dashboard can query a workload's
|
||||||
|
// deploy history. Empty string = unscoped (the existing non-deploy
|
||||||
|
// loggers don't set it). Additive ADD COLUMN — the loop below
|
||||||
|
// tolerates the "duplicate column" error on fully-migrated DBs.
|
||||||
|
`ALTER TABLE event_log ADD COLUMN workload_id TEXT NOT NULL DEFAULT ''`,
|
||||||
// Hard cutover: drop every legacy table. Idempotent — DROP TABLE
|
// Hard cutover: drop every legacy table. Idempotent — DROP TABLE
|
||||||
// IF EXISTS is a no-op once the table is gone. Operators upgrading
|
// IF EXISTS is a no-op once the table is gone. Operators upgrading
|
||||||
// from a pre-cutover build will lose any project / stack / static
|
// from a pre-cutover build will lose any project / stack / static
|
||||||
@@ -269,6 +284,20 @@ func (s *Store) runMigrations() error {
|
|||||||
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||||
UNIQUE(workload_id, target)
|
UNIQUE(workload_id, target)
|
||||||
)`,
|
)`,
|
||||||
|
// volume_snapshots: per-workload archives of host-bind data
|
||||||
|
// volumes (tar.gz). Mirrors the backups table shape but scoped to a
|
||||||
|
// workload and self-describing via the manifest column so a restore
|
||||||
|
// can re-resolve each target. ON DELETE CASCADE so deleting an app
|
||||||
|
// drops its snapshot rows (the files are pruned separately).
|
||||||
|
`CREATE TABLE IF NOT EXISTS volume_snapshots (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||||
|
label TEXT NOT NULL DEFAULT '',
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
size_bytes INTEGER NOT NULL DEFAULT 0,
|
||||||
|
manifest TEXT NOT NULL DEFAULT '[]',
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)`,
|
||||||
// triggers: first-class redeploy signal sources. Webhook secrets
|
// triggers: first-class redeploy signal sources. Webhook secrets
|
||||||
// move from workload onto the trigger so one webhook URL can fan
|
// move from workload onto the trigger so one webhook URL can fan
|
||||||
// out to multiple workloads via workload_trigger_bindings.
|
// out to multiple workloads via workload_trigger_bindings.
|
||||||
@@ -284,6 +313,24 @@ func (s *Store) runMigrations() error {
|
|||||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
)`,
|
)`,
|
||||||
|
// workload_notifications: per-workload notification destinations.
|
||||||
|
// Each row is one route (Slack channel, Discord webhook, generic
|
||||||
|
// receiver, ...). event_types is a comma-separated allow-list —
|
||||||
|
// empty means "all events". When zero rows exist for a workload
|
||||||
|
// the dispatcher falls back to the legacy single notification_url
|
||||||
|
// column on workloads so existing setups keep working unchanged.
|
||||||
|
`CREATE TABLE IF NOT EXISTS workload_notifications (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
workload_id TEXT NOT NULL REFERENCES workloads(id) ON DELETE CASCADE,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
url TEXT NOT NULL,
|
||||||
|
secret TEXT NOT NULL DEFAULT '',
|
||||||
|
event_types TEXT NOT NULL DEFAULT '',
|
||||||
|
enabled INTEGER NOT NULL DEFAULT 1,
|
||||||
|
sort_order INTEGER NOT NULL DEFAULT 0,
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)`,
|
||||||
// workload_trigger_bindings: many-to-many between workloads and
|
// workload_trigger_bindings: many-to-many between workloads and
|
||||||
// triggers. binding_config is the per-binding override applied on
|
// triggers. binding_config is the per-binding override applied on
|
||||||
// top of trigger.config (top-level JSON merge, binding wins).
|
// top of trigger.config (top-level JSON merge, binding wins).
|
||||||
@@ -375,6 +422,43 @@ func (s *Store) runMigrations() error {
|
|||||||
)`,
|
)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_workload ON log_scan_rules(workload_id)`,
|
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_workload ON log_scan_rules(workload_id)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_overrides ON log_scan_rules(overrides_id)`,
|
`CREATE INDEX IF NOT EXISTS idx_log_scan_rules_overrides ON log_scan_rules(overrides_id)`,
|
||||||
|
// metric_alert_rules: threshold rules the metric-alert manager
|
||||||
|
// evaluates against recent container stats samples. WorkloadID is
|
||||||
|
// nullable (via "" sentinel) so a global rule applies to every
|
||||||
|
// workload; a non-empty value scopes it to one workload.
|
||||||
|
`CREATE TABLE IF NOT EXISTS metric_alert_rules (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
workload_id TEXT NOT NULL DEFAULT '',
|
||||||
|
name TEXT NOT NULL DEFAULT '',
|
||||||
|
metric TEXT NOT NULL,
|
||||||
|
comparator TEXT NOT NULL,
|
||||||
|
threshold REAL NOT NULL DEFAULT 0,
|
||||||
|
severity TEXT NOT NULL DEFAULT 'warn',
|
||||||
|
cooldown_seconds INTEGER NOT NULL DEFAULT 300,
|
||||||
|
enabled INTEGER NOT NULL DEFAULT 1,
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)`,
|
||||||
|
`CREATE INDEX IF NOT EXISTS idx_metric_alert_rules_workload ON metric_alert_rules(workload_id)`,
|
||||||
|
// shared_secrets: env vars shared across workloads by scope. Scope
|
||||||
|
// "global" applies to every workload; "app" applies only to
|
||||||
|
// workloads whose app_id matches. Resolved into a workload's
|
||||||
|
// container env as a low-precedence default (see
|
||||||
|
// internal/workload/plugin/env.go).
|
||||||
|
`CREATE TABLE IF NOT EXISTS shared_secrets (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
value TEXT NOT NULL DEFAULT '',
|
||||||
|
encrypted INTEGER NOT NULL DEFAULT 1,
|
||||||
|
scope TEXT NOT NULL,
|
||||||
|
app_id TEXT NOT NULL DEFAULT '',
|
||||||
|
description TEXT NOT NULL DEFAULT '',
|
||||||
|
enabled INTEGER NOT NULL DEFAULT 1,
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)`,
|
||||||
|
`CREATE UNIQUE INDEX IF NOT EXISTS idx_shared_secrets_scope_name ON shared_secrets(scope, app_id, name)`,
|
||||||
|
`CREATE INDEX IF NOT EXISTS idx_shared_secrets_app ON shared_secrets(app_id)`,
|
||||||
}
|
}
|
||||||
for _, t := range observabilityTables {
|
for _, t := range observabilityTables {
|
||||||
if _, err := s.db.Exec(t); err != nil {
|
if _, err := s.db.Exec(t); err != nil {
|
||||||
@@ -405,6 +489,7 @@ func (s *Store) runMigrations() error {
|
|||||||
`CREATE INDEX IF NOT EXISTS idx_event_log_severity ON event_log(severity)`,
|
`CREATE INDEX IF NOT EXISTS idx_event_log_severity ON event_log(severity)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_event_log_source ON event_log(source)`,
|
`CREATE INDEX IF NOT EXISTS idx_event_log_source ON event_log(source)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_event_log_created_at ON event_log(created_at)`,
|
`CREATE INDEX IF NOT EXISTS idx_event_log_created_at ON event_log(created_at)`,
|
||||||
|
`CREATE INDEX IF NOT EXISTS idx_event_log_workload ON event_log(workload_id, created_at)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_dns_records_consumer ON dns_records(consumer_type, consumer_id)`,
|
`CREATE INDEX IF NOT EXISTS idx_dns_records_consumer ON dns_records(consumer_type, consumer_id)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_container_stats_owner_ts ON container_stats_samples(owner_type, owner_id, ts)`,
|
`CREATE INDEX IF NOT EXISTS idx_container_stats_owner_ts ON container_stats_samples(owner_type, owner_id, ts)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_container_stats_container_ts ON container_stats_samples(container_id, ts)`,
|
`CREATE INDEX IF NOT EXISTS idx_container_stats_container_ts ON container_stats_samples(container_id, ts)`,
|
||||||
@@ -422,11 +507,13 @@ func (s *Store) runMigrations() error {
|
|||||||
`CREATE INDEX IF NOT EXISTS idx_containers_stage_id ON containers(stage_id) WHERE stage_id != ''`,
|
`CREATE INDEX IF NOT EXISTS idx_containers_stage_id ON containers(stage_id) WHERE stage_id != ''`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_workload_env_workload ON workload_env(workload_id)`,
|
`CREATE INDEX IF NOT EXISTS idx_workload_env_workload ON workload_env(workload_id)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_workload_volumes_workload ON workload_volumes(workload_id)`,
|
`CREATE INDEX IF NOT EXISTS idx_workload_volumes_workload ON workload_volumes(workload_id)`,
|
||||||
|
`CREATE INDEX IF NOT EXISTS idx_volume_snapshots_workload ON volume_snapshots(workload_id)`,
|
||||||
// Trigger-split indexes.
|
// Trigger-split indexes.
|
||||||
`CREATE INDEX IF NOT EXISTS idx_triggers_kind ON triggers(kind)`,
|
`CREATE INDEX IF NOT EXISTS idx_triggers_kind ON triggers(kind)`,
|
||||||
`CREATE UNIQUE INDEX IF NOT EXISTS idx_triggers_webhook_secret ON triggers(webhook_secret) WHERE webhook_secret != ''`,
|
`CREATE UNIQUE INDEX IF NOT EXISTS idx_triggers_webhook_secret ON triggers(webhook_secret) WHERE webhook_secret != ''`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_bindings_workload ON workload_trigger_bindings(workload_id)`,
|
`CREATE INDEX IF NOT EXISTS idx_bindings_workload ON workload_trigger_bindings(workload_id)`,
|
||||||
`CREATE INDEX IF NOT EXISTS idx_bindings_trigger ON workload_trigger_bindings(trigger_id)`,
|
`CREATE INDEX IF NOT EXISTS idx_bindings_trigger ON workload_trigger_bindings(trigger_id)`,
|
||||||
|
`CREATE INDEX IF NOT EXISTS idx_workload_notifs_workload ON workload_notifications(workload_id)`,
|
||||||
}
|
}
|
||||||
for _, idx := range indexes {
|
for _, idx := range indexes {
|
||||||
if _, err := s.db.Exec(idx); err != nil {
|
if _, err := s.db.Exec(idx); err != nil {
|
||||||
@@ -434,13 +521,215 @@ func (s *Store) runMigrations() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.backfillTriggersFromWorkloads(); err != nil {
|
// schema_versions table gates one-shot data migrations like the
|
||||||
|
// trigger backfill below. Without this, the backfill scan ran on
|
||||||
|
// every boot even on fully-migrated DBs — wasted I/O and (more
|
||||||
|
// importantly) made it impossible to tell whether a "no rows
|
||||||
|
// processed" was a clean state or a missed-migration bug.
|
||||||
|
if _, err := s.db.Exec(`CREATE TABLE IF NOT EXISTS schema_versions (
|
||||||
|
version INTEGER PRIMARY KEY,
|
||||||
|
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)`); err != nil {
|
||||||
|
return fmt.Errorf("create schema_versions: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.runOnce(1, "trigger backfill", s.backfillTriggersFromWorkloads); err != nil {
|
||||||
|
// Backfill failure is non-fatal — we log and let the operator
|
||||||
|
// retry. The version is only recorded on success.
|
||||||
slog.Warn("trigger backfill", "error", err)
|
slog.Warn("trigger backfill", "error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// runOnce executes fn at most one time per database lifetime, recording
|
||||||
|
// success in schema_versions. Useful for data migrations whose source
|
||||||
|
// table eventually disappears (so re-running becomes pointless or
|
||||||
|
// dangerous).
|
||||||
|
func (s *Store) runOnce(version int, label string, fn func() error) error {
|
||||||
|
var applied int
|
||||||
|
if err := s.db.QueryRow(`SELECT COUNT(*) FROM schema_versions WHERE version = ?`, version).Scan(&applied); err != nil {
|
||||||
|
return fmt.Errorf("check %s: %w", label, err)
|
||||||
|
}
|
||||||
|
if applied > 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := fn(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if _, err := s.db.Exec(`INSERT INTO schema_versions (version) VALUES (?)`, version); err != nil {
|
||||||
|
return fmt.Errorf("mark %s applied: %w", label, err)
|
||||||
|
}
|
||||||
|
slog.Info("schema migration applied", "version", version, "label", label)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunOnce is the public counterpart of runOnce, exposed so cmd/server can
|
||||||
|
// gate post-store-open migrations (e.g. crypto re-encryption that needs
|
||||||
|
// the ENCRYPTION_KEY which Store does not own) through the same
|
||||||
|
// schema_versions ledger.
|
||||||
|
func (s *Store) RunOnce(version int, label string, fn func() error) error {
|
||||||
|
return s.runOnce(version, label, fn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnvelopeMigrator bundles the callbacks a crypto package supplies so the
// store can rewrite legacy unprefixed-hex ciphertext as versioned envelope
// values. Taking closures instead of a concrete type keeps this package free
// of an import on internal/crypto.
type EnvelopeMigrator struct {
	// HasEnvelope reports whether the value already carries the new prefix.
	HasEnvelope func(string) bool
	// Decrypt returns the plaintext for either the legacy or the envelope form.
	Decrypt func(string) (string, error)
	// Encrypt always produces the new envelope form from the plaintext.
	Encrypt func(string) (string, error)
}
|
||||||
|
|
||||||
|
// MigrateSecretsToEnvelope walks every column known to carry an encrypted
|
||||||
|
// secret and rewrites legacy unprefixed-hex values into the new
|
||||||
|
// envelope form using the current encryption key.
|
||||||
|
//
|
||||||
|
// Behaviour, per-row:
|
||||||
|
// - empty value → skip (no secret stored)
|
||||||
|
// - already-envelope value → skip (already migrated)
|
||||||
|
// - decrypt fails → skip (value is either plaintext from a v0 boot
|
||||||
|
// OR ciphertext from a rotated key; either way we cannot safely
|
||||||
|
// re-encrypt and leaving it alone preserves the existing read
|
||||||
|
// semantics)
|
||||||
|
// - decrypt succeeds → encrypt to envelope form + UPDATE
|
||||||
|
//
|
||||||
|
// The whole sweep runs in a single transaction so a power-loss
|
||||||
|
// mid-migration leaves the DB in either the pre- or post-migration
|
||||||
|
// state, never half. Idempotent via schema_versions version 2 — the
|
||||||
|
// next boot is a no-op.
|
||||||
|
//
|
||||||
|
// Columns covered:
|
||||||
|
// - settings.npm_password
|
||||||
|
// - settings.cloudflare_api_token
|
||||||
|
// - auth_settings.oidc_client_secret
|
||||||
|
// - registries.token
|
||||||
|
// - workload_env.value WHERE encrypted=1
|
||||||
|
func (s *Store) MigrateSecretsToEnvelope(m EnvelopeMigrator) error {
|
||||||
|
return s.runOnce(2, "secrets envelope migration", func() error {
|
||||||
|
tx, err := s.db.Begin()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("begin: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { _ = tx.Rollback() }()
|
||||||
|
|
||||||
|
// Single-row tables (settings, auth_settings) — read-update inline.
|
||||||
|
singleRowColumns := []struct {
|
||||||
|
table, column string
|
||||||
|
}{
|
||||||
|
{"settings", "npm_password"},
|
||||||
|
{"settings", "cloudflare_api_token"},
|
||||||
|
{"auth_settings", "oidc_client_secret"},
|
||||||
|
}
|
||||||
|
for _, c := range singleRowColumns {
|
||||||
|
var v string
|
||||||
|
err := tx.QueryRow(
|
||||||
|
fmt.Sprintf(`SELECT %s FROM %s LIMIT 1`, c.column, c.table),
|
||||||
|
).Scan(&v)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// auth_settings may not exist on a brand-new DB until
|
||||||
|
// the OIDC code touches it; treat as nothing-to-migrate.
|
||||||
|
slog.Debug("envelope migration: column read skipped",
|
||||||
|
"table", c.table, "column", c.column, "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
migrated, ok := tryMigrate(m, v)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, err := tx.Exec(
|
||||||
|
fmt.Sprintf(`UPDATE %s SET %s = ?`, c.table, c.column),
|
||||||
|
migrated,
|
||||||
|
); err != nil {
|
||||||
|
return fmt.Errorf("update %s.%s: %w", c.table, c.column, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multi-row: registries.token
|
||||||
|
if err := migrateRowColumn(tx, m,
|
||||||
|
`SELECT id, token FROM registries WHERE token != ''`,
|
||||||
|
`UPDATE registries SET token = ? WHERE id = ?`,
|
||||||
|
); err != nil {
|
||||||
|
return fmt.Errorf("registries.token: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multi-row: workload_env.value WHERE encrypted=1
|
||||||
|
if err := migrateRowColumn(tx, m,
|
||||||
|
`SELECT id, value FROM workload_env WHERE encrypted = 1 AND value != ''`,
|
||||||
|
`UPDATE workload_env SET value = ? WHERE id = ?`,
|
||||||
|
); err != nil {
|
||||||
|
return fmt.Errorf("workload_env.value: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := tx.Commit(); err != nil {
|
||||||
|
return fmt.Errorf("commit: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// migrateRowColumn applies the envelope rewrite to every (id, value)
|
||||||
|
// pair returned by selectQ. updateQ takes (newValue, id) as parameters.
|
||||||
|
// Each row is its own attempt; one row failing migration (decrypt fail)
|
||||||
|
// does not abort the others.
|
||||||
|
func migrateRowColumn(tx *sql.Tx, m EnvelopeMigrator, selectQ, updateQ string) error {
|
||||||
|
rows, err := tx.Query(selectQ)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
type pending struct{ id, newValue string }
|
||||||
|
var updates []pending
|
||||||
|
for rows.Next() {
|
||||||
|
var id, value string
|
||||||
|
if err := rows.Scan(&id, &value); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
newValue, ok := tryMigrate(m, value)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
updates = append(updates, pending{id, newValue})
|
||||||
|
}
|
||||||
|
if err := rows.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, u := range updates {
|
||||||
|
if _, err := tx.Exec(updateQ, u.newValue, u.id); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryMigrate returns the envelope-form ciphertext + true when the input
|
||||||
|
// is a legacy unprefixed value that decrypts successfully with the
|
||||||
|
// current key. Returns ("", false) for anything else: empty, already
|
||||||
|
// envelope, plaintext, or decrypt-failed (rotated-key case).
|
||||||
|
func tryMigrate(m EnvelopeMigrator, v string) (string, bool) {
|
||||||
|
if v == "" {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
if m.HasEnvelope(v) {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
plaintext, err := m.Decrypt(v)
|
||||||
|
if err != nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
enc, err := m.Encrypt(plaintext)
|
||||||
|
if err != nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
return enc, true
|
||||||
|
}
|
||||||
|
|
||||||
// backfillTriggersFromWorkloads converts embedded trigger config on
|
// backfillTriggersFromWorkloads converts embedded trigger config on
|
||||||
// workload rows into standalone trigger + binding rows. Runs once per
|
// workload rows into standalone trigger + binding rows. Runs once per
|
||||||
// boot and is idempotent — only workloads with non-empty trigger_kind
|
// boot and is idempotent — only workloads with non-empty trigger_kind
|
||||||
|
|||||||
@@ -0,0 +1,146 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CreateVolumeSnapshot inserts a snapshot metadata record. ID is generated
|
||||||
|
// when empty; CreatedAt is stamped server-side.
|
||||||
|
func (s *Store) CreateVolumeSnapshot(v VolumeSnapshot) (VolumeSnapshot, error) {
|
||||||
|
if v.WorkloadID == "" || v.Filename == "" {
|
||||||
|
return VolumeSnapshot{}, fmt.Errorf("volume_snapshot: workload_id and filename are required")
|
||||||
|
}
|
||||||
|
if v.ID == "" {
|
||||||
|
v.ID = uuid.New().String()
|
||||||
|
}
|
||||||
|
if v.Manifest == "" {
|
||||||
|
v.Manifest = "[]"
|
||||||
|
}
|
||||||
|
v.CreatedAt = Now()
|
||||||
|
|
||||||
|
if _, err := s.db.Exec(
|
||||||
|
`INSERT INTO volume_snapshots (id, workload_id, label, filename, size_bytes, manifest, created_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
||||||
|
v.ID, v.WorkloadID, v.Label, v.Filename, v.SizeBytes, v.Manifest, v.CreatedAt,
|
||||||
|
); err != nil {
|
||||||
|
return VolumeSnapshot{}, fmt.Errorf("insert volume snapshot: %w", err)
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetVolumeSnapshot returns one snapshot by ID.
|
||||||
|
func (s *Store) GetVolumeSnapshot(id string) (VolumeSnapshot, error) {
|
||||||
|
var v VolumeSnapshot
|
||||||
|
err := s.db.QueryRow(
|
||||||
|
`SELECT id, workload_id, label, filename, size_bytes, manifest, created_at
|
||||||
|
FROM volume_snapshots WHERE id = ?`, id,
|
||||||
|
).Scan(&v.ID, &v.WorkloadID, &v.Label, &v.Filename, &v.SizeBytes, &v.Manifest, &v.CreatedAt)
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
return VolumeSnapshot{}, fmt.Errorf("volume snapshot %s: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return VolumeSnapshot{}, fmt.Errorf("query volume snapshot: %w", err)
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListVolumeSnapshots returns a workload's snapshots, newest first.
|
||||||
|
func (s *Store) ListVolumeSnapshots(workloadID string) ([]VolumeSnapshot, error) {
|
||||||
|
rows, err := s.db.Query(
|
||||||
|
`SELECT id, workload_id, label, filename, size_bytes, manifest, created_at
|
||||||
|
FROM volume_snapshots WHERE workload_id = ? ORDER BY created_at DESC`, workloadID,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query volume snapshots: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
out := []VolumeSnapshot{}
|
||||||
|
for rows.Next() {
|
||||||
|
v, err := scanVolumeSnapshot(rows)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out = append(out, v)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteVolumeSnapshot removes one snapshot row by ID.
|
||||||
|
func (s *Store) DeleteVolumeSnapshot(id string) error {
|
||||||
|
result, err := s.db.Exec(`DELETE FROM volume_snapshots WHERE id = ?`, id)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("delete volume snapshot: %w", err)
|
||||||
|
}
|
||||||
|
if n, _ := result.RowsAffected(); n == 0 {
|
||||||
|
return fmt.Errorf("volume snapshot %s: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CountVolumeSnapshots returns how many snapshots a workload has.
|
||||||
|
func (s *Store) CountVolumeSnapshots(workloadID string) (int, error) {
|
||||||
|
var n int
|
||||||
|
if err := s.db.QueryRow(
|
||||||
|
`SELECT COUNT(*) FROM volume_snapshots WHERE workload_id = ?`, workloadID,
|
||||||
|
).Scan(&n); err != nil {
|
||||||
|
return 0, fmt.Errorf("count volume snapshots: %w", err)
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetOldestVolumeSnapshots returns the N oldest snapshots for a workload, for
|
||||||
|
// retention pruning.
|
||||||
|
func (s *Store) GetOldestVolumeSnapshots(workloadID string, limit int) ([]VolumeSnapshot, error) {
|
||||||
|
rows, err := s.db.Query(
|
||||||
|
`SELECT id, workload_id, label, filename, size_bytes, manifest, created_at
|
||||||
|
FROM volume_snapshots WHERE workload_id = ? ORDER BY created_at ASC LIMIT ?`, workloadID, limit,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query oldest volume snapshots: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
out := []VolumeSnapshot{}
|
||||||
|
for rows.Next() {
|
||||||
|
v, err := scanVolumeSnapshot(rows)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out = append(out, v)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// AllVolumeSnapshotFilenames returns every snapshot archive filename across all
|
||||||
|
// workloads, for orphan-file reconciliation at startup.
|
||||||
|
func (s *Store) AllVolumeSnapshotFilenames() ([]string, error) {
|
||||||
|
rows, err := s.db.Query(`SELECT filename FROM volume_snapshots`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("query snapshot filenames: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
out := []string{}
|
||||||
|
for rows.Next() {
|
||||||
|
var name string
|
||||||
|
if err := rows.Scan(&name); err != nil {
|
||||||
|
return nil, fmt.Errorf("scan snapshot filename: %w", err)
|
||||||
|
}
|
||||||
|
out = append(out, name)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
func scanVolumeSnapshot(rows *sql.Rows) (VolumeSnapshot, error) {
|
||||||
|
var v VolumeSnapshot
|
||||||
|
if err := rows.Scan(&v.ID, &v.WorkloadID, &v.Label, &v.Filename,
|
||||||
|
&v.SizeBytes, &v.Manifest, &v.CreatedAt); err != nil {
|
||||||
|
return VolumeSnapshot{}, fmt.Errorf("scan volume snapshot: %w", err)
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,159 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
// workloadNotificationColumns is the column list shared by the INSERT and
// SELECT statements on workload_notifications. Its order must match the
// destination order in scanWorkloadNotification and the argument order in
// CreateWorkloadNotification.
const workloadNotificationColumns = `id, workload_id, name, url, secret,
	event_types, enabled, sort_order, created_at, updated_at`
|
||||||
|
|
||||||
|
func scanWorkloadNotification(scanner interface{ Scan(...any) error }) (WorkloadNotification, error) {
|
||||||
|
var n WorkloadNotification
|
||||||
|
var enabled int
|
||||||
|
err := scanner.Scan(
|
||||||
|
&n.ID, &n.WorkloadID, &n.Name, &n.URL, &n.Secret,
|
||||||
|
&n.EventTypes, &enabled, &n.SortOrder, &n.CreatedAt, &n.UpdatedAt,
|
||||||
|
)
|
||||||
|
n.Enabled = enabled != 0
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateWorkloadNotification inserts a notification route. Returns the
|
||||||
|
// populated row (with assigned id + timestamps) so callers don't need to
|
||||||
|
// follow up with a Get.
|
||||||
|
func (s *Store) CreateWorkloadNotification(n WorkloadNotification) (WorkloadNotification, error) {
|
||||||
|
if n.WorkloadID == "" {
|
||||||
|
return WorkloadNotification{}, fmt.Errorf("workload_id is required")
|
||||||
|
}
|
||||||
|
if n.URL == "" {
|
||||||
|
return WorkloadNotification{}, fmt.Errorf("url is required")
|
||||||
|
}
|
||||||
|
if n.ID == "" {
|
||||||
|
n.ID = uuid.New().String()
|
||||||
|
}
|
||||||
|
n.CreatedAt = Now()
|
||||||
|
n.UpdatedAt = n.CreatedAt
|
||||||
|
|
||||||
|
_, err := s.db.Exec(
|
||||||
|
`INSERT INTO workload_notifications (`+workloadNotificationColumns+`)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
|
n.ID, n.WorkloadID, n.Name, n.URL, n.Secret,
|
||||||
|
n.EventTypes, BoolToInt(n.Enabled), n.SortOrder, n.CreatedAt, n.UpdatedAt,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return WorkloadNotification{}, fmt.Errorf("insert workload_notification: %w", err)
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListWorkloadNotifications returns every notification row for a
|
||||||
|
// workload ordered by (sort_order, created_at) so the UI stays stable
|
||||||
|
// across reorderings.
|
||||||
|
func (s *Store) ListWorkloadNotifications(workloadID string) ([]WorkloadNotification, error) {
|
||||||
|
rows, err := s.db.Query(
|
||||||
|
`SELECT `+workloadNotificationColumns+`
|
||||||
|
FROM workload_notifications
|
||||||
|
WHERE workload_id = ?
|
||||||
|
ORDER BY sort_order, created_at`,
|
||||||
|
workloadID,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("list workload_notifications: %w", err)
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
out := []WorkloadNotification{}
|
||||||
|
for rows.Next() {
|
||||||
|
n, err := scanWorkloadNotification(rows)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("scan workload_notification: %w", err)
|
||||||
|
}
|
||||||
|
out = append(out, n)
|
||||||
|
}
|
||||||
|
return out, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetWorkloadNotification fetches one notification row by id. Returns
|
||||||
|
// ErrNotFound when the row does not exist so callers can return 404
|
||||||
|
// cleanly.
|
||||||
|
func (s *Store) GetWorkloadNotification(id string) (WorkloadNotification, error) {
|
||||||
|
n, err := scanWorkloadNotification(s.db.QueryRow(
|
||||||
|
`SELECT `+workloadNotificationColumns+`
|
||||||
|
FROM workload_notifications WHERE id = ?`, id,
|
||||||
|
))
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
return WorkloadNotification{}, fmt.Errorf("workload_notification %s: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return WorkloadNotification{}, fmt.Errorf("query workload_notification: %w", err)
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateWorkloadNotification rewrites an existing row. WorkloadID is
|
||||||
|
// immutable — re-anchoring a route to a different workload would invite
|
||||||
|
// silent reassignments after a paste-bug in the UI; recreate instead.
|
||||||
|
func (s *Store) UpdateWorkloadNotification(n WorkloadNotification) error {
|
||||||
|
if n.ID == "" {
|
||||||
|
return fmt.Errorf("id is required")
|
||||||
|
}
|
||||||
|
if n.URL == "" {
|
||||||
|
return fmt.Errorf("url is required")
|
||||||
|
}
|
||||||
|
n.UpdatedAt = Now()
|
||||||
|
res, err := s.db.Exec(
|
||||||
|
`UPDATE workload_notifications
|
||||||
|
SET name = ?, url = ?, secret = ?, event_types = ?,
|
||||||
|
enabled = ?, sort_order = ?, updated_at = ?
|
||||||
|
WHERE id = ?`,
|
||||||
|
n.Name, n.URL, n.Secret, n.EventTypes,
|
||||||
|
BoolToInt(n.Enabled), n.SortOrder, n.UpdatedAt, n.ID,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("update workload_notification: %w", err)
|
||||||
|
}
|
||||||
|
rows, _ := res.RowsAffected()
|
||||||
|
if rows == 0 {
|
||||||
|
return fmt.Errorf("workload_notification %s: %w", n.ID, ErrNotFound)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteWorkloadNotification drops a single notification row.
|
||||||
|
// Idempotent: missing id returns ErrNotFound so the API can map it to
|
||||||
|
// 404 cleanly.
|
||||||
|
func (s *Store) DeleteWorkloadNotification(id string) error {
|
||||||
|
res, err := s.db.Exec(`DELETE FROM workload_notifications WHERE id = ?`, id)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("delete workload_notification: %w", err)
|
||||||
|
}
|
||||||
|
rows, _ := res.RowsAffected()
|
||||||
|
if rows == 0 {
|
||||||
|
return fmt.Errorf("workload_notification %s: %w", id, ErrNotFound)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchesEventType returns true when the notification row's EventTypes
|
||||||
|
// allow-list includes eventType (or is empty, meaning "match all").
|
||||||
|
// Helper exported so the notification dispatcher can fan-out filtering
|
||||||
|
// inline without duplicating the comma-split parser.
|
||||||
|
func (n WorkloadNotification) MatchesEventType(eventType string) bool {
|
||||||
|
if !n.Enabled {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if n.EventTypes == "" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
for _, et := range strings.Split(n.EventTypes, ",") {
|
||||||
|
if strings.TrimSpace(et) == eventType {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
@@ -0,0 +1,170 @@
|
|||||||
|
package store
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// seedWorkloadForNotifications creates a minimal workload row so the FK
|
||||||
|
// constraint on workload_notifications is satisfied. Returns the new
|
||||||
|
// workload's ID for tests to reference.
|
||||||
|
func seedWorkloadForNotifications(t *testing.T, s *Store, name string) string {
|
||||||
|
t.Helper()
|
||||||
|
w, err := s.CreateWorkload(Workload{
|
||||||
|
Kind: string(WorkloadKindProject),
|
||||||
|
Name: name,
|
||||||
|
SourceKind: "image",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("seed workload: %v", err)
|
||||||
|
}
|
||||||
|
return w.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateWorkloadNotification_RoundTrip(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||||
|
|
||||||
|
created, err := s.CreateWorkloadNotification(WorkloadNotification{
|
||||||
|
WorkloadID: wlID,
|
||||||
|
Name: "Slack alerts",
|
||||||
|
URL: "https://hooks.slack.test/x",
|
||||||
|
Secret: "shh",
|
||||||
|
EventTypes: "deploy_failure,build_failure",
|
||||||
|
Enabled: true,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateWorkloadNotification: %v", err)
|
||||||
|
}
|
||||||
|
if created.ID == "" {
|
||||||
|
t.Fatal("expected ID to be assigned")
|
||||||
|
}
|
||||||
|
|
||||||
|
got, err := s.GetWorkloadNotification(created.ID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Get: %v", err)
|
||||||
|
}
|
||||||
|
if got.URL != "https://hooks.slack.test/x" || got.Name != "Slack alerts" {
|
||||||
|
t.Errorf("row mismatch: %+v", got)
|
||||||
|
}
|
||||||
|
if !got.Enabled {
|
||||||
|
t.Error("expected Enabled=true")
|
||||||
|
}
|
||||||
|
if got.EventTypes != "deploy_failure,build_failure" {
|
||||||
|
t.Errorf("event_types = %q", got.EventTypes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCreateWorkloadNotification_RejectsMissingURL(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||||
|
_, err := s.CreateWorkloadNotification(WorkloadNotification{
|
||||||
|
WorkloadID: wlID,
|
||||||
|
Name: "broken",
|
||||||
|
URL: "",
|
||||||
|
})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected URL validation error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestListWorkloadNotifications_SortedByOrder(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||||
|
|
||||||
|
// Insert out of order; ListWorkloadNotifications should return
|
||||||
|
// them sorted by SortOrder ascending.
|
||||||
|
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||||
|
WorkloadID: wlID, Name: "C", URL: "https://c.test", SortOrder: 30,
|
||||||
|
})
|
||||||
|
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||||
|
WorkloadID: wlID, Name: "A", URL: "https://a.test", SortOrder: 10,
|
||||||
|
})
|
||||||
|
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||||
|
WorkloadID: wlID, Name: "B", URL: "https://b.test", SortOrder: 20,
|
||||||
|
})
|
||||||
|
|
||||||
|
rows, err := s.ListWorkloadNotifications(wlID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("list: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 3 {
|
||||||
|
t.Fatalf("len = %d, want 3", len(rows))
|
||||||
|
}
|
||||||
|
if rows[0].Name != "A" || rows[1].Name != "B" || rows[2].Name != "C" {
|
||||||
|
t.Errorf("sort order wrong: %q %q %q", rows[0].Name, rows[1].Name, rows[2].Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateWorkloadNotification_PersistsChanges(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||||
|
n, _ := s.CreateWorkloadNotification(WorkloadNotification{
|
||||||
|
WorkloadID: wlID, Name: "old", URL: "https://old.test", Enabled: true,
|
||||||
|
})
|
||||||
|
n.Name = "new"
|
||||||
|
n.URL = "https://new.test"
|
||||||
|
n.Enabled = false
|
||||||
|
n.EventTypes = "deploy_success"
|
||||||
|
if err := s.UpdateWorkloadNotification(n); err != nil {
|
||||||
|
t.Fatalf("update: %v", err)
|
||||||
|
}
|
||||||
|
got, _ := s.GetWorkloadNotification(n.ID)
|
||||||
|
if got.Name != "new" || got.URL != "https://new.test" || got.Enabled {
|
||||||
|
t.Errorf("update did not persist: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteWorkloadNotification_ReturnsNotFoundForMissing(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
err := s.DeleteWorkloadNotification("nope")
|
||||||
|
if !errors.Is(err, ErrNotFound) {
|
||||||
|
t.Errorf("expected ErrNotFound, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteWorkloadNotification_CascadesFromWorkload(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
wlID := seedWorkloadForNotifications(t, s, "app1")
|
||||||
|
_, _ = s.CreateWorkloadNotification(WorkloadNotification{
|
||||||
|
WorkloadID: wlID, Name: "x", URL: "https://x.test",
|
||||||
|
})
|
||||||
|
if err := s.DeleteWorkload(wlID); err != nil {
|
||||||
|
t.Fatalf("delete workload: %v", err)
|
||||||
|
}
|
||||||
|
rows, err := s.ListWorkloadNotifications(wlID)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("list after cascade: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 0 {
|
||||||
|
t.Errorf("expected cascade delete to remove rows, got %d", len(rows))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchesEventType_AllowList(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
eventTypes string
|
||||||
|
probe string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"", "deploy_success", true}, // empty = all
|
||||||
|
{"deploy_success,deploy_failure", "deploy_success", true},
|
||||||
|
{"deploy_success,deploy_failure", "build_failure", false},
|
||||||
|
{"build_failure", "build_failure", true},
|
||||||
|
{" deploy_success , build_failure ", "build_failure", true}, // whitespace tolerated
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
n := WorkloadNotification{Enabled: true, EventTypes: c.eventTypes}
|
||||||
|
got := n.MatchesEventType(c.probe)
|
||||||
|
if got != c.want {
|
||||||
|
t.Errorf("MatchesEventType(%q, %q) = %v, want %v", c.eventTypes, c.probe, got, c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchesEventType_DisabledNeverMatches(t *testing.T) {
|
||||||
|
n := WorkloadNotification{Enabled: false, EventTypes: ""}
|
||||||
|
if n.MatchesEventType("any") {
|
||||||
|
t.Error("disabled row should never match")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -173,11 +173,24 @@ func (s *Store) UpdateWorkload(w Workload) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteWorkload removes a workload row. Cascading deletes for the matching
|
// DeleteWorkload removes a workload row. Cascading deletes for FK-backed
|
||||||
// project/stack/site row stay with the kind-specific Delete functions; this
|
// child tables (workload_env, workload_volumes, workload_trigger_bindings)
|
||||||
// only removes the workload entry.
|
// happen via SQLite's ON DELETE CASCADE. The `containers` table doesn't
|
||||||
|
// yet have an FK to workloads (planned migration — see ops notes), so we
|
||||||
|
// drop its rows explicitly here in the same transaction to prevent zombie
|
||||||
|
// container rows from outliving their owning workload.
|
||||||
func (s *Store) DeleteWorkload(id string) error {
|
func (s *Store) DeleteWorkload(id string) error {
|
||||||
result, err := s.db.Exec(`DELETE FROM workloads WHERE id = ?`, id)
|
tx, err := s.db.Begin()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("begin: %w", err)
|
||||||
|
}
|
||||||
|
defer func() { _ = tx.Rollback() }()
|
||||||
|
|
||||||
|
// Explicit container cleanup until the FK migration lands.
|
||||||
|
if _, err := tx.Exec(`DELETE FROM containers WHERE workload_id = ?`, id); err != nil {
|
||||||
|
return fmt.Errorf("delete containers: %w", err)
|
||||||
|
}
|
||||||
|
result, err := tx.Exec(`DELETE FROM workloads WHERE id = ?`, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("delete workload: %w", err)
|
return fmt.Errorf("delete workload: %w", err)
|
||||||
}
|
}
|
||||||
@@ -188,6 +201,9 @@ func (s *Store) DeleteWorkload(id string) error {
|
|||||||
if n == 0 {
|
if n == 0 {
|
||||||
return fmt.Errorf("workload %s: %w", id, ErrNotFound)
|
return fmt.Errorf("workload %s: %w", id, ErrNotFound)
|
||||||
}
|
}
|
||||||
|
if err := tx.Commit(); err != nil {
|
||||||
|
return fmt.Errorf("commit: %w", err)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,140 @@
|
|||||||
|
package volsnap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/tar"
|
||||||
|
"compress/gzip"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// writeArchive serializes the given host-bind volume directories into a
|
||||||
|
// gzip-compressed tar at dest. Each volume's files live under an integer
|
||||||
|
// subdirectory (its manifest Index); a manifest.json at the archive root makes
|
||||||
|
// the archive self-describing. Returns the manifest describing what was
|
||||||
|
// captured.
|
||||||
|
//
|
||||||
|
// Only regular files and directories are archived. Symlinks and special files
|
||||||
|
// (devices, sockets, fifos) are skipped — this keeps capture safe and avoids
|
||||||
|
// recording links whose targets would be meaningless or escape the volume on a
|
||||||
|
// later restore. A torn snapshot is possible if the app writes during capture;
|
||||||
|
// callers should surface that caveat.
|
||||||
|
func writeArchive(dest string, refs []VolumeRef) ([]SnapshotVolume, error) {
|
||||||
|
// O_EXCL: never clobber an existing file (filenames are unique per call).
|
||||||
|
f, err := os.OpenFile(dest, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("create snapshot file: %w", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
gz := gzip.NewWriter(f)
|
||||||
|
tw := tar.NewWriter(gz)
|
||||||
|
|
||||||
|
manifest := make([]SnapshotVolume, 0, len(refs))
|
||||||
|
for i, ref := range refs {
|
||||||
|
manifest = append(manifest, SnapshotVolume{Index: i, Target: ref.Target, Scope: ref.Scope, Source: ref.Source})
|
||||||
|
if err := addDir(tw, ref.HostPath, fmt.Sprintf("%d", i)); err != nil {
|
||||||
|
_ = tw.Close()
|
||||||
|
_ = gz.Close()
|
||||||
|
_ = f.Close()
|
||||||
|
os.Remove(dest)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := writeManifestEntry(tw, manifest); err != nil {
|
||||||
|
_ = tw.Close()
|
||||||
|
_ = gz.Close()
|
||||||
|
os.Remove(dest)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := tw.Close(); err != nil {
|
||||||
|
_ = gz.Close()
|
||||||
|
os.Remove(dest)
|
||||||
|
return nil, fmt.Errorf("finalize tar: %w", err)
|
||||||
|
}
|
||||||
|
if err := gz.Close(); err != nil {
|
||||||
|
os.Remove(dest)
|
||||||
|
return nil, fmt.Errorf("finalize gzip: %w", err)
|
||||||
|
}
|
||||||
|
if err := f.Close(); err != nil {
|
||||||
|
os.Remove(dest)
|
||||||
|
return nil, fmt.Errorf("close snapshot file: %w", err)
|
||||||
|
}
|
||||||
|
return manifest, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// addDir walks root and writes its regular files and directories into tw under
// the given archive prefix. Entry names use forward slashes; directory entries
// carry a trailing "/". Symlinks and special files are skipped.
//
// Exactly the number of bytes recorded in the tar header (the size at stat
// time) is copied per file. A file that grows during capture is therefore
// truncated to its stat-time size instead of failing the archive with
// tar.ErrWriteTooLong; a file that shrinks still yields a copy error.
func addDir(tw *tar.Writer, root, prefix string) error {
	return filepath.WalkDir(root, func(p string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return fmt.Errorf("walk %s: %w", p, walkErr)
		}
		// Skip symlinks and special files; archive only dirs and regular files.
		if d.Type()&fs.ModeSymlink != 0 {
			return nil
		}
		if !d.IsDir() && !d.Type().IsRegular() {
			return nil
		}

		rel, err := filepath.Rel(root, p)
		if err != nil {
			return fmt.Errorf("relativize %s: %w", p, err)
		}
		// The walk root itself maps to the bare prefix.
		name := prefix
		if rel != "." {
			name = path.Join(prefix, filepath.ToSlash(rel))
		}

		info, err := d.Info()
		if err != nil {
			return fmt.Errorf("stat %s: %w", p, err)
		}
		hdr, err := tar.FileInfoHeader(info, "")
		if err != nil {
			return fmt.Errorf("tar header %s: %w", p, err)
		}
		hdr.Name = name
		if d.IsDir() {
			hdr.Name += "/"
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return fmt.Errorf("write tar header %s: %w", name, err)
		}
		if d.IsDir() {
			return nil
		}

		src, err := os.Open(p)
		if err != nil {
			return fmt.Errorf("open %s: %w", p, err)
		}
		// Runs when this callback returns, i.e. once per file.
		defer src.Close()
		// Bound the copy by the size promised in the header so bytes appended
		// after stat cannot overflow the tar entry.
		if _, err := io.CopyN(tw, src, hdr.Size); err != nil {
			return fmt.Errorf("copy %s: %w", p, err)
		}
		return nil
	})
}
|
||||||
|
|
||||||
|
func writeManifestEntry(tw *tar.Writer, manifest []SnapshotVolume) error {
|
||||||
|
data, err := json.MarshalIndent(manifest, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("encode manifest: %w", err)
|
||||||
|
}
|
||||||
|
hdr := &tar.Header{Name: "manifest.json", Mode: 0o600, Size: int64(len(data)), Typeflag: tar.TypeReg}
|
||||||
|
if err := tw.WriteHeader(hdr); err != nil {
|
||||||
|
return fmt.Errorf("write manifest header: %w", err)
|
||||||
|
}
|
||||||
|
if _, err := tw.Write(data); err != nil {
|
||||||
|
return fmt.Errorf("write manifest: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -0,0 +1,117 @@
|
|||||||
|
package volsnap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/tar"
|
||||||
|
"compress/gzip"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestWriteArchiveRoundTrip(t *testing.T) {
|
||||||
|
root := t.TempDir()
|
||||||
|
mustWrite(t, filepath.Join(root, "a.txt"), "hello")
|
||||||
|
if err := os.MkdirAll(filepath.Join(root, "sub"), 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
mustWrite(t, filepath.Join(root, "sub", "b.txt"), "world")
|
||||||
|
|
||||||
|
dest := filepath.Join(t.TempDir(), "snap.tar.gz")
|
||||||
|
refs := []VolumeRef{{Target: "/data", Scope: "project", Source: "data", HostPath: root}}
|
||||||
|
|
||||||
|
manifest, err := writeArchive(dest, refs)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("writeArchive: %v", err)
|
||||||
|
}
|
||||||
|
if len(manifest) != 1 || manifest[0].Index != 0 || manifest[0].Target != "/data" || manifest[0].Scope != "project" {
|
||||||
|
t.Fatalf("unexpected manifest: %+v", manifest)
|
||||||
|
}
|
||||||
|
|
||||||
|
entries := readArchive(t, dest)
|
||||||
|
for _, want := range []string{"0/a.txt", "0/sub/b.txt", "manifest.json"} {
|
||||||
|
if _, ok := entries[want]; !ok {
|
||||||
|
keys := make([]string, 0, len(entries))
|
||||||
|
for k := range entries {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
t.Fatalf("archive missing %q; got %v", want, keys)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got := entries["0/a.txt"]; got != "hello" {
|
||||||
|
t.Errorf("0/a.txt = %q, want %q", got, "hello")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteArchiveRefusesExisting(t *testing.T) {
|
||||||
|
dest := filepath.Join(t.TempDir(), "snap.tar.gz")
|
||||||
|
mustWrite(t, dest, "existing")
|
||||||
|
if _, err := writeArchive(dest, nil); err == nil {
|
||||||
|
t.Fatal("expected error writing over an existing file (O_EXCL)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteArchiveSkipsSymlinks(t *testing.T) {
|
||||||
|
root := t.TempDir()
|
||||||
|
mustWrite(t, filepath.Join(root, "real.txt"), "data")
|
||||||
|
if err := os.Symlink(filepath.Join(root, "real.txt"), filepath.Join(root, "link.txt")); err != nil {
|
||||||
|
t.Skipf("symlinks unavailable on this platform: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dest := filepath.Join(t.TempDir(), "snap.tar.gz")
|
||||||
|
if _, err := writeArchive(dest, []VolumeRef{{Target: "/d", Scope: "project", HostPath: root}}); err != nil {
|
||||||
|
t.Fatalf("writeArchive: %v", err)
|
||||||
|
}
|
||||||
|
entries := readArchive(t, dest)
|
||||||
|
if _, ok := entries["0/link.txt"]; ok {
|
||||||
|
t.Error("symlink should have been skipped, but it is in the archive")
|
||||||
|
}
|
||||||
|
if _, ok := entries["0/real.txt"]; !ok {
|
||||||
|
t.Error("regular file should be archived")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// mustWrite writes content to path with mode 0o644 and fails the test
// immediately if the write does not succeed.
func mustWrite(t *testing.T, path, content string) {
	t.Helper()
	err := os.WriteFile(path, []byte(content), 0o644)
	if err == nil {
		return
	}
	t.Fatal(err)
}
|
||||||
|
|
||||||
|
// readArchive opens the gzip-compressed tar at path and returns a map from
// entry name to entry content. Directory entries are stored with an empty
// string so tests can assert that they are present. Any I/O or format error
// fails the test.
func readArchive(t *testing.T, path string) map[string]string {
	t.Helper()

	file, err := os.Open(path)
	if err != nil {
		t.Fatal(err)
	}
	defer file.Close()

	zr, err := gzip.NewReader(file)
	if err != nil {
		t.Fatal(err)
	}
	defer zr.Close()

	entries := make(map[string]string)
	for tr := tar.NewReader(zr); ; {
		hdr, err := tr.Next()
		switch {
		case err == io.EOF:
			return entries
		case err != nil:
			t.Fatal(err)
		}

		// Directories have no payload to read; record presence only.
		content := ""
		if hdr.Typeflag != tar.TypeDir {
			raw, err := io.ReadAll(tr)
			if err != nil {
				t.Fatal(err)
			}
			content = string(raw)
		}
		entries[hdr.Name] = content
	}
}
|
||||||
@@ -0,0 +1,207 @@
|
|||||||
|
package volsnap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// maxSnapshotsPerWorkload caps how many snapshots are retained per app. On
|
||||||
|
// create, older snapshots beyond this count are pruned (best-effort) so volume
|
||||||
|
// snapshots cannot grow the data disk without bound.
|
||||||
|
const maxSnapshotsPerWorkload = 20
|
||||||
|
|
||||||
|
// ErrNoSnapshotData is returned by Create when the workload has no resolved
|
||||||
|
// host-bind volume directory to capture. It is a client-actionable condition
|
||||||
|
// (HTTP 400), distinct from internal failures (HTTP 500).
|
||||||
|
var ErrNoSnapshotData = errors.New("no snapshottable volume data for this app")
|
||||||
|
|
||||||
|
// Engine creates and manages volume snapshots under <dataDir>/snapshots.
|
||||||
|
type Engine struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
store *store.Store
|
||||||
|
snapDir string
|
||||||
|
}
|
||||||
|
|
||||||
|
// New creates the snapshot engine, ensuring the snapshot directory exists.
|
||||||
|
func New(st *store.Store, dataDir string) (*Engine, error) {
|
||||||
|
dir := filepath.Join(dataDir, "snapshots")
|
||||||
|
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||||
|
return nil, fmt.Errorf("create snapshot directory: %w", err)
|
||||||
|
}
|
||||||
|
return &Engine{store: st, snapDir: dir}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SnapDir returns the directory holding snapshot archives.
|
||||||
|
func (e *Engine) SnapDir() string { return e.snapDir }
|
||||||
|
|
||||||
|
// Create captures a snapshot of the workload's host-bind data volumes.
|
||||||
|
func (e *Engine) Create(w store.Workload, settings store.Settings, label string) (store.VolumeSnapshot, error) {
|
||||||
|
refs, _, err := SnapshotableVolumes(e.store, w, settings)
|
||||||
|
if err != nil {
|
||||||
|
return store.VolumeSnapshot{}, fmt.Errorf("enumerate volumes: %w", err)
|
||||||
|
}
|
||||||
|
if len(refs) == 0 {
|
||||||
|
return store.VolumeSnapshot{}, ErrNoSnapshotData
|
||||||
|
}
|
||||||
|
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
|
||||||
|
filename := fmt.Sprintf("%s-%s-%s.tar.gz",
|
||||||
|
idShort(w.ID), time.Now().UTC().Format("20060102-150405"), uuid.New().String()[:8])
|
||||||
|
dest := filepath.Join(e.snapDir, filename)
|
||||||
|
|
||||||
|
manifest, err := writeArchive(dest, refs)
|
||||||
|
if err != nil {
|
||||||
|
return store.VolumeSnapshot{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
info, err := os.Stat(dest)
|
||||||
|
if err != nil {
|
||||||
|
os.Remove(dest)
|
||||||
|
return store.VolumeSnapshot{}, fmt.Errorf("stat snapshot: %w", err)
|
||||||
|
}
|
||||||
|
manifestJSON, err := json.Marshal(manifest)
|
||||||
|
if err != nil {
|
||||||
|
os.Remove(dest)
|
||||||
|
return store.VolumeSnapshot{}, fmt.Errorf("encode manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
row, err := e.store.CreateVolumeSnapshot(store.VolumeSnapshot{
|
||||||
|
WorkloadID: w.ID,
|
||||||
|
Label: strings.TrimSpace(label),
|
||||||
|
Filename: filename,
|
||||||
|
SizeBytes: info.Size(),
|
||||||
|
Manifest: string(manifestJSON),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
os.Remove(dest) // best-effort: don't leak an orphan file
|
||||||
|
return store.VolumeSnapshot{}, fmt.Errorf("record snapshot: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Info("volume snapshot created", "id", row.ID, "workload", w.ID,
|
||||||
|
"volumes", len(manifest), "size", info.Size())
|
||||||
|
|
||||||
|
e.pruneWorkload(w.ID)
|
||||||
|
return row, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// List returns a workload's snapshots, newest first.
|
||||||
|
func (e *Engine) List(workloadID string) ([]store.VolumeSnapshot, error) {
|
||||||
|
return e.store.ListVolumeSnapshots(workloadID)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get returns one snapshot by id.
|
||||||
|
func (e *Engine) Get(id string) (store.VolumeSnapshot, error) {
|
||||||
|
return e.store.GetVolumeSnapshot(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete removes a snapshot's archive file and its metadata row.
|
||||||
|
func (e *Engine) Delete(id string) error {
|
||||||
|
snap, err := e.store.GetVolumeSnapshot(id)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
if p, perr := e.FilePath(snap); perr == nil {
|
||||||
|
if rmErr := os.Remove(p); rmErr != nil && !os.IsNotExist(rmErr) {
|
||||||
|
slog.Warn("volume snapshot: remove file", "id", id, "error", rmErr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return e.store.DeleteVolumeSnapshot(id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilePath resolves a snapshot's archive path and verifies it stays within the
|
||||||
|
// snapshot directory (defence-in-depth against a tampered filename column).
|
||||||
|
func (e *Engine) FilePath(snap store.VolumeSnapshot) (string, error) {
|
||||||
|
base := filepath.Base(snap.Filename)
|
||||||
|
if base == "" || base == "." || base != snap.Filename {
|
||||||
|
return "", fmt.Errorf("invalid snapshot filename")
|
||||||
|
}
|
||||||
|
p := filepath.Join(e.snapDir, base)
|
||||||
|
abs, err := filepath.Abs(p)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
absDir, _ := filepath.Abs(e.snapDir)
|
||||||
|
if !strings.HasPrefix(abs, absDir+string(filepath.Separator)) {
|
||||||
|
return "", fmt.Errorf("snapshot path escapes snapshot directory")
|
||||||
|
}
|
||||||
|
return abs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CleanOrphans removes snapshot archive files that have no metadata row,
|
||||||
|
// reconciling on-disk files against the DB. Workload deletion CASCADEs the
|
||||||
|
// volume_snapshots rows but cannot reach the files; this (run at startup)
|
||||||
|
// reclaims them. Mirrors backup.Engine.CleanOrphans.
|
||||||
|
func (e *Engine) CleanOrphans() (int, error) {
|
||||||
|
e.mu.Lock()
|
||||||
|
defer e.mu.Unlock()
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(e.snapDir)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("read snapshot dir: %w", err)
|
||||||
|
}
|
||||||
|
filenames, err := e.store.AllVolumeSnapshotFilenames()
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("list snapshot filenames: %w", err)
|
||||||
|
}
|
||||||
|
known := make(map[string]bool, len(filenames))
|
||||||
|
for _, f := range filenames {
|
||||||
|
known[f] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
removed := 0
|
||||||
|
for _, ent := range entries {
|
||||||
|
if ent.IsDir() || known[ent.Name()] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := os.Remove(filepath.Join(e.snapDir, ent.Name())); err != nil {
|
||||||
|
slog.Warn("volume snapshot: remove orphan", "file", ent.Name(), "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
removed++
|
||||||
|
}
|
||||||
|
return removed, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// pruneWorkload deletes snapshots beyond maxSnapshotsPerWorkload for one
|
||||||
|
// workload (oldest first). Best-effort: caller already holds e.mu.
|
||||||
|
func (e *Engine) pruneWorkload(workloadID string) {
|
||||||
|
count, err := e.store.CountVolumeSnapshots(workloadID)
|
||||||
|
if err != nil || count <= maxSnapshotsPerWorkload {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
oldest, err := e.store.GetOldestVolumeSnapshots(workloadID, count-maxSnapshotsPerWorkload)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("volume snapshot: prune query", "workload", workloadID, "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, snap := range oldest {
|
||||||
|
if p, perr := e.FilePath(snap); perr == nil {
|
||||||
|
_ = os.Remove(p)
|
||||||
|
}
|
||||||
|
if derr := e.store.DeleteVolumeSnapshot(snap.ID); derr != nil {
|
||||||
|
slog.Warn("volume snapshot: prune delete", "id", snap.ID, "error", derr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// idShort returns at most the first eight bytes of id; shorter ids are
// returned unchanged.
func idShort(id string) string {
	const keep = 8
	if len(id) <= keep {
		return id
	}
	return id[:keep]
}
|
||||||
@@ -0,0 +1,146 @@
|
|||||||
|
// Package volsnap captures and manages per-workload snapshots of an app's
|
||||||
|
// host-bind data volumes. It is deliberately independent of internal/backup
|
||||||
|
// (which is SQLite-specific): a snapshot here is a tar.gz of the resolved
|
||||||
|
// volume directories, recorded in the volume_snapshots table.
|
||||||
|
//
|
||||||
|
// Phase 2a-i covers CAPTURE only (create/list/delete/download). The restore
|
||||||
|
// path — which overwrites live data and needs container quiesce + atomic swap
|
||||||
|
// — is intentionally a separate, later phase.
|
||||||
|
package volsnap
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
"github.com/alexei/tinyforge/internal/volume"
|
||||||
|
)
|
||||||
|
|
||||||
|
// supportedScopes are the host-bind volume scopes phase 2a-i can snapshot.
|
||||||
|
// Each resolves to a real host directory the running container binds. Excluded
|
||||||
|
// for now: instance (needs the deployed image tag to resolve a per-tag dir),
|
||||||
|
// named/project_named (Docker named volumes — need a docker-run-tar primitive),
|
||||||
|
// and ephemeral (tmpfs — no data to capture).
|
||||||
|
var supportedScopes = map[string]bool{
|
||||||
|
string(store.VolumeScopeAbsolute): true,
|
||||||
|
string(store.VolumeScopeStage): true,
|
||||||
|
string(store.VolumeScopeProject): true,
|
||||||
|
}
|
||||||
|
|
||||||
|
// SnapshotVolume describes one volume captured by a snapshot. It is stored as
// JSON in the snapshot row's manifest and also written into the archive, so a
// future restore can re-resolve the target even if volume settings drift.
type SnapshotVolume struct {
	// Index names the archive subdirectory holding this volume's files.
	Index int `json:"index"`
	// Target, Scope and Source are copied from the volume reference that was
	// captured.
	Target string `json:"target"`
	Scope  string `json:"scope"`
	Source string `json:"source"`
}
|
||||||
|
|
||||||
|
// VolumeRef identifies a host-bind volume that has been resolved to a
// directory on disk and is therefore eligible for snapshotting.
type VolumeRef struct {
	// Target, Scope and Source carry the volume's declaration through to the
	// snapshot manifest.
	Target string
	Scope  string
	Source string
	// HostPath is the host directory whose contents get archived.
	HostPath string
}
|
||||||
|
|
||||||
|
// SkippedVolume records a declared volume that cannot be snapshotted together
// with the reason, which is surfaced to the UI so users are never misled into
// thinking data is captured when it is not.
type SkippedVolume struct {
	Target string `json:"target"`
	Scope  string `json:"scope"`
	// Reason explains why the volume was left out.
	Reason string `json:"reason"`
}
|
||||||
|
|
||||||
|
// scVolumes is the smallest view of an image workload's source_config needed
// here: only the "volumes" array is decoded, which is enough to learn the
// declared volumes without importing the image source package.
type scVolumes struct {
	Volumes []struct {
		Source string `json:"source"`
		Target string `json:"target"`
		Scope  string `json:"scope"`
		Name   string `json:"name"`
	} `json:"volumes"`
}
|
||||||
|
|
||||||
|
// SnapshotableVolumes enumerates a workload's data volumes and splits them into
|
||||||
|
// those that can be snapshotted now (resolved host-bind dirs that exist on
|
||||||
|
// disk) and those that are skipped (with a reason). It mirrors the image
|
||||||
|
// source's computeMounts merge: source_config volumes overlaid by persisted
|
||||||
|
// workload_volumes rows (persisted wins on a target conflict).
|
||||||
|
//
|
||||||
|
// Only image-source workloads declare host-bind data volumes today; for any
|
||||||
|
// other source kind both slices come back empty.
|
||||||
|
func SnapshotableVolumes(st *store.Store, w store.Workload, settings store.Settings) (refs []VolumeRef, skipped []SkippedVolume, err error) {
|
||||||
|
if w.SourceKind != "image" {
|
||||||
|
return nil, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
byTarget := map[string]store.WorkloadVolume{}
|
||||||
|
|
||||||
|
var cfg scVolumes
|
||||||
|
if w.SourceConfig != "" {
|
||||||
|
// Best-effort: a malformed config simply yields no inline volumes; the
|
||||||
|
// persisted rows below still apply.
|
||||||
|
_ = json.Unmarshal([]byte(w.SourceConfig), &cfg)
|
||||||
|
}
|
||||||
|
for _, v := range cfg.Volumes {
|
||||||
|
if v.Target == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
byTarget[v.Target] = store.WorkloadVolume{Source: v.Source, Target: v.Target, Scope: v.Scope, Name: v.Name}
|
||||||
|
}
|
||||||
|
persisted, perr := st.ListWorkloadVolumes(w.ID)
|
||||||
|
if perr != nil {
|
||||||
|
return nil, nil, perr
|
||||||
|
}
|
||||||
|
for _, p := range persisted {
|
||||||
|
byTarget[p.Target] = store.WorkloadVolume{Source: p.Source, Target: p.Target, Scope: p.Scope, Name: p.Name}
|
||||||
|
}
|
||||||
|
|
||||||
|
params := volume.ResolveWorkloadParams{
|
||||||
|
BasePath: settings.BaseVolumePath,
|
||||||
|
WorkloadID: w.ID,
|
||||||
|
WorkloadName: w.Name,
|
||||||
|
AllowedVolumePaths: settings.AllowedVolumePaths,
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, v := range byTarget {
|
||||||
|
if v.Target == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !supportedScopes[v.Scope] {
|
||||||
|
skipped = append(skipped, SkippedVolume{Target: v.Target, Scope: v.Scope, Reason: skipReason(v.Scope)})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
hostPath, rerr := volume.ResolveWorkloadPath(v, params)
|
||||||
|
if rerr != nil {
|
||||||
|
skipped = append(skipped, SkippedVolume{Target: v.Target, Scope: v.Scope, Reason: rerr.Error()})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
info, serr := os.Stat(hostPath)
|
||||||
|
if serr != nil || !info.IsDir() {
|
||||||
|
skipped = append(skipped, SkippedVolume{Target: v.Target, Scope: v.Scope, Reason: "no data on disk yet"})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
refs = append(refs, VolumeRef{Target: v.Target, Scope: v.Scope, Source: v.Source, HostPath: hostPath})
|
||||||
|
}
|
||||||
|
return refs, skipped, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func skipReason(scope string) string {
|
||||||
|
switch scope {
|
||||||
|
case string(store.VolumeScopeInstance):
|
||||||
|
return "instance-scoped volumes are not yet snapshottable"
|
||||||
|
case string(store.VolumeScopeNamed), string(store.VolumeScopeProjectNamed):
|
||||||
|
return "Docker named volumes are not yet snapshottable"
|
||||||
|
case string(store.VolumeScopeEphemeral):
|
||||||
|
return "ephemeral (tmpfs) volumes hold no persistent data"
|
||||||
|
default:
|
||||||
|
return "unsupported volume scope"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -169,6 +169,18 @@ func SaveFile(rootPath, relativePath string, r io.Reader) error {
|
|||||||
|
|
||||||
// safePath resolves a relative path within rootPath and validates it doesn't escape.
|
// safePath resolves a relative path within rootPath and validates it doesn't escape.
|
||||||
// Resolves symlinks to prevent symlink-based traversal attacks.
|
// Resolves symlinks to prevent symlink-based traversal attacks.
|
||||||
|
//
|
||||||
|
// The check used to be `strings.HasPrefix(absResolved, absRoot)` which has
|
||||||
|
// a classic boundary bug: a sibling root at /data/vol10 would pass the
|
||||||
|
// prefix test for /data/vol1. The fix enforces a separator boundary so
|
||||||
|
// the only allowed cases are absResolved == absRoot OR absResolved begins
|
||||||
|
// with absRoot + separator.
|
||||||
|
//
|
||||||
|
// For paths that don't yet exist (e.g. SaveFile creating a new file),
|
||||||
|
// EvalSymlinks returns an error and we fall back to the lexical path.
|
||||||
|
// In that case we walk every existing ancestor with EvalSymlinks too —
|
||||||
|
// if any ancestor is a symlink that escapes the root, we reject. This
|
||||||
|
// closes the prior gap where pre-planted symlinks could divert writes.
|
||||||
func safePath(rootPath, relativePath string) (string, error) {
|
func safePath(rootPath, relativePath string) (string, error) {
|
||||||
if relativePath == "" {
|
if relativePath == "" {
|
||||||
return rootPath, nil
|
return rootPath, nil
|
||||||
@@ -176,7 +188,7 @@ func safePath(rootPath, relativePath string) (string, error) {
|
|||||||
|
|
||||||
// Clean and ensure no traversal.
|
// Clean and ensure no traversal.
|
||||||
cleaned := filepath.Clean(relativePath)
|
cleaned := filepath.Clean(relativePath)
|
||||||
if strings.Contains(cleaned, "..") {
|
if cleaned == ".." || strings.HasPrefix(cleaned, ".."+string(filepath.Separator)) || strings.Contains(cleaned, string(filepath.Separator)+".."+string(filepath.Separator)) {
|
||||||
return "", fmt.Errorf("path traversal not allowed")
|
return "", fmt.Errorf("path traversal not allowed")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -191,18 +203,66 @@ func safePath(rootPath, relativePath string) (string, error) {
|
|||||||
absRoot = realRoot
|
absRoot = realRoot
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resolve the target path including symlinks.
|
// Resolve the target path. If the leaf doesn't exist (write path),
|
||||||
|
// walk parent directories — any of which may already be a symlink.
|
||||||
absResolved, err := filepath.Abs(absPath)
|
absResolved, err := filepath.Abs(absPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("resolve path: %w", err)
|
return "", fmt.Errorf("resolve path: %w", err)
|
||||||
}
|
}
|
||||||
if realResolved, err := filepath.EvalSymlinks(absResolved); err == nil {
|
if realResolved, err := filepath.EvalSymlinks(absResolved); err == nil {
|
||||||
absResolved = realResolved
|
absResolved = realResolved
|
||||||
|
} else {
|
||||||
|
// Leaf missing — resolve the deepest existing ancestor and
|
||||||
|
// re-join the unresolved tail. This catches a pre-planted
|
||||||
|
// symlink in any parent dir. An error here means an ancestor
|
||||||
|
// could not be resolved (e.g. a symlink we cannot follow): we MUST
|
||||||
|
// reject rather than fall back to the lexical path, which still
|
||||||
|
// carries the absRoot prefix and would let a symlink ancestor that
|
||||||
|
// escapes the root slip past the boundary check below.
|
||||||
|
resolved, tailErr := resolveExistingAncestor(absResolved)
|
||||||
|
if tailErr != nil {
|
||||||
|
return "", fmt.Errorf("path traversal not allowed")
|
||||||
|
}
|
||||||
|
if resolved != "" {
|
||||||
|
absResolved = resolved
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !strings.HasPrefix(absResolved, absRoot) {
|
if absResolved != absRoot && !strings.HasPrefix(absResolved, absRoot+string(filepath.Separator)) {
|
||||||
return "", fmt.Errorf("path traversal not allowed")
|
return "", fmt.Errorf("path traversal not allowed")
|
||||||
}
|
}
|
||||||
|
|
||||||
return absPath, nil
|
return absPath, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// resolveExistingAncestor walks p upward until it reaches a path that exists
// on disk (a directory, file, or symlink — anything Lstat can see), resolves
// that path's symlinks, and re-joins the components that did not exist.
//
// Returns:
//   - (resolved, nil) with the symlink-free ancestor joined to the missing tail.
//   - ("", nil) when the walk reaches the filesystem or volume root, or an
//     empty path, without finding an existing entry below it.
//   - ("", err) when an existing ancestor cannot be resolved, e.g. a dangling
//     symlink. Callers should treat this as "unsafe", not fall back to p.
func resolveExistingAncestor(p string) (string, error) {
	tail := ""
	cur := p
	for {
		if cur == "" || cur == "/" || cur == filepath.VolumeName(cur)+string(filepath.Separator) {
			return "", nil
		}
		// Lstat is used purely as an existence probe that does not follow a
		// final symlink, so a dangling link still counts as "exists" and is
		// then rejected by EvalSymlinks instead of being silently skipped.
		if _, err := os.Lstat(cur); err == nil {
			resolved, err := filepath.EvalSymlinks(cur)
			if err != nil {
				return "", err
			}
			// Join drops an empty tail and cleans the result, so this also
			// covers the case where p itself exists.
			return filepath.Join(resolved, tail), nil
		}
		// cur does not exist (or cannot be inspected): move one level up
		// and remember the component being stepped over.
		parent := filepath.Dir(cur)
		if parent == cur {
			return "", nil
		}
		tail = filepath.Join(filepath.Base(cur), tail)
		cur = parent
	}
}
|
||||||
|
|||||||
@@ -131,8 +131,14 @@ const maxWebhookBodyBytes = 256 * 1024 // 256 KiB
|
|||||||
// PluginDispatcher is what the plugin-workload webhook handler needs from
|
// PluginDispatcher is what the plugin-workload webhook handler needs from
|
||||||
// the deployer: the canonical Source-dispatch entry point plus access to
|
// the deployer: the canonical Source-dispatch entry point plus access to
|
||||||
// the same Deps bundle so Trigger.Match can read store / crypto.
|
// the same Deps bundle so Trigger.Match can read store / crypto.
|
||||||
|
//
|
||||||
|
// DispatchTeardown is required so the preview-deploy flow can tear down
|
||||||
|
// an ephemeral per-branch child workload when its upstream branch is
|
||||||
|
// deleted. Same teardown path the API /workloads/{id} DELETE route uses;
|
||||||
|
// nil error on a clean teardown lets the caller delete the workload row.
|
||||||
type PluginDispatcher interface {
|
type PluginDispatcher interface {
|
||||||
DispatchPlugin(ctx context.Context, w pluginWorkload, intent pluginIntent) error
|
DispatchPlugin(ctx context.Context, w pluginWorkload, intent pluginIntent) error
|
||||||
|
DispatchTeardown(ctx context.Context, w pluginWorkload) error
|
||||||
PluginDeps() pluginDeps
|
PluginDeps() pluginDeps
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,8 +13,10 @@ import (
|
|||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
"github.com/go-chi/chi/v5"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/metrics"
|
||||||
"github.com/alexei/tinyforge/internal/store"
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
"github.com/alexei/tinyforge/internal/workload/plugin"
|
"github.com/alexei/tinyforge/internal/workload/plugin"
|
||||||
|
"github.com/alexei/tinyforge/internal/workload/preview"
|
||||||
)
|
)
|
||||||
|
|
||||||
// maxTriggerFanOutConcurrency caps how many bindings dispatch in
|
// maxTriggerFanOutConcurrency caps how many bindings dispatch in
|
||||||
@@ -44,6 +46,17 @@ const (
|
|||||||
ReasonConfigError = "config merge error"
|
ReasonConfigError = "config merge error"
|
||||||
ReasonMatchError = "match error"
|
ReasonMatchError = "match error"
|
||||||
ReasonDispatchFailed = "dispatch failed"
|
ReasonDispatchFailed = "dispatch failed"
|
||||||
|
ReasonPreviewError = "preview materialize error"
|
||||||
|
ReasonPreviewTorndown = "preview torn down"
|
||||||
|
// ReasonPreviewNoop: a branch-delete webhook arrived but no preview was
|
||||||
|
// ever materialized for that branch — a legitimate clean skip, distinct
|
||||||
|
// from "no binding matched" so it isn't misreported as a wiring problem.
|
||||||
|
ReasonPreviewNoop = "preview noop"
|
||||||
|
// ReasonPreviewOrphaned: the preview container was torn down but its
|
||||||
|
// workload row could not be deleted, leaving an orphan row. Surfaced
|
||||||
|
// distinctly so the partial failure is visible rather than masquerading
|
||||||
|
// as a clean teardown.
|
||||||
|
ReasonPreviewOrphaned = "preview torn down (row orphaned)"
|
||||||
)
|
)
|
||||||
|
|
||||||
// handleTriggerWebhook processes an inbound webhook for a first-class
|
// handleTriggerWebhook processes an inbound webhook for a first-class
|
||||||
@@ -172,7 +185,7 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
|||||||
switch {
|
switch {
|
||||||
case r.Deployed:
|
case r.Deployed:
|
||||||
deployed++
|
deployed++
|
||||||
case r.Reason == ReasonBindingDisabled:
|
case r.Reason == ReasonBindingDisabled, r.Reason == ReasonPreviewNoop:
|
||||||
skipped++
|
skipped++
|
||||||
case r.Reason == ReasonNoMatch:
|
case r.Reason == ReasonNoMatch:
|
||||||
noMatch++
|
noMatch++
|
||||||
@@ -194,8 +207,10 @@ func (h *Handler) handleTriggerWebhook(w http.ResponseWriter, r *http.Request) {
|
|||||||
case noMatch == len(results)-skipped:
|
case noMatch == len(results)-skipped:
|
||||||
delivery.Detail = "no binding matched"
|
delivery.Detail = "no binding matched"
|
||||||
default:
|
default:
|
||||||
delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d", skipped, errored)
|
delivery.Detail = fmt.Sprintf("matched=0 skipped=%d errored=%d nomatch=%d",
|
||||||
|
skipped, errored, noMatch)
|
||||||
}
|
}
|
||||||
|
metrics.WebhookDeliveriesTotal.Inc(delivery.Outcome)
|
||||||
respondWebhookJSON(w, http.StatusOK, map[string]any{
|
respondWebhookJSON(w, http.StatusOK, map[string]any{
|
||||||
"success": true,
|
"success": true,
|
||||||
"trigger": trg.Name,
|
"trigger": trg.Name,
|
||||||
@@ -326,6 +341,18 @@ func (h *Handler) fireBinding(
|
|||||||
if intent.TriggeredBy == "" {
|
if intent.TriggeredBy == "" {
|
||||||
intent.TriggeredBy = "trigger-webhook"
|
intent.TriggeredBy = "trigger-webhook"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preview-deploy fork: the git trigger plugin attaches preview_branch
|
||||||
|
// metadata when BranchPattern matches a non-baseline branch. Route
|
||||||
|
// the dispatch through a per-branch child workload rather than
|
||||||
|
// redeploying the parent template. The fork is intentionally before
|
||||||
|
// the dispatch so the template's container never gets clobbered by
|
||||||
|
// a feature-branch push.
|
||||||
|
if previewBranch := intent.Metadata["preview_branch"]; previewBranch != "" {
|
||||||
|
fired, reason := h.handlePreviewIntent(ctx, row, intent, previewBranch)
|
||||||
|
return fired, reason
|
||||||
|
}
|
||||||
|
|
||||||
if err := h.plugins.DispatchPlugin(ctx, pwl, *intent); err != nil {
|
if err := h.plugins.DispatchPlugin(ctx, pwl, *intent); err != nil {
|
||||||
slog.Warn("webhook: dispatch failed",
|
slog.Warn("webhook: dispatch failed",
|
||||||
"trigger", trg.Name, "workload", row.Name, "error", err)
|
"trigger", trg.Name, "workload", row.Name, "error", err)
|
||||||
@@ -336,3 +363,72 @@ func (h *Handler) fireBinding(
|
|||||||
return true, intent.Reason
|
return true, intent.Reason
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handlePreviewIntent dispatches an intent that targeted a non-baseline
|
||||||
|
// branch on a preview-template workload. Two paths:
|
||||||
|
//
|
||||||
|
// 1. Branch deleted: find the matching preview workload, dispatch
|
||||||
|
// Teardown, then delete the workload row so the dashboard reflects
|
||||||
|
// the upstream state.
|
||||||
|
// 2. Branch pushed: materialize (or reuse) the preview workload, then
|
||||||
|
// dispatch the deploy against it. The template workload itself is
|
||||||
|
// never deployed against a feature branch.
|
||||||
|
//
|
||||||
|
// On any error the helper logs and returns a generic reason — the
|
||||||
|
// fan-out caller treats these the same as a normal dispatch failure.
|
||||||
|
func (h *Handler) handlePreviewIntent(
|
||||||
|
ctx context.Context,
|
||||||
|
template store.Workload,
|
||||||
|
intent *plugin.DeploymentIntent,
|
||||||
|
branch string,
|
||||||
|
) (bool, string) {
|
||||||
|
deleted := intent.Metadata["preview_deleted"] == "1"
|
||||||
|
if deleted {
|
||||||
|
child, ok, err := preview.FindPreviewForBranch(h.store, template.ID, branch)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("webhook: preview lookup failed",
|
||||||
|
"template", template.Name, "branch", branch, "error", err)
|
||||||
|
return false, ReasonPreviewError
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
// Branch was deleted upstream but we never materialized a
|
||||||
|
// preview for it — nothing to do. Report as a distinct noop so
|
||||||
|
// it isn't bucketed as "no binding matched".
|
||||||
|
return false, ReasonPreviewNoop
|
||||||
|
}
|
||||||
|
childPwl := toPluginWorkload(child)
|
||||||
|
if err := h.plugins.DispatchTeardown(ctx, childPwl); err != nil {
|
||||||
|
slog.Warn("webhook: preview teardown dispatch failed",
|
||||||
|
"template", template.Name, "preview", child.Name, "error", err)
|
||||||
|
return false, ReasonDispatchFailed
|
||||||
|
}
|
||||||
|
if err := h.store.DeleteWorkload(child.ID); err != nil {
|
||||||
|
// Container is gone but the row is orphaned. Surface this as a
|
||||||
|
// distinct reason so the partial failure is visible rather than
|
||||||
|
// reported as a clean teardown; the operator can delete the row
|
||||||
|
// from the dashboard if it sticks around.
|
||||||
|
slog.Warn("webhook: preview row delete failed (orphaned row)",
|
||||||
|
"template", template.Name, "preview", child.Name, "error", err)
|
||||||
|
return true, ReasonPreviewOrphaned
|
||||||
|
}
|
||||||
|
slog.Info("webhook: preview torn down",
|
||||||
|
"template", template.Name, "branch", branch, "preview", child.Name)
|
||||||
|
return true, ReasonPreviewTorndown
|
||||||
|
}
|
||||||
|
|
||||||
|
child, err := preview.MaterializeForBranch(h.store, template, branch)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("webhook: preview materialize failed",
|
||||||
|
"template", template.Name, "branch", branch, "error", err)
|
||||||
|
return false, ReasonPreviewError
|
||||||
|
}
|
||||||
|
childPwl := toPluginWorkload(child)
|
||||||
|
if err := h.plugins.DispatchPlugin(ctx, childPwl, *intent); err != nil {
|
||||||
|
slog.Warn("webhook: preview dispatch failed",
|
||||||
|
"template", template.Name, "preview", child.Name, "error", err)
|
||||||
|
return false, ReasonDispatchFailed
|
||||||
|
}
|
||||||
|
slog.Info("webhook: triggered preview deploy",
|
||||||
|
"template", template.Name, "branch", branch, "preview", child.Name, "reason", intent.Reason)
|
||||||
|
return true, intent.Reason
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -327,6 +327,10 @@ func parseGitLabPushEvent(body []byte, headers http.Header) vendorParseResult {
|
|||||||
Ref: probe.Ref,
|
Ref: probe.Ref,
|
||||||
CommitSHA: probe.After,
|
CommitSHA: probe.After,
|
||||||
Pusher: pusher,
|
Pusher: pusher,
|
||||||
|
// GitLab does not emit `deleted: true`; the canonical signal
|
||||||
|
// is an all-zero `after` SHA. Same parser helper used for the
|
||||||
|
// GitHub / Gitea fallback so the two branches agree.
|
||||||
|
Deleted: isZeroSHA(probe.After),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(probe.Ref, "refs/heads/") {
|
if strings.HasPrefix(probe.Ref, "refs/heads/") {
|
||||||
@@ -346,6 +350,7 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
|||||||
var probe struct {
|
var probe struct {
|
||||||
Ref string `json:"ref"`
|
Ref string `json:"ref"`
|
||||||
After string `json:"after"`
|
After string `json:"after"`
|
||||||
|
Deleted bool `json:"deleted"`
|
||||||
Repository struct {
|
Repository struct {
|
||||||
FullName string `json:"full_name"`
|
FullName string `json:"full_name"`
|
||||||
CloneURL string `json:"clone_url"`
|
CloneURL string `json:"clone_url"`
|
||||||
@@ -370,6 +375,12 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
|||||||
if pusher == "" {
|
if pusher == "" {
|
||||||
pusher = probe.Pusher.Username
|
pusher = probe.Pusher.Username
|
||||||
}
|
}
|
||||||
|
// Branch / tag deletion is signalled either by the explicit
|
||||||
|
// `deleted: true` flag (GitHub / Gitea) or by an all-zero `after`
|
||||||
|
// SHA (older shapes). Both are honoured so the preview-deploy flow
|
||||||
|
// can tear down ephemeral workloads even when a vendor omits the
|
||||||
|
// boolean flag.
|
||||||
|
deleted := probe.Deleted || isZeroSHA(probe.After)
|
||||||
evt := plugin.InboundEvent{
|
evt := plugin.InboundEvent{
|
||||||
Kind: "git-push",
|
Kind: "git-push",
|
||||||
Git: &plugin.GitEvent{
|
Git: &plugin.GitEvent{
|
||||||
@@ -377,6 +388,7 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
|||||||
Ref: probe.Ref,
|
Ref: probe.Ref,
|
||||||
CommitSHA: probe.After,
|
CommitSHA: probe.After,
|
||||||
Pusher: pusher,
|
Pusher: pusher,
|
||||||
|
Deleted: deleted,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(probe.Ref, "refs/heads/") {
|
if strings.HasPrefix(probe.Ref, "refs/heads/") {
|
||||||
@@ -388,3 +400,19 @@ func parseGenericGitPush(body []byte) (plugin.InboundEvent, error) {
|
|||||||
}
|
}
|
||||||
return evt, nil
|
return evt, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isZeroSHA reports whether sha is the "no commit" sentinel that vendors put
// in the `after` field of a branch- or tag-delete push event. The canonical
// form is 40 zeros, but any run of at least 7 zeros is accepted because some
// test fixtures truncate the SHA. Anything shorter, and anything containing a
// character other than '0', is not a zero SHA.
func isZeroSHA(sha string) bool {
	if len(sha) < 7 {
		return false
	}
	// '0' is ASCII, so a byte-wise scan is enough: no byte of a multi-byte
	// character can ever compare equal to it.
	for i := 0; i < len(sha); i++ {
		if sha[i] != '0' {
			return false
		}
	}
	return true
}
|
||||||
|
|||||||
@@ -0,0 +1,100 @@
|
|||||||
|
package plugin
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log/slog"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/crypto"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ResolveSharedSecrets returns the applicable shared secrets (global, then
|
||||||
|
// app-scoped overlaying global) as a decrypted KEY->VALUE map. Decrypt
|
||||||
|
// failures log + skip the one entry (mirroring BuildWorkloadEnv). Best-effort:
|
||||||
|
// a store error logs and returns an empty map so a shared-secret outage never
|
||||||
|
// fails a deploy.
|
||||||
|
//
|
||||||
|
// The store orders the rows global-first (then app), so iterating in order and
|
||||||
|
// writing into the map makes a later app-scoped entry with the same Name
|
||||||
|
// overwrite the global default — the intended global < app precedence.
|
||||||
|
//
|
||||||
|
// NOTE: the compose plugin intentionally does NOT call this — compose env is
|
||||||
|
// YAML-defined and shared-secret support for compose is an explicit
|
||||||
|
// out-of-scope follow-up.
|
||||||
|
func ResolveSharedSecrets(deps Deps, appID, sourceName string) map[string]string {
|
||||||
|
merged := map[string]string{}
|
||||||
|
rows, err := deps.Store.ListApplicableSharedSecrets(appID)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn(sourceName+": list shared secrets", "app", appID, "error", err)
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
for _, sec := range rows {
|
||||||
|
value := sec.Value
|
||||||
|
if sec.Encrypted {
|
||||||
|
decrypted, err := crypto.Decrypt(deps.EncKey, sec.Value)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn(sourceName+": decrypt shared secret",
|
||||||
|
"app", appID, "name", sec.Name, "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value = decrypted
|
||||||
|
}
|
||||||
|
merged[sec.Name] = value
|
||||||
|
}
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
// BuildWorkloadEnv flattens the applicable shared secrets plus workload_env
|
||||||
|
// rows into the KEY=VALUE list Docker expects. Shared by the source plugins
|
||||||
|
// (static, dockerfile) so they all handle decrypt failures the same way.
|
||||||
|
//
|
||||||
|
// Shared secrets are the low-precedence base layer; workload_env rows overlay
|
||||||
|
// them so a workload's own config always wins on a key conflict. A workload
|
||||||
|
// with no applicable shared secrets starts from an empty base, so the output
|
||||||
|
// is identical to the workload_env-only behavior that predated shared secrets.
|
||||||
|
//
|
||||||
|
// Encrypted rows are decrypted lazily so plaintext never lives in the store
|
||||||
|
// output. A decrypt failure logs and skips the entry rather than failing the
|
||||||
|
// whole deploy: bricking a sync/build because one rotated key missed an env
|
||||||
|
// entry would be worse than running with the variable unset and surfacing the
|
||||||
|
// warning.
|
||||||
|
//
|
||||||
|
// appID is the workload's app_id (plugin.Workload.GroupID), used to resolve
|
||||||
|
// app-scoped shared secrets. sourceName is the slog prefix the caller wants on
|
||||||
|
// the warning lines (e.g. "static source" / "dockerfile source") so existing
|
||||||
|
// log scrapers keep matching the per-source message text.
|
||||||
|
func BuildWorkloadEnv(deps Deps, workloadID, appID, sourceName string) []string {
|
||||||
|
// Base layer: shared secrets (global, then app overlaying global).
|
||||||
|
merged := ResolveSharedSecrets(deps, appID, sourceName)
|
||||||
|
|
||||||
|
rows, err := deps.Store.ListWorkloadEnv(workloadID)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn(sourceName+": list workload env", "workload", workloadID, "error", err)
|
||||||
|
// Still return whatever shared secrets resolved; a workload_env
|
||||||
|
// outage shouldn't drop the shared defaults a deploy already has.
|
||||||
|
return flattenEnvMap(merged)
|
||||||
|
}
|
||||||
|
for _, e := range rows {
|
||||||
|
value := e.Value
|
||||||
|
if e.Encrypted {
|
||||||
|
decrypted, err := crypto.Decrypt(deps.EncKey, e.Value)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn(sourceName+": decrypt env value",
|
||||||
|
"workload", workloadID, "key", e.Key, "error", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value = decrypted
|
||||||
|
}
|
||||||
|
merged[e.Key] = value // workload_env overrides shared secrets
|
||||||
|
}
|
||||||
|
return flattenEnvMap(merged)
|
||||||
|
}
|
||||||
|
|
||||||
|
// flattenEnvMap converts a KEY->VALUE map into the KEY=VALUE slice that Docker
// expects. The result always has exactly one element per map entry and is
// never nil. Element order follows map iteration and is therefore
// unspecified: Docker treats env as a set, and callers that need a stable
// order sort downstream.
func flattenEnvMap(m map[string]string) []string {
	pairs := make([]string, len(m))
	next := 0
	for key, val := range m {
		pairs[next] = key + "=" + val
		next++
	}
	return pairs
}
|
||||||
@@ -0,0 +1,103 @@
|
|||||||
|
package plugin
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/events"
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// maxDeployStatusRunes is the maximum number of runes of a deploy status that
// gets persisted. It is a defense-in-depth BACKSTOP, not a sanitizer.
//
// CALLER CONTRACT: deploy events are persisted indefinitely, rendered in the
// per-app timeline, AND egress off-box — error-severity events are forwarded
// to the global NotificationURL (cmd/server) and to operator-configured
// event-trigger webhooks (internal/events/dispatcher). Callers MUST therefore
// keep secrets and raw subprocess output (e.g. `docker compose` combined
// stderr, which can echo the deployed app's own secret-bearing logs) OUT of
// `status`. Emit a curated, secret-free reason and keep verbose detail only in
// the returned error (server logs + admin deploy result, neither of which
// egresses).
//
// The cap only bounds the blast radius if something slips through: 256 runes
// keeps a meaningful reason without letting a status become an unbounded sink.
const maxDeployStatusRunes = 256

// capDeployStatus returns s unchanged when it holds at most
// maxDeployStatusRunes runes; otherwise it returns the first
// maxDeployStatusRunes runes followed by an ellipsis ("…").
//
// The cut is always made at the start of a rune, so multibyte output is never
// sliced mid-character. The string is scanned in place and the scan stops at
// the cut point, so an unexpectedly huge status — the very case this backstop
// exists for — costs no rune-slice allocation proportional to its size.
func capDeployStatus(s string) string {
	// A string has at most one rune per byte, so a short byte length already
	// proves the rune count is within the limit.
	if len(s) <= maxDeployStatusRunes {
		return s
	}
	seen := 0
	for i := range s {
		// i is the byte offset at which the next rune starts; once the
		// limit has been seen, that offset is the cut point.
		if seen == maxDeployStatusRunes {
			return s[:i] + "…"
		}
		seen++
	}
	return s
}
|
||||||
|
|
||||||
|
// EmitDeployEvent records a workload-scoped deploy event in the event log
|
||||||
|
// and publishes it on the bus. Best-effort: logs and returns on failure,
|
||||||
|
// never blocks or fails the deploy. `source` is the per-kind event source
|
||||||
|
// string ("image","compose","static_site","dockerfile"); `status` is a
|
||||||
|
// short human status ("deploying","deployed","failed: <reason>").
|
||||||
|
//
|
||||||
|
// The metadata always carries workload_id so the per-app activity timeline
|
||||||
|
// can be reconstructed even by consumers that only read the JSON blob, and
|
||||||
|
// the dedicated workload_id column powers the indexed per-workload query.
|
||||||
|
func EmitDeployEvent(deps Deps, w Workload, source, status string) {
|
||||||
|
// Audit logging is best-effort and must never crash a real deploy. The
|
||||||
|
// production Deps always wires both, but guard so a missing bus/store
|
||||||
|
// (e.g. a narrow unit test) degrades to a no-op instead of a panic.
|
||||||
|
if deps.Store == nil || deps.Events == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Derive severity from the raw status prefix BEFORE capping, then bound
|
||||||
|
// the status that actually gets persisted/displayed/published.
|
||||||
|
severity := "info"
|
||||||
|
if strings.HasPrefix(status, "failed") {
|
||||||
|
severity = "error"
|
||||||
|
}
|
||||||
|
status = capDeployStatus(status)
|
||||||
|
message := fmt.Sprintf("%s: %s", w.Name, status)
|
||||||
|
|
||||||
|
metaBytes, err := json.Marshal(map[string]string{
|
||||||
|
"workload_id": w.ID,
|
||||||
|
"workload_name": w.Name,
|
||||||
|
"status": status,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("plugin: marshal deploy event metadata",
|
||||||
|
"source", source, "workload", w.ID, "error", err)
|
||||||
|
metaBytes = []byte("{}")
|
||||||
|
}
|
||||||
|
metadata := string(metaBytes)
|
||||||
|
|
||||||
|
evt, err := deps.Store.InsertEvent(store.EventLog{
|
||||||
|
Source: source,
|
||||||
|
Severity: severity,
|
||||||
|
Message: message,
|
||||||
|
Metadata: metadata,
|
||||||
|
WorkloadID: w.ID,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("plugin: failed to persist deploy event log",
|
||||||
|
"source", source, "workload", w.ID, "error", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
deps.Events.Publish(events.Event{
|
||||||
|
Type: events.EventLog,
|
||||||
|
Payload: events.EventLogPayload{
|
||||||
|
ID: evt.ID,
|
||||||
|
Source: source,
|
||||||
|
WorkloadID: w.ID,
|
||||||
|
Severity: severity,
|
||||||
|
Message: message,
|
||||||
|
Metadata: metadata,
|
||||||
|
CreatedAt: evt.CreatedAt,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -0,0 +1,167 @@
|
|||||||
|
package plugin
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/alexei/tinyforge/internal/events"
|
||||||
|
"github.com/alexei/tinyforge/internal/store"
|
||||||
|
)
|
||||||
|
|
||||||
|
// capturePublisher records every event published on it so a test can
|
||||||
|
// assert on the bus payload. Satisfies plugin.EventPublisher.
|
||||||
|
type capturePublisher struct {
|
||||||
|
events []events.Event
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *capturePublisher) Publish(evt events.Event) {
|
||||||
|
c.events = append(c.events, evt)
|
||||||
|
}
|
||||||
|
|
||||||
|
// newEmitDeps builds a plugin.Deps backed by an in-memory store and a
|
||||||
|
// capturing publisher. Mirrors the in-memory store pattern used by the
|
||||||
|
// store + source-plugin tests.
|
||||||
|
func newEmitDeps(t *testing.T) (Deps, *capturePublisher) {
|
||||||
|
t.Helper()
|
||||||
|
st, err := store.New(":memory:")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open store: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { _ = st.Close() })
|
||||||
|
pub := &capturePublisher{}
|
||||||
|
return Deps{Store: st, Events: pub}, pub
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmitDeployEvent(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
status string
|
||||||
|
wantSeverity string
|
||||||
|
}{
|
||||||
|
{name: "deployed is info", status: "deployed", wantSeverity: "info"},
|
||||||
|
{name: "deploying is info", status: "deploying", wantSeverity: "info"},
|
||||||
|
{name: "failed is error", status: "failed: pull foo failed", wantSeverity: "error"},
|
||||||
|
{name: "failed bare is error", status: "failed", wantSeverity: "error"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
deps, pub := newEmitDeps(t)
|
||||||
|
w := Workload{ID: "wl-123", Name: "my-app"}
|
||||||
|
|
||||||
|
EmitDeployEvent(deps, w, "image", tt.status)
|
||||||
|
|
||||||
|
// Persisted row carries the workload scope + derived severity.
|
||||||
|
rows, err := deps.Store.ListEvents(store.EventLogFilter{WorkloadID: w.ID})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 1 {
|
||||||
|
t.Fatalf("got %d persisted events, want 1", len(rows))
|
||||||
|
}
|
||||||
|
got := rows[0]
|
||||||
|
if got.Severity != tt.wantSeverity {
|
||||||
|
t.Errorf("severity = %q, want %q", got.Severity, tt.wantSeverity)
|
||||||
|
}
|
||||||
|
if got.Source != "image" {
|
||||||
|
t.Errorf("source = %q, want %q", got.Source, "image")
|
||||||
|
}
|
||||||
|
if got.WorkloadID != w.ID {
|
||||||
|
t.Errorf("workload_id = %q, want %q", got.WorkloadID, w.ID)
|
||||||
|
}
|
||||||
|
wantMsg := w.Name + ": " + tt.status
|
||||||
|
if got.Message != wantMsg {
|
||||||
|
t.Errorf("message = %q, want %q", got.Message, wantMsg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Metadata JSON carries workload_id / workload_name / status.
|
||||||
|
var meta map[string]string
|
||||||
|
if err := json.Unmarshal([]byte(got.Metadata), &meta); err != nil {
|
||||||
|
t.Fatalf("unmarshal metadata %q: %v", got.Metadata, err)
|
||||||
|
}
|
||||||
|
if meta["workload_id"] != w.ID {
|
||||||
|
t.Errorf("metadata workload_id = %q, want %q", meta["workload_id"], w.ID)
|
||||||
|
}
|
||||||
|
if meta["workload_name"] != w.Name {
|
||||||
|
t.Errorf("metadata workload_name = %q, want %q", meta["workload_name"], w.Name)
|
||||||
|
}
|
||||||
|
if meta["status"] != tt.status {
|
||||||
|
t.Errorf("metadata status = %q, want %q", meta["status"], tt.status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The persisted row is also re-published on the bus as an
|
||||||
|
// EventLog so SSE clients see it live.
|
||||||
|
if len(pub.events) != 1 {
|
||||||
|
t.Fatalf("got %d published events, want 1", len(pub.events))
|
||||||
|
}
|
||||||
|
ev := pub.events[0]
|
||||||
|
if ev.Type != events.EventLog {
|
||||||
|
t.Errorf("event type = %q, want %q", ev.Type, events.EventLog)
|
||||||
|
}
|
||||||
|
payload, ok := ev.Payload.(events.EventLogPayload)
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("payload type = %T, want events.EventLogPayload", ev.Payload)
|
||||||
|
}
|
||||||
|
if payload.WorkloadID != w.ID {
|
||||||
|
t.Errorf("payload workload_id = %q, want %q", payload.WorkloadID, w.ID)
|
||||||
|
}
|
||||||
|
if payload.Severity != tt.wantSeverity {
|
||||||
|
t.Errorf("payload severity = %q, want %q", payload.Severity, tt.wantSeverity)
|
||||||
|
}
|
||||||
|
if payload.ID != got.ID {
|
||||||
|
t.Errorf("payload id = %d, want %d", payload.ID, got.ID)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEmitDeployEvent_CapsLongStatus verifies a long failure status (e.g. one
|
||||||
|
// embedding raw subprocess output) is bounded to maxDeployStatusRunes runes in
|
||||||
|
// both the persisted message and metadata, cut on a UTF-8 boundary, while
|
||||||
|
// severity is still derived from the original "failed" prefix.
|
||||||
|
func TestEmitDeployEvent_CapsLongStatus(t *testing.T) {
|
||||||
|
deps, pub := newEmitDeps(t)
|
||||||
|
w := Workload{ID: "wl-cap", Name: "app"}
|
||||||
|
|
||||||
|
// Multibyte body so a naive byte-slice would corrupt a rune; prefix with
|
||||||
|
// "failed: " so the severity check exercises the pre-cap derivation.
|
||||||
|
longStatus := "failed: " + strings.Repeat("é", 400)
|
||||||
|
EmitDeployEvent(deps, w, "compose", longStatus)
|
||||||
|
|
||||||
|
rows, err := deps.Store.ListEvents(store.EventLogFilter{WorkloadID: w.ID})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ListEvents: %v", err)
|
||||||
|
}
|
||||||
|
if len(rows) != 1 {
|
||||||
|
t.Fatalf("got %d events, want 1", len(rows))
|
||||||
|
}
|
||||||
|
got := rows[0]
|
||||||
|
|
||||||
|
if got.Severity != "error" {
|
||||||
|
t.Errorf("severity = %q, want error (derived from pre-cap prefix)", got.Severity)
|
||||||
|
}
|
||||||
|
|
||||||
|
var meta map[string]string
|
||||||
|
if err := json.Unmarshal([]byte(got.Metadata), &meta); err != nil {
|
||||||
|
t.Fatalf("unmarshal metadata: %v", err)
|
||||||
|
}
|
||||||
|
capped := meta["status"]
|
||||||
|
if rc := len([]rune(capped)); rc != maxDeployStatusRunes+1 { // +1 for the ellipsis rune
|
||||||
|
t.Errorf("capped status = %d runes, want %d", rc, maxDeployStatusRunes+1)
|
||||||
|
}
|
||||||
|
if !utf8.ValidString(capped) {
|
||||||
|
t.Errorf("capped status is not valid UTF-8: %q", capped)
|
||||||
|
}
|
||||||
|
if !strings.HasSuffix(capped, "…") {
|
||||||
|
t.Errorf("capped status missing ellipsis suffix: %q", capped)
|
||||||
|
}
|
||||||
|
wantMsg := w.Name + ": " + capped
|
||||||
|
if got.Message != wantMsg {
|
||||||
|
t.Errorf("message = %q, want %q", got.Message, wantMsg)
|
||||||
|
}
|
||||||
|
if len(pub.events) != 1 {
|
||||||
|
t.Fatalf("got %d published events, want 1", len(pub.events))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
package plugin
|
||||||
|
|
||||||
|
// IDShort returns the first 8 chars of a workload ID, used as the uniqueness
|
||||||
|
// suffix on the Docker resources (container, image, volume) a source plugin
|
||||||
|
// materializes. Workload names are not UNIQUE in the schema today; including
|
||||||
|
// the ID short prevents two workloads with the same name from clobbering each
|
||||||
|
// other's container, image, or storage volume.
|
||||||
|
//
|
||||||
|
// Shared by the source plugins (static, dockerfile). Each plugin still owns
|
||||||
|
// its own container/image NAME format (the human-readable prefix differs by
|
||||||
|
// source kind) — only the ID-short derivation is common.
|
||||||
|
func IDShort(w Workload) string {
|
||||||
|
if len(w.ID) < 8 {
|
||||||
|
return w.ID
|
||||||
|
}
|
||||||
|
return w.ID[:8]
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user