ops: weekly backup verification script + scripts README

verify-backup.sh: restores latest backup to /tmp, runs
PRAGMA integrity_check, compares row counts vs prod (>5% drop
in users = fail, >48h age = fail). Cron-driven, fails loud on
non-zero exit so cron mails the admin.

Exit codes: 2=no files, 3=too old, 4=corrupt, 5=row count diverged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Maxim Dolgolyov
2026-05-06 17:16:04 +03:00
parent 513ec059bf
commit cb43538c54
2 changed files with 156 additions and 0 deletions
+63
View File
@@ -0,0 +1,63 @@
# backend/scripts
Operational scripts for LearnSpace backend.
## Cron setup (production)
```
# Daily backup at 4am
0 4 * * * /path/to/repo/backend/scripts/backup.sh
# Weekly verification at 6am Sunday (cron mails any output the job prints; failures go to stderr, so append `>/dev/null` to be mailed only on failure)
0 6 * * 0 /path/to/repo/backend/scripts/verify-backup.sh
```
## Scripts
### backup.sh
Creates a safe SQLite snapshot via `VACUUM INTO`. Keeps the last 7 backups by default; override with the `KEEP` environment variable (e.g. `KEEP=14`).
```sh
./backup.sh # default: ../data/learnspace.db → ../../backups/
./backup.sh /path/to/db /path/to/backups
KEEP=14 ./backup.sh
```
### verify-backup.sh
Restores the latest backup to `/tmp`, runs `PRAGMA integrity_check`, compares row counts vs production.
Exit codes:
- `0` — all checks passed
- `2` — no backup files found
- `3` — latest backup older than 48h (backup job may have stopped)
- `4` — `integrity_check` failed (backup is corrupt)
- `5` — user count diverged >5% from production
```sh
./verify-backup.sh
BACKUP_DIR=/custom/backups PROD_DB=/custom/db.sqlite ./verify-backup.sh
```
### check-route-auth.js
Scans `src/routes/*.js` for `:id`-bearing routes without an auth-guard middleware.
Fails if new unprotected routes exceed the current baseline.
```sh
npm run lint:routes
```
### import-content.js _(coming in Task 8)_
Imports question collections from YAML manifests into the database.
```sh
npm run import:content -- ../content/phys/ct-2024.yaml
```
## Deploy order (first time / fresh server)
```sh
npm install
npm run migrate
npm run seed:permissions
npm start
```
+93
View File
@@ -0,0 +1,93 @@
#!/bin/sh
# verify-backup.sh — restore latest backup to /tmp, run integrity check,
# compare row counts vs production DB.
#
# Cron (Sunday 6am):
# 0 6 * * 0 /path/to/repo/backend/scripts/verify-backup.sh
#
# Exit codes:
# 0 — OK
# 1 — generic error (set -e)
# 2 — no backup files found
# 3 — latest backup is older than 48h
# 4 — PRAGMA integrity_check failed
# 5 — user count diverged > 5% from production
#
# Usage:
# ./verify-backup.sh
# BACKUP_DIR=/custom/path PROD_DB=/custom/db.sqlite ./verify-backup.sh
# Abort on the first unhandled command failure (surfaces as exit code 1).
set -e
# Absolute directory containing this script.
# CDPATH is cleared for the cd: when cd resolves a relative directory through
# CDPATH it prints the resulting path on stdout, which would be captured here
# together with the output of pwd. `--` protects paths starting with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
# Both locations may be overridden from the environment; the defaults are
# resolved relative to the script directory.
BACKUP_DIR="${BACKUP_DIR:-$SCRIPT_DIR/../../backups}"
PROD_DB="${PROD_DB:-$SCRIPT_DIR/../data/learnspace.db}"
# ── 1. Find latest backup ────────────────────────────────────────────────────
# List the matching backup files sorted by modification time (newest first)
# and keep only the first line. ls diagnostics are discarded, so LATEST is
# simply empty when nothing matches.
LATEST=$(
  ls -1t "$BACKUP_DIR"/learnspace_*.db 2>/dev/null \
    | head -n 1
)
if [ -n "$LATEST" ]; then
  echo "[verify] Latest backup: $(basename "$LATEST")"
else
  echo "[verify] FAIL: no backup files found in $BACKUP_DIR" >&2
  exit 2
fi
# ── 2. Age check: backup must be < 48h ──────────────────────────────────────
# Maximum accepted age: 48 hours expressed in seconds (172800).
MAX_AGE_SEC=$(( 48 * 3600 ))
# Modification time of the backup in epoch seconds. When `stat --version`
# succeeds the `-c %Y` form (Linux) is used, otherwise the `-f %m` form (macOS).
MTIME=$(
  if stat --version > /dev/null 2>&1; then
    stat -c %Y "$LATEST"
  else
    stat -f %m "$LATEST"
  fi
)
NOW=$(date +%s)
AGE_SEC=$(( NOW - MTIME ))
# Whole hours, used only for the human-readable messages below.
AGE_H=$(( AGE_SEC / 3600 ))
if [ "$AGE_SEC" -gt "$MAX_AGE_SEC" ]; then
  echo "[verify] FAIL: backup is ${AGE_H}h old (limit: 48h)" >&2
  exit 3
fi
echo "[verify] Age: ${AGE_H}h — OK"
# ── 3. Restore to temp file ──────────────────────────────────────────────────
# mktemp creates the file atomically under an unpredictable name, so another
# local user cannot pre-create or symlink the path the way a PID-based name
# allows. The file stays in /tmp as documented.
TEST_DB=$(mktemp /tmp/ls_verify_XXXXXX) || {
  echo "[verify] FAIL: could not create a temp file in /tmp" >&2
  exit 1
}
# Install cleanup BEFORE copying so a failed or partial cp leaves nothing
# behind. The extra names are SQLite side files that may appear next to the
# database while it is open; removing absent files is a no-op with rm -f.
trap 'rm -f "$TEST_DB" "$TEST_DB-journal" "$TEST_DB-wal" "$TEST_DB-shm"' EXIT
# Some sh implementations (e.g. dash) do not run the EXIT trap when killed by
# a signal; turning the signal into a normal exit makes cleanup run there too.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
# Copy over the file mktemp created; a cp failure aborts the script via set -e.
cp "$LATEST" "$TEST_DB"
# ── 4. Integrity check ───────────────────────────────────────────────────────
# Report a missing sqlite3 binary as a generic error (exit 1) rather than
# letting it look like a corrupt backup; cron jobs often run with a short PATH.
if ! command -v sqlite3 > /dev/null 2>&1; then
  echo "[verify] FAIL: sqlite3 not found in PATH" >&2
  exit 1
fi
# Capture the exit status explicitly. Without the `||`, a non-zero sqlite3
# exit (e.g. the file is not a database at all) would trip set -e right at the
# assignment: the script would die with status 1 and print nothing, because
# sqlite3's stderr is captured into INTEGRITY.
INTEGRITY_RC=0
INTEGRITY=$(sqlite3 "$TEST_DB" "PRAGMA integrity_check;" 2>&1) || INTEGRITY_RC=$?
if [ "$INTEGRITY_RC" -ne 0 ]; then
  echo "[verify] FAIL: integrity_check returned: $INTEGRITY (sqlite3 exit status $INTEGRITY_RC)" >&2
  exit 4
fi
# A healthy database yields exactly the single line "ok".
if [ "$INTEGRITY" != "ok" ]; then
  echo "[verify] FAIL: integrity_check returned: $INTEGRITY" >&2
  exit 4
fi
echo "[verify] Integrity: ok"
# ── 5. Row count sanity vs production ───────────────────────────────────────

# count_rows DB TABLE — print COUNT(*) of TABLE in DB on stdout.
# Best-effort: when the query fails or does not yield a plain number, a
# warning is written to stderr and 0 is printed, so a failed count is visible
# in the log instead of silently disabling the comparison.
count_rows() {
  _rows=$(sqlite3 "$1" "SELECT COUNT(*) FROM $2;" 2>/dev/null) || _rows=
  case "$_rows" in
    '' | *[!0-9]*)
      echo "[verify] WARN: could not count $2 in $1 — treating as 0" >&2
      _rows=0
      ;;
  esac
  echo "$_rows"
}

# users_diverged PROD BACKUP — status 0 when PROD > 0 and BACKUP is below 95%
# of PROD, non-zero otherwise.
# Both sides are cross-multiplied instead of computing PROD * 95 / 100: the
# integer division rounds the threshold down, which let e.g. prod=10/backup=9
# (a 10% drop) or prod=1/backup=0 pass.
users_diverged() {
  [ "$1" -gt 0 ] && [ $(( $2 * 100 )) -lt $(( $1 * 95 )) ]
}

if [ -f "$PROD_DB" ]; then
  PROD_USERS=$(count_rows "$PROD_DB" users)
  BACK_USERS=$(count_rows "$TEST_DB" users)
  PROD_QUESTIONS=$(count_rows "$PROD_DB" questions)
  BACK_QUESTIONS=$(count_rows "$TEST_DB" questions)
  echo "[verify] Users: backup=$BACK_USERS prod=$PROD_USERS"
  echo "[verify] Questions: backup=$BACK_QUESTIONS prod=$PROD_QUESTIONS"
  # Users must be >= 95% of prod (gap allowed: users may register after backup)
  if users_diverged "$PROD_USERS" "$BACK_USERS"; then
    echo "[verify] FAIL: backup users ($BACK_USERS) < 95% of prod ($PROD_USERS)" >&2
    exit 5
  fi
  # Questions are essentially immutable — warn on any divergence
  if [ "$BACK_QUESTIONS" -ne "$PROD_QUESTIONS" ]; then
    echo "[verify] WARN: question count mismatch — backup=$BACK_QUESTIONS prod=$PROD_QUESTIONS"
    echo "[verify] (may be mid-import; not failing)"
  fi
else
  echo "[verify] Prod DB not found at $PROD_DB — skipping row count check"
fi
# Every check passed: name the verified backup file and finish with status 0.
VERIFIED_NAME=$(basename "$LATEST")
echo "[verify] OK: ${VERIFIED_NAME} passed all checks"
exit 0