Files
Learn_System/backend/scripts/verify-backup.sh
T
Maxim Dolgolyov cb43538c54 ops: weekly backup verification script + scripts README
verify-backup.sh: restores latest backup to /tmp, runs
PRAGMA integrity_check, compares row counts vs prod (>5% drop
in users = fail, >48h age = fail). Cron-driven, fails loud on
non-zero exit so cron mails the admin.

Exit codes: 2=no files, 3=too old, 4=corrupt, 5=row count diverged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 17:16:04 +03:00

94 lines
3.7 KiB
Bash

#!/bin/sh
# verify-backup.sh — restore latest backup to /tmp, run integrity check,
# compare row counts vs production DB.
#
# Cron (Sunday 6am):
# 0 6 * * 0 /path/to/repo/backend/scripts/verify-backup.sh
#
# Exit codes:
# 0 — OK
# 1 — generic error (set -e)
# 2 — no backup files found
# 3 — latest backup is older than 48h
# 4 — PRAGMA integrity_check failed
# 5 — user count diverged > 5% from production
#
# Usage:
# ./verify-backup.sh
# BACKUP_DIR=/custom/path PROD_DB=/custom/db.sqlite ./verify-backup.sh
# Abort on the first unhandled command failure.
set -e

# Absolute directory that contains this script; the default paths below are
# resolved relative to it.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)

# Both locations may be overridden from the environment; the defaults are
# assigned only when the variable is unset or empty.
: "${BACKUP_DIR:=$SCRIPT_DIR/../../backups}"
: "${PROD_DB:=$SCRIPT_DIR/../data/learnspace.db}"
# ── 1. Find latest backup ────────────────────────────────────────────────────
# `ls -t` lists the matching backups newest-first by modification time; the
# first line is kept. ls's own errors (e.g. nothing matches the glob) are
# discarded, which leaves LATEST empty and is reported below.
LATEST="$(ls -1t "$BACKUP_DIR"/learnspace_*.db 2>/dev/null | head -1)"

# Guard: without any backup there is nothing to verify.
[ -n "$LATEST" ] || {
  echo "[verify] FAIL: no backup files found in $BACKUP_DIR" >&2
  exit 2
}

echo "[verify] Latest backup: $(basename "$LATEST")"
# ── 2. Age check: backup must be < 48h ──────────────────────────────────────
# Read the backup's modification time as epoch seconds.
# Try the GNU/BusyBox form (stat -c %Y) first, then the BSD/macOS form
# (stat -f %m). Probing the real call rather than `stat --version` keeps
# BusyBox-based systems working: BusyBox accepts -c but rejects --version,
# and its -f means "filesystem status", which would yield garbage here.
MTIME=$(stat -c %Y "$LATEST" 2>/dev/null) \
  || MTIME=$(stat -f %m "$LATEST" 2>/dev/null) \
  || MTIME=""

# Refuse to do arithmetic on anything that is not a plain non-negative integer.
case "$MTIME" in
  '' | *[!0-9]*)
    echo "[verify] FAIL: cannot read modification time of $LATEST" >&2
    exit 1
    ;;
esac

NOW=$(date +%s)
AGE_SEC=$(( NOW - MTIME ))
AGE_H=$(( AGE_SEC / 3600 ))   # whole hours, for display only

# 172800 s = 48 h. The comparison uses seconds so a backup that is e.g.
# 48h30m old is rejected even though AGE_H truncates to 48.
if [ "$AGE_SEC" -gt 172800 ]; then
  echo "[verify] FAIL: backup is ${AGE_H}h old (limit: 48h)" >&2
  exit 3
fi
echo "[verify] Age: ${AGE_H}h — OK"
# ── 3. Restore to temp file ──────────────────────────────────────────────────
# mktemp creates a uniquely named file atomically with owner-only
# permissions. A predictable name such as /tmp/<prefix>_<pid> can be
# pre-created or symlinked by another local user in a world-writable directory.
TEST_DB=$(mktemp "${TMPDIR:-/tmp}/ls_verify_XXXXXX") || {
  echo "[verify] FAIL: could not create temporary file" >&2
  exit 1
}

# Install the cleanup BEFORE copying so a failed or partial copy is removed
# too. The sidecar names cover files SQLite may create next to the database
# (rollback journal, or WAL/shared-memory files if the backup is in WAL mode).
trap 'rm -f "$TEST_DB" "${TEST_DB}-journal" "${TEST_DB}-wal" "${TEST_DB}-shm"' EXIT

# Work on a copy so the checks never touch the backup file itself.
cp "$LATEST" "$TEST_DB"
# ── 4. Integrity check ───────────────────────────────────────────────────────
# sqlite3's stdout and stderr are both captured so that any error text ends
# up in the failure message.
#
# The `||` branch matters: with `set -e` active, a bare assignment whose
# command substitution fails would terminate the script right here — with
# sqlite3's exit status instead of 4, and with the error text swallowed into
# INTEGRITY and never printed. Handling the status explicitly routes every
# sqlite3 failure (unreadable/corrupt file, missing binary, ...) through the
# same loud exit-4 path.
INTEGRITY=$(sqlite3 "$TEST_DB" "PRAGMA integrity_check;" 2>&1) \
  || INTEGRITY="sqlite3 exited with status $?: $INTEGRITY"

# A healthy database yields exactly the single line "ok".
if [ "$INTEGRITY" != "ok" ]; then
  echo "[verify] FAIL: integrity_check returned: $INTEGRITY" >&2
  exit 4
fi
echo "[verify] Integrity: ok"
# ── 5. Row count sanity vs production ───────────────────────────────────────

# count_rows DB TABLE
# Prints COUNT(*) of TABLE in DB. Best-effort: if the query fails (missing
# table, unreadable DB, ...) the error is discarded and "0" is printed.
count_rows() {
  sqlite3 "$1" "SELECT COUNT(*) FROM $2;" 2>/dev/null || echo 0
}

# users_within_tolerance BACKUP_COUNT PROD_COUNT
# Succeeds when BACKUP_COUNT is at least 95% of PROD_COUNT.
# Cross-multiplying keeps the comparison exact. A precomputed threshold of
# PROD * 95 / 100 is rounded down by integer division, which lets drops of
# more than 5% through on small tables (18 of 19, or 0 of 1).
# An empty production table (PROD_COUNT = 0) always passes.
users_within_tolerance() {
  [ $(( $1 * 100 )) -ge $(( $2 * 95 )) ]
}

if [ -f "$PROD_DB" ]; then
  PROD_USERS=$(count_rows "$PROD_DB" users)
  BACK_USERS=$(count_rows "$TEST_DB" users)
  PROD_QUESTIONS=$(count_rows "$PROD_DB" questions)
  BACK_QUESTIONS=$(count_rows "$TEST_DB" questions)
  echo "[verify] Users: backup=$BACK_USERS prod=$PROD_USERS"
  echo "[verify] Questions: backup=$BACK_QUESTIONS prod=$PROD_QUESTIONS"

  # Users must be >= 95% of prod (gap allowed: users may register after backup)
  if ! users_within_tolerance "$BACK_USERS" "$PROD_USERS"; then
    echo "[verify] FAIL: backup users ($BACK_USERS) < 95% of prod ($PROD_USERS)" >&2
    exit 5
  fi

  # Questions are essentially immutable — warn on any divergence
  if [ "$BACK_QUESTIONS" -ne "$PROD_QUESTIONS" ]; then
    echo "[verify] WARN: question count mismatch — backup=$BACK_QUESTIONS prod=$PROD_QUESTIONS"
    echo "[verify] (may be mid-import; not failing)"
  fi
else
  # No production DB to compare against; the earlier checks still stand.
  echo "[verify] Prod DB not found at $PROD_DB — skipping row count check"
fi
# Every check above passed: report the verified backup by its file name
# (directory part stripped) and finish with an explicit success status.
echo "[verify] OK: ${LATEST##*/} passed all checks"
exit 0