feat(backup): bundle assets in ZIP + partial-write hardening + restart log
Auto-backups now produce a ZIP containing ledgrab.db plus every file in the assets dir under assets/ — matching the manual GET /api/v1/system/backup format, so restore accepts either output interchangeably. Legacy .db backups remain listable, restorable, and prunable; both extensions count toward max_backups.

Writes stage to <name>.partial then os.replace into place — a crash mid-ZIP never leaves a half-written backup that masquerades as valid. Stale .partials from prior crashes are swept on the next run.

Symlinks inside the assets dir are skipped so a hostile link can't slurp a target outside the dir into every backup.

Backups larger than 500 MB log a warning so operators notice unbounded asset growth before disk fills up.

restart.py: redirect the spawned restart script's stdout/stderr to restart.log and bail out early if the script is missing — silent failures (PowerShell off PATH, restart.ps1 erroring) used to vanish into a detached child with no diagnostic trail.

Tests cover happy path, asset bytes round-trip, partial cleanup, None/missing assets_dir, failure rollback, stale-partial sweep, symlink rejection, mixed legacy+new listing, and cross-format prune.
This commit is contained in:
@@ -11,6 +11,7 @@ import sys
|
||||
import threading
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
|
||||
from fastapi.responses import StreamingResponse
|
||||
@@ -38,28 +39,59 @@ _SERVER_DIR = Path(__file__).resolve().parents[4]
|
||||
|
||||
|
||||
def _schedule_restart() -> None:
    """Launch the platform restart script from a background daemon thread.

    The worker sleeps for one second before spawning the script so the HTTP
    response can complete first. The script's stdout and stderr are appended
    to ``restart.log`` under ``_SERVER_DIR``, so a silent failure (PowerShell
    not on PATH, ``restart.ps1`` erroring, etc.) leaves evidence on disk. If
    the script file is missing, an error is logged and nothing is spawned.
    """

    def _restart() -> None:
        import time

        time.sleep(1)

        # ``dict[str, Any]`` because the values are int flags on Windows
        # (``creationflags``) and a bool on POSIX (``start_new_session``).
        spawn_options: dict[str, Any]
        if sys.platform != "win32":
            script = _SERVER_DIR / "restart.sh"
            cmd = ["bash", str(script)]
            spawn_options = {"start_new_session": True}
        else:
            script = _SERVER_DIR / "restart.ps1"
            cmd = ["powershell", "-ExecutionPolicy", "Bypass", "-File", str(script)]
            detach_flags = subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
            spawn_options = {"creationflags": detach_flags}

        if not script.is_file():
            logger.error("Restart script missing: %s", script)
            return

        log_path = _SERVER_DIR / "restart.log"
        try:
            # Append so successive restarts accumulate in one log. The child
            # receives its own copy of the handle, so the parent may close
            # its side as soon as the process has been spawned.
            with open(log_path, "ab") as log_file:
                banner = f"\n--- restart spawned at {time.strftime('%Y-%m-%d %H:%M:%S')} ---\n"
                log_file.write(banner.encode())
                log_file.flush()
                proc = subprocess.Popen(
                    cmd,
                    stdout=log_file,
                    stderr=subprocess.STDOUT,
                    **spawn_options,
                )
            logger.info("Restart script launched: %s (PID %s, log %s)", cmd[0], proc.pid, log_path)
        except OSError as e:
            logger.error("Failed to launch restart script %s: %s", script, e, exc_info=True)
        except Exception as e:
            logger.error("Unexpected error launching restart script: %s", e, exc_info=True)

    worker = threading.Thread(target=_restart, daemon=True)
    worker.start()
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
"""Auto-backup engine — periodic SQLite snapshot backups."""
|
||||
"""Auto-backup engine — periodic SQLite + assets snapshot backups."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import Iterable, List
|
||||
|
||||
from ledgrab.storage.database import Database
|
||||
from ledgrab.utils import get_logger
|
||||
@@ -20,19 +22,35 @@ DEFAULT_SETTINGS = {
|
||||
# Skip the immediate-on-start backup if a recent backup exists within this window.
|
||||
_STARTUP_BACKUP_COOLDOWN = timedelta(minutes=5)
|
||||
|
||||
_BACKUP_EXT = ".db"
|
||||
# Current write format. ``.db`` is still recognised on read so backups taken
|
||||
# by older versions remain listable, restorable, and prunable.
|
||||
_BACKUP_EXT = ".zip"
|
||||
_RECOGNISED_EXTS: tuple[str, ...] = (".zip", ".db")
|
||||
|
||||
# Soft warning threshold — large backups indicate an unbounded assets dir or
|
||||
# bloated DB. We don't refuse to write (user data is theirs), but log loudly
|
||||
# so the operator can investigate before disk fills up over many intervals.
|
||||
_BACKUP_SIZE_WARN_BYTES = 500 * 1024 * 1024 # 500 MB
|
||||
|
||||
|
||||
class AutoBackupEngine:
|
||||
"""Creates periodic SQLite snapshot backups of the database."""
|
||||
"""Creates periodic backups of the database and asset files.
|
||||
|
||||
Each backup is a ZIP archive containing ``ledgrab.db`` plus every file
|
||||
from ``assets_dir`` under ``assets/`` — matching the format produced by
|
||||
the manual ``GET /api/v1/system/backup`` download. The restore endpoint
|
||||
accepts either ``.zip`` or ``.db`` interchangeably.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
backup_dir: Path,
|
||||
db: Database,
|
||||
assets_dir: Path | None = None,
|
||||
):
|
||||
self._backup_dir = Path(backup_dir)
|
||||
self._db = db
|
||||
self._assets_dir = Path(assets_dir) if assets_dir else None
|
||||
self._task: asyncio.Task | None = None
|
||||
self._last_backup_time: datetime | None = None
|
||||
|
||||
@@ -82,9 +100,14 @@ class AutoBackupEngine:
|
||||
self._task.cancel()
|
||||
self._task = None
|
||||
|
||||
def _iter_backup_files(self) -> Iterable[Path]:
    """Yield each file in the backup directory with a recognised backup extension.

    Extensions are taken from ``_RECOGNISED_EXTS`` in order, so both the
    current ``.zip`` format and legacy ``.db`` backups are included.
    """
    for suffix in _RECOGNISED_EXTS:
        pattern = f"*{suffix}"
        for candidate in self._backup_dir.glob(pattern):
            yield candidate
|
||||
|
||||
def _most_recent_backup_age(self) -> timedelta | None:
|
||||
"""Return the age of the newest backup file, or None if no backups exist."""
|
||||
files = list(self._backup_dir.glob(f"*{_BACKUP_EXT}"))
|
||||
files = list(self._iter_backup_files())
|
||||
if not files:
|
||||
return None
|
||||
newest = max(files, key=lambda p: p.stat().st_mtime)
|
||||
@@ -124,15 +147,72 @@ class AutoBackupEngine:
|
||||
timestamp = now.strftime("%Y-%m-%dT%H%M%S")
|
||||
filename = f"ledgrab-backup-{timestamp}{_BACKUP_EXT}"
|
||||
file_path = self._backup_dir / filename
|
||||
# Stage the ZIP at <name>.partial then os.replace into place once it's
|
||||
# fully written. A crash mid-write leaves a .partial file (cleaned up
|
||||
# on the next backup) but never a half-written backup that would fool
|
||||
# ``_most_recent_backup_age`` / ``_prune_old_backups`` into trusting
|
||||
# corrupt data.
|
||||
partial_path = file_path.with_suffix(file_path.suffix + ".partial")
|
||||
|
||||
self._db.backup_to(file_path)
|
||||
# SQLite backup API → temp .db so we get a consistent snapshot
|
||||
# without holding the DB lock for the ZIP write.
|
||||
tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
|
||||
tmp_path = Path(tmp.name)
|
||||
tmp.close()
|
||||
asset_count = 0
|
||||
try:
|
||||
self._db.backup_to(tmp_path)
|
||||
with zipfile.ZipFile(partial_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
||||
zf.write(tmp_path, "ledgrab.db")
|
||||
if self._assets_dir and self._assets_dir.is_dir():
|
||||
for asset_file in self._assets_dir.iterdir():
|
||||
# Skip symlinks: ``is_file()`` follows them and we
|
||||
# don't want to silently slurp a symlink target that
|
||||
# lives outside the assets dir into every backup.
|
||||
if asset_file.is_symlink():
|
||||
continue
|
||||
if asset_file.is_file():
|
||||
zf.write(asset_file, f"assets/{asset_file.name}")
|
||||
asset_count += 1
|
||||
os.replace(partial_path, file_path)
|
||||
except Exception:
|
||||
# Roll back the staged partial so it doesn't accumulate; the
|
||||
# finally block still removes the SQLite temp file. Re-raise so
|
||||
# the caller (``_backup_loop`` / ``trigger_backup``) sees + logs
|
||||
# the failure instead of silently emitting a missing backup.
|
||||
partial_path.unlink(missing_ok=True)
|
||||
raise
|
||||
finally:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
|
||||
# Best-effort sweep of any older orphan .partial files left by a
|
||||
# crash on a previous run.
|
||||
for stale in self._backup_dir.glob("*.partial"):
|
||||
try:
|
||||
stale.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
size_bytes = file_path.stat().st_size
|
||||
self._last_backup_time = now
|
||||
logger.info(f"Backup created: {filename}")
|
||||
logger.info(
|
||||
"Backup created: %s (%d asset files, %.1f MB)",
|
||||
filename,
|
||||
asset_count,
|
||||
size_bytes / (1024 * 1024),
|
||||
)
|
||||
if size_bytes > _BACKUP_SIZE_WARN_BYTES:
|
||||
logger.warning(
|
||||
"Backup %s is %.1f MB — exceeds %d MB warning threshold; "
|
||||
"consider pruning the assets directory or lowering max_backups",
|
||||
filename,
|
||||
size_bytes / (1024 * 1024),
|
||||
_BACKUP_SIZE_WARN_BYTES // (1024 * 1024),
|
||||
)
|
||||
|
||||
def _prune_old_backups(self) -> None:
|
||||
max_backups = self._settings["max_backups"]
|
||||
files = sorted(self._backup_dir.glob(f"*{_BACKUP_EXT}"), key=lambda p: p.stat().st_mtime)
|
||||
files = sorted(self._iter_backup_files(), key=lambda p: p.stat().st_mtime)
|
||||
excess = len(files) - max_backups
|
||||
if excess > 0:
|
||||
for f in files[:excess]:
|
||||
@@ -179,9 +259,7 @@ class AutoBackupEngine:
|
||||
|
||||
def list_backups(self) -> List[dict]:
|
||||
backups = []
|
||||
for f in sorted(
|
||||
self._backup_dir.glob(f"*{_BACKUP_EXT}"), key=lambda p: p.stat().st_mtime, reverse=True
|
||||
):
|
||||
for f in sorted(self._iter_backup_files(), key=lambda p: p.stat().st_mtime, reverse=True):
|
||||
stat = f.stat()
|
||||
backups.append(
|
||||
{
|
||||
|
||||
@@ -283,6 +283,7 @@ async def lifespan(app: FastAPI):
|
||||
auto_backup_engine = AutoBackupEngine(
|
||||
backup_dir=_data_dir / "backups",
|
||||
db=db,
|
||||
assets_dir=Path(config.assets.assets_dir),
|
||||
)
|
||||
|
||||
# Create update service (checks for new releases)
|
||||
|
||||
Reference in New Issue
Block a user