feat(backup): harden restore — strict tar, two-phase rollback, degraded state

- Gate flag flipped synchronously in restore route before body parse,
  closing race where concurrent requests could slip through during awaits
- Strict tar extraction rejects symlinks, hardlinks, absolute paths, and
  parent-segment traversal entries
- Staging directory moved to a sibling of the uploads dir so atomic renames
  stay on the same filesystem (Windows %TEMP%/Linux tmpfs were causing EXDEV)
- Two-phase atomic-rename rollback for uploads — never rmrf the live dir
  before the safety is back in place; degraded flag set if rollback can't
  recover cleanly
- Prisma reconnect failure now marks process degraded; hooks.server.ts
  returns 503 to everything except /api/health so orchestrators can recycle
- /api/health distinguishes ok / restoring / degraded / db_down (503s)
- Legacy .db restore now runs structural SQLite integrity check before swap
- Schema-version check tightened: null on either side requires explicit
  allowSchemaMismatch override (was silently treated as a match)
- HMR/multi-import-safe global state (Vite dev reload no longer creates a
  fresh module while a restore is mid-flight)
- VACUUM INTO path: defensive rejection of quote/control characters
- Backup filename regex requires a leading alphanumeric (rejects '.tar.gz',
  '....db' which passed the previous loose pattern)
- Download: RFC 5987 Content-Disposition with filename* + sanitized fallback
- Restore route logs BACKUP_FAILED audit row with phase on failure
This commit is contained in:
2026-05-28 14:56:57 +03:00
parent f087551454
commit dab13518ef
6 changed files with 725 additions and 206 deletions
+33 -11
View File
@@ -6,7 +6,7 @@ import * as apiTokenService from '$lib/server/services/apiTokenService.js';
import { extractBearerToken } from '$lib/server/middleware/authenticate.js';
import { isBoardGuestAccessible } from '$lib/server/middleware/guestAccess.js';
import { initBackupScheduler } from '$lib/server/jobs/backupScheduler.js';
import { isRestoring } from '$lib/server/services/backupService.js';
import { isRestoring, isDegraded, getDegradedReason } from '$lib/server/services/backupService.js';
import { startScheduler as startHealthcheckScheduler } from '$lib/server/jobs/healthcheckScheduler.js';
import {
clearSessionCookies,
@@ -53,18 +53,20 @@ function isPublicPath(pathname: string): boolean {
}
export const handle: Handle = async ({ event, resolve }) => {
const reqPath = event.url.pathname;
// While a restore is mid-flight, Prisma is disconnected and the live DB
// file is being swapped. Any other request that touches the DB would
// crash; return 503 instead. The restore endpoint itself doesn't reach
// here a second time because the restore is serialized in
// backupService.restoreBackup (the _restoring flag is set inside it).
// file (and uploads tree) is being swapped. Any other request that
// touches the DB or the uploads dir would crash; return 503 instead.
//
// Whitelist: bundled SvelteKit assets (immutable, served from disk paths
// that are not affected by restore) and /api/health (so liveness probes
// can still observe the degraded state). /uploads/ is NOT whitelisted —
// uploaded files live in the dir being renamed and concurrent reads on
// Windows can block the rename outright.
if (isRestoring()) {
const { pathname: path } = event.url;
const isPublicAsset =
path.startsWith('/_app/') ||
path.startsWith('/favicon') ||
path === '/api/health';
if (!isPublicAsset) {
const isBundledAsset = reqPath.startsWith('/_app/') || reqPath.startsWith('/favicon');
if (!(isBundledAsset || reqPath === '/api/health')) {
return new Response(
JSON.stringify({
success: false,
@@ -82,6 +84,26 @@ export const handle: Handle = async ({ event, resolve }) => {
}
}
// After a failed restore + failed rollback the process is in an unknown
// state. Return 503 for everything except the health endpoint so the
// orchestrator can observe and recycle the container.
if (isDegraded() && reqPath !== '/api/health') {
return new Response(
JSON.stringify({
success: false,
data: null,
error: `Service degraded: ${getDegradedReason() ?? 'unknown reason'}. Restart required.`
}),
{
status: 503,
headers: {
'Content-Type': 'application/json',
'Retry-After': '60'
}
}
);
}
event.locals.user = null;
event.locals.session = null;
event.locals.apiTokenScope = null;
@@ -7,8 +7,8 @@ import * as tar from 'tar';
// --- Prisma + uploads mocks --------------------------------------------------
//
// backupService imports prisma which validates env. We mock the module so the
// import never touches the real DB; individual tests set per-call behaviour.
// backupService imports prisma (which validates env). We mock both prisma and
// the uploads helper so the SUT runs entirely off the test's temp dirs.
const reapplyPragmasMock = vi.fn(async () => undefined);
const executeRawUnsafeMock = vi.fn(async (sql: string): Promise<number> => {
@@ -16,7 +16,6 @@ const executeRawUnsafeMock = vi.fn(async (sql: string): Promise<number> => {
// integrity checks succeed.
const match = sql.match(/VACUUM INTO '(.+?)'/);
if (match) {
// 4096-byte pages — matches SQLite default. Use 8 pages.
const pageSize = 4096;
const pages = 8;
const header = Buffer.alloc(100);
@@ -55,18 +54,21 @@ let tmpRoot: string;
let backupDir: string;
let uploadsDir: string;
let dbDir: string;
let dbFilePath: string;
vi.mock('../../utils/uploads.js', () => ({
getUploadsDir: () => uploadsDir
}));
// Now import the SUT — after the mocks are in place.
const importService = async () => await import('../backupService.js');
async function makeUploadsTree() {
await fsp.mkdir(path.join(uploadsDir, 'wallpapers'), { recursive: true });
await fsp.writeFile(path.join(uploadsDir, 'icon.svg'), '<svg/>');
await fsp.writeFile(path.join(uploadsDir, 'wallpapers', 'sky.jpg'), Buffer.from([0xff, 0xd8, 0xff]));
await fsp.writeFile(
path.join(uploadsDir, 'wallpapers', 'sky.jpg'),
Buffer.from([0xff, 0xd8, 0xff])
);
}
async function listEntries(file: string): Promise<string[]> {
@@ -78,30 +80,91 @@ async function listEntries(file: string): Promise<string[]> {
return entries;
}
/**
 * Build a minimal, structurally plausible SQLite image for fixtures:
 * four zero-filled 4096-byte pages whose first 16 bytes are the SQLite
 * magic string and whose big-endian page-size field (offset 16) is 4096.
 * A fresh buffer is returned on every call so tests may stamp markers into it.
 */
function validSqliteBytes(): Buffer {
	const PAGE_BYTES = 4096;
	const PAGE_COUNT = 4;
	// Allocate the whole image zeroed up front, then stamp the header fields in place.
	const image = Buffer.alloc(PAGE_BYTES * PAGE_COUNT);
	image.write('SQLite format 3\x00', 0, 'latin1');
	image.writeUInt16BE(PAGE_BYTES, 16);
	return image;
}
/**
 * Fixture helper: assemble a `.tar.gz` backup inside `backupDir` and return
 * its filename.
 *
 * @param opts.manifest       When not `undefined`, serialised to `manifest.json`.
 * @param opts.dbBytes        When provided, written as `database.db`.
 * @param opts.includeUploads When true, adds `uploads/a.svg` and `uploads/b.png`.
 * @param opts.filename       Archive name; defaults to a timestamp plus a short
 *                            random suffix so two calls in the same millisecond
 *                            do not collide.
 * @returns The archive's filename (relative to `backupDir`).
 */
async function writeTarballBackup(opts: {
	manifest?: unknown;
	dbBytes?: Buffer;
	includeUploads?: boolean;
	filename?: string;
}) {
	const filename =
		opts.filename ?? `backup-${Date.now()}-${crypto.randomBytes(2).toString('hex')}.tar.gz`;
	const work = await fsp.mkdtemp(path.join(os.tmpdir(), 'wal-mk-'));
	try {
		if (opts.manifest !== undefined) {
			await fsp.writeFile(
				path.join(work, 'manifest.json'),
				JSON.stringify(opts.manifest, null, 2),
				'utf8'
			);
		}
		if (opts.dbBytes) {
			await fsp.writeFile(path.join(work, 'database.db'), opts.dbBytes);
		}
		if (opts.includeUploads) {
			await fsp.mkdir(path.join(work, 'uploads'), { recursive: true });
			await fsp.writeFile(path.join(work, 'uploads', 'a.svg'), '<svg/>');
			await fsp.writeFile(
				path.join(work, 'uploads', 'b.png'),
				Buffer.from([0x89, 0x50, 0x4e, 0x47])
			);
		}
		const entries = await fsp.readdir(work);
		await tar.create({ cwd: work, gzip: true, file: path.join(backupDir, filename) }, entries);
	} finally {
		// Always remove the scratch dir, including when a write or tar.create
		// throws, so failing tests do not litter os.tmpdir().
		await fsp.rm(work, { recursive: true, force: true });
	}
	return filename;
}
/**
 * Build a version-1 backup manifest describing `db`.
 *
 * @param db            Database bytes; their SHA-256 becomes the
 *                      `database.db` checksum and their length the `dbSize`.
 * @param schemaVersion Value stored verbatim in the manifest (may be `null`).
 * @returns A plain manifest object, typed `unknown` so callers serialise it as-is.
 */
function manifestFor(db: Buffer, schemaVersion: string | null = 'test_migration'): unknown {
	const hasher = crypto.createHash('sha256');
	hasher.update(db);
	const checksums = { 'database.db': `sha256:${hasher.digest('hex')}` };
	const createdAt = new Date().toISOString();
	return {
		version: '1',
		createdAt,
		appVersion: '0.1.0',
		schemaVersion,
		dbSize: db.length,
		uploadFileCount: 0,
		checksums
	};
}
beforeEach(async () => {
tmpRoot = await fsp.mkdtemp(path.join(os.tmpdir(), 'wal-bs-test-'));
backupDir = path.join(tmpRoot, 'backups');
uploadsDir = path.join(tmpRoot, 'uploads');
dbDir = path.join(tmpRoot, 'db');
dbFilePath = path.join(dbDir, 'test.db');
await fsp.mkdir(backupDir, { recursive: true });
await fsp.mkdir(uploadsDir, { recursive: true });
await fsp.mkdir(dbDir, { recursive: true });
process.env.BACKUPS_DIR = backupDir;
// Use an absolute file: URL so getDatabasePath's path.resolve treats it
// as already-absolute and skips the prisma/ prefix.
process.env.DATABASE_URL = `file:${path.join(dbDir, 'test.db').replace(/\\/g, '/')}`;
// Absolute file: URL so getDatabasePath treats it as already-absolute.
process.env.DATABASE_URL = `file:${dbFilePath.replace(/\\/g, '/')}`;
// Pretend the live DB exists so createBackup's disk-space check has data.
await fsp.writeFile(path.join(dbDir, 'test.db'), Buffer.alloc(4096));
await fsp.writeFile(dbFilePath, validSqliteBytes());
executeRawUnsafeMock.mockClear();
queryRawUnsafeMock.mockClear();
queryRawUnsafeMock.mockImplementation(async (_sql: string) => [
{ migration_name: 'test_migration' }
]);
disconnectMock.mockClear();
disconnectMock.mockImplementation(async () => undefined);
connectMock.mockClear();
connectMock.mockImplementation(async () => undefined);
sessionDeleteManyMock.mockClear();
sessionDeleteManyMock.mockImplementation(async () => ({ count: 0 }));
reapplyPragmasMock.mockClear();
// reset live DB-path resolver: backupService reads DATABASE_URL each call.
// Reset cross-test globalThis state (restoring/degraded/stats).
const g = globalThis as unknown as { __walBackupState?: unknown };
delete g.__walBackupState;
});
afterEach(async () => {
@@ -116,7 +179,7 @@ describe('backupService — listing & path safety', () => {
await fsp.writeFile(path.join(backupDir, 'backup-2026-01-01T00-00-00.tar.gz'), 'a');
await fsp.writeFile(path.join(backupDir, 'backup-2026-03-01T00-00-00.tar.gz'), 'b');
await fsp.writeFile(path.join(backupDir, 'backup-2025-12-31T23-59-59.db'), 'c');
await fsp.writeFile(path.join(backupDir, 'unrelated.txt'), 'noise'); // should be filtered
await fsp.writeFile(path.join(backupDir, 'unrelated.txt'), 'noise');
const { listBackups } = await importService();
const list = listBackups();
@@ -130,12 +193,18 @@ describe('backupService — listing & path safety', () => {
expect(list.find((b) => b.filename.endsWith('.db'))?.format).toBe('db');
});
it('getBackupFilePath rejects path traversal', async () => {
it('getBackupFilePath rejects path traversal and dot-only basenames', async () => {
const { getBackupFilePath } = await importService();
expect(getBackupFilePath('../../etc/passwd')).toBeNull();
expect(getBackupFilePath('subdir/foo.tar.gz')).toBeNull();
expect(getBackupFilePath('foo.txt')).toBeNull();
expect(getBackupFilePath('foo.tar.gz.exe')).toBeNull();
// Dot-only basenames before the legitimate extension:
expect(getBackupFilePath('.tar.gz')).toBeNull();
expect(getBackupFilePath('..tar.gz')).toBeNull();
expect(getBackupFilePath('....db')).toBeNull();
expect(getBackupFilePath('-leading-dash.tar.gz')).toBeNull();
expect(getBackupFilePath('_leading-underscore.tar.gz')).toBeNull();
});
it('getBackupFilePath returns null for missing files', async () => {
@@ -143,10 +212,17 @@ describe('backupService — listing & path safety', () => {
expect(getBackupFilePath('does-not-exist.tar.gz')).toBeNull();
});
it('getBackupFilePath accepts legitimate filenames', async () => {
const goodName = 'backup-2026-05-28T10-00-00.tar.gz';
await fsp.writeFile(path.join(backupDir, goodName), 'x');
const { getBackupFilePath } = await importService();
expect(getBackupFilePath(goodName)).toBe(path.join(backupDir, goodName));
});
it('deleteBackup silently rejects bad filenames', async () => {
const { deleteBackup } = await importService();
expect(deleteBackup('../escape.tar.gz')).toBe(false);
expect(deleteBackup('legit.tar.gz')).toBe(false); // missing
expect(deleteBackup('legit.tar.gz')).toBe(false);
});
it('enforceRetention keeps the N newest', async () => {
@@ -160,8 +236,7 @@ describe('backupService — listing & path safety', () => {
for (const n of names) await fsp.writeFile(path.join(backupDir, n), 'x');
const { enforceRetention, listBackups } = await importService();
const deleted = enforceRetention(2);
expect(deleted).toBe(3);
expect(enforceRetention(2)).toBe(3);
expect(listBackups().map((b) => b.filename)).toEqual([
'backup-2026-05-01T00-00-00.tar.gz',
'backup-2026-04-01T00-00-00.tar.gz'
@@ -174,6 +249,18 @@ describe('backupService — listing & path safety', () => {
});
});
describe('backupService — beginRestoreWindow / endRestoreWindow', () => {
it('flips the isRestoring flag synchronously and blocks concurrent windows', async () => {
const svc = await importService();
expect(svc.isRestoring()).toBe(false);
svc.beginRestoreWindow();
expect(svc.isRestoring()).toBe(true);
expect(() => svc.beginRestoreWindow()).toThrow(/already in progress/);
svc.endRestoreWindow();
expect(svc.isRestoring()).toBe(false);
});
});
describe('backupService — createBackup', () => {
it('produces a tar.gz containing manifest, database.db and uploads tree', async () => {
await makeUploadsTree();
@@ -190,7 +277,6 @@ describe('backupService — createBackup', () => {
expect(entries).toContain('database.db');
expect(entries.some((e) => e.startsWith('uploads/'))).toBe(true);
// Extract and validate manifest
const extractDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'wal-verify-'));
await tar.extract({ cwd: extractDir, file: archivePath });
const manifest = JSON.parse(
@@ -209,46 +295,7 @@ describe('backupService — createBackup', () => {
});
});
describe('backupService — restoreBackup', () => {
async function writeTarballBackup(opts: {
manifest?: unknown;
dbBytes?: Buffer;
includeUploads?: boolean;
filename?: string;
}) {
const filename = opts.filename ?? `backup-${Date.now()}.tar.gz`;
const work = await fsp.mkdtemp(path.join(os.tmpdir(), 'wal-mk-'));
if (opts.manifest !== undefined) {
await fsp.writeFile(
path.join(work, 'manifest.json'),
JSON.stringify(opts.manifest, null, 2),
'utf8'
);
}
if (opts.dbBytes) {
await fsp.writeFile(path.join(work, 'database.db'), opts.dbBytes);
}
if (opts.includeUploads) {
await fsp.mkdir(path.join(work, 'uploads'), { recursive: true });
await fsp.writeFile(path.join(work, 'uploads', 'a.svg'), '<svg/>');
}
const entries = await fsp.readdir(work);
await tar.create({ cwd: work, gzip: true, file: path.join(backupDir, filename) }, entries);
await fsp.rm(work, { recursive: true, force: true });
return filename;
}
function validSqliteBytes(): Buffer {
const pageSize = 4096;
const pages = 4;
const header = Buffer.alloc(100);
Buffer.from([
0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00
]).copy(header, 0);
header.writeUInt16BE(pageSize, 16);
return Buffer.concat([header, Buffer.alloc(pageSize * pages - 100)]);
}
describe('backupService — restoreBackup validation', () => {
it('rejects non-existent backup', async () => {
const { restoreBackup } = await importService();
await expect(restoreBackup('not-there.tar.gz')).rejects.toThrow(/not found/i);
@@ -306,48 +353,221 @@ describe('backupService — restoreBackup', () => {
it('aborts on schema version mismatch unless overridden', async () => {
const db = validSqliteBytes();
const hash = crypto.createHash('sha256').update(db).digest('hex');
const filename = await writeTarballBackup({
manifest: {
version: '1',
createdAt: '',
appVersion: '',
schemaVersion: 'OLD_migration',
dbSize: db.length,
uploadFileCount: 0,
checksums: { 'database.db': `sha256:${hash}` }
},
manifest: manifestFor(db, 'OLD_migration'),
dbBytes: db
});
const { restoreBackup } = await importService();
await expect(restoreBackup(filename)).rejects.toThrow(/Schema version mismatch/);
await expect(restoreBackup(filename, { allowSchemaMismatch: true })).resolves.toMatchObject({
restored: true,
format: 'tar.gz',
schemaVersionMatched: false
});
});
it('aborts when backup manifest has null schemaVersion (treated as unknown)', async () => {
const db = validSqliteBytes();
const filename = await writeTarballBackup({
manifest: manifestFor(db, null),
dbBytes: db
});
const { restoreBackup } = await importService();
await expect(restoreBackup(filename)).rejects.toThrow(/Schema version mismatch/);
});
it('aborts when live schemaVersion is null (DB unreachable)', async () => {
queryRawUnsafeMock.mockImplementation(async () => []);
const db = validSqliteBytes();
const filename = await writeTarballBackup({
manifest: manifestFor(db, 'test_migration'),
dbBytes: db
});
const { restoreBackup } = await importService();
await expect(restoreBackup(filename)).rejects.toThrow(/Schema version mismatch/);
});
it('rejects legacy .db file with bogus contents', async () => {
const bogus = path.join(backupDir, 'bogus.db');
await fsp.writeFile(bogus, 'not a sqlite header');
const { restoreBackup } = await importService();
await expect(restoreBackup('bogus.db')).rejects.toThrow(/not a valid SQLite/);
});
});
it('refuses concurrent restores via _restoring flag', async () => {
describe('backupService — restoreBackup tar safety', () => {
/**
 * Fixture helper for hostile archives: creates a scratch dir, lets
 * `makeWork` populate it and choose which entries to archive, then writes
 * the resulting `.tar.gz` into `backupDir`.
 *
 * @param makeWork Receives the scratch dir path; returns the entry names
 *                 (relative to that dir) to pass to `tar.create`.
 * @returns The archive's filename (relative to `backupDir`).
 */
async function writeTarWithEntry(makeWork: (work: string) => Promise<string[]>) {
	const filename = `backup-evil-${crypto.randomBytes(2).toString('hex')}.tar.gz`;
	const work = await fsp.mkdtemp(path.join(os.tmpdir(), 'wal-evil-'));
	try {
		const entries = await makeWork(work);
		await tar.create({ cwd: work, gzip: true, file: path.join(backupDir, filename) }, entries);
	} finally {
		// Remove the scratch dir even when makeWork or tar.create throws.
		await fsp.rm(work, { recursive: true, force: true });
	}
	return filename;
}
it('rejects tarballs that contain a symlink entry', async () => {
	const db = validSqliteBytes();
	// Records whether the host let us create the symlink. Kept on an object
	// so the value written inside the callback is what the assertion reads.
	const fixture = { linkCreated: false };
	const filename = await writeTarWithEntry(async (work) => {
		await fsp.writeFile(path.join(work, 'manifest.json'), JSON.stringify(manifestFor(db)));
		await fsp.writeFile(path.join(work, 'database.db'), db);
		try {
			await fsp.symlink('/etc/passwd', path.join(work, 'evil-link'));
			fixture.linkCreated = true;
			return ['manifest.json', 'database.db', 'evil-link'];
		} catch {
			// Symlinks may need elevated privileges on Windows. Without one
			// the archive holds only regular files, so the SUT never sees a
			// link entry.
			return ['manifest.json', 'database.db'];
		}
	});

	const { restoreBackup } = await importService();
	if (fixture.linkCreated) {
		// The archive really contains a link entry: the restore MUST be
		// rejected. A silent success here is exactly the regression this
		// test exists to catch.
		await expect(restoreBackup(filename)).rejects.toThrow(/link entry|SymbolicLink/i);
	} else {
		// Symlink creation was not permitted on this host; the archive is
		// an ordinary one and should simply restore.
		await expect(restoreBackup(filename)).resolves.toBeDefined();
	}
});
it('accepts a normal tarball with no special entries', async () => {
// Defence-in-depth check: the SUT's tar filter also rejects absolute
// and `..`-containing entry paths, but node-tar's high-level
// create() refuses to produce such archives in the first place, so
// we can't easily generate one as a fixture from JS. This test
// instead confirms the filter does NOT false-positive on a normal
// archive — the negative paths are covered by code review.
const db = validSqliteBytes();
const hash = crypto.createHash('sha256').update(db).digest('hex');
const filename = await writeTarballBackup({
manifest: {
version: '1',
createdAt: '',
appVersion: '',
schemaVersion: 'test_migration',
dbSize: db.length,
uploadFileCount: 0,
checksums: { 'database.db': `sha256:${hash}` }
},
manifest: manifestFor(db),
dbBytes: db,
includeUploads: true
});
const { restoreBackup } = await importService();
await expect(restoreBackup(filename)).resolves.toBeDefined();
});
});
describe('backupService — restoreBackup happy path & rollback', () => {
it('happy path: swaps DB and uploads, purges sessions, leaves no safety files', async () => {
// Mark the live DB so we can prove it really got swapped.
const liveMarker = validSqliteBytes();
liveMarker.write('LIVE', 200);
await fsp.writeFile(dbFilePath, liveMarker);
const liveDbContents = await fsp.readFile(dbFilePath);
await makeUploadsTree();
const liveIconBefore = await fsp.readFile(path.join(uploadsDir, 'icon.svg'), 'utf8');
const db = validSqliteBytes();
db.write('NEWB', 200);
const filename = await writeTarballBackup({
manifest: manifestFor(db),
dbBytes: db,
includeUploads: true
});
const { restoreBackup } = await importService();
const result = await restoreBackup(filename);
expect(result.restored).toBe(true);
expect(result.format).toBe('tar.gz');
expect(result.schemaVersionMatched).toBe(true);
expect(disconnectMock).toHaveBeenCalledTimes(1);
expect(connectMock).toHaveBeenCalledTimes(1);
expect(reapplyPragmasMock).toHaveBeenCalledTimes(1);
expect(sessionDeleteManyMock).toHaveBeenCalledTimes(1);
// DB content swapped:
const swappedDb = await fsp.readFile(dbFilePath);
expect(swappedDb.equals(db)).toBe(true);
expect(swappedDb.equals(liveDbContents)).toBe(false);
// Uploads swapped — old icon.svg replaced by the staged a.svg:
expect(await fsp.readFile(path.join(uploadsDir, 'a.svg'), 'utf8')).toBe('<svg/>');
await expect(fsp.access(path.join(uploadsDir, 'icon.svg'))).rejects.toThrow();
expect(liveIconBefore).toBe('<svg/>'); // sanity on the prior content
// No safety files left:
const dbSiblings = await fsp.readdir(dbDir);
expect(dbSiblings.some((n) => n.includes('pre-restore'))).toBe(false);
const tmpSiblings = await fsp.readdir(tmpRoot);
expect(tmpSiblings.some((n) => n.includes('pre-restore'))).toBe(false);
});
it('rollback restores DB from safety when Prisma reconnect fails', async () => {
const liveMarker = validSqliteBytes();
liveMarker.write('LIVE', 200);
await fsp.writeFile(dbFilePath, liveMarker);
const liveDbContents = await fsp.readFile(dbFilePath);
await makeUploadsTree();
const db = validSqliteBytes();
db.write('NEWB', 200);
const filename = await writeTarballBackup({
manifest: manifestFor(db),
dbBytes: db,
includeUploads: true
});
// Make $connect throw on the post-swap reconnect AND on the rollback
// reconnect (so we see the degraded path). $disconnect succeeds.
connectMock.mockImplementation(async () => {
throw new Error('engine vanished');
});
const svc = await importService();
await expect(svc.restoreBackup(filename)).rejects.toThrow();
// DB should be back to its pre-swap content.
const after = await fsp.readFile(dbFilePath);
expect(after.equals(liveDbContents)).toBe(true);
// Process should be marked degraded so the orchestrator can recycle it.
expect(svc.isDegraded()).toBe(true);
expect(svc.getDegradedReason()).toMatch(/prisma reconnect failed/i);
// Restore window is reset.
expect(svc.isRestoring()).toBe(false);
});
it('rollback restores uploads when post-swap reconnect fails', async () => {
await makeUploadsTree();
const beforeIcon = await fsp.readFile(path.join(uploadsDir, 'icon.svg'), 'utf8');
expect(beforeIcon).toBe('<svg/>');
const db = validSqliteBytes();
db.write('NEWB', 200);
const filename = await writeTarballBackup({
manifest: manifestFor(db),
dbBytes: db,
includeUploads: true
});
// Make $connect throw on the post-swap reconnect. The rollback path
// must restore both DB and uploads from their safety paths.
connectMock.mockImplementationOnce(async () => {
throw new Error('reconnect failed');
});
const svc = await importService();
await expect(svc.restoreBackup(filename)).rejects.toThrow();
const restoredIcon = await fsp.readFile(path.join(uploadsDir, 'icon.svg'), 'utf8');
expect(restoredIcon).toBe(beforeIcon);
// The staged uploads (a.svg/b.png) should not be live.
await expect(fsp.access(path.join(uploadsDir, 'a.svg'))).rejects.toThrow();
});
it('refuses concurrent restores via the restore window flag', async () => {
const db = validSqliteBytes();
const filename = await writeTarballBackup({
manifest: manifestFor(db),
dbBytes: db,
includeUploads: true
});
@@ -357,6 +577,33 @@ describe('backupService — restoreBackup', () => {
await first;
expect(svc.isRestoring()).toBe(false);
});
it('legacy .db restore happy path swaps DB only', async () => {
// Overwrite the live DB with a distinguishable marker page so we can
// see whether it actually got swapped (the default fixture and the
// "newDb" below would otherwise be byte-identical).
const liveMarker = validSqliteBytes();
liveMarker.write('LIVE', 200);
await fsp.writeFile(dbFilePath, liveMarker);
await makeUploadsTree();
const beforeIcon = await fsp.readFile(path.join(uploadsDir, 'icon.svg'), 'utf8');
const newDb = validSqliteBytes();
newDb.write('NEWB', 200);
await fsp.writeFile(path.join(backupDir, 'legacy.db'), newDb);
const { restoreBackup } = await importService();
const result = await restoreBackup('legacy.db', { allowSchemaMismatch: true });
expect(result.format).toBe('db');
expect(result.uploadFileCount).toBe(0);
const after = await fsp.readFile(dbFilePath);
expect(after.equals(newDb)).toBe(true);
expect(after.equals(liveMarker)).toBe(false);
// Uploads unchanged for legacy restores.
expect(await fsp.readFile(path.join(uploadsDir, 'icon.svg'), 'utf8')).toBe(beforeIcon);
});
});
describe('backupService — scheduler stats', () => {
@@ -379,4 +626,3 @@ describe('backupService — scheduler stats', () => {
expect(after.lastSuccessAt).not.toBeNull();
});
});
+288 -93
View File
@@ -27,10 +27,93 @@ const SQLITE_MAGIC = Buffer.from([
0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00
]);
let _restoring = false;
// ---- HMR / multi-call-safe global state ------------------------------------
// `_restoring`, `_degraded`, and `_stats` must survive Vite HMR reloads in dev
// (otherwise a fresh module instance sees `_restoring=false` while a restore
// is still mid-flight on the original instance) and behave consistently when
// the SUT is imported by multiple test files in the same process.
//
// In a multi-replica production deployment the gate STILL only protects the
// replica running the restore — peers happily query Prisma during the swap.
// Use a single active replica for restores (set RUN_SCHEDULERS=false on
// peers and operate the restore from a designated maintenance instance).
/** Scheduler counters and last-outcome details kept in the shared runtime state. */
export interface BackupSchedulerStats {
	successCount: number;
	failureCount: number;
	lastSuccessAt: string | null;
	lastFailureAt: string | null;
	lastFailureReason: string | null;
	diskCheckAvailable: boolean;
}

/** Process-wide backup/restore state, stored on `globalThis`. */
interface BackupRuntimeState {
	/**
	 * Gate flag raised when an HTTP route opens a restore window; while it is
	 * set the hooks.server.ts handler answers other clients with 503. Kept
	 * separate from `restoreOp` so the route can raise it before parsing the
	 * body without blocking the restoreBackup() call that follows.
	 */
	restoring: boolean;
	/**
	 * Internal serialisation of restoreBackup() itself: at most one restore
	 * in flight, even for direct callers (scripts, tests) that never call
	 * beginRestoreWindow.
	 */
	restoreOp: boolean;
	/** True once the process has been marked degraded. */
	degraded: boolean;
	/** Reason recorded alongside `degraded`; `null` while healthy. */
	degradedReason: string | null;
	stats: BackupSchedulerStats;
}

/** Build the initial state: nothing restoring, not degraded, zeroed stats. */
function createInitialBackupState(): BackupRuntimeState {
	const stats: BackupSchedulerStats = {
		successCount: 0,
		failureCount: 0,
		lastSuccessAt: null,
		lastFailureAt: null,
		lastFailureReason: null,
		diskCheckAvailable: true
	};
	return {
		restoring: false,
		restoreOp: false,
		degraded: false,
		degradedReason: null,
		stats
	};
}

const g = globalThis as unknown as { __walBackupState?: BackupRuntimeState };
// Reuse the state object already parked on globalThis (an earlier evaluation
// of this module) when there is one; otherwise create it and publish it.
const state: BackupRuntimeState =
	g.__walBackupState ?? (g.__walBackupState = createInitialBackupState());
export function isRestoring(): boolean {
return _restoring;
return state.restoring;
}
/**
 * Open the "restore window": raise the gate flag that isRestoring() reports.
 *
 * Intended to be called by the HTTP route synchronously, BEFORE its first
 * await, so concurrent requests are already being 503'd while the body is
 * read and validated. The route must pair this with endRestoreWindow() in a
 * `finally` block. restoreBackup() carries its own internal guard, so a
 * caller that skips this function is still serialised.
 *
 * @throws Error when a restore window is already open.
 */
export function beginRestoreWindow(): void {
	const windowAlreadyOpen = state.restoring;
	if (windowAlreadyOpen) {
		throw new Error('A restore is already in progress');
	}
	state.restoring = true;
}
/**
 * Close the restore window by clearing the gate flag. Unconditional, so it
 * is safe to call from a `finally` block whether or not the window is open.
 */
export function endRestoreWindow(): void {
	state.restoring = false;
}
/** Report whether the process has been marked degraded. */
export function isDegraded(): boolean {
	const { degraded } = state;
	return degraded;
}
/** Return the reason recorded when the process was marked degraded, or `null` if none. */
export function getDegradedReason(): string | null {
	const { degradedReason } = state;
	return degradedReason;
}
/**
 * Flag the process as degraded and record why. A later call overwrites the
 * previously stored reason.
 *
 * @param reason Human-readable explanation, later returned by getDegradedReason().
 */
function markDegraded(reason: string): void {
	state.degradedReason = reason;
	state.degraded = true;
}
export interface BackupInfo {
@@ -98,24 +181,15 @@ async function sha256OfFile(filePath: string): Promise<string> {
}
/**
* Run SQLite's own integrity check on a database file. Returns true only when
* the engine reports "ok". Catches malformed files that pass the magic-header
* check (truncated DBs, partial copies, etc.).
* Structural smoke test for a SQLite database file. Verifies magic header,
* sane page size, and that the file size is an integer multiple of the page
* size. Catches truncated / partial copies that pass the magic-header check.
* A genuine corruption check (PRAGMA integrity_check) would require opening
* the DB; this is the cheapest signal we can compute without that.
*/
async function isSqliteIntegrityOk(filePath: string): Promise<boolean> {
// Use a child Prisma-less raw verification: open via better-sqlite3? Not a
// dep here. Use SQLite's own header AND a parse-trial via prisma against a
// temp ATTACH would lock the live DB. Cheapest cross-platform path: ask
// SQLite to open and PRAGMA the file via the sqlite3 CLI when available;
// otherwise fall back to a structural smoke test (last 100 bytes contain
// a valid page footer). The CLI presence cannot be assumed in the
// scratch container, so do a best-effort structural check here and rely
// on Prisma reconnect to detect catastrophic corruption.
try {
const stats = await fsp.stat(filePath);
// SQLite pages are 512..65536 bytes, power of two. The DB size must be a
// multiple of the page size. The page size lives at bytes 16-17, big-endian
// (with the special value 1 meaning 65536).
if (stats.size < 100) return false;
const fh = await fsp.open(filePath, 'r');
try {
@@ -175,13 +249,22 @@ async function copyDirRecursive(src: string, dest: string): Promise<number> {
return count;
}
let diskCheckWarned = false;
async function checkFreeDiskSpace(dir: string, minBytes: number): Promise<boolean> {
try {
const stats = await fsp.statfs(dir);
const free = stats.bavail * stats.bsize;
return free >= minBytes;
} catch {
return true; // statfs unavailable (Windows < Node 18.15) — skip check
} catch (err) {
if (!diskCheckWarned) {
diskCheckWarned = true;
state.stats.diskCheckAvailable = false;
console.warn(
'[backup] fsp.statfs unavailable on this platform; disk-space checks will be skipped:',
err
);
}
return true;
}
}
@@ -189,6 +272,10 @@ async function rmrf(target: string): Promise<void> {
await fsp.rm(target, { recursive: true, force: true });
}
/** Return 8 lowercase hex characters drawn from 4 cryptographically random bytes. */
function shortRandomSuffix(): string {
	const entropy = crypto.randomBytes(4);
	return entropy.toString('hex');
}
export async function createBackup(): Promise<BackupInfo> {
const backupDir = ensureBackupDir();
@@ -212,7 +299,20 @@ export async function createBackup(): Promise<BackupInfo> {
const stagedUploads = path.join(workDir, 'uploads');
try {
const safeStagedDb = stagedDb.replace(/\\/g, '/').replace(/'/g, "''");
// VACUUM INTO is issued as raw SQL with the target path interpolated into
// a single-quoted string literal. The path comes from os.tmpdir() +
// mkdtemp(random), so it is system-controlled and cannot contain quote or
// control characters today; this check is belt-and-braces against a future
// refactor that lets user-influenced paths flow in. Rather than trying to
// escape such a path, refuse it outright.
// eslint-disable-next-line no-control-regex
if (/['"`\x00-\x1f]/.test(stagedDb)) {
throw new Error('Refusing to VACUUM INTO a path containing quote or control characters');
}
const safeStagedDb = stagedDb.replace(/\\/g, '/');
await prisma.$executeRawUnsafe(`VACUUM INTO '${safeStagedDb}'`);
const dbChecksum = await sha256OfFile(stagedDb);
@@ -273,11 +373,18 @@ export function listBackups(): ReadonlyArray<BackupInfo> {
.sort((a, b) => b.filename.localeCompare(a.filename));
}
/**
 * Pattern every backup filename must satisfy: an ASCII letter or digit
 * first, then any run of word characters, dots or dashes, ending in
 * `.tar.gz` or `.db`.
 *
 * The mandatory leading alphanumeric rules out names such as `.tar.gz`,
 * `..tar.gz` and `....db`. Those survive a `path.basename(x) === x` check
 * but behave surprisingly at the shell and on case-folding filesystems.
 */
const FILENAME_RE = /^[A-Za-z0-9][\w.-]*\.(tar\.gz|db)$/;
export function getBackupFilePath(filename: string): string | null {
const sanitized = path.basename(filename);
if (sanitized !== filename) return null;
// Allow alphanumerics, dot, dash, underscore. Extension must be .tar.gz or .db.
if (!/^[\w.-]+\.(tar\.gz|db)$/.test(sanitized)) return null;
if (!FILENAME_RE.test(sanitized)) return null;
const fullPath = path.join(getBackupDir(), sanitized);
if (!fs.existsSync(fullPath)) return null;
return fullPath;
@@ -292,7 +399,7 @@ export function deleteBackup(filename: string): boolean {
/** Caller-supplied switches for a restore operation. */
export interface RestoreOptions {
  /**
   * When true, allow restoring even if the manifest schemaVersion differs
   * from the live schema, or either side is unknown. Defaults to false.
   */
  readonly allowSchemaMismatch?: boolean;
}
@@ -307,32 +414,48 @@ export interface RestoreResult {
* Restore the DB (and uploads, for tar.gz backups) from a backup file.
*
* Hardened ordering:
* 1. Validate format + (for tar.gz) extract to staging with strict mode +
*    reject symlink/hardlink entries + verify manifest + sha256 + structural
*    integrity of the staged DB.
* 2. Cross-check schema version. Mismatch OR null-on-either-side aborts
*    unless allowSchemaMismatch is set.
* 3. The caller (HTTP route) has already set state.restoring=true so other
*    requests are 503'd from hooks.server.ts. We additionally guard inside
*    this function for callers that invoke it directly (tests, scripts).
* 4. Snapshot live DB and (tar.gz restores only) the uploads dir to
*    *.pre-restore-<ts>-<rand>.
* 5. Disconnect Prisma; atomic rename of staged DB and (tar.gz restores
*    only) the uploads tree.
* 6. Reconnect Prisma; re-apply pragmas.
* 7. Purge any sessions that may have been written by races (defence-in-
*    depth — the restored DB itself only contains backup-time sessions).
* 8. On any failure mid-swap: two-phase atomic-rename rollback that never
*    uses rmrf on the live directory before the safety is back in place,
*    followed by a Prisma reconnect. If the rollback or that reconnect
*    fails, the process is marked degraded and the error is logged to
*    stderr — the orchestrator's health probe will pick it up via
*    /api/health.
*/
export async function restoreBackup(
filename: string,
options: RestoreOptions = {}
): Promise<RestoreResult> {
if (_restoring) {
// Serialise restoreBackup against itself even when the route already
// opened the gate window. The two flags are independent: the route owns
// `restoring` (the gate); restoreBackup owns `restoreOp` (the lock).
if (state.restoreOp) {
throw new Error('A restore is already in progress');
}
_restoring = true;
state.restoreOp = true;
// If we were called directly (no route), also flip the gate so concurrent
// requests are 503'd. Track ownership so we don't clear someone else's flag.
const ownsGateFlag = !state.restoring;
if (ownsGateFlag) {
state.restoring = true;
}
let workDir: string | null = null;
const dbPath = getDatabasePath();
const dbSafety = `${dbPath}.pre-restore-${Date.now()}.bak`;
const safetySuffix = `${Date.now()}-${shortRandomSuffix()}`;
const dbSafety = `${dbPath}.pre-restore-${safetySuffix}.bak`;
const uploadsDir = getUploadsDir();
const uploadsSafety = `${uploadsDir}.pre-restore-${Date.now()}`;
const uploadsSafety = `${uploadsDir}.pre-restore-${safetySuffix}`;
let dbSwapped = false;
let uploadsSwapped = false;
@@ -351,10 +474,52 @@ export async function restoreBackup(
if (!isSqliteFile(backupPath)) {
throw new Error(`File is not a valid SQLite database: ${filename}`);
}
if (!(await isSqliteIntegrityOk(backupPath))) {
throw new Error(`File fails SQLite integrity check: ${filename}`);
}
stagedDb = backupPath;
} else {
workDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'wal-restore-'));
await tar.extract({ cwd: workDir, file: backupPath });
// Stage the extraction in a SIBLING of the live uploads dir so the
// subsequent rename is a same-filesystem operation. Renaming across
// volumes (Windows %TEMP% vs the data drive; Linux tmpfs vs disk)
// fails with EXDEV / EPERM, defeating the atomic-swap design.
const stagingParent = path.dirname(uploadsDir);
await fsp.mkdir(stagingParent, { recursive: true });
workDir = await fsp.mkdtemp(path.join(stagingParent, '.wal-restore-'));
// Strict tar extraction:
// - reject symlink / hardlink entries (would otherwise let a
// malicious tarball write outside workDir on subsequent
// file entries).
// - reject absolute paths or entries containing `..` segments
// (defence-in-depth — node-tar strips these by default but
// `strict: true` makes the rejection explicit).
await tar.extract({
cwd: workDir,
file: backupPath,
strict: true,
filter: (entryPath, statOrEntry) => {
// During extraction the second argument is a ReadEntry which
// carries `.type` ('File' | 'SymbolicLink' | 'Link' | ...).
// `Stats` is the create-time variant and has no `.type`; we
// guard with `in` to keep TypeScript narrowing happy.
const entryType =
'type' in statOrEntry ? (statOrEntry as { type?: string }).type : undefined;
if (entryType === 'SymbolicLink' || entryType === 'Link') {
throw new Error(
`Backup contains link entry (${entryType}): ${entryPath} — refusing to extract`
);
}
const normalized = entryPath.replace(/\\/g, '/');
if (path.isAbsolute(normalized)) {
throw new Error(`Backup contains absolute path: ${entryPath}`);
}
if (normalized.split('/').includes('..')) {
throw new Error(`Backup contains parent-segment traversal: ${entryPath}`);
}
return true;
}
});
const manifestPath = path.join(workDir, 'manifest.json');
if (!fs.existsSync(manifestPath)) {
@@ -392,54 +557,61 @@ export async function restoreBackup(
if (fs.existsSync(uploadsStaged)) stagedUploads = uploadsStaged;
}
// Schema-version check: tighten to require explicit override if either
// side is null. Null on the live side typically means the DB is
// corrupt or empty — precisely the case we don't want to silently
// restore over.
const liveSchemaVersion = await getSchemaVersion();
const schemaVersionMatched =
!manifest?.schemaVersion ||
!liveSchemaVersion ||
manifest.schemaVersion === liveSchemaVersion;
const manifestSchema = manifest?.schemaVersion ?? null;
const bothKnown = !!manifestSchema && !!liveSchemaVersion;
const schemaVersionMatched = bothKnown && manifestSchema === liveSchemaVersion;
if (!schemaVersionMatched && !options.allowSchemaMismatch) {
const reason = !bothKnown
? `unknown schema version on ${!manifestSchema ? 'backup' : 'live database'}`
: `backup=${manifestSchema}, live=${liveSchemaVersion}`;
throw new Error(
`Schema version mismatch: backup=${manifest?.schemaVersion ?? 'unknown'}, live=${liveSchemaVersion ?? 'unknown'}. Restore aborted to prevent data loss. Re-trigger with allowSchemaMismatch to override.`
`Schema version mismatch: ${reason}. Restore aborted to prevent data loss. Re-trigger with allowSchemaMismatch to override.`
);
}
// Snapshot live state for rollback.
// 1. Snapshot live state for rollback. Uploads are only touched for
// tar.gz restores — legacy .db backups never contained uploads, so
// preserving the live uploads tree is the safer default.
if (fs.existsSync(dbPath)) {
await fsp.copyFile(dbPath, dbSafety);
}
if (fs.existsSync(uploadsDir)) {
if (!isLegacyDb && fs.existsSync(uploadsDir)) {
await fsp.rename(uploadsDir, uploadsSafety);
}
await prisma.$disconnect();
// DB: stage → atomic rename over live path.
const dbStaging = `${dbPath}.restore.tmp`;
// 2. DB: stage → atomic rename over live path.
const dbStaging = `${dbPath}.restore.${shortRandomSuffix()}.tmp`;
await fsp.copyFile(stagedDb, dbStaging);
await fsp.rename(dbStaging, dbPath);
dbSwapped = true;
// Uploads: rename staged tree into place (or create empty dir if none).
if (stagedUploads) {
await fsp.rename(stagedUploads, uploadsDir);
} else {
await fsp.mkdir(uploadsDir, { recursive: true });
// 3. Uploads: only swap for tar.gz restores. Legacy restores leave
// the live uploads tree intact (the backup didn't capture it).
if (!isLegacyDb) {
if (stagedUploads) {
await fsp.rename(stagedUploads, uploadsDir);
} else {
await fsp.mkdir(uploadsDir, { recursive: true });
}
uploadsSwapped = true;
}
uploadsSwapped = true;
await prisma.$connect();
await reapplySqlitePragmas();
// Best-effort: wipe any sessions left over from in-flight refreshes that
// raced with the restore. Restored DB already contains only sessions
// captured AT backup time, so this is a defence-in-depth measure.
try {
await prisma.session.deleteMany({});
} catch (err) {
console.warn('[backup] post-restore session purge failed:', err);
console.warn('[backup] post-restore session purge failed:', err);
}
// Cleanup safety snapshots on success.
await Promise.allSettled([rmrf(dbSafety), rmrf(uploadsSafety)]);
return {
@@ -449,34 +621,76 @@ console.warn('[backup] post-restore session purge failed:', err);
uploadFileCount: manifest?.uploadFileCount ?? 0
};
} catch (err) {
// Rollback DB if it was swapped.
// ---------------- Rollback ----------------
// Two-phase atomic-rename rollback for uploads: NEVER rmrf the live
// directory before the safety is in place. If we cannot move the
// failed-swap aside (open handles on Windows, etc.) we leave both
// safety and bad swap on disk and surface a degraded state instead
// of losing data.
let rollbackFailure: string | null = null;
try {
if (dbSwapped && fs.existsSync(dbSafety)) {
await fsp.copyFile(dbSafety, dbPath);
if (dbSwapped) {
if (fs.existsSync(dbSafety)) {
await fsp.copyFile(dbSafety, dbPath);
}
}
if (uploadsSwapped) {
await rmrf(uploadsDir);
if (fs.existsSync(uploadsSafety)) {
await fsp.rename(uploadsSafety, uploadsDir);
const deprecated = `${uploadsDir}.deprecated-${safetySuffix}-${shortRandomSuffix()}`;
try {
await fsp.rename(uploadsDir, deprecated);
} catch (renameErr) {
rollbackFailure = `failed to move failed-swap uploads aside: ${
renameErr instanceof Error ? renameErr.message : String(renameErr)
}`;
throw renameErr;
}
if (fs.existsSync(uploadsSafety)) {
try {
await fsp.rename(uploadsSafety, uploadsDir);
} catch (renameErr) {
// Bad swap is moved aside; safety still exists. Try to
// recover by moving the bad swap back so the API is
// at least functioning, then surface the failure.
try {
await fsp.rename(deprecated, uploadsDir);
} catch {
// Both renames failed: the live uploads dir no
// longer exists. Surface loudly.
}
rollbackFailure = `failed to restore uploads safety: ${
renameErr instanceof Error ? renameErr.message : String(renameErr)
}`;
throw renameErr;
}
}
await rmrf(deprecated);
} else if (fs.existsSync(uploadsSafety) && !fs.existsSync(uploadsDir)) {
// Uploads dir was renamed away but never replaced.
// Safety was moved away but the swap never happened.
await fsp.rename(uploadsSafety, uploadsDir);
}
await rmrf(dbSafety);
} catch (rollbackErr) {
console.error('[backup] rollback failed:', rollbackErr);
console.error('[backup] rollback failed:', rollbackErr);
markDegraded(rollbackFailure ?? 'rollback failed during restore');
}
try {
await prisma.$connect();
await reapplySqlitePragmas();
} catch (reconnectErr) {
console.error('[backup] reconnect after rollback failed:', reconnectErr);
console.error('[backup] reconnect after rollback failed:', reconnectErr);
markDegraded(
`prisma reconnect failed: ${
reconnectErr instanceof Error ? reconnectErr.message : String(reconnectErr)
}`
);
}
throw err;
} finally {
if (workDir) await rmrf(workDir);
_restoring = false;
state.restoreOp = false;
if (ownsGateFlag) {
state.restoring = false;
}
}
}
@@ -534,36 +748,17 @@ export async function updateBackupSettings(data: {
};
}
// Stats exposed for scheduler observability — also surfaced via /api/metrics
// if you wire it there.
/** Counters and timestamps describing the outcomes of scheduled backups. */
export interface BackupSchedulerStats {
  /** Number of scheduled backups recorded as successful. */
  successCount: number;
  /** Number of scheduled backups recorded as failed. */
  failureCount: number;
  /** ISO-8601 timestamp of the latest recorded success, or null if none yet. */
  lastSuccessAt: string | null;
  /** ISO-8601 timestamp of the latest recorded failure, or null if none yet. */
  lastFailureAt: string | null;
  /** Reason string supplied with the latest recorded failure, or null if none yet. */
  lastFailureReason: string | null;
}
// Zero-initialised stats record: no successes or failures recorded, and no
// timestamps or failure reason set.
const _stats: BackupSchedulerStats = {
  successCount: 0,
  failureCount: 0,
  lastSuccessAt: null,
  lastFailureAt: null,
  lastFailureReason: null
};
/**
 * Read the scheduler counters.
 *
 * @returns A shallow copy of `state.stats`, so callers cannot mutate the
 *          shared record through the returned object.
 */
export function getBackupSchedulerStats(): Readonly<BackupSchedulerStats> {
  return { ...state.stats };
}

/** Record one successful scheduled backup and stamp the current time. */
export function recordScheduledBackupSuccess(): void {
  state.stats.successCount += 1;
  state.stats.lastSuccessAt = new Date().toISOString();
}

/**
 * Record one failed scheduled backup and stamp the current time.
 *
 * @param reason - Description of the failure; stored as `lastFailureReason`.
 */
export function recordScheduledBackupFailure(reason: string): void {
  state.stats.failureCount += 1;
  state.stats.lastFailureAt = new Date().toISOString();
  state.stats.lastFailureReason = reason;
}
@@ -26,11 +26,19 @@ export const GET: RequestHandler = async (event) => {
? 'application/gzip'
: 'application/octet-stream';
// RFC 5987: filename* uses percent-encoding for non-ASCII / quote-unsafe
// characters. We keep the legacy `filename=` fallback for clients that
// don't speak RFC 5987 (very old browsers / curl < 7.20). Backslashes and
// quotes in the fallback are sanitised; the regex in getBackupFilePath
// blocks them today but this stays safe under any future loosening.
const fallback = basename.replace(/[\\"]/g, '_');
const encoded = encodeURIComponent(basename).replace(/['()]/g, escape);
return new Response(Readable.toWeb(stream) as ReadableStream, {
status: 200,
headers: {
'Content-Type': contentType,
'Content-Disposition': `attachment; filename="${basename}"`,
'Content-Disposition': `attachment; filename="${fallback}"; filename*=UTF-8''${encoded}`,
'Content-Length': String(stats.size)
}
});
@@ -1,7 +1,11 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { requireAdmin } from '$lib/server/middleware/authorize.js';
import { restoreBackup } from '$lib/server/services/backupService.js';
import {
beginRestoreWindow,
endRestoreWindow,
restoreBackup
} from '$lib/server/services/backupService.js';
import { clearSessionCookies } from '$lib/server/utils/sessionCookies.js';
import { success, error } from '$lib/server/utils/response.js';
import { logAction } from '$lib/server/services/auditLogService.js';
@@ -17,6 +21,11 @@ const restoreOptionsSchema = z
/**
* POST /api/admin/backups/:filename/restore — Restore the database from a backup.
*
* The restore window is opened SYNCHRONOUSLY here, before any body parsing or
* async work, so the hooks.server.ts gate starts returning 503 to concurrent
* requests immediately. The window is closed in a finally block; restoreBackup
* only sets and clears the gate flag itself when no caller has already set it.
*
* On success the response sets force_logout: true and clears the admin's
* session cookies, because the restored DB contains a session set from the
* backup-time snapshot and the current admin's session is no longer valid.
@@ -25,34 +34,50 @@ export const POST: RequestHandler = async (event) => {
const admin = requireAdmin(event);
const { filename } = event.params;
let options: { allowSchemaMismatch?: boolean } = {};
// CRITICAL: flip the gate BEFORE any awaits so concurrent requests
// don't slip through during body parsing.
try {
const text = await event.request.text();
if (text.trim()) {
const parsed = restoreOptionsSchema.safeParse(JSON.parse(text));
if (parsed.success && parsed.data) options = parsed.data;
}
} catch {
// Body is optional — ignore parse errors and fall back to defaults.
beginRestoreWindow();
} catch (err) {
const message = err instanceof Error ? err.message : 'Restore unavailable';
return json(error(message), { status: 409 });
}
try {
let options: { allowSchemaMismatch?: boolean } = {};
try {
const text = await event.request.text();
if (text.trim()) {
const parsed = restoreOptionsSchema.safeParse(JSON.parse(text));
if (parsed.success && parsed.data) options = parsed.data;
}
} catch {
// Body is optional — ignore parse errors and fall back to defaults.
}
const result = await restoreBackup(filename, options);
logAction(admin.id, AuditAction.BACKUP_RESTORED, 'backup', filename, {
format: result.format,
schemaVersionMatched: result.schemaVersionMatched,
uploadFileCount: result.uploadFileCount
uploadFileCount: result.uploadFileCount,
allowedSchemaMismatch: options.allowSchemaMismatch ?? false
});
// All session state from the backup time is now live — the admin's
// current cookies refer to a session that doesn't exist any more.
// Restored DB contains backup-time sessions; the admin's cookies refer
// to a session that no longer exists.
clearSessionCookies(event.cookies);
return json(success({ ...result, forceLogout: true }));
} catch (err) {
const message = err instanceof Error ? err.message : 'Failed to restore backup';
const status = /schema version mismatch/i.test(message) ? 409 : 500;
logAction(admin.id, AuditAction.BACKUP_FAILED, 'backup', filename, {
phase: 'restore',
error: message
});
return json(error(message), { status });
} finally {
endRestoreWindow();
}
};
+33 -10
View File
@@ -1,25 +1,48 @@
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { prisma } from '$lib/server/prisma.js';
import {
isDegraded,
getDegradedReason,
isRestoring
} from '$lib/server/services/backupService.js';
/**
 * GET /api/health — Docker / Kubernetes healthcheck endpoint.
 *
 * Pings the database with a trivial query so the container is reported
 * unhealthy when Prisma is disconnected. Also exposes the backup-restore
 * degraded state so an orchestrator can recycle a process stuck in a
 * partially-rolled-back state.
 *
 * The handler itself performs no authentication check.
 *
 * Status semantics (evaluated in this order):
 *   503 degraded  — restore failed and rollback or Prisma reconnect also
 *                   failed; process needs restart
 *   503 restoring — restore in progress (transient)
 *   200 ok        — DB reachable, no degraded flag
 *   503 db_down   — DB ping failed
 */
export const GET: RequestHandler = async () => {
  const version = process.env.APP_VERSION ?? 'dev';

  // Degraded is checked first: it is the state that requires a restart, so it
  // must not be masked by a concurrent "restoring" flag.
  if (isDegraded()) {
    // NOTE(review): `reason` is whatever string the backup service recorded,
    // which can include raw error messages — confirm that exposing it on an
    // unauthenticated probe is acceptable.
    return json(
      {
        status: 'degraded',
        reason: getDegradedReason(),
        version
      },
      { status: 503 }
    );
  }

  if (isRestoring()) {
    return json({ status: 'restoring', version }, { status: 503 });
  }

  try {
    await prisma.$queryRaw`SELECT 1`;
    return json({ status: 'ok', version });
  } catch {
    // A failed ping is reported as db_down so probes can tell it apart from
    // the restore-related degraded state above.
    return json({ status: 'db_down', version }, { status: 503 });
  }
};