25489a733a
content/phys/ct-2024.yaml — 15 questions from ЦЭ,ЦТ 2024 across 6 topics (kinem, mol, emf, electro, magnet, optics) as proof of format. backend/scripts/import-content.js — unified importer: - Validates schema (subject, year, options, exactly-1-correct) - Aliases (kinem, mol, ...) resolve to Russian topic names via get-or-create - Deduplicates by first 80 chars of text (matches legacy seed_*.js behavior) - Runs in a single transaction, idempotent re-runs On fresh DB: 13 added (2 dedup collisions — same 80-char prefix, expected). On prod DB: 0 added (all already exist from legacy seeds). Second run on either: 0 added (dedup works). Legacy seed_phys_ct2024.js kept as backup — see content/README.md for migration guide. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
170 lines
6.7 KiB
JavaScript
170 lines
6.7 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* import-content.js — imports question collections from YAML manifests.
|
|
*
|
|
* Usage:
|
|
* npm run import:content -- ../content/phys/ct-2024.yaml
|
|
*
|
|
* YAML format: content/README.md
|
|
*
|
|
* Topic aliases (subject=phys):
|
|
* kinem=29, dynam=30, cons=31, mol=32, thermo=33, electro=34,
|
|
* dc=35, magnet=36, emf=37, optics=38, quantum=39, waves=40
|
|
*
|
|
* For subjects without predefined aliases, or for additional topics,
|
|
* add entries to SUBJECT_TOPIC_NAMES below, or use full topic name strings
|
|
* as topic keys (they will be looked up / created automatically).
|
|
*/
|
|
'use strict';
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const yaml = require('js-yaml');
|
|
const db = require('../src/db/db');
|
|
|
|
/* ── Subject → topic alias → topic name (for get-or-create lookup) ────── */
|
|
/**
 * Manifest subject slug → numeric subject id used for the `subject_id`
 * columns in the queries below.
 *
 * Frozen because it is a shared lookup table: an accidental write from any
 * function in this script would silently change how every later manifest
 * is interpreted.
 */
const SUBJECT_ID_MAP = Object.freeze({ bio: 1, chem: 2, math: 3, phys: 4 });
|
|
|
|
/**
 * Per-subject map of short topic aliases (as used for topic keys in YAML
 * manifests) to the full topic name that is looked up / created in the
 * `topics` table.
 *
 * Both the outer object and each per-subject map are frozen so the tables
 * cannot be modified by accident at runtime; extend them by editing this
 * literal.
 */
const SUBJECT_TOPIC_NAMES = Object.freeze({
  phys: Object.freeze({
    kinem: 'Кинематика',
    dynam: 'Динамика',
    cons: 'Законы сохранения',
    mol: 'Молекулярная физика',
    thermo: 'Термодинамика',
    electro: 'Электростатика',
    dc: 'Постоянный ток',
    magnet: 'Магнетизм',
    emf: 'Электромагнитная индукция',
    optics: 'Оптика',
    quantum: 'Квантовая и ядерная физика',
    waves: 'Колебания и волны',
  }),
  // Add math/bio/chem topic name maps here as collections are migrated
});
|
|
|
|
/* ── Look up or create topic by name (alias or full name) ─────────────── */
|
|
/**
 * Return the id of the topic identified by `key` within a subject, inserting
 * a new `topics` row when no matching topic exists yet.
 *
 * `key` is first treated as an alias from SUBJECT_TOPIC_NAMES for the subject;
 * when it is not a known alias it is used verbatim as the topic name.
 * Existing topics are matched by name case-insensitively.
 *
 * @param {number} subjectId - numeric subject id (a value of SUBJECT_ID_MAP).
 * @param {string} key - topic alias (e.g. "kinem") or full topic name.
 * @returns {number} id of the existing topic, or of the row just inserted.
 */
function resolveTopicId(subjectId, key) {
  const subjectSlug = Object.keys(SUBJECT_ID_MAP).find((slug) => SUBJECT_ID_MAP[slug] === subjectId);
  const aliasMap = SUBJECT_TOPIC_NAMES[subjectSlug] || {};

  // Own-property check: a key such as "constructor" must not be resolved
  // through Object.prototype and end up as a non-string "topic name".
  const isAlias = Object.prototype.hasOwnProperty.call(aliasMap, key);
  const topicName = (isAlias && aliasMap[key]) || key;

  // The comparison is done in JS rather than with SQL LOWER(): SQLite's
  // built-in LOWER() only folds ASCII, so it cannot match Cyrillic topic
  // names that differ in case and a duplicate topic would be created.
  const wanted = String(topicName).toLowerCase();
  const existing = db
    .prepare('SELECT id, name FROM topics WHERE subject_id=? ORDER BY id')
    .all(subjectId)
    .find((row) => typeof row.name === 'string' && row.name.toLowerCase() === wanted);
  if (existing) return existing.id;

  const { lastInsertRowid } = db
    .prepare('INSERT INTO topics (subject_id, name) VALUES (?,?)')
    .run(subjectId, topicName);
  console.log(`[import] Created new topic: "${topicName}" (id=${lastInsertRowid})`);
  return Number(lastInsertRowid);
}
|
|
|
|
/* ── Validation ──────────────────────────────────────────────────────── */
|
|
/**
 * Validate a parsed manifest before anything is written to the database.
 *
 * Checks that `meta.subject` (a key of SUBJECT_ID_MAP) and `meta.year` are
 * present, that `topics` is an object whose values are arrays of questions,
 * and that every question has non-empty string `text`, an `options` array
 * with exactly one option flagged `correct`, text on every option, and an
 * optional `difficulty` of 1, 2 or 3.
 *
 * All problems are collected and reported together. On failure the errors
 * are printed to stderr and the process exits with code 2; on success the
 * function simply returns.
 *
 * @param {*} doc - value produced by the YAML parser (may be anything).
 * @param {string} file - manifest path; only its basename appears in the report.
 * @returns {void}
 */
function validate(doc, file) {
  const errors = [];
  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

  if (!doc || typeof doc !== 'object') errors.push('document must be an object');

  const subject = doc?.meta?.subject;
  if (!subject) {
    errors.push('meta.subject required');
  } else if (typeof subject !== 'string' || !Object.prototype.hasOwnProperty.call(SUBJECT_ID_MAP, subject)) {
    // Own-property check so inherited names ("constructor", "toString", …)
    // are not accepted as subjects.
    errors.push(`unknown subject "${subject}" (valid: ${Object.keys(SUBJECT_ID_MAP).join(', ')})`);
  }
  if (!doc?.meta?.year) errors.push('meta.year required');

  const topics = doc?.topics;
  if (!isRecord(topics)) {
    errors.push('topics object required');
  } else {
    for (const [topicKey, items] of Object.entries(topics)) {
      if (!Array.isArray(items)) {
        errors.push(`topics.${topicKey} must be array`);
        continue;
      }

      items.forEach((q, i) => {
        const loc = `topics.${topicKey}[${i}]`;

        // An empty YAML list item parses to null; report it instead of
        // crashing on a property access.
        if (!isRecord(q)) {
          errors.push(`${loc}: question must be an object`);
          return;
        }

        if (typeof q.text !== 'string' || q.text.trim() === '') {
          errors.push(`${loc}: text required (string)`);
        }

        if (!Array.isArray(q.options)) {
          errors.push(`${loc}: options array required`);
        } else {
          const correctCount = q.options.filter((o) => o?.correct).length;
          if (correctCount !== 1) {
            errors.push(`${loc}: exactly 1 correct option required (got ${correctCount})`);
          }
          q.options.forEach((o, oi) => {
            if (!o?.text) errors.push(`${loc}.options[${oi}]: text required`);
          });
        }

        if (q.difficulty !== undefined && ![1, 2, 3].includes(Number(q.difficulty))) {
          errors.push(`${loc}: difficulty must be 1, 2, or 3`);
        }
      });
    }
  }

  if (errors.length) {
    console.error(`\n[import] FAIL: validation errors in ${path.basename(file)}:`);
    errors.forEach((e) => console.error(` - ${e}`));
    process.exit(2);
  }
}
|
|
|
|
/* ── Import ──────────────────────────────────────────────────────────── */
|
|
/**
 * Build the key used to detect duplicate questions: the first 80 characters
 * of the trimmed text, trimmed again so a cut that lands on whitespace does
 * not leave a trailing space in the key.
 *
 * @param {string} text - question text.
 * @returns {string} dedup key.
 */
function questionDedupKey(text) {
  return text.trim().slice(0, 80).trim();
}

/**
 * Import one YAML manifest into the database.
 *
 * Reads and parses the file, validates it (validate() terminates the process
 * on errors), then inserts every question and its options inside a single
 * `db.transaction`. A question is skipped when its dedup key already exists
 * for the subject, either in the database or earlier in the same manifest.
 *
 * @param {string} file - path to the YAML manifest.
 * @returns {{added: number, skipped: number}} counts of inserted and skipped questions.
 */
function importFile(file) {
  const raw = fs.readFileSync(file, 'utf8');
  // NOTE(review): whether `yaml.load` is safe for untrusted manifests depends
  // on the installed js-yaml major version (the default schema changed between
  // 3.x and 4.x) — confirm before importing files from unknown sources.
  const doc = yaml.load(raw);
  validate(doc, file);

  const subjectId = SUBJECT_ID_MAP[doc.meta.subject];
  const { year } = doc.meta;

  // Dedup: skip questions whose key already exists for this subject. Stored
  // and incoming texts go through the same helper; normalising them
  // differently would let stored text with leading whitespace slip past
  // the check and be imported again.
  const existingTexts = new Set(
    db.prepare('SELECT text FROM questions WHERE subject_id=?').all(subjectId)
      .map((row) => questionDedupKey(row.text))
  );

  const insertQ = db.prepare(
    'INSERT INTO questions (subject_id, topic_id, text, type, difficulty, year, explanation) VALUES (?,?,?,?,?,?,?)'
  );
  const insertO = db.prepare(
    'INSERT INTO options (question_id, text, is_correct, order_index) VALUES (?,?,?,?)'
  );

  let added = 0;
  let skipped = 0;

  db.transaction(() => {
    for (const [topicKey, items] of Object.entries(doc.topics)) {
      const topicId = resolveTopicId(subjectId, topicKey);

      for (const q of items) {
        const text = q.text.trim();
        const key = questionDedupKey(text);

        if (existingTexts.has(key)) {
          skipped += 1;
          continue;
        }
        // Remember the key so a repeat later in this manifest is skipped too.
        existingTexts.add(key);

        const { lastInsertRowid } = insertQ.run(
          subjectId,
          topicId,
          text,
          q.type || 'single',
          Number(q.difficulty) || 1,
          year,
          q.explanation || null
        );

        const qid = Number(lastInsertRowid);
        for (const [orderIndex, option] of q.options.entries()) {
          insertO.run(qid, option.text, option.correct ? 1 : 0, orderIndex);
        }
        added += 1;
      }
    }
  })();

  const source = doc.meta.source ? ` (${doc.meta.source})` : '';
  console.log(`[import] ${path.basename(file)}${source} — added ${added}, skipped ${skipped} duplicates`);
  return { added, skipped };
}
|
|
|
|
/* ── Entry point ─────────────────────────────────────────────────────── */
|
|
// CLI driver: the manifest path is the first command-line argument.
const file = process.argv[2];

// No argument given: print the usage text to stderr and exit with code 1.
if (!file) {
  const usageLines = [
    'Usage: node import-content.js <path/to/collection.yaml>',
    ' npm run import:content -- ../content/phys/ct-2024.yaml',
  ];
  for (const line of usageLines) {
    console.error(line);
  }
  process.exit(1);
}

// Report the absolute path so a wrong working directory is easy to spot.
const resolved = path.resolve(file);
const manifestExists = fs.existsSync(resolved);
if (manifestExists === false) {
  console.error(`[import] File not found: ${resolved}`);
  process.exit(1);
}

importFile(resolved);
|