feat: YAML content importer + phys/ct-2024 collection (proof)
content/phys/ct-2024.yaml — 15 questions from ЦЭ,ЦТ 2024 across 6 topics (kinem, mol, emf, electro, magnet, optics) as proof of format. backend/scripts/import-content.js — unified importer: - Validates schema (subject, year, options, exactly-1-correct) - Aliases (kinem, mol, ...) resolve to Russian topic names via get-or-create - Deduplicates by first 80 chars of text (matches legacy seed_*.js behavior) - Runs in a single transaction, idempotent re-runs On fresh DB: 13 added (2 dedup collisions — same 80-char prefix, expected). On prod DB: 0 added (all already exist from legacy seeds). Second run on either: 0 added (dedup works). Legacy seed_phys_ct2024.js kept as backup — see content/README.md for migration guide. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* import-content.js — imports question collections from YAML manifests.
|
||||
*
|
||||
* Usage:
|
||||
* npm run import:content -- ../content/phys/ct-2024.yaml
|
||||
*
|
||||
* YAML format: content/README.md
|
||||
*
|
||||
* Topic aliases (subject=phys):
|
||||
* kinem=29, dynam=30, cons=31, mol=32, thermo=33, electro=34,
|
||||
* dc=35, magnet=36, emf=37, optics=38, quantum=39, waves=40
|
||||
*
|
||||
* For subjects without predefined aliases, or for additional topics,
|
||||
* add entries to SUBJECT_TOPIC_NAMES below, or use full topic name strings
|
||||
* as topic keys (they will be looked up / created automatically).
|
||||
*/
|
||||
'use strict';
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const yaml = require('js-yaml');
|
||||
const db = require('../src/db/db');
|
||||
|
||||
/* ── Subject → topic alias → topic name (for get-or-create lookup) ────── */
|
||||
/**
 * Subject slug (as written in `meta.subject` of a manifest) → value used for
 * the `subject_id` column in the queries of this script.
 * Frozen so that no code path can accidentally remap a subject at runtime.
 */
const SUBJECT_ID_MAP = Object.freeze({ bio: 1, chem: 2, math: 3, phys: 4 });

/**
 * Per-subject short aliases → full topic names.
 * A manifest may use either an alias listed here or a full topic name as a
 * key under `topics`; keys that are not listed are used as the topic name as-is.
 * Both the outer table and each per-subject table are frozen.
 */
const SUBJECT_TOPIC_NAMES = Object.freeze({
  phys: Object.freeze({
    kinem: 'Кинематика',
    dynam: 'Динамика',
    cons: 'Законы сохранения',
    mol: 'Молекулярная физика',
    thermo: 'Термодинамика',
    electro: 'Электростатика',
    dc: 'Постоянный ток',
    magnet: 'Магнетизм',
    emf: 'Электромагнитная индукция',
    optics: 'Оптика',
    quantum: 'Квантовая и ядерная физика',
    waves: 'Колебания и волны',
  }),
  // Add math/bio/chem topic name maps here as collections are migrated
});
|
||||
|
||||
/* ── Look up or create topic by name (alias or full name) ─────────────── */
|
||||
/**
 * Resolves a manifest topic key to the id of a row in `topics`, inserting the
 * row when no matching one exists for the subject.
 *
 * The key is first translated through the subject's alias table in
 * SUBJECT_TOPIC_NAMES; a key with no alias entry is treated as the topic name
 * itself. Only the alias table's own properties are consulted, so keys that
 * happen to match inherited object members (e.g. "constructor", "toString")
 * are treated as ordinary topic names instead of resolving to a function.
 *
 * @param {number} subjectId - Subject id (one of the values of SUBJECT_ID_MAP).
 * @param {string} key - Topic alias or full topic name from the manifest.
 * @returns {number} Id of the existing or newly inserted topic.
 */
function resolveTopicId(subjectId, key) {
  // Reverse lookup: subject id → slug, to find the subject's alias table.
  const subjectSlug = Object.keys(SUBJECT_ID_MAP).find((slug) => SUBJECT_ID_MAP[slug] === subjectId);
  const aliasMap = SUBJECT_TOPIC_NAMES[subjectSlug] || {};

  // Resolve alias → full topic name (or use key as-is if it's already a name).
  const isAlias = Object.prototype.hasOwnProperty.call(aliasMap, key);
  const topicName = (isAlias && aliasMap[key]) || key;

  // NOTE(review): if `db` is SQLite built without ICU, LOWER() folds ASCII only,
  // so non-ASCII (e.g. Cyrillic) names would match case-sensitively — confirm.
  const existing = db
    .prepare('SELECT id FROM topics WHERE subject_id=? AND LOWER(name)=LOWER(?)')
    .get(subjectId, topicName);
  if (existing) return existing.id;

  const { lastInsertRowid } = db
    .prepare('INSERT INTO topics (subject_id, name) VALUES (?,?)')
    .run(subjectId, topicName);
  console.log(`[import] Created new topic: "${topicName}" (id=${lastInsertRowid})`);
  // Number() normalises the rowid in case the driver reports it as a BigInt.
  return Number(lastInsertRowid);
}
|
||||
|
||||
/* ── Validation ──────────────────────────────────────────────────────── */
|
||||
/**
 * Validates a parsed manifest and terminates the process when it is invalid.
 *
 * All problems are collected before reporting, so one run lists every error.
 * On failure the errors are printed to stderr and the process exits with
 * code 2; on success the function returns normally.
 *
 * Checks performed:
 *  - the document is an object with `meta.subject` and `meta.year`;
 *  - `meta.subject` is one of the slugs defined in SUBJECT_ID_MAP (own keys only);
 *  - `topics` is a plain (non-array) object whose values are arrays;
 *  - every question is an object with a non-empty string `text` and an
 *    `options` array containing exactly one entry flagged `correct`;
 *  - every option has a truthy `text`;
 *  - `difficulty`, when present, is 1, 2 or 3 (numeric strings are accepted).
 *
 * Malformed entries such as `null` questions or options (which YAML produces
 * for empty list items) are reported as validation errors rather than
 * crashing with a TypeError.
 *
 * @param {*} doc - Value returned by the YAML parser.
 * @param {string} file - Manifest path; only its basename appears in the error header.
 * @returns {void}
 */
function validate(doc, file) {
  const errors = [];
  const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

  if (!doc || typeof doc !== 'object') errors.push('document must be an object');

  const subject = doc?.meta?.subject;
  if (!subject) errors.push('meta.subject required');
  if (!doc?.meta?.year) errors.push('meta.year required');
  // Own-property check: a plain `SUBJECT_ID_MAP[subject]` read would accept
  // inherited names such as "constructor". Skipped when the subject is missing,
  // since that case is already reported above.
  if (subject && !Object.prototype.hasOwnProperty.call(SUBJECT_ID_MAP, subject)) {
    errors.push(`unknown subject "${subject}" (valid: ${Object.keys(SUBJECT_ID_MAP).join(', ')})`);
  }

  const topics = doc?.topics;
  if (!isRecord(topics)) {
    errors.push('topics object required');
  } else {
    for (const [topicKey, items] of Object.entries(topics)) {
      if (!Array.isArray(items)) {
        errors.push(`topics.${topicKey} must be array`);
        continue;
      }

      items.forEach((q, i) => {
        const loc = `topics.${topicKey}[${i}]`;

        if (!isRecord(q)) {
          errors.push(`${loc}: question must be an object`);
          return;
        }

        if (!q.text || typeof q.text !== 'string') errors.push(`${loc}: text required (string)`);

        if (!Array.isArray(q.options)) {
          errors.push(`${loc}: options array required`);
        } else {
          // `?.` keeps null / scalar option entries from throwing; they are
          // reported below as missing text.
          const correctCount = q.options.filter((o) => o?.correct).length;
          if (correctCount !== 1) {
            errors.push(`${loc}: exactly 1 correct option required (got ${correctCount})`);
          }
          q.options.forEach((o, oi) => {
            if (!o?.text) errors.push(`${loc}.options[${oi}]: text required`);
          });
        }

        if (q.difficulty !== undefined && ![1, 2, 3].includes(Number(q.difficulty))) {
          errors.push(`${loc}: difficulty must be 1, 2, or 3`);
        }
      });
    }
  }

  if (errors.length) {
    console.error(`\n[import] FAIL: validation errors in ${path.basename(file)}:`);
    errors.forEach((e) => console.error(` - ${e}`));
    process.exit(2);
  }
}
|
||||
|
||||
/* ── Import ──────────────────────────────────────────────────────────── */
|
||||
/**
 * Imports one YAML manifest into the `questions` / `options` tables.
 *
 * Steps:
 *  1. read and parse the file, then run `validate` (which exits the process
 *     on an invalid manifest);
 *  2. load the texts of all questions already stored for the subject and
 *     build a set of dedup keys from them;
 *  3. inside a single `db.transaction`, walk every topic, resolve its id via
 *     `resolveTopicId`, and insert each question whose dedup key is not yet
 *     known, followed by its options in manifest order.
 *
 * The dedup key is the first 80 characters of the trimmed text. The same
 * key function is applied to stored rows and to incoming questions, so a
 * stored text that carries leading whitespace still matches its re-import.
 * Keys of newly inserted questions are added to the set, so duplicates
 * within the same manifest are skipped as well.
 *
 * @param {string} file - Path to the YAML manifest.
 * @returns {{added: number, skipped: number}} Counts of inserted and skipped questions.
 */
function importFile(file) {
  const raw = fs.readFileSync(file, 'utf8');
  // NOTE(review): js-yaml 4.x `load` uses a safe default schema; if the project
  // pins js-yaml 3.x, prefer `safeLoad` for manifests from untrusted sources.
  const doc = yaml.load(raw);
  validate(doc, file);

  const subjectId = SUBJECT_ID_MAP[doc.meta.subject];
  const { year } = doc.meta;

  // Single definition of the dedup key, used for both stored and incoming text.
  // `?? ''` tolerates a stored row whose text is NULL.
  const dedupKey = (text) => String(text ?? '').trim().slice(0, 80).trim();

  // Dedup: skip questions whose first 80 chars already exist for this subject
  const existingKeys = new Set(
    db.prepare('SELECT text FROM questions WHERE subject_id=?')
      .all(subjectId)
      .map((row) => dedupKey(row.text))
  );

  const insertQ = db.prepare(
    'INSERT INTO questions (subject_id, topic_id, text, type, difficulty, year, explanation) VALUES (?,?,?,?,?,?,?)'
  );
  const insertO = db.prepare(
    'INSERT INTO options (question_id, text, is_correct, order_index) VALUES (?,?,?,?)'
  );

  let added = 0;
  let skipped = 0;

  db.transaction(() => {
    for (const [topicKey, items] of Object.entries(doc.topics)) {
      const topicId = resolveTopicId(subjectId, topicKey);

      for (const q of items) {
        const text = q.text.trim();
        const key = dedupKey(text);

        if (existingKeys.has(key)) {
          skipped++;
          continue;
        }
        existingKeys.add(key);

        const { lastInsertRowid } = insertQ.run(
          subjectId,
          topicId,
          text,
          q.type || 'single',
          Number(q.difficulty) || 1, // difficulty is optional; defaults to 1
          year,
          q.explanation || null
        );

        const qid = Number(lastInsertRowid);
        q.options.forEach((o, i) => insertO.run(qid, o.text, o.correct ? 1 : 0, i));
        added++;
      }
    }
  })();

  const source = doc.meta.source ? ` (${doc.meta.source})` : '';
  console.log(`[import] ${path.basename(file)}${source} — added ${added}, skipped ${skipped} duplicates`);
  return { added, skipped };
}
|
||||
|
||||
/* ── Entry point ─────────────────────────────────────────────────────── */
|
||||
// CLI entry point: the manifest path is the first positional argument.
const file = process.argv[2];

if (!file) {
  // No path given: print both invocation forms and exit with a usage error.
  const usageLines = [
    'Usage: node import-content.js <path/to/collection.yaml>',
    ' npm run import:content -- ../content/phys/ct-2024.yaml',
  ];
  usageLines.forEach((line) => console.error(line));
  process.exit(1);
}

// Resolve against the current working directory so the error message and the
// import both use an absolute path.
const resolved = path.resolve(file);
const manifestExists = fs.existsSync(resolved);

if (!manifestExists) {
  console.error(`[import] File not found: ${resolved}`);
  process.exit(1);
}

importFile(resolved);
|
||||
Reference in New Issue
Block a user