Files
Learn_System/backend/scripts/import-content.js
T
Maxim Dolgolyov 25489a733a feat: YAML content importer + phys/ct-2024 collection (proof)
content/phys/ct-2024.yaml — 15 questions from ЦЭ,ЦТ 2024 across
6 topics (kinem, mol, emf, electro, magnet, optics) as proof of format.

backend/scripts/import-content.js — unified importer:
- Validates schema (subject, year, options, exactly-1-correct)
- Aliases (kinem, mol, ...) resolve to Russian topic names via get-or-create
- Deduplicates by first 80 chars of text (matches legacy seed_*.js behavior)
- Runs in a single transaction, idempotent re-runs

On fresh DB: 13 added (2 dedup collisions — same 80-char prefix, expected).
On prod DB: 0 added (all already exist from legacy seeds).
Second run on either: 0 added (dedup works).

Legacy seed_phys_ct2024.js kept as backup — see content/README.md
for migration guide.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-06 17:42:07 +03:00

170 lines
6.7 KiB
JavaScript

#!/usr/bin/env node
/**
* import-content.js — imports question collections from YAML manifests.
*
* Usage:
* npm run import:content -- ../content/phys/ct-2024.yaml
*
* YAML format: content/README.md
*
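* Topic aliases (subject=phys):
* kinem=29, dynam=30, cons=31, mol=32, thermo=33, electro=34,
* dc=35, magnet=36, emf=37, optics=38, quantum=39, waves=40
* NOTE: the importer resolves each alias to a topic *name* (see
* SUBJECT_TOPIC_NAMES) and looks the row up by name; it never uses the
* numeric ids listed above directly.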
*
* For subjects without predefined aliases, or for additional topics,
* add entries to SUBJECT_TOPIC_NAMES below, or use full topic name strings
* as topic keys (they will be looked up / created automatically).
*/
'use strict';
const fs = require('fs');
const path = require('path');
const yaml = require('js-yaml');
const db = require('../src/db/db');
/* ── Subject → topic alias → topic name (for get-or-create lookup) ────── */
// Subject slug (the value of `meta.subject` in a manifest) → numeric subject id.
// Key order is significant: it is the order shown in the "valid subjects" error.
const SUBJECT_ID_MAP = {
  bio: 1,
  chem: 2,
  math: 3,
  phys: 4,
};

// Per-subject table of short topic aliases → full topic names.
// A manifest topic key that is not listed here is treated as a full topic name.
const SUBJECT_TOPIC_NAMES = {
  phys: {
    kinem: 'Кинематика',
    dynam: 'Динамика',
    cons: 'Законы сохранения',
    mol: 'Молекулярная физика',
    thermo: 'Термодинамика',
    electro: 'Электростатика',
    dc: 'Постоянный ток',
    magnet: 'Магнетизм',
    emf: 'Электромагнитная индукция',
    optics: 'Оптика',
    quantum: 'Квантовая и ядерная физика',
    waves: 'Колебания и волны',
  },
  // Add math/bio/chem topic name maps here as collections are migrated
};
/* ── Look up or create topic by name (alias or full name) ─────────────── */
/**
 * Maps a manifest topic key to a row id in `topics`, inserting the topic
 * when no row with that name exists yet for the subject.
 *
 * @param {number} subjectId - Subject id (one of the values of SUBJECT_ID_MAP).
 * @param {string} key - Topic alias (e.g. "kinem") or a full topic name.
 * @returns {number} Id of the existing or newly inserted topic row.
 */
function resolveTopicId(subjectId, key) {
  const subjectSlug = Object.keys(SUBJECT_ID_MAP).find(s => SUBJECT_ID_MAP[s] === subjectId);
  const aliasMap = SUBJECT_TOPIC_NAMES[subjectSlug] || {};

  // Only own entries count as aliases. A plain `aliasMap[key]` would also hit
  // inherited members, so a topic literally named "constructor" or "toString"
  // would resolve to a function instead of a string.
  const isAlias = Object.prototype.hasOwnProperty.call(aliasMap, key);
  // Resolve alias → full topic name (or use key as-is if it's already a name)
  const topicName = (isAlias && aliasMap[key]) || key;

  // NOTE(review): SQLite's built-in LOWER() only folds ASCII, so for Cyrillic
  // names this comparison is effectively case-sensitive unless the database
  // is built with ICU — confirm against the actual db driver/build.
  const existing = db
    .prepare('SELECT id FROM topics WHERE subject_id=? AND LOWER(name)=LOWER(?)')
    .get(subjectId, topicName);
  if (existing) return existing.id;

  const { lastInsertRowid } = db
    .prepare('INSERT INTO topics (subject_id, name) VALUES (?,?)')
    .run(subjectId, topicName);
  console.log(`[import] Created new topic: "${topicName}" (id=${lastInsertRowid})`);
  return Number(lastInsertRowid);
}
/* ── Validation ──────────────────────────────────────────────────────── */
/**
 * Checks a parsed manifest against the expected shape and terminates the
 * process when it is invalid.
 *
 * Checked: `meta.subject` (must be a key of SUBJECT_ID_MAP), `meta.year`,
 * `topics` (object of arrays), and for every question a string `text`, an
 * `options` array with exactly one `correct` entry, a `text` on each option,
 * and an optional `difficulty` of 1, 2 or 3.
 *
 * All problems are collected first; if there are any they are printed to
 * stderr and the process exits with code 2. Otherwise the function returns
 * normally.
 *
 * @param {*} doc - Value produced by parsing the YAML manifest.
 * @param {string} file - Manifest path; only its basename is used in the report.
 * @returns {void}
 */
function validate(doc, file) {
  const errors = [];
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  if (!doc || typeof doc !== 'object') errors.push('document must be an object');
  if (!doc?.meta?.subject) errors.push('meta.subject required');
  if (!doc?.meta?.year) errors.push('meta.year required');

  // Own-property lookup: a bare SUBJECT_ID_MAP[subject] is truthy for
  // inherited names such as "constructor", which would slip through.
  const subject = doc?.meta?.subject;
  if (!hasOwn(SUBJECT_ID_MAP, subject) || !SUBJECT_ID_MAP[subject]) {
    errors.push(`unknown subject "${subject}" (valid: ${Object.keys(SUBJECT_ID_MAP).join(', ')})`);
  }

  const topics = doc?.topics;
  // Arrays are objects too, but their indices would become topic names.
  if (!topics || typeof topics !== 'object' || Array.isArray(topics)) {
    errors.push('topics object required');
  } else {
    for (const [topicKey, items] of Object.entries(topics)) {
      if (!Array.isArray(items)) { errors.push(`topics.${topicKey} must be array`); continue; }
      items.forEach((q, i) => {
        const loc = `topics.${topicKey}[${i}]`;
        // An empty list item in YAML parses to null; report it instead of
        // crashing on the property reads below.
        if (!q || typeof q !== 'object') {
          errors.push(`${loc}: question must be an object`);
          return;
        }
        if (!q.text || typeof q.text !== 'string') errors.push(`${loc}: text required (string)`);
        if (!Array.isArray(q.options)) errors.push(`${loc}: options array required`);
        else {
          // `?.` keeps a null option from throwing; it is reported as
          // "text required" by the loop below.
          const correctCount = q.options.filter(o => o?.correct).length;
          if (correctCount !== 1) errors.push(`${loc}: exactly 1 correct option required (got ${correctCount})`);
          q.options.forEach((o, oi) => {
            if (!o?.text) errors.push(`${loc}.options[${oi}]: text required`);
          });
        }
        if (q.difficulty !== undefined && ![1, 2, 3].includes(Number(q.difficulty)))
          errors.push(`${loc}: difficulty must be 1, 2, or 3`);
      });
    }
  }

  if (errors.length) {
    console.error(`\n[import] FAIL: validation errors in ${path.basename(file)}:`);
    errors.forEach(e => console.error(` - ${e}`));
    process.exit(2);
  }
}
/* ── Import ──────────────────────────────────────────────────────────── */
/**
 * Reads one YAML manifest, validates it, and inserts its questions and
 * options inside a single `db.transaction`.
 *
 * A question is skipped when its dedup key (first 80 characters of the
 * trimmed text) is already present for the subject, either in the database
 * or earlier in the same manifest.
 *
 * @param {string} file - Path of the YAML manifest to import.
 * @returns {{added: number, skipped: number}} Counts of inserted and skipped questions.
 */
function importFile(file) {
  const raw = fs.readFileSync(file, 'utf8');
  // NOTE(review): `yaml.load` uses the safe schema by default only in
  // js-yaml >= 4; on 3.x it can construct arbitrary JS types and `safeLoad`
  // should be used instead — confirm the installed version.
  const doc = yaml.load(raw);
  validate(doc, file);

  const subjectId = SUBJECT_ID_MAP[doc.meta.subject];
  const year = doc.meta.year;

  // One key function for both stored and incoming text. Previously stored
  // rows were sliced *before* trimming while incoming text was trimmed first,
  // so a stored row with leading whitespace never matched its duplicate.
  const dedupKey = (s) => s.trim().slice(0, 80).trim();

  // Dedup: skip questions whose first 80 chars already exist for this subject
  const existingTexts = new Set(
    db.prepare('SELECT text FROM questions WHERE subject_id=?').all(subjectId)
      // Rows without a string text cannot collide with anything; skip them
      // rather than crash on `.trim()`.
      .filter(row => typeof row.text === 'string')
      .map(row => dedupKey(row.text))
  );

  const insertQ = db.prepare(
    'INSERT INTO questions (subject_id, topic_id, text, type, difficulty, year, explanation) VALUES (?,?,?,?,?,?,?)'
  );
  const insertO = db.prepare(
    'INSERT INTO options (question_id, text, is_correct, order_index) VALUES (?,?,?,?)'
  );

  let added = 0;
  let skipped = 0;
  db.transaction(() => {
    for (const [topicKey, items] of Object.entries(doc.topics)) {
      const topicId = resolveTopicId(subjectId, topicKey);
      for (const q of items) {
        const text = q.text.trim();
        const key = dedupKey(text);
        if (existingTexts.has(key)) { skipped++; continue; }
        // Remember the key so later items in this manifest dedup against it too.
        existingTexts.add(key);
        const { lastInsertRowid } = insertQ.run(
          subjectId,
          topicId,
          text,
          q.type || 'single',
          Number(q.difficulty) || 1,
          year,
          q.explanation || null
        );
        const qid = Number(lastInsertRowid);
        q.options.forEach((o, i) => insertO.run(qid, o.text, o.correct ? 1 : 0, i));
        added++;
      }
    }
  })();

  const source = doc.meta.source ? ` (${doc.meta.source})` : '';
  console.log(`[import] ${path.basename(file)}${source} — added ${added}, skipped ${skipped} duplicates`);
  return { added, skipped };
}
/* ── Entry point ─────────────────────────────────────────────────────── */
// CLI driver: the manifest path is the first argument after the script name.
const [, , file] = process.argv;

// No path given: print both invocation forms and stop.
if (!file) {
  const usageLines = [
    'Usage: node import-content.js <path/to/collection.yaml>',
    ' npm run import:content -- ../content/phys/ct-2024.yaml',
  ];
  for (const line of usageLines) {
    console.error(line);
  }
  process.exit(1);
}

// Resolve against the current working directory so the error message and
// the import both see an absolute path.
const resolved = path.resolve(file);
const manifestExists = fs.existsSync(resolved);
if (!manifestExists) {
  console.error(`[import] File not found: ${resolved}`);
  process.exit(1);
}

importFile(resolved);