Files
Learn_System/backend/scripts/import-exam-tasks.js
T

288 lines
11 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* import-exam-tasks.js — imports tasks from /frontend/js/exam9/variants/*.js
* into the generic exam_tasks table for the exam-prep module.
*
* Usage:
* node backend/scripts/import-exam-tasks.js # all enabled tracks
* node backend/scripts/import-exam-tasks.js math9 # one specific track
* node backend/scripts/import-exam-tasks.js math9 --dry # don't write, only report parse stats
*
* Idempotent: deletes existing exam_tasks rows for the target exam_key before inserting.
*
* For each variant V it produces tasks_per_variant rows in exam_tasks. For each task:
* - task_type: 'mc' if has opts; 'open' if sol-ans parses to a clean numeric/short value; 'long' otherwise
* - answer: explicit task.answer if present; else autoparsed from <div class="sol-ans">
* - text_html / figure_html / opts_json / solution_html — direct
*
* Reports parse-quality stats at the end:
* - per-track: total / mc / open / long / explicit-answer / parsed-answer / unparseable
* - lists (up to 20) tasks that have no opts and no explicit answer, and whose sol-ans answer could not be autoparsed
*/
'use strict';
const fs = require('fs');
const path = require('path');
const db = require('../src/db/db');
// Maps each supported exam_key to the directory that holds its variant files (vNN.js).
// The keys of this map are also the default import targets when no exam key is passed on the CLI.
const TRACK_VARIANTS_DIR = {
math9: path.join(__dirname, '../../frontend/js/exam9/variants'),
};
/**
 * Splits raw CLI tokens into positional arguments and flags.
 *
 * Any token that starts with '-' and is longer than one character counts as a
 * flag, so short options such as `-v` are recognised as flags instead of being
 * mistaken for an exam key. A lone '-' stays positional.
 *
 * @param {string[]} argv - CLI tokens without the node binary and script path.
 * @returns {{ positional: string[], flagSet: Set<string> }} Tokens in their
 *   original order (positional) and the set of flag tokens.
 */
function splitCliArgs(argv) {
  const positional = [];
  const flagSet = new Set();
  for (const token of argv) {
    if (token.length > 1 && token.startsWith('-')) {
      flagSet.add(token);
    } else {
      positional.push(token);
    }
  }
  return { positional, flagSet };
}

// Positional arguments are exam keys; flags toggle dry-run / verbose output.
const { positional: args, flagSet: flags } = splitCliArgs(process.argv.slice(2));
const DRY_RUN = flags.has('--dry');
const VERBOSE = flags.has('--verbose') || flags.has('-v');
/* ── HTML-text extraction from sol-ans div ───────────────────────── */
/**
 * Extracts the plain-text answer from the first `<div class="sol-ans">…</div>`
 * found in a solution's HTML.
 *
 * Tags are stripped, a few common entities are decoded, and a leading
 * "Ответ" label (with any following colons/whitespace) is removed.
 *
 * @param {string} solHtml - Solution HTML; may be empty or null/undefined.
 * @returns {string|null} The cleaned answer text, or null when the input is
 *   empty, has no sol-ans div, or the div holds no text.
 */
function extractAnswerText(solHtml) {
  if (!solHtml) return null;
  const m = solHtml.match(/<div class="sol-ans">([\s\S]*?)<\/div>/);
  if (!m) return null;
  const text = m[1]
    .replace(/<[^>]+>/g, '') // strip HTML tags (before entity decoding, so a decoded "<" is never taken for a tag)
    .replace(/&ensp;|&nbsp;|&thinsp;/g, ' ') // common spacing entities
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    // Decode &amp; last: decoding it first would turn "&amp;lt;" into "&lt;"
    // and then into "<", i.e. unescape the text twice.
    .replace(/&amp;/g, '&')
    .trim()
    .replace(/^Ответ[:\s]*/i, '')
    .trim();
  return text || null;
}
/* ── MC letter detector: matches «а)», «б.», or a lone letter «в» ── */
/**
 * Detects the multiple-choice option letter at the start of an answer text.
 *
 * Accepted forms: a letter а–д (any case) followed, after optional whitespace,
 * by `)` or `.`, or a letter that makes up the whole answer. A lone letter is
 * accepted for consistency with classifyTask, which treats a single letter as
 * a valid explicit MC answer. A letter followed by other text (e.g. the
 * preposition in «в 2 раза») is NOT accepted.
 *
 * @param {string|null} answerText - Plain answer text.
 * @returns {string|null} Lower-cased option letter, or null if none is found.
 */
function parseMcLetter(answerText) {
  if (!answerText) return null;
  const m = answerText.match(/^([а-д])\s*(?:[\)\.]|$)/i);
  return m ? m[1].toLowerCase() : null;
}
/* ── Open answer parser: returns a canonical answer string.
   Forms supported:
     "-2"     single integer / decimal (decimal comma becomes a dot)
     "7500"   positive integer
     "9/4"    fraction (from \dfrac{a}{b}, \frac{a}{b} or plain "a/b")
     "-2;4"   pair of values (from "x=A и x=B" or "x_1=A; x_2=B")
   Returns null if the answer is too complex (expressions, multiple vars,
   inequalities, square roots, intervals). ── */
/**
 * @param {string|null} answerText - Plain answer text (may contain LaTeX).
 * @returns {string|null} Canonical answer string, or null when the text cannot
 *   be reduced to one of the supported forms.
 */
function parseOpenAnswer(answerText) {
  if (!answerText) return null;
  // Normalize: trivial LaTeX spacing, spacing entities, and the typographic
  // minus sign. Without the minus normalization "−2" (U+2212) would fall
  // through to the last-try heuristic and lose its sign.
  let s = answerText
    .replace(/\\;|\\,|\\:|\\ /g, ' ')
    .replace(/&ensp;|&nbsp;/g, ' ')
    .replace(/\u2212|&minus;/g, '-')
    .trim();

  // ── Pattern A: \dfrac{a}{b} or \frac{a}{b} as the sole answer (optional unit tail)
  const fracMatch = s.match(/^\$?\\d?frac\{(-?\d+)\}\{(-?\d+)\}\$?(?:\s*[а-яА-Яa-zA-Z²³°%\.]*\.?)?$/);
  if (fracMatch) {
    return `${fracMatch[1]}/${fracMatch[2]}`;
  }
  // Also: "-\dfrac{a}{b}" with the sign outside the fraction
  const negFracMatch = s.match(/^\$?-\\d?frac\{(\d+)\}\{(\d+)\}\$?(?:\s*[а-яА-Яa-zA-Z²³°%\.]*\.?)?$/);
  if (negFracMatch) {
    return `-${negFracMatch[1]}/${negFracMatch[2]}`;
  }
  // Plain-text fraction "a/b" as the sole answer. Without this branch the
  // last-try heuristic below would return only the numerator.
  const plainFracMatch = s.match(/^\$?(-?\d+)\s*\/\s*(\d+)\$?(?:\s*[а-яА-Яa-zA-Z²³°%\.]*\.?)?$/);
  if (plainFracMatch) {
    return `${plainFracMatch[1]}/${plainFracMatch[2]}`;
  }

  // ── Pattern B: two roots "$x = A$ и $x = B$" or "x_1=A; x_2=B"
  const twoRoots = s.match(/x\s*_?\d?\s*=\s*(-?\d+(?:[.,]\d+)?)[\s\$]*(?:\sи\s|;)\s*\$?x\s*_?\d?\s*=\s*(-?\d+(?:[.,]\d+)?)/);
  if (twoRoots) {
    const a = twoRoots[1].replace(',', '.');
    const b = twoRoots[2].replace(',', '.');
    return `${a};${b}`;
  }

  // Strip $ delimiters for the remaining single-number checks
  s = s.replace(/\$/g, '').trim();

  // Reject remaining complex forms
  if (/\\dfrac|\\frac|\\sqrt|\\sum|\\int|\\cdot|\\pi/.test(s)) return null;
  if (/[<>≤≥]/.test(s)) return null;
  if (/\(.*[;,].*\)/.test(s)) return null; // intervals/points
  if (s.split(/\s+или\s+|\s+and\s+|\s+и\s+/i).length > 1) return null;
  if (/[xyz]\s*_?\d?\s*=.*[xyz]\s*_?\d?\s*=/.test(s)) return null; // multi-var that did not match pattern B

  // "X = NUM" → take the number right after the first "="
  const eq = s.match(/=\s*(-?\d+(?:[.,]\d+)?)/);
  if (eq) return eq[1].replace(',', '.');

  // Single number with an optional short unit tail
  const single = s.match(/^(-?\d+(?:[.,]\d+)?)(\s*[а-яА-Яa-zA-Z\.²³°%]*\.?)?$/);
  if (single) return single[1].replace(',', '.');

  // Last try: take the first number if at most 8 other characters surround it
  const first = s.match(/(-?\d+(?:[.,]\d+)?)/);
  if (first && first[1].length >= s.length - 8) return first[1].replace(',', '.');

  return null;
}
/* ── Load a variant from .js via Function constructor ────────────── */
/**
 * Loads variant number `n` from `<dir>/vNN.js` (NN is `n` zero-padded to two digits).
 *
 * The file's source is executed as a function body with a single parameter
 * named VARIANTS bound to a fresh empty object; whatever the file stores under
 * key `n` of that object is returned.
 *
 * NOTE(security): this runs the file's contents as code via `new Function`.
 * Only point it at trusted, project-owned variant files.
 *
 * @param {string} dir - Directory containing the variant files.
 * @param {number} n - Variant number.
 * @returns {*} The value registered under `n`, or null when the file is
 *   missing or registered nothing truthy under that key.
 */
function loadVariant(dir, n) {
  const fileName = `v${String(n).padStart(2, '0')}.js`;
  const fullPath = path.join(dir, fileName);
  if (!fs.existsSync(fullPath)) {
    return null;
  }

  const code = fs.readFileSync(fullPath, 'utf8');
  const registry = {};
  const populate = new Function('VARIANTS', code);
  populate(registry);

  const variant = registry[n];
  return variant ? variant : null;
}
/* ── Per-task classification + answer extraction ──────────────────── */
/**
 * Classifies a task and determines its checkable answer.
 *
 * - Tasks with a non-empty `opts` array are 'mc'. A valid explicit
 *   `task.answer` (a single letter а–д) wins; otherwise the letter is parsed
 *   from the sol-ans text. `answer` is null when neither works.
 * - Other tasks are 'open' when an answer is available, either explicit
 *   (`task.answer` as a non-empty string or a finite number) or parsed from
 *   the sol-ans text; otherwise they are 'long' with a null answer.
 *
 * @param {{ sol?: string, opts?: Array, answer?: (string|number) }} task
 * @returns {{ task_type: ('mc'|'open'|'long'), answer: (string|null),
 *   source: string, raw_answer: (string|null) }} `source` is 'explicit',
 *   'parsed' or 'failed' for mc tasks, 'explicit' or 'parsed' for open tasks,
 *   and 'long' for long tasks; `raw_answer` is the text extracted from sol-ans.
 */
function classifyTask(task) {
  const ansText = extractAnswerText(task.sol || '');

  if (Array.isArray(task.opts) && task.opts.length) {
    // MC: explicit answer wins, else parse the letter from sol-ans
    let answer = (typeof task.answer === 'string') ? task.answer.toLowerCase().trim() : null;
    let source = 'explicit';
    if (!answer || !/^[а-д]$/.test(answer)) {
      answer = parseMcLetter(ansText);
      source = answer ? 'parsed' : 'failed';
    }
    return { task_type: 'mc', answer, source, raw_answer: ansText };
  }

  // Non-MC: an explicit answer wins. Numeric literals (e.g. `answer: 5`) are
  // accepted too, so an author-supplied answer is never silently ignored.
  let answer = null;
  if (typeof task.answer === 'string') {
    answer = task.answer.trim();
  } else if (typeof task.answer === 'number' && Number.isFinite(task.answer)) {
    answer = String(task.answer);
  }
  let source = answer ? 'explicit' : null;
  if (!answer) {
    answer = parseOpenAnswer(ansText);
    source = answer ? 'parsed' : 'failed';
  }
  if (answer) return { task_type: 'open', answer, source, raw_answer: ansText };
  return { task_type: 'long', answer: null, source: 'long', raw_answer: ansText };
}
/* ── Import a single track ────────────────────────────────────────── */
/**
 * Re-imports all tasks of one exam track into exam_tasks and collects
 * parse-quality statistics.
 *
 * Unless DRY_RUN is set, the track's existing rows are deleted and the new
 * rows inserted inside ONE transaction, so a failure part-way through (for
 * example a variant file that throws while loading) leaves the previously
 * imported rows in place instead of an emptied track.
 * NOTE(review): this relies on `db.transaction(fn)` rolling back when `fn`
 * throws, as better-sqlite3 does — confirm for the project's db wrapper.
 *
 * @param {string} examKey - Key present in TRACK_VARIANTS_DIR and exam_tracks.
 * @returns {object} Stats: examKey, variants, tasks, mc/open/long counts,
 *   mcExplicit/mcParsed/mcFailed, openExplicit/openParsed, and failedExamples
 *   (at most 20 'long' tasks that had a sol-ans answer).
 * @throws {Error} If the exam key has no variants directory mapping or is not
 *   registered in exam_tracks; database and variant-loading errors propagate.
 */
function importTrack(examKey) {
  const dir = TRACK_VARIANTS_DIR[examKey];
  if (!dir) throw new Error(`Unknown exam_key: ${examKey} (no variants dir mapping)`);
  const track = db.prepare('SELECT variants_count FROM exam_tracks WHERE exam_key = ?').get(examKey);
  if (!track) throw new Error(`Track not registered in exam_tracks: ${examKey}`);

  const stats = {
    examKey,
    variants: 0,
    tasks: 0,
    mc: 0, open: 0, long: 0,
    mcExplicit: 0, mcParsed: 0, mcFailed: 0,
    openExplicit: 0, openParsed: 0,
    failedExamples: [], // tasks classified as long where sol-ans existed (potential miss)
  };

  const ins = db.prepare(`
INSERT INTO exam_tasks
(exam_key, variant, task_idx, task_type, text_html, figure_html, opts_json, answer, solution_html)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
`);

  const writeAll = db.transaction(() => {
    if (!DRY_RUN) {
      // Delete inside the transaction so it is undone together with the
      // inserts if anything below throws.
      db.prepare('DELETE FROM exam_tasks WHERE exam_key = ?').run(examKey);
    }
    for (let n = 1; n <= track.variants_count; n++) {
      const v = loadVariant(dir, n);
      if (!v || !Array.isArray(v.tasks) || !v.tasks.length) {
        if (VERBOSE) console.log(` v${String(n).padStart(2,'0')}: missing/empty — skipped`);
        continue;
      }
      stats.variants++;
      v.tasks.forEach((task, idx) => {
        const taskIdx = idx + 1; // task_idx is 1-based
        const cls = classifyTask(task);
        stats[cls.task_type]++;
        if (cls.task_type === 'mc') {
          if (cls.source === 'explicit') stats.mcExplicit++;
          else if (cls.source === 'parsed') stats.mcParsed++;
          else stats.mcFailed++;
        } else if (cls.task_type === 'open') {
          if (cls.source === 'explicit') stats.openExplicit++;
          else stats.openParsed++;
        } else if (cls.task_type === 'long' && cls.raw_answer) {
          // Has an answer but we classified as long → likely autoparser missed something
          if (stats.failedExamples.length < 20) {
            stats.failedExamples.push({ v: n, idx: taskIdx, raw: cls.raw_answer.slice(0, 80) });
          }
        }
        stats.tasks++;
        if (!DRY_RUN) {
          ins.run(
            examKey,
            n,
            taskIdx,
            cls.task_type,
            task.text || '',
            task.figure || null,
            task.opts ? JSON.stringify(task.opts) : null,
            cls.answer,
            task.sol || ''
          );
        }
      });
    }
  });
  writeAll();
  return stats;
}
/* ── Reporting ────────────────────────────────────────────────────── */
/**
 * Formats `n` as a percentage of `total` with one decimal place.
 *
 * @param {number} n - Part.
 * @param {number} total - Whole; any falsy value (0, undefined, …) yields '0%'.
 * @returns {string} For example '33.3%', or '0%' when total is falsy.
 */
function pct(n, total) {
  return total ? `${((n / total) * 100).toFixed(1)}%` : '0%';
}
/**
 * Prints the parse-quality summary for one imported track to stdout.
 *
 * Output covers the variant/task totals, the mc/open/long breakdown, the
 * collected examples of 'long' tasks that had a sol-ans answer, and the share
 * of auto-checkable (mc + open) tasks.
 *
 * @param {object} stats - Stats object as returned by importTrack.
 * @returns {void}
 */
function report(stats) {
  const misses = stats.failedExamples;
  const missCount = misses.length;

  console.log(`\n═══ ${stats.examKey} ═══`);
  console.log(`Variants imported: ${stats.variants}`);
  console.log(`Total tasks: ${stats.tasks}`);
  console.log(` MC : ${stats.mc} (${pct(stats.mc, stats.tasks)})`);
  console.log(` explicit: ${stats.mcExplicit}, parsed: ${stats.mcParsed}, FAILED: ${stats.mcFailed}`);
  console.log(` Open : ${stats.open} (${pct(stats.open, stats.tasks)})`);
  console.log(` explicit: ${stats.openExplicit}, parsed: ${stats.openParsed}`);
  console.log(` Long : ${stats.long} (${pct(stats.long, stats.tasks)})`);
  console.log(` ${stats.long - missCount} truly complex, ${missCount}+ POTENTIAL autoparse misses`);

  if (missCount) {
    console.log(`\nPotential autoparse misses (classified 'long' but had a sol-ans answer):`);
    for (const example of misses) {
      const variantLabel = String(example.v).padStart(2,'0');
      console.log(` v${variantLabel} t${example.idx}: «${example.raw}»`);
    }
    console.log(`(showing first ${missCount}; fix by adding answer: '...' field in v*.js task, or relax parser in this script)`);
  }

  // Every mc/open task counted here had its answer determined, except mc tasks
  // whose letter could not be found (those are counted in mcFailed).
  const determined = stats.mcParsed + stats.openParsed + stats.mcExplicit + stats.openExplicit;
  const checkable = stats.mc + stats.open;
  console.log(`\nAutocheckable tasks (mc+open): ${checkable} / ${stats.tasks} (${pct(checkable, stats.tasks)})`);
  console.log(`Of those, answer determined: ${determined} (${pct(determined, checkable)})`);
}
/* ── Main ─────────────────────────────────────────────────────────── */
/**
 * CLI entry point: imports every requested track (or all tracks known to
 * TRACK_VARIANTS_DIR when no exam key was given) and prints a report for each.
 *
 * A failure in one track is logged and sets a non-zero exit code, but does not
 * stop the remaining tracks from being processed.
 */
function main() {
  const targets = args.length ? args : Object.keys(TRACK_VARIANTS_DIR);
  const drySuffix = DRY_RUN ? ' (DRY RUN)' : '';
  console.log(`[import-exam-tasks] Targets: ${targets.join(', ')}${drySuffix}`);

  targets.forEach((examKey) => {
    try {
      report(importTrack(examKey));
    } catch (e) {
      console.error(`[${examKey}] FAILED: ${e.message}`);
      process.exitCode = 1;
    }
  });

  if (DRY_RUN) {
    console.log(`\n[DRY RUN] No changes written to DB.`);
  }
}

main();