feat(exam-prep F0): миграция 022 + импорт-скрипт (800 задач math9, 76% автопроверяемые)

2026-05-29 10:04:30 +03:00
parent b76315573c
commit 948b831273
3 changed files with 413 additions and 0 deletions
@@ -13,6 +13,7 @@
    "seed:permissions": "node src/db/seed-permissions.js",
    "lint:routes": "node scripts/check-route-auth.js",
    "import:content": "node scripts/import-content.js",
+    "import:exam-tasks": "node scripts/import-exam-tasks.js",
    "test": "node --test tests/*.test.js",
    "hooks:install": "sh ../scripts/install-hooks.sh"
  },
@@ -0,0 +1,287 @@
+#!/usr/bin/env node
+/**
+ * import-exam-tasks.js — imports tasks from /frontend/js/exam9/variants/*.js
+ * into the generic exam_tasks table for the exam-prep module.
+ *
+ * Usage:
+ *   node backend/scripts/import-exam-tasks.js              # all enabled tracks
+ *   node backend/scripts/import-exam-tasks.js math9        # one specific track
+ *   node backend/scripts/import-exam-tasks.js math9 --dry  # don't write, only report parse stats
+ *
+ * Idempotent: deletes existing exam_tasks rows for the target exam_key before inserting.
+ *
+ * For each variant V it produces tasks_per_variant rows in exam_tasks. For each task:
+ *   - task_type: 'mc' if has opts; 'open' if sol-ans parses to a clean numeric/short value; 'long' otherwise
+ *   - answer: explicit task.answer if present; else autoparsed from <div class="sol-ans">
+ *   - text_html / figure_html / opts_json / solution_html — direct
+ *
+ * Reports parse-quality stats at the end:
+ *   - per-track: total / mc / open / long / explicit-answer / parsed-answer / unparseable
+ *   - lists tasks where autoparse failed but has no opts and no explicit answer
+ */
+'use strict';
+const fs   = require('fs');
+const path = require('path');
+const db   = require('../src/db/db');
+
+// Maps an exam_key to the directory that holds its variant source files
+// (v01.js, v02.js, ...). importTrack() rejects any key that is missing here,
+// and main() imports every key of this map when no track is named on the CLI.
+const TRACK_VARIANTS_DIR = {
+  math9: path.join(__dirname, '../../frontend/js/exam9/variants'),
+};
+
+// CLI parsing: every argument that starts with '-' is a flag, everything else
+// is an exam_key. The split is on a single dash (not '--') so that the short
+// '-v' alias actually lands in `flags`; with a '--' test it would be treated
+// as an exam_key and the run would fail with "Unknown exam_key: -v".
+const args      = process.argv.slice(2).filter(a => !a.startsWith('-'));
+const flags     = new Set(process.argv.slice(2).filter(a => a.startsWith('-')));
+const DRY_RUN   = flags.has('--dry');                           // parse + report only, no DB writes
+const VERBOSE   = flags.has('--verbose') || flags.has('-v');    // log skipped variants
+
+/* ── HTML-text extraction from sol-ans div ───────────────────────── */
+/**
+ * Extracts the plain-text answer from a solution's <div class="sol-ans">.
+ *
+ * Tags inside the div are stripped, a handful of HTML entities are decoded,
+ * and a leading "Ответ:" label is removed.
+ *
+ * @param {string} solHtml - solution HTML of a task (may be empty).
+ * @returns {string|null} trimmed answer text, or null when there is no
+ *   sol-ans div or it holds no text.
+ */
+function extractAnswerText(solHtml) {
+  if (!solHtml) return null;
+  // Non-greedy: stops at the first </div>, so the answer div must not nest divs.
+  const m = solHtml.match(/<div class="sol-ans">([\s\S]*?)<\/div>/);
+  if (!m) return null;
+  let raw = m[1]
+    .replace(/<[^>]+>/g, '')                    // strip HTML tags
+    .replace(/&ensp;|&nbsp;|&thinsp;/g, ' ')    // common entities
+    .replace(/&lt;/g, '<').replace(/&gt;/g, '>')
+    // &amp; must be decoded LAST: decoding it first would turn an escaped
+    // "&amp;lt;" into "&lt;" and then wrongly into "<" (double decoding).
+    .replace(/&amp;/g, '&')
+    .trim();
+  raw = raw.replace(/^Ответ[:\s]*/i, '').trim();
+  return raw || null;
+}
+
+/* ── MC letter detector: matches «а)», «б.», or a bare «в» ───────── */
+/**
+ * Reads the chosen option letter (а–д) from the start of an answer text.
+ *
+ * The letter must be followed by ")" or ".", or be the whole answer (a bare
+ * letter, the same form classifyTask accepts for explicit answers). A letter
+ * followed by other text, e.g. «в 2 раза», is deliberately NOT treated as an
+ * option letter because «в» is also an ordinary Russian preposition.
+ *
+ * @param {string|null} answerText - text produced by extractAnswerText().
+ * @returns {string|null} lower-case letter, or null when none is recognised.
+ */
+function parseMcLetter(answerText) {
+  if (!answerText) return null;
+  const m = answerText.match(/^([а-д])\s*(?:[).]|$)/i);
+  return m ? m[1].toLowerCase() : null;
+}
+
+/* ── Open answer parser: returns a canonical answer string.
+   Forms supported:
+     "-2"           single integer / decimal
+     "7500"         positive integer
+     "9/4"          fraction (from \dfrac{a}{b})
+     "-2;4"         pair of values (from "x=A и x=B")
+   Decimal commas are converted to dots. A typographic minus (U+2212 or the
+   &minus; entity) is treated as an ASCII "-", so the sign is never dropped.
+   Returns null if the answer is too complex (expressions, multiple vars,
+   inequalities, square roots, intervals).                              ── */
+/**
+ * @param {string|null} answerText - text produced by extractAnswerText().
+ * @returns {string|null} canonical answer, or null when it cannot be
+ *   reduced to one of the supported forms.
+ */
+function parseOpenAnswer(answerText) {
+  if (!answerText) return null;
+
+  // Normalize: trivial LaTeX spacing, leftover entities, typographic minus.
+  // Without the minus normalization "−2" would fall through to the
+  // last-resort branch below and come back as "2" (wrong sign).
+  let s = answerText
+    .replace(/\\;|\\,|\\:|\\ /g, ' ')
+    .replace(/&ensp;|&nbsp;/g, ' ')
+    .replace(/\u2212|&minus;/g, '-')
+    .trim();
+
+  // ── Pattern A: \dfrac{a}{b} or \frac{a}{b} as the sole answer
+  //    (optionally wrapped in $...$ and followed by a short unit tail)
+  const fracMatch = s.match(/^\$?\\d?frac\{(-?\d+)\}\{(-?\d+)\}\$?(?:\s*[а-яА-Яa-zA-Z²³°%\.]*\.?)?$/);
+  if (fracMatch) {
+    return `${fracMatch[1]}/${fracMatch[2]}`;
+  }
+  // Also: "-\dfrac{a}{b}" with sign outside
+  const negFracMatch = s.match(/^\$?-\\d?frac\{(\d+)\}\{(\d+)\}\$?(?:\s*[а-яА-Яa-zA-Z²³°%\.]*\.?)?$/);
+  if (negFracMatch) {
+    return `-${negFracMatch[1]}/${negFracMatch[2]}`;
+  }
+
+  // ── Pattern B: two roots "$x = A$ и $x = B$" or "x_1=A; x_2=B"
+  const twoRoots = s.match(/x\s*_?\d?\s*=\s*(-?\d+(?:[.,]\d+)?)[\s\$]*(?:\sи\s|;)\s*\$?x\s*_?\d?\s*=\s*(-?\d+(?:[.,]\d+)?)/);
+  if (twoRoots) {
+    const a = twoRoots[1].replace(',', '.');
+    const b = twoRoots[2].replace(',', '.');
+    return `${a};${b}`;
+  }
+
+  // Strip $...$ for further checks (single-number paths)
+  s = s.replace(/\$/g, '').trim();
+
+  // Reject remaining complex forms
+  if (/\\dfrac|\\frac|\\sqrt|\\sum|\\int|\\cdot|\\pi/.test(s)) return null;
+  if (/[<>≤≥]/.test(s)) return null;
+  if (/\(.*[;,].*\)/.test(s)) return null;                       // intervals/points
+  if (s.split(/\s+или\s+|\s+and\s+|\s+и\s+/i).length > 1) return null;
+  if (/[xyz]\s*_?\d?\s*=.*[xyz]\s*_?\d?\s*=/.test(s)) return null; // multi-var didn't match pattern B
+
+  // "X = NUM" → take RHS (first "= number" found; the regex is not anchored)
+  const eq = s.match(/=\s*(-?\d+(?:[.,]\d+)?)/);
+  if (eq) return eq[1].replace(',', '.');
+
+  // Single number with optional short unit tail
+  const single = s.match(/^(-?\d+(?:[.,]\d+)?)(\s*[а-яА-Яa-zA-Z\.²³°%]*\.?)?$/);
+  if (single) return single[1].replace(',', '.');
+
+  // Last try: first number iff at most 8 other characters surround it
+  const first = s.match(/(-?\d+(?:[.,]\d+)?)/);
+  if (first && first[1].length >= s.length - 8) return first[1].replace(',', '.');
+
+  return null;
+}
+
+/* ── Load a variant from .js via Function constructor ────────────── */
+/**
+ * Loads variant number `n` from `<dir>/vNN.js` (NN is zero-padded to 2 digits).
+ *
+ * The file body is executed as a function that receives an empty object as
+ * `VARIANTS`; whatever the file stores under key `n` is returned.
+ *
+ * NOTE(review): this executes the file contents as code (new Function), so
+ * the variants directory must only ever contain trusted, repo-owned files.
+ *
+ * @param {string} dir - directory holding the v*.js files.
+ * @param {number} n - variant number.
+ * @returns {object|null} the variant object, or null when the file is missing
+ *   or did not register anything truthy under key `n`.
+ */
+function loadVariant(dir, n) {
+  const fileName = `v${String(n).padStart(2, '0')}.js`;
+  const fullPath = path.join(dir, fileName);
+  if (!fs.existsSync(fullPath)) {
+    return null;
+  }
+
+  const registry = {};
+  const populate = new Function('VARIANTS', fs.readFileSync(fullPath, 'utf8'));
+  populate(registry);
+  return registry[n] || null;
+}
+
+/* ── Per-task classification + answer extraction ──────────────────── */
+/**
+ * Decides a task's type and its machine-checkable answer.
+ *
+ *   - task has a non-empty `opts` array → 'mc'. The answer is the explicit
+ *     letter (а–д) or, failing that, the letter parsed from sol-ans; it stays
+ *     null with source 'failed' when neither works.
+ *   - otherwise → 'open' when an explicit or parsed answer exists,
+ *     else 'long' with a null answer.
+ *
+ * An explicit `task.answer` may be a string or a finite number; numbers are
+ * stringified so that `answer: 5` is honoured instead of being ignored.
+ *
+ * @param {object} task - raw task from a variant file (sol, opts, answer).
+ * @returns {{task_type: string, answer: (string|null), source: string, raw_answer: (string|null)}}
+ *   source is 'explicit' | 'parsed' | 'failed' | 'long'; raw_answer is the
+ *   unparsed sol-ans text (used for the parse-miss report).
+ */
+function classifyTask(task) {
+  const sol     = task.sol || '';
+  const ansText = extractAnswerText(sol);
+
+  // Normalize the explicit answer once: trimmed string, or null if absent/empty.
+  let explicit = null;
+  if (typeof task.answer === 'string') {
+    explicit = task.answer.trim() || null;
+  } else if (typeof task.answer === 'number' && Number.isFinite(task.answer)) {
+    explicit = String(task.answer);
+  }
+
+  if (Array.isArray(task.opts) && task.opts.length) {
+    // MC: explicit answer wins, else parse letter from sol-ans
+    let answer = explicit ? explicit.toLowerCase() : null;
+    let source = 'explicit';
+    if (!answer || !/^[а-д]$/.test(answer)) {
+      answer = parseMcLetter(ansText);
+      source = answer ? 'parsed' : 'failed';
+    }
+    return { task_type: 'mc', answer, source, raw_answer: ansText };
+  }
+
+  // Non-MC: try open numeric, then fallback to long
+  let answer = explicit;
+  let source = answer ? 'explicit' : null;
+
+  if (!answer) {
+    answer = parseOpenAnswer(ansText);
+    source = answer ? 'parsed' : 'failed';
+  }
+
+  if (answer) return { task_type: 'open', answer, source, raw_answer: ansText };
+  return { task_type: 'long', answer: null, source: 'long', raw_answer: ansText };
+}
+
+/* ── Import a single track ────────────────────────────────────────── */
+/**
+ * Re-imports every task of one exam track into exam_tasks and collects
+ * parse-quality statistics.
+ *
+ * Variants 1..variants_count (read from exam_tracks) are loaded from the
+ * track's directory; missing or empty variants are skipped. Unless DRY_RUN is
+ * set, the track's existing rows are deleted and the new rows inserted inside
+ * ONE transaction, so a failure part-way through (for example a broken
+ * variant file) leaves the previous task bank in place instead of an empty
+ * table.
+ *
+ * NOTE(review): exam_attempts.exam_task_id is declared ON DELETE CASCADE in
+ * migration 022, so if foreign-key enforcement is on, re-importing a track
+ * also deletes all user attempts for it. Confirm this is acceptable before
+ * running against a database that already has attempts.
+ *
+ * @param {string} examKey - key of the track, e.g. 'math9'.
+ * @returns {object} stats: counters per task type / answer source plus up to
+ *   20 `failedExamples` (tasks classified 'long' although sol-ans had text).
+ * @throws {Error} when examKey has no directory mapping or no exam_tracks row.
+ */
+function importTrack(examKey) {
+  const dir = TRACK_VARIANTS_DIR[examKey];
+  if (!dir) throw new Error(`Unknown exam_key: ${examKey} (no variants dir mapping)`);
+
+  const track = db.prepare('SELECT variants_count FROM exam_tracks WHERE exam_key = ?').get(examKey);
+  if (!track) throw new Error(`Track not registered in exam_tracks: ${examKey}`);
+
+  const stats = {
+    examKey,
+    variants: 0,
+    tasks: 0,
+    mc: 0, open: 0, long: 0,
+    mcExplicit: 0, mcParsed: 0, mcFailed: 0,
+    openExplicit: 0, openParsed: 0,
+    failedExamples: [],          // tasks classified as long where sol-ans existed (potential miss)
+  };
+
+  const del = db.prepare('DELETE FROM exam_tasks WHERE exam_key = ?');
+
+  const ins = db.prepare(`
+    INSERT INTO exam_tasks
+      (exam_key, variant, task_idx, task_type, text_html, figure_html, opts_json, answer, solution_html)
+    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+  `);
+
+  // presumably better-sqlite3: db.transaction() commits on return and rolls
+  // back when the callback throws — confirm against ../src/db/db.
+  const writeAll = db.transaction(() => {
+    // Delete inside the transaction so delete + inserts succeed or fail together.
+    if (!DRY_RUN) {
+      del.run(examKey);
+    }
+
+    for (let n = 1; n <= track.variants_count; n++) {
+      const v = loadVariant(dir, n);
+      if (!v || !Array.isArray(v.tasks) || !v.tasks.length) {
+        if (VERBOSE) console.log(`  v${String(n).padStart(2,'0')}: missing/empty — skipped`);
+        continue;
+      }
+      stats.variants++;
+
+      v.tasks.forEach((task, idx) => {
+        const taskIdx = idx + 1;             // task_idx is 1-based
+        const cls     = classifyTask(task);
+
+        stats[cls.task_type]++;
+        if (cls.task_type === 'mc') {
+          if (cls.source === 'explicit') stats.mcExplicit++;
+          else if (cls.source === 'parsed') stats.mcParsed++;
+          else stats.mcFailed++;
+        } else if (cls.task_type === 'open') {
+          if (cls.source === 'explicit') stats.openExplicit++;
+          else stats.openParsed++;
+        } else if (cls.task_type === 'long' && cls.raw_answer) {
+          // Has an answer but we classified as long → likely autoparser missed something
+          if (stats.failedExamples.length < 20) {
+            stats.failedExamples.push({ v: n, idx: taskIdx, raw: cls.raw_answer.slice(0, 80) });
+          }
+        }
+        stats.tasks++;
+
+        if (!DRY_RUN) {
+          ins.run(
+            examKey,
+            n,
+            taskIdx,
+            cls.task_type,
+            task.text || '',
+            task.figure || null,
+            task.opts ? JSON.stringify(task.opts) : null,
+            cls.answer,
+            task.sol || ''
+          );
+        }
+      });
+    }
+  });
+
+  writeAll();
+  return stats;
+}
+
+/* ── Reporting ────────────────────────────────────────────────────── */
+/**
+ * Formats n/total as a percentage with one decimal place.
+ *
+ * @param {number} n - part.
+ * @param {number} total - whole; a falsy total (0) yields '0%'.
+ * @returns {string} e.g. '25.0%'.
+ */
+function pct(n, total) {
+  return total ? `${((n / total) * 100).toFixed(1)}%` : '0%';
+}
+
+/**
+ * Prints the parse-quality report for one imported track to stdout.
+ *
+ * @param {object} stats - the object returned by importTrack().
+ */
+function report(stats) {
+  const {
+    examKey, variants, tasks,
+    mc, open,
+    mcExplicit, mcParsed, mcFailed,
+    openExplicit, openParsed,
+    failedExamples,
+  } = stats;
+  const longCount = stats.long;
+  const shown     = failedExamples.length;   // capped by the importer
+
+  const lines = [
+    `\n═══ ${examKey} ═══`,
+    `Variants imported: ${variants}`,
+    `Total tasks: ${tasks}`,
+    `  MC   : ${mc}  (${pct(mc, tasks)})`,
+    `         explicit: ${mcExplicit}, parsed: ${mcParsed}, FAILED: ${mcFailed}`,
+    `  Open : ${open}  (${pct(open, tasks)})`,
+    `         explicit: ${openExplicit}, parsed: ${openParsed}`,
+    `  Long : ${longCount}  (${pct(longCount, tasks)})`,
+    `         ${longCount - shown} truly complex, ${shown}+ POTENTIAL autoparse misses`,
+  ];
+  for (const line of lines) {
+    console.log(line);
+  }
+
+  if (shown) {
+    console.log(`\nPotential autoparse misses (classified 'long' but had a sol-ans answer):`);
+    for (const example of failedExamples) {
+      console.log(`  v${String(example.v).padStart(2,'0')} t${example.idx}: «${example.raw}»`);
+    }
+    console.log(`(showing first ${shown}; fix by adding answer: '...' field in v*.js task, or relax parser in this script)`);
+  }
+
+  // Tasks that can be auto-checked, and how many of them got an answer.
+  const checkable   = mc + open;
+  const autoSuccess = mcParsed + openParsed + mcExplicit + openExplicit;
+  console.log(`\nAutocheckable tasks (mc+open): ${checkable} / ${tasks} (${pct(checkable, tasks)})`);
+  console.log(`Of those, answer determined: ${autoSuccess} (${pct(autoSuccess, checkable)})`);
+}
+
+/* ── Main ─────────────────────────────────────────────────────────── */
+/**
+ * Entry point: imports each track named on the command line (or every track
+ * in TRACK_VARIANTS_DIR when none is named) and prints its report.
+ *
+ * A failing track is logged and sets exit code 1; the remaining tracks are
+ * still processed.
+ */
+function main() {
+  const targets = args.length ? args : Object.keys(TRACK_VARIANTS_DIR);
+  const dryRunSuffix = DRY_RUN ? ' (DRY RUN)' : '';
+  console.log(`[import-exam-tasks] Targets: ${targets.join(', ')}${dryRunSuffix}`);
+
+  targets.forEach((examKey) => {
+    try {
+      report(importTrack(examKey));
+    } catch (e) {
+      console.error(`[${examKey}] FAILED: ${e.message}`);
+      process.exitCode = 1;
+    }
+  });
+
+  if (DRY_RUN) {
+    console.log(`\n[DRY RUN] No changes written to DB.`);
+  }
+}
+
+main();
@@ -0,0 +1,125 @@
+-- ═══════════════════════════════════════════════════════════════
+-- 022: Exam Preparation Module
+--
+-- Generic exam preparation infrastructure parameterized by exam_key.
+-- Supports multiple exams (math9, phys9, chem9, math11ce, etc.) sharing
+-- the same task bank, attempts tracking, mock sessions, and study plans.
+--
+-- See docs/exam-prep-plan.md for full architecture.
+-- ═══════════════════════════════════════════════════════════════
+
+-- ── Exam tracks registry ────────────────────────────────────────
+-- One row per exam track, keyed by exam_key (for example math9).
+-- exam_tasks.exam_key references this table with ON DELETE CASCADE.
+-- The import script reads variants_count to decide how many vNN.js variant
+-- files to load for a track. It does not read tasks_per_variant.
+-- scoring_json holds a JSON array of correct/score pairs (see the seed row).
+-- enabled and sort_order: presumably used for listing tracks in the UI,
+-- no reader is visible in this change (TODO confirm).
+CREATE TABLE exam_tracks (
+  exam_key          TEXT PRIMARY KEY,
+  title             TEXT NOT NULL,
+  subject_slug      TEXT NOT NULL,
+  grade             INTEGER NOT NULL,
+  duration_min      INTEGER NOT NULL,
+  tasks_per_variant INTEGER NOT NULL,
+  variants_count    INTEGER NOT NULL,
+  scoring_json      TEXT,
+  intro_html        TEXT,
+  enabled           INTEGER NOT NULL DEFAULT 1,
+  sort_order        INTEGER NOT NULL DEFAULT 0
+);
+
+-- ── Task bank (one row per task across all variants) ────────────
+-- Filled by scripts/import-exam-tasks.js, which deletes and re-inserts all
+-- rows of a track on every run.
+--   task_type   mc = has options, open = short auto-checkable answer,
+--               long = no machine-checkable answer (answer is NULL)
+--   task_idx    1-based position of the task inside its variant
+--   opts_json   JSON-encoded options array, NULL when the task has none
+--   topic, subtopic, difficulty   not written by the import script
+-- NOTE(review): exam_attempts references id with ON DELETE CASCADE, so a
+-- re-import presumably removes existing attempts when foreign keys are
+-- enforced (TODO confirm this is intended).
+CREATE TABLE exam_tasks (
+  id            INTEGER PRIMARY KEY,
+  exam_key      TEXT NOT NULL REFERENCES exam_tracks(exam_key) ON DELETE CASCADE,
+  variant       INTEGER NOT NULL,
+  task_idx      INTEGER NOT NULL,
+  task_type     TEXT NOT NULL CHECK (task_type IN ('mc','open','long')),
+  text_html     TEXT NOT NULL,
+  figure_html   TEXT,
+  opts_json     TEXT,
+  answer        TEXT,
+  solution_html TEXT NOT NULL,
+  topic         TEXT,
+  subtopic      TEXT,
+  difficulty    INTEGER,
+  UNIQUE(exam_key, variant, task_idx)
+);
+-- Lookup indexes, each scoped by exam_key: by topic, by variant, by task type.
+CREATE INDEX idx_exam_tasks_topic   ON exam_tasks(exam_key, topic);
+CREATE INDEX idx_exam_tasks_variant ON exam_tasks(exam_key, variant);
+CREATE INDEX idx_exam_tasks_type    ON exam_tasks(exam_key, task_type);
+
+-- ── User attempts ───────────────────────────────────────────────
+-- One row per answer a user submits for a task. Rows are removed together
+-- with their user or their task (both foreign keys use ON DELETE CASCADE).
+--   is_correct   nullable, presumably NULL for tasks that cannot be
+--                auto-checked (TODO confirm)
+--   mode         one of practice, variant, topic, mock
+--   session_id   has no REFERENCES clause, presumably exam_mock_sessions.id
+--                (TODO confirm)
+--   created_at   INTEGER timestamp, unit is not defined in this migration
+CREATE TABLE exam_attempts (
+  id              INTEGER PRIMARY KEY,
+  user_id         INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+  exam_task_id    INTEGER NOT NULL REFERENCES exam_tasks(id) ON DELETE CASCADE,
+  user_answer     TEXT,
+  is_correct      INTEGER,
+  time_ms         INTEGER,
+  mode            TEXT NOT NULL CHECK (mode IN ('practice','variant','topic','mock')),
+  session_id      INTEGER,
+  hint_used       INTEGER NOT NULL DEFAULT 0,
+  solution_viewed INTEGER NOT NULL DEFAULT 0,
+  created_at      INTEGER NOT NULL
+);
+-- Indexes: newest attempts per user, attempts per task, attempts per session,
+-- and newest attempts of one user on one task.
+CREATE INDEX idx_exam_attempts_user_time ON exam_attempts(user_id, created_at DESC);
+CREATE INDEX idx_exam_attempts_task      ON exam_attempts(exam_task_id);
+CREATE INDEX idx_exam_attempts_session   ON exam_attempts(session_id);
+CREATE INDEX idx_exam_attempts_user_task ON exam_attempts(user_id, exam_task_id, created_at DESC);
+
+-- ── Mock exam sessions ──────────────────────────────────────────
+-- One row per mock exam run of a user.
+--   source          how the task set was chosen: variant, random, weak-topics
+--   variant         nullable, presumably set only when source is variant
+--                   (TODO confirm)
+--   task_ids_json   presumably a JSON array of exam_tasks.id values
+--                   (TODO confirm)
+--   status          active (default), finished or abandoned
+-- NOTE(review): exam_key carries no REFERENCES exam_tracks clause here,
+-- unlike exam_tasks.exam_key.
+CREATE TABLE exam_mock_sessions (
+  id                   INTEGER PRIMARY KEY,
+  user_id              INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+  exam_key             TEXT NOT NULL,
+  variant              INTEGER,
+  source               TEXT NOT NULL CHECK (source IN ('variant','random','weak-topics')),
+  task_ids_json        TEXT NOT NULL,
+  started_at           INTEGER NOT NULL,
+  finished_at          INTEGER,
+  duration_planned_min INTEGER NOT NULL,
+  score                INTEGER,
+  total_correct        INTEGER,
+  total_tasks          INTEGER,
+  status               TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active','finished','abandoned'))
+);
+-- Newest sessions of a user first.
+CREATE INDEX idx_mock_user ON exam_mock_sessions(user_id, started_at DESC);
+
+-- ── User preparation plan ───────────────────────────────────────
+-- At most one plan per user and exam (composite primary key).
+--   exam_date      stored as TEXT, the expected format is not defined here
+--   daily_target   defaults to 10, presumably tasks per day (TODO confirm)
+--   weak_focus     integer flag, defaults to 1
+-- NOTE(review): exam_key carries no REFERENCES exam_tracks clause here.
+CREATE TABLE exam_user_plan (
+  user_id      INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+  exam_key     TEXT NOT NULL,
+  exam_date    TEXT NOT NULL,
+  daily_target INTEGER NOT NULL DEFAULT 10,
+  weak_focus   INTEGER NOT NULL DEFAULT 1,
+  created_at   INTEGER NOT NULL,
+  updated_at   INTEGER NOT NULL,
+  PRIMARY KEY (user_id, exam_key)
+);
+
+-- ── Topic dictionary (populated in F6) ──────────────────────────
+-- Topics per exam, optionally nested through parent_slug.
+-- NOTE(review): slug alone is the primary key, so a slug cannot be reused
+-- by two different exams. Neither exam_key nor parent_slug declares a
+-- foreign key. Presumably exam_tasks.topic stores these slugs (TODO confirm).
+CREATE TABLE exam_topics (
+  slug         TEXT PRIMARY KEY,
+  exam_key     TEXT NOT NULL,
+  parent_slug  TEXT,
+  title        TEXT NOT NULL,
+  description  TEXT,
+  sort_order   INTEGER NOT NULL DEFAULT 0
+);
+-- Supports listing the topics of an exam grouped by parent.
+CREATE INDEX idx_topics_exam ON exam_topics(exam_key, parent_slug);
+
+-- ── Seed: math9 track ───────────────────────────────────────────
+-- Placeholder scoring grid (simple proportional). Replace with the actual
+-- Belarus 9th-grade math exam grid once published per academic year.
+-- The track must exist before scripts/import-exam-tasks.js can import it:
+-- the script reads variants_count from this row.
+-- NOTE(review): the grid goes up to correct = 30 while tasks_per_variant
+-- is 10. Confirm whether correct counts tasks or points.
+INSERT INTO exam_tracks (
+  exam_key, title, subject_slug, grade, duration_min,
+  tasks_per_variant, variants_count, scoring_json, intro_html, enabled, sort_order
+) VALUES (
+  'math9',
+  'Экзамен 9 класс — Математика',
+  'math',
+  9,       -- grade
+  180,     -- duration_min
+  10,      -- tasks_per_variant
+  80,      -- variants_count
+  '[{"correct":30,"score":10},{"correct":27,"score":9},{"correct":24,"score":8},{"correct":21,"score":7},{"correct":18,"score":6},{"correct":15,"score":5},{"correct":12,"score":4},{"correct":9,"score":3},{"correct":6,"score":2},{"correct":3,"score":1},{"correct":0,"score":0}]',
+  '<p>Полная подготовка к выпускному экзамену по математике за 9 класс. 80 реальных вариантов, разбор каждого задания, тренажёр по темам, пробные экзамены с таймером.</p>',
+  1,       -- enabled
+  10       -- sort_order
+);