Learn_System/backend/scripts/import-exam-tasks.js

#!/usr/bin/env node
/**
 * import-exam-tasks.js — imports tasks from /frontend/js/exam9/variants/*.js
 * into the generic exam_tasks table for the exam-prep module.
 *
 * Usage:
 *   node backend/scripts/import-exam-tasks.js              # all enabled tracks
 *   node backend/scripts/import-exam-tasks.js math9        # one specific track
 *   node backend/scripts/import-exam-tasks.js math9 --dry  # don't write, only report parse stats
 *   node backend/scripts/import-exam-tasks.js --verbose    # also log variants skipped as missing/empty
 *
 * Idempotent: deletes existing exam_tasks rows for the target exam_key before inserting.
 *
 * For each variant V it produces tasks_per_variant rows in exam_tasks. For each task:
 *   - task_type: 'mc' if has opts; 'open' if sol-ans parses to a clean numeric/short value; 'long' otherwise
 *   - answer: explicit task.answer if present; else autoparsed from <div class="sol-ans">
 *   - text_html / figure_html / opts_json / solution_html — direct
 *
 * Reports parse-quality stats at the end:
 *   - per-track: total / mc / open / long / explicit-answer / parsed-answer / unparseable
 *   - lists tasks where autoparse failed but has no opts and no explicit answer
 */
'use strict';
const fs   = require('fs');
const path = require('path');
const db   = require('../src/db/db');

// Maps each supported exam_key to the directory holding its v*.js variant files.
// The location is expressed relative to this script's own directory.
const TRACK_VARIANTS_DIR = {
  math9: path.join(__dirname, '..', '..', 'frontend', 'js', 'exam9', 'variants'),
};

/**
 * Splits raw CLI tokens into positional arguments and switches.
 *
 * Any token that starts with '-' and is longer than one character counts as a
 * switch, so both long ('--dry') and short ('-v') forms are recognised. Only
 * treating '--' tokens as switches would leave '-v' among the positionals,
 * where it would be mistaken for an exam key.
 *
 * @param {string[]} argv - command-line tokens without the node/script prefix
 * @returns {{positional: string[], switches: Set<string>}}
 */
function parseCliArgs(argv) {
  const positional = [];
  const switches = new Set();
  for (const token of argv) {
    if (token.length > 1 && token.startsWith('-')) {
      switches.add(token);
    } else {
      positional.push(token);
    }
  }
  return { positional, switches };
}

// args  — positional tokens, interpreted by main() as exam keys
// flags — every switch that was passed on the command line
const { positional: args, switches: flags } = parseCliArgs(process.argv.slice(2));
const DRY_RUN   = flags.has('--dry');
const VERBOSE   = flags.has('--verbose') || flags.has('-v');

/* ── HTML-text extraction from sol-ans div ───────────────────────── */
/**
 * Returns the plain-text content of the first <div class="sol-ans"> block
 * found in a solution's HTML, without markup and without a leading "Ответ".
 *
 * @param {string} solHtml - solution HTML of a task
 * @returns {string|null} the cleaned answer text, or null when the input is
 *   not a non-empty string, has no sol-ans block, or the block is empty
 */
function extractAnswerText(solHtml) {
  // Non-string input cannot contain an answer block; report "no answer" instead of throwing.
  if (typeof solHtml !== 'string' || !solHtml) return null;
  // Lazy match: the block ends at the first closing </div> after the opening tag.
  const m = solHtml.match(/<div class="sol-ans">([\s\S]*?)<\/div>/);
  if (!m) return null;
  let raw = m[1]
    .replace(/<[^>]+>/g, '')                    // strip HTML tags
    .replace(/&ensp;|&nbsp;|&thinsp;/g, ' ')    // common space entities
    .replace(/&lt;/g, '<').replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&')                     // decoded last so "&amp;lt;" stays "&lt;" (no double decoding)
    .trim();
  // Drop the leading "Ответ" label together with any colon/whitespace after it.
  raw = raw.replace(/^Ответ[:\s]*/i, '').trim();
  return raw || null;
}

/* ── MC letter detector: matches «а)», «б.», «в )», etc. ──────────── */
/**
 * Reads the multiple-choice letter an answer text starts with.
 *
 * The text must begin with one Cyrillic letter from а to д (either case),
 * followed by optional whitespace and then ")" or ".".
 *
 * @param {string} answerText - answer text taken from the solution
 * @returns {string|null} the lower-cased letter, or null when the text is
 *   empty or does not start with such a marker
 */
function parseMcLetter(answerText) {
  if (!answerText) {
    return null;
  }
  const [, letter] = answerText.match(/^([а-д])\s*[\)\.]/i) || [];
  return letter === undefined ? null : letter.toLowerCase();
}

/* ── Open answer parser: returns a canonical answer string.
   Forms supported:
     "-2"           single integer / decimal (decimal comma becomes a dot)
     "7500"         positive integer
     "9/4"          fraction (from \dfrac{a}{b} or \frac{a}{b})
     "-2;4"         pair of values (from "x=A и x=B" or "x_1=A; x_2=B")
   A typographic minus (U+2212) or the &minus; entity is read as "-".
   Returns null if the answer is too complex (expressions, multiple vars,
   inequalities, square roots, intervals).
   NOTE(review): the "X = NUM" and "last try" steps are heuristics that keep
   only the first number they see; text such as "x = 2/3" or "2x+1" still
   yields "2" — confirm whether such answers occur in the variant files. ── */
function parseOpenAnswer(answerText) {
  if (!answerText) return null;

  // Normalize: LaTeX spacing macros and space entities become plain spaces,
  // typographic minus signs become ASCII "-". Without the latter "−2" would
  // fall through to the last-resort branch and lose its sign.
  // `$` delimiters are kept here; patterns A and B tolerate them.
  let s = answerText
    .replace(/\\;|\\,|\\:|\\ /g, ' ')
    .replace(/&ensp;|&nbsp;/g, ' ')
    .replace(/&minus;|\u2212/g, '-')
    .trim();

  // ── Pattern A: \dfrac{a}{b} or \frac{a}{b} as the sole answer (optional short unit tail)
  const fracMatch = s.match(/^\$?\\d?frac\{(-?\d+)\}\{(-?\d+)\}\$?(?:\s*[а-яА-Яa-zA-Z²³°%\.]*\.?)?$/);
  if (fracMatch) {
    return `${fracMatch[1]}/${fracMatch[2]}`;
  }
  // Also: "-\dfrac{a}{b}" with sign outside
  const negFracMatch = s.match(/^\$?-\\d?frac\{(\d+)\}\{(\d+)\}\$?(?:\s*[а-яА-Яa-zA-Z²³°%\.]*\.?)?$/);
  if (negFracMatch) {
    return `-${negFracMatch[1]}/${negFracMatch[2]}`;
  }

  // ── Pattern B: two roots "$x = A$ и $x = B$" or "x_1=A; x_2=B"
  const twoRoots = s.match(/x\s*_?\d?\s*=\s*(-?\d+(?:[.,]\d+)?)[\s\$]*(?:\sи\s|;)\s*\$?x\s*_?\d?\s*=\s*(-?\d+(?:[.,]\d+)?)/);
  if (twoRoots) {
    const a = twoRoots[1].replace(',', '.');
    const b = twoRoots[2].replace(',', '.');
    return `${a};${b}`;
  }

  // Strip $...$ delimiters for the single-number paths below
  s = s.replace(/\$/g, '').trim();

  // Reject remaining complex forms
  if (/\\dfrac|\\frac|\\sqrt|\\sum|\\int|\\cdot|\\pi/.test(s)) return null;
  if (/[<>≤≥]/.test(s)) return null;
  if (/\(.*[;,].*\)/.test(s)) return null;                       // intervals/points
  if (s.split(/\s+или\s+|\s+and\s+|\s+и\s+/i).length > 1) return null;
  if (/[xyz]\s*_?\d?\s*=.*[xyz]\s*_?\d?\s*=/.test(s)) return null; // multi-var didn't match pattern B

  // "X = NUM" → take RHS
  const eq = s.match(/=\s*(-?\d+(?:[.,]\d+)?)/);
  if (eq) return eq[1].replace(',', '.');

  // Single number with optional short unit tail
  const single = s.match(/^(-?\d+(?:[.,]\d+)?)(\s*[а-яА-Яa-zA-Z\.²³°%]*\.?)?$/);
  if (single) return single[1].replace(',', '.');

  // Last try: first number, accepted only when at most 8 other characters surround it
  const first = s.match(/(-?\d+(?:[.,]\d+)?)/);
  if (first && first[1].length >= s.length - 8) return first[1].replace(',', '.');

  return null;
}

/* ── Load a variant from .js via Function constructor ────────────── */
/**
 * Loads variant number `n` from `<dir>/vNN.js` (NN = n padded to two digits).
 *
 * The file's source is executed as a function body with a single parameter
 * named VARIANTS bound to a fresh object; whatever the file stores under key
 * `n` of that object is the result.
 *
 * @param {string} dir - directory containing the v*.js variant files
 * @param {number} n - variant number
 * @returns {object|null} the value stored under VARIANTS[n], or null when the
 *   file does not exist or left that key unset/falsy
 * @throws {Error} when the file cannot be evaluated; the message names the file
 */
function loadVariant(dir, n) {
  const nn = String(n).padStart(2, '0');
  const file = path.join(dir, `v${nn}.js`);
  if (!fs.existsSync(file)) return null;
  const src = fs.readFileSync(file, 'utf8');
  const scope = {};
  try {
    // NOTE(security): this runs the file's contents as code. Only point this
    // script at variant files from a trusted source (e.g. the repository itself).
    new Function('VARIANTS', src)(scope);
  } catch (err) {
    // Without the path, a syntax error in one variant is reported with no hint
    // about which of the many v*.js files is broken.
    throw new Error(`Failed to load variant file ${file}: ${err.message}`, { cause: err });
  }
  return scope[n] || null;
}

/* ── Per-task classification + answer extraction ──────────────────── */
/**
 * Decides a task's type and the answer to store for it.
 *
 *   - tasks with a non-empty `opts` array are 'mc'; the answer is the explicit
 *     `task.answer` when it is a single letter а–д, otherwise the letter parsed
 *     from the sol-ans text (answer is null when neither works)
 *   - other tasks are 'open' when an explicit answer exists or the sol-ans text
 *     parses to a canonical value, and 'long' otherwise
 *
 * @param {object} task - task object from a variant file; reads `sol`, `opts`, `answer`
 * @returns {{task_type: string, answer: (string|null), source: string, raw_answer: (string|null)}}
 *   `source` is 'explicit', 'parsed', 'failed' (mc only) or 'long';
 *   `raw_answer` is the unparsed sol-ans text
 */
function classifyTask(task) {
  const sol     = task.sol || '';
  const ansText = extractAnswerText(sol);

  if (Array.isArray(task.opts) && task.opts.length) {
    // MC: explicit answer wins, else parse letter from sol-ans
    let answer = (typeof task.answer === 'string') ? task.answer.toLowerCase().trim() : null;
    let source = 'explicit';
    if (!answer || !/^[а-д]$/.test(answer)) {
      answer = parseMcLetter(ansText);
      source = answer ? 'parsed' : 'failed';
    }
    return { task_type: 'mc', answer, source, raw_answer: ansText };
  }

  // Non-MC: explicit answer first. A finite number (e.g. `answer: 7500` or
  // `answer: 0`) is accepted too and stored as its string form; ignoring it
  // would silently replace an author-provided answer with an autoparsed guess.
  let answer = null;
  if (typeof task.answer === 'string') {
    answer = task.answer.trim();
  } else if (typeof task.answer === 'number' && Number.isFinite(task.answer)) {
    answer = String(task.answer);
  }
  let source = answer ? 'explicit' : null;

  // No usable explicit answer: try the open-answer parser, else fall back to long
  if (!answer) {
    answer = parseOpenAnswer(ansText);
    source = answer ? 'parsed' : 'failed';
  }

  if (answer) return { task_type: 'open', answer, source, raw_answer: ansText };
  return { task_type: 'long', answer: null, source: 'long', raw_answer: ansText };
}

/* ── Import a single track ────────────────────────────────────────── */
/**
 * Re-imports every variant of one exam track into exam_tasks and collects
 * parse-quality statistics.
 *
 * Variants 1..variants_count (read from exam_tracks) are loaded from the
 * track's directory; missing or empty variants are skipped. Unless DRY_RUN is
 * set, the track's existing rows are deleted and the new rows inserted inside
 * one transaction, so a failure part-way through leaves the previous rows in
 * place (assuming `db.transaction` rolls back on a thrown error, as
 * better-sqlite3 does — confirm against ../src/db/db).
 *
 * @param {string} examKey - track key, e.g. 'math9'
 * @returns {object} statistics: counts per task type and answer source, plus
 *   up to 20 `failedExamples` (tasks classified 'long' that had sol-ans text)
 * @throws {Error} when the key has no variants directory or is not registered
 *   in exam_tracks; errors from loading variants or writing rows propagate
 */
function importTrack(examKey) {
  const dir = TRACK_VARIANTS_DIR[examKey];
  if (!dir) throw new Error(`Unknown exam_key: ${examKey} (no variants dir mapping)`);

  const track = db.prepare('SELECT variants_count FROM exam_tracks WHERE exam_key = ?').get(examKey);
  if (!track) throw new Error(`Track not registered in exam_tracks: ${examKey}`);

  const stats = {
    examKey,
    variants: 0,
    tasks: 0,
    mc: 0, open: 0, long: 0,
    mcExplicit: 0, mcParsed: 0, mcFailed: 0,
    openExplicit: 0, openParsed: 0,
    failedExamples: [],          // tasks classified as long where sol-ans existed (potential miss)
  };

  const del = db.prepare('DELETE FROM exam_tasks WHERE exam_key = ?');

  const ins = db.prepare(`
    INSERT INTO exam_tasks
      (exam_key, variant, task_idx, task_type, text_html, figure_html, opts_json, answer, solution_html)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const writeAll = db.transaction(() => {
    // The delete belongs to the same transaction as the inserts: run on its
    // own beforehand, a broken variant file would abort the import after the
    // old rows were already gone.
    if (!DRY_RUN) {
      del.run(examKey);
    }

    for (let n = 1; n <= track.variants_count; n++) {
      const v = loadVariant(dir, n);
      if (!v || !Array.isArray(v.tasks) || !v.tasks.length) {
        if (VERBOSE) console.log(`  v${String(n).padStart(2,'0')}: missing/empty — skipped`);
        continue;
      }
      stats.variants++;

      v.tasks.forEach((task, idx) => {
        const taskIdx = idx + 1;   // task_idx is 1-based
        const cls     = classifyTask(task);

        // task_type is 'mc' | 'open' | 'long', matching the counter names in stats
        stats[cls.task_type]++;
        if (cls.task_type === 'mc') {
          if (cls.source === 'explicit') stats.mcExplicit++;
          else if (cls.source === 'parsed') stats.mcParsed++;
          else stats.mcFailed++;
        } else if (cls.task_type === 'open') {
          if (cls.source === 'explicit') stats.openExplicit++;
          else stats.openParsed++;
        } else if (cls.task_type === 'long' && cls.raw_answer) {
          // Has an answer but we classified as long → likely autoparser missed something
          if (stats.failedExamples.length < 20) {
            stats.failedExamples.push({ v: n, idx: taskIdx, raw: cls.raw_answer.slice(0, 80) });
          }
        }
        stats.tasks++;

        if (!DRY_RUN) {
          ins.run(
            examKey,
            n,
            taskIdx,
            cls.task_type,
            task.text || '',
            task.figure || null,
            task.opts ? JSON.stringify(task.opts) : null,
            cls.answer,
            task.sol || ''
          );
        }
      });
    }
  });

  writeAll();
  return stats;
}

/* ── Reporting ────────────────────────────────────────────────────── */
/**
 * Formats n as a percentage of total with one decimal place.
 *
 * @param {number} n - part
 * @param {number} total - whole
 * @returns {string} e.g. '25.0%'; '0%' when total is zero or otherwise falsy
 */
function pct(n, total) {
  if (total) {
    const share = (n / total) * 100;
    return `${share.toFixed(1)}%`;
  }
  return '0%';
}

/**
 * Prints the parse-quality summary of one imported track to stdout.
 *
 * Shows the totals per task type, how the answers were obtained, the collected
 * examples of tasks that were classified 'long' although they had sol-ans
 * text, and the share of automatically checkable tasks.
 *
 * @param {object} stats - statistics object as returned by importTrack
 */
function report(stats) {
  const {
    examKey,
    variants,
    tasks,
    mc: mcTotal,
    open: openTotal,
    long: longTotal,
    mcExplicit,
    mcParsed,
    mcFailed,
    openExplicit,
    openParsed,
    failedExamples,
  } = stats;
  const missCount = failedExamples.length;

  console.log(`\n═══ ${examKey} ═══`);
  console.log(`Variants imported: ${variants}`);
  console.log(`Total tasks: ${tasks}`);
  console.log(`  MC   : ${mcTotal}  (${pct(mcTotal, tasks)})`);
  console.log(`         explicit: ${mcExplicit}, parsed: ${mcParsed}, FAILED: ${mcFailed}`);
  console.log(`  Open : ${openTotal}  (${pct(openTotal, tasks)})`);
  console.log(`         explicit: ${openExplicit}, parsed: ${openParsed}`);
  console.log(`  Long : ${longTotal}  (${pct(longTotal, tasks)})`);
  // The example list is capped by the importer, hence the "+" after the count.
  console.log(`         ${longTotal - missCount} truly complex, ${missCount}+ POTENTIAL autoparse misses`);

  if (missCount) {
    console.log(`\nPotential autoparse misses (classified 'long' but had a sol-ans answer):`);
    for (const miss of failedExamples) {
      console.log(`  v${String(miss.v).padStart(2,'0')} t${miss.idx}: «${miss.raw}»`);
    }
    console.log(`(showing first ${missCount}; fix by adding answer: '...' field in v*.js task, or relax parser in this script)`);
  }

  // Tasks with a determined answer: every mc/open task except mc ones whose letter could not be found.
  const autoSuccess = mcParsed + openParsed + mcExplicit + openExplicit;
  const checkable   = mcTotal + openTotal;
  console.log(`\nAutocheckable tasks (mc+open): ${checkable} / ${tasks} (${pct(checkable, tasks)})`);
  console.log(`Of those, answer determined: ${autoSuccess} (${pct(autoSuccess, checkable)})`);
}

/* ── Main ─────────────────────────────────────────────────────────── */
/**
 * Entry point: imports and reports every requested track.
 *
 * Targets are the positional CLI arguments, or every key of
 * TRACK_VARIANTS_DIR when none were given. A failing track is logged and sets
 * a non-zero exit code, but the remaining tracks are still processed.
 */
function main() {
  const targets = args.length ? args : Object.keys(TRACK_VARIANTS_DIR);
  const modeSuffix = DRY_RUN ? ' (DRY RUN)' : '';
  console.log(`[import-exam-tasks] Targets: ${targets.join(', ')}${modeSuffix}`);

  targets.forEach((examKey) => {
    try {
      report(importTrack(examKey));
    } catch (e) {
      console.error(`[${examKey}] FAILED: ${e.message}`);
      process.exitCode = 1;
    }
  });

  if (DRY_RUN) {
    console.log(`\n[DRY RUN] No changes written to DB.`);
  }
}

main();