#!/usr/bin/env node
/**
 * gen-exam-textbook-sections.js
 *
 * Regenerates the §-section taxonomy of the grades 5-9 math-family textbooks,
 * used by tag-exam-textbook.js (the exam→textbook classifier).
 *
 * Outputs:
 *   backend/scripts/exam-textbook-sections.json — machine-readable (the classifier reads this)
 *   plans/exam-textbook-links/taxonomy.md       — human-readable reference
 *
 * Re-run whenever a grade 5-9 algebra/geometry/math chapter gains or renames a §.
 * Note: math-5/6 are engine-rendered (math6_engine.js builds the section anchors,
 * id="sec-<p.id>", from window.M6.paras) — their §s are NOT extracted statically
 * here (emitted with engine:'math6' marker); the classifier links them at chapter level.
 *
 * Usage: node backend/scripts/gen-exam-textbook-sections.js
 */
'use strict';

const fs = require('fs');
const path = require('path');

const DIR = path.join(__dirname, '../../frontend/textbooks');
const OUT_MD = path.join(__dirname, '../../plans/exam-textbook-links/taxonomy.md');
const OUT_JSON = path.join(__dirname, 'exam-textbook-sections.json');

// The last section of a file has no following anchor to bound it, so only this
// many characters after its opening tag are scanned for the number and heading.
const LAST_SECTION_SCAN_CHARS = 4000;

// chapter slug -> html file (from the textbooks table). Order = teaching order.
const CHAPTERS = [
  ['math-5-ch1', 'math_5_ch1.html'],
  ['math-5-ch2', 'math_5_ch2.html'],
  ['math-5-ch3', 'math_5_ch3.html'],
  ['math-6-ch1', 'math_6_ch1.html'],
  ['math-6-ch2', 'math_6_ch2.html'],
  ['math-6-ch3', 'math_6_ch3.html'],
  ['math-6-ch4', 'math_6_ch4.html'],
  ['math-6-ch5', 'math_6_ch5.html'],
  ['math-6-ch6', 'math_6_ch6.html'],
  ['algebra-7-ch1', 'algebra_7_ch1.html'],
  ['algebra-7-ch2', 'algebra_7_ch2.html'],
  ['algebra-7-ch3', 'algebra_7_ch3.html'],
  ['algebra-7-ch4', 'algebra_7_ch4.html'],
  ['geometry-7-ch1', 'geometry_7_ch1.html'],
  ['geometry-7-ch2', 'geometry_7_ch2.html'],
  ['geometry-7-ch3', 'geometry_7_ch3.html'],
  ['geometry-7-ch4', 'geometry_7_ch4.html'],
  ['geometry-7-ch5', 'geometry_7_ch5.html'],
  ['algebra-8-ch1', 'algebra_8.html'],
  ['algebra-8-ch2', 'algebra_8_ch2.html'],
  ['algebra-8-ch3', 'algebra_8_ch3.html'],
  ['geometry-8-ch1', 'geometry_8_ch1.html'],
  ['geometry-8-ch2', 'geometry_8_ch2.html'],
  ['geometry-8-ch3', 'geometry_8_ch3.html'],
  ['geometry-8-ch4', 'geometry_8_ch4.html'],
  ['algebra-9-ch1', 'algebra_9_ch1.html'],
  ['algebra-9-ch2', 'algebra_9_ch2.html'],
  ['algebra-9-ch3', 'algebra_9_ch3.html'],
  ['algebra-9-ch4', 'algebra_9_ch4.html'],
  ['geometry-9-ch1', 'geometry_9_ch1.html'],
  ['geometry-9-ch2', 'geometry_9_ch2.html'],
  ['geometry-9-ch3', 'geometry_9_ch3.html'],
  ['geometry-9-ch4', 'geometry_9_ch4.html'],
];

/**
 * Removes HTML tags from a fragment and collapses whitespace runs to single spaces.
 * HTML entities are left as-is.
 *
 * @param {*} html - Fragment to clean; coerced with String().
 * @returns {string} Tag-free, trimmed text.
 */
function strip(html) {
  return String(html).replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
}

/**
 * Extracts the text of the document's first <title> element.
 *
 * The pattern is anchored on the opening tag: without that anchor the capture
 * starts right after the preceding '<' and picks up the literal "title>" prefix.
 *
 * @param {string} html - Full page source.
 * @returns {string|null} Cleaned title text, or null when no <title> is found.
 */
function readTitle(html) {
  const found = html.match(/<title\b[^>]*>([^<]*)<\/title>/i);
  return found ? strip(found[1]) : null;
}

/**
 * Locates every <section>/<div> opening tag whose id starts with "sec-".
 *
 * @param {string} html - Full page source.
 * @returns {Array<{id: string, start: number}>} Anchor ids with the offset of
 *   their opening tag, in document order.
 */
function findSections(html) {
  // Fresh /g literal per call, so lastIndex state never leaks between files.
  const secRe = /<(?:section|div)\b[^>]*\sid="(sec-(?:p\d+|final\d*|[a-z0-9-]+))"[^>]*>/gi;
  const found = [];
  let m;
  while ((m = secRe.exec(html)) !== null) {
    found.push({ id: m[1], start: m.index });
  }
  return found;
}

// Markdown report lines (joined with '\n' when written).
const lines = ['# §-таксономия учебников 5–9 (математика) — эталон для классификатора экзамена math9', ''];
// Machine-readable entries: [{book, chapter_slug, subject, grade, para_id, num, title}]
// plus one {engine: 'math6', ...} marker per chapter without static anchors.
const json = [];
let prevBook = '';

for (const [slug, file] of CHAPTERS) {
  const book = slug.replace(/-ch\d+$/, '');
  const subject = book.replace(/-\d+$/, ''); // math|algebra|geometry
  const grade = Number((book.match(/-(\d+)$/) || [])[1]) || null;

  // Emit a book heading once, when the first chapter of a new book is reached.
  if (book !== prevBook) {
    lines.push(`\n## ${book}`);
    prevBook = book;
  }

  const filePath = path.join(DIR, file);
  if (!fs.existsSync(filePath)) {
    // Missing files are reported in the markdown only; nothing is added to the JSON.
    lines.push(`### ${slug} (FILE MISSING: ${file})`);
    continue;
  }

  const html = fs.readFileSync(filePath, 'utf8');
  const pageTitle = readTitle(html);
  lines.push(`### ${slug} — ${pageTitle !== null ? pageTitle : file}`);

  const secs = findSections(html);
  if (!secs.length) {
    // A chapter with no static sec-* anchors is treated as engine-rendered.
    // NOTE(review): this applies to any chapter, so an algebra/geometry page that
    // loses its anchors would also be labelled engine:'math6' — confirm that is intended.
    lines.push(` (движок math6: статических sec[id] нет; якоря строятся из window.M6.paras → id="sec-<p.id>")`);
    json.push({
      book,
      chapter_slug: slug,
      subject,
      grade,
      engine: 'math6',
      note: 'paras in window.M6 config; anchors sec-<p.id>',
    });
    continue;
  }

  for (let i = 0; i < secs.length; i++) {
    const current = secs[i];
    const next = secs[i + 1];
    // A section's markup runs up to the next anchor; the last one is capped instead.
    const end = next ? next.start : current.start + LAST_SECTION_SCAN_CHARS;
    const seg = html.slice(current.start, end);

    // Each capture ends at the first closing tag after the class attribute.
    const numM = seg.match(/class="sec-num"[^>]*>([\s\S]*?)<\//i);
    const hM = seg.match(/class="sec-h"[^>]*>([\s\S]*?)<\//i);
    const paraId = current.id.replace(/^sec-/, ''); // p10 | final3
    const num = numM ? strip(numM[1]) : '';
    const title = hM ? strip(hM[1]) : '';

    lines.push(` ${current.id.padEnd(12)} ${num ? `[${num}] ` : ''}${title}`);

    // Only numbered paragraphs (pN) go into the JSON; other anchors such as
    // final* appear in the markdown reference only.
    if (/^p\d+$/.test(paraId)) {
      json.push({ book, chapter_slug: slug, subject, grade, para_id: paraId, num, title });
    }
  }
}

fs.mkdirSync(path.dirname(OUT_MD), { recursive: true });
fs.writeFileSync(OUT_MD, lines.join('\n'), 'utf8');
fs.writeFileSync(OUT_JSON, JSON.stringify(json, null, 2), 'utf8');
console.log('Wrote', OUT_MD);
console.log('Wrote', OUT_JSON, `(${json.length} sections)`);