/*
 * Fix OVER-ESCAPED LaTeX backslashes in textbook HTML.
 *
 * BUG: some formulas in JS string literals have too many backslashes, e.g.
 *   "$V=\\\\dfrac{1}{3}S_{осн}\\\\cdot h$"   (4 backslashes)
 * After JS unescaping KaTeX receives \\dfrac -> it renders "\\" as a LINE
 * BREAK and prints "dfrac"/"cdot" as plain text (exactly the screenshot).
 * The correct literal is 2 backslashes ("\\dfrac" -> value \dfrac).
 *
 * PARITY RULE (critical — protects legitimate row separators):
 *   literal-run length   value backslashes   meaning
 *        2                    1              \cmd                      OK  keep
 *        4                    2              \\ + "cmd"(text)          BUG -> 2
 *        6                    3              \\ + \cmd (rowbreak+cmd)  OK  keep
 *        8                    4              \\\\ + "cmd"(text)        BUG -> 2
 * => collapse ONLY runs whose length is a multiple of 4, AND only when the
 *    run is immediately followed by a known LaTeX command. Runs before "x",
 *    digits, etc. (real \\ row separators inside cases/array) are untouched.
 *
 * Usage: node backend/scripts/fix_overescaped_latex.js            (dry run)
 *        node backend/scripts/fix_overescaped_latex.js --apply    (write)
 *
 * A report is printed to stdout and also saved next to this script as
 * fix_overescaped_latex.report.txt. If any file cannot be read or written the
 * error is recorded in the report, the remaining files are still processed,
 * and the process exits with a non-zero exit code.
 */
'use strict';

const fs = require('fs');
const path = require('path');

const APPLY = process.argv.includes('--apply');

// Known LaTeX commands observed at 4/8 backslashes (exact-match whitelist).
const CMDSET = new Set([
  'dfrac', 'tfrac', 'frac', 'sqrt', 'cdot', 'pi', 'log', 'ln', 'lg', 'alpha', 'beta', 'gamma',
  'delta', 'Delta', 'theta', 'lambda', 'mu', 'sigma', 'phi', 'varphi', 'omega', 'infty',
  'iff', 'in', 'notin', 'ne', 'neq', 'ge', 'geq', 'le', 'leq', 'mathbb', 'mathrm',
  'leftrightarrow', 'rightarrow', 'leftarrow', 'times', 'div', 'vec', 'overline',
  'perp', 'parallel', 'cos', 'sin', 'tan', 'cot', 'ldots', 'cdots', 'pm', 'mp', 'angle', 'triangle',
]);

// KaTeX is only used to validate the result; when it cannot be loaded the
// script still runs and reports "null" for the error counts.
let katex = null;
try {
  katex = require('katex');
} catch {
  /* validation optional */
}

/**
 * Extract the contents of `$...$` and `$$...$$` regions from a text.
 *
 * The scan is purely textual: every `$` opens a region. When the opening
 * delimiter is `$$`, the closing `$$` is searched first and a single `$` is
 * accepted as a fallback. Scanning stops at the first delimiter that is never
 * closed.
 *
 * @param {string} t - Text to scan.
 * @returns {string[]} Inner text of each region, without the delimiters.
 */
function mathRegions(t) {
  const out = [];
  let i = 0;
  while (i < t.length) {
    const a = t.indexOf('$', i);
    if (a < 0) break;
    const dbl = t[a + 1] === '$';
    const s = a + (dbl ? 2 : 1);
    let b = dbl ? t.indexOf('$$', s) : t.indexOf('$', s);
    // "$$" opened but never closed with "$$": accept a lone "$" as the end.
    if (b < 0 && dbl) b = t.indexOf('$', s);
    if (b < 0) break;
    out.push(t.slice(s, b));
    // Skip past the closing delimiter (2 chars only if it really was "$$").
    i = b + (dbl && t.slice(b, b + 2) === '$$' ? 2 : 1);
  }
  return out;
}

// These math strings live in JS literals; KaTeX sees them AFTER one level of JS
// unescaping. Emulate that so validation reflects what the browser renders.
/**
 * Replace every pair of consecutive backslashes with a single backslash.
 *
 * @param {string} s - Text as it appears inside the JS string literal.
 * @returns {string} Text with each backslash pair halved.
 */
function jsUnescape(s) {
  return s.replace(/\\\\/g, '\\');
}

/**
 * Count the math regions of a text that KaTeX refuses to render.
 *
 * @param {string} t - Full file text.
 * @returns {number|null} Number of regions for which `katex.renderToString`
 *   throws, or `null` when KaTeX is not available.
 */
function katexErrors(t) {
  if (!katex) return null;
  let bad = 0;
  for (const inner of mathRegions(t)) {
    const expr = jsUnescape(inner);
    try {
      katex.renderToString(expr, { throwOnError: true });
    } catch {
      // Any render failure counts as one bad region; the message is not needed.
      bad++;
    }
  }
  return bad;
}

/**
 * Collapse over-escaped backslash runs in front of whitelisted LaTeX commands.
 *
 * A run is rewritten to exactly two backslashes only when its length is a
 * multiple of 4 and the letters that immediately follow it are, as a whole,
 * a member of CMDSET. Everything else is returned untouched.
 *
 * @param {string} t - Original file text.
 * @returns {{next: string, fixes: number, perCmd: Object<string, number>}}
 *   The rewritten text, the number of replacements, and a per-command tally.
 */
function collapseOverEscaped(t) {
  const perCmd = {};
  let fixes = 0;
  const next = t.replace(/(\\{4,})([A-Za-z]+)/g, (whole, bs, word) => {
    if (bs.length % 4 !== 0) return whole; // 6,10,... rowbreak+command -> keep
    if (!CMDSET.has(word)) return whole; // x / begin / unknown -> keep
    fixes++;
    perCmd[word] = (perCmd[word] || 0) + 1;
    return '\\\\' + word; // collapse to two backslashes
  });
  return { next, fixes, perCmd };
}

const dir = path.join(__dirname, '..', '..', 'frontend', 'textbooks');
const files = [
  'algebra_11_ch1.html', 'algebra_11_ch3.html', 'geometry_11_ch3.html',
  'geometry_11_ch2.html', 'geometry_11_ch1.html', 'algebra_11_ch2.html', 'algebra_8.html',
  'algebra_7_ch4.html', 'geometry_11_ch4.html',
];

const report = [];
report.push(`MODE: ${APPLY ? 'APPLY' : 'DRY-RUN'}`);

let grandFixes = 0;
let failed = false; // set when any file could not be read or written

for (const f of files) {
  const p = path.join(dir, f);

  let t;
  try {
    t = fs.readFileSync(p, 'utf8');
  } catch (err) {
    // Keep going: earlier files may already have been rewritten, so the
    // report must still be produced for them.
    report.push('');
    report.push(`${f}: !! ERROR: cannot read file (${err.message}) — skipped`);
    failed = true;
    continue;
  }

  const before = katexErrors(t);
  const { next, fixes, perCmd } = collapseOverEscaped(t);
  // validate by emulating browser render of the FIXED text
  const after = katexErrors(next);
  // Both counts are null when KaTeX is unavailable; then nothing can regress.
  const regressed = before !== null && after !== null && after > before;

  grandFixes += fixes;
  report.push('');
  report.push(
    `${f}: fixes=${fixes} katexErrors before=${before} after=${after}` +
      (fixes ? ` cmds=${JSON.stringify(perCmd)}` : ''),
  );
  if (regressed) {
    report.push(' !! WARNING: katex errors INCREASED — not writing this file');
  }

  if (APPLY && fixes > 0 && !regressed) {
    try {
      fs.writeFileSync(p, next, 'utf8');
    } catch (err) {
      report.push(` !! ERROR: cannot write file (${err.message})`);
      failed = true;
    }
  }
}

report.push('');
report.push('TOTAL fixes: ' + grandFixes);

// Print first so the results are visible even if saving the report fails.
const reportText = report.join('\n');
console.log(reportText);
fs.writeFileSync(path.join(__dirname, 'fix_overescaped_latex.report.txt'), reportText, 'utf8');

if (failed) process.exitCode = 1;