Files
Learn_System/backend/scripts/fix_overescaped_latex.js
Maxim Dolgolyov 8786cf5e20 fix(textbooks): убраны лишние слэши в LaTeX-формулах (over-escaping)
Формулы в JS-литералах имели \\\\dfrac / \\\\\\\\dfrac (4/8 слэшей) вместо
\\dfrac (2). После JS-анескейпа KaTeX получал \\dfrac, трактовал \\ как
перенос строки и печатал dfrac/cdot/sqrt/pi как текст (карточка пирамиды и
конуса в geometry_11_ch2, и др.).

Схлопнуты прогоны слэшей кратные 4 перед LaTeX-командой -> 2. Прогоны из
3 слэшей (\\ перенос строки + \cmd в \begin{cases}) и перед x/цифрой не
тронуты. 150 правок в 7 файлах (algebra_11_ch1/ch2/ch3, geometry_11_ch1..ch4).

БД чиста: questions (1398) text/explanation/correct_text + options (5187) -
0 багов. Скрипт: backend/scripts/fix_overescaped_latex.js (идемпотентный,
dry-run по умолчанию, --apply, с KaTeX-валидацией).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 11:53:17 +03:00

109 lines
4.4 KiB
JavaScript

/*
* Fix OVER-ESCAPED LaTeX backslashes in textbook HTML.
*
* BUG: some formulas in JS string literals have too many backslashes, e.g.
* "$V=\\\\dfrac{1}{3}S_{осн}\\\\cdot h$" (4 backslashes)
* After JS unescaping KaTeX receives \\dfrac -> it renders "\\" as a LINE
* BREAK and prints "dfrac"/"cdot" as plain text (exactly the screenshot).
* The correct literal is 2 backslashes ("\\dfrac" -> value \dfrac).
*
* PARITY RULE (critical — protects legitimate row separators):
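*   literal-run length | value backslashes | meaning                    | action
*   -------------------+-------------------+----------------------------+---------
*   2                  | 1                 | \cmd                       | OK, keep
*   4                  | 2                 | \\ + "cmd" (text)          | BUG -> 2
*   6                  | 3                 | \\ + \cmd (rowbreak + cmd) | OK, keep
*   8                  | 4                 | \\\\ + "cmd" (text)        | BUG -> 2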
* => collapse ONLY runs whose length is a multiple of 4, AND only when the
* run is immediately followed by a known LaTeX command. Runs before "x",
* digits, etc. (real \\ row separators inside cases/array) are untouched.
*
* Usage: node backend/scripts/fix_overescaped_latex.js (dry run)
* node backend/scripts/fix_overescaped_latex.js --apply (write)
*/
'use strict';
const fs = require('fs');
const path = require('path');
// Write mode is opt-in: without the literal `--apply` argument the script only reports.
const APPLY = process.argv.some((arg) => arg === '--apply');
// Exact-match whitelist of LaTeX command names. A backslash run is only
// collapsed when the letters that follow it are one of these names, so
// anything else (variables, `begin`, unknown words) is left alone.
const CMDSET = new Set([
  'dfrac', 'tfrac', 'frac', 'sqrt', 'cdot', 'pi',
  'log', 'ln', 'lg',
  'alpha', 'beta', 'gamma', 'delta', 'Delta', 'theta', 'lambda', 'mu',
  'sigma', 'phi', 'varphi', 'omega', 'infty',
  'iff', 'in', 'notin', 'ne', 'neq', 'ge', 'geq', 'le', 'leq',
  'mathbb', 'mathrm',
  'leftrightarrow', 'rightarrow', 'leftarrow',
  'times', 'div', 'vec', 'overline',
  'perp', 'parallel',
  'cos', 'sin', 'tan', 'cot',
  'ldots', 'cdots', 'pm', 'mp', 'angle', 'triangle',
]);
// KaTeX is an optional dependency used only to validate formulas before and
// after the rewrite. `katex` stays null when it cannot be loaded, which
// katexErrors() reports as "no validation" (null) instead of an error count.
let katex = null;
try {
  katex = require('katex');
} catch (err) {
  // Still best-effort, but no longer silent: without KaTeX the
  // "errors increased -> do not write" safety check cannot trigger, and the
  // operator should know that before running with --apply.
  const reason = err instanceof Error ? err.message : String(err);
  console.warn('fix_overescaped_latex: katex could not be loaded (' + reason +
    ') - KaTeX validation is DISABLED');
}
/**
 * Extract the inner text of every `$...$` / `$$...$$` span in `t`, scanning
 * left to right.
 *
 * An opening `$$` is normally closed by the next `$$`; if there is none, the
 * next single `$` is accepted as the closer. Scanning stops at the first
 * opener that has no closer at all.
 *
 * @param {string} t - Text to scan.
 * @returns {string[]} Inner contents of the spans, in order of appearance.
 */
function mathRegions(t) {
  const regions = [];
  let cursor = 0;
  for (;;) {
    if (cursor >= t.length) return regions;

    const open = t.indexOf('$', cursor);
    if (open === -1) return regions;

    const isDisplay = t.startsWith('$$', open);
    const delimiter = isDisplay ? '$$' : '$';
    const bodyStart = open + delimiter.length;

    let close = t.indexOf(delimiter, bodyStart);
    let closeWidth = delimiter.length;
    if (close === -1 && isDisplay) {
      // Unbalanced `$$`: fall back to a lone `$` as the terminator.
      close = t.indexOf('$', bodyStart);
      closeWidth = 1;
    }
    if (close === -1) return regions;

    regions.push(t.slice(bodyStart, close));
    cursor = close + closeWidth;
  }
}
/**
 * Emulate one level of JS string-literal unescaping for backslashes only:
 * every pair of consecutive backslashes becomes a single backslash.
 *
 * The formulas sit inside JS literals in the HTML, so this is applied before
 * validation to approximate the string KaTeX receives in the browser.
 *
 * @param {string} s - Formula text as written in the source literal.
 * @returns {string} The text with each backslash pair halved.
 */
function jsUnescape(s) {
  const DOUBLE_BACKSLASH = '\\\\';
  const SINGLE_BACKSLASH = '\\';
  return s.split(DOUBLE_BACKSLASH).join(SINGLE_BACKSLASH);
}
/**
 * Count the math regions of `t` that KaTeX refuses to render.
 *
 * Each region found by mathRegions() is passed through jsUnescape() and then
 * rendered with `throwOnError: true`; every throw counts as one failure.
 *
 * @param {string} t - Full file text.
 * @returns {number|null} Number of failing regions, or null when the katex
 *   module is not loaded (validation unavailable).
 */
function katexErrors(t) {
  if (!katex) return null;
  return mathRegions(t).reduce((failures, region) => {
    const expr = jsUnescape(region);
    try {
      katex.renderToString(expr, { throwOnError: true });
      return failures;
    } catch {
      return failures + 1;
    }
  }, 0);
}
// Textbook HTML files to process, resolved relative to this script's location.
const dir = path.join(__dirname, '..', '..', 'frontend', 'textbooks');
const files = ['algebra_11_ch1.html','algebra_11_ch3.html','geometry_11_ch3.html',
  'geometry_11_ch2.html','geometry_11_ch1.html','algebra_11_ch2.html','algebra_8.html',
  'algebra_7_ch4.html','geometry_11_ch4.html'];

// A run of 4+ backslashes immediately followed by a run of ASCII letters.
// String.prototype.replace resets lastIndex for /g regexes, so sharing one
// instance across files is safe.
const OVERESCAPED_RUN = /(\\{4,})([A-Za-z]+)/g;

/**
 * Collapse over-escaped backslash runs that precede a whitelisted command.
 *
 * A run is rewritten to exactly two backslashes only when its length is a
 * multiple of 4 AND the following word is in CMDSET; everything else
 * (6, 10, ... backslashes, or a non-whitelisted word) is returned unchanged.
 *
 * @param {string} text - Original file contents.
 * @returns {{next: string, fixes: number, perCmd: Object<string, number>}}
 *   The rewritten text, the number of replacements, and a per-command tally.
 */
function collapseOverEscaped(text) {
  const perCmd = {};
  let fixes = 0;
  const next = text.replace(OVERESCAPED_RUN, (whole, bs, word) => {
    if (bs.length % 4 !== 0) return whole; // 6,10,... rowbreak+command -> keep
    if (!CMDSET.has(word)) return whole; // x / begin / unknown -> keep
    fixes++;
    perCmd[word] = (perCmd[word] || 0) + 1;
    return '\\\\' + word; // collapse to two backslashes
  });
  return { next, fixes, perCmd };
}

const report = [];
report.push('MODE: ' + (APPLY ? 'APPLY' : 'DRY-RUN'));
let grandFixes = 0;
let failedFiles = 0;
for (const f of files) {
  const p = path.join(dir, f);
  report.push('');
  // Per-file isolation: a missing/unreadable/unwritable file must not abort
  // the run, otherwise files already rewritten in --apply mode would be left
  // without any report of what was changed.
  try {
    const t = fs.readFileSync(p, 'utf8');
    const before = katexErrors(t);
    const { next, fixes, perCmd } = collapseOverEscaped(t);
    // validate by emulating browser render of the FIXED text
    const after = katexErrors(next);
    grandFixes += fixes;
    report.push(f + ': fixes=' + fixes + ' katexErrors before=' + before + ' after=' + after +
      (fixes ? ' cmds=' + JSON.stringify(perCmd) : ''));
    // Both counts are null when KaTeX is unavailable; then nothing can regress.
    const regressed = after !== null && before !== null && after > before;
    if (regressed)
      report.push(' !! WARNING: katex errors INCREASED — not writing this file');
    if (APPLY && fixes > 0 && !regressed) fs.writeFileSync(p, next, 'utf8');
  } catch (err) {
    failedFiles++;
    report.push(f + ': ERROR ' + (err instanceof Error ? err.message : String(err)));
  }
}
report.push('');
report.push('TOTAL fixes: ' + grandFixes);
if (failedFiles > 0) {
  report.push('FAILED files: ' + failedFiles);
  // Keep a non-zero exit status (as an uncaught exception would have given)
  // while still letting the report below be written and printed.
  process.exitCode = 1;
}
fs.writeFileSync(path.join(__dirname, 'fix_overescaped_latex.report.txt'), report.join('\n'), 'utf8');
console.log(report.join('\n'));