8786cf5e20
Формулы в JS-литералах имели \\\\dfrac / \\\\\\\\dfrac (4/8 слэшей) вместо
\\dfrac (2). После JS-анескейпа KaTeX получал \\dfrac, трактовал \\ как
перенос строки и печатал dfrac/cdot/sqrt/pi как текст (карточка пирамиды и
конуса в geometry_11_ch2, и др.).
Схлопнуты прогоны слэшей кратные 4 перед LaTeX-командой -> 2. Прогоны из
3 слэшей (\\ перенос строки + \cmd в \begin{cases}) и перед x/цифрой не
тронуты. 150 правок в 7 файлах (algebra_11_ch1/ch2/ch3, geometry_11_ch1..ch4).
БД чиста: questions (1398) text/explanation/correct_text + options (5187) -
0 багов. Скрипт: backend/scripts/fix_overescaped_latex.js (идемпотентный,
dry-run по умолчанию, --apply, с KaTeX-валидацией).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
109 lines
4.4 KiB
JavaScript
109 lines
4.4 KiB
JavaScript
/*
|
|
* Fix OVER-ESCAPED LaTeX backslashes in textbook HTML.
|
|
*
|
|
* BUG: some formulas in JS string literals have too many backslashes, e.g.
|
|
* "$V=\\\\dfrac{1}{3}S_{осн}\\\\cdot h$" (4 backslashes)
|
|
* After JS unescaping KaTeX receives \\dfrac -> it renders "\\" as a LINE
|
|
* BREAK and prints "dfrac"/"cdot" as plain text (exactly the screenshot).
|
|
* The correct literal is 2 backslashes ("\\dfrac" -> value \dfrac).
|
|
*
|
|
* PARITY RULE (critical — protects legitimate row separators):
|
|
*   literal-run length   value backslashes   meaning
*          2                     1           \cmd                        OK   keep
*          4                     2           \\ + "cmd"(text)            BUG  -> 2
*          6                     3           \\ + \cmd (rowbreak+cmd)    OK   keep
*          8                     4           \\\\ + "cmd"(text)          BUG  -> 2
|
|
* => collapse ONLY runs whose length is a multiple of 4, AND only when the
|
|
* run is immediately followed by a known LaTeX command. Runs before "x",
|
|
* digits, etc. (real \\ row separators inside cases/array) are untouched.
|
|
*
|
|
* Usage: node backend/scripts/fix_overescaped_latex.js (dry run)
|
|
* node backend/scripts/fix_overescaped_latex.js --apply (write)
|
|
*/
|
|
'use strict';
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Files are only written when the script is invoked with --apply;
// without the flag the run is a dry run.
const APPLY = process.argv.indexOf('--apply') !== -1;

// Exact-match whitelist of LaTeX command names. A backslash run is collapsed
// only when the word that follows it is a member of this set.
const CMDSET = new Set([
  // fractions, roots, operators, logarithms
  'dfrac', 'tfrac', 'frac', 'sqrt', 'cdot', 'pi', 'log', 'ln', 'lg',
  // Greek letters and infinity
  'alpha', 'beta', 'gamma', 'delta', 'Delta', 'theta', 'lambda', 'mu',
  'sigma', 'phi', 'varphi', 'omega', 'infty',
  // relations and fonts
  'iff', 'in', 'notin', 'ne', 'neq', 'ge', 'geq', 'le', 'leq',
  'mathbb', 'mathrm',
  // arrows, arithmetic, accents
  'leftrightarrow', 'rightarrow', 'leftarrow', 'times', 'div', 'vec',
  'overline',
  // geometry, trigonometry, dots, signs
  'perp', 'parallel', 'cos', 'sin', 'tan', 'cot', 'ldots', 'cdots',
  'pm', 'mp', 'angle', 'triangle',
]);

// KaTeX is an optional dependency: when it cannot be loaded, `katex` stays
// null and validation is skipped.
let katex = (() => {
  try {
    return require('katex');
  } catch {
    return null; // validation optional
  }
})();
|
|
/**
 * Collect the inner text of every `$...$` and `$$...$$` region in `t`.
 *
 * A region opened with `$$` is closed by the next `$$`; if there is none it
 * is closed by the next single `$` instead. Scanning stops at the first
 * opener that has no closing delimiter at all.
 *
 * @param {string} t - Text to scan.
 * @returns {string[]} Region contents, delimiters excluded, in document order.
 */
function mathRegions(t) {
  const regions = [];
  let cursor = 0;

  for (;;) {
    if (cursor >= t.length) break;

    const open = t.indexOf('$', cursor);
    if (open === -1) break;

    const isDisplay = t[open + 1] === '$';
    const start = isDisplay ? open + 2 : open + 1;

    let close;
    if (isDisplay) {
      close = t.indexOf('$$', start);
      // Unterminated `$$`: fall back to a lone `$` as the closer.
      if (close === -1) close = t.indexOf('$', start);
    } else {
      close = t.indexOf('$', start);
    }
    if (close === -1) break;

    regions.push(t.substring(start, close));

    // Skip two characters only when a display region really ended on `$$`.
    const closedByDouble = isDisplay && t.startsWith('$$', close);
    cursor = close + (closedByDouble ? 2 : 1);
  }

  return regions;
}
|
|
/**
 * Emulate the one level of JS string unescaping that the math strings go
 * through before KaTeX sees them, so validation reflects what the browser
 * renders. Only backslash pairs are handled: each pair of consecutive
 * backslashes becomes a single backslash, scanning left to right.
 *
 * @param {string} s - Math text as written inside a JS string literal.
 * @returns {string} The text with every backslash pair collapsed to one.
 */
function jsUnescape(s) {
  const escapedBackslash = '\\\\'; // two characters
  const plainBackslash = '\\'; // one character
  return s.split(escapedBackslash).join(plainBackslash);
}
|
|
/**
 * Count the math regions of `t` that KaTeX fails to render.
 *
 * Each region from `mathRegions` is passed through `jsUnescape` and rendered
 * with `throwOnError: true`; every region whose render throws counts as one
 * error.
 *
 * @param {string} t - Full file text.
 * @returns {number|null} Number of failing regions, or null when KaTeX is
 *   not loaded.
 */
function katexErrors(t) {
  if (!katex) return null;

  return mathRegions(t).reduce((failures, region) => {
    // Unescape outside the try so only render failures are counted.
    const expr = jsUnescape(region);
    try {
      katex.renderToString(expr, { throwOnError: true });
      return failures;
    } catch {
      return failures + 1;
    }
  }, 0);
}
|
|
|
|
// ---------------------------------------------------------------------------
// Driver: for every listed textbook file, collapse over-escaped backslash
// runs in front of whitelisted LaTeX commands, compare KaTeX error counts
// before and after, and collect a report. Files are written back only with
// --apply, only when something changed, and only when the KaTeX error count
// did not increase. The report is always written next to this script and
// echoed to stdout.
// ---------------------------------------------------------------------------

// Textbook HTML lives two directories above this script, in frontend/textbooks.
const dir = path.join(__dirname, '..', '..', 'frontend', 'textbooks');
const files = [
  'algebra_11_ch1.html',
  'algebra_11_ch3.html',
  'geometry_11_ch3.html',
  'geometry_11_ch2.html',
  'geometry_11_ch1.html',
  'algebra_11_ch2.html',
  'algebra_8.html',
  'algebra_7_ch4.html',
  'geometry_11_ch4.html',
];

const report = [];
report.push('MODE: ' + (APPLY ? 'APPLY' : 'DRY-RUN'));
let grandFixes = 0;

for (const f of files) {
  const p = path.join(dir, f);

  // A missing file must not abort the batch: with --apply, earlier files
  // would already be rewritten and no report would be produced. Record the
  // skip, keep the exit status non-zero, and continue. Any other read error
  // is still fatal.
  let t;
  try {
    t = fs.readFileSync(p, 'utf8');
  } catch (err) {
    if (!err || err.code !== 'ENOENT') throw err;
    report.push('');
    report.push(f + ': SKIPPED (file not found)');
    process.exitCode = 1;
    continue;
  }

  const before = katexErrors(t);

  const perCmd = {};
  let fixes = 0;
  // Match a run of 4+ literal backslashes directly followed by a word.
  const next = t.replace(/(\\{4,})([A-Za-z]+)/g, (whole, bs, word) => {
    if (bs.length % 4 !== 0) return whole; // 6,10,... rowbreak+command -> keep
    if (!CMDSET.has(word)) return whole; // x / begin / unknown -> keep
    fixes++;
    perCmd[word] = (perCmd[word] || 0) + 1;
    return '\\\\' + word; // collapse to two backslashes
  });

  // Validate by emulating the browser render of the FIXED text.
  const after = katexErrors(next);
  // `before` and `after` are both null when KaTeX is unavailable; in that
  // case nothing can regress and the write below is not blocked.
  const regressed = after !== null && before !== null && after > before;

  grandFixes += fixes;
  report.push('');
  report.push(
    `${f}: fixes=${fixes} katexErrors before=${before} after=${after}` +
      (fixes ? ' cmds=' + JSON.stringify(perCmd) : ''),
  );
  if (regressed) {
    report.push(' !! WARNING: katex errors INCREASED — not writing this file');
  }

  if (APPLY && fixes > 0 && !regressed) fs.writeFileSync(p, next, 'utf8');
}

report.push('');
report.push('TOTAL fixes: ' + grandFixes);
fs.writeFileSync(path.join(__dirname, 'fix_overescaped_latex.report.txt'), report.join('\n'), 'utf8');
console.log(report.join('\n'));
|