fix(textbooks): убраны лишние слэши в LaTeX-формулах (over-escaping)
Формулы в JS-литералах имели \\\\dfrac / \\\\\\\\dfrac (4/8 слэшей) вместо
\\dfrac (2). После JS-анескейпа KaTeX получал \\dfrac, трактовал \\ как
перенос строки и печатал dfrac/cdot/sqrt/pi как текст (карточка пирамиды и
конуса в geometry_11_ch2, и др.).
Схлопнуты прогоны слэшей кратные 4 перед LaTeX-командой -> 2. Прогоны из
3 слэшей (\\ перенос строки + \cmd в \begin{cases}) и перед x/цифрой не
тронуты. 150 правок в 7 файлах (algebra_11_ch1/ch2/ch3, geometry_11_ch1..ch4).
БД чиста: questions (1398) text/explanation/correct_text + options (5187) -
0 багов. Скрипт: backend/scripts/fix_overescaped_latex.js (идемпотентный,
dry-run по умолчанию, --apply, с KaTeX-валидацией).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
/*
 * Fix OVER-ESCAPED LaTeX backslashes in textbook HTML.
 *
 * BUG: some formulas in JS string literals have too many backslashes, e.g.
 *   "$V=\\\\dfrac{1}{3}S_{осн}\\\\cdot h$"   (4 backslashes)
 * After JS unescaping KaTeX receives \\dfrac -> it renders "\\" as a LINE
 * BREAK and prints "dfrac"/"cdot" as plain text (exactly the screenshot).
 * The correct literal is 2 backslashes ("\\dfrac" -> value \dfrac).
 *
 * PARITY RULE (critical — protects legitimate row separators):
 *   literal-run length | value backslashes | meaning                  | action
 *   -------------------+-------------------+--------------------------+---------
 *            2         |         1         | \cmd                     | OK keep
 *            4         |         2         | \\ + "cmd"(text)         | BUG -> 2
 *            6         |         3         | \\ + \cmd (rowbreak+cmd) | OK keep
 *            8         |         4         | \\\\ + "cmd"(text)       | BUG -> 2
 * => collapse ONLY runs whose length is a multiple of 4, AND only when the
 *    run is immediately followed by a known LaTeX command. Runs before "x",
 *    digits, etc. (real \\ row separators inside cases/array) are untouched.
 *
 * Usage: node backend/scripts/fix_overescaped_latex.js            (dry run)
 *        node backend/scripts/fix_overescaped_latex.js --apply    (write)
 */
|
||||
'use strict';
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Dry run unless `--apply` is present anywhere on the command line.
const APPLY = process.argv.indexOf('--apply') !== -1;

// Exact-match whitelist of LaTeX command names that were observed behind
// 4 or 8 backslashes. Anything not listed here is never rewritten.
const CMDSET = new Set([
  // fractions, roots, basic operators, logarithms
  'dfrac', 'tfrac', 'frac', 'sqrt', 'cdot', 'pi', 'log', 'ln', 'lg',
  // Greek letters and infinity
  'alpha', 'beta', 'gamma', 'delta', 'Delta', 'theta', 'lambda', 'mu',
  'sigma', 'phi', 'varphi', 'omega', 'infty',
  // relations and fonts
  'iff', 'in', 'notin', 'ne', 'neq', 'ge', 'geq', 'le', 'leq',
  'mathbb', 'mathrm',
  // arrows, arithmetic, accents
  'leftrightarrow', 'rightarrow', 'leftarrow', 'times', 'div', 'vec',
  'overline',
  // geometry, trigonometry, dots, signs
  'perp', 'parallel', 'cos', 'sin', 'tan', 'cot', 'ldots', 'cdots', 'pm',
  'mp', 'angle', 'triangle',
]);

// KaTeX is an optional dependency: when it cannot be loaded, `katex` stays
// null and validation is skipped.
let katex = null;
try {
  katex = require('katex');
} catch {
  /* validation optional */
}
|
||||
/**
 * Collect the contents of every `$...$` / `$$...$$` span in `t`, in order.
 *
 * A span opened with `$$` is closed by the next `$$`; if there is none, the
 * next single `$` closes it instead. Scanning stops at the first opener that
 * has no closer at all. Delimiters themselves are not included in the output.
 *
 * @param {string} t - text to scan
 * @returns {string[]} inner text of each delimited span
 */
function mathRegions(t) {
  const regions = [];
  let cursor = 0;

  while (cursor < t.length) {
    const open = t.indexOf('$', cursor);
    if (open === -1) {
      break;
    }

    const isDisplay = t.startsWith('$$', open);
    const start = open + (isDisplay ? 2 : 1);

    let close = t.indexOf(isDisplay ? '$$' : '$', start);
    if (close === -1 && isDisplay) {
      // Unterminated `$$`: fall back to a lone `$` as the closer.
      close = t.indexOf('$', start);
    }
    if (close === -1) {
      break;
    }

    regions.push(t.substring(start, close));

    // Skip two characters only when a `$$` opener was closed by a real `$$`.
    const closedByPair = isDisplay && t.startsWith('$$', close);
    cursor = close + (closedByPair ? 2 : 1);
  }

  return regions;
}
|
||||
// The formulas sit inside JS string literals, so KaTeX only ever sees them
// after one round of JS unescaping. Reproduce that step (each pair of
// backslashes becomes one) so validation matches what the browser renders.
function jsUnescape(s) {
  const escapedBackslash = '\\\\';
  const plainBackslash = '\\';
  return s.split(escapedBackslash).join(plainBackslash);
}
|
||||
/**
 * Count the math regions of `t` that KaTeX refuses to render.
 *
 * Each region found by `mathRegions` is passed through `jsUnescape` and then
 * rendered with `throwOnError: true`; every throw counts as one error.
 *
 * @param {string} t - full text to validate
 * @returns {number|null} number of failing regions, or null when KaTeX is
 *   not loaded
 */
function katexErrors(t) {
  if (!katex) {
    return null;
  }

  const failsToRender = (latex) => {
    try {
      katex.renderToString(latex, { throwOnError: true });
      return false;
    } catch {
      return true;
    }
  };

  return mathRegions(t)
    .map((region) => jsUnescape(region))
    .filter(failsToRender)
    .length;
}
|
||||
|
||||
// Textbook HTML files to scan, relative to the frontend textbooks directory.
const textbookDir = path.join(__dirname, '..', '..', 'frontend', 'textbooks');
const textbookFiles = [
  'algebra_11_ch1.html',
  'algebra_11_ch3.html',
  'geometry_11_ch3.html',
  'geometry_11_ch2.html',
  'geometry_11_ch1.html',
  'algebra_11_ch2.html',
  'algebra_8.html',
  'algebra_7_ch4.html',
  'geometry_11_ch4.html',
];

/**
 * Collapse over-escaped backslash runs in front of whitelisted commands.
 *
 * Only runs of 4+ backslashes whose length is a multiple of 4 AND that are
 * directly followed by a name in CMDSET are rewritten to two backslashes.
 *
 * @param {string} text - file contents
 * @returns {{next: string, fixes: number, perCmd: Object<string, number>}}
 *   rewritten text, number of rewrites, and rewrite count per command name
 */
function collapseOverescaped(text) {
  const perCmd = {};
  let fixes = 0;

  const next = text.replace(/(\\{4,})([A-Za-z]+)/g, (match, slashes, name) => {
    // 6, 10, ... backslashes are rowbreak + command; unknown words
    // (x / begin / ...) are left alone as well.
    const overEscaped = slashes.length % 4 === 0 && CMDSET.has(name);
    if (!overEscaped) {
      return match;
    }
    fixes += 1;
    perCmd[name] = (perCmd[name] || 0) + 1;
    return '\\\\' + name; // collapse to two backslashes
  });

  return { next, fixes, perCmd };
}

const reportLines = [`MODE: ${APPLY ? 'APPLY' : 'DRY-RUN'}`];
let totalFixes = 0;

textbookFiles.forEach((fileName) => {
  const filePath = path.join(textbookDir, fileName);
  const original = fs.readFileSync(filePath, 'utf8');

  const errorsBefore = katexErrors(original);
  const { next, fixes, perCmd } = collapseOverescaped(original);
  // Validate by emulating the browser render of the FIXED text.
  const errorsAfter = katexErrors(next);

  totalFixes += fixes;

  const cmdSummary = fixes ? ' cmds=' + JSON.stringify(perCmd) : '';
  reportLines.push('');
  reportLines.push(
    `${fileName}: fixes=${fixes} katexErrors before=${errorsBefore} after=${errorsAfter}${cmdSummary}`
  );

  // With KaTeX unavailable both counts are null and this comparison is false.
  const regressed = errorsAfter > errorsBefore;
  if (regressed && errorsAfter !== null && errorsBefore !== null) {
    reportLines.push(' !! WARNING: katex errors INCREASED — not writing this file');
  }

  if (APPLY && fixes > 0 && !regressed) {
    fs.writeFileSync(filePath, next, 'utf8');
  }
});

reportLines.push('');
reportLines.push(`TOTAL fixes: ${totalFixes}`);

// The report is written next to this script in both dry-run and apply mode.
const reportText = reportLines.join('\n');
fs.writeFileSync(path.join(__dirname, 'fix_overescaped_latex.report.txt'), reportText, 'utf8');
console.log(reportText);
|
||||
Reference in New Issue
Block a user