1/* eslint-disable no-irregular-whitespace */
  2import { describe, it, expect, test } from 'vitest';
  3import { maskInlineLaTeX, preprocessLaTeX } from '$lib/utils/latex-protection';
  4
  5describe('maskInlineLaTeX', () => {
  6	it('should protect LaTeX $x + y$ but not money $3.99', () => {
  7		const latexExpressions: string[] = [];
  8		const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.';
  9		const output = maskInlineLaTeX(input, latexExpressions);
 10
 11		expect(output).toBe('I have $10, $3.99 and <<LATEX_0>> and <<LATEX_1>>. The amount is $2,000.');
 12		expect(latexExpressions).toEqual(['$x + y$', '$100x$']);
 13	});
 14
 15	it('should ignore money like $5 and $12.99', () => {
 16		const latexExpressions: string[] = [];
 17		const input = 'Prices are $12.99 and $5. Tax?';
 18		const output = maskInlineLaTeX(input, latexExpressions);
 19
 20		expect(output).toBe('Prices are $12.99 and $5. Tax?');
 21		expect(latexExpressions).toEqual([]);
 22	});
 23
 24	it('should protect inline math $a^2 + b^2$ even after text', () => {
 25		const latexExpressions: string[] = [];
 26		const input = 'Pythagorean: $a^2 + b^2 = c^2$.';
 27		const output = maskInlineLaTeX(input, latexExpressions);
 28
 29		expect(output).toBe('Pythagorean: <<LATEX_0>>.');
 30		expect(latexExpressions).toEqual(['$a^2 + b^2 = c^2$']);
 31	});
 32
 33	it('should not protect math that has letter after closing $ (e.g. units)', () => {
 34		const latexExpressions: string[] = [];
 35		const input = 'The cost is $99 and change.';
 36		const output = maskInlineLaTeX(input, latexExpressions);
 37
 38		expect(output).toBe('The cost is $99 and change.');
 39		expect(latexExpressions).toEqual([]);
 40	});
 41
 42	it('should allow $x$ followed by punctuation', () => {
 43		const latexExpressions: string[] = [];
 44		const input = 'We know $x$, right?';
 45		const output = maskInlineLaTeX(input, latexExpressions);
 46
 47		expect(output).toBe('We know <<LATEX_0>>, right?');
 48		expect(latexExpressions).toEqual(['$x$']);
 49	});
 50
 51	it('should work across multiple lines', () => {
 52		const latexExpressions: string[] = [];
 53		const input = `Emma buys cupcakes for $3 each.\nHow much is $x + y$?`;
 54		const output = maskInlineLaTeX(input, latexExpressions);
 55
 56		expect(output).toBe(`Emma buys cupcakes for $3 each.\nHow much is <<LATEX_0>>?`);
 57		expect(latexExpressions).toEqual(['$x + y$']);
 58	});
 59
 60	it('should not protect $100 but protect $matrix$', () => {
 61		const latexExpressions: string[] = [];
 62		const input = '$100 and $\\mathrm{GL}_2(\\mathbb{F}_7)$ are different.';
 63		const output = maskInlineLaTeX(input, latexExpressions);
 64
 65		expect(output).toBe('$100 and <<LATEX_0>> are different.');
 66		expect(latexExpressions).toEqual(['$\\mathrm{GL}_2(\\mathbb{F}_7)$']);
 67	});
 68
 69	it('should skip if $ is followed by digit and alphanumeric after close (money)', () => {
 70		const latexExpressions: string[] = [];
 71		const input = 'I paid $5 quickly.';
 72		const output = maskInlineLaTeX(input, latexExpressions);
 73
 74		expect(output).toBe('I paid $5 quickly.');
 75		expect(latexExpressions).toEqual([]);
 76	});
 77
 78	it('should protect LaTeX even with special chars inside', () => {
 79		const latexExpressions: string[] = [];
 80		const input = 'Consider $\\alpha_1 + \\beta_2$ now.';
 81		const output = maskInlineLaTeX(input, latexExpressions);
 82
 83		expect(output).toBe('Consider <<LATEX_0>> now.');
 84		expect(latexExpressions).toEqual(['$\\alpha_1 + \\beta_2$']);
 85	});
 86
 87	it('short text', () => {
 88		const latexExpressions: string[] = ['$0$'];
 89		const input = '$a$\n$a$ and $b$';
 90		const output = maskInlineLaTeX(input, latexExpressions);
 91
 92		expect(output).toBe('<<LATEX_1>>\n<<LATEX_2>> and <<LATEX_3>>');
 93		expect(latexExpressions).toEqual(['$0$', '$a$', '$a$', '$b$']);
 94	});
 95
 96	it('empty text', () => {
 97		const latexExpressions: string[] = [];
 98		const input = '$\n$$\n';
 99		const output = maskInlineLaTeX(input, latexExpressions);
100
101		expect(output).toBe('$\n$$\n');
102		expect(latexExpressions).toEqual([]);
103	});
104
105	it('LaTeX-spacer preceded by backslash', () => {
106		const latexExpressions: string[] = [];
107		const input = `\\[
108\\boxed{
109\\begin{aligned}
110N_{\\text{att}}^{\\text{(MHA)}} &=
111h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr]   && (\\text{Q,K,V の重み})\\\\
112&\\quad+ h(d_{k}+d_{k}+d_{v})                                          && (\\text{バイアス Q,K,V)}\\\\[4pt]
113&\\quad+ (h d_{v})\\, d_{\\text{model}}                                 && (\\text{出力射影 }W^{O})\\\\
114&\\quad+ d_{\\text{model}}                                            && (\\text{バイアス }b^{O})
115\\end{aligned}}
116\\]`;
117		const output = maskInlineLaTeX(input, latexExpressions);
118
119		expect(output).toBe(input);
120		expect(latexExpressions).toEqual([]);
121	});
122});
123
/**
 * Unit tests for `preprocessLaTeX`.
 *
 * The cases below pin the observable input → output contract:
 * - `\( ... \)` becomes `$ ... $`; `\[ ... \]` becomes `$$ ... $$`
 *   (or inline `$ ... $` when it sits inside a table cell);
 * - a `$` that starts a money amount is escaped as `\$`, while real `$...$` math is kept;
 * - fenced and inline code is returned unchanged;
 * - blockquote markers (`>`) are kept while the math inside is converted.
 */
describe('preprocessLaTeX', () => {
	test('converts inline \\( ... \\) to $...$', () => {
		const input =
			'\\( \\mathrm{GL}_2(\\mathbb{F}_7) \\): Group of invertible matrices with entries in \\(\\mathbb{F}_7\\).';

		// Whitespace inside the delimiters is kept as-is.
		expect(preprocessLaTeX(input)).toBe(
			'$ \\mathrm{GL}_2(\\mathbb{F}_7) $: Group of invertible matrices with entries in $\\mathbb{F}_7$.'
		);
	});

	test("don't inline \\\\( ... \\) to $...$", () => {
		// `\\(` (an escaped backslash followed by a parenthesis) is not a math opener;
		// only the genuine `\( ... \)` at the end is converted.
		const input =
			'Chapter 20 of The TeXbook, in source "Definitions\\\\(also called Macros)", containst the formula \\((x_1,\\ldots,x_n)\\).';

		expect(preprocessLaTeX(input)).toBe(
			'Chapter 20 of The TeXbook, in source "Definitions\\\\(also called Macros)", containst the formula $(x_1,\\ldots,x_n)$.'
		);
	});

	test('preserves display math \\[ ... \\] and protects adjacent text', () => {
		const input = `Some kernel of \\(\\mathrm{SL}_2(\\mathbb{F}_7)\\):
  \\[
  \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\}
  \\]`;

		// Indentation and the formula body stay untouched; only the delimiters change.
		expect(preprocessLaTeX(input)).toBe(`Some kernel of $\\mathrm{SL}_2(\\mathbb{F}_7)$:
  $$
  \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\}
  $$`);
	});

	test('handles standalone display math equation', () => {
		const input = `Algebra:
\\[
x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}
\\]`;

		expect(preprocessLaTeX(input)).toBe(`Algebra:
$$
x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}
$$`);
	});

	// This case used to appear twice with identical input and expectation under two
	// different titles ('does not interpret currency values as LaTeX' was the other one).
	test('ignores dollar signs followed by digits (money), but keeps valid math $x + y$', () => {
		const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.';

		expect(preprocessLaTeX(input)).toBe(
			'I have \\$10, \\$3.99 and $x + y$ and $100x$. The amount is \\$2,000.'
		);
	});

	test('handles real-world word problems with amounts and no math delimiters', () => {
		const input =
			'Emma buys 2 cupcakes for $3 each and 1 cookie for $1.50. How much money does she spend in total?';

		expect(preprocessLaTeX(input)).toBe(
			'Emma buys 2 cupcakes for \\$3 each and 1 cookie for \\$1.50. How much money does she spend in total?'
		);
	});

	test('handles decimal amounts in word problem correctly', () => {
		const input =
			'Maria has $20. She buys a notebook for $4.75 and a pack of pencils for $3.25. How much change does she receive?';

		expect(preprocessLaTeX(input)).toBe(
			'Maria has \\$20. She buys a notebook for \\$4.75 and a pack of pencils for \\$3.25. How much change does she receive?'
		);
	});

	test('preserves display math with surrounding non-ASCII text', () => {
		// Builds the same document with the given display-math delimiters, so input and
		// expectation can only differ in the delimiters.
		// NOTE(review): the file disables `no-irregular-whitespace`; the spaces inside the
		// Japanese text appear to be non-ASCII — keep them verbatim.
		const doc = (open: string, close: string) =>
			[
				'1 kg の質量は',
				'  ' + open,
				'  E = (1\\ \\text{kg}) \\times (3.0 \\times 10^8\\ \\text{m/s})^2 \\approx 9.0 \\times 10^{16}\\ \\text{J}',
				'  ' + close,
				'  というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。'
			].join('\n');

		expect(preprocessLaTeX(doc('\\[', '\\]'))).toBe(doc('$$', '$$'));
	});

	test('LaTeX-spacer preceded by backslash', () => {
		// Formula body shared by input and expectation: spacing commands such as `\,` and
		// `\;` and the `\\[4pt]` line break must survive; only the outer delimiters change.
		const body = `\\boxed{
\\begin{aligned}
N_{\\text{att}}^{\\text{(MHA)}} &=
h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr]   && (\\text{Q,K,V の重み})\\\\
&\\quad+ h(d_{k}+d_{k}+d_{v})                                          && (\\text{バイアス Q,K,V)}\\\\[4pt]
&\\quad+ (h d_{v})\\, d_{\\text{model}}                                 && (\\text{出力射影 }W^{O})\\\\
&\\quad+ d_{\\text{model}}                                            && (\\text{バイアス }b^{O})
\\end{aligned}}`;
		const input = ['\\[', body, '\\]'].join('\n');
		const expected = ['$$', body, '$$'].join('\n');

		expect(preprocessLaTeX(input)).toBe(expected);
	});

	test('converts \\[ ... \\] even when preceded by text without space', () => {
		const input = 'Some line ...\nAlgebra: \\[x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}\\]';

		// Display math that shares a line with text is moved onto its own line.
		expect(preprocessLaTeX(input)).toBe(
			'Some line ...\nAlgebra: \n$$x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$$\n'
		);
	});

	test('converts \\[ ... \\] in table-cells', () => {
		// The formula spans several source lines inside the cell (the tab indentation below
		// is part of the input); the expectation is single-line inline math.
		const input = `| ID | Expression |\n| #1 | \\[
			x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}
\\] |`;

		expect(preprocessLaTeX(input)).toBe(
			'| ID | Expression |\n| #1 | $x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$ |'
		);
	});

	test('escapes isolated $ before digits ($5 → \\$5), but not valid math', () => {
		const input = 'This costs $5 and this is math $x^2$. $100 is money.';

		// `$x^2$` is recognised as math and kept verbatim; `$5` and `$100` are not treated
		// as math, so their `$` is escaped.
		expect(preprocessLaTeX(input)).toBe('This costs \\$5 and this is math $x^2$. \\$100 is money.');
	});

	test('display with LaTeX-line-breaks', () => {
		const input = String.raw`- Algebraic topology, Homotopy Groups of $\mathbb{S}^3$:
$$\pi_n(\mathbb{S}^3) = \begin{cases}
\mathbb{Z} & n = 3 \\
0 & n > 3, n \neq 4 \\
\mathbb{Z}_2 & n = 4 \\
\end{cases}$$`;

		// If the formula contains '\\' the $$-delimiters should be in their own line.
		expect(preprocessLaTeX(input)).toBe(`- Algebraic topology, Homotopy Groups of $\\mathbb{S}^3$:
$$\n\\pi_n(\\mathbb{S}^3) = \\begin{cases}
\\mathbb{Z} & n = 3 \\\\
0 & n > 3, n \\neq 4 \\\\
\\mathbb{Z}_2 & n = 4 \\\\
\\end{cases}\n$$`);
	});

	test('handles mhchem notation safely if present', () => {
		const input = 'Chemical reaction: \\( \\ce{H2O} \\) and $\\ce{CO2}$';

		expect(preprocessLaTeX(input)).toBe('Chemical reaction: $ \\ce{H2O} $ and $\\ce{CO2}$');
	});

	test('preserves code blocks', () => {
		const input = 'Inline code: `sum $total` and block:\n```\ndollar $amount\n```\nEnd.';

		expect(preprocessLaTeX(input)).toBe(input); // Code blocks prevent misinterpretation
	});

	test('preserves backslash parentheses in code blocks (GitHub issue)', () => {
		const input = '```python\nfoo = "\\(bar\\)"\n```';

		expect(preprocessLaTeX(input)).toBe(input); // Code blocks should not have LaTeX conversion applied
	});

	test('preserves backslash brackets in code blocks', () => {
		const input = '```python\nfoo = "\\[bar\\]"\n```';

		expect(preprocessLaTeX(input)).toBe(input); // Code blocks should not have LaTeX conversion applied
	});

	test('preserves backslash parentheses in inline code', () => {
		const input = 'Use `foo = "\\(bar\\)"` in your code.';

		expect(preprocessLaTeX(input)).toBe(input);
	});

	test('leaves mhchem \\ce{...} untouched (no extra backslash escape)', () => {
		const input = 'mchem ce:\n$\\ce{2H2(g) + O2(g) -> 2H2O(l)}$';

		// mhchem-escape would insert a backslash here; the output must equal the input.
		expect(preprocessLaTeX(input)).toBe(input);
	});

	test('leaves mhchem \\pu{...} untouched (no extra backslash escape)', () => {
		const input = 'mchem pu:\n$\\pu{-572 kJ mol^{-1}}$';

		// mhchem-escape would insert a backslash here; the output must equal the input.
		expect(preprocessLaTeX(input)).toBe(input);
	});

	test('LaTeX in blockquotes with display math', () => {
		const input =
			'> **Definition (limit):**  \n>  \\[\n>  \\lim_{x\\to a} f(x) = L\n>  \\]\n>  means that as \\(x\\) gets close to \\(a\\).';
		const output = preprocessLaTeX(input);

		// Blockquote markers should be preserved, LaTeX should be converted.
		// Only fragments are checked here; the exact line layout is not pinned.
		expect(output).toContain('> **Definition (limit):**');
		expect(output).toContain('$$');
		expect(output).toContain('$x$');
		// No backslash delimiter of either kind may be left behind.
		expect(output).not.toContain('\\[');
		expect(output).not.toContain('\\]');
		expect(output).not.toContain('\\(');
		expect(output).not.toContain('\\)');
	});

	test('LaTeX in blockquotes with inline math', () => {
		const input =
			"> The derivative \\(f'(x)\\) at point \\(x=a\\) measures slope.\n> Formula: \\(f'(a)=\\lim_{h\\to 0}\\frac{f(a+h)-f(a)}{h}\\)";
		const output = preprocessLaTeX(input);

		// Blockquote markers should be preserved, inline LaTeX converted to $...$
		expect(output).toContain("> The derivative $f'(x)$ at point $x=a$ measures slope.");
		expect(output).toContain("> Formula: $f'(a)=\\lim_{h\\to 0}\\frac{f(a+h)-f(a)}{h}$");
	});

	test('Mixed content with blockquotes and regular text', () => {
		const input =
			'Regular text with \\(x^2\\).\n\n> Quote with \\(y^2\\).\n\nMore text with \\(z^2\\).';

		// All LaTeX should be converted, blockquote markers preserved
		expect(preprocessLaTeX(input)).toBe(
			'Regular text with $x^2$.\n\n> Quote with $y^2$.\n\nMore text with $z^2$.'
		);
	});
});