summaryrefslogtreecommitdiff
path: root/llama.cpp/tools/server/webui/src/lib/utils/latex-protection.ts
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/tools/server/webui/src/lib/utils/latex-protection.ts
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/tools/server/webui/src/lib/utils/latex-protection.ts')
-rw-r--r--llama.cpp/tools/server/webui/src/lib/utils/latex-protection.ts270
1 files changed, 270 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/webui/src/lib/utils/latex-protection.ts b/llama.cpp/tools/server/webui/src/lib/utils/latex-protection.ts
new file mode 100644
index 0000000..cafa2d4
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/utils/latex-protection.ts
@@ -0,0 +1,270 @@
+import {
+ CODE_BLOCK_REGEXP,
+ LATEX_MATH_AND_CODE_PATTERN,
+ LATEX_LINEBREAK_REGEXP,
+ MHCHEM_PATTERN_MAP
+} from '$lib/constants/latex-protection';
+
/**
 * Replaces inline LaTeX expressions enclosed in `$...$` with placeholders, leaving alone
 * dollar signs that look like part of a monetary value or an identifier.
 *
 * The input is processed line by line, so a `$...$` pair never spans a line break. For each
 * candidate pair (an opening `$` and the next `$` on the same line) the opening `$` is
 * left as plain text when any of the following holds:
 *
 * - the pair is empty (`$$`);
 * - the character right before the opening `$` is a letter, digit, `_`, `$` or `-`
 *   (e.g. `var$`);
 * - the opening `$` is followed by a digit and either the closing `$` is followed by a
 *   letter, digit, `_`, `$` or `-`, or the closing `$` is preceded by a space
 *   (e.g. `$5 and $10`).
 *
 * Otherwise the whole `$...$` span (delimiters included) is appended to `latexExpressions`
 * and replaced by `<<LATEX_n>>`, where `n` is the index of the new entry in that array.
 *
 * @param content - The input text potentially containing LaTeX expressions.
 * @param latexExpressions - Array that collects the extracted expressions; it is mutated.
 * @returns The processed string with inline LaTeX replaced by placeholders.
 */
export function maskInlineLaTeX(content: string, latexExpressions: string[]): string {
  if (!content.includes('$')) {
    return content;
  }

  // Hoisted so the regex objects are not re-created for every `$` that is inspected.
  const wordLikeChar = /[A-Za-z0-9_$-]/;
  const digitChar = /[0-9]/;

  return content
    .split('\n')
    .map((line) => {
      if (!line.includes('$')) {
        return line;
      }

      let processedLine = '';
      let currentPosition = 0;

      while (currentPosition < line.length) {
        const openDollarIndex = line.indexOf('$', currentPosition);

        if (openDollarIndex === -1) {
          processedLine += line.slice(currentPosition);
          break;
        }

        // Without a second `$` on this line there is nothing to pair with.
        const closeDollarIndex = line.indexOf('$', openDollarIndex + 1);

        if (closeDollarIndex === -1) {
          processedLine += line.slice(currentPosition);
          break;
        }

        const isEmptyPair = closeDollarIndex === openDollarIndex + 1;

        // `charAt` yields '' for out-of-range positions, which none of the tests below match.
        const charBeforeOpen = line.charAt(openDollarIndex - 1);
        const charAfterOpen = line.charAt(openDollarIndex + 1);
        const charBeforeClose = isEmptyPair ? '' : line.charAt(closeDollarIndex - 1);
        const charAfterClose = line.charAt(closeDollarIndex + 1);

        // The empty-pair check is relative to the opening `$`, not to the scan position,
        // so `$$` is rejected no matter how much plain text precedes it on the line.
        const looksLikeIdentifier = wordLikeChar.test(charBeforeOpen);
        const looksLikeAmount =
          digitChar.test(charAfterOpen) &&
          (wordLikeChar.test(charAfterClose) || charBeforeClose === ' ');

        if (isEmptyPair || looksLikeIdentifier || looksLikeAmount) {
          // Emit the opening `$` verbatim and retry from the next character, so the
          // would-be closing `$` can still open a later pair.
          processedLine += line.slice(currentPosition, openDollarIndex + 1);
          currentPosition = openDollarIndex + 1;
          continue;
        }

        // Treat as LaTeX: keep the text before it, store the span, emit a placeholder.
        processedLine += line.slice(currentPosition, openDollarIndex);
        latexExpressions.push(line.slice(openDollarIndex, closeDollarIndex + 1));
        processedLine += `<<LATEX_${latexExpressions.length - 1}>>`;
        currentPosition = closeDollarIndex + 1;
      }

      return processedLine;
    })
    .join('\n');
}
+
/**
 * Rewrites every match of `LATEX_MATH_AND_CODE_PATTERN` in `text` according to which of
 * the pattern's three capture groups participated:
 *
 * - group 1 (`codeBlock`): returned as captured;
 * - group 2 (`squareBracket`): wrapped as `$$...$$`;
 * - group 3 (`roundBracket`): wrapped as `$...$`.
 *
 * A match where none of the groups captured anything is left as it was.
 *
 * NOTE(review): the pattern is imported; presumably the groups correspond to code spans,
 * `\[...\]` and `\(...\)` respectively — confirm against the constants module.
 *
 * @param text - Text to rewrite.
 * @returns The text with the matched spans replaced.
 */
function escapeBrackets(text: string): string {
  const rewriteMatch = (
    whole: string,
    codeBlock: string | undefined,
    squareBracket: string | undefined,
    roundBracket: string | undefined
  ): string => {
    if (codeBlock != null) {
      return codeBlock;
    }
    if (squareBracket != null) {
      return '$$' + squareBracket + '$$';
    }
    if (roundBracket != null) {
      return '$' + roundBracket + '$';
    }
    return whole;
  };

  return text.replace(LATEX_MATH_AND_CODE_PATTERN, rewriteMatch);
}
+
/**
 * Applies each `[pattern, replacement]` pair of `MHCHEM_PATTERN_MAP` to `text`, in the
 * order the pairs are listed, feeding the output of one replacement into the next.
 *
 * NOTE(review): presumably the pairs adjust the escaping of mhchem commands such as
 * `\ce{...}` and `\pu{...}` — confirm against the constants module.
 *
 * @param text - Text to rewrite.
 * @returns The text after all replacements have been applied.
 */
function escapeMhchem(text: string): string {
  let rewritten = text;

  for (const [pattern, replacement] of MHCHEM_PATTERN_MAP) {
    rewritten = rewritten.replace(pattern, replacement);
  }

  return rewritten;
}

// Switch for the mhchem escaping pass in preprocessLaTeX; the pass is skipped while false.
const doEscapeMhchem = false;
+
/**
 * Preprocesses markdown content so that LaTeX math survives markdown rendering while
 * dollar amounts (e.g. `$5.99`) are not mistaken for math.
 *
 * The pipeline is:
 * 0. Strip leading blockquote markers (`>` plus following whitespace) and remember them
 *    per line index.
 * 1. Replace everything matched by `CODE_BLOCK_REGEXP` with `<<CODE_BLOCK_n>>` placeholders.
 * 2. Replace `\[...\]`, `\(...\)`, `$$...$$` and selected inline `$...$` spans with
 *    `<<LATEX_n>>` placeholders. A `\[...\]` that follows other text is either put on its
 *    own line or, when more non-blank text follows it, stored as inline `\(...\)`.
 * 3. Escape every remaining `$` that is directly followed by a digit (`$5` → `\$5`).
 * 4. Restore the LaTeX placeholders; expressions matching `LATEX_LINEBREAK_REGEXP` are
 *    re-emitted with the `$$` delimiters on their own lines.
 * 5. Run `escapeBrackets` (and `escapeMhchem` when enabled).
 * 6. Convert remaining `\(...\)` → `$...$` and `\[...\]` → `$$...$$`.
 * 7. Restore the code placeholders.
 * 8. Re-attach the blockquote markers by line index.
 *
 * Placeholders whose index has no stored entry (for example text that literally contains
 * `<<LATEX_7>>`) are left untouched.
 *
 * @param content - The raw text (e.g., markdown) that may contain LaTeX or code blocks.
 * @returns The preprocessed string with escaped and normalized LaTeX.
 *
 * @example
 * preprocessLaTeX("Price: $10. The equation is \\(x^2\\).")
 * // → "Price: \\$10. The equation is $x^2$."
 */
export function preprocessLaTeX(content: string): string {
  // See also:
  // https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts

  // Step 0: Temporarily remove blockquote markers (>) so the LaTeX patterns see plain
  // lines. Markers are keyed by line index so they can be put back in Step 8.
  // NOTE(review): Steps 2 and 4 can insert '\n', which shifts the lines below the
  // insertion; markers for those lines are then restored onto the wrong line.
  const blockquoteMarkers = new Map<number, string>();
  content = content
    .split('\n')
    .map((line, index) => {
      const markerMatch = line.match(/^(>\s*)/);
      if (!markerMatch) {
        return line;
      }
      const marker = markerMatch[1];
      blockquoteMarkers.set(index, marker);
      return line.slice(marker.length);
    })
    .join('\n');

  // Step 1: Protect code blocks
  const codeBlocks: string[] = [];

  content = content.replace(CODE_BLOCK_REGEXP, (match) => {
    codeBlocks.push(match);

    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions
  const latexExpressions: string[] = [];

  // Match <text>\[...\]<rest of line>, protect the formula and insert line breaks.
  content = content.replace(
    /([\S].*?)\\\[([\s\S]*?)\\\](.*)/g,
    (match: string, group1: string, group2: string, group3: string) => {
      if (group1.endsWith('\\')) {
        return match; // Backslash before \[ (i.e. `\\[`), do nothing.
      }

      // Non-blank text after the formula (display formula in a table cell?)
      const hasSuffix = /\S/.test(group3);
      let optBreak: string;

      if (hasSuffix) {
        latexExpressions.push(`\\(${group2.trim()}\\)`); // Convert into inline.
        optBreak = '';
      } else {
        latexExpressions.push(`\\[${group2}\\]`);
        optBreak = '\n';
      }

      return `${group1}${optBreak}<<LATEX_${latexExpressions.length - 1}>>${optBreak}${group3}`;
    }
  );

  // Match \(...\), \[...\], $$...$$ and protect them
  content = content.replace(
    /(\$\$[\s\S]*?\$\$|(?<!\\)\\\[[\s\S]*?\\\]|(?<!\\)\\\(.*?\\\))/g,
    (match) => {
      latexExpressions.push(match);

      return `<<LATEX_${latexExpressions.length - 1}>>`;
    }
  );

  // Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99)
  content = maskInlineLaTeX(content, latexExpressions);

  // Step 3: Escape standalone $ before digits (currency like $5 → \$5)
  // (Inline math is hidden behind placeholders at this point, so it is not affected.)
  content = content.replace(/\$(?=\d)/g, '\\$');

  // Step 4: Restore protected LaTeX expressions (they are valid)
  content = content.replace(/<<LATEX_(\d+)>>/g, (placeholder: string, index: string) => {
    const expr = latexExpressions[Number.parseInt(index, 10)];
    if (expr === undefined) {
      // Placeholder-looking text that was part of the input; keep it instead of crashing.
      return placeholder;
    }

    const lineBreakMatch = expr.match(LATEX_LINEBREAK_REGEXP);
    if (!lineBreakMatch) {
      return expr;
    }

    // Katex: The $$-delimiters should be in their own line
    // if there are \\-line-breaks.
    const formula = lineBreakMatch[1];
    const prefix = formula.startsWith('\n') ? '' : '\n';
    const suffix = formula.endsWith('\n') ? '' : '\n';

    return '$$' + prefix + formula + suffix + '$$';
  });

  // Step 5: Apply additional escaping functions (brackets and mhchem)
  // This must happen BEFORE restoring code blocks to avoid affecting code content
  content = escapeBrackets(content);

  if (doEscapeMhchem && (content.includes('\\ce{') || content.includes('\\pu{'))) {
    content = escapeMhchem(content);
  }

  // Step 6: Convert remaining \(...\) → $...$, \[...\] → $$...$$
  // This must happen BEFORE restoring code blocks to avoid affecting code content
  content = content
    // Using the look‑behind pattern `(?<!\\)` we skip matches
    // that are preceded by a backslash, e.g.
    // `Definitions\\(also called macros)` (title of chapter 20 in The TeXbook).
    .replace(/(?<!\\)\\\((.+?)\\\)/g, '$$$1$') // inline
    .replace(
      // Using the look‑behind pattern `(?<!\\)` we skip matches
      // that are preceded by a backslash, e.g. `\\[4pt]`.
      /(?<!\\)\\\[([\s\S]*?)\\\]/g, // display, see also PR #16599
      (_, formula: string) => {
        return `$$${formula}$$`;
      }
    );

  // Step 7: Restore code blocks
  // This happens AFTER all LaTeX conversions to preserve code content
  content = content.replace(/<<CODE_BLOCK_(\d+)>>/g, (placeholder: string, index: string) => {
    const codeBlock = codeBlocks[Number.parseInt(index, 10)];

    // Unknown index: the text came from the input itself, so leave it as written.
    return codeBlock === undefined ? placeholder : codeBlock;
  });

  // Step 8: Restore blockquote markers
  if (blockquoteMarkers.size > 0) {
    content = content
      .split('\n')
      .map((line, index) => {
        const marker = blockquoteMarkers.get(index);
        return marker ? marker + line : line;
      })
      .join('\n');
  }

  return content;
}