diff options
Diffstat (limited to 'llama.cpp/tools/server/webui/src/lib/markdown')
4 files changed, 497 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/webui/src/lib/markdown/enhance-code-blocks.ts b/llama.cpp/tools/server/webui/src/lib/markdown/enhance-code-blocks.ts new file mode 100644 index 0000000..6f0e03e --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/markdown/enhance-code-blocks.ts @@ -0,0 +1,162 @@ +/** + * Rehype plugin to enhance code blocks with wrapper, header, and action buttons. + * + * Wraps <pre><code> elements with a container that includes: + * - Language label + * - Copy button + * - Preview button (for HTML code blocks) + * + * This operates directly on the HAST tree for better performance, + * avoiding the need to stringify and re-parse HTML. + */ + +import type { Plugin } from 'unified'; +import type { Root, Element, ElementContent } from 'hast'; +import { visit } from 'unist-util-visit'; + +declare global { + interface Window { + idxCodeBlock?: number; + } +} + +const COPY_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy-icon lucide-copy"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`; + +const PREVIEW_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-eye lucide-eye-icon"><path d="M2.062 12.345a1 1 0 0 1 0-.69C3.5 7.73 7.36 5 12 5s8.5 2.73 9.938 6.655a1 1 0 0 1 0 .69C20.5 16.27 16.64 19 12 19s-8.5-2.73-9.938-6.655"/><circle cx="12" cy="12" r="3"/></svg>`; + +/** + * Creates an SVG element node from raw SVG string. + * Since we can't parse HTML in HAST directly, we use the raw property. 
/**
 * Wraps a raw HTML/SVG string in a `<span>` HAST element.
 *
 * The markup is not parsed into HAST; it is attached as a single `raw` child node.
 * NOTE(review): `raw` nodes are presumably only emitted verbatim when the
 * downstream stringify step permits raw HTML — confirm against the pipeline setup.
 *
 * @param html - Markup to embed (used here for the icon SVG strings).
 * @returns A `<span>` element whose only child is the `raw` node.
 */
function createRawHtmlElement(html: string): Element {
	return {
		type: 'element',
		tagName: 'span',
		properties: {},
		children: [{ type: 'raw', value: html } as unknown as ElementContent]
	};
}

/**
 * Builds one header action button.
 *
 * Shared by the copy and preview buttons, which differ only in class name,
 * tooltip and icon.
 *
 * @param className - CSS class placed on the `<button>`.
 * @param title - Tooltip text.
 * @param codeId - ID of the code block the button acts on (`data-code-id`).
 * @param iconSvg - Raw SVG markup rendered inside the button.
 */
function createActionButton(
	className: string,
	title: string,
	codeId: string,
	iconSvg: string
): Element {
	return {
		type: 'element',
		tagName: 'button',
		properties: {
			className: [className],
			'data-code-id': codeId,
			title,
			type: 'button'
		},
		children: [createRawHtmlElement(iconSvg)]
	};
}

/** Creates the "Copy code" button bound to the given code block ID. */
function createCopyButton(codeId: string): Element {
	return createActionButton('copy-code-btn', 'Copy code', codeId, COPY_ICON_SVG);
}

/** Creates the "Preview code" button bound to the given code block ID. */
function createPreviewButton(codeId: string): Element {
	return createActionButton('preview-code-btn', 'Preview code', codeId, PREVIEW_ICON_SVG);
}

/**
 * Creates the code block header: a language label followed by the action buttons.
 *
 * A copy button is always present; a preview button is added only when the
 * language is `html` (compared case-insensitively).
 *
 * @param language - Label text shown in the header.
 * @param codeId - ID shared by the buttons and the `<code>` element.
 */
function createHeader(language: string, codeId: string): Element {
	const actions: Element[] = [createCopyButton(codeId)];

	if (language.toLowerCase() === 'html') {
		actions.push(createPreviewButton(codeId));
	}

	return {
		type: 'element',
		tagName: 'div',
		properties: { className: ['code-block-header'] },
		children: [
			{
				type: 'element',
				tagName: 'span',
				properties: { className: ['code-language'] },
				children: [{ type: 'text', value: language }]
			},
			{
				type: 'element',
				tagName: 'div',
				properties: { className: ['code-block-actions'] },
				children: actions
			}
		]
	};
}

/** Wraps the header and the original `<pre>` in a `code-block-wrapper` `<div>`. */
function createWrapper(header: Element, preElement: Element): Element {
	return {
		type: 'element',
		tagName: 'div',
		properties: { className: ['code-block-wrapper'] },
		children: [header, preElement]
	};
}

/**
 * Reads the language from the first usable `language-*` class of a `<code>` element.
 *
 * @returns The text after the `language-` prefix, or `'text'` when `className`
 * is not an array, no class carries the prefix, or the prefix has nothing
 * after it (a bare `language-` class would otherwise yield an empty label).
 */
function extractLanguage(codeElement: Element): string {
	const prefix = 'language-';
	const fallback = 'text';

	const classNames = codeElement.properties?.className;
	if (!Array.isArray(classNames)) return fallback;

	for (const cls of classNames) {
		if (typeof cls !== 'string' || !cls.startsWith(prefix)) continue;

		const language = cls.slice(prefix.length);
		// Skip a bare "language-" so the header never shows a blank label.
		if (language) return language;
	}

	return fallback;
}
/**
 * Produces the identifier that ties a code block to its header buttons.
 *
 * When `window` exists, the ID comes from a counter stored on
 * `window.idxCodeBlock`, which is incremented on every call. Without a
 * `window` (e.g. SSR) it falls back to a timestamp plus a short random suffix.
 */
function generateCodeId(): string {
	if (typeof window === 'undefined') {
		const randomSuffix = Math.random().toString(36).slice(2, 7);
		return `code-${Date.now()}-${randomSuffix}`;
	}

	const nextIndex = (window.idxCodeBlock ?? 0) + 1;
	window.idxCodeBlock = nextIndex;
	return `code-${nextIndex}`;
}

/**
 * Rehype plugin that decorates every `<pre>` containing a direct `<code>` child.
 *
 * For each such `<pre>` it:
 * - derives the language label and a fresh code ID,
 * - stamps the ID onto the `<code>` element as `data-code-id`,
 * - swaps the `<pre>` in its parent for a wrapper holding the header
 *   (language label, copy button, preview button for HTML) and the original `<pre>`.
 */
export const rehypeEnhanceCodeBlocks: Plugin<[], Root> = () => {
	const isCodeChild = (child: ElementContent): child is Element =>
		child.type === 'element' && child.tagName === 'code';

	return (tree: Root) => {
		visit(tree, 'element', (node: Element, index, parent) => {
			// Only <pre> elements that have a known slot in a parent can be swapped out.
			if (!parent || index === undefined || node.tagName !== 'pre') return;

			const codeElement = node.children.find(isCodeChild);
			if (codeElement === undefined) return;

			const language = extractLanguage(codeElement);
			const codeId = generateCodeId();

			codeElement.properties = { ...codeElement.properties, 'data-code-id': codeId };

			// The wrapper takes over the <pre>'s position; the <pre> lives on inside it.
			const parentChildren = parent.children as ElementContent[];
			parentChildren[index] = createWrapper(createHeader(language, codeId), node);
		});
	};
};

// diff --git a/llama.cpp/tools/server/webui/src/lib/markdown/enhance-links.ts b/llama.cpp/tools/server/webui/src/lib/markdown/enhance-links.ts
// new file mode 100644
// index 0000000..b5fbcbd
// --- /dev/null
// +++ b/llama.cpp/tools/server/webui/src/lib/markdown/enhance-links.ts
// @@ -0,0 +1,33 @@
/**
 * Rehype plugin to enhance links with security attributes.
 *
 * Adds target="_blank" and rel="noopener noreferrer" to all anchor elements,
 * ensuring external links open in new tabs safely.
 */
import type { Plugin } from 'unified';
import type { Root, Element } from 'hast';
import { visit } from 'unist-util-visit';

/**
 * Rehype plugin that marks links to open in a new tab safely.
 *
 * Every `<a>` element with a truthy `href` receives:
 * - target="_blank"
 * - rel="noopener noreferrer"
 *
 * Anchors without an `href` are left untouched. An existing `rel` or `target`
 * value is overwritten.
 */
export const rehypeEnhanceLinks: Plugin<[], Root> = () => (tree: Root) => {
	visit(tree, 'element', (node: Element) => {
		const isAnchor = node.tagName === 'a';
		if (!isAnchor) return;

		const properties = node.properties ?? {};

		// Anchors without a destination are not links; leave them as they are.
		if (properties.href) {
			properties.target = '_blank';
			properties.rel = 'noopener noreferrer';
			node.properties = properties;
		}
	});
};

// diff --git a/llama.cpp/tools/server/webui/src/lib/markdown/literal-html.ts b/llama.cpp/tools/server/webui/src/lib/markdown/literal-html.ts
// new file mode 100644
// index 0000000..d4ace01
// --- /dev/null
// +++ b/llama.cpp/tools/server/webui/src/lib/markdown/literal-html.ts
// @@ -0,0 +1,121 @@
import type { Plugin } from 'unified';
import { visit } from 'unist-util-visit';
import type { Break, Content, Paragraph, PhrasingContent, Root, Text } from 'mdast';
import { LINE_BREAK, NBSP, PHRASE_PARENTS, TAB_AS_SPACES } from '$lib/constants/literal-html';

/**
 * remark plugin that rewrites raw HTML nodes into plain-text equivalents.
 *
 * remark parses inline HTML into `html` nodes even when we do not want to render
 * them. We turn each of those nodes into regular text (plus `<br>` break markers)
 * so the downstream rehype pipeline escapes the characters instead of executing
 * them. Leading spaces and tab characters are converted to non‑breaking spaces to
 * keep indentation identical to the original author input.
 */
/**
 * Replaces the leading run of spaces and tabs of a line so the indentation
 * survives rendering: each leading space becomes `NBSP`, each leading tab
 * becomes `TAB_AS_SPACES`. Everything from the first other character on is
 * returned unchanged.
 */
function preserveIndent(line: string): string {
	const leadingWhitespace = /^[ \t]*/.exec(line)?.[0] ?? '';
	const convertedIndent = Array.from(leadingWhitespace, (char) =>
		char === ' ' ? NBSP : TAB_AS_SPACES
	).join('');

	return convertedIndent + line.slice(leadingWhitespace.length);
}

/**
 * Turns a raw HTML string into literal phrasing nodes: one text node per line
 * (indentation preserved via `preserveIndent`), with a `break` node between
 * consecutive lines. Always returns at least one text node.
 */
function createLiteralChildren(value: string): PhrasingContent[] {
	const literalNodes: PhrasingContent[] = [];

	value.split(LINE_BREAK).forEach((rawLine, lineIndex) => {
		if (lineIndex !== 0) {
			literalNodes.push({ type: 'break' } as Break as unknown as PhrasingContent);
		}

		literalNodes.push({
			type: 'text',
			value: preserveIndent(rawLine)
		} as Text as unknown as PhrasingContent);
	});

	if (literalNodes.length === 0) {
		literalNodes.push({ type: 'text', value: '' } as Text as unknown as PhrasingContent);
	}

	return literalNodes;
}

/**
 * remark plugin that replaces every `html` node with literal text.
 *
 * - Inside a phrasing parent (`PHRASE_PARENTS`), the node is replaced in place
 *   by the literal text/break nodes.
 * - Elsewhere, the literal nodes are wrapped in a paragraph flagged with
 *   `data.literalHtml`. If the preceding sibling is such a flagged paragraph,
 *   the new content is appended to it (separated by a break) instead of
 *   creating another paragraph.
 *
 * The number returned from the visitor is the sibling index at which the
 * traversal continues, so replaced content is not visited again.
 */
export const remarkLiteralHtml: Plugin<[], Root> = () => {
	return (tree) => {
		visit(tree, 'html', (node, index, parent) => {
			if (typeof index !== 'number' || !parent) {
				return;
			}

			const literalNodes = createLiteralChildren(node.value);

			// Inline context: splice the literal nodes straight into the parent.
			if (PHRASE_PARENTS.has(parent.type as string)) {
				(parent.children as unknown as PhrasingContent[]).splice(
					index,
					1,
					...(literalNodes as unknown as PhrasingContent[])
				);

				return index + literalNodes.length;
			}

			// Block context: the html node becomes a flagged literal paragraph.
			const literalParagraph: Paragraph = {
				type: 'paragraph',
				children: literalNodes as Paragraph['children'],
				data: { literalHtml: true }
			};

			const siblings = parent.children as unknown as Content[];
			siblings.splice(index, 1, literalParagraph as unknown as Content);

			const previous = index > 0 ? (siblings[index - 1] as Paragraph | undefined) : undefined;
			const previousIsLiteral =
				previous?.type === 'paragraph' &&
				Boolean((previous.data as { literalHtml?: boolean } | undefined)?.literalHtml);

			if (!previous || !previousIsLiteral) {
				return index + 1;
			}

			// Merge into the preceding literal paragraph, keeping a line break between the parts.
			const mergedChildren = previous.children as unknown as PhrasingContent[];

			if (mergedChildren.length > 0) {
				const tail = mergedChildren[mergedChildren.length - 1];

				if (tail.type !== 'break') {
					mergedChildren.push({ type: 'break' } as Break as unknown as PhrasingContent);
				}
			}

			mergedChildren.push(...(literalParagraph.children as unknown as PhrasingContent[]));

			// The freshly inserted paragraph is now redundant; drop it and resume at its slot.
			siblings.splice(index, 1);

			return index;
		});
	};
};

// diff --git a/llama.cpp/tools/server/webui/src/lib/markdown/table-html-restorer.ts b/llama.cpp/tools/server/webui/src/lib/markdown/table-html-restorer.ts
// new file mode 100644
// index 0000000..918aa46
// --- /dev/null
// +++ b/llama.cpp/tools/server/webui/src/lib/markdown/table-html-restorer.ts
// @@ -0,0 +1,181 @@
/**
 * Rehype plugin to restore limited HTML elements inside Markdown table cells.
 *
 * ## Problem
 * The remark/rehype pipeline neutralizes inline HTML as literal text
 * (remarkLiteralHtml) so that XML/HTML snippets in LLM responses display
 * as-is instead of being rendered. This causes <br> and <ul> markup in
 * table cells to show as plain text.
 *
 * ## Solution
 * This plugin traverses the HAST post-conversion, parses whitelisted HTML
 * patterns from text nodes, and replaces them with actual HAST element nodes
 * that will be rendered as real HTML.
 *
 * ## Supported HTML
 * - `<br>` / `<br/>` / `<br />` - Line breaks (inline)
 * - `<ul><li>...</li></ul>` - Unordered lists (block)
 *
 * ## Key Implementation Details
 *
 * ### 1. Sibling Combination (Critical)
 * The Markdown pipeline may fragment content across multiple text nodes and `<br>`
 * elements. For example, `<ul><li>a</li></ul>` might arrive as:
 * - Text: `"<ul>"`
 * - Element: `<br>`
 * - Text: `"<li>a</li></ul>"`
 *
 * We must combine consecutive text nodes and `<br>` elements into a single string
 * before attempting to parse list markup. Without this, list detection fails.
 *
 * ### 2. visitParents for Deep Traversal
 * Table cell content may be wrapped in intermediate elements (e.g., `<p>` tags).
+ * Using `visitParents` instead of direct child iteration ensures we find text + * nodes at any depth within the cell. + * + * ### 3. Reference Comparison for No-Op Detection + * When checking if `<br>` expansion changed anything, we compare: + * `expanded.length !== 1 || expanded[0] !== textNode` + * + * This catches both cases: + * - Multiple nodes created (text was split) + * - Single NEW node created (original had only `<br>`, now it's an element) + * + * A simple `length > 1` check would miss the single `<br>` case. + * + * ### 4. Strict List Validation + * `parseList()` rejects malformed markup by checking for garbage text between + * `<li>` elements. This prevents creating broken DOM from partial matches like + * `<ul>garbage<li>a</li></ul>`. + * + * ### 5. Newline Substitution for `<br>` in Combined String + * When combining siblings, existing `<br>` elements become `\n` in the combined + * string. This allows list content to span visual lines while still being parsed + * as a single unit. + * + * @example + * // Input Markdown: + * // | Feature | Notes | + * // |---------|-------| + * // | Multi-line | First<br>Second | + * // | List | <ul><li>A</li><li>B</li></ul> | + * // + * // Without this plugin: <br> and <ul> render as literal text + * // With this plugin: <br> becomes line break, <ul> becomes actual list + */ + +import type { Plugin } from 'unified'; +import type { Element, ElementContent, Root, Text } from 'hast'; +import { visit } from 'unist-util-visit'; +import { visitParents } from 'unist-util-visit-parents'; +import { BR_PATTERN, LIST_PATTERN, LI_PATTERN } from '$lib/constants/table-html-restorer'; + +/** + * Expands text containing `<br>` tags into an array of text nodes and br elements. 
/**
 * Splits a string on `BR_PATTERN` matches into HAST nodes.
 *
 * Each match becomes a `<br>` element; non-empty text between matches becomes
 * a text node. When nothing matches, the result is a single text node holding
 * the whole (possibly empty) value.
 */
function expandBrTags(value: string): ElementContent[] {
	const nodes: ElementContent[] = [];
	let consumed = 0;
	let foundBreak = false;

	for (const brMatch of value.matchAll(BR_PATTERN)) {
		foundBreak = true;
		const brStart = brMatch.index ?? 0;

		if (brStart > consumed) {
			nodes.push({ type: 'text', value: value.slice(consumed, brStart) } as Text);
		}

		nodes.push({ type: 'element', tagName: 'br', properties: {}, children: [] } as Element);
		consumed = brStart + brMatch[0].length;
	}

	if (!foundBreak) {
		return [{ type: 'text', value } as Text];
	}

	if (consumed < value.length) {
		nodes.push({ type: 'text', value: value.slice(consumed) } as Text);
	}

	return nodes;
}

/**
 * Parses list markup matched by `LIST_PATTERN` into a `<ul>` HAST element.
 *
 * Every `LI_PATTERN` match in the list body becomes an `<li>` whose content is
 * run through `expandBrTags`.
 *
 * @returns The `<ul>` element, or `null` when the trimmed value does not match
 * `LIST_PATTERN`, no items are found, or anything other than whitespace sits
 * before, between or after the items.
 */
function parseList(value: string): Element | null {
	const listMatch = value.trim().match(LIST_PATTERN);
	if (listMatch === null) return null;

	const inner = listMatch[1];
	const listItems: ElementContent[] = [];
	let consumed = 0;

	for (const itemMatch of Array.from(inner.matchAll(LI_PATTERN))) {
		const itemStart = itemMatch.index ?? 0;

		// Anything but whitespace between items means the markup is not a clean list.
		const gap = inner.slice(consumed, itemStart);
		if (gap.trim() !== '') return null;

		listItems.push({
			type: 'element',
			tagName: 'li',
			properties: {},
			children: expandBrTags(itemMatch[1] ?? '')
		} as Element);

		consumed = itemStart + itemMatch[0].length;
	}

	const trailing = inner.slice(consumed);
	if (listItems.length === 0 || trailing.trim() !== '') return null;

	return { type: 'element', tagName: 'ul', properties: {}, children: listItems } as Element;
}
/**
 * Restores whitelisted HTML inside a single table cell.
 *
 * For every text node found at any depth below `cell`:
 * 1. The text node and the consecutive text / `<br>` siblings that follow it
 *    are concatenated into one string (`<br>` elements contribute `\n`).
 * 2. If that string parses as a list (`parseList`), the whole combined range of
 *    siblings is replaced by the resulting `<ul>` element.
 * 3. Otherwise only the text node itself is examined: if it contains `<br>`
 *    markup, it is replaced by the text/`<br>` nodes from `expandBrTags`.
 *
 * Text nodes without any restorable markup are left in place untouched, so
 * they keep their identity and any attached fields such as `position`.
 *
 * @param cell - The `<td>` / `<th>` element to process (mutated in place).
 */
function processCell(cell: Element): void {
	visitParents(cell, 'text', (textNode: Text, ancestors) => {
		const parent = ancestors[ancestors.length - 1];
		if (!parent || parent.type !== 'element') return;

		const siblings = (parent as Element).children as ElementContent[];
		const startIndex = siblings.indexOf(textNode as ElementContent);
		if (startIndex === -1) return;

		// Combine consecutive text nodes and <br> elements into one string
		let combined = '';
		let endIndex = startIndex;

		for (let i = startIndex; i < siblings.length; i++) {
			const sib = siblings[i];
			if (sib.type === 'text') {
				combined += (sib as Text).value;
			} else if (sib.type === 'element' && (sib as Element).tagName === 'br') {
				combined += '\n';
			} else {
				break;
			}
			endIndex = i;
		}

		// Try parsing as list first (replaces entire combined range)
		const list = parseList(combined);
		if (list) {
			siblings.splice(startIndex, endIndex - startIndex + 1, list);
			return;
		}

		// Otherwise, just expand <br> tags in this text node.
		// expandBrTags always allocates fresh nodes, so comparing references against
		// `textNode` can never detect "nothing changed"; the presence of an element
		// (a restored <br>) is what tells us the expansion did something.
		const expanded = expandBrTags(textNode.value);
		const restoredBreak = expanded.some((node) => node.type === 'element');
		if (restoredBreak) {
			siblings.splice(startIndex, 1, ...expanded);
		}
	});
}

/**
 * Rehype plugin that runs `processCell` on every `<td>` and `<th>` element of
 * the tree, restoring `<br>` and `<ul>` markup that arrived as literal text.
 */
export const rehypeRestoreTableHtml: Plugin<[], Root> = () => (tree) => {
	visit(tree, 'element', (node: Element) => {
		if (node.tagName === 'td' || node.tagName === 'th') {
			processCell(node);
		}
	});
};
