1import type { Plugin } from 'unified';
  2import { visit } from 'unist-util-visit';
  3import type { Break, Content, Paragraph, PhrasingContent, Root, Text } from 'mdast';
  4import { LINE_BREAK, NBSP, PHRASE_PARENTS, TAB_AS_SPACES } from '$lib/constants/literal-html';
  5
/**
 * remark plugin that rewrites raw HTML nodes into plain-text equivalents.
 *
 * remark parses raw HTML (block-level as well as inline) into `html` nodes even
 * when we do not want to render them. We turn each of those nodes into regular
 * text (plus `<br>` break markers between lines) so the downstream rehype
 * pipeline escapes the characters instead of executing them. Leading spaces and
 * tab characters on each line are converted to non-breaking spaces to keep
 * indentation identical to the original author input.
 */
 15
 16function preserveIndent(line: string): string {
 17	let index = 0;
 18	let output = '';
 19
 20	while (index < line.length) {
 21		const char = line[index];
 22
 23		if (char === ' ') {
 24			output += NBSP;
 25			index += 1;
 26			continue;
 27		}
 28
 29		if (char === '\t') {
 30			output += TAB_AS_SPACES;
 31			index += 1;
 32			continue;
 33		}
 34
 35		break;
 36	}
 37
 38	return output + line.slice(index);
 39}
 40
 41function createLiteralChildren(value: string): PhrasingContent[] {
 42	const lines = value.split(LINE_BREAK);
 43	const nodes: PhrasingContent[] = [];
 44
 45	for (const [lineIndex, rawLine] of lines.entries()) {
 46		if (lineIndex > 0) {
 47			nodes.push({ type: 'break' } as Break as unknown as PhrasingContent);
 48		}
 49
 50		nodes.push({
 51			type: 'text',
 52			value: preserveIndent(rawLine)
 53		} as Text as unknown as PhrasingContent);
 54	}
 55
 56	if (!nodes.length) {
 57		nodes.push({ type: 'text', value: '' } as Text as unknown as PhrasingContent);
 58	}
 59
 60	return nodes;
 61}
 62
 63export const remarkLiteralHtml: Plugin<[], Root> = () => {
 64	return (tree) => {
 65		visit(tree, 'html', (node, index, parent) => {
 66			if (!parent || typeof index !== 'number') {
 67				return;
 68			}
 69
 70			const replacement = createLiteralChildren(node.value);
 71
 72			if (!PHRASE_PARENTS.has(parent.type as string)) {
 73				const paragraph: Paragraph = {
 74					type: 'paragraph',
 75					children: replacement as Paragraph['children'],
 76					data: { literalHtml: true }
 77				};
 78
 79				const siblings = parent.children as unknown as Content[];
 80				siblings.splice(index, 1, paragraph as unknown as Content);
 81
 82				if (index > 0) {
 83					const previous = siblings[index - 1] as Paragraph | undefined;
 84
 85					if (
 86						previous?.type === 'paragraph' &&
 87						(previous.data as { literalHtml?: boolean } | undefined)?.literalHtml
 88					) {
 89						const prevChildren = previous.children as unknown as PhrasingContent[];
 90
 91						if (prevChildren.length) {
 92							const lastChild = prevChildren[prevChildren.length - 1];
 93
 94							if (lastChild.type !== 'break') {
 95								prevChildren.push({
 96									type: 'break'
 97								} as Break as unknown as PhrasingContent);
 98							}
 99						}
100
101						prevChildren.push(...(paragraph.children as unknown as PhrasingContent[]));
102
103						siblings.splice(index, 1);
104
105						return index;
106					}
107				}
108
109				return index + 1;
110			}
111
112			(parent.children as unknown as PhrasingContent[]).splice(
113				index,
114				1,
115				...(replacement as unknown as PhrasingContent[])
116			);
117
118			return index + replacement.length;
119		});
120	};
121};