1import type { Plugin } from 'unified';
2import { visit } from 'unist-util-visit';
3import type { Break, Content, Paragraph, PhrasingContent, Root, Text } from 'mdast';
4import { LINE_BREAK, NBSP, PHRASE_PARENTS, TAB_AS_SPACES } from '$lib/constants/literal-html';
5
/**
 * remark plugin that rewrites raw HTML nodes into plain-text equivalents.
 *
 * remark parses raw HTML, both block-level and inline, into `html` nodes even
 * when we do not want to render it. We turn each of those nodes into regular
 * `text` nodes (with `break` nodes standing in for the line breaks) so the
 * downstream rehype pipeline escapes the characters instead of executing them.
 * Leading spaces and tab characters on each line are converted to non-breaking
 * spaces to keep indentation identical to the original author input.
 */
15
/**
 * Rewrites the leading indentation of a single line.
 *
 * Only the unbroken run of spaces and tabs at the very start of `line` is
 * touched: each space becomes `NBSP` and each tab becomes `TAB_AS_SPACES`.
 * Whitespace that appears after the first other character is left as-is.
 *
 * @param line - One line of text, without its line terminator.
 * @returns The line with its leading indentation substituted.
 */
function preserveIndent(line: string): string {
	// A replacer function is used so the substituted text is inserted literally,
	// without `$`-pattern expansion.
	return line.replace(/^[ \t]+/, (indent) =>
		Array.from(indent, (char) => (char === '\t' ? TAB_AS_SPACES : NBSP)).join('')
	);
}
40
/**
 * Converts a raw string into phrasing nodes that render it literally.
 *
 * The string is split on `LINE_BREAK`; every segment becomes a `text` node
 * (after `preserveIndent` has rewritten its leading indentation) and a `break`
 * node is placed between consecutive segments.
 *
 * @param value - Raw source text taken from an `html` node.
 * @returns A non-empty list of `text` / `break` nodes.
 */
function createLiteralChildren(value: string): PhrasingContent[] {
	const nodes: PhrasingContent[] = [];

	for (const [lineIndex, rawLine] of value.split(LINE_BREAK).entries()) {
		if (lineIndex > 0) {
			// Separate this segment from the previous one.
			const lineBreak: Break = { type: 'break' };
			nodes.push(lineBreak);
		}

		// Typed declarations (rather than `as unknown as` assertions) let the
		// compiler verify the literal really is a valid mdast node.
		const textNode: Text = { type: 'text', value: preserveIndent(rawLine) };
		nodes.push(textNode);
	}

	// Fallback for the case where the split produced no segments at all, so
	// callers always receive at least one node.
	if (nodes.length === 0) {
		const emptyText: Text = { type: 'text', value: '' };
		nodes.push(emptyText);
	}

	return nodes;
}
62
/**
 * remark plugin that replaces every `html` node with literal text nodes.
 *
 * For each `html` node the visitor builds replacement nodes with
 * `createLiteralChildren` and then:
 *
 * - if the parent's type is in `PHRASE_PARENTS`, splices the replacement nodes
 *   directly into the parent in place of the `html` node;
 * - otherwise, wraps them in a paragraph flagged with `data.literalHtml`. When
 *   the immediately preceding sibling is already such a flagged paragraph, the
 *   new nodes are appended to it (separated by a `break`) instead of creating
 *   a second paragraph.
 *
 * The visitor returns the sibling index at which traversal should continue.
 */
export const remarkLiteralHtml: Plugin<[], Root> = () => {
	return (tree) => {
		visit(tree, 'html', (node, index, parent) => {
			if (!parent || typeof index !== 'number') {
				return;
			}

			const literalNodes = createLiteralChildren(node.value);

			// Phrasing parent: swap the html node for the literal nodes in place.
			if (PHRASE_PARENTS.has(parent.type as string)) {
				(parent.children as unknown as PhrasingContent[]).splice(
					index,
					1,
					...(literalNodes as unknown as PhrasingContent[])
				);

				// Continue after the nodes that were just inserted.
				return index + literalNodes.length;
			}

			const siblings = parent.children as unknown as Content[];
			const previous = index > 0 ? (siblings[index - 1] as Paragraph | undefined) : undefined;

			// Previous sibling is a paragraph this plugin created: merge into it.
			if (
				previous?.type === 'paragraph' &&
				(previous.data as { literalHtml?: boolean } | undefined)?.literalHtml
			) {
				const merged = previous.children as unknown as PhrasingContent[];
				const needsSeparator = merged.length > 0 && merged[merged.length - 1].type !== 'break';

				if (needsSeparator) {
					const separator: Break = { type: 'break' };
					merged.push(separator);
				}

				merged.push(...literalNodes);

				// Drop the html node; its content now lives in the previous paragraph.
				siblings.splice(index, 1);

				// The next sibling has shifted into this slot, so resume here.
				return index;
			}

			// No paragraph to merge with: wrap the literal nodes in a flagged one.
			const wrapper: Paragraph = {
				type: 'paragraph',
				children: literalNodes as Paragraph['children'],
				data: { literalHtml: true }
			};

			siblings.splice(index, 1, wrapper as unknown as Content);

			return index + 1;
		});
	};
};