1#include "tests.h"
2
3#include "json-schema-to-grammar.h"
4
5#include <regex>
6
// Returns a copy of `s` with the whitespace run at the very start of the
// string, and the whitespace run following every newline, removed.
//
// The newline itself is kept (it is capture group 1 and is written back by the
// "$1" replacement). Because `\s` also matches newlines, consecutive blank
// lines collapse into a single line break. Trailing whitespace that precedes a
// newline is left untouched.
static std::string trim_leading_space(const std::string & s) {
    // Compiled once; group 1 is either the (empty) start of input or a '\n'.
    static const std::regex indent_re(R"((^|\n)\s+)");

    std::string stripped = std::regex_replace(s, indent_re, "$1");
    return stripped;
}
11
12static void assert_gbnf_equal(testing & t, const std::string & expected, const std::string & actual) {
13 t.assert_equal("gbnf are equal", trim_leading_space(expected), trim_leading_space(actual));
14}
15
16void test_gbnf_generation(testing &t) {
17 t.test("literal grammar generation", [](testing &t) {
18 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
19 return p.literal("hello");
20 });
21
22 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
23 parser.build_grammar(builder);
24 });
25
26 assert_gbnf_equal(t, R"""(
27 root ::= "hello"
28 space ::= | " " | "\n"{1,2} [ \t]{0,20}
29 )""", gbnf);
30 });
31
32 t.test("char class grammar", [](testing &t) {
33 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
34 return p.chars("[a-z]", 1, 1);
35 });
36
37 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
38 parser.build_grammar(builder);
39 });
40
41 assert_gbnf_equal(t, R"""(
42 root ::= [a-z]
43 space ::= | " " | "\n"{1,2} [ \t]{0,20}
44 )""", gbnf);
45 });
46
47 t.test("sequence grammar", [](testing &t) {
48 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
49 return p.literal("hello") + p.literal(" ") + p.literal("world");
50 });
51
52 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
53 parser.build_grammar(builder);
54 });
55
56 assert_gbnf_equal(t, R"""(
57 root ::= "hello" " " "world"
58 space ::= | " " | "\n"{1,2} [ \t]{0,20}
59 )""", gbnf);
60 });
61
62 t.test("choice grammar", [](testing &t) {
63 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
64 return p.literal("cat") | p.literal("dog");
65 });
66
67 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
68 parser.build_grammar(builder);
69 });
70
71 assert_gbnf_equal(t, R"""(
72 root ::= "cat" | "dog"
73 space ::= | " " | "\n"{1,2} [ \t]{0,20}
74 )""", gbnf);
75 });
76
77 t.test("one_or_more grammar", [](testing &t) {
78 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
79 return p.one_or_more(p.literal("a"));
80 });
81
82 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
83 parser.build_grammar(builder);
84 });
85
86 assert_gbnf_equal(t, R"""(
87 root ::= "a"+
88 space ::= | " " | "\n"{1,2} [ \t]{0,20}
89 )""", gbnf);
90 });
91
92 t.test("zero_or_more grammar", [](testing &t) {
93 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
94 return p.zero_or_more(p.literal("a"));
95 });
96
97 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
98 parser.build_grammar(builder);
99 });
100
101 assert_gbnf_equal(t, R"""(
102 root ::= "a"*
103 space ::= | " " | "\n"{1,2} [ \t]{0,20}
104 )""", gbnf);
105 });
106
107 t.test("optional grammar", [](testing &t) {
108 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
109 return p.literal("hello") + p.optional(p.literal(" world"));
110 });
111
112 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
113 parser.build_grammar(builder);
114 });
115
116 assert_gbnf_equal(t, R"""(
117 root ::= "hello" " world"?
118 space ::= | " " | "\n"{1,2} [ \t]{0,20}
119 )""", gbnf);
120 });
121
122 t.test("until grammar", [](testing &t) {
123 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
124 return p.until("</tag>");
125 });
126
127 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
128 parser.build_grammar(builder);
129 });
130
131 assert_gbnf_equal(t, R"""(
132 root ::= ([^<] | "<" [^/] | "</" [^t] | "</t" [^a] | "</ta" [^g] | "</tag" [^>])*
133 space ::= | " " | "\n"{1,2} [ \t]{0,20}
134 )""", gbnf);
135 });
136
137 t.test("complex expressions with parentheses", [](testing &t) {
138 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
139 return p.one_or_more(p.literal("a") | p.literal("b"));
140 });
141
142 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
143 parser.build_grammar(builder);
144 });
145
146 assert_gbnf_equal(t, R"""(
147 root ::= ("a" | "b")+
148 space ::= | " " | "\n"{1,2} [ \t]{0,20}
149 )""", gbnf);
150 });
151
152 t.test("rule references", [](testing &t) {
153 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
154 auto digit = p.rule("digit", p.chars("[0-9]", 1, 1));
155 return p.one_or_more(digit);
156 });
157
158 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
159 parser.build_grammar(builder);
160 });
161
162 assert_gbnf_equal(t, R"""(
163 digit ::= [0-9]
164 root ::= digit+
165 space ::= | " " | "\n"{1,2} [ \t]{0,20}
166 )""", gbnf);
167 });
168
169 t.test("escaping in literals", [](testing &t) {
170 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
171 return p.literal("hello\nworld\n!");
172 });
173
174 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
175 parser.build_grammar(builder);
176 });
177
178 assert_gbnf_equal(t, R"""(
179 root ::= "hello\nworld\n!"
180 space ::= | " " | "\n"{1,2} [ \t]{0,20}
181 )""", gbnf);
182 });
183
184 t.test("operator<< (whitespace insertion)", [](testing &t) {
185 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
186 return p.literal("hello") << p.literal("world");
187 });
188
189 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
190 parser.build_grammar(builder);
191 });
192
193 assert_gbnf_equal(t, R"""(
194 root ::= "hello" space "world"
195 space ::= | " " | "\n"{1,2} [ \t]{0,20}
196 )""", gbnf);
197 });
198
199 t.test("emit only reachable rules", [](testing &t) {
200 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
201 p.rule("orphan", p.literal("orphan"));
202 return p.literal("hello") + p.rule("child", p.literal(" world"));
203 });
204
205 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
206 parser.build_grammar(builder);
207 });
208
209 assert_gbnf_equal(t, R"""(
210 child ::= " world"
211 root ::= "hello" child
212 space ::= | " " | "\n"{1,2} [ \t]{0,20}
213 )""", gbnf);
214 });
215
216 t.test("emit only trigger rules (and references)", [](testing &t) {
217 auto parser = build_peg_parser([](common_peg_parser_builder & p) {
218 auto rule1 = p.rule("rule-1", p.literal("a") + p.ref("rule-2"));
219 p.rule("rule-2", p.literal("b") + p.ref("rule-3"), true);
220 p.rule("rule-3", p.literal("c") + p.ref("rule-4"));
221 p.rule("rule-4", p.literal("d"), true);
222 return rule1;
223 });
224
225 auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
226 parser.build_grammar(builder);
227 });
228
229 assert_gbnf_equal(t, R"""(
230 root ::= rule-1
231 rule-1 ::= "a" rule-2
232 rule-2 ::= "b" rule-3
233 rule-3 ::= "c" rule-4
234 rule-4 ::= "d"
235 space ::= | " " | "\n"{1,2} [ \t]{0,20}
236 )""", gbnf);
237
238 auto gbnf_lazy = build_grammar([&](const common_grammar_builder & builder) {
239 parser.build_grammar(builder, true);
240 });
241
242 assert_gbnf_equal(t, R"""(
243 root ::= rule-2 | rule-4
244 rule-2 ::= "b" rule-3
245 rule-3 ::= "c" rule-4
246 rule-4 ::= "d"
247 space ::= | " " | "\n"{1,2} [ \t]{0,20}
248 )""", gbnf_lazy);
249 });
250}