1#include "tests.h"
  2
  3#include "json-schema-to-grammar.h"
  4
  5#include <regex>
  6
  7static std::string trim_leading_space(const std::string & s) {
  8    static const std::regex leading_ws_re = std::regex(R"((^|\n)\s+)");
  9    return std::regex_replace(s, leading_ws_re, "$1");
 10}
 11
 12static void assert_gbnf_equal(testing & t, const std::string & expected, const std::string & actual) {
 13    t.assert_equal("gbnf are equal", trim_leading_space(expected), trim_leading_space(actual));
 14}
 15
 16void test_gbnf_generation(testing &t) {
 17    t.test("literal grammar generation", [](testing &t) {
 18        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
 19            return p.literal("hello");
 20        });
 21
 22        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
 23            parser.build_grammar(builder);
 24        });
 25
 26        assert_gbnf_equal(t, R"""(
 27            root ::= "hello"
 28            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 29        )""", gbnf);
 30    });
 31
 32    t.test("char class grammar", [](testing &t) {
 33        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
 34            return p.chars("[a-z]", 1, 1);
 35        });
 36
 37        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
 38            parser.build_grammar(builder);
 39        });
 40
 41        assert_gbnf_equal(t, R"""(
 42            root ::= [a-z]
 43            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 44        )""", gbnf);
 45    });
 46
 47    t.test("sequence grammar", [](testing &t) {
 48        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
 49            return p.literal("hello") + p.literal(" ") + p.literal("world");
 50        });
 51
 52        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
 53            parser.build_grammar(builder);
 54        });
 55
 56        assert_gbnf_equal(t, R"""(
 57            root ::= "hello" " " "world"
 58            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 59        )""", gbnf);
 60    });
 61
 62    t.test("choice grammar", [](testing &t) {
 63        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
 64            return p.literal("cat") | p.literal("dog");
 65        });
 66
 67        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
 68            parser.build_grammar(builder);
 69        });
 70
 71        assert_gbnf_equal(t, R"""(
 72            root ::= "cat" | "dog"
 73            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 74        )""", gbnf);
 75    });
 76
 77    t.test("one_or_more grammar", [](testing &t) {
 78        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
 79            return p.one_or_more(p.literal("a"));
 80        });
 81
 82        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
 83            parser.build_grammar(builder);
 84        });
 85
 86        assert_gbnf_equal(t, R"""(
 87            root ::= "a"+
 88            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 89        )""", gbnf);
 90    });
 91
 92    t.test("zero_or_more grammar", [](testing &t) {
 93        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
 94            return p.zero_or_more(p.literal("a"));
 95        });
 96
 97        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
 98            parser.build_grammar(builder);
 99        });
100
101        assert_gbnf_equal(t, R"""(
102            root ::= "a"*
103            space ::= | " " | "\n"{1,2} [ \t]{0,20}
104        )""", gbnf);
105    });
106
107    t.test("optional grammar", [](testing &t) {
108        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
109            return p.literal("hello") + p.optional(p.literal(" world"));
110        });
111
112        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
113            parser.build_grammar(builder);
114        });
115
116        assert_gbnf_equal(t, R"""(
117            root ::= "hello" " world"?
118            space ::= | " " | "\n"{1,2} [ \t]{0,20}
119        )""", gbnf);
120    });
121
122    t.test("until grammar", [](testing &t) {
123        auto parser = build_peg_parser([](common_peg_parser_builder & p)  {
124            return p.until("</tag>");
125        });
126
127        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
128            parser.build_grammar(builder);
129        });
130
131        assert_gbnf_equal(t, R"""(
132            root ::= ([^<] | "<" [^/] | "</" [^t] | "</t" [^a] | "</ta" [^g] | "</tag" [^>])*
133            space ::= | " " | "\n"{1,2} [ \t]{0,20}
134        )""", gbnf);
135    });
136
137    t.test("complex expressions with parentheses", [](testing &t) {
138        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
139            return p.one_or_more(p.literal("a") | p.literal("b"));
140        });
141
142        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
143            parser.build_grammar(builder);
144        });
145
146        assert_gbnf_equal(t, R"""(
147            root ::= ("a" | "b")+
148            space ::= | " " | "\n"{1,2} [ \t]{0,20}
149        )""", gbnf);
150    });
151
152    t.test("rule references", [](testing &t) {
153        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
154            auto digit = p.rule("digit", p.chars("[0-9]", 1, 1));
155            return p.one_or_more(digit);
156        });
157
158        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
159            parser.build_grammar(builder);
160        });
161
162        assert_gbnf_equal(t, R"""(
163            digit ::= [0-9]
164            root ::= digit+
165            space ::= | " " | "\n"{1,2} [ \t]{0,20}
166        )""", gbnf);
167    });
168
169    t.test("escaping in literals", [](testing &t) {
170        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
171            return p.literal("hello\nworld\n!");
172        });
173
174        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
175            parser.build_grammar(builder);
176        });
177
178        assert_gbnf_equal(t, R"""(
179            root ::= "hello\nworld\n!"
180            space ::= | " " | "\n"{1,2} [ \t]{0,20}
181        )""", gbnf);
182    });
183
184    t.test("operator<< (whitespace insertion)", [](testing &t) {
185        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
186            return p.literal("hello") << p.literal("world");
187        });
188
189        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
190            parser.build_grammar(builder);
191        });
192
193        assert_gbnf_equal(t, R"""(
194            root ::= "hello" space "world"
195            space ::= | " " | "\n"{1,2} [ \t]{0,20}
196        )""", gbnf);
197    });
198
199    t.test("emit only reachable rules", [](testing &t) {
200        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
201            p.rule("orphan", p.literal("orphan"));
202            return p.literal("hello") + p.rule("child", p.literal(" world"));
203        });
204
205        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
206            parser.build_grammar(builder);
207        });
208
209        assert_gbnf_equal(t, R"""(
210            child ::= " world"
211            root ::= "hello" child
212            space ::= | " " | "\n"{1,2} [ \t]{0,20}
213        )""", gbnf);
214    });
215
216    t.test("emit only trigger rules (and references)", [](testing &t) {
217        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
218            auto rule1 = p.rule("rule-1", p.literal("a") + p.ref("rule-2"));
219            p.rule("rule-2", p.literal("b") + p.ref("rule-3"), true);
220            p.rule("rule-3", p.literal("c") + p.ref("rule-4"));
221            p.rule("rule-4", p.literal("d"), true);
222            return rule1;
223        });
224
225        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
226            parser.build_grammar(builder);
227        });
228
229        assert_gbnf_equal(t, R"""(
230            root ::= rule-1
231            rule-1 ::= "a" rule-2
232            rule-2 ::= "b" rule-3
233            rule-3 ::= "c" rule-4
234            rule-4 ::= "d"
235            space ::= | " " | "\n"{1,2} [ \t]{0,20}
236        )""", gbnf);
237
238        auto gbnf_lazy = build_grammar([&](const common_grammar_builder & builder) {
239            parser.build_grammar(builder, true);
240        });
241
242        assert_gbnf_equal(t, R"""(
243            root ::= rule-2 | rule-4
244            rule-2 ::= "b" rule-3
245            rule-3 ::= "c" rule-4
246            rule-4 ::= "d"
247            space ::= | " " | "\n"{1,2} [ \t]{0,20}
248        )""", gbnf_lazy);
249    });
250}