summaryrefslogtreecommitdiff
path: root/llama.cpp/tests/peg-parser/test-gbnf-generation.cpp
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/tests/peg-parser/test-gbnf-generation.cpp
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/tests/peg-parser/test-gbnf-generation.cpp')
-rw-r--r--llama.cpp/tests/peg-parser/test-gbnf-generation.cpp250
1 files changed, 250 insertions, 0 deletions
diff --git a/llama.cpp/tests/peg-parser/test-gbnf-generation.cpp b/llama.cpp/tests/peg-parser/test-gbnf-generation.cpp
new file mode 100644
index 0000000..68857a5
--- /dev/null
+++ b/llama.cpp/tests/peg-parser/test-gbnf-generation.cpp
@@ -0,0 +1,250 @@
+#include "tests.h"
+
+#include "json-schema-to-grammar.h"
+
+#include <regex>
+
+static std::string trim_leading_space(const std::string & s) {
+ static const std::regex leading_ws_re = std::regex(R"((^|\n)\s+)");
+ return std::regex_replace(s, leading_ws_re, "$1");
+}
+
+static void assert_gbnf_equal(testing & t, const std::string & expected, const std::string & actual) {
+ t.assert_equal("gbnf are equal", trim_leading_space(expected), trim_leading_space(actual));
+}
+
+void test_gbnf_generation(testing &t) {
+ t.test("literal grammar generation", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.literal("hello");
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "hello"
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("char class grammar", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.chars("[a-z]", 1, 1);
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= [a-z]
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("sequence grammar", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.literal("hello") + p.literal(" ") + p.literal("world");
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "hello" " " "world"
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("choice grammar", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.literal("cat") | p.literal("dog");
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "cat" | "dog"
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("one_or_more grammar", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.one_or_more(p.literal("a"));
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "a"+
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("zero_or_more grammar", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.zero_or_more(p.literal("a"));
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "a"*
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("optional grammar", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.literal("hello") + p.optional(p.literal(" world"));
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "hello" " world"?
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("until grammar", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.until("</tag>");
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= ([^<] | "<" [^/] | "</" [^t] | "</t" [^a] | "</ta" [^g] | "</tag" [^>])*
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("complex expressions with parentheses", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.one_or_more(p.literal("a") | p.literal("b"));
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= ("a" | "b")+
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("rule references", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ auto digit = p.rule("digit", p.chars("[0-9]", 1, 1));
+ return p.one_or_more(digit);
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ digit ::= [0-9]
+ root ::= digit+
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("escaping in literals", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.literal("hello\nworld\n!");
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "hello\nworld\n!"
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("operator<< (whitespace insertion)", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ return p.literal("hello") << p.literal("world");
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= "hello" space "world"
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("emit only reachable rules", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ p.rule("orphan", p.literal("orphan"));
+ return p.literal("hello") + p.rule("child", p.literal(" world"));
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ child ::= " world"
+ root ::= "hello" child
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+ });
+
+ t.test("emit only trigger rules (and references)", [](testing &t) {
+ auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+ auto rule1 = p.rule("rule-1", p.literal("a") + p.ref("rule-2"));
+ p.rule("rule-2", p.literal("b") + p.ref("rule-3"), true);
+ p.rule("rule-3", p.literal("c") + p.ref("rule-4"));
+ p.rule("rule-4", p.literal("d"), true);
+ return rule1;
+ });
+
+ auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= rule-1
+ rule-1 ::= "a" rule-2
+ rule-2 ::= "b" rule-3
+ rule-3 ::= "c" rule-4
+ rule-4 ::= "d"
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf);
+
+ auto gbnf_lazy = build_grammar([&](const common_grammar_builder & builder) {
+ parser.build_grammar(builder, true);
+ });
+
+ assert_gbnf_equal(t, R"""(
+ root ::= rule-2 | rule-4
+ rule-2 ::= "b" rule-3
+ rule-3 ::= "c" rule-4
+ rule-4 ::= "d"
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""", gbnf_lazy);
+ });
+}