1// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
2const SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}';
3
4function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
5 if (maxItems == 0) {
6 return '';
7 }
8 if (minItems === 0 && maxItems === 1) {
9 return `${itemRule}?`;
10 }
11
12
13 const separatorRule = opts.separatorRule ?? '';
14 const itemRuleIsLiteral = opts.itemRuleIsLiteral ?? false
15
16 if (separatorRule === '') {
17 if (minItems === 1 && maxItems === undefined) {
18 return `${itemRule}+`;
19 } else if (minItems === 0 && maxItems === undefined) {
20 return `${itemRule}*`;
21 } else {
22 return `${itemRule}{${minItems},${maxItems !== undefined ? maxItems : ''}}`;
23 }
24 }
25
26 const result = itemRule + ' ' + _buildRepetition(`(${separatorRule} ${itemRule})`, minItems > 0 ? minItems - 1 : 0, maxItems !== undefined ? maxItems - 1 : undefined);
27 return minItems === 0 ? `(${result})?` : result;
28}
29
30function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) {
31 const hasMin = minValue !== null;
32 const hasMax = maxValue !== null;
33
34 function digitRange(fromChar, toChar) {
35 out.push("[");
36 if (fromChar === toChar) {
37 out.push(fromChar);
38 } else {
39 out.push(fromChar);
40 out.push("-");
41 out.push(toChar);
42 }
43 out.push("]");
44 }
45
46 function moreDigits(minDigits, maxDigits) {
47 out.push("[0-9]");
48 if (minDigits === maxDigits && minDigits === 1) {
49 return;
50 }
51 out.push("{");
52 out.push(minDigits.toString());
53 if (maxDigits !== minDigits) {
54 out.push(",");
55 if (maxDigits !== Number.MAX_SAFE_INTEGER) {
56 out.push(maxDigits.toString());
57 }
58 }
59 out.push("}");
60 }
61
62 function uniformRange(fromStr, toStr) {
63 let i = 0;
64 while (i < fromStr.length && fromStr[i] === toStr[i]) {
65 i++;
66 }
67 if (i > 0) {
68 out.push("\"");
69 out.push(fromStr.slice(0, i));
70 out.push("\"");
71 }
72 if (i < fromStr.length) {
73 if (i > 0) {
74 out.push(" ");
75 }
76 const subLen = fromStr.length - i - 1;
77 if (subLen > 0) {
78 const fromSub = fromStr.slice(i + 1);
79 const toSub = toStr.slice(i + 1);
80 const subZeros = "0".repeat(subLen);
81 const subNines = "9".repeat(subLen);
82
83 let toReached = false;
84 out.push("(");
85 if (fromSub === subZeros) {
86 digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1));
87 out.push(" ");
88 moreDigits(subLen, subLen);
89 } else {
90 out.push("[");
91 out.push(fromStr[i]);
92 out.push("] ");
93 out.push("(");
94 uniformRange(fromSub, subNines);
95 out.push(")");
96 if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) {
97 out.push(" | ");
98 if (toSub === subNines) {
99 digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]);
100 toReached = true;
101 } else {
102 digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1));
103 }
104 out.push(" ");
105 moreDigits(subLen, subLen);
106 }
107 }
108 if (!toReached) {
109 out.push(" | ");
110 digitRange(toStr[i], toStr[i]);
111 out.push(" ");
112 uniformRange(subZeros, toSub);
113 }
114 out.push(")");
115 } else {
116 out.push("[");
117 out.push(fromStr[i]);
118 out.push("-");
119 out.push(toStr[i]);
120 out.push("]");
121 }
122 }
123 }
124
125 if (hasMin && hasMax) {
126 if (minValue < 0 && maxValue < 0) {
127 out.push("\"-\" (");
128 _generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true);
129 out.push(")");
130 return;
131 }
132
133 if (minValue < 0) {
134 out.push("\"-\" (");
135 _generateMinMaxInt(0, -minValue, out, decimalsLeft, true);
136 out.push(") | ");
137 minValue = 0;
138 }
139
140 let minS = minValue.toString();
141 const maxS = maxValue.toString();
142 const minDigits = minS.length;
143 const maxDigits = maxS.length;
144
145 for (let digits = minDigits; digits < maxDigits; digits++) {
146 uniformRange(minS, "9".repeat(digits));
147 minS = "1" + "0".repeat(digits);
148 out.push(" | ");
149 }
150 uniformRange(minS, maxS);
151 return;
152 }
153
154 const lessDecimals = Math.max(decimalsLeft - 1, 1);
155
156 if (hasMin) {
157 if (minValue < 0) {
158 out.push("\"-\" (");
159 _generateMinMaxInt(null, -minValue, out, decimalsLeft, false);
160 out.push(") | [0] | [1-9] ");
161 moreDigits(0, decimalsLeft - 1);
162 } else if (minValue === 0) {
163 if (topLevel) {
164 out.push("[0] | [1-9] ");
165 moreDigits(0, lessDecimals);
166 } else {
167 moreDigits(1, decimalsLeft);
168 }
169 } else if (minValue <= 9) {
170 const c = minValue.toString();
171 const range_start = topLevel ? '1' : '0';
172 if (c > range_start) {
173 digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1));
174 out.push(" ");
175 moreDigits(1, lessDecimals);
176 out.push(" | ");
177 }
178 digitRange(c, "9");
179 out.push(" ");
180 moreDigits(0, lessDecimals);
181 } else {
182 const minS = minValue.toString();
183 const length = minS.length;
184 const c = minS[0];
185
186 if (c > "1") {
187 digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1));
188 out.push(" ");
189 moreDigits(length, lessDecimals);
190 out.push(" | ");
191 }
192 digitRange(c, c);
193 out.push(" (");
194 _generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false);
195 out.push(")");
196 if (c < "9") {
197 out.push(" | ");
198 digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9");
199 out.push(" ");
200 moreDigits(length - 1, lessDecimals);
201 }
202 }
203 return;
204 }
205
206 if (hasMax) {
207 if (maxValue >= 0) {
208 if (topLevel) {
209 out.push("\"-\" [1-9] ");
210 moreDigits(0, lessDecimals);
211 out.push(" | ");
212 }
213 _generateMinMaxInt(0, maxValue, out, decimalsLeft, true);
214 } else {
215 out.push("\"-\" (");
216 _generateMinMaxInt(-maxValue, null, out, decimalsLeft, false);
217 out.push(")");
218 }
219 return;
220 }
221
222 throw new Error("At least one of minValue or maxValue must be set");
223}
224
225class BuiltinRule {
226 constructor(content, deps) {
227 this.content = content;
228 this.deps = deps || [];
229 }
230}
231
232const PRIMITIVE_RULES = {
233 boolean : new BuiltinRule('("true" | "false") space', []),
234 'decimal-part' : new BuiltinRule('[0-9]{1,16}', []),
235 'integral-part': new BuiltinRule('[0] | [1-9] [0-9]{0,15}', []),
236 number : new BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
237 integer : new BuiltinRule('("-"? integral-part) space', ['integral-part']),
238 value : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
239 object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
240 array : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
241 uuid : new BuiltinRule('"\\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\\"" space', []),
242 char : new BuiltinRule(`[^"\\\\\\x7F\\x00-\\x1F] | [\\\\] (["\\\\bfnrt] | "u" [0-9a-fA-F]{4})`, []),
243 string : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']),
244 null : new BuiltinRule('"null" space', []),
245};
246
247// TODO: support "uri", "email" string formats
248const STRING_FORMAT_RULES = {
249 'date' : new BuiltinRule('[0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
250 'time' : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
251 'date-time' : new BuiltinRule('date "T" time', ['date', 'time']),
252 'date-string' : new BuiltinRule('"\\"" date "\\"" space', ['date']),
253 'time-string' : new BuiltinRule('"\\"" time "\\"" space', ['time']),
254 'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
255}
256
257const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES};
258
259const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
260const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"\\]/g;
261const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
262const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]', '\\': '\\\\' };
263
264const NON_LITERAL_SET = new Set('|.()[]{}*+?');
265const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('^$.[]()|{}*+?');
266
267export class SchemaConverter {
268 constructor(options) {
269 this._propOrder = options.prop_order || {};
270 this._allowFetch = options.allow_fetch || false;
271 this._dotall = options.dotall || false;
272 this._rules = {'space': SPACE_RULE};
273 this._refs = {};
274 this._refsBeingResolved = new Set();
275 }
276
277 _formatLiteral(literal) {
278 const escaped = literal.replace(
279 GRAMMAR_LITERAL_ESCAPE_RE,
280 m => GRAMMAR_LITERAL_ESCAPES[m]
281 );
282 return `"${escaped}"`;
283 }
284
285 _formatRangeChar(literal) {
286 return JSON.stringify(literal).slice(1, -1).replace(
287 GRAMMAR_RANGE_LITERAL_ESCAPE_RE,
288 m => GRAMMAR_LITERAL_ESCAPES[m]
289 );
290 }
291
292 _addRule(name, rule) {
293 let escName = name.replace(INVALID_RULE_CHARS_RE, '-');
294 let key = escName;
295
296 if (escName in this._rules) {
297 if (this._rules[escName] === rule) {
298 return key;
299 }
300
301 let i = 0;
302 while ((`${escName}${i}` in this._rules) && (this._rules[`${escName}${i}`] !== rule)) {
303 i += 1;
304 }
305 key = `${escName}${i}`;
306 }
307
308 this._rules[key] = rule;
309 return key;
310 }
311
312 async resolveRefs(schema, url) {
313 const visit = async (n) => {
314 if (Array.isArray(n)) {
315 return Promise.all(n.map(visit));
316 } else if (typeof n === 'object' && n !== null) {
317 let ref = n.$ref;
318 let target;
319 if (ref !== undefined && !this._refs[ref]) {
320 if (ref.startsWith('https://')) {
321 if (!this._allowFetch) {
322 throw new Error('Fetching remote schemas is not allowed (use --allow-fetch for force)');
323 }
324 const fetch = (await import('node-fetch')).default;
325
326 const fragSplit = ref.split('#');
327 const baseUrl = fragSplit[0];
328
329 target = this._refs[baseUrl];
330 if (!target) {
331 target = await this.resolveRefs(await fetch(ref).then(res => res.json()), baseUrl);
332 this._refs[baseUrl] = target;
333 }
334
335 if (fragSplit.length === 1 || fragSplit[fragSplit.length - 1] === '') {
336 return target;
337 }
338 } else if (ref.startsWith('#/')) {
339 target = schema;
340 ref = `${url}${ref}`;
341 n.$ref = ref;
342 } else {
343 throw new Error(`Unsupported ref ${ref}`);
344 }
345
346 const selectors = ref.split('#')[1].split('/').slice(1);
347 for (const sel of selectors) {
348 const selIndex = parseInt(sel, 10);
349 if (target && sel in target) {
350 target = target[sel];
351 } else if (target && selIndex in target) {
352 target = target[selIndex];
353 } else {
354 throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
355 }
356 }
357
358 this._refs[ref] = target;
359 } else {
360 await Promise.all(Object.values(n).map(visit));
361 }
362 }
363
364 return n;
365 };
366
367 return visit(schema);
368 }
369
370 _generateUnionRule(name, altSchemas) {
371 return altSchemas
372 .map((altSchema, i) => this.visit(altSchema, `${name ?? ''}${name ? '-' : 'alternative-'}${i}`))
373 .join(' | ');
374 }
375
376 _visitPattern(pattern, name) {
377 if (!pattern.startsWith('^') || !pattern.endsWith('$')) {
378 throw new Error('Pattern must start with "^" and end with "$"');
379 }
380 pattern = pattern.slice(1, -1);
381 const subRuleIds = {};
382
383 let i = 0;
384 const length = pattern.length;
385
386 const getDot = () => {
387 let rule;
388 if (this._dotall) {
389 rule = '[\\U00000000-\\U0010FFFF]';
390 } else {
391 // Accept any character... except \n and \r line break chars (\x0A and \xOD)
392 rule = '[^\\x0A\\x0D]';
393 }
394 return this._addRule('dot', rule);
395 };
396
397
398 const toRule = ([s, isLiteral]) => isLiteral ? "\"" + s + "\"" : s;
399
400 const transform = () => {
401 const start = i;
402 // For each component of this sequence, store its string representation and whether it's a literal.
403 // We only need a flat structure here to apply repetition operators to the last item, and
404 // to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
405 // (GBNF's syntax is luckily very close to regular expressions!)
406 const seq = [];
407
408 const joinSeq = () => {
409 const ret = [];
410 for (const [isLiteral, g] of groupBy(seq, x => x[1])) {
411 if (isLiteral) {
412 ret.push([[...g].map(x => x[0]).join(''), true]);
413 } else {
414 ret.push(...g);
415 }
416 }
417 if (ret.length === 1) {
418 return ret[0];
419 }
420 return [ret.map(x => toRule(x)).join(' '), false];
421 };
422
423 while (i < length) {
424 const c = pattern[i];
425 if (c === '.') {
426 seq.push([getDot(), false]);
427 i += 1;
428 } else if (c === '(') {
429 i += 1;
430 if (i < length) {
431 if (pattern[i] === '?') {
432 throw new Error(`Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`);
433 }
434 }
435 seq.push([`(${toRule(transform())})`, false]);
436 } else if (c === ')') {
437 i += 1;
438 if (start <= 0 || pattern[start - 1] !== '(') {
439 throw new Error(`Unbalanced parentheses; start = ${start}, i = ${i}, pattern = ${pattern}`);
440 }
441 return joinSeq();
442 } else if (c === '[') {
443 let squareBrackets = c;
444 i += 1;
445 while (i < length && pattern[i] !== ']') {
446 if (pattern[i] === '\\') {
447 squareBrackets += pattern.slice(i, i + 2);
448 i += 2;
449 } else {
450 squareBrackets += pattern[i];
451 i += 1;
452 }
453 }
454 if (i >= length) {
455 throw new Error(`Unbalanced square brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
456 }
457 squareBrackets += ']';
458 i += 1;
459 seq.push([squareBrackets, false]);
460 } else if (c === '|') {
461 seq.push(['|', false]);
462 i += 1;
463 } else if (c === '*' || c === '+' || c === '?') {
464 seq[seq.length - 1] = [toRule(seq[seq.length - 1]) + c, false];
465 i += 1;
466 } else if (c === '{') {
467 let curlyBrackets = c;
468 i += 1;
469 while (i < length && pattern[i] !== '}') {
470 curlyBrackets += pattern[i];
471 i += 1;
472 }
473 if (i >= length) {
474 throw new Error(`Unbalanced curly brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
475 }
476 curlyBrackets += '}';
477 i += 1;
478 const nums = curlyBrackets.slice(1, -1).split(',').map(s => s.trim());
479 let minTimes, maxTimes;
480 if (nums.length === 1) {
481 minTimes = parseInt(nums[0], 10);
482 maxTimes = minTimes;
483 } else {
484 if (nums.length !== 2) {
485 throw new Error(`Invalid quantifier ${curlyBrackets}`);
486 }
487 minTimes = nums[0] ? parseInt(nums[0], 10) : 0;
488 maxTimes = nums[1] ? parseInt(nums[1], 10) : Infinity;
489 }
490
491 let [sub, subIsLiteral] = seq[seq.length - 1];
492
493 if (!subIsLiteral) {
494 let id = subRuleIds[sub];
495 if (id === undefined) {
496 id = this._addRule(`${name}-${Object.keys(subRuleIds).length + 1}`, sub);
497 subRuleIds[sub] = id;
498 }
499 sub = id;
500 }
501
502 seq[seq.length - 1] = [
503 _buildRepetition(subIsLiteral ? `"${sub}"` : sub, minTimes, maxTimes, {itemRuleIsLiteral: subIsLiteral}),
504 false
505 ];
506 } else {
507 let literal = '';
508 while (i < length) {
509 if (pattern[i] === '\\' && i < length - 1) {
510 const next = pattern[i + 1];
511 if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(next)) {
512 i += 1;
513 literal += pattern[i];
514 i += 1;
515 } else {
516 literal += pattern.slice(i, i + 2);
517 i += 2;
518 }
519 } else if (pattern[i] === '"') {
520 literal += '\\"';
521 i += 1;
522 } else if (!NON_LITERAL_SET.has(pattern[i]) &&
523 (i === length - 1 || literal === '' || pattern[i + 1] === '.' || !NON_LITERAL_SET.has(pattern[i+1]))) {
524 literal += pattern[i];
525 i += 1;
526 } else {
527 break;
528 }
529 }
530 if (literal !== '') {
531 seq.push([literal, true]);
532 }
533 }
534 }
535
536 return joinSeq();
537 };
538
539 return this._addRule(name, "\"\\\"\" (" + toRule(transform()) + ") \"\\\"\" space")
540 }
541
542 _notStrings(strings) {
543 class TrieNode {
544 constructor() {
545 this.children = {};
546 this.isEndOfString = false;
547 }
548
549 insert(str) {
550 let node = this;
551 for (const c of str) {
552 node = node.children[c] = node.children[c] || new TrieNode();
553 }
554 node.isEndOfString = true;
555 }
556 }
557
558 const trie = new TrieNode();
559 for (const s of strings) {
560 trie.insert(s);
561 }
562
563 const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
564 const out = ['["] ( '];
565
566 const visit = (node) => {
567 const rejects = [];
568 let first = true;
569 for (const c of Object.keys(node.children).sort()) {
570 const child = node.children[c];
571 rejects.push(c);
572 if (first) {
573 first = false;
574 } else {
575 out.push(' | ');
576 }
577 out.push(`[${c}]`);
578 if (Object.keys(child.children).length > 0) {
579 out.push(' (');
580 visit(child);
581 out.push(')');
582 } else if (child.isEndOfString) {
583 out.push(` ${charRuleName}+`);
584 }
585 }
586 if (Object.keys(node.children).length > 0) {
587 if (!first) {
588 out.push(' | ');
589 }
590 out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
591 }
592 };
593
594 visit(trie);
595
596 out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
597 return out.join('');
598 }
599
600 _resolveRef(ref) {
601 let refFragment = ref.split('#').pop();
602 let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-');
603 if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
604 this._refsBeingResolved.add(ref);
605 const resolved = this._refs[ref];
606 refName = this.visit(resolved, refName);
607 this._refsBeingResolved.delete(ref);
608 }
609 return refName;
610 }
611
612 _generateConstantRule(value) {
613 return this._formatLiteral(JSON.stringify(value));
614 }
615
616 visit(schema, name) {
617 const schemaType = schema.type;
618 const schemaFormat = schema.format;
619 const ruleName = name in RESERVED_NAMES ? name + '-' : name == '' ? 'root' : name;
620
621 const ref = schema.$ref;
622 if (ref !== undefined) {
623 return this._addRule(ruleName, this._resolveRef(ref));
624 } else if (schema.oneOf || schema.anyOf) {
625 return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
626 } else if (Array.isArray(schemaType)) {
627 return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
628 } else if ('const' in schema) {
629 return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
630 } else if ('enum' in schema) {
631 const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
632 return this._addRule(ruleName, rule);
633 } else if ((schemaType === undefined || schemaType === 'object') &&
634 ('properties' in schema ||
635 ('additionalProperties' in schema && schema.additionalProperties !== true))) {
636 const required = new Set(schema.required || []);
637 const properties = Object.entries(schema.properties ?? {});
638 return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
639 } else if ((schemaType === undefined || schemaType === 'object' || schemaType === 'string') && 'allOf' in schema) {
640 const required = new Set();
641 const properties = [];
642 const enumSets = [];
643 const addComponent = (compSchema, isRequired) => {
644 const ref = compSchema.$ref;
645 if (ref !== undefined) {
646 compSchema = this._refs[ref];
647 }
648
649 if ('properties' in compSchema) {
650 for (const [propName, propSchema] of Object.entries(compSchema.properties)) {
651 properties.push([propName, propSchema]);
652 if (isRequired) {
653 required.add(propName);
654 }
655 }
656 }
657
658 if ('enum' in compSchema) {
659 enumSets.push(new Set(compSchema.enum || []));
660 }
661 };
662
663 for (const t of schema.allOf) {
664 if ('anyOf' in t) {
665 for (const tt of t.anyOf) {
666 addComponent(tt, false);
667 }
668 } else {
669 addComponent(t, true);
670 }
671 }
672
673 if (enumSets.length > 0) {
674 const enumIntersection = new Set([...enumSets[0]].filter(v => enumSets.every(s => s.has(v))));
675 if (enumIntersection.size > 0) {
676 const sortedEnums = [...enumIntersection].sort((a, b) => a.localeCompare(b));
677 const rule = '(' + sortedEnums.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
678 return this._addRule(ruleName, rule);
679 }
680 }
681 return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
682 } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
683 const items = schema.items ?? schema.prefixItems;
684 if (Array.isArray(items)) {
685 return this._addRule(
686 ruleName,
687 '"[" space ' +
688 items.map((item, i) => this.visit(item, `${name ?? ''}${name ? '-' : ''}tuple-${i}`)).join(' "," space ') +
689 ' "]" space'
690 );
691 } else {
692 const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
693 const minItems = schema.minItems || 0;
694 const maxItems = schema.maxItems;
695 return this._addRule(ruleName, '"[" space ' + _buildRepetition(itemRuleName, minItems, maxItems, {separatorRule: '"," space'}) + ' "]" space');
696 }
697 } else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
698 return this._visitPattern(schema.pattern, ruleName);
699 } else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
700 return this._addPrimitive(
701 ruleName === 'root' ? 'root' : schemaFormat,
702 PRIMITIVE_RULES['uuid']
703 );
704 } else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) {
705 const primName = `${schema.format}-string`
706 return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName]));
707 } else if (schemaType === 'string' && ('minLength' in schema || 'maxLength' in schema)) {
708 const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
709 const minLen = schema.minLength || 0;
710 const maxLen = schema.maxLength;
711 return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
712 } else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) {
713 let minValue = null;
714 let maxValue = null;
715 if ('minimum' in schema) {
716 minValue = schema.minimum;
717 } else if ('exclusiveMinimum' in schema) {
718 minValue = schema.exclusiveMinimum + 1;
719 }
720 if ('maximum' in schema) {
721 maxValue = schema.maximum;
722 } else if ('exclusiveMaximum' in schema) {
723 maxValue = schema.exclusiveMaximum - 1;
724 }
725
726 const out = ["("];
727 _generateMinMaxInt(minValue, maxValue, out);
728 out.push(") space");
729 return this._addRule(ruleName, out.join(''));
730 } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
731 return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
732 } else {
733 if (!(schemaType in PRIMITIVE_RULES)) {
734 throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
735 }
736 // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
737 return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
738 }
739 }
740
741 _addPrimitive(name, rule) {
742 let n = this._addRule(name, rule.content);
743 for (const dep of rule.deps) {
744 const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep];
745 if (!depRule) {
746 throw new Error(`Rule ${dep} not known`);
747 }
748 if (!(dep in this._rules)) {
749 this._addPrimitive(dep, depRule);
750 }
751 }
752 return n;
753 }
754
755 _buildObjectRule(properties, required, name, additionalProperties) {
756 const propOrder = this._propOrder;
757 // sort by position in prop_order (if specified) then by original order
758 const sortedProps = properties.map(([k]) => k).sort((a, b) => {
759 const orderA = propOrder[a] || Infinity;
760 const orderB = propOrder[b] || Infinity;
761 return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
762 });
763
764 const propKvRuleNames = {};
765 for (const [propName, propSchema] of properties) {
766 const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
767 propKvRuleNames[propName] = this._addRule(
768 `${name ?? ''}${name ? '-' : ''}${propName}-kv`,
769 `${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}`
770 );
771 }
772 const requiredProps = sortedProps.filter(k => required.has(k));
773 const optionalProps = sortedProps.filter(k => !required.has(k));
774
775 if (additionalProperties) {
776 const subName = `${name ?? ''}${name ? '-' : ''}additional`;
777 const valueRule =
778 additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
779 : this._addPrimitive('value', PRIMITIVE_RULES['value']);
780
781 const key_rule =
782 sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
783 : this._addRule(`${subName}-k`, this._notStrings(sortedProps));
784
785 propKvRuleNames['*'] = this._addRule(
786 `${subName}-kv`,
787 `${key_rule} ":" space ${valueRule}`);
788 optionalProps.push('*');
789 }
790
791 let rule = '"{" space ';
792 rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
793
794 if (optionalProps.length > 0) {
795 rule += ' (';
796 if (requiredProps.length > 0) {
797 rule += ' "," space ( ';
798 }
799
800 const getRecursiveRefs = (ks, firstIsOptional) => {
801 const [k, ...rest] = ks;
802 const kvRuleName = propKvRuleNames[k];
803 let res;
804 const commaRef = `( "," space ${kvRuleName} )`;
805 if (firstIsOptional) {
806 res = commaRef + (k === '*' ? '*' : '?');
807 } else {
808 res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
809 }
810 if (rest.length > 0) {
811 res += ' ' + this._addRule(
812 `${name ?? ''}${name ? '-' : ''}${k}-rest`,
813 getRecursiveRefs(rest, true)
814 );
815 }
816 return res;
817 };
818
819 rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | ');
820 if (requiredProps.length > 0) {
821 rule += ' )';
822 }
823 rule += ' )?';
824 }
825
826 rule += ' "}" space';
827
828 return rule;
829 }
830
831 formatGrammar() {
832 let grammar = '';
833 for (const [name, rule] of Object.entries(this._rules).sort(([a], [b]) => a.localeCompare(b))) {
834 grammar += `${name} ::= ${rule}\n`;
835 }
836 return grammar;
837 }
838}
839
840// Helper function to group elements by a key function
841function* groupBy(iterable, keyFn) {
842 let lastKey = null;
843 let group = [];
844 for (const element of iterable) {
845 const key = keyFn(element);
846 if (lastKey !== null && key !== lastKey) {
847 yield [lastKey, group];
848 group = [];
849 }
850 group.push(element);
851 lastKey = key;
852 }
853 if (group.length > 0) {
854 yield [lastKey, group];
855 }
856}