1// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
  2const SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}';
  3
  4function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
  5  if (maxItems == 0) {
  6    return '';
  7  }
  8  if (minItems === 0 && maxItems === 1) {
  9    return `${itemRule}?`;
 10  }
 11
 12
 13  const separatorRule = opts.separatorRule ?? '';
 14  const itemRuleIsLiteral = opts.itemRuleIsLiteral ?? false
 15
 16  if (separatorRule === '') {
 17    if (minItems === 1 && maxItems === undefined) {
 18      return `${itemRule}+`;
 19    } else if (minItems === 0 && maxItems === undefined) {
 20      return `${itemRule}*`;
 21    } else {
 22      return `${itemRule}{${minItems},${maxItems !== undefined ? maxItems : ''}}`;
 23    }
 24  }
 25
 26  const result = itemRule + ' ' + _buildRepetition(`(${separatorRule} ${itemRule})`, minItems > 0 ? minItems - 1 : 0, maxItems !== undefined ? maxItems - 1 : undefined);
 27  return minItems === 0 ? `(${result})?` : result;
 28}
 29
 30function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) {
 31  const hasMin = minValue !== null;
 32  const hasMax = maxValue !== null;
 33
 34  function digitRange(fromChar, toChar) {
 35      out.push("[");
 36      if (fromChar === toChar) {
 37          out.push(fromChar);
 38      } else {
 39          out.push(fromChar);
 40          out.push("-");
 41          out.push(toChar);
 42      }
 43      out.push("]");
 44  }
 45
 46  function moreDigits(minDigits, maxDigits) {
 47      out.push("[0-9]");
 48      if (minDigits === maxDigits && minDigits === 1) {
 49          return;
 50      }
 51      out.push("{");
 52      out.push(minDigits.toString());
 53      if (maxDigits !== minDigits) {
 54          out.push(",");
 55          if (maxDigits !== Number.MAX_SAFE_INTEGER) {
 56              out.push(maxDigits.toString());
 57          }
 58      }
 59      out.push("}");
 60  }
 61
 62  function uniformRange(fromStr, toStr) {
 63      let i = 0;
 64      while (i < fromStr.length && fromStr[i] === toStr[i]) {
 65          i++;
 66      }
 67      if (i > 0) {
 68          out.push("\"");
 69          out.push(fromStr.slice(0, i));
 70          out.push("\"");
 71      }
 72      if (i < fromStr.length) {
 73          if (i > 0) {
 74              out.push(" ");
 75          }
 76          const subLen = fromStr.length - i - 1;
 77          if (subLen > 0) {
 78              const fromSub = fromStr.slice(i + 1);
 79              const toSub = toStr.slice(i + 1);
 80              const subZeros = "0".repeat(subLen);
 81              const subNines = "9".repeat(subLen);
 82
 83              let toReached = false;
 84              out.push("(");
 85              if (fromSub === subZeros) {
 86                  digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1));
 87                  out.push(" ");
 88                  moreDigits(subLen, subLen);
 89              } else {
 90                  out.push("[");
 91                  out.push(fromStr[i]);
 92                  out.push("] ");
 93                  out.push("(");
 94                  uniformRange(fromSub, subNines);
 95                  out.push(")");
 96                  if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) {
 97                      out.push(" | ");
 98                      if (toSub === subNines) {
 99                          digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]);
100                          toReached = true;
101                      } else {
102                          digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1));
103                      }
104                      out.push(" ");
105                      moreDigits(subLen, subLen);
106                  }
107              }
108              if (!toReached) {
109                  out.push(" | ");
110                  digitRange(toStr[i], toStr[i]);
111                  out.push(" ");
112                  uniformRange(subZeros, toSub);
113              }
114              out.push(")");
115          } else {
116              out.push("[");
117              out.push(fromStr[i]);
118              out.push("-");
119              out.push(toStr[i]);
120              out.push("]");
121          }
122      }
123  }
124
125  if (hasMin && hasMax) {
126      if (minValue < 0 && maxValue < 0) {
127          out.push("\"-\" (");
128          _generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true);
129          out.push(")");
130          return;
131      }
132
133      if (minValue < 0) {
134          out.push("\"-\" (");
135          _generateMinMaxInt(0, -minValue, out, decimalsLeft, true);
136          out.push(") | ");
137          minValue = 0;
138      }
139
140      let minS = minValue.toString();
141      const maxS = maxValue.toString();
142      const minDigits = minS.length;
143      const maxDigits = maxS.length;
144
145      for (let digits = minDigits; digits < maxDigits; digits++) {
146          uniformRange(minS, "9".repeat(digits));
147          minS = "1" + "0".repeat(digits);
148          out.push(" | ");
149      }
150      uniformRange(minS, maxS);
151      return;
152  }
153
154  const lessDecimals = Math.max(decimalsLeft - 1, 1);
155
156  if (hasMin) {
157      if (minValue < 0) {
158          out.push("\"-\" (");
159          _generateMinMaxInt(null, -minValue, out, decimalsLeft, false);
160          out.push(") | [0] | [1-9] ");
161          moreDigits(0, decimalsLeft - 1);
162      } else if (minValue === 0) {
163          if (topLevel) {
164              out.push("[0] | [1-9] ");
165              moreDigits(0, lessDecimals);
166          } else {
167              moreDigits(1, decimalsLeft);
168          }
169      } else if (minValue <= 9) {
170          const c = minValue.toString();
171          const range_start = topLevel ? '1' : '0';
172          if (c > range_start) {
173              digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1));
174              out.push(" ");
175              moreDigits(1, lessDecimals);
176              out.push(" | ");
177          }
178          digitRange(c, "9");
179          out.push(" ");
180          moreDigits(0, lessDecimals);
181      } else {
182          const minS = minValue.toString();
183          const length = minS.length;
184          const c = minS[0];
185
186          if (c > "1") {
187              digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1));
188              out.push(" ");
189              moreDigits(length, lessDecimals);
190              out.push(" | ");
191          }
192          digitRange(c, c);
193          out.push(" (");
194          _generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false);
195          out.push(")");
196          if (c < "9") {
197              out.push(" | ");
198              digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9");
199              out.push(" ");
200              moreDigits(length - 1, lessDecimals);
201          }
202      }
203      return;
204  }
205
206  if (hasMax) {
207      if (maxValue >= 0) {
208          if (topLevel) {
209              out.push("\"-\" [1-9] ");
210              moreDigits(0, lessDecimals);
211              out.push(" | ");
212          }
213          _generateMinMaxInt(0, maxValue, out, decimalsLeft, true);
214      } else {
215          out.push("\"-\" (");
216          _generateMinMaxInt(-maxValue, null, out, decimalsLeft, false);
217          out.push(")");
218      }
219      return;
220  }
221
222  throw new Error("At least one of minValue or maxValue must be set");
223}
224
225class BuiltinRule {
226  constructor(content, deps) {
227    this.content = content;
228    this.deps = deps || [];
229  }
230}
231
232const PRIMITIVE_RULES = {
233  boolean        : new BuiltinRule('("true" | "false") space', []),
234  'decimal-part' : new BuiltinRule('[0-9]{1,16}', []),
235  'integral-part': new BuiltinRule('[0] | [1-9] [0-9]{0,15}', []),
236  number         : new BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
237  integer        : new BuiltinRule('("-"? integral-part) space', ['integral-part']),
238  value          : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
239  object         : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
240  array          : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
241  uuid           : new BuiltinRule('"\\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\\"" space', []),
242  char           : new BuiltinRule(`[^"\\\\\\x7F\\x00-\\x1F] | [\\\\] (["\\\\bfnrt] | "u" [0-9a-fA-F]{4})`, []),
243  string         : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']),
244  null           : new BuiltinRule('"null" space', []),
245};
246
247// TODO: support "uri", "email" string formats
248const STRING_FORMAT_RULES = {
249  'date'            : new BuiltinRule('[0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
250  'time'            : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
251  'date-time'       : new BuiltinRule('date "T" time', ['date', 'time']),
252  'date-string'     : new BuiltinRule('"\\"" date "\\"" space', ['date']),
253  'time-string'     : new BuiltinRule('"\\"" time "\\"" space', ['time']),
254  'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
255}
256
257const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES};
258
259const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
260const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"\\]/g;
261const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
262const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]', '\\': '\\\\' };
263
264const NON_LITERAL_SET = new Set('|.()[]{}*+?');
265const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('^$.[]()|{}*+?');
266
267export class SchemaConverter {
268  constructor(options) {
269    this._propOrder = options.prop_order || {};
270    this._allowFetch = options.allow_fetch || false;
271    this._dotall = options.dotall || false;
272    this._rules = {'space': SPACE_RULE};
273    this._refs = {};
274    this._refsBeingResolved = new Set();
275  }
276
277  _formatLiteral(literal) {
278    const escaped = literal.replace(
279      GRAMMAR_LITERAL_ESCAPE_RE,
280      m => GRAMMAR_LITERAL_ESCAPES[m]
281    );
282    return `"${escaped}"`;
283  }
284
285  _formatRangeChar(literal) {
286    return JSON.stringify(literal).slice(1, -1).replace(
287      GRAMMAR_RANGE_LITERAL_ESCAPE_RE,
288      m => GRAMMAR_LITERAL_ESCAPES[m]
289    );
290  }
291
292  _addRule(name, rule) {
293    let escName = name.replace(INVALID_RULE_CHARS_RE, '-');
294    let key = escName;
295
296    if (escName in this._rules) {
297      if (this._rules[escName] === rule) {
298        return key;
299      }
300
301      let i = 0;
302      while ((`${escName}${i}` in this._rules) && (this._rules[`${escName}${i}`] !== rule)) {
303        i += 1;
304      }
305      key = `${escName}${i}`;
306    }
307
308    this._rules[key] = rule;
309    return key;
310  }
311
312  async resolveRefs(schema, url) {
313    const visit = async (n) => {
314      if (Array.isArray(n)) {
315        return Promise.all(n.map(visit));
316      } else if (typeof n === 'object' && n !== null) {
317        let ref = n.$ref;
318        let target;
319        if (ref !== undefined && !this._refs[ref]) {
320          if (ref.startsWith('https://')) {
321            if (!this._allowFetch) {
322              throw new Error('Fetching remote schemas is not allowed (use --allow-fetch for force)');
323            }
324            const fetch = (await import('node-fetch')).default;
325
326            const fragSplit = ref.split('#');
327            const baseUrl = fragSplit[0];
328
329            target = this._refs[baseUrl];
330            if (!target) {
331              target = await this.resolveRefs(await fetch(ref).then(res => res.json()), baseUrl);
332              this._refs[baseUrl] = target;
333            }
334
335            if (fragSplit.length === 1 || fragSplit[fragSplit.length - 1] === '') {
336              return target;
337            }
338          } else if (ref.startsWith('#/')) {
339            target = schema;
340            ref = `${url}${ref}`;
341            n.$ref = ref;
342          } else {
343            throw new Error(`Unsupported ref ${ref}`);
344          }
345
346          const selectors = ref.split('#')[1].split('/').slice(1);
347          for (const sel of selectors) {
348            const selIndex = parseInt(sel, 10);
349            if (target && sel in target) {
350              target = target[sel];
351            } else if (target && selIndex in target) {
352              target = target[selIndex];
353            } else {
354              throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
355            }
356          }
357
358          this._refs[ref] = target;
359        } else {
360          await Promise.all(Object.values(n).map(visit));
361        }
362      }
363
364      return n;
365    };
366
367    return visit(schema);
368  }
369
370  _generateUnionRule(name, altSchemas) {
371    return altSchemas
372      .map((altSchema, i) => this.visit(altSchema, `${name ?? ''}${name ? '-' : 'alternative-'}${i}`))
373      .join(' | ');
374  }
375
376  _visitPattern(pattern, name) {
377    if (!pattern.startsWith('^') || !pattern.endsWith('$')) {
378      throw new Error('Pattern must start with "^" and end with "$"');
379    }
380    pattern = pattern.slice(1, -1);
381    const subRuleIds = {};
382
383    let i = 0;
384    const length = pattern.length;
385
386    const getDot = () => {
387      let rule;
388      if (this._dotall) {
389        rule = '[\\U00000000-\\U0010FFFF]';
390      } else {
391        // Accept any character... except \n and \r line break chars (\x0A and \xOD)
392        rule = '[^\\x0A\\x0D]';
393      }
394      return this._addRule('dot', rule);
395    };
396
397
398    const toRule = ([s, isLiteral]) => isLiteral ? "\"" + s + "\"" : s;
399
400    const transform = () => {
401      const start = i;
402      // For each component of this sequence, store its string representation and whether it's a literal.
403      // We only need a flat structure here to apply repetition operators to the last item, and
404      // to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
405      // (GBNF's syntax is luckily very close to regular expressions!)
406      const seq = [];
407
408      const joinSeq = () => {
409        const ret = [];
410        for (const [isLiteral, g] of groupBy(seq, x => x[1])) {
411          if (isLiteral) {
412            ret.push([[...g].map(x => x[0]).join(''), true]);
413          } else {
414            ret.push(...g);
415          }
416        }
417        if (ret.length === 1) {
418          return ret[0];
419        }
420        return [ret.map(x => toRule(x)).join(' '), false];
421      };
422
423      while (i < length) {
424        const c = pattern[i];
425        if (c === '.') {
426          seq.push([getDot(), false]);
427          i += 1;
428        } else if (c === '(') {
429          i += 1;
430          if (i < length) {
431            if (pattern[i] === '?') {
432              throw new Error(`Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`);
433            }
434          }
435          seq.push([`(${toRule(transform())})`, false]);
436        } else if (c === ')') {
437          i += 1;
438          if (start <= 0 || pattern[start - 1] !== '(') {
439            throw new Error(`Unbalanced parentheses; start = ${start}, i = ${i}, pattern = ${pattern}`);
440          }
441          return joinSeq();
442        } else if (c === '[') {
443          let squareBrackets = c;
444          i += 1;
445          while (i < length && pattern[i] !== ']') {
446            if (pattern[i] === '\\') {
447              squareBrackets += pattern.slice(i, i + 2);
448              i += 2;
449            } else {
450              squareBrackets += pattern[i];
451              i += 1;
452            }
453          }
454          if (i >= length) {
455            throw new Error(`Unbalanced square brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
456          }
457          squareBrackets += ']';
458          i += 1;
459          seq.push([squareBrackets, false]);
460        } else if (c === '|') {
461          seq.push(['|', false]);
462          i += 1;
463        } else if (c === '*' || c === '+' || c === '?') {
464          seq[seq.length - 1] = [toRule(seq[seq.length - 1]) + c, false];
465          i += 1;
466        } else if (c === '{') {
467          let curlyBrackets = c;
468          i += 1;
469          while (i < length && pattern[i] !== '}') {
470            curlyBrackets += pattern[i];
471            i += 1;
472          }
473          if (i >= length) {
474            throw new Error(`Unbalanced curly brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
475          }
476          curlyBrackets += '}';
477          i += 1;
478          const nums = curlyBrackets.slice(1, -1).split(',').map(s => s.trim());
479          let minTimes, maxTimes;
480          if (nums.length === 1) {
481            minTimes = parseInt(nums[0], 10);
482            maxTimes = minTimes;
483          } else {
484            if (nums.length !== 2) {
485              throw new Error(`Invalid quantifier ${curlyBrackets}`);
486            }
487            minTimes = nums[0] ? parseInt(nums[0], 10) : 0;
488            maxTimes = nums[1] ? parseInt(nums[1], 10) : Infinity;
489          }
490
491          let [sub, subIsLiteral] = seq[seq.length - 1];
492
493          if (!subIsLiteral) {
494            let id = subRuleIds[sub];
495            if (id === undefined) {
496              id = this._addRule(`${name}-${Object.keys(subRuleIds).length + 1}`, sub);
497              subRuleIds[sub] = id;
498            }
499            sub = id;
500          }
501
502          seq[seq.length - 1] = [
503            _buildRepetition(subIsLiteral ? `"${sub}"` : sub, minTimes, maxTimes, {itemRuleIsLiteral: subIsLiteral}),
504            false
505          ];
506        } else {
507          let literal = '';
508          while (i < length) {
509            if (pattern[i] === '\\' && i < length - 1) {
510              const next = pattern[i + 1];
511              if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(next)) {
512                i += 1;
513                literal += pattern[i];
514                i += 1;
515              } else {
516                literal += pattern.slice(i, i + 2);
517                i += 2;
518              }
519            } else if (pattern[i] === '"') {
520              literal += '\\"';
521              i += 1;
522            } else if (!NON_LITERAL_SET.has(pattern[i]) &&
523                (i === length - 1 || literal === '' || pattern[i + 1] === '.' || !NON_LITERAL_SET.has(pattern[i+1]))) {
524              literal += pattern[i];
525              i += 1;
526            } else {
527              break;
528            }
529          }
530          if (literal !== '') {
531            seq.push([literal, true]);
532          }
533        }
534      }
535
536      return joinSeq();
537    };
538
539    return this._addRule(name, "\"\\\"\" (" + toRule(transform()) + ") \"\\\"\" space")
540  }
541
542  _notStrings(strings) {
543    class TrieNode {
544      constructor() {
545        this.children = {};
546        this.isEndOfString = false;
547      }
548
549      insert(str) {
550        let node = this;
551        for (const c of str) {
552          node = node.children[c] = node.children[c] || new TrieNode();
553        }
554        node.isEndOfString = true;
555      }
556    }
557
558    const trie = new TrieNode();
559    for (const s of strings) {
560      trie.insert(s);
561    }
562
563    const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
564    const out = ['["] ( '];
565
566    const visit = (node) => {
567      const rejects = [];
568      let first = true;
569      for (const c of Object.keys(node.children).sort()) {
570        const child = node.children[c];
571        rejects.push(c);
572        if (first) {
573          first = false;
574        } else {
575          out.push(' | ');
576        }
577        out.push(`[${c}]`);
578        if (Object.keys(child.children).length > 0) {
579          out.push(' (');
580          visit(child);
581          out.push(')');
582        } else if (child.isEndOfString) {
583          out.push(` ${charRuleName}+`);
584        }
585      }
586      if (Object.keys(node.children).length > 0) {
587        if (!first) {
588          out.push(' | ');
589        }
590        out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
591      }
592    };
593
594    visit(trie);
595
596    out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
597    return out.join('');
598  }
599
600  _resolveRef(ref) {
601    let refFragment = ref.split('#').pop();
602    let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-');
603    if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
604      this._refsBeingResolved.add(ref);
605      const resolved = this._refs[ref];
606      refName = this.visit(resolved, refName);
607      this._refsBeingResolved.delete(ref);
608    }
609    return refName;
610  }
611
612  _generateConstantRule(value) {
613    return this._formatLiteral(JSON.stringify(value));
614  }
615
616  visit(schema, name) {
617    const schemaType = schema.type;
618    const schemaFormat = schema.format;
619    const ruleName = name in RESERVED_NAMES ? name + '-' : name == '' ? 'root' : name;
620
621    const ref = schema.$ref;
622    if (ref !== undefined) {
623      return this._addRule(ruleName, this._resolveRef(ref));
624    } else if (schema.oneOf || schema.anyOf) {
625      return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
626    } else if (Array.isArray(schemaType)) {
627      return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
628    } else if ('const' in schema) {
629      return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
630    } else if ('enum' in schema) {
631      const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
632      return this._addRule(ruleName, rule);
633    } else if ((schemaType === undefined || schemaType === 'object') &&
634               ('properties' in schema ||
635                ('additionalProperties' in schema && schema.additionalProperties !== true))) {
636      const required = new Set(schema.required || []);
637      const properties = Object.entries(schema.properties ?? {});
638      return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
639    } else if ((schemaType === undefined || schemaType === 'object' || schemaType === 'string') && 'allOf' in schema) {
640      const required = new Set();
641      const properties = [];
642      const enumSets = [];
643      const addComponent = (compSchema, isRequired) => {
644        const ref = compSchema.$ref;
645        if (ref !== undefined) {
646          compSchema = this._refs[ref];
647        }
648
649        if ('properties' in compSchema) {
650          for (const [propName, propSchema] of Object.entries(compSchema.properties)) {
651            properties.push([propName, propSchema]);
652            if (isRequired) {
653              required.add(propName);
654            }
655          }
656        }
657
658        if ('enum' in compSchema) {
659          enumSets.push(new Set(compSchema.enum || []));
660        }
661      };
662
663      for (const t of schema.allOf) {
664        if ('anyOf' in t) {
665          for (const tt of t.anyOf) {
666            addComponent(tt, false);
667          }
668        } else {
669          addComponent(t, true);
670        }
671      }
672
673      if (enumSets.length > 0) {
674        const enumIntersection = new Set([...enumSets[0]].filter(v => enumSets.every(s => s.has(v))));
675        if (enumIntersection.size > 0) {
676          const sortedEnums = [...enumIntersection].sort((a, b) => a.localeCompare(b));
677          const rule = '(' + sortedEnums.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
678          return this._addRule(ruleName, rule);
679        }
680      }
681      return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
682    } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
683      const items = schema.items ?? schema.prefixItems;
684      if (Array.isArray(items)) {
685        return this._addRule(
686          ruleName,
687          '"[" space ' +
688            items.map((item, i) => this.visit(item, `${name ?? ''}${name ? '-' : ''}tuple-${i}`)).join(' "," space ') +
689            ' "]" space'
690        );
691      } else {
692        const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
693        const minItems = schema.minItems || 0;
694        const maxItems = schema.maxItems;
695        return this._addRule(ruleName, '"[" space ' + _buildRepetition(itemRuleName, minItems, maxItems, {separatorRule: '"," space'}) + ' "]" space');
696      }
697    } else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
698      return this._visitPattern(schema.pattern, ruleName);
699    } else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
700      return this._addPrimitive(
701        ruleName === 'root' ? 'root' : schemaFormat,
702        PRIMITIVE_RULES['uuid']
703      );
704    } else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) {
705      const primName = `${schema.format}-string`
706      return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName]));
707    } else if (schemaType === 'string' && ('minLength' in schema || 'maxLength' in schema)) {
708      const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
709      const minLen = schema.minLength || 0;
710      const maxLen = schema.maxLength;
711      return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
712    } else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) {
713      let minValue = null;
714      let maxValue = null;
715      if ('minimum' in schema) {
716        minValue = schema.minimum;
717      } else if ('exclusiveMinimum' in schema) {
718        minValue = schema.exclusiveMinimum + 1;
719      }
720      if ('maximum' in schema) {
721        maxValue = schema.maximum;
722      } else if ('exclusiveMaximum' in schema) {
723        maxValue = schema.exclusiveMaximum - 1;
724      }
725
726      const out = ["("];
727      _generateMinMaxInt(minValue, maxValue, out);
728      out.push(") space");
729      return this._addRule(ruleName, out.join(''));
730    } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
731      return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
732    } else {
733      if (!(schemaType in PRIMITIVE_RULES)) {
734        throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
735      }
736      // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
737      return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
738    }
739  }
740
741  _addPrimitive(name, rule) {
742    let n = this._addRule(name, rule.content);
743    for (const dep of rule.deps) {
744      const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep];
745      if (!depRule) {
746        throw new Error(`Rule ${dep} not known`);
747      }
748      if (!(dep in this._rules)) {
749        this._addPrimitive(dep, depRule);
750      }
751    }
752    return n;
753  }
754
755  _buildObjectRule(properties, required, name, additionalProperties) {
756    const propOrder = this._propOrder;
757    // sort by position in prop_order (if specified) then by original order
758    const sortedProps = properties.map(([k]) => k).sort((a, b) => {
759      const orderA = propOrder[a] || Infinity;
760      const orderB = propOrder[b] || Infinity;
761      return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
762    });
763
764    const propKvRuleNames = {};
765    for (const [propName, propSchema] of properties) {
766      const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
767      propKvRuleNames[propName] = this._addRule(
768        `${name ?? ''}${name ? '-' : ''}${propName}-kv`,
769        `${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}`
770      );
771    }
772    const requiredProps = sortedProps.filter(k => required.has(k));
773    const optionalProps = sortedProps.filter(k => !required.has(k));
774
775    if (additionalProperties) {
776      const subName = `${name ?? ''}${name ? '-' : ''}additional`;
777      const valueRule =
778        additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
779        : this._addPrimitive('value', PRIMITIVE_RULES['value']);
780
781      const key_rule =
782        sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
783        : this._addRule(`${subName}-k`, this._notStrings(sortedProps));
784
785      propKvRuleNames['*'] = this._addRule(
786        `${subName}-kv`,
787        `${key_rule} ":" space ${valueRule}`);
788      optionalProps.push('*');
789    }
790
791    let rule = '"{" space ';
792    rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
793
794    if (optionalProps.length > 0) {
795      rule += ' (';
796      if (requiredProps.length > 0) {
797        rule += ' "," space ( ';
798      }
799
800      const getRecursiveRefs = (ks, firstIsOptional) => {
801        const [k, ...rest] = ks;
802        const kvRuleName = propKvRuleNames[k];
803        let res;
804        const commaRef = `( "," space ${kvRuleName} )`;
805        if (firstIsOptional) {
806          res = commaRef + (k === '*' ? '*' : '?');
807        } else {
808          res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
809        }
810        if (rest.length > 0) {
811          res += ' ' + this._addRule(
812            `${name ?? ''}${name ? '-' : ''}${k}-rest`,
813            getRecursiveRefs(rest, true)
814          );
815        }
816        return res;
817      };
818
819      rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | ');
820      if (requiredProps.length > 0) {
821        rule += ' )';
822      }
823      rule += ' )?';
824    }
825
826    rule += ' "}" space';
827
828    return rule;
829  }
830
831  formatGrammar() {
832    let grammar = '';
833    for (const [name, rule] of Object.entries(this._rules).sort(([a], [b]) => a.localeCompare(b))) {
834      grammar += `${name} ::= ${rule}\n`;
835    }
836    return grammar;
837  }
838}
839
840// Helper function to group elements by a key function
841function* groupBy(iterable, keyFn) {
842  let lastKey = null;
843  let group = [];
844  for (const element of iterable) {
845    const key = keyFn(element);
846    if (lastKey !== null && key !== lastKey) {
847      yield [lastKey, group];
848      group = [];
849    }
850    group.push(element);
851    lastKey = key;
852  }
853  if (group.length > 0) {
854    yield [lastKey, group];
855  }
856}