1from __future__ import annotations
2
3import inspect
4import json
5import re
6from copy import copy
7from enum import Enum
8from inspect import getdoc, isclass
9from typing import TYPE_CHECKING, Any, Callable, List, Optional, Union, get_args, get_origin, get_type_hints
10
11from docstring_parser import parse
12from pydantic import BaseModel, create_model
13
14if TYPE_CHECKING:
15 from types import GenericAlias
16else:
17 # python 3.8 compat
18 from typing import _GenericAlias as GenericAlias
19
20# TODO: fix this
21# pyright: reportAttributeAccessIssue=information
22
23
class PydanticDataType(Enum):
    """
    Defines the data types supported by the grammar_generator.

    Each member's value is the string used as (or as a prefix of) a GBNF rule name.

    Attributes:
        STRING (str): Represents a string data type.
        TRIPLE_QUOTED_STRING (str): Represents a triple-quoted string data type.
        MARKDOWN_CODE_BLOCK (str): Represents a markdown code block data type.
        BOOLEAN (str): Represents a boolean data type.
        INTEGER (str): Represents an integer data type.
        FLOAT (str): Represents a float data type.
        OBJECT (str): Represents an object data type.
        ARRAY (str): Represents an array data type.
        ENUM (str): Represents an enum data type.
        ANY (str): Represents an arbitrary ("any") data type.
        NULL (str): Represents a null data type.
        CUSTOM_CLASS (str): Represents a custom class data type.
        CUSTOM_DICT (str): Represents a custom dict data type.
        SET (str): Represents a set data type.
    """

    STRING = "string"
    TRIPLE_QUOTED_STRING = "triple_quoted_string"
    MARKDOWN_CODE_BLOCK = "markdown_code_block"
    BOOLEAN = "boolean"
    INTEGER = "integer"
    FLOAT = "float"
    OBJECT = "object"
    ARRAY = "array"
    ENUM = "enum"
    ANY = "any"
    NULL = "null"
    CUSTOM_CLASS = "custom-class"
    CUSTOM_DICT = "custom-dict"
    SET = "set"
53
54
def map_pydantic_type_to_gbnf(pydantic_type: type[Any]) -> str:
    """
    Map a Python / pydantic type annotation to the name of its GBNF rule.

    :param pydantic_type: A plain class (``str``, a ``BaseModel`` subclass, ...) or a
        parameterised annotation (``List[int]``, ``Dict[str, int]``, ``Union[...]``).
    :return: The rule name for the type, or ``"unknown"`` when the annotation is not recognised.
    """
    origin_type = get_origin(pydantic_type)
    origin_type = pydantic_type if origin_type is None else origin_type

    if isclass(origin_type):
        if issubclass(origin_type, str):
            return PydanticDataType.STRING.value
        # bool is a subclass of int, so it has to be tested before int.
        if issubclass(origin_type, bool):
            return PydanticDataType.BOOLEAN.value
        if issubclass(origin_type, int):
            return PydanticDataType.INTEGER.value
        if issubclass(origin_type, float):
            return PydanticDataType.FLOAT.value
        if issubclass(origin_type, Enum):
            return PydanticDataType.ENUM.value
        if issubclass(origin_type, BaseModel):
            return format_model_and_field_name(origin_type.__name__)

    if origin_type is list:
        element_type = get_args(pydantic_type)[0]
        return f"{map_pydantic_type_to_gbnf(element_type)}-list"
    if origin_type is set:
        element_type = get_args(pydantic_type)[0]
        return f"{map_pydantic_type_to_gbnf(element_type)}-set"
    if origin_type is Union:
        # Optional[X] is Union[X, None], so it is covered here as well; a separate
        # "origin is Optional" test could only match a bare Optional and then fail on get_args.
        union_rules = [map_pydantic_type_to_gbnf(ut) for ut in get_args(pydantic_type)]
        return f"union-{'-or-'.join(union_rules)}"

    # dict is itself a class, so this test must come before the generic class fallback
    # below; otherwise Dict[K, V] is reported as "custom-class-dict" and never reaches here.
    # A bare, unparameterised dict has no key/value types and still uses the fallback.
    type_args = get_args(pydantic_type)
    if origin_type is dict and len(type_args) == 2:
        key_type, value_type = type_args
        return f"custom-dict-key-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(key_type))}-value-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(value_type))}"

    if isclass(origin_type):
        return f"{PydanticDataType.CUSTOM_CLASS.value}-{format_model_and_field_name(origin_type.__name__)}"
    return "unknown"
92
93
def format_model_and_field_name(model_name: str) -> str:
    """
    Convert a model or field name into the lower-case, dash-separated form used for GBNF rule names.

    The name is split in front of every upper-case letter, each piece is lower-cased and
    underscores become dashes (``MyModel`` -> ``my-model``, ``user_name`` -> ``user-name``).

    :param model_name: The class or field name to convert.
    :return: The converted name.
    """
    first_upper = re.search("[A-Z]", model_name)
    if first_upper is None:
        # No upper-case letter at all: only lower-case and swap the underscores.
        return model_name.lower().replace("_", "-")

    parts = re.findall("[A-Z][^A-Z]*", model_name)
    # Keep whatever precedes the first upper-case letter, so "myModel" becomes "my-model"
    # instead of silently losing "my". A prefix made only of underscores (e.g. "_Private")
    # is still dropped, which keeps the previous result for such names.
    prefix = model_name[: first_upper.start()]
    if prefix.strip("_"):
        parts.insert(0, prefix)
    return "-".join(part.lower().replace("_", "-") for part in parts)
99
100
def generate_list_rule(element_type):
    """
    Build the GBNF rule describing a list whose items all have one element type.

    :param element_type: The Python type of the list items (it is passed to
        map_pydantic_type_to_gbnf to obtain the element rule name).
    :return: The rule text, named ``<element-rule>-list``.
    """
    element_rule = map_pydantic_type_to_gbnf(element_type)
    return f'{element_rule}-list ::= "[" {element_rule} ("," {element_rule})* "]"'
112
113
def get_members_structure(cls, rule_name):
    """
    Build a GBNF rule describing the members of an arbitrary (non-pydantic) class.

    :param cls: The class to describe.
    :param rule_name: Name of the rule to emit. Not used for Enum classes, whose rule
        is named after the lower-cased class name.
    :return: The rule text.
    """
    if issubclass(cls, Enum):
        # Enum: one quoted alternative per member value.
        members = [f'"\\"{member.value}\\""' for member in cls.__members__.values()]
        return f"{cls.__name__.lower()} ::= " + " | ".join(members)

    # getattr instead of direct attribute access: classes that carry no __annotations__
    # attribute (e.g. object) would otherwise raise AttributeError here.
    if getattr(cls, "__annotations__", None):
        members = [
            f' "\\"{name}\\"" ":" {map_pydantic_type_to_gbnf(param_type)}'
            for name, param_type in get_type_hints(cls).items()
            if name != "self"
        ]
        return f'{rule_name} ::= "{{"' + '"," '.join(members) + ' "}"'

    if rule_name == "custom-class-any":
        return f"{rule_name} ::= value"

    # No class-level annotations: fall back to the annotated parameters of __init__.
    parameters = inspect.signature(cls.__init__).parameters
    members = [
        f' "\\"{name}\\"" ":" {map_pydantic_type_to_gbnf(param.annotation)}'
        for name, param in parameters.items()
        if name != "self" and param.annotation != inspect.Parameter.empty
    ]
    # NOTE(review): this separator emits the literal ", " while the annotation branch
    # above emits ","; kept as-is so the generated text is unchanged.
    return f'{rule_name} ::= "{{"' + '", "'.join(members) + ' "}"'
149
150
def regex_to_gbnf(regex_pattern: str) -> str:
    """
    Translate a basic regex pattern into GBNF rule text.

    Only the ``\\d`` and ``\\s`` shorthands are rewritten; everything else (quantifiers,
    character classes, ...) is passed through unchanged.

    :param regex_pattern: The regular expression source.
    :return: The pattern with the supported shorthands expanded.
    """
    shorthand_expansions = (
        ("\\d", "[0-9]"),
        ("\\s", "[ \t\n]"),
    )
    gbnf_rule = regex_pattern
    for regex_token, gbnf_token in shorthand_expansions:
        gbnf_rule = gbnf_rule.replace(regex_token, gbnf_token)
    return gbnf_rule
166
167
def generate_gbnf_integer_rules(max_digit=None, min_digit=None):
    """
    Generate the GBNF rule for an integer with an optional digit-count range.

    The rule always requires at least one digit, so it can never match an empty
    string, and a definition is always returned for the rule name, so the name
    can be referenced safely.

    Parameters:
        max_digit (int): The maximum number of digits for the integer. Default is None.
        min_digit (int): The minimum number of digits for the integer. Default is None.
            Values below 1 are treated as 1.

    Returns:
        integer_rule (str): The identifier of the generated integer rule.
        additional_rules (list): A one-element list holding the rule definition.
    """
    integer_rule = "integer-part"
    if max_digit is not None:
        integer_rule += f"-max{max_digit}"
    if min_digit is not None:
        integer_rule += f"-min{min_digit}"

    if max_digit is None and min_digit is None:
        # Unconstrained: still define the rule, otherwise the returned name dangles.
        return integer_rule, [f"{integer_rule} ::= [0-9]+"]

    # At least one mandatory digit, even when only max_digit is given or min_digit is 0.
    required_digits = max(min_digit if min_digit is not None else 1, 1)
    digit_terms = ["[0-9]"] * required_digits
    if max_digit is not None:
        # Optional digits on top of the mandatory ones; none when max_digit <= required.
        digit_terms += ["[0-9]?"] * max(max_digit - required_digits, 0)

    return integer_rule, [f"{integer_rule} ::= {' '.join(digit_terms)}"]
213
214
def generate_gbnf_float_rules(max_digit=None, min_digit=None, max_precision=None, min_precision=None):
    """
    Generate GBNF float rules based on the given constraints.

    The returned list always defines the float rule together with the integer-part and
    fractional-part rules it refers to; an unconstrained part is defined as ``[0-9]+``.

    :param max_digit: Maximum number of digits in the integer part (default: None)
    :param min_digit: Minimum number of digits in the integer part (default: None).
        Values below 1 are treated as 1.
    :param max_precision: Maximum number of digits in the fractional part (default: None)
    :param min_precision: Minimum number of digits in the fractional part (default: None)
    :return: A tuple containing the float rule name and the rule definitions as a list

    Example Usage:
        generate_gbnf_float_rules(max_digit=3, min_digit=1, max_precision=2, min_precision=1)

    Output:
        ('float-3-1-2-1',
         ['fractional-part-max2-min1 ::= [0-9] [0-9]?',
          'float-3-1-2-1 ::= integer-part-max3-min1 "." fractional-part-max2-min1',
          'integer-part-max3-min1 ::= [0-9] [0-9]? [0-9]?'])
    """
    additional_rules = []

    integer_part_rule = (
        "integer-part"
        + (f"-max{max_digit}" if max_digit is not None else "")
        + (f"-min{min_digit}" if min_digit is not None else "")
    )
    fractional_part_rule = (
        "fractional-part"
        + (f"-max{max_precision}" if max_precision is not None else "")
        + (f"-min{min_precision}" if min_precision is not None else "")
    )

    # Fractional part: mandatory digits first, then optional ones up to max_precision.
    if max_precision is not None or min_precision is not None:
        required_precision = min_precision if min_precision is not None else 1
        fractional_rule_part = "[0-9]" * required_precision
        if max_precision is not None:
            fractional_rule_part += " [0-9]?" * (max_precision - required_precision)
    else:
        # Unconstrained: define the rule anyway so the float rule below can refer to it.
        fractional_rule_part = "[0-9]+"
    additional_rules.append(f"{fractional_part_rule} ::= {fractional_rule_part}")

    float_rule = f"float-{max_digit if max_digit is not None else 'X'}-{min_digit if min_digit is not None else 'X'}-{max_precision if max_precision is not None else 'X'}-{min_precision if min_precision is not None else 'X'}"
    additional_rules.append(f'{float_rule} ::= {integer_part_rule} "." {fractional_part_rule}')

    # Integer part: always at least one digit. Clamping min_digit to 1 also prevents
    # min_digit=0 from allowing max_digit + 1 digits.
    if max_digit is not None or min_digit is not None:
        required_digits = max(min_digit if min_digit is not None else 1, 1)
        integer_rule_part = "[0-9]" + " [0-9]" * (required_digits - 1)
        if max_digit is not None:
            integer_rule_part += " [0-9]?" * (max_digit - required_digits)
    else:
        integer_rule_part = "[0-9]+"
    additional_rules.append(f"{integer_part_rule} ::= {integer_rule_part}")

    return float_rule, additional_rules
278
279
def generate_gbnf_rule_for_type(
    model_name, field_name, field_type, is_optional, processed_models, created_rules, field_info=None
) -> tuple[str, list[str]]:
    """
    Generate GBNF rule for a given field type.

    :param model_name: Name of the model (already in rule-name form).
    :param field_name: Name of the field.
    :param field_type: Type of the field.
    :param is_optional: Whether the field is optional. Only forwarded to the recursive
        calls made for list/set elements and dict keys/values.
    :param processed_models: Set of already processed models (forwarded to generate_gbnf_grammar).
    :param created_rules: Dict of already created rules (forwarded to generate_gbnf_grammar).
    :param field_info: Additional information about the field (optional). Its
        ``json_schema_extra`` and ``pattern`` attributes are consulted when present.

    :return: Tuple containing the GBNF type (the rule name to reference for this field)
        and a list of additional rule definitions that name depends on.
    :rtype: tuple[str, list]
    """
    rules = []

    field_name = format_model_and_field_name(field_name)
    gbnf_type = map_pydantic_type_to_gbnf(field_type)

    origin_type = get_origin(field_type)
    origin_type = field_type if origin_type is None else origin_type
    origin_is_class = isclass(origin_type)

    # Extras are only usable when they offer the .get lookups used below; anything
    # else (missing, None, or some other object) is treated as "no extras".
    schema_extra = getattr(field_info, "json_schema_extra", None)
    if not callable(getattr(schema_extra, "get", None)):
        schema_extra = None

    if origin_is_class and issubclass(origin_type, BaseModel):
        nested_model_name = format_model_and_field_name(field_type.__name__)
        nested_model_rules, _ = generate_gbnf_grammar(field_type, processed_models, created_rules)
        rules.extend(nested_model_rules)
        gbnf_type = nested_model_name
    elif origin_is_class and issubclass(origin_type, Enum):
        enum_values = [f'"\\"{e.value}\\""' for e in field_type]  # Adding escaped quotes
        gbnf_type = f"{model_name}-{field_name}"
        rules.append(f"{gbnf_type} ::= {' | '.join(enum_values)}")
    elif origin_type is list or origin_type is set:
        # Lists and sets are both emitted as a JSON array of the element rule.
        element_type = get_args(field_type)[0]
        element_rule_name, additional_rules = generate_gbnf_rule_for_type(
            model_name, f"{field_name}-element", element_type, is_optional, processed_models, created_rules
        )
        rules.extend(additional_rules)
        gbnf_type = f"{model_name}-{field_name}"
        rules.append(f"""{gbnf_type} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})* "]" """)
    elif gbnf_type.startswith("custom-class-"):
        rules.append(get_members_structure(field_type, gbnf_type))
    elif gbnf_type.startswith("custom-dict-"):
        key_type, value_type = get_args(field_type)

        additional_key_type, additional_key_rules = generate_gbnf_rule_for_type(
            model_name, f"{field_name}-key-type", key_type, is_optional, processed_models, created_rules
        )
        additional_value_type, additional_value_rules = generate_gbnf_rule_for_type(
            model_name, f"{field_name}-value-type", value_type, is_optional, processed_models, created_rules
        )
        rules.extend(additional_key_rules)
        rules.extend(additional_value_rules)
        # The definition goes into the rule list and gbnf_type stays the rule *name*;
        # returning the whole definition as the name would splice "::=" into the caller's rule.
        rules.append(
            rf'{gbnf_type} ::= "{{" ( {additional_key_type} ": " {additional_value_type} ("," "\n" ws {additional_key_type} ":" {additional_value_type})* )? "}}" '
        )
    elif gbnf_type.startswith("union-"):
        union_rules = []
        allows_null = False
        for union_type in get_args(field_type):
            # An identity test instead of issubclass(): issubclass raises TypeError for
            # members that are not classes, such as list[int].
            if union_type is type(None):
                allows_null = True
                continue
            union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(
                model_name, field_name, union_type, False, processed_models, created_rules
            )
            union_rules.append(union_gbnf_type)
            rules.extend(union_rules_list)

        if len(union_rules) == 1:
            # A single non-None member is Optional[X].
            gbnf_type = f"{model_name}-{field_name}-optional"
            alternatives = union_rules + ["null"]
        else:
            gbnf_type = f"{model_name}-{field_name}-union"
            # Keep null as an alternative for nullable unions such as Union[int, str, None].
            alternatives = union_rules + (["null"] if allows_null else [])
        rules.append(f"{gbnf_type} ::= {' | '.join(alternatives)}")
    elif origin_is_class and issubclass(origin_type, str):
        pattern = getattr(field_info, "pattern", None)
        if schema_extra is not None:
            triple_quoted_string = schema_extra.get("triple_quoted_string", False)
            markdown_string = schema_extra.get("markdown_code_block", False)

            gbnf_type = PydanticDataType.TRIPLE_QUOTED_STRING.value if triple_quoted_string else PydanticDataType.STRING.value
            gbnf_type = PydanticDataType.MARKDOWN_CODE_BLOCK.value if markdown_string else gbnf_type
        elif pattern is not None:
            # Read the pattern from the attribute that was actually checked. It may be
            # the regex source or a compiled pattern object exposing ``.pattern``.
            regex_pattern = getattr(pattern, "pattern", pattern)
            gbnf_type = f"pattern-{field_name}"
            rules.append(f"{gbnf_type} ::= {regex_to_gbnf(regex_pattern)}")
        else:
            gbnf_type = PydanticDataType.STRING.value
    elif origin_is_class and issubclass(origin_type, float) and schema_extra is not None:
        max_precision = schema_extra.get("max_precision")
        min_precision = schema_extra.get("min_precision")
        max_digits = schema_extra.get("max_digit")
        min_digits = schema_extra.get("min_digit")
        # Only switch to a constrained rule when a constraint is present; extras that
        # carry other keys keep the plain float rule from map_pydantic_type_to_gbnf.
        if any(value is not None for value in (max_precision, min_precision, max_digits, min_digits)):
            gbnf_type, rules = generate_gbnf_float_rules(
                max_digit=max_digits, min_digit=min_digits, max_precision=max_precision, min_precision=min_precision
            )
    elif (
        origin_is_class
        and issubclass(origin_type, int)
        # bool is a subclass of int; a boolean field with extras must stay a boolean.
        and not issubclass(origin_type, bool)
        and schema_extra is not None
    ):
        max_digits = schema_extra.get("max_digit")
        min_digits = schema_extra.get("min_digit")
        if max_digits is not None or min_digits is not None:
            gbnf_type, rules = generate_gbnf_integer_rules(max_digit=max_digits, min_digit=min_digits)

    # Every other type keeps the name from map_pydantic_type_to_gbnf and needs no extra rules.
    return gbnf_type, rules
440
441
def generate_gbnf_grammar(
    model: type[BaseModel],
    processed_models: set[type[BaseModel]],
    created_rules: dict[str, list[str]],
) -> tuple[list[str], bool]:
    """
    Generate the GBNF grammar rules for one model.

    :param model: The class to generate the grammar for. Pydantic models are read through
        their type hints and ``model_fields``; other classes through their annotations or,
        failing that, the parameters of ``__init__``.
    :param processed_models: A set of already processed models to prevent infinite recursion.
        The model is added to it.
    :param created_rules: A dict containing already created rules, keyed by rule name.
        New rule names are added to it.
    :return: A tuple ``(rules, has_special_string)``. ``rules`` starts with the rule of the
        model itself, followed by the rules of its fields. ``has_special_string`` is True when
        a triple-quoted string or markdown code block was appended to the model rule. A model
        that was processed before yields ``([], False)``.

    Example Usage:
        ```
        model = MyModel
        processed_models = set()
        created_rules = dict()

        rules, has_special_string = generate_gbnf_grammar(model, processed_models, created_rules)
        ```
    """
    if model in processed_models:
        return [], False

    processed_models.add(model)
    model_name = format_model_and_field_name(model.__name__)
    is_pydantic_model = issubclass(model, BaseModel)

    if is_pydantic_model:
        # Pydantic models: field name -> resolved type hint.
        model_fields = get_type_hints(model)
    elif hasattr(model, "__annotations__") and model.__annotations__:
        # Plain annotated classes: every annotated attribute counts as required (Ellipsis default).
        model_fields = {name: (typ, ...) for name, typ in get_type_hints(model).items()}
    else:
        # Otherwise describe the class through the parameters of its __init__.
        parameters = inspect.signature(model.__init__).parameters
        model_fields = {
            name: (param.annotation, param.default) for name, param in parameters.items() if name != "self"
        }

    model_rule_parts = []
    nested_rules = []
    has_markdown_code_block = False
    has_triple_quoted_string = False

    for field_name, field_info in model_fields.items():
        if is_pydantic_model:
            field_type = field_info
            field_info = model.model_fields[field_name]
            is_optional = field_info.is_required is False and get_origin(field_type) is Optional
        else:
            field_type, default_value = field_info
            # A field with a real default value is optional (not required).
            is_optional = (default_value is not inspect.Parameter.empty) and (default_value is not Ellipsis)

        rule_name, additional_rules = generate_gbnf_rule_for_type(
            model_name, format_model_and_field_name(field_name), field_type, is_optional, processed_models,
            created_rules, field_info
        )

        is_markdown_code_block = rule_name == "markdown_code_block"
        is_triple_quoted_string = rule_name == "triple_quoted_string"
        if is_markdown_code_block or is_triple_quoted_string:
            # Special strings are not part of the JSON object; they are appended to the
            # model rule after the loop. Both flags are overwritten by each such field.
            has_triple_quoted_string = is_triple_quoted_string
            has_markdown_code_block = is_markdown_code_block
            continue

        if rule_name not in created_rules:
            created_rules[rule_name] = additional_rules
        model_rule_parts.append(f' ws "\\"{field_name}\\"" ":" ws {rule_name}')  # Adding escaped quotes
        nested_rules.extend(additional_rules)

    fields_joined = r' "," "\n" '.join(model_rule_parts)
    model_rule = rf'{model_name} ::= "{{" "\n" {fields_joined} "\n" ws "}}"'

    if has_triple_quoted_string:
        model_rule += '"\\n" ws "}"'
        model_rule += '"\\n" triple-quoted-string'
    if has_markdown_code_block:
        model_rule += '"\\n" ws "}"'
        model_rule += '"\\n" markdown-code-block'

    has_special_string = has_triple_quoted_string or has_markdown_code_block
    return [model_rule, *nested_rules], has_special_string
526
527
def generate_gbnf_grammar_from_pydantic_models(
    models: list[type[BaseModel]], outer_object_name: str | None = None, outer_object_content: str | None = None,
    list_of_outputs: bool = False
) -> str:
    """
    Generate GBNF Grammar from Pydantic Models.

    This method takes a list of Pydantic models and uses them to generate a GBNF grammar string.
    The result contains the root rule and the model rules; see get_primitive_grammar for the
    primitive rules (string, ws, ...) they refer to.

    Args:
        models (list[type[BaseModel]]): A list of Pydantic models to generate the grammar from.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        list_of_outputs (bool, optional): Allows a list of output objects.

    Returns:
        str: The generated GBNF grammar string.

    Examples:
        models = [UserModel, PostModel]
        grammar = generate_gbnf_grammar_from_pydantic_models(models)
        print(grammar)
        # Output:
        # root ::= (" "| "\\n") grammar-models
        # grammar-models ::= user-model | post-model
        # ...
    """
    processed_models: set[type[BaseModel]] = set()
    all_rules = []
    created_rules: dict[str, list[str]] = {}

    if outer_object_name is None:
        for model in models:
            model_rules, _ = generate_gbnf_grammar(model, processed_models, created_rules)
            all_rules.extend(model_rules)

        if list_of_outputs:
            root_rule = r'root ::= (" "| "\n") "[" ws grammar-models ("," ws grammar-models)* ws "]"' + "\n"
        else:
            root_rule = r'root ::= (" "| "\n") grammar-models' + "\n"
        root_rule += "grammar-models ::= " + " | ".join(
            format_model_and_field_name(model.__name__) for model in models
        )
        all_rules.insert(0, root_rule)
        return "\n".join(all_rules)

    outer_rule_name = format_model_and_field_name(outer_object_name)
    if list_of_outputs:
        root_rule = rf'root ::= (" "| "\n") "[" ws {outer_rule_name} ("," ws {outer_rule_name})* ws "]"' + "\n"
    else:
        root_rule = f"root ::= {outer_rule_name}\n"

    model_rule = rf'{outer_rule_name} ::= (" "| "\n") "{{" ws "\"{outer_object_name}\"" ":" ws grammar-models'

    fields_joined = " | ".join(
        rf"{format_model_and_field_name(model.__name__)}-grammar-model" for model in models
    )

    # Closes the outer object that model_rule opens.
    outer_object_closing = r'"\n" ws "}"'
    # Models whose own rule already takes care of closing the outer object.
    closed_models = set()
    mod_rules = []
    for model in models:
        model_rule_name = format_model_and_field_name(model.__name__)
        mod_rule = (
            rf'{model_rule_name}-grammar-model ::= '
            rf'"\"{model.__name__}\"" "," ws "\"{outer_object_content}\"" ":" ws {model_rule_name}'
        )

        model_rules, has_special_string = generate_gbnf_grammar(model, processed_models, created_rules)
        if model_rules:
            # Models with a special string already close the outer object in their own rule.
            if not has_special_string:
                model_rules[0] += outer_object_closing
            closed_models.add(model)
            all_rules.extend(model_rules)
        elif model not in closed_models:
            # generate_gbnf_grammar returns no rules for a model it has already processed,
            # here as a nested field of an earlier model. Indexing model_rules[0] would
            # raise IndexError, and the nested rule must stay untouched, so the outer
            # object is closed in the wrapper rule instead.
            # NOTE(review): whether such a nested model carries a special string is not
            # known at this point; that combination is not handled.
            mod_rule += " " + outer_object_closing
        # A model listed twice needs nothing more: its rule was closed the first time.

        mod_rules.append(mod_rule + "\n")

    grammar_model_rules = f"\ngrammar-models ::= {fields_joined}"
    grammar_model_rules += "\n" + "\n".join(mod_rules)

    all_rules.insert(0, root_rule + model_rule + grammar_model_rules)
    return "\n".join(all_rules)
607
608
def get_primitive_grammar(grammar):
    """
    Returns the needed GBNF primitive grammar for a given GBNF grammar string.

    The basic rules (boolean, null, string, ws, float, integer) are always included.
    List rules, the generic JSON value rules, the markdown code block rules and the
    triple-quoted string rules are added only when their names occur in ``grammar``.

    Args:
        grammar (str): The string containing the GBNF grammar.

    Returns:
        str: GBNF primitive grammar string.
    """
    list_markers = (
        ("string-list", str),
        ("boolean-list", bool),
        ("integer-list", int),
        ("float-list", float),
    )
    type_list: list[type[object]] = [element_type for marker, element_type in list_markers if marker in grammar]
    additional_grammar = [generate_list_rule(t) for t in type_list]

    primitive_grammar = r"""
boolean ::= "true" | "false"
null ::= "null"
string ::= "\"" (
 [^"\\] |
 "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
 )* "\"" ws
ws ::= ([ \t\n] ws)?
float ::= ("-"? ([0] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

integer ::= [0-9]+"""

    any_block = ""
    if "custom-class-any" in grammar:
        any_block = """
value ::= object | array | string | number | boolean | null

object ::=
 "{" ws (
 string ":" ws value
 ("," ws string ":" ws value)*
 )? "}" ws

array ::=
 "[" ws (
 value
 ("," ws value)*
 )? "]" ws

number ::= integer | float"""

    # Both special-string grammars are accumulated: a grammar that uses markdown code
    # blocks and triple-quoted strings needs both rule sets, so neither may replace the other.
    special_string_grammar = ""
    if "markdown-code-block" in grammar:
        special_string_grammar += r'''
markdown-code-block ::= opening-triple-ticks markdown-code-block-content closing-triple-ticks
markdown-code-block-content ::= ( [^`] | "`" [^`] | "`" "`" [^`] )*
opening-triple-ticks ::= "```" "python" "\n" | "```" "c" "\n" | "```" "cpp" "\n" | "```" "txt" "\n" | "```" "text" "\n" | "```" "json" "\n" | "```" "javascript" "\n" | "```" "css" "\n" | "```" "html" "\n" | "```" "markdown" "\n"
closing-triple-ticks ::= "```" "\n"'''

    if "triple-quoted-string" in grammar:
        special_string_grammar += r"""
triple-quoted-string ::= triple-quotes triple-quoted-string-content triple-quotes
triple-quoted-string-content ::= ( [^'] | "'" [^'] | "'" "'" [^'] )*
triple-quotes ::= "'''" """
    return "\n" + "\n".join(additional_grammar) + any_block + primitive_grammar + special_string_grammar
674
675
def generate_markdown_documentation(
    pydantic_models: list[type[BaseModel]], model_prefix="Model", fields_prefix="Fields",
    documentation_with_field_description=True
) -> str:
    """
    Generate markdown documentation for a list of Pydantic models.

    Models found inside list or Union fields are documented as well, after the models
    that were passed in, under the fixed headings "Model" and "Fields". Each such nested
    model is queued only once, so self-referential models do not loop forever.

    Args:
        pydantic_models (list[type[BaseModel]]): list of Pydantic model classes.
        model_prefix (str): Prefix for the model section.
        fields_prefix (str): Prefix for the fields section.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        str: Generated text documentation.
    """
    documentation_parts = []
    pyd_models: list[tuple[type[BaseModel], bool]] = [(model, True) for model in pydantic_models]
    # Nested models that are already queued. Without this, a model that refers to
    # itself (directly or through another model) would be appended again on every pass.
    queued_nested_models = set()

    # pyd_models grows while it is iterated: nested models are appended and then
    # handled by this same loop.
    for model, add_prefix in pyd_models:
        model_label = model_prefix if add_prefix else "Model"
        fields_label = fields_prefix if add_prefix else "Fields"
        documentation_parts.append(f"{model_label}: {model.__name__}\n")

        # A docstring inherited unchanged from BaseModel is not a description of this model.
        class_doc = getdoc(model)
        base_class_doc = getdoc(BaseModel)
        class_description = class_doc if class_doc and class_doc != base_class_doc else ""
        if class_description != "":
            documentation_parts.append(" Description: ")
            documentation_parts.append(format_multiline_description(class_description, 0) + "\n")

        documentation_parts.append(f" {fields_label}:\n")
        if isclass(model) and issubclass(model, BaseModel):
            for name, field_type in get_type_hints(model).items():
                field_origin = get_origin(field_type)
                if field_origin == list or field_origin == Union:
                    # Iterating the type arguments also copes with a list annotation
                    # that carries no element type.
                    for element_type in get_args(field_type):
                        if (
                            isclass(element_type)
                            and issubclass(element_type, BaseModel)
                            and element_type not in queued_nested_models
                        ):
                            queued_nested_models.add(element_type)
                            pyd_models.append((element_type, False))
                documentation_parts.append(
                    generate_field_markdown(
                        name, field_type, model,
                        documentation_with_field_description=documentation_with_field_description
                    )
                )
            documentation_parts.append("\n")

        if hasattr(model, "Config") and hasattr(model.Config,
                                                "json_schema_extra") and "example" in model.Config.json_schema_extra:
            documentation_parts.append(
                f" Expected Example Output for {format_model_and_field_name(model.__name__)}:\n"
            )
            json_example = json.dumps(model.Config.json_schema_extra["example"])
            documentation_parts.append(format_multiline_description(json_example, 2) + "\n")

    return "".join(documentation_parts)
739
740
def _field_type_display_name(type_) -> str:
    """Return a printable name for a type annotation, even when it has no ``__name__``."""
    # NOTE(review): ``_name`` is assumed to be the private name attribute of typing
    # aliases on interpreters where they expose no ``__name__`` — confirm for the
    # Python versions that must be supported.
    name = getattr(type_, "__name__", None) or getattr(type_, "_name", None)
    return name if name else str(type_)


def generate_field_markdown(
    field_name: str, field_type: type[Any], model: type[BaseModel], depth=1,
    documentation_with_field_description=True
) -> str:
    """
    Generate markdown documentation for a Pydantic model field.

    Args:
        field_name (str): Name of the field.
        field_type (type[Any]): Type of the field.
        model (type[BaseModel]): Pydantic model class.
        depth (int): Indentation depth in the documentation.
        documentation_with_field_description (bool): Include field descriptions in the documentation.
            When False only the "name (type)" line is returned.

    Returns:
        str: Generated text documentation for the field.
    """
    indent = " " * depth

    field_info = model.model_fields.get(field_name)
    field_description = field_info.description if field_info and field_info.description else ""

    origin_type = get_origin(field_type)
    origin_type = field_type if origin_type is None else origin_type
    type_args = get_args(field_type)

    if origin_type == list and type_args:
        # "list of <element>"; a list annotation without an element type falls
        # through to the plain-type case below instead of raising IndexError.
        container_name = format_model_and_field_name(_field_type_display_name(field_type))
        element_name = format_model_and_field_name(_field_type_display_name(type_args[0]))
        type_text = f"{container_name} of {element_name}"
    elif origin_type == Union:
        type_text = " or ".join(
            format_model_and_field_name(_field_type_display_name(element_type)) for element_type in type_args
        )
    else:
        type_text = format_model_and_field_name(_field_type_display_name(field_type))

    field_text = f"{indent}{field_name} ({type_text})"
    # A trailing colon announces the description line that follows.
    field_text += ":\n" if field_description != "" else "\n"

    if not documentation_with_field_description:
        return field_text

    if field_description != "":
        field_text += f" Description: {field_description}\n"

    # Check for and include field-specific examples if available
    if hasattr(model, "Config") and hasattr(model.Config,
                                            "json_schema_extra") and "example" in model.Config.json_schema_extra:
        field_example = model.Config.json_schema_extra["example"].get(field_name)
        if field_example is not None:
            example_text = f"'{field_example}'" if isinstance(field_example, str) else field_example
            field_text += f"{indent} Example: {example_text}\n"

    if isclass(origin_type) and issubclass(origin_type, BaseModel):
        # Nested model: document its fields two levels deeper.
        field_text += f"{indent} Details:\n"
        for name, type_ in get_type_hints(field_type).items():
            field_text += generate_field_markdown(name, type_, field_type, depth + 2)

    return field_text
810
811
def format_json_example(example: dict[str, Any], depth: int) -> str:
    """
    Render an example mapping as an indented, brace-delimited text block.

    Args:
        example (dict): Example data; each key/value pair becomes one line.
        depth (int): Number of indentation units placed before every entry and before the closing brace.

    Returns:
        str: The rendered example text.
    """
    pad = " " * depth
    entries = []
    for key, value in example.items():
        # String values are shown in single quotes; anything else uses its default formatting.
        rendered = f"'{value}'" if isinstance(value, str) else value
        entries.append(f"{pad}{key}: {rendered},\n")
    opened = "{\n" + "".join(entries)
    # Drop the trailing ",\n" of the last entry (or the newline after "{" when there are no entries).
    return opened.rstrip(",\n") + "\n" + pad + "}"
830
831
def generate_text_documentation(
    pydantic_models: list[type[BaseModel]], model_prefix="Model", fields_prefix="Fields",
    documentation_with_field_description=True
) -> str:
    """
    Build plain-text documentation for a list of Pydantic models.

    Models found inside list or Union field annotations are documented as well, after the
    models that were passed in; those nested sections always use the literal "Model" and
    "Fields" labels instead of the supplied prefixes.

    Args:
        pydantic_models (list[type[BaseModel]]): Pydantic model classes to document.
        model_prefix (str): Label placed before the name of each passed-in model.
        fields_prefix (str): Label placed before the field list of each passed-in model.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        str: Generated text documentation.
    """
    # Work queue of (model, is_top_level). It grows while it is being iterated so that
    # nested models discovered below get their own section later on.
    # NOTE(review): nothing de-duplicates this queue, so a model reachable through several
    # fields is documented several times, and a model referencing itself through a
    # list/Union field would be re-queued on every pass - confirm callers never do that.
    queue: list[tuple[type[BaseModel], bool]] = [(entry, True) for entry in pydantic_models]
    base_doc = getdoc(BaseModel)
    sections: list[str] = []

    for model, is_top_level in queue:
        model_label = model_prefix if is_top_level else "Model"
        sections.append(f"{model_label}: {model.__name__}\n")

        # A docstring that merely equals BaseModel's own is not a description of this model.
        model_doc = getdoc(model)
        if model_doc and model_doc != base_doc:
            sections.append(" Description: ")
            sections.append("\n" + format_multiline_description(model_doc, 2) + "\n")

        if isclass(model) and issubclass(model, BaseModel):
            field_chunks = []
            for name, field_type in get_type_hints(model).items():
                # Collect the types that may be nested models: the element type of a list,
                # or every member of a Union.
                origin = get_origin(field_type)
                if origin == list:
                    candidates = (get_args(field_type)[0],)
                elif origin == Union:
                    candidates = get_args(field_type)
                else:
                    candidates = ()
                for candidate in candidates:
                    if isclass(candidate) and issubclass(candidate, BaseModel):
                        queue.append((candidate, False))

                field_chunks.append(
                    generate_field_text(
                        name,
                        field_type,
                        model,
                        documentation_with_field_description=documentation_with_field_description,
                    )
                )

            fields_text = "".join(field_chunks)
            if fields_text != "":
                fields_label = fields_prefix if is_top_level else "Fields"
                sections.append(f" {fields_label}:\n{fields_text}")
            sections.append("\n")

        # Append the model-level example when the model's Config carries one.
        has_example = (
            hasattr(model, "Config")
            and hasattr(model.Config, "json_schema_extra")
            and "example" in model.Config.json_schema_extra
        )
        if has_example:
            sections.append(f" Expected Example Output for {format_model_and_field_name(model.__name__)}:\n")
            example_json = json.dumps(model.Config.json_schema_extra["example"])
            sections.append(format_multiline_description(example_json, 2) + "\n")

    return "".join(sections)
896
897
def generate_field_text(
    field_name: str, field_type: type[Any], model: type[BaseModel], depth=1,
    documentation_with_field_description=True
) -> str:
    """
    Produce the plain-text documentation entry for one field of a Pydantic model.

    Args:
        field_name (str): Name of the field.
        field_type (type[Any]): Annotated type of the field.
        model (type[BaseModel]): Pydantic model class that owns the field.
        depth (int): Indentation depth of the entry.
        documentation_with_field_description (bool): When False, only the header line
            (name and type) is returned.

    Returns:
        str: Generated text documentation for the field.
    """
    pad = " " * depth

    info = model.model_fields.get(field_name)
    description = info.description if info and info.description else ""

    # Build the type label shown in parentheses after the field name.
    origin = get_origin(field_type)
    if origin == list:
        item_type = get_args(field_type)[0]
        container_label = format_model_and_field_name(field_type.__name__)
        item_label = format_model_and_field_name(item_type.__name__)
        type_label = f"{container_label} of {item_label}"
    elif origin == Union:
        type_label = " or ".join(
            format_model_and_field_name(member.__name__) for member in get_args(field_type)
        )
    else:
        type_label = format_model_and_field_name(field_type.__name__)

    # A trailing colon is emitted only when the field has a description.
    terminator = ":\n" if description != "" else "\n"
    field_text = f"{pad}{field_name} ({type_label}){terminator}"

    if not documentation_with_field_description:
        return field_text

    if description != "":
        field_text += f"{pad} Description: " + description + "\n"

    # Include the field-specific example when the model's Config provides one.
    has_example = (
        hasattr(model, "Config")
        and hasattr(model.Config, "json_schema_extra")
        and "example" in model.Config.json_schema_extra
    )
    if has_example:
        example_value = model.Config.json_schema_extra["example"].get(field_name)
        if example_value is not None:
            shown = f"'{example_value}'" if isinstance(example_value, str) else example_value
            field_text += f"{pad} Example: {shown}\n"

    # A field that is itself a model gets its own fields listed two levels deeper.
    if isclass(field_type) and issubclass(field_type, BaseModel):
        field_text += f"{pad} Details:\n"
        for nested_name, nested_type in get_type_hints(field_type).items():
            field_text += generate_field_text(nested_name, nested_type, field_type, depth + 2)

    return field_text
964
965
def format_multiline_description(description: str, indent_level: int) -> str:
    """
    Indent every line of a (possibly multi-line) description.

    Args:
        description (str): Text whose lines are separated by "\\n".
        indent_level (int): Number of indentation units to put in front of each line.

    Returns:
        str: The description with each line prefixed by the indentation.
    """
    prefix = " " * indent_level
    return "\n".join(prefix + line for line in description.split("\n"))
979
980
def save_gbnf_grammar_and_documentation(
    grammar, documentation, grammar_file_path="./grammar.gbnf", documentation_file_path="./grammar_documentation.md"
):
    """
    Save GBNF grammar and documentation to specified files.

    The grammar file receives the grammar followed by the primitive rules returned by
    get_primitive_grammar(grammar). Both files are written as UTF-8 so the result does not
    depend on the platform's default encoding. Failures to write either file are reported
    on stdout and do not propagate; the two writes are attempted independently.

    Args:
        grammar (str): GBNF grammar string.
        documentation (str): Documentation string.
        grammar_file_path (str): File path to save the GBNF grammar.
        documentation_file_path (str): File path to save the documentation.

    Returns:
        None
    """
    try:
        # Explicit encoding: with the locale default, non-ASCII text can raise
        # UnicodeEncodeError, which the OSError handler below would not catch.
        with open(grammar_file_path, "w", encoding="utf-8") as file:
            file.write(grammar + get_primitive_grammar(grammar))
        print(f"Grammar successfully saved to {grammar_file_path}")
    except OSError as e:  # IOError is an alias of OSError
        print(f"An error occurred while saving the grammar file: {e}")

    try:
        with open(documentation_file_path, "w", encoding="utf-8") as file:
            file.write(documentation)
        print(f"Documentation successfully saved to {documentation_file_path}")
    except OSError as e:
        print(f"An error occurred while saving the documentation file: {e}")
1009
1010
def remove_empty_lines(string):
    """
    Drop every line that is empty or contains only whitespace.

    Args:
        string (str): Input string.

    Returns:
        str: The remaining lines joined with "\\n" (no trailing newline).
    """
    return "\n".join(line for line in string.splitlines() if line.strip())
1025
1026
def generate_and_save_gbnf_grammar_and_documentation(
    pydantic_model_list,
    grammar_file_path="./generated_grammar.gbnf",
    documentation_file_path="./generated_grammar_documentation.md",
    outer_object_name: str | None = None,
    outer_object_content: str | None = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Build the markdown documentation and the GBNF grammar for the models and write both to disk.

    Empty lines are removed from the generated grammar before it is handed to
    save_gbnf_grammar_and_documentation, which performs the actual file writes.

    Args:
        pydantic_model_list: List of Pydantic model classes.
        grammar_file_path (str): File path to save the generated GBNF grammar.
        documentation_file_path (str): File path to save the generated documentation.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        None
    """
    # Documentation is generated first, then the grammar; nothing is written until both exist.
    docs_text = generate_markdown_documentation(
        pydantic_model_list,
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    raw_grammar = generate_gbnf_grammar_from_pydantic_models(
        pydantic_model_list, outer_object_name, outer_object_content, list_of_outputs
    )
    save_gbnf_grammar_and_documentation(
        grammar=remove_empty_lines(raw_grammar),
        documentation=docs_text,
        grammar_file_path=grammar_file_path,
        documentation_file_path=documentation_file_path,
    )
1063
1064
def generate_gbnf_grammar_and_documentation(
    pydantic_model_list,
    outer_object_name: str | None = None,
    outer_object_content: str | None = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Produce the GBNF grammar and the markdown documentation for a list of Pydantic models.

    The returned grammar is the generated grammar plus the primitive rules from
    get_primitive_grammar, with empty lines removed.

    Args:
        pydantic_model_list: List of Pydantic model classes.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        tuple: GBNF grammar string, documentation string.
    """
    # The documentation generator receives a shallow copy of the model list.
    docs_text = generate_markdown_documentation(
        copy(pydantic_model_list),
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    raw_grammar = generate_gbnf_grammar_from_pydantic_models(
        pydantic_model_list, outer_object_name, outer_object_content, list_of_outputs
    )
    full_grammar = raw_grammar + get_primitive_grammar(raw_grammar)
    return remove_empty_lines(full_grammar), docs_text
1097
1098
def generate_gbnf_grammar_and_documentation_from_dictionaries(
    dictionaries: list[dict[str, Any]],
    outer_object_name: str | None = None,
    outer_object_content: str | None = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Produce the GBNF grammar and the markdown documentation from dictionary model descriptions.

    The dictionaries are first turned into dynamic Pydantic models; the grammar and the
    documentation are then generated from those models.

    Args:
        dictionaries (list[dict]): List of dictionaries representing Pydantic models.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        tuple: GBNF grammar string, documentation string.
    """
    models = create_dynamic_models_from_dictionaries(dictionaries)
    # From here on the steps are exactly those of the model-based entry point, so reuse it.
    return generate_gbnf_grammar_and_documentation(
        models,
        outer_object_name,
        outer_object_content,
        model_prefix,
        fields_prefix,
        list_of_outputs,
        documentation_with_field_description,
    )
1132
1133
def create_dynamic_model_from_function(func: Callable[..., Any]):
    """
    Creates a dynamic Pydantic model from a given function's type hints and adds the function as a 'run' method.

    Every parameter except one named 'self' becomes a model field: the annotation is the
    field type, the parameter default (if any) is the field default, and the parameter's
    docstring entry is the field description. The model is named after the function and
    its docstring is the function docstring's short description.

    Args:
        func (Callable): A function with type hints and a docstring describing each parameter.

    Returns:
        A dynamic Pydantic model class with the provided function as a 'run' method.

    Raises:
        AssertionError: If the function has no docstring.
        TypeError: If a parameter lacks a type annotation.
        ValueError: If a parameter lacks a description in the docstring.
    """
    sig = inspect.signature(func)

    # The docstring supplies both the per-parameter descriptions and the model description.
    assert func.__doc__ is not None
    docstring = parse(func.__doc__)

    dynamic_fields = {}
    param_docs = []
    for param in sig.parameters.values():
        # Exclude 'self' parameter
        if param.name == "self":
            continue

        # Parameter.empty is a sentinel: compare by identity so that annotations or
        # defaults with a custom __eq__ (e.g. array-like objects) are never invoked.
        if param.annotation is inspect.Parameter.empty:
            raise TypeError(f"Parameter '{param.name}' in function '{func.__name__}' lacks a type annotation")

        # First docstring entry documenting this parameter, if any.
        param_doc = next((d for d in docstring.params if d.arg_name == param.name), None)
        if not param_doc or not param_doc.description:
            raise ValueError(
                f"Parameter '{param.name}' in function '{func.__name__}' lacks a description in the docstring")

        param_docs.append((param.name, param_doc))
        # Ellipsis marks the field as required when the parameter has no default.
        default_value = ... if param.default is inspect.Parameter.empty else param.default
        dynamic_fields[param.name] = (param.annotation, default_value)

    # Creating the dynamic model
    dynamic_model = create_model(f"{func.__name__}", **dynamic_fields)

    for name, param_doc in param_docs:
        dynamic_model.model_fields[name].description = param_doc.description

    dynamic_model.__doc__ = docstring.short_description

    def run_method_wrapper(self):
        # Call the original function with the current field values as keyword arguments.
        func_args = {name: getattr(self, name) for name in dynamic_fields}
        return func(**func_args)

    # Adding the wrapped function as a 'run' method
    setattr(dynamic_model, "run", run_method_wrapper)
    return dynamic_model
1194
1195
def add_run_method_to_dynamic_model(model: type[BaseModel], func: Callable[..., Any]):
    """
    Attach a 'run' method to a model class that forwards the model's field values to a function.

    Args:
        model (type[BaseModel]): Dynamic Pydantic model class; it is modified in place.
        func (Callable): Function invoked by 'run' with one keyword argument per model field.

    Returns:
        type[BaseModel]: The same model class, now carrying the 'run' method.
    """

    def run_method_wrapper(self):
        # The field names are looked up on the model each time 'run' is called.
        return func(**{field: getattr(self, field) for field in model.model_fields})

    model.run = run_method_wrapper
    return model
1216
1217
def create_dynamic_models_from_dictionaries(dictionaries: list[dict[str, Any]]):
    """
    Build one dynamic Pydantic model class per dictionary.

    Each model is named after the dictionary's "name" entry (empty string when absent),
    passed through format_model_and_field_name.

    Args:
        dictionaries (list[dict]): List of dictionaries representing model structures.

    Returns:
        list[type[BaseModel]]: List of generated dynamic Pydantic model classes.
    """
    return [
        convert_dictionary_to_pydantic_model(spec, format_model_and_field_name(spec.get("name", "")))
        for spec in dictionaries
    ]
1234
1235
def map_grammar_names_to_pydantic_model_class(pydantic_model_list):
    """
    Index model classes by their formatted class name.

    Args:
        pydantic_model_list: Iterable of model classes.

    Returns:
        dict: Maps format_model_and_field_name(model.__name__) to the model class; when two
        models share a formatted name, the later one wins.
    """
    return {format_model_and_field_name(entry.__name__): entry for entry in pydantic_model_list}
1242
1243
def json_schema_to_python_types(schema):
    """
    Translate a JSON-schema type name into the Python type used for it.

    Args:
        schema (str): One of "any", "string", "number", "integer", "boolean", "array".

    Returns:
        The corresponding Python type (typing.Any for "any").

    Raises:
        KeyError: If the name is not one of the supported type names.
    """
    python_types = dict(
        any=Any,
        string=str,
        number=float,
        integer=int,
        boolean=bool,
        array=list,
    )
    return python_types[schema]
1254
1255
def list_to_enum(enum_name, values):
    """
    Create an Enum class whose members correspond to the given values.

    Args:
        enum_name (str): Name of the generated Enum class.
        values: Iterable of member values.

    Returns:
        Enum: A new Enum class with one member per value; each member's name is str(value)
        and its value is the original value.
    """
    # Enum member names must be strings. Deriving the name with str() lets non-string
    # values (e.g. integers from a JSON schema "enum") be used; string values still map
    # to themselves exactly as before.
    return Enum(enum_name, {str(value): value for value in values})
1258
1259
def convert_dictionary_to_pydantic_model(dictionary: dict[str, Any], model_name: str = "CustomModel") -> type[Any]:
    """
    Convert a dictionary to a Pydantic model class.

    Recognised top-level keys, checked in this order:
      * "properties": each entry becomes a model field (enums, arrays, nested objects and
        the primitive types known to json_schema_to_python_types are supported).
      * "function": a mapping whose "name" replaces the model name and whose "parameters"
        mapping is converted and returned as the model.
      * "parameters": the dictionary itself is treated as a function description.
      * "required": every field not listed is wrapped in Optional.

    Args:
        dictionary (dict): Dictionary representing the model structure.
        model_name (str): Name of the generated Pydantic model.

    Returns:
        type[BaseModel]: Generated Pydantic model class.

    Raises:
        KeyError: If a property names a primitive type unknown to json_schema_to_python_types.
    """
    fields: dict[str, Any] = {}

    if "properties" in dictionary:
        for field_name, field_data in dictionary.get("properties", {}).items():
            if field_data == "object":
                # NOTE(review): this recurses with the same `dictionary`, which still
                # contains this property, so it appears unable to terminate - confirm
                # what input shape this branch was meant for.
                submodel = convert_dictionary_to_pydantic_model(dictionary, f"{model_name}_{field_name}")
                fields[field_name] = (submodel, ...)
            else:
                # Properties without an explicit type are treated as strings. The fallback
                # must be "string" (the key json_schema_to_python_types knows), not "str".
                field_type = field_data.get("type", "string")

                if field_data.get("enum", []):
                    fields[field_name] = (list_to_enum(field_name, field_data.get("enum", [])), ...)
                elif field_type == "array":
                    items = field_data.get("items", {})
                    if items != {}:
                        # The item description is converted as if it were a "properties" mapping.
                        array = {"properties": items}
                        array_type = convert_dictionary_to_pydantic_model(array, f"{model_name}_{field_name}_items")
                        fields[field_name] = (List[array_type], ...)
                    else:
                        fields[field_name] = (list, ...)
                elif field_type == "object":
                    submodel = convert_dictionary_to_pydantic_model(field_data, f"{model_name}_{field_name}")
                    fields[field_name] = (submodel, ...)
                elif field_type == "required":
                    # A property of type "required" lists the required field names in its
                    # "enum"; every field collected so far that is not listed becomes Optional.
                    required = field_data.get("enum", [])
                    for key in fields:
                        if key not in required:
                            optional_type = fields[key][0]
                            fields[key] = (Optional[optional_type], ...)
                else:
                    field_type = json_schema_to_python_types(field_type)
                    fields[field_name] = (field_type, ...)
    if "function" in dictionary:
        for field_name, field_data in dictionary.get("function", {}).items():
            if field_name == "name":
                model_name = field_data
            elif field_name == "description":
                fields["__doc__"] = field_data
            elif field_name == "parameters":
                # NOTE(review): returning here discards `fields`, including any "__doc__"
                # collected from "description" above - confirm whether the description
                # is meant to reach the generated model.
                return convert_dictionary_to_pydantic_model(field_data, f"{model_name}")

    if "parameters" in dictionary:
        # Wrap a bare function description so the "function" branch above handles it.
        field_data = {"function": dictionary}
        return convert_dictionary_to_pydantic_model(field_data, f"{model_name}")
    if "required" in dictionary:
        required = dictionary.get("required", [])
        for key in fields:
            if key not in required:
                optional_type = fields[key][0]
                fields[key] = (Optional[optional_type], ...)
    custom_model = create_model(model_name, **fields)
    return custom_model