llmnpc - llama.cpp/examples/pydantic_models_to

Path: llmnpc / llama.cpp / examples / pydantic_models_to_grammar.py (raw)
   1from __future__ import annotations
   2
   3import inspect
   4import json
   5import re
   6from copy import copy
   7from enum import Enum
   8from inspect import getdoc, isclass
   9from typing import TYPE_CHECKING, Any, Callable, List, Optional, Union, get_args, get_origin, get_type_hints
  10
  11from docstring_parser import parse
  12from pydantic import BaseModel, create_model
  13
  14if TYPE_CHECKING:
  15    from types import GenericAlias
  16else:
  17    # python 3.8 compat
  18    from typing import _GenericAlias as GenericAlias
  19
  20# TODO: fix this
  21# pyright: reportAttributeAccessIssue=information
  22
  23
  24class PydanticDataType(Enum):
  25    """
  26    Defines the data types supported by the grammar_generator.
  27
  28    Attributes:
  29        STRING (str): Represents a string data type.
  30        BOOLEAN (str): Represents a boolean data type.
  31        INTEGER (str): Represents an integer data type.
  32        FLOAT (str): Represents a float data type.
  33        OBJECT (str): Represents an object data type.
  34        ARRAY (str): Represents an array data type.
  35        ENUM (str): Represents an enum data type.
  36        CUSTOM_CLASS (str): Represents a custom class data type.
  37    """
  38
  39    STRING = "string"
  40    TRIPLE_QUOTED_STRING = "triple_quoted_string"
  41    MARKDOWN_CODE_BLOCK = "markdown_code_block"
  42    BOOLEAN = "boolean"
  43    INTEGER = "integer"
  44    FLOAT = "float"
  45    OBJECT = "object"
  46    ARRAY = "array"
  47    ENUM = "enum"
  48    ANY = "any"
  49    NULL = "null"
  50    CUSTOM_CLASS = "custom-class"
  51    CUSTOM_DICT = "custom-dict"
  52    SET = "set"
  53
  54
  55def map_pydantic_type_to_gbnf(pydantic_type: type[Any]) -> str:
  56    origin_type = get_origin(pydantic_type)
  57    origin_type = pydantic_type if origin_type is None else origin_type
  58
  59    if isclass(origin_type) and issubclass(origin_type, str):
  60        return PydanticDataType.STRING.value
  61    elif isclass(origin_type) and issubclass(origin_type, bool):
  62        return PydanticDataType.BOOLEAN.value
  63    elif isclass(origin_type) and issubclass(origin_type, int):
  64        return PydanticDataType.INTEGER.value
  65    elif isclass(origin_type) and issubclass(origin_type, float):
  66        return PydanticDataType.FLOAT.value
  67    elif isclass(origin_type) and issubclass(origin_type, Enum):
  68        return PydanticDataType.ENUM.value
  69
  70    elif isclass(origin_type) and issubclass(origin_type, BaseModel):
  71        return format_model_and_field_name(origin_type.__name__)
  72    elif origin_type is list:
  73        element_type = get_args(pydantic_type)[0]
  74        return f"{map_pydantic_type_to_gbnf(element_type)}-list"
  75    elif origin_type is set:
  76        element_type = get_args(pydantic_type)[0]
  77        return f"{map_pydantic_type_to_gbnf(element_type)}-set"
  78    elif origin_type is Union:
  79        union_types = get_args(pydantic_type)
  80        union_rules = [map_pydantic_type_to_gbnf(ut) for ut in union_types]
  81        return f"union-{'-or-'.join(union_rules)}"
  82    elif origin_type is Optional:
  83        element_type = get_args(pydantic_type)[0]
  84        return f"optional-{map_pydantic_type_to_gbnf(element_type)}"
  85    elif isclass(origin_type):
  86        return f"{PydanticDataType.CUSTOM_CLASS.value}-{format_model_and_field_name(origin_type.__name__)}"
  87    elif origin_type is dict:
  88        key_type, value_type = get_args(pydantic_type)
  89        return f"custom-dict-key-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(key_type))}-value-type-{format_model_and_field_name(map_pydantic_type_to_gbnf(value_type))}"
  90    else:
  91        return "unknown"
  92
  93
  94def format_model_and_field_name(model_name: str) -> str:
  95    parts = re.findall("[A-Z][^A-Z]*", model_name)
  96    if not parts:  # Check if the list is empty
  97        return model_name.lower().replace("_", "-")
  98    return "-".join(part.lower().replace("_", "-") for part in parts)
  99
 100
 101def generate_list_rule(element_type):
 102    """
 103    Generate a GBNF rule for a list of a given element type.
 104
 105    :param element_type: The type of the elements in the list (e.g., 'string').
 106    :return: A string representing the GBNF rule for a list of the given type.
 107    """
 108    rule_name = f"{map_pydantic_type_to_gbnf(element_type)}-list"
 109    element_rule = map_pydantic_type_to_gbnf(element_type)
 110    list_rule = rf'{rule_name} ::= "["  {element_rule} (","  {element_rule})* "]"'
 111    return list_rule
 112
 113
 114def get_members_structure(cls, rule_name):
 115    if issubclass(cls, Enum):
 116        # Handle Enum types
 117        members = [f'"\\"{member.value}\\""' for name, member in cls.__members__.items()]
 118        return f"{cls.__name__.lower()} ::= " + " | ".join(members)
 119    if cls.__annotations__ and cls.__annotations__ != {}:
 120        result = f'{rule_name} ::= "{{"'
 121        # Modify this comprehension
 122        members = [
 123            f'  "\\"{name}\\"" ":"  {map_pydantic_type_to_gbnf(param_type)}'
 124            for name, param_type in get_type_hints(cls).items()
 125            if name != "self"
 126        ]
 127
 128        result += '"," '.join(members)
 129        result += '  "}"'
 130        return result
 131    if rule_name == "custom-class-any":
 132        result = f"{rule_name} ::= "
 133        result += "value"
 134        return result
 135
 136    init_signature = inspect.signature(cls.__init__)
 137    parameters = init_signature.parameters
 138    result = f'{rule_name} ::=  "{{"'
 139    # Modify this comprehension too
 140    members = [
 141        f'  "\\"{name}\\"" ":"  {map_pydantic_type_to_gbnf(param.annotation)}'
 142        for name, param in parameters.items()
 143        if name != "self" and param.annotation != inspect.Parameter.empty
 144    ]
 145
 146    result += '", "'.join(members)
 147    result += '  "}"'
 148    return result
 149
 150
 151def regex_to_gbnf(regex_pattern: str) -> str:
 152    """
 153    Translate a basic regex pattern to a GBNF rule.
 154    Note: This function handles only a subset of simple regex patterns.
 155    """
 156    gbnf_rule = regex_pattern
 157
 158    # Translate common regex components to GBNF
 159    gbnf_rule = gbnf_rule.replace("\\d", "[0-9]")
 160    gbnf_rule = gbnf_rule.replace("\\s", "[ \t\n]")
 161
 162    # Handle quantifiers and other regex syntax that is similar in GBNF
 163    # (e.g., '*', '+', '?', character classes)
 164
 165    return gbnf_rule
 166
 167
 168def generate_gbnf_integer_rules(max_digit=None, min_digit=None):
 169    """
 170
 171    Generate GBNF Integer Rules
 172
 173    Generates GBNF (Generalized Backus-Naur Form) rules for integers based on the given maximum and minimum digits.
 174
 175    Parameters:
 176        max_digit (int): The maximum number of digits for the integer. Default is None.
 177        min_digit (int): The minimum number of digits for the integer. Default is None.
 178
 179    Returns:
 180        integer_rule (str): The identifier for the integer rule generated.
 181        additional_rules (list): A list of additional rules generated based on the given maximum and minimum digits.
 182
 183    """
 184    additional_rules = []
 185
 186    # Define the rule identifier based on max_digit and min_digit
 187    integer_rule = "integer-part"
 188    if max_digit is not None:
 189        integer_rule += f"-max{max_digit}"
 190    if min_digit is not None:
 191        integer_rule += f"-min{min_digit}"
 192
 193    # Handling Integer Rules
 194    if max_digit is not None or min_digit is not None:
 195        # Start with an empty rule part
 196        integer_rule_part = ""
 197
 198        # Add mandatory digits as per min_digit
 199        if min_digit is not None:
 200            integer_rule_part += "[0-9] " * min_digit
 201
 202        # Add optional digits up to max_digit
 203        if max_digit is not None:
 204            optional_digits = max_digit - (min_digit if min_digit is not None else 0)
 205            integer_rule_part += "".join(["[0-9]? " for _ in range(optional_digits)])
 206
 207        # Trim the rule part and append it to additional rules
 208        integer_rule_part = integer_rule_part.strip()
 209        if integer_rule_part:
 210            additional_rules.append(f"{integer_rule} ::= {integer_rule_part}")
 211
 212    return integer_rule, additional_rules
 213
 214
 215def generate_gbnf_float_rules(max_digit=None, min_digit=None, max_precision=None, min_precision=None):
 216    """
 217    Generate GBNF float rules based on the given constraints.
 218
 219    :param max_digit: Maximum number of digits in the integer part (default: None)
 220    :param min_digit: Minimum number of digits in the integer part (default: None)
 221    :param max_precision: Maximum number of digits in the fractional part (default: None)
 222    :param min_precision: Minimum number of digits in the fractional part (default: None)
 223    :return: A tuple containing the float rule and additional rules as a list
 224
 225    Example Usage:
 226    max_digit = 3
 227    min_digit = 1
 228    max_precision = 2
 229    min_precision = 1
 230    generate_gbnf_float_rules(max_digit, min_digit, max_precision, min_precision)
 231
 232    Output:
 233    ('float-3-1-2-1', ['integer-part-max3-min1 ::= [0-9] [0-9] [0-9]?', 'fractional-part-max2-min1 ::= [0-9] [0-9]?', 'float-3-1-2-1 ::= integer-part-max3-min1 "." fractional-part-max2-min
 234    *1'])
 235
 236    Note:
 237    GBNF stands for Generalized Backus-Naur Form, which is a notation technique to specify the syntax of programming languages or other formal grammars.
 238    """
 239    additional_rules = []
 240
 241    # Define the integer part rule
 242    integer_part_rule = (
 243        "integer-part"
 244        + (f"-max{max_digit}" if max_digit is not None else "")
 245        + (f"-min{min_digit}" if min_digit is not None else "")
 246    )
 247
 248    # Define the fractional part rule based on precision constraints
 249    fractional_part_rule = "fractional-part"
 250    fractional_rule_part = ""
 251    if max_precision is not None or min_precision is not None:
 252        fractional_part_rule += (f"-max{max_precision}" if max_precision is not None else "") + (
 253            f"-min{min_precision}" if min_precision is not None else ""
 254        )
 255        # Minimum number of digits
 256        fractional_rule_part = "[0-9]" * (min_precision if min_precision is not None else 1)
 257        # Optional additional digits
 258        fractional_rule_part += "".join(
 259            [" [0-9]?"] * ((max_precision - (
 260                min_precision if min_precision is not None else 1)) if max_precision is not None else 0)
 261        )
 262        additional_rules.append(f"{fractional_part_rule} ::= {fractional_rule_part}")
 263
 264    # Define the float rule
 265    float_rule = f"float-{max_digit if max_digit is not None else 'X'}-{min_digit if min_digit is not None else 'X'}-{max_precision if max_precision is not None else 'X'}-{min_precision if min_precision is not None else 'X'}"
 266    additional_rules.append(f'{float_rule} ::= {integer_part_rule} "." {fractional_part_rule}')
 267
 268    # Generating the integer part rule definition, if necessary
 269    if max_digit is not None or min_digit is not None:
 270        integer_rule_part = "[0-9]"
 271        if min_digit is not None and min_digit > 1:
 272            integer_rule_part += " [0-9]" * (min_digit - 1)
 273        if max_digit is not None:
 274            integer_rule_part += "".join([" [0-9]?"] * (max_digit - (min_digit if min_digit is not None else 1)))
 275        additional_rules.append(f"{integer_part_rule} ::= {integer_rule_part.strip()}")
 276
 277    return float_rule, additional_rules
 278
 279
 280def generate_gbnf_rule_for_type(
 281    model_name, field_name, field_type, is_optional, processed_models, created_rules, field_info=None
 282) -> tuple[str, list[str]]:
 283    """
 284    Generate GBNF rule for a given field type.
 285
 286    :param model_name: Name of the model.
 287
 288    :param field_name: Name of the field.
 289    :param field_type: Type of the field.
 290    :param is_optional: Whether the field is optional.
 291    :param processed_models: List of processed models.
 292    :param created_rules: List of created rules.
 293    :param field_info: Additional information about the field (optional).
 294
 295    :return: Tuple containing the GBNF type and a list of additional rules.
 296    :rtype: tuple[str, list]
 297    """
 298    rules = []
 299
 300    field_name = format_model_and_field_name(field_name)
 301    gbnf_type = map_pydantic_type_to_gbnf(field_type)
 302
 303    origin_type = get_origin(field_type)
 304    origin_type = field_type if origin_type is None else origin_type
 305
 306    if isclass(origin_type) and issubclass(origin_type, BaseModel):
 307        nested_model_name = format_model_and_field_name(field_type.__name__)
 308        nested_model_rules, _ = generate_gbnf_grammar(field_type, processed_models, created_rules)
 309        rules.extend(nested_model_rules)
 310        gbnf_type, rules = nested_model_name, rules
 311    elif isclass(origin_type) and issubclass(origin_type, Enum):
 312        enum_values = [f'"\\"{e.value}\\""' for e in field_type]  # Adding escaped quotes
 313        enum_rule = f"{model_name}-{field_name} ::= {' | '.join(enum_values)}"
 314        rules.append(enum_rule)
 315        gbnf_type, rules = model_name + "-" + field_name, rules
 316    elif origin_type is list:  # Array
 317        element_type = get_args(field_type)[0]
 318        element_rule_name, additional_rules = generate_gbnf_rule_for_type(
 319            model_name, f"{field_name}-element", element_type, is_optional, processed_models, created_rules
 320        )
 321        rules.extend(additional_rules)
 322        array_rule = f"""{model_name}-{field_name} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})*  "]" """
 323        rules.append(array_rule)
 324        gbnf_type, rules = model_name + "-" + field_name, rules
 325
 326    elif origin_type is set:  # Array
 327        element_type = get_args(field_type)[0]
 328        element_rule_name, additional_rules = generate_gbnf_rule_for_type(
 329            model_name, f"{field_name}-element", element_type, is_optional, processed_models, created_rules
 330        )
 331        rules.extend(additional_rules)
 332        array_rule = f"""{model_name}-{field_name} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})*  "]" """
 333        rules.append(array_rule)
 334        gbnf_type, rules = model_name + "-" + field_name, rules
 335
 336    elif gbnf_type.startswith("custom-class-"):
 337        rules.append(get_members_structure(field_type, gbnf_type))
 338    elif gbnf_type.startswith("custom-dict-"):
 339        key_type, value_type = get_args(field_type)
 340
 341        additional_key_type, additional_key_rules = generate_gbnf_rule_for_type(
 342            model_name, f"{field_name}-key-type", key_type, is_optional, processed_models, created_rules
 343        )
 344        additional_value_type, additional_value_rules = generate_gbnf_rule_for_type(
 345            model_name, f"{field_name}-value-type", value_type, is_optional, processed_models, created_rules
 346        )
 347        gbnf_type = rf'{gbnf_type} ::= "{{"  ( {additional_key_type} ": "  {additional_value_type} ("," "\n" ws {additional_key_type} ":"  {additional_value_type})*  )? "}}" '
 348
 349        rules.extend(additional_key_rules)
 350        rules.extend(additional_value_rules)
 351    elif gbnf_type.startswith("union-"):
 352        union_types = get_args(field_type)
 353        union_rules = []
 354
 355        for union_type in union_types:
 356            if isinstance(union_type, GenericAlias):
 357                union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(
 358                    model_name, field_name, union_type, False, processed_models, created_rules
 359                )
 360                union_rules.append(union_gbnf_type)
 361                rules.extend(union_rules_list)
 362
 363            elif not issubclass(union_type, type(None)):
 364                union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(
 365                    model_name, field_name, union_type, False, processed_models, created_rules
 366                )
 367                union_rules.append(union_gbnf_type)
 368                rules.extend(union_rules_list)
 369
 370        # Defining the union grammar rule separately
 371        if len(union_rules) == 1:
 372            union_grammar_rule = f"{model_name}-{field_name}-optional ::= {' | '.join(union_rules)} | null"
 373        else:
 374            union_grammar_rule = f"{model_name}-{field_name}-union ::= {' | '.join(union_rules)}"
 375        rules.append(union_grammar_rule)
 376        if len(union_rules) == 1:
 377            gbnf_type = f"{model_name}-{field_name}-optional"
 378        else:
 379            gbnf_type = f"{model_name}-{field_name}-union"
 380    elif isclass(origin_type) and issubclass(origin_type, str):
 381        if field_info and hasattr(field_info, "json_schema_extra") and field_info.json_schema_extra is not None:
 382            triple_quoted_string = field_info.json_schema_extra.get("triple_quoted_string", False)
 383            markdown_string = field_info.json_schema_extra.get("markdown_code_block", False)
 384
 385            gbnf_type = PydanticDataType.TRIPLE_QUOTED_STRING.value if triple_quoted_string else PydanticDataType.STRING.value
 386            gbnf_type = PydanticDataType.MARKDOWN_CODE_BLOCK.value if markdown_string else gbnf_type
 387
 388        elif field_info and hasattr(field_info, "pattern"):
 389            # Convert regex pattern to grammar rule
 390            regex_pattern = field_info.regex.pattern
 391            gbnf_type = f"pattern-{field_name} ::= {regex_to_gbnf(regex_pattern)}"
 392        else:
 393            gbnf_type = PydanticDataType.STRING.value
 394
 395    elif (
 396        isclass(origin_type)
 397        and issubclass(origin_type, float)
 398        and field_info
 399        and hasattr(field_info, "json_schema_extra")
 400        and field_info.json_schema_extra is not None
 401    ):
 402        # Retrieve precision attributes for floats
 403        max_precision = (
 404            field_info.json_schema_extra.get("max_precision") if field_info and hasattr(field_info,
 405                                                                                        "json_schema_extra") else None
 406        )
 407        min_precision = (
 408            field_info.json_schema_extra.get("min_precision") if field_info and hasattr(field_info,
 409                                                                                        "json_schema_extra") else None
 410        )
 411        max_digits = field_info.json_schema_extra.get("max_digit") if field_info and hasattr(field_info,
 412                                                                                             "json_schema_extra") else None
 413        min_digits = field_info.json_schema_extra.get("min_digit") if field_info and hasattr(field_info,
 414                                                                                             "json_schema_extra") else None
 415
 416        # Generate GBNF rule for float with given attributes
 417        gbnf_type, rules = generate_gbnf_float_rules(
 418            max_digit=max_digits, min_digit=min_digits, max_precision=max_precision, min_precision=min_precision
 419        )
 420
 421    elif (
 422        isclass(origin_type)
 423        and issubclass(origin_type, int)
 424        and field_info
 425        and hasattr(field_info, "json_schema_extra")
 426        and field_info.json_schema_extra is not None
 427    ):
 428        # Retrieve digit attributes for integers
 429        max_digits = field_info.json_schema_extra.get("max_digit") if field_info and hasattr(field_info,
 430                                                                                             "json_schema_extra") else None
 431        min_digits = field_info.json_schema_extra.get("min_digit") if field_info and hasattr(field_info,
 432                                                                                             "json_schema_extra") else None
 433
 434        # Generate GBNF rule for integer with given attributes
 435        gbnf_type, rules = generate_gbnf_integer_rules(max_digit=max_digits, min_digit=min_digits)
 436    else:
 437        gbnf_type, rules = gbnf_type, []
 438
 439    return gbnf_type, rules
 440
 441
 442def generate_gbnf_grammar(model: type[BaseModel], processed_models: set[type[BaseModel]], created_rules: dict[str, list[str]]) -> tuple[list[str], bool]:
 443    """
 444
 445    Generate GBnF Grammar
 446
 447    Generates a GBnF grammar for a given model.
 448
 449    :param model: A Pydantic model class to generate the grammar for. Must be a subclass of BaseModel.
 450    :param processed_models: A set of already processed models to prevent infinite recursion.
 451    :param created_rules: A dict containing already created rules to prevent duplicates.
 452    :return: A list of GBnF grammar rules in string format. And two booleans indicating if an extra markdown or triple quoted string is in the grammar.
 453    Example Usage:
 454    ```
 455    model = MyModel
 456    processed_models = set()
 457    created_rules = dict()
 458
 459    gbnf_grammar = generate_gbnf_grammar(model, processed_models, created_rules)
 460    ```
 461    """
 462    if model in processed_models:
 463        return [], False
 464
 465    processed_models.add(model)
 466    model_name = format_model_and_field_name(model.__name__)
 467
 468    if not issubclass(model, BaseModel):
 469        # For non-Pydantic classes, generate model_fields from __annotations__ or __init__
 470        if hasattr(model, "__annotations__") and model.__annotations__:
 471            model_fields = {name: (typ, ...) for name, typ in get_type_hints(model).items()}
 472        else:
 473            init_signature = inspect.signature(model.__init__)
 474            parameters = init_signature.parameters
 475            model_fields = {name: (param.annotation, param.default) for name, param in parameters.items() if
 476                            name != "self"}
 477    else:
 478        # For Pydantic models, use model_fields and check for ellipsis (required fields)
 479        model_fields = get_type_hints(model)
 480
 481    model_rule_parts = []
 482    nested_rules = []
 483    has_markdown_code_block = False
 484    has_triple_quoted_string = False
 485    look_for_markdown_code_block = False
 486    look_for_triple_quoted_string = False
 487    for field_name, field_info in model_fields.items():
 488        if not issubclass(model, BaseModel):
 489            field_type, default_value = field_info
 490            # Check if the field is optional (not required)
 491            is_optional = (default_value is not inspect.Parameter.empty) and (default_value is not Ellipsis)
 492        else:
 493            field_type = field_info
 494            field_info = model.model_fields[field_name]
 495            is_optional = field_info.is_required is False and get_origin(field_type) is Optional
 496        rule_name, additional_rules = generate_gbnf_rule_for_type(
 497            model_name, format_model_and_field_name(field_name), field_type, is_optional, processed_models,
 498            created_rules, field_info
 499        )
 500        look_for_markdown_code_block = True if rule_name == "markdown_code_block" else False
 501        look_for_triple_quoted_string = True if rule_name == "triple_quoted_string" else False
 502        if not look_for_markdown_code_block and not look_for_triple_quoted_string:
 503            if rule_name not in created_rules:
 504                created_rules[rule_name] = additional_rules
 505            model_rule_parts.append(f' ws "\\"{field_name}\\"" ":" ws {rule_name}')  # Adding escaped quotes
 506            nested_rules.extend(additional_rules)
 507        else:
 508            has_triple_quoted_string = look_for_triple_quoted_string
 509            has_markdown_code_block = look_for_markdown_code_block
 510
 511    fields_joined = r' "," "\n" '.join(model_rule_parts)
 512    model_rule = rf'{model_name} ::= "{{" "\n" {fields_joined} "\n" ws "}}"'
 513
 514    has_special_string = False
 515    if has_triple_quoted_string:
 516        model_rule += '"\\n" ws "}"'
 517        model_rule += '"\\n" triple-quoted-string'
 518        has_special_string = True
 519    if has_markdown_code_block:
 520        model_rule += '"\\n" ws "}"'
 521        model_rule += '"\\n" markdown-code-block'
 522        has_special_string = True
 523    all_rules = [model_rule] + nested_rules
 524
 525    return all_rules, has_special_string
 526
 527
 528def generate_gbnf_grammar_from_pydantic_models(
 529    models: list[type[BaseModel]], outer_object_name: str | None = None, outer_object_content: str | None = None,
 530    list_of_outputs: bool = False
 531) -> str:
 532    """
 533    Generate GBNF Grammar from Pydantic Models.
 534
 535    This method takes a list of Pydantic models and uses them to generate a GBNF grammar string. The generated grammar string can be used for parsing and validating data using the generated
 536    * grammar.
 537
 538    Args:
 539        models (list[type[BaseModel]]): A list of Pydantic models to generate the grammar from.
 540        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
 541        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
 542        list_of_outputs (str, optional): Allows a list of output objects
 543    Returns:
 544        str: The generated GBNF grammar string.
 545
 546    Examples:
 547        models = [UserModel, PostModel]
 548        grammar = generate_gbnf_grammar_from_pydantic(models)
 549        print(grammar)
 550        # Output:
 551        # root ::= UserModel | PostModel
 552        # ...
 553    """
 554    processed_models: set[type[BaseModel]] = set()
 555    all_rules = []
 556    created_rules: dict[str, list[str]] = {}
 557    if outer_object_name is None:
 558        for model in models:
 559            model_rules, _ = generate_gbnf_grammar(model, processed_models, created_rules)
 560            all_rules.extend(model_rules)
 561
 562        if list_of_outputs:
 563            root_rule = r'root ::= (" "| "\n") "[" ws grammar-models ("," ws grammar-models)* ws "]"' + "\n"
 564        else:
 565            root_rule = r'root ::= (" "| "\n") grammar-models' + "\n"
 566        root_rule += "grammar-models ::= " + " | ".join(
 567            [format_model_and_field_name(model.__name__) for model in models])
 568        all_rules.insert(0, root_rule)
 569        return "\n".join(all_rules)
 570    elif outer_object_name is not None:
 571        if list_of_outputs:
 572            root_rule = (
 573                rf'root ::= (" "| "\n") "[" ws {format_model_and_field_name(outer_object_name)} ("," ws {format_model_and_field_name(outer_object_name)})* ws "]"'
 574                + "\n"
 575            )
 576        else:
 577            root_rule = f"root ::= {format_model_and_field_name(outer_object_name)}\n"
 578
 579        model_rule = (
 580            rf'{format_model_and_field_name(outer_object_name)} ::= (" "| "\n") "{{" ws "\"{outer_object_name}\""  ":" ws grammar-models'
 581        )
 582
 583        fields_joined = " | ".join(
 584            [rf"{format_model_and_field_name(model.__name__)}-grammar-model" for model in models])
 585
 586        grammar_model_rules = f"\ngrammar-models ::= {fields_joined}"
 587        mod_rules = []
 588        for model in models:
 589            mod_rule = rf"{format_model_and_field_name(model.__name__)}-grammar-model ::= "
 590            mod_rule += (
 591                rf'"\"{model.__name__}\"" "," ws "\"{outer_object_content}\"" ":" ws {format_model_and_field_name(model.__name__)}' + "\n"
 592            )
 593            mod_rules.append(mod_rule)
 594        grammar_model_rules += "\n" + "\n".join(mod_rules)
 595
 596        for model in models:
 597            model_rules, has_special_string = generate_gbnf_grammar(model, processed_models,
 598                                                                    created_rules)
 599
 600            if not has_special_string:
 601                model_rules[0] += r'"\n" ws "}"'
 602
 603            all_rules.extend(model_rules)
 604
 605        all_rules.insert(0, root_rule + model_rule + grammar_model_rules)
 606        return "\n".join(all_rules)
 607
 608
 609def get_primitive_grammar(grammar):
 610    """
 611    Returns the needed GBNF primitive grammar for a given GBNF grammar string.
 612
 613    Args:
 614        grammar (str): The string containing the GBNF grammar.
 615
 616    Returns:
 617        str: GBNF primitive grammar string.
 618    """
 619    type_list: list[type[object]] = []
 620    if "string-list" in grammar:
 621        type_list.append(str)
 622    if "boolean-list" in grammar:
 623        type_list.append(bool)
 624    if "integer-list" in grammar:
 625        type_list.append(int)
 626    if "float-list" in grammar:
 627        type_list.append(float)
 628    additional_grammar = [generate_list_rule(t) for t in type_list]
 629    primitive_grammar = r"""
 630boolean ::= "true" | "false"
 631null ::= "null"
 632string ::= "\"" (
 633        [^"\\] |
 634        "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
 635      )* "\"" ws
 636ws ::= ([ \t\n] ws)?
 637float ::= ("-"? ([0] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
 638
 639integer ::= [0-9]+"""
 640
 641    any_block = ""
 642    if "custom-class-any" in grammar:
 643        any_block = """
 644value ::= object | array | string | number | boolean | null
 645
 646object ::=
 647  "{" ws (
 648            string ":" ws value
 649    ("," ws string ":" ws value)*
 650  )? "}" ws
 651
 652array  ::=
 653  "[" ws (
 654            value
 655    ("," ws value)*
 656  )? "]" ws
 657
 658number ::= integer | float"""
 659
 660    markdown_code_block_grammar = ""
 661    if "markdown-code-block" in grammar:
 662        markdown_code_block_grammar = r'''
 663markdown-code-block ::= opening-triple-ticks markdown-code-block-content closing-triple-ticks
 664markdown-code-block-content ::= ( [^`] | "`" [^`] |  "`"  "`" [^`]  )*
 665opening-triple-ticks ::= "```" "python" "\n" | "```" "c" "\n" | "```" "cpp" "\n" | "```" "txt" "\n" | "```" "text" "\n" | "```" "json" "\n" | "```" "javascript" "\n" | "```" "css" "\n" | "```" "html" "\n" | "```" "markdown" "\n"
 666closing-triple-ticks ::= "```" "\n"'''
 667
 668    if "triple-quoted-string" in grammar:
 669        markdown_code_block_grammar = r"""
 670triple-quoted-string ::= triple-quotes triple-quoted-string-content triple-quotes
 671triple-quoted-string-content ::= ( [^'] | "'" [^'] |  "'"  "'" [^']  )*
 672triple-quotes ::= "'''" """
 673    return "\n" + "\n".join(additional_grammar) + any_block + primitive_grammar + markdown_code_block_grammar
 674
 675
 676def generate_markdown_documentation(
 677    pydantic_models: list[type[BaseModel]], model_prefix="Model", fields_prefix="Fields",
 678    documentation_with_field_description=True
 679) -> str:
 680    """
 681    Generate markdown documentation for a list of Pydantic models.
 682
 683    Args:
 684        pydantic_models (list[type[BaseModel]]): list of Pydantic model classes.
 685        model_prefix (str): Prefix for the model section.
 686        fields_prefix (str): Prefix for the fields section.
 687        documentation_with_field_description (bool): Include field descriptions in the documentation.
 688
 689    Returns:
 690        str: Generated text documentation.
 691    """
 692    documentation = ""
 693    pyd_models: list[tuple[type[BaseModel], bool]] = [(model, True) for model in pydantic_models]
 694    for model, add_prefix in pyd_models:
 695        if add_prefix:
 696            documentation += f"{model_prefix}: {model.__name__}\n"
 697        else:
 698            documentation += f"Model: {model.__name__}\n"
 699
 700        # Handling multi-line model description with proper indentation
 701
 702        class_doc = getdoc(model)
 703        base_class_doc = getdoc(BaseModel)
 704        class_description = class_doc if class_doc and class_doc != base_class_doc else ""
 705        if class_description != "":
 706            documentation += "  Description: "
 707            documentation += format_multiline_description(class_description, 0) + "\n"
 708
 709        if add_prefix:
 710            # Indenting the fields section
 711            documentation += f"  {fields_prefix}:\n"
 712        else:
 713            documentation += f"  Fields:\n"  # noqa: F541
 714        if isclass(model) and issubclass(model, BaseModel):
 715            for name, field_type in get_type_hints(model).items():
 716                # if name == "markdown_code_block":
 717                #    continue
 718                if get_origin(field_type) == list:
 719                    element_type = get_args(field_type)[0]
 720                    if isclass(element_type) and issubclass(element_type, BaseModel):
 721                        pyd_models.append((element_type, False))
 722                if get_origin(field_type) == Union:
 723                    element_types = get_args(field_type)
 724                    for element_type in element_types:
 725                        if isclass(element_type) and issubclass(element_type, BaseModel):
 726                            pyd_models.append((element_type, False))
 727                documentation += generate_field_markdown(
 728                    name, field_type, model, documentation_with_field_description=documentation_with_field_description
 729                )
 730            documentation += "\n"
 731
 732        if hasattr(model, "Config") and hasattr(model.Config,
 733                                                "json_schema_extra") and "example" in model.Config.json_schema_extra:
 734            documentation += f"  Expected Example Output for {format_model_and_field_name(model.__name__)}:\n"
 735            json_example = json.dumps(model.Config.json_schema_extra["example"])
 736            documentation += format_multiline_description(json_example, 2) + "\n"
 737
 738    return documentation
 739
 740
 741def generate_field_markdown(
 742    field_name: str, field_type: type[Any], model: type[BaseModel], depth=1,
 743    documentation_with_field_description=True
 744) -> str:
 745    """
 746    Generate markdown documentation for a Pydantic model field.
 747
 748    Args:
 749        field_name (str): Name of the field.
 750        field_type (type[Any]): Type of the field.
 751        model (type[BaseModel]): Pydantic model class.
 752        depth (int): Indentation depth in the documentation.
 753        documentation_with_field_description (bool): Include field descriptions in the documentation.
 754
 755    Returns:
 756        str: Generated text documentation for the field.
 757    """
 758    indent = "    " * depth
 759
 760    field_info = model.model_fields.get(field_name)
 761    field_description = field_info.description if field_info and field_info.description else ""
 762
 763    origin_type = get_origin(field_type)
 764    origin_type = field_type if origin_type is None else origin_type
 765
 766    if origin_type == list:
 767        element_type = get_args(field_type)[0]
 768        field_text = f"{indent}{field_name} ({format_model_and_field_name(field_type.__name__)} of {format_model_and_field_name(element_type.__name__)})"
 769        if field_description != "":
 770            field_text += ":\n"
 771        else:
 772            field_text += "\n"
 773    elif origin_type == Union:
 774        element_types = get_args(field_type)
 775        types = []
 776        for element_type in element_types:
 777            types.append(format_model_and_field_name(element_type.__name__))
 778        field_text = f"{indent}{field_name} ({' or '.join(types)})"
 779        if field_description != "":
 780            field_text += ":\n"
 781        else:
 782            field_text += "\n"
 783    else:
 784        field_text = f"{indent}{field_name} ({format_model_and_field_name(field_type.__name__)})"
 785        if field_description != "":
 786            field_text += ":\n"
 787        else:
 788            field_text += "\n"
 789
 790    if not documentation_with_field_description:
 791        return field_text
 792
 793    if field_description != "":
 794        field_text += f"        Description: {field_description}\n"
 795
 796    # Check for and include field-specific examples if available
 797    if hasattr(model, "Config") and hasattr(model.Config,
 798                                            "json_schema_extra") and "example" in model.Config.json_schema_extra:
 799        field_example = model.Config.json_schema_extra["example"].get(field_name)
 800        if field_example is not None:
 801            example_text = f"'{field_example}'" if isinstance(field_example, str) else field_example
 802            field_text += f"{indent}  Example: {example_text}\n"
 803
 804    if isclass(origin_type) and issubclass(origin_type, BaseModel):
 805        field_text += f"{indent}  Details:\n"
 806        for name, type_ in get_type_hints(field_type).items():
 807            field_text += generate_field_markdown(name, type_, field_type, depth + 2)
 808
 809    return field_text
 810
 811
 812def format_json_example(example: dict[str, Any], depth: int) -> str:
 813    """
 814    Format a JSON example into a readable string with indentation.
 815
 816    Args:
 817        example (dict): JSON example to be formatted.
 818        depth (int): Indentation depth.
 819
 820    Returns:
 821        str: Formatted JSON example string.
 822    """
 823    indent = "    " * depth
 824    formatted_example = "{\n"
 825    for key, value in example.items():
 826        value_text = f"'{value}'" if isinstance(value, str) else value
 827        formatted_example += f"{indent}{key}: {value_text},\n"
 828    formatted_example = formatted_example.rstrip(",\n") + "\n" + indent + "}"
 829    return formatted_example
 830
 831
 832def generate_text_documentation(
 833    pydantic_models: list[type[BaseModel]], model_prefix="Model", fields_prefix="Fields",
 834    documentation_with_field_description=True
 835) -> str:
 836    """
 837    Generate text documentation for a list of Pydantic models.
 838
 839    Args:
 840        pydantic_models (list[type[BaseModel]]): List of Pydantic model classes.
 841        model_prefix (str): Prefix for the model section.
 842        fields_prefix (str): Prefix for the fields section.
 843        documentation_with_field_description (bool): Include field descriptions in the documentation.
 844
 845    Returns:
 846        str: Generated text documentation.
 847    """
 848    documentation = ""
 849    pyd_models: list[tuple[type[BaseModel], bool]] = [(model, True) for model in pydantic_models]
 850    for model, add_prefix in pyd_models:
 851        if add_prefix:
 852            documentation += f"{model_prefix}: {model.__name__}\n"
 853        else:
 854            documentation += f"Model: {model.__name__}\n"
 855
 856        # Handling multi-line model description with proper indentation
 857
 858        class_doc = getdoc(model)
 859        base_class_doc = getdoc(BaseModel)
 860        class_description = class_doc if class_doc and class_doc != base_class_doc else ""
 861        if class_description != "":
 862            documentation += "  Description: "
 863            documentation += "\n" + format_multiline_description(class_description, 2) + "\n"
 864
 865        if isclass(model) and issubclass(model, BaseModel):
 866            documentation_fields = ""
 867            for name, field_type in get_type_hints(model).items():
 868                # if name == "markdown_code_block":
 869                #    continue
 870                if get_origin(field_type) == list:
 871                    element_type = get_args(field_type)[0]
 872                    if isclass(element_type) and issubclass(element_type, BaseModel):
 873                        pyd_models.append((element_type, False))
 874                if get_origin(field_type) == Union:
 875                    element_types = get_args(field_type)
 876                    for element_type in element_types:
 877                        if isclass(element_type) and issubclass(element_type, BaseModel):
 878                            pyd_models.append((element_type, False))
 879                documentation_fields += generate_field_text(
 880                    name, field_type, model, documentation_with_field_description=documentation_with_field_description
 881                )
 882            if documentation_fields != "":
 883                if add_prefix:
 884                    documentation += f"  {fields_prefix}:\n{documentation_fields}"
 885                else:
 886                    documentation += f"  Fields:\n{documentation_fields}"
 887            documentation += "\n"
 888
 889        if hasattr(model, "Config") and hasattr(model.Config,
 890                                                "json_schema_extra") and "example" in model.Config.json_schema_extra:
 891            documentation += f"  Expected Example Output for {format_model_and_field_name(model.__name__)}:\n"
 892            json_example = json.dumps(model.Config.json_schema_extra["example"])
 893            documentation += format_multiline_description(json_example, 2) + "\n"
 894
 895    return documentation
 896
 897
 898def generate_field_text(
 899    field_name: str, field_type: type[Any], model: type[BaseModel], depth=1,
 900    documentation_with_field_description=True
 901) -> str:
 902    """
 903    Generate text documentation for a Pydantic model field.
 904
 905    Args:
 906        field_name (str): Name of the field.
 907        field_type (type[Any]): Type of the field.
 908        model (type[BaseModel]): Pydantic model class.
 909        depth (int): Indentation depth in the documentation.
 910        documentation_with_field_description (bool): Include field descriptions in the documentation.
 911
 912    Returns:
 913        str: Generated text documentation for the field.
 914    """
 915    indent = "    " * depth
 916
 917    field_info = model.model_fields.get(field_name)
 918    field_description = field_info.description if field_info and field_info.description else ""
 919
 920    if get_origin(field_type) == list:
 921        element_type = get_args(field_type)[0]
 922        field_text = f"{indent}{field_name} ({format_model_and_field_name(field_type.__name__)} of {format_model_and_field_name(element_type.__name__)})"
 923        if field_description != "":
 924            field_text += ":\n"
 925        else:
 926            field_text += "\n"
 927    elif get_origin(field_type) == Union:
 928        element_types = get_args(field_type)
 929        types = []
 930        for element_type in element_types:
 931            types.append(format_model_and_field_name(element_type.__name__))
 932        field_text = f"{indent}{field_name} ({' or '.join(types)})"
 933        if field_description != "":
 934            field_text += ":\n"
 935        else:
 936            field_text += "\n"
 937    else:
 938        field_text = f"{indent}{field_name} ({format_model_and_field_name(field_type.__name__)})"
 939        if field_description != "":
 940            field_text += ":\n"
 941        else:
 942            field_text += "\n"
 943
 944    if not documentation_with_field_description:
 945        return field_text
 946
 947    if field_description != "":
 948        field_text += f"{indent}  Description: " + field_description + "\n"
 949
 950    # Check for and include field-specific examples if available
 951    if hasattr(model, "Config") and hasattr(model.Config,
 952                                            "json_schema_extra") and "example" in model.Config.json_schema_extra:
 953        field_example = model.Config.json_schema_extra["example"].get(field_name)
 954        if field_example is not None:
 955            example_text = f"'{field_example}'" if isinstance(field_example, str) else field_example
 956            field_text += f"{indent}  Example: {example_text}\n"
 957
 958    if isclass(field_type) and issubclass(field_type, BaseModel):
 959        field_text += f"{indent}  Details:\n"
 960        for name, type_ in get_type_hints(field_type).items():
 961            field_text += generate_field_text(name, type_, field_type, depth + 2)
 962
 963    return field_text
 964
 965
 966def format_multiline_description(description: str, indent_level: int) -> str:
 967    """
 968    Format a multiline description with proper indentation.
 969
 970    Args:
 971        description (str): Multiline description.
 972        indent_level (int): Indentation level.
 973
 974    Returns:
 975        str: Formatted multiline description.
 976    """
 977    indent = "    " * indent_level
 978    return indent + description.replace("\n", "\n" + indent)
 979
 980
 981def save_gbnf_grammar_and_documentation(
 982    grammar, documentation, grammar_file_path="./grammar.gbnf", documentation_file_path="./grammar_documentation.md"
 983):
 984    """
 985    Save GBNF grammar and documentation to specified files.
 986
 987    Args:
 988        grammar (str): GBNF grammar string.
 989        documentation (str): Documentation string.
 990        grammar_file_path (str): File path to save the GBNF grammar.
 991        documentation_file_path (str): File path to save the documentation.
 992
 993    Returns:
 994        None
 995    """
 996    try:
 997        with open(grammar_file_path, "w") as file:
 998            file.write(grammar + get_primitive_grammar(grammar))
 999        print(f"Grammar successfully saved to {grammar_file_path}")
1000    except IOError as e:
1001        print(f"An error occurred while saving the grammar file: {e}")
1002
1003    try:
1004        with open(documentation_file_path, "w") as file:
1005            file.write(documentation)
1006        print(f"Documentation successfully saved to {documentation_file_path}")
1007    except IOError as e:
1008        print(f"An error occurred while saving the documentation file: {e}")
1009
1010
def remove_empty_lines(string):
    """
    Drop every line that is empty or contains only whitespace.

    The remaining lines are re-joined with "\\n"; no trailing newline is kept.

    Args:
        string (str): Input string.

    Returns:
        str: String with empty lines removed.
    """
    return "\n".join(line for line in string.splitlines() if line.strip())
1025
1026
def generate_and_save_gbnf_grammar_and_documentation(
    pydantic_model_list,
    grammar_file_path="./generated_grammar.gbnf",
    documentation_file_path="./generated_grammar_documentation.md",
    outer_object_name: str | None = None,
    outer_object_content: str | None = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Build the GBNF grammar and markdown documentation for the models and write both to disk.

    The documentation is generated first, then the grammar; empty lines are
    stripped from the grammar before both are handed to
    ``save_gbnf_grammar_and_documentation``.

    Args:
        pydantic_model_list: List of Pydantic model classes.
        grammar_file_path (str): File path to save the generated GBNF grammar.
        documentation_file_path (str): File path to save the generated documentation.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        None
    """
    markdown_docs = generate_markdown_documentation(
        pydantic_model_list,
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    raw_grammar = generate_gbnf_grammar_from_pydantic_models(
        pydantic_model_list, outer_object_name, outer_object_content, list_of_outputs
    )
    save_gbnf_grammar_and_documentation(
        remove_empty_lines(raw_grammar), markdown_docs, grammar_file_path, documentation_file_path
    )
1063
1064
def generate_gbnf_grammar_and_documentation(
    pydantic_model_list,
    outer_object_name: str | None = None,
    outer_object_content: str | None = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Build the GBNF grammar and the markdown documentation for a list of Pydantic models.

    The documentation is generated from a shallow copy of the model list. The
    returned grammar is the model grammar followed by the primitive rules it
    needs, with empty lines removed.

    Args:
        pydantic_model_list: List of Pydantic model classes.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        tuple: GBNF grammar string, documentation string.
    """
    markdown_docs = generate_markdown_documentation(
        copy(pydantic_model_list),
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    model_grammar = generate_gbnf_grammar_from_pydantic_models(
        pydantic_model_list, outer_object_name, outer_object_content, list_of_outputs
    )
    complete_grammar = model_grammar + get_primitive_grammar(model_grammar)
    return remove_empty_lines(complete_grammar), markdown_docs
1097
1098
def generate_gbnf_grammar_and_documentation_from_dictionaries(
    dictionaries: list[dict[str, Any]],
    outer_object_name: str | None = None,
    outer_object_content: str | None = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Build the GBNF grammar and the markdown documentation from dictionary model descriptions.

    The dictionaries are first turned into dynamic Pydantic models; the
    documentation is generated from a shallow copy of that model list. The
    returned grammar is the model grammar followed by the primitive rules it
    needs, with empty lines removed.

    Args:
        dictionaries (list[dict]): List of dictionaries representing Pydantic models.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        tuple: GBNF grammar string, documentation string.
    """
    dynamic_models = create_dynamic_models_from_dictionaries(dictionaries)
    markdown_docs = generate_markdown_documentation(
        copy(dynamic_models),
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    model_grammar = generate_gbnf_grammar_from_pydantic_models(
        dynamic_models, outer_object_name, outer_object_content, list_of_outputs
    )
    complete_grammar = model_grammar + get_primitive_grammar(model_grammar)
    return remove_empty_lines(complete_grammar), markdown_docs
1132
1133
def create_dynamic_model_from_function(func: Callable[..., Any]):
    """
    Creates a dynamic Pydantic model from a given function's type hints and adds the function as a 'run' method.

    Every parameter except ``self`` becomes a model field: its annotation is the
    field type, its default (if any) the field default, and its docstring entry
    the field description. The model is named after the function and uses the
    docstring's short description as its own docstring.

    Args:
        func (Callable): A function with type hints from which to create the model.

    Returns:
        A dynamic Pydantic model class with the provided function as a 'run' method.

    Raises:
        TypeError: If a parameter has no type annotation.
        ValueError: If a parameter has no description in the docstring.
    """

    # Get the signature of the function
    sig = inspect.signature(func)

    # Parse the docstring
    assert func.__doc__ is not None
    docstring = parse(func.__doc__)

    # Sentinel used by inspect for "no annotation" / "no default"; compared by identity
    # so that defaults overloading ``==`` (e.g. arrays) cannot break the check.
    empty = inspect.Parameter.empty

    dynamic_fields = {}
    param_docs = []
    for param in sig.parameters.values():
        # Exclude 'self' parameter
        if param.name == "self":
            continue

        # Every parameter must carry a type annotation
        if param.annotation is empty:
            raise TypeError(f"Parameter '{param.name}' in function '{func.__name__}' lacks a type annotation")

        # Find the parameter's description in the docstring
        param_doc = next((d for d in docstring.params if d.arg_name == param.name), None)

        # Every parameter must carry a description
        if not param_doc or not param_doc.description:
            raise ValueError(
                f"Parameter '{param.name}' in function '{func.__name__}' lacks a description in the docstring")

        # Add parameter details to the schema; ``...`` marks a field without a default
        param_docs.append((param.name, param_doc))
        default_value = ... if param.default is empty else param.default
        dynamic_fields[param.name] = (param.annotation, default_value)

    # Creating the dynamic model
    dynamic_model = create_model(f"{func.__name__}", **dynamic_fields)

    for name, param_doc in param_docs:
        dynamic_model.model_fields[name].description = param_doc.description

    dynamic_model.__doc__ = docstring.short_description

    def run_method_wrapper(self):
        # Forward the instance's field values to the wrapped function as keyword arguments.
        func_args = {name: getattr(self, name) for name in dynamic_fields}
        return func(**func_args)

    # Adding the wrapped function as a 'run' method
    setattr(dynamic_model, "run", run_method_wrapper)
    return dynamic_model
1194
1195
def add_run_method_to_dynamic_model(model: type[BaseModel], func: Callable[..., Any]):
    """
    Attach a 'run' method to a model class that calls the given function.

    When invoked on an instance, 'run' reads every name listed in the model's
    ``model_fields`` from that instance and passes the values to ``func`` as
    keyword arguments.

    Args:
        model (type[BaseModel]): Dynamic Pydantic model class.
        func (Callable): Function to be added as a 'run' method to the model.

    Returns:
        type[BaseModel]: Pydantic model class with the added 'run' method.
    """

    def run_method_wrapper(self):
        # Field names are looked up at call time, not when the method is attached.
        keyword_values = {}
        for field_name in model.model_fields:
            keyword_values[field_name] = getattr(self, field_name)
        return func(**keyword_values)

    # The model class itself is modified in place and handed back.
    setattr(model, "run", run_method_wrapper)
    return model
1216
1217
def create_dynamic_models_from_dictionaries(dictionaries: list[dict[str, Any]]):
    """
    Convert each dictionary into a dynamic Pydantic model class.

    The model name passed to the converter is the dictionary's "name" entry
    (empty string when absent) run through ``format_model_and_field_name``.

    Args:
        dictionaries (list[dict]): List of dictionaries representing model structures.

    Returns:
        list[type[BaseModel]]: List of generated dynamic Pydantic model classes.
    """
    return [
        convert_dictionary_to_pydantic_model(spec, format_model_and_field_name(spec.get("name", "")))
        for spec in dictionaries
    ]
1234
1235
def map_grammar_names_to_pydantic_model_class(pydantic_model_list):
    """
    Index model classes by their formatted class name.

    Args:
        pydantic_model_list: Iterable of Pydantic model classes.

    Returns:
        dict: Maps ``format_model_and_field_name(model.__name__)`` to the model class.
        When two models format to the same name, the later one wins.
    """
    return {format_model_and_field_name(model_cls.__name__): model_cls for model_cls in pydantic_model_list}
1242
1243
def json_schema_to_python_types(schema):
    """
    Translate a JSON-schema type name into the matching Python type.

    Args:
        schema (str): One of "any", "string", "number", "integer", "boolean" or "array".

    Returns:
        The corresponding Python type (``Any``, ``str``, ``float``, ``int``, ``bool`` or ``list``).

    Raises:
        KeyError: If the type name is not one of the supported names.
    """
    python_type_by_name = dict(
        any=Any,
        string=str,
        number=float,
        integer=int,
        boolean=bool,
        array=list,
    )
    return python_type_by_name[schema]
1254
1255
def list_to_enum(enum_name, values):
    """
    Build an Enum class whose member names and member values are both taken from ``values``.

    Args:
        enum_name (str): Name of the generated Enum class.
        values: Iterable of strings to turn into members.

    Returns:
        The generated Enum class.
    """
    members = {}
    for member in values:
        members[member] = member
    return Enum(enum_name, members)
1258
1259
def _make_unrequired_fields_optional(fields: dict[str, Any], required: Any) -> None:
    """Wrap the type of every collected field not named in ``required`` in ``Optional`` (in place)."""
    for key in fields:
        # "__doc__" carries the model docstring, not a (type, default) pair.
        if key == "__doc__" or key in required:
            continue
        fields[key] = (Optional[fields[key][0]], ...)


def _array_items_to_type(items: Any, items_model_name: str) -> Any:
    """Translate the ``items`` entry of an array property into the list element type."""
    item_type = items.get("type") if isinstance(items, dict) else None
    if isinstance(item_type, str):
        # JSON-schema style items such as {"type": "string"} or {"type": "object", "properties": {...}}.
        if item_type == "object":
            return convert_dictionary_to_pydantic_model(items, items_model_name)
        return json_schema_to_python_types(item_type)
    # Otherwise ``items`` is treated as a mapping of property definitions.
    return convert_dictionary_to_pydantic_model({"properties": items}, items_model_name)


def convert_dictionary_to_pydantic_model(dictionary: dict[str, Any], model_name: str = "CustomModel") -> type[Any]:
    """
    Convert a dictionary to a Pydantic model class.

    Three dictionary shapes are handled:
      * a dictionary with "properties" (and optionally "required"), turned into model fields;
      * a dictionary with a "function" entry holding "name", "description" and "parameters";
      * a dictionary with "parameters" at the top level, which is wrapped as {"function": dictionary}.

    A property without a "type" is treated as a string. Fields not listed in
    "required" get an ``Optional`` type.

    Args:
        dictionary (dict): Dictionary representing the model structure.
        model_name (str): Name of the generated Pydantic model.

    Returns:
        type[BaseModel]: Generated Pydantic model class.

    Raises:
        KeyError: If a property uses a type name unknown to ``json_schema_to_python_types``.
    """
    fields: dict[str, Any] = {}

    if "properties" in dictionary:
        for field_name, field_data in dictionary.get("properties", {}).items():
            nested_name = f"{model_name}_{field_name}"

            if field_data == "object":
                # Bare-string shorthand with no sub-schema: use an empty nested model.
                # (Recursing on ``dictionary`` itself, as before, never terminated.)
                # NOTE(review): empty model is an assumption about the intended meaning - confirm.
                fields[field_name] = (convert_dictionary_to_pydantic_model({}, nested_name), ...)
                continue

            # "string" is the JSON-schema name understood by json_schema_to_python_types.
            field_type = field_data.get("type", "string")

            if field_data.get("enum", []):
                fields[field_name] = (list_to_enum(field_name, field_data.get("enum", [])), ...)
            elif field_type == "array":
                items = field_data.get("items", {})
                if items != {}:
                    array_type = _array_items_to_type(items, f"{nested_name}_items")
                    fields[field_name] = (List[array_type], ...)
                else:
                    fields[field_name] = (list, ...)
            elif field_type == "object":
                fields[field_name] = (convert_dictionary_to_pydantic_model(field_data, nested_name), ...)
            elif field_type == "required":
                # Applies to the fields collected so far.
                _make_unrequired_fields_optional(fields, field_data.get("enum", []))
            else:
                fields[field_name] = (json_schema_to_python_types(field_type), ...)

    if "function" in dictionary:
        function_spec = dictionary.get("function", {})
        # Read name and description up front so the result does not depend on key order.
        if "name" in function_spec:
            model_name = function_spec["name"]
        if "description" in function_spec:
            fields["__doc__"] = function_spec["description"]
        if "parameters" in function_spec:
            model = convert_dictionary_to_pydantic_model(function_spec["parameters"], f"{model_name}")
            if "description" in function_spec:
                # The model is built from the parameters alone, so attach the description here.
                model.__doc__ = function_spec["description"]
            return model

    if "parameters" in dictionary:
        return convert_dictionary_to_pydantic_model({"function": dictionary}, f"{model_name}")

    if "required" in dictionary:
        _make_unrequired_fields_optional(fields, dictionary.get("required", []))

    return create_model(model_name, **fields)