llmnpc - llama.cpp/ggml/include/gguf.h

Path: llmnpc / llama.cpp / ggml / include / gguf.h (raw)
  1// This file contains functionality related to "GGUF" files, the binary file format used by ggml.
  2// GGUF files have the following structure:
  3//
  4// 1. File magic "GGUF" (4 bytes).
  5// 2. File version (uint32_t).
  6// 3. Number of ggml tensors in file (int64_t).
  7// 4. Number of key-value-pairs in file (int64_t).
  8// 5. For each KV pair:
  9//   1. The key (string).
 10//   2. The value type (gguf_type).
 11//   3a. If the value type is GGUF_TYPE_ARRAY:
 12//     1. The type of the array (gguf_type).
 13//     2. The number of elements in the array (uint64_t).
 14//     3. The binary representation of each element in the array.
 15//   3b. Otherwise:
 16//     1. The binary representation of the value.
 17// 6. For each ggml tensor:
 18//   1. The tensor name (string).
 19//   2. The number of dimensions of the tensor (uint32_t).
 20//   3. For each dimension:
 21//     1. The size of the tensor in the dimension (int64_t).
 22//   4. The tensor data type (ggml_type).
 23//   5. The tensor data offset in the tensor data binary blob (uint64_t).
 24// 7. The tensor data binary blob (optional, aligned).
 25//
 26// Strings are serialized as the string length (uint64_t) followed by the C string without the null terminator.
 27// All enums are stored as int32_t.
 28// All bool values are stored as int8_t.
 29// If the special key "general.alignment" (uint32_t) is defined it is used for alignment,
 30//   otherwise GGUF_DEFAULT_ALIGNMENT is used.
 31//
 32// Module maintainer: Johannes Gäßler (@JohannesGaessler, johannesg@5d6.de)
 33
 34#pragma once
 35
 36#include "ggml.h"
 37
 38#include <stdbool.h>
 39#include <stdint.h>
 40
 41#define GGUF_MAGIC   "GGUF" // file magic: the first 4 bytes of a GGUF file
 42#define GGUF_VERSION 3 // NOTE(review): presumably the file version written by this code - confirm which older versions can still be read
 43
 44#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment" // optional uint32_t KV pair; if defined, its value is used for alignment
 45
 46#define GGUF_DEFAULT_ALIGNMENT 32 // alignment used when "general.alignment" is not defined
 47
 48#ifdef  __cplusplus
 49extern "C" {
 50#endif
 51
 52    // types that can be stored as GGUF KV data; like all enums in the file format, a gguf_type is serialized as int32_t
 53    enum gguf_type {
 54        GGUF_TYPE_UINT8   = 0,
 55        GGUF_TYPE_INT8    = 1,
 56        GGUF_TYPE_UINT16  = 2,
 57        GGUF_TYPE_INT16   = 3,
 58        GGUF_TYPE_UINT32  = 4,
 59        GGUF_TYPE_INT32   = 5,
 60        GGUF_TYPE_FLOAT32 = 6,
 61        GGUF_TYPE_BOOL    = 7,  // serialized as int8_t
 62        GGUF_TYPE_STRING  = 8,  // serialized as the length (uint64_t) followed by the characters, without null terminator
 63        GGUF_TYPE_ARRAY   = 9,  // serialized as element type (gguf_type), element count (uint64_t), then each element
 64        GGUF_TYPE_UINT64  = 10,
 65        GGUF_TYPE_INT64   = 11,
 66        GGUF_TYPE_FLOAT64 = 12,
 67        GGUF_TYPE_COUNT,       // marks the end of the enum (evaluates to 13, the number of types above)
 68    };
 69
 70    struct gguf_context; // forward declaration only: this header never defines the struct, so it is handled exclusively through pointers
 71
 72    struct gguf_init_params { // options passed by value to gguf_init_from_file
 73        bool no_alloc; // NOTE(review): presumably skips allocating/loading the tensor data when true - confirm against the implementation
 74
 75        // if not NULL, create a ggml_context and allocate the tensor data in it
 76        struct ggml_context ** ctx; // NOTE(review): presumably the created context is handed back through *ctx - confirm who frees it
 77    };
 78
 79    GGML_API struct gguf_context * gguf_init_empty(void); // takes no input; NOTE(review): presumably yields a context with no KV pairs and no tensors - confirm
 80    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); // NOTE(review): failure behavior (NULL return?) is not stated in this header - confirm
 81    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
 82
 83    GGML_API void gguf_free(struct gguf_context * ctx); // NOTE(review): presumably releases a context obtained from gguf_init_empty/gguf_init_from_file - confirm whether NULL is accepted
 84
 85    GGML_API const char * gguf_type_name(enum gguf_type type); // NOTE(review): presumably a human-readable name for type; behavior for GGUF_TYPE_COUNT or out-of-range values is not stated here - confirm
 86
 87    GGML_API uint32_t gguf_get_version    (const struct gguf_context * ctx); // NOTE(review): presumably the file version field (uint32_t in the file structure described at the top of this header) - confirm
 88    GGML_API size_t   gguf_get_alignment  (const struct gguf_context * ctx); // NOTE(review): presumably the "general.alignment" value, or GGUF_DEFAULT_ALIGNMENT if that key is not defined - confirm
 89    GGML_API size_t   gguf_get_data_offset(const struct gguf_context * ctx); // NOTE(review): presumably the offset in the file at which the tensor data binary blob starts - confirm
 90
 91    GGML_API int64_t      gguf_get_n_kv(const struct gguf_context * ctx); // NOTE(review): presumably the number of KV pairs held by ctx - confirm
 92    GGML_API int64_t      gguf_find_key(const struct gguf_context * ctx, const char * key); // returns -1 if key is not found; NOTE(review): otherwise presumably the key_id accepted by the getters in this header - confirm
 93    GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int64_t key_id); // NOTE(review): lifetime of the returned string is not stated here; presumably owned by ctx - confirm
 94
 95    GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int64_t key_id); // NOTE(review): presumably the value type of the KV pair (GGUF_TYPE_ARRAY for arrays) - confirm
 96    GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id); // NOTE(review): presumably the element type of an array-valued KV pair - confirm behavior for non-array keys
 97
 98    // typed getters for the value stored under key_id; will abort if the wrong type is used for the key
 99    GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int64_t key_id);
100    GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int64_t key_id);
101    GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int64_t key_id);
102    GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int64_t key_id);
103    GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int64_t key_id);
104    GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int64_t key_id);
105    GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int64_t key_id);
106    GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int64_t key_id);
107    GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int64_t key_id);
108    GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int64_t key_id);
109    GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id);
110    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int64_t key_id); // NOTE(review): lifetime of the returned string is not stated here; presumably owned by ctx - confirm
111    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id); // NOTE(review): presumably a raw pointer to the stored value - confirm which value types are accepted
112    GGML_API size_t       gguf_get_arr_n   (const struct gguf_context * ctx, int64_t key_id); // NOTE(review): presumably the number of elements of an array-valued KV pair - confirm
113
114    // get raw pointer to the first element of the array with the given key_id
115    // for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
116    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id); // NOTE(review): string arrays presumably have to be read with gguf_get_arr_str instead - confirm
117
118    // get ith C string from array with given key_id; NOTE(review): i is presumably 0-based and must be less than gguf_get_arr_n - confirm
119    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int64_t key_id, size_t i);
120
121    GGML_API int64_t        gguf_get_n_tensors    (const struct gguf_context * ctx); // NOTE(review): presumably the number of tensors registered in ctx - confirm
122    GGML_API int64_t        gguf_find_tensor      (const struct gguf_context * ctx, const char * name); // returns -1 if the tensor is not found
123    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id); // NOTE(review): presumably the offset within the tensor data binary blob, not within the file - confirm
124    GGML_API const char *   gguf_get_tensor_name  (const struct gguf_context * ctx, int64_t tensor_id);
125    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int64_t tensor_id);
126    GGML_API size_t         gguf_get_tensor_size  (const struct gguf_context * ctx, int64_t tensor_id); // size in bytes (the gguf_set_tensor_data note in this header counts it in bytes)
127
128    // removes key if it exists, returns id that the key had prior to removal (-1 if it didn't exist)
129    GGML_API int64_t gguf_remove_key(struct gguf_context * ctx, const char * key); // NOTE(review): ids of the remaining keys presumably change after a removal - confirm before caching key ids
130
131    // typed setters: each overrides an existing KV pair or adds a new one, the new KV pair is always at the back
132    GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t      val);
133    GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t       val);
134    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t     val);
135    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t      val);
136    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t     val);
137    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t      val);
138    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float        val);
139    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t     val);
140    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t      val);
141    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double       val);
142    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool         val);
143    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); // NOTE(review): presumably copies val, so the caller keeps ownership - confirm
144
145    // creates a new array with n elements of the given type and copies the corresponding number of bytes from data
146    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n); // n counts elements, not bytes; NOTE(review): presumably not meant for GGUF_TYPE_STRING (see gguf_set_arr_str) - confirm
147
148    // creates a new array with n strings and copies the corresponding strings from data
149    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, size_t n); // NOTE(review): data is presumably an array of n null-terminated C strings - confirm
150
151    // set or add KV pairs from another context: src is only read (const), ctx is the one being modified
152    GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
153
154    // add tensor to GGUF context, tensor name must be unique
155    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor); // NOTE(review): whether the tensor data is copied or only referenced is not stated here - confirm before freeing the tensor
156
157    // after changing a tensor's type, the offsets of all tensors with higher indices are immediately recalculated
158    //   in such a way that the tensor data remains as one contiguous block (except for padding)
159    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type); // the tensor is selected by name; NOTE(review): behavior for an unknown name is not stated here - confirm
160
161    // assumes that at least gguf_get_tensor_size bytes can be read from data
162    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data); // NOTE(review): whether data is copied or only referenced is not stated here - confirm before freeing it
163
164    // writing gguf files can be done in 3 ways:
165    //
166    // - write the entire gguf_context to a binary file in a single pass:
167    //
168    //   gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
169    //
170    // - write only the meta data to a file, then re-open the file and append the tensor data:
171    //
172    //   gguf_write_to_file(ctx, fname, /*only_meta =*/ true);
173    //   FILE * f = fopen(fname, "ab");
174    //   fwrite(..., f); // write tensor data
175    //   fclose(f);
176    //
177    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
178    //
179    //   FILE * f = fopen(fname, "wb");
180    //   const size_t size_meta = gguf_get_meta_size(ctx);
181    //   fseek(f, size_meta, SEEK_SET);
182    //   fwrite(..., f); // write tensor data
183    //   void * data = malloc(size_meta);
184    //   gguf_get_meta_data(ctx, data);
185    //   rewind(f);
186    //   fwrite(data, 1, size_meta, f);
187    //   free(data);
188    //   fclose(f);
189    //
190
191    // write the entire context to a binary file; with only_meta = true only the meta data is written (see the usage notes above)
192    GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); // NOTE(review): presumably returns false on failure - confirm
193
194    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
195    GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); // the usage example above uses this value to size the buffer given to gguf_get_meta_data
196
197    // writes the meta data to pointer "data"; the usage example above allocates gguf_get_meta_size(ctx) bytes for it
198    GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
199
200#ifdef  __cplusplus
201}
202#endif