#!/usr/bin/env python3
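"""Dump the metadata and tensor layout of a GGUF file.

Example invocations (the script name and model.gguf are placeholders for
however this file is installed on your system):

    gguf_dump.py model.gguf                 # plain-text key/value and tensor dump
    gguf_dump.py --markdown model.gguf      # Markdown report
    gguf_dump.py --json model.gguf          # JSON (add --json-array for full arrays)
    gguf_dump.py --no-tensors model.gguf    # metadata only
"""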
from __future__ import annotations

import logging
import argparse
import os
import re
import sys
from pathlib import Path
from typing import Any

# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from gguf import GGUFReader, GGUFValueType, ReaderTensor  # noqa: E402

logger = logging.getLogger("gguf-dump")


def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
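    """Return (host_endian, file_endian) for the open reader.

    GGUFReader exposes the file's endianness via `endianess` (the library's
    spelling) and sets `byte_order` to 'S' when the file's byte order is
    swapped relative to the host, which is how the host endianness is
    inferred below.
    """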
    file_endian = reader.endianess.name
    if reader.byte_order == 'S':
        host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
    else:
        host_endian = file_endian
    return (host_endian, file_endian)


# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
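    """Print a human-readable listing of every key/value pair and, unless
    --no-tensors was given, a table of all tensors to stdout."""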
    host_endian, file_endian = get_file_host_endian(reader)
    print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')  # noqa: NP100
    print(f'* Dumping {len(reader.fields)} key/value pair(s)')  # noqa: NP100
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
        if field.types:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                content = field.contents()
                if len(content) > 60:
                    content = content[:57] + '...'
                log_message += ' = {0}'.format(repr(content))
            elif curr_type in reader.gguf_scalar_to_np:
                log_message += ' = {0}'.format(field.contents())
            else:
                content = repr(field.contents(slice(6)))
                if len(field.data) > 6:
                    content = content[:-1] + ', ...]'
                log_message += ' = {0}'.format(content)
        print(log_message)  # noqa: NP100
    if args.no_tensors:
        return
    print(f'* Dumping {len(reader.tensors)} tensor(s)')  # noqa: NP100
    for n, tensor in enumerate(reader.tensors, 1):
        prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
        print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')  # noqa: NP100


def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
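    """Emit the same metadata as dump_metadata, but as a single JSON document
    on stdout; array values are only included when --json-array is given."""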
    import json
    host_endian, file_endian = get_file_host_endian(reader)
    metadata: dict[str, Any] = {}
    tensors: dict[str, Any] = {}
    result = {
        "filename": args.model,
        "endian": file_endian,
        "metadata": metadata,
        "tensors": tensors,
    }
    for idx, field in enumerate(reader.fields.values()):
        curr: dict[str, Any] = {
            "index": idx,
            "type": field.types[0].name if field.types else 'UNKNOWN',
            "offset": field.offset,
        }
        metadata[field.name] = curr
        if field.types[:1] == [GGUFValueType.ARRAY]:
            curr["array_types"] = [t.name for t in field.types][1:]
            if not args.json_array:
                continue
            curr["value"] = field.contents()
        else:
            curr["value"] = field.contents()
    if not args.no_tensors:
        for idx, tensor in enumerate(reader.tensors):
            tensors[tensor.name] = {
                "index": idx,
                "shape": tensor.shape.tolist(),
                "type": tensor.tensor_type.name,
                "offset": tensor.field.offset,
            }
    json.dump(result, sys.stdout)


def markdown_table_with_alignment_support(header_map: list[dict[str, str]], data: list[dict[str, Any]]):
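    """Render `data` as a Markdown table string.

    `header_map` is a list of column descriptors, each a dict with
    'key_name' (the key into the row dicts), 'header_name' (the column
    title) and an optional 'align' of 'left', 'right' or 'center'.
    """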
    # JSON to Markdown table formatting: https://stackoverflow.com/a/72983854/2850957

    # Alignment Utility Function
    def strAlign(padding: int, alignMode: str | None, strVal: str):
        if alignMode == 'center':
            return strVal.center(padding)
        elif alignMode == 'right':
            return strVal.rjust(padding - 1) + ' '
        elif alignMode == 'left':
            return ' ' + strVal.ljust(padding - 1)
        else:  # default left
            return ' ' + strVal.ljust(padding - 1)

    def dashAlign(padding: int, alignMode: str | None):
        if alignMode == 'center':
            return ':' + '-' * (padding - 2) + ':'
        elif alignMode == 'right':
            return '-' * (padding - 1) + ':'
        elif alignMode == 'left':
            return ':' + '-' * (padding - 1)
        else:  # default left
            return '-' * padding

    # Calculate Padding For Each Column Based On Header and Data Length
    rowsPadding = {}
    for index, columnEntry in enumerate(header_map):
        padCount = max([len(str(v)) for d in data for k, v in d.items() if k == columnEntry['key_name']], default=0) + 2
        headerPadCount = len(columnEntry['header_name']) + 2
        rowsPadding[index] = headerPadCount if padCount <= headerPadCount else padCount

    # Render Markdown Header
    rows = []
    rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(columnEntry['header_name'])) for index, columnEntry in enumerate(header_map)))
    rows.append('|'.join(dashAlign(rowsPadding[index], columnEntry.get('align')) for index, columnEntry in enumerate(header_map)))

    # Render Tabular Data
    for item in data:
        rows.append('|'.join(strAlign(rowsPadding[index], columnEntry.get('align'), str(item[columnEntry['key_name']])) for index, columnEntry in enumerate(header_map)))

    # Convert Tabular String Rows Into String
    tableString = ""
    for row in rows:
        tableString += f'|{row}|\n'

    return tableString


def element_count_rounded_notation(count: int) -> str:
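    """Round an element count to a short human-readable form,
    e.g. 7000000000 -> '~7B'; counts of 1000 or less are returned as-is."""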
    if count > 1e15:
        # Quadrillions
        scaled_amount = count * 1e-15
        scale_suffix = "Q"
    elif count > 1e12:
        # Trillions
        scaled_amount = count * 1e-12
        scale_suffix = "T"
    elif count > 1e9:
        # Billions
        scaled_amount = count * 1e-9
        scale_suffix = "B"
    elif count > 1e6:
        # Millions
        scaled_amount = count * 1e-6
        scale_suffix = "M"
    elif count > 1e3:
        # Thousands
        scaled_amount = count * 1e-3
        scale_suffix = "K"
    else:
        # Under a thousand
        scaled_amount = count
        scale_suffix = ""
    return f"{'~' if count > 1e3 else ''}{round(scaled_amount)}{scale_suffix}"


def translate_tensor_name(name: str) -> str:
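    """Expand the standardized GGUF tensor-name abbreviations into a
    human-friendly description, e.g. 'blk.0.attn_q.weight' becomes
    'Block 0 Attention Query Weight'."""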
    words = name.split(".")

    # Source: https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#standardized-tensor-names
    abbreviation_dictionary = {
        'token_embd': 'Token embedding',
        'pos_embd': 'Position embedding',
        'output_norm': 'Output normalization',
        'output': 'Output',
        'attn_norm': 'Attention normalization',
        'attn_norm_2': 'Attention normalization',
        'attn_qkv': 'Attention query-key-value',
        'attn_q': 'Attention query',
        'attn_k': 'Attention key',
        'attn_v': 'Attention value',
        'attn_output': 'Attention output',
        'ffn_norm': 'Feed-forward network normalization',
        'ffn_up': 'Feed-forward network "up"',
        'ffn_gate': 'Feed-forward network "gate"',
        'ffn_down': 'Feed-forward network "down"',
        'ffn_gate_inp': 'Expert-routing layer for the Feed-forward network in Mixture of Expert models',
        'ffn_gate_exp': 'Feed-forward network "gate" layer per expert in Mixture of Expert models',
        'ffn_down_exp': 'Feed-forward network "down" layer per expert in Mixture of Expert models',
        'ffn_up_exp': 'Feed-forward network "up" layer per expert in Mixture of Expert models',
        'ssm_in': 'State space model input projections',
        'ssm_conv1d': 'State space model rolling/shift',
        'ssm_x': 'State space model selective parametrization',
        'ssm_a': 'State space model state compression',
        'ssm_d': 'State space model skip connection',
        'ssm_dt': 'State space model time step',
        'ssm_out': 'State space model output projection',
        'blk': 'Block',
        'enc': 'Encoder',
        'dec': 'Decoder',
    }

    expanded_words = []
    for word in words:
        word_norm = word.strip().lower()
        if word_norm in abbreviation_dictionary:
            expanded_words.append(abbreviation_dictionary[word_norm].title())
        else:
            expanded_words.append(word.title())

    return ' '.join(expanded_words)


def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
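    """Print a Markdown report of the file to stdout: the key/value metadata
    store followed, unless --no-tensors was given, by per-group tensor tables
    with shapes, element counts and bits-per-weight (BPW)."""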
    host_endian, file_endian = get_file_host_endian(reader)
    markdown_content = ""
    markdown_content += f'# {args.model} - GGUF Internal File Dump\n\n'
    markdown_content += f'- Endian: {file_endian} endian\n'
    markdown_content += '\n'
    markdown_content += '## Key Value Metadata Store\n\n'
    markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n'
    markdown_content += '\n'
    total_model_bytes = 0
    total_model_elements = 0

    kv_dump_table: list[dict[str, str | int]] = []
    for n, field in enumerate(reader.fields.values(), 1):
        if not field.types:
            pretty_type = 'N/A'
        elif field.types[0] == GGUFValueType.ARRAY:
            nest_count = len(field.types) - 1
            pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
        else:
            pretty_type = str(field.types[-1].name)

        def escape_markdown_inline_code(value_string):
            # Find the longest contiguous sequence of backticks in the string, then
            # wrap the string with one more backtick than that so it is escaped correctly
            max_backticks = max((len(match.group(0)) for match in re.finditer(r'`+', value_string)), default=0)
            inline_code_marker = '`' * (max_backticks + 1)

            # If the string starts or ends with a backtick, add a space at the beginning and end
            if value_string.startswith('`') or value_string.endswith('`'):
                value_string = f" {value_string} "

            return f"{inline_code_marker}{value_string}{inline_code_marker}"

        total_elements = len(field.data)
        value = ""
        if len(field.types) == 1:
            curr_type = field.types[0]
            if curr_type == GGUFValueType.STRING:
                truncate_length = 60
                value_string = str(bytes(field.parts[-1]), encoding='utf-8')
                if len(value_string) > truncate_length:
                    head = escape_markdown_inline_code(value_string[:truncate_length // 2])
                    tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
                    value = "{head}...{tail}".format(head=head, tail=tail)
                else:
                    value = escape_markdown_inline_code(value_string)
            elif curr_type in reader.gguf_scalar_to_np:
                value = str(field.parts[-1][0])
        else:
            if field.types[0] == GGUFValueType.ARRAY:
                curr_type = field.types[1]
                array_elements = []

                if curr_type == GGUFValueType.STRING:
                    render_element = min(5, total_elements)
                    for element_pos in range(render_element):
                        truncate_length = 30
                        value_string = str(bytes(field.parts[-1 - (total_elements - element_pos - 1) * 2]), encoding='utf-8')
                        if len(value_string) > truncate_length:
                            head = escape_markdown_inline_code(value_string[:truncate_length // 2])
                            tail = escape_markdown_inline_code(value_string[-truncate_length // 2:])
                            value = "{head}...{tail}".format(head=head, tail=tail)
                        else:
                            value = escape_markdown_inline_code(value_string)
                        array_elements.append(value)

                elif curr_type in reader.gguf_scalar_to_np:
                    render_element = min(7, total_elements)
                    for element_pos in range(render_element):
                        array_elements.append(str(field.parts[-1 - (total_elements - element_pos - 1)][0]))

                value = f'[ {", ".join(array_elements).strip()}{", ..." if total_elements > len(array_elements) else ""} ]'

        kv_dump_table.append({"n": n, "pretty_type": pretty_type, "total_elements": total_elements, "field_name": field.name, "value": value})

    kv_dump_table_header_map = [
        {'key_name': 'n',              'header_name': 'POS',   'align': 'right'},
        {'key_name': 'pretty_type',    'header_name': 'TYPE',  'align': 'left'},
        {'key_name': 'total_elements', 'header_name': 'Count', 'align': 'right'},
        {'key_name': 'field_name',     'header_name': 'Key',   'align': 'left'},
        {'key_name': 'value',          'header_name': 'Value', 'align': 'left'},
    ]

    markdown_content += markdown_table_with_alignment_support(kv_dump_table_header_map, kv_dump_table)

    markdown_content += "\n"

    if not args.no_tensors:
        # Group tensors by their prefix and maintain order
        tensor_prefix_order: list[str] = []
        tensor_name_to_key: dict[str, int] = {}
        tensor_groups: dict[str, list[ReaderTensor]] = {}
        total_elements = sum(tensor.n_elements for tensor in reader.tensors)

        # Parsing Tensors Record
        for key, tensor in enumerate(reader.tensors):
            tensor_components = tensor.name.split('.')

            # Classify Tensor Group
            tensor_group_name = "base"
            if tensor_components[0] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}"
            elif tensor_components[0] in ['enc', 'dec'] and tensor_components[1] == 'blk':
                tensor_group_name = f"{tensor_components[0]}.{tensor_components[1]}.{tensor_components[2]}"
            elif tensor_components[0] in ['enc', 'dec']:
                tensor_group_name = f"{tensor_components[0]}"

            # Check if new Tensor Group
            if tensor_group_name not in tensor_groups:
                tensor_groups[tensor_group_name] = []
                tensor_prefix_order.append(tensor_group_name)

            # Record Tensor and Tensor Position
            tensor_groups[tensor_group_name].append(tensor)
            tensor_name_to_key[tensor.name] = key

        # Tensors Mapping Dump
        markdown_content += f'## Tensors Overview {element_count_rounded_notation(total_elements)} Elements\n\n'
        markdown_content += f'Total number of elements in all tensors: {total_elements} Elements\n'
        markdown_content += '\n'

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            markdown_content += f"- [{translate_tensor_name(group)} Tensor Group - {element_count_rounded_notation(group_elements)} Elements](#{group.replace('.', '_')})\n"

        markdown_content += "\n"

        markdown_content += "### Tensor Data Offset\n"
        markdown_content += '\n'
        markdown_content += 'This table contains the offset and data segment size of each tensor, relative to the start of the file\n'
        markdown_content += '\n'

        tensor_mapping_table: list[dict[str, str | int]] = []
        for key, tensor in enumerate(reader.tensors):
            data_offset_pretty = '{0:#16x}'.format(tensor.data_offset)
            data_size_pretty = '{0:#16x}'.format(tensor.n_bytes)
            tensor_mapping_table.append({"t_id": key, "layer_name": tensor.name, "data_offset": data_offset_pretty, "data_size": data_size_pretty})

        tensors_mapping_table_header_map = [
            {'key_name': 't_id',        'header_name': 'T_ID',              'align': 'right'},
            {'key_name': 'layer_name',  'header_name': 'Tensor Layer Name', 'align': 'left'},
            {'key_name': 'data_offset', 'header_name': 'Data Offset (B)',   'align': 'right'},
            {'key_name': 'data_size',   'header_name': 'Data Size (B)',     'align': 'right'},
        ]

        markdown_content += markdown_table_with_alignment_support(tensors_mapping_table_header_map, tensor_mapping_table)
        markdown_content += "\n"

        for group in tensor_prefix_order:
            tensors = tensor_groups[group]
            group_elements = sum(tensor.n_elements for tensor in tensors)
            group_percentage = group_elements / total_elements * 100
            total_group_bytes = 0
            total_group_elements = 0
            markdown_content += f"### <a name=\"{group.replace('.', '_')}\">{translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements</a>\n\n"

            # Precalculate column sizing for visual consistency
            prettify_element_est_count_size: int = 1
            prettify_element_count_size: int = 1
            prettify_dimension_max_widths: dict[int, int] = {}
            for tensor in tensors:
                prettify_element_est_count_size = max(prettify_element_est_count_size, len(str(element_count_rounded_notation(tensor.n_elements))))
                prettify_element_count_size = max(prettify_element_count_size, len(str(tensor.n_elements)))
                for i, dimension_size in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))):
                    prettify_dimension_max_widths[i] = max(prettify_dimension_max_widths.get(i, 1), len(str(dimension_size)))

            # Generate Tensor Layer Table Content
            tensor_dump_table: list[dict[str, str | int]] = []
            for tensor in tensors:
                human_friendly_name = translate_tensor_name(tensor.name.replace(".weight", ".(W)").replace(".bias", ".(B)"))
                pretty_dimension = ' x '.join(f'{str(d):>{prettify_dimension_max_widths[i]}}' for i, d in enumerate(list(tensor.shape) + [1] * (4 - len(tensor.shape))))
                element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})"
                element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}"
                type_name_string = f"{tensor.tensor_type.name}"
                if tensor.n_elements > 0:
                    bpw = (tensor.n_bytes * 8) / tensor.n_elements
                else:
                    bpw = float('nan')
                tensor_dump_table.append({"t_id": tensor_name_to_key[tensor.name], "layer_name": tensor.name, "human_layer_name": human_friendly_name, "element_count": element_count_string, "pretty_dimension": pretty_dimension, "tensor_type": type_name_string, "bpw": f"{bpw:.4f}"})
                total_group_bytes += tensor.n_bytes
                total_group_elements += tensor.n_elements

            tensor_dump_table_header_map = [
                {'key_name': 't_id',             'header_name': 'T_ID',                             'align': 'right'},
                {'key_name': 'layer_name',       'header_name': 'Tensor Layer Name',                'align': 'left'},
                {'key_name': 'human_layer_name', 'header_name': 'Human Friendly Tensor Layer Name', 'align': 'left'},
                {'key_name': 'element_count',    'header_name': 'Elements',                         'align': 'left'},
                {'key_name': 'pretty_dimension', 'header_name': 'Shape',                            'align': 'left'},
                {'key_name': 'tensor_type',      'header_name': 'Type',                             'align': 'left'},
                {'key_name': 'bpw',              'header_name': 'BPW',                              'align': 'right'},
            ]

            markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table)

            markdown_content += "\n"
            markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n"
            markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n"
            if total_group_elements > 0:
                total_group_bpw = (total_group_bytes * 8) / total_group_elements
                markdown_content += f"- Bits per Weight (BPW) for {group}: {total_group_bpw:.4f} bits\n"
            else:
                markdown_content += f"- Bits per Weight (BPW) for {group}: undefined (no elements)\n"
            markdown_content += "\n\n"
            total_model_bytes += total_group_bytes
            total_model_elements += total_group_elements

        if total_model_elements > 0:
            total_model_bpw = (total_model_bytes * 8) / total_model_elements
            markdown_content += f"Total BPW for {os.path.basename(args.model)}: {total_model_bpw:.4f} bits"
        else:
            markdown_content += f"Total BPW for {os.path.basename(args.model)}: undefined (no elements)"
    print(markdown_content)  # noqa: NP100


def main() -> None:
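    """Parse command-line arguments and dispatch to the requested dump format."""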
    parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
    parser.add_argument("--json", action="store_true", help="Produce JSON output")
    parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
    parser.add_argument("--data-offset", action="store_true", help="Print only the offset where the tensor data starts")
    parser.add_argument("--data-alignment", action="store_true", help="Print only the data alignment applied globally to the tensor data")
    parser.add_argument("--markdown", action="store_true", help="Produce markdown output")
    parser.add_argument("--verbose", action="store_true", help="Increase output verbosity")

    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    if not args.json and not args.markdown and not args.data_offset and not args.data_alignment:
        logger.info(f'* Loading: {args.model}')

    reader = GGUFReader(args.model, 'r')

    if args.json:
        dump_metadata_json(reader, args)
    elif args.markdown:
        dump_markdown_metadata(reader, args)
    elif args.data_offset:
        print(reader.data_offset)  # noqa: NP100
    elif args.data_alignment:
        print(reader.alignment)  # noqa: NP100
    else:
        dump_metadata(reader, args)


if __name__ == '__main__':
    main()