1#!/usr/bin/env python3
2from __future__ import annotations
3
4import uuid
5import hashlib
6
7import logging
8import argparse
9import os
10import sys
11from pathlib import Path
12
13from tqdm import tqdm
14
# Necessary to load the local gguf package.
# Unless NO_LOCAL_GGUF is present in the environment (with any value), and
# provided a 'gguf-py' directory exists four levels above this file, the
# directory three levels above this file is put at the front of sys.path so
# that it is searched first by the `gguf` import that follows.
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
18
19from gguf import GGUFReader # noqa: E402
20
21
# Module-level logger for this script.
logger = logging.getLogger("gguf-hash")

# Namespace UUID whose raw bytes are fed into the UUID hash in gguf_hash()
# before any tensor data. Presumably the precomputed value of the expression
# below, kept as a literal instead of being re-derived at import time -- TODO confirm.
# UUID_NAMESPACE_LLAMA_CPP = uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
26
27
# Tensor-name suffixes that are left out of both the progress total and every hash.
_SKIPPED_TENSOR_SUFFIXES = (".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")


def _count_weights(shape) -> int:
    """Return the product of the dimensions in *shape* (1 for an empty shape)."""
    count = 1
    for dim in shape:
        # Coerce to a plain int so the product stays an exact Python integer.
        # NOTE(review): assumes the reader may report dimensions as fixed-width
        # numeric scalars; the shape element type is not visible in this file.
        count *= int(dim)
    return count


# For more information about what field.parts and field.data represent,
# please see the comments in the modify_gguf.py example.
def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None:
    """Hash the tensor data exposed by *reader* and print the results to stdout.

    Tensors are processed in the order given by ``reader.tensors``; tensors
    whose name ends with one of ``_SKIPPED_TENSOR_SUFFIXES`` are ignored.
    Three whole-file digests are accumulated over the remaining tensors'
    data: SHA-1, SHA-256, and a UUID built from a SHA-1 that is seeded with
    ``UUID_NAMESPACE_LLAMA_CPP.bytes``.

    Args:
        reader: Source of the tensors. Each tensor must provide ``name``,
            ``shape`` and ``data`` (with a ``.data`` buffer accepted by hashlib).
        filename: Label printed next to every digest; it is not opened here.
        disable_progress_bar: Passed to tqdm as ``disable``.
        no_layer: When true, the per-tensor ``sha1``/``sha256`` lines are
            omitted and only the three whole-file lines are printed.
    """
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    uuidv5_sha1 = hashlib.sha1()
    uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)

    # Apply the skip filter and size each tensor once, so the progress total
    # and the hashing loop below are guaranteed to cover the same tensors.
    selected = [
        (tensor, _count_weights(tensor.shape))
        for tensor in reader.tensors
        if not tensor.name.endswith(_SKIPPED_TENSOR_SUFFIXES)
    ]
    total_weights = sum(weight_count for _, weight_count in selected)

    # The context manager closes (flushes) the bar even if hashing raises.
    with tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar) as bar:
        for tensor, weight_count in selected:
            bar.update(weight_count)

            # Fetch the underlying buffer once; it feeds up to five digests.
            payload = tensor.data.data

            if not no_layer:
                # Per-tensor digests use fresh hash objects so they are
                # independent of the running whole-file digests.
                print("sha1 {0} {1}:{2}".format(hashlib.sha1(payload).hexdigest(), filename, tensor.name))  # noqa: NP100
                print("sha256 {0} {1}:{2}".format(hashlib.sha256(payload).hexdigest(), filename, tensor.name))  # noqa: NP100

            sha1.update(payload)
            sha256.update(payload)
            uuidv5_sha1.update(payload)

    # Display Hash Output
    print("sha1 {0} {1}".format(sha1.hexdigest(), filename))  # noqa: NP100
    print("sha256 {0} {1}".format(sha256.hexdigest(), filename))  # noqa: NP100
    # The UUID is built from the first 16 bytes of the seeded SHA-1 digest,
    # with uuid.UUID asked to mark it as version 5.
    print("uuid {0} {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename))  # noqa: NP100
87
88
def main() -> None:
    """Command-line entry point: parse arguments and print hashes for one GGUF file.

    With no command-line arguments at all, the parser is handed ``--help`` so
    the usage text is shown instead of a missing-argument error. Otherwise the
    model file is opened read-only with ``GGUFReader`` and passed to
    ``gguf_hash``; the progress bar stays disabled unless ``--progressbar``
    is given.
    """
    # The description previously read "Dump GGUF file metadata", which does not
    # match what this script does (it prints hashes of the tensor data).
    parser = argparse.ArgumentParser(description="Generate SHA1, SHA256 and UUIDv5 hashes of the tensor data in a GGUF file")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument("--no-layer", action="store_true", help="exclude per layer hash")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
    # parse_args(None) reads sys.argv; a bare invocation falls back to --help.
    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    reader = GGUFReader(args.model, 'r')
    # gguf_hash takes a "disable" flag, hence the negation of --progressbar.
    gguf_hash(reader, args.model, not args.progressbar, args.no_layer)
99
100
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()