1#!/usr/bin/env python3
  2from __future__ import annotations
  3
  4import uuid
  5import hashlib
  6
  7import logging
  8import argparse
  9import os
 10import sys
 11from pathlib import Path
 12
 13from tqdm import tqdm
 14
 15# Necessary to load the local gguf package
 16if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
 17    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 18
 19from gguf import GGUFReader  # noqa: E402
 20
 21
# Module-level logger for this tool.
logger = logging.getLogger("gguf-hash")

# Namespace UUID whose bytes are fed into the SHA-1 that produces the model's
# UUIDv5-style identifier. According to the original note it was generated with:
#   uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
UUID_NAMESPACE_LLAMA_CPP = uuid.UUID('ef001206-dadc-5f6d-a15f-3359e577d4e5')
 26
 27
 28# For more information about what field.parts and field.data represent,
 29# please see the comments in the modify_gguf.py example.
 30def gguf_hash(reader: GGUFReader, filename: str, disable_progress_bar: bool, no_layer: bool) -> None:
 31    sha1 = hashlib.sha1()
 32    sha256 = hashlib.sha256()
 33    uuidv5_sha1 = hashlib.sha1()
 34    uuidv5_sha1.update(UUID_NAMESPACE_LLAMA_CPP.bytes)
 35
 36    # Total Weight Calculation For Progress Bar
 37    total_weights = 0
 38    for n, tensor in enumerate(reader.tensors, 1):
 39
 40        # We don't need these
 41        if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
 42            continue
 43
 44        # Calculate Tensor Volume
 45        sum_weights_in_tensor = 1
 46        for dim in tensor.shape:
 47            sum_weights_in_tensor *= dim
 48        total_weights += sum_weights_in_tensor
 49
 50    # Hash Progress Bar
 51    bar = tqdm(desc="Hashing", total=total_weights, unit="weights", unit_scale=True, disable=disable_progress_bar)
 52
 53    # Hashing Process
 54    for tensor in reader.tensors:
 55
 56        # We don't need these
 57        if tensor.name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
 58            continue
 59
 60        # Progressbar
 61        sum_weights_in_tensor = 1
 62        for dim in tensor.shape:
 63            sum_weights_in_tensor *= dim
 64        bar.update(sum_weights_in_tensor)
 65
 66        if not no_layer:
 67
 68            sha1_layer = hashlib.sha1()
 69            sha1_layer.update(tensor.data.data)
 70            print("sha1      {0}  {1}:{2}".format(sha1_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
 71
 72            sha256_layer = hashlib.sha256()
 73            sha256_layer.update(tensor.data.data)
 74            print("sha256    {0}  {1}:{2}".format(sha256_layer.hexdigest(), filename, tensor.name)) # noqa: NP100
 75
 76        sha1.update(tensor.data.data)
 77        sha256.update(tensor.data.data)
 78        uuidv5_sha1.update(tensor.data.data)
 79
 80    # Flush Hash Progress Bar
 81    bar.close()
 82
 83    # Display Hash Output
 84    print("sha1      {0}  {1}".format(sha1.hexdigest(), filename)) # noqa: NP100
 85    print("sha256    {0}  {1}".format(sha256.hexdigest(), filename)) # noqa: NP100
 86    print("uuid      {0}  {1}".format(uuid.UUID(bytes=uuidv5_sha1.digest()[:16], version=5), filename)) # noqa: NP100
 87
 88
 89def main() -> None:
 90    parser = argparse.ArgumentParser(description="Dump GGUF file metadata")
 91    parser.add_argument("model",         type=str,            help="GGUF format model filename")
 92    parser.add_argument("--no-layer",    action="store_true", help="exclude per layer hash")
 93    parser.add_argument("--verbose",     action="store_true", help="increase output verbosity")
 94    parser.add_argument("--progressbar", action="store_true", help="enable progressbar")
 95    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
 96    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
 97    reader = GGUFReader(args.model, 'r')
 98    gguf_hash(reader, args.model, not args.progressbar, args.no_layer)
 99
100
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()