1#!/usr/bin/env python3
2from __future__ import annotations
3
4import logging
5import argparse
6import os
7import sys
8from tqdm import tqdm
9from pathlib import Path
10
11import numpy as np
12
# Necessary to load the local gguf package
# When running from a source checkout (and NO_LOCAL_GGUF is not set), put the
# in-tree gguf-py directory first on sys.path so the local package wins over
# any installed `gguf` distribution.
# NOTE(review): assumes this file sits three directory levels below the repo
# root, inside gguf-py — confirm if the script is relocated.
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
    sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import gguf

# Module-level logger for this script.
logger = logging.getLogger("gguf-convert-endian")
20
21
def byteswap_noop(tensor, block_offs):
    """Do nothing.

    Used as the dispatch entry for quantization formats whose blocks
    contain no fields that require byte-swapping.
    """
    return None
25
26
def byteswap_q4_0(tensor, block_offs):
    """Byte-swap one Q4_0 block of ``tensor.data`` in place.

    Each block_q4_0 starts with an f16 delta (scaling factor) followed by
    the packed quantized values; only the 2-byte delta needs swapping.
    """
    # View the two delta bytes as a single uint16 and swap it in place;
    # slicing a numpy array yields a view, so the swap hits tensor.data.
    tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16).byteswap(inplace=True)
33
34
def byteswap_q8_0(tensor, block_offs):
    """Byte-swap one Q8_0 block of ``tensor.data`` in place.

    Each block_q8_0 starts with an f16 delta (scaling factor) followed by
    32 int8 quantized values; only the 2-byte delta needs swapping.
    """
    # Reinterpret the leading two bytes as uint16 and swap them in place.
    tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16).byteswap(inplace=True)
41
42
def byteswap_q4_k(tensor, block_offs):
    """Byte-swap the two leading f16 fields of one Q4_K block in place.

    Each block_q4_k starts with two f16 values followed by 140 bytes of
    quantized data; only the two f16 fields need swapping.
    """
    # The two f16 fields are adjacent, so a single two-element uint16
    # view swaps both at once.
    fields = tensor.data[block_offs:block_offs + 4].view(dtype=np.uint16)
    fields.byteswap(inplace=True)
52
53
def byteswap_q6_k(tensor, block_offs):
    """Byte-swap the trailing f16 field of one Q6_K block in place.

    Each block_q6_k is 208 bytes of quantized data followed by a single
    f16 value; only that trailing f16 needs swapping.
    """
    # The f16 lives at byte offset 208 within the block.
    start = block_offs + 208
    tensor.data[start:start + 2].view(dtype=np.uint16).byteswap(inplace=True)
60
61
# Dispatch table: maps each supported quantized GGML tensor type to the
# routine that byte-swaps one block of that type in place.  Quantized types
# not listed here (other than F32/F16/BF16, which convert_byteorder handles
# directly) are rejected before any conversion starts.
byteswap_tensors = {
    gguf.GGMLQuantizationType.Q4_0: byteswap_q4_0,
    gguf.GGMLQuantizationType.Q8_0: byteswap_q8_0,
    gguf.GGMLQuantizationType.Q4_K: byteswap_q4_k,
    gguf.GGMLQuantizationType.Q6_K: byteswap_q6_k,
    # MXFP4 is mapped to the no-op handler: nothing in its blocks is swapped.
    gguf.GGMLQuantizationType.MXFP4: byteswap_noop,
}
69
70
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
    """Convert all fields and tensors of a GGUF file to the requested byte order.

    The conversion happens in place through the reader's memory-mapped data,
    so an interrupted run leaves the file corrupted.  Exits without changes
    when the file is already in the requested order, and returns early after
    the compatibility check when ``args.dry_run`` is set.

    Args:
        reader: An open ``gguf.GGUFReader`` (must be writable unless dry-run).
        args: Parsed CLI arguments; uses ``args.order`` ('big'/'little'/'native')
            and ``args.dry_run``.

    Raises:
        ValueError: if a tensor uses a quantization type that cannot be converted.
    """
    file_endian = reader.endianess.name
    # byte_order == 'S' means the file is byte-swapped relative to the host,
    # so the host's endianness is the opposite of the file's.
    if reader.byte_order == 'S':
        host_endian = 'BIG' if file_endian == 'LITTLE' else 'LITTLE'
    else:
        host_endian = file_endian
    order = host_endian if args.order == "native" else args.order.upper()
    logger.info(f"* Host is {host_endian} endian, GGUF file seems to be {file_endian} endian")
    if file_endian == order:
        logger.info(f"* File is already {order} endian. Nothing to do.")
        sys.exit(0)
    # Fail fast (before touching anything) if any tensor type is unsupported.
    logger.info("* Checking tensors for conversion compatibility")
    for tensor in reader.tensors:
        if tensor.tensor_type not in byteswap_tensors and \
                tensor.tensor_type not in (
                    gguf.GGMLQuantizationType.F32,
                    gguf.GGMLQuantizationType.F16,
                    gguf.GGMLQuantizationType.BF16,
                ):
            raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
    logger.info(f"* Preparing to convert from {file_endian} to {order}")
    if args.dry_run:
        return
    logger.warning("*** Warning *** Warning *** Warning **")
    logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
    if order != host_endian:
        logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
    logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
    logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
    response = input("YES, I am sure> ")
    if response != "YES":
        logger.warning("You didn't enter YES. Okay then, see ya!")
        sys.exit(0)
    logger.info(f"* Converting fields ({len(reader.fields)})")
    for idx, field in enumerate(reader.fields.values()):
        logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
        for part in field.parts:
            part.byteswap(inplace=True)
    logger.info(f"* Converting tensors ({len(reader.tensors)})")

    for idx, tensor in enumerate(pbar := tqdm(reader.tensors, desc="Converting tensor")):
        log_message = (
            f"Converting tensor {repr(tensor.name)}, "
            f"type={tensor.tensor_type.name}, "
            f"elements={tensor.n_elements} "
        )

        # Byte-swap each part of the tensor's field (metadata/offsets).
        for part in tensor.field.parts:
            part.byteswap(inplace=True)

        # Byte-swap tensor data if necessary
        if tensor.tensor_type in byteswap_tensors:
            # Flatten to 1-D so quant blocks can be addressed by byte offset.
            oldshape = tensor.data.shape
            tensor.data.resize(int(np.prod(oldshape)))

            # GGML_QUANT_SIZES maps type -> (block element count, block byte size);
            # index [1] is the block size in bytes.
            block_size = gguf.constants.GGML_QUANT_SIZES[tensor.tensor_type][1]
            byteswap_func = byteswap_tensors[tensor.tensor_type]

            n_blocks = len(tensor.data) // block_size
            for block_num in (inner_pbar := tqdm(range(n_blocks), desc="Byte-swapping Blocks", leave=False)):
                byteswap_func(tensor, block_num * block_size)

                # Only refresh the description occasionally; doing it every
                # block would dominate the loop's runtime.
                if block_num % 100000 == 0:
                    # Bug fix: the original used integer division
                    # ((n_blocks - block_num) // n_blocks), which is always 0
                    # (1 only at the very first block).  Use true division and
                    # render the remaining fraction as a percentage.
                    inner_pbar.set_description(f"Byte-swapping Blocks [{(n_blocks - block_num) / n_blocks:.0%}]")

            # restore old shape in case it's ever used
            tensor.data.resize(oldshape)
        elif tensor.tensor_type == gguf.GGMLQuantizationType.BF16:
            # Special case for BF16
            # It is 2-bytes data, but by default view loads it as 1-byte data.
            # Change to correct view before byteswapping.
            tensor.data.view(dtype=np.uint16).byteswap(inplace=True)
        else:
            # F32/F16 arrays already carry a multi-byte dtype; swap directly.
            tensor.data.byteswap(inplace=True)

        pbar.set_description(log_message)

    logger.info("* Completion")
158
159
def main() -> None:
    """Command-line entry point: parse arguments and run the conversion."""
    parser = argparse.ArgumentParser(description="Convert GGUF file byte order")
    parser.add_argument("model", type=str, help="GGUF format model filename")
    parser.add_argument(
        "order", type=str, choices=['big', 'little', 'native'],
        help="Requested byte order",
    )
    parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

    # When invoked with no arguments at all, print the help text instead
    # of an argparse error.
    args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    logger.info(f'* Loading: {args.model}')
    # A dry run opens the file read-only so it cannot be modified.
    mode = 'r' if args.dry_run else 'r+'
    convert_byteorder(gguf.GGUFReader(args.model, mode), args)


if __name__ == "__main__":
    main()