1#!/usr/bin/env bash
 2
 3set -eu
 4
 5if [ $# -lt 1 ]
 6then
 7    echo "usage:   $0 path_to_build_binary [path_to_temp_folder]"
 8    echo "example: $0 ../../build/bin ../../tmp"
 9    exit 1
10fi
11
12if [ $# -gt 1 ]
13then
14    TMP_DIR=$2
15else
16    TMP_DIR=/tmp
17fi
18
# Trace every command from here on so the log shows exactly what was run.
set -x

# Tools under test, all taken from the build directory passed as $1.
SPLIT=$1/llama-gguf-split
QUANTIZE=$1/llama-quantize
MAIN=$1/llama-completion
# Every file produced by this script lives in this sub-folder of the temp dir.
WORK_PATH=$TMP_DIR/quantize
# Directory two levels above this script; scripts/hf.sh is looked up under it.
# Both expansions are quoted so a script path containing whitespace or glob
# characters is not word-split before reaching dirname/realpath.
ROOT_DIR=$(realpath "$(dirname "$0")/../../")

mkdir -p "$WORK_PATH"

# Clean up in case of previously failed test
# Only the directory part is quoted: the trailing patterns must still glob.
rm -f -- "$WORK_PATH"/ggml-model-split*.gguf "$WORK_PATH"/ggml-model-requant*.gguf
31
# 1. Get a model
# Runs in a subshell so the directory change does not leak into later steps.
# The following steps read Qwen3-0.6B-Q8_0.gguf from $WORK_PATH, so hf.sh is
# expected to place the file in the current directory.
(
cd "$WORK_PATH"
"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
)
echo PASS
38
# 2. Split model
# The last argument is the output prefix for the shards. Paths are quoted so a
# build or temp directory containing whitespace is passed as a single word.
"$SPLIT" --split-max-tensors 28 "$WORK_PATH/Qwen3-0.6B-Q8_0.gguf" "$WORK_PATH/ggml-model-split"
echo PASS
echo
43
# 3. Requant model with '--keep-split'
# Input is the first shard of the split model; the hard-coded "of-00012" suffix
# assumes the split step produced 12 shards.
"$QUANTIZE" --allow-requantize --keep-split "$WORK_PATH/ggml-model-split-00001-of-00012.gguf" "$WORK_PATH/ggml-model-requant.gguf" Q4_K
echo PASS
echo

# 3a. Test the requanted model is loading properly
# The requantized output is loaded from its first shard.
"$MAIN" -no-cnv --model "$WORK_PATH/ggml-model-requant-00001-of-00012.gguf" -p "I believe the meaning of life is" --n-predict 32
echo PASS
echo
53
# 4. Requant model without '--keep-split'
# Same sharded input as the '--keep-split' step, but the result is written to
# (and later loaded from) the single file ggml-model-requant-merge.gguf.
"$QUANTIZE" --allow-requantize "$WORK_PATH/ggml-model-split-00001-of-00012.gguf" "$WORK_PATH/ggml-model-requant-merge.gguf" Q4_K
echo PASS
echo

# 4a. Test the requanted model is loading properly
"$MAIN" -no-cnv --model "$WORK_PATH/ggml-model-requant-merge.gguf" -p "I believe the meaning of life is" --n-predict 32
echo PASS
echo
63
# Clean up
# Removes the split and requantized files; the downloaded source model is kept.
# Only the directory part is quoted so the patterns still glob.
rm -f -- "$WORK_PATH"/ggml-model-split*.gguf "$WORK_PATH"/ggml-model-requant*.gguf