1#!/usr/bin/env bash
 2
 3set -eu
 4
 5if [ $# -lt 1 ]
 6then
 7    echo "usage:   $0 path_to_build_binary [path_to_temp_folder]"
 8    echo "example: $0 ../../build/bin ../../tmp"
 9    exit 1
10fi
11
# Optional second argument selects the temp folder; /tmp is used when it is
# missing or empty (an empty value would otherwise put WORK_PATH directly
# under the filesystem root).
TMP_DIR=${2:-/tmp}

# Trace every command from here on.
set -x

# Binaries exercised by this script, both taken from the directory given as $1.
SPLIT=$1/llama-gguf-split
MAIN=$1/llama-completion
# Directory where the model and all generated shards are written.
WORK_PATH=$TMP_DIR/gguf-split
# Two levels above this script's directory. The expansions are quoted so that
# a script path containing spaces or glob characters reaches dirname/realpath
# as a single argument.
ROOT_DIR=$(realpath "$(dirname "$0")/../../")
25
# Make sure the working directory exists before anything is written to it.
mkdir -p "$WORK_PATH"

# Clean up in case of previously failed test.
# The directory part is quoted so a path with spaces stays one word; the
# wildcard part is left unquoted so it still expands.
rm -f -- "$WORK_PATH"/ggml-model-split*.gguf "$WORK_PATH"/ggml-model-merge*.gguf

# 1. Get a model
# Run in a subshell so the directory change does not leak into later steps.
(
    cd "$WORK_PATH"
    "$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
)
echo PASS
37
# Run a short completion against the given model file to confirm it loads.
# Arguments: $1 - path to a GGUF file (a merged model or the first shard).
# Returns:   the exit status of $MAIN; with `set -e` active a failure aborts
#            the script.
check_model_loads() {
    "$MAIN" -no-cnv --model "$1" -p "I believe the meaning of life is" --n-predict 32
}

# 2. Split with max tensors strategy
"$SPLIT" --split-max-tensors 28 "$WORK_PATH"/Qwen3-0.6B-Q8_0.gguf "$WORK_PATH"/ggml-model-split
echo PASS
echo

# 2b. Test the sharded model is loading properly
check_model_loads "$WORK_PATH"/ggml-model-split-00001-of-00012.gguf
echo PASS
echo

# 3. Merge
"$SPLIT" --merge "$WORK_PATH"/ggml-model-split-00001-of-00012.gguf "$WORK_PATH"/ggml-model-merge.gguf
echo PASS
echo

# 3b. Test the merged model is loading properly
check_model_loads "$WORK_PATH"/ggml-model-merge.gguf
echo PASS
echo

# 4. Split with no tensors in the first split
"$SPLIT" --split-max-tensors 32 --no-tensor-first-split "$WORK_PATH"/ggml-model-merge.gguf "$WORK_PATH"/ggml-model-split-32-tensors
echo PASS
echo

# 4b. Test the sharded model is loading properly
check_model_loads "$WORK_PATH"/ggml-model-split-32-tensors-00001-of-00011.gguf
echo PASS
echo

# 5. Merge
# Disabled (reason not recorded here). The first-shard name uses the same
# 00011 shard count as step 4b, which loads the output of step 4.
#"$SPLIT" --merge "$WORK_PATH"/ggml-model-split-32-tensors-00001-of-00011.gguf "$WORK_PATH"/ggml-model-merge-2.gguf
#echo PASS
#echo

# 5b. Test the merged model is loading properly
#check_model_loads "$WORK_PATH"/ggml-model-merge-2.gguf
#echo PASS
#echo

# 6. Split with size strategy
"$SPLIT" --split-max-size 500M "$WORK_PATH"/ggml-model-merge.gguf "$WORK_PATH"/ggml-model-split-500M
echo PASS
echo

# 6b. Test the sharded model is loading properly
check_model_loads "$WORK_PATH"/ggml-model-split-500M-00001-of-00002.gguf
echo PASS
echo
87
# Clean up
# Delete the split shards and merged models generated by this script.
# Arguments: $1 - directory that holds the generated .gguf files.
remove_test_artifacts() {
    # Only the directory is quoted; the wildcard part must stay unquoted so
    # the shell still expands it.
    rm -f -- "$1"/ggml-model-split*.gguf "$1"/ggml-model-merge*.gguf
}
remove_test_artifacts "$WORK_PATH"