#!/usr/bin/env bash

# make sure we are in the right directory
# (resolve the directory containing this script so it can be run from anywhere)
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# quoted + checked: this runs before `set -e`, so a failed cd must be explicit
cd "$SCRIPT_DIR" || exit 1

#export LLAMA_CACHE="$SCRIPT_DIR/tmp"

# -e: abort on error, -u: error on unset variables, -x: trace commands
set -eux

# per-model transcripts are written here by the test loop
mkdir -p "$SCRIPT_DIR/output"

# build and run everything from the project root (two levels up)
PROJ_ROOT="$SCRIPT_DIR/../.."
cd "$PROJ_ROOT"
15
# Check if the first argument is "big", then run test with big models
# This is useful if we're running the script on a larger machine, so we can test the big models
# "huge" implies "big" (see below).
RUN_BIG_TESTS=false
RUN_HUGE_TESTS=false
case "${1:-}" in
    big)
        RUN_BIG_TESTS=true
        echo "Include BIG models..."
        ;;
    huge)
        RUN_BIG_TESTS=true
        RUN_HUGE_TESTS=true
        echo "Include BIG and HUGE models..."
        ;;
esac
30
###############

# Parallel arrays describing each registered test case; indices line up.
declare -a arr_prefix=()      # summary label: "[vision]" or "[audio] "
declare -a arr_hf=()          # HF model ref, optionally with :QUANT suffix
declare -a arr_extra_args=()  # extra CLI args, pre-quoted with printf %q
declare -a arr_file=()        # input media file name under $SCRIPT_DIR
37
# Register a vision test case.
# $1   - HF model ref (repo[:quant])
# $2.. - optional extra CLI args, stored shell-quoted for the later eval
# Appends one entry to each of the parallel arrays; input is test-1.jpeg.
add_test_vision() {
    local model=$1
    shift
    local quoted_args=""
    if (( $# > 0 )); then
        # %q-quote each remaining arg so multi-word values survive eval
        quoted_args=$(printf " %q" "$@")
    fi
    arr_prefix+=("[vision]")
    arr_hf+=("$model")
    arr_extra_args+=("$quoted_args")
    arr_file+=("test-1.jpeg")
}
50
# Register an audio test case.
# $1   - HF model ref (repo[:quant])
# $2.. - optional extra CLI args, stored shell-quoted for the later eval
# Appends one entry to each of the parallel arrays; input is test-2.mp3.
# The trailing space in "[audio] " keeps summary columns aligned with "[vision]".
add_test_audio() {
    local model=$1
    shift
    local quoted_args=""
    if (( $# > 0 )); then
        # %q-quote each remaining arg so multi-word values survive eval
        quoted_args=$(printf " %q" "$@")
    fi
    arr_prefix+=("[audio] ")
    arr_hf+=("$model")
    arr_extra_args+=("$quoted_args")
    arr_file+=("test-2.mp3")
}
63
64add_test_vision "ggml-org/SmolVLM-500M-Instruct-GGUF:Q8_0"
65add_test_vision "ggml-org/SmolVLM2-2.2B-Instruct-GGUF:Q4_K_M"
66add_test_vision "ggml-org/SmolVLM2-500M-Video-Instruct-GGUF:Q8_0"
67add_test_vision "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"
68add_test_vision "THUDM/glm-edge-v-5b-gguf:Q4_K_M" -p "name of the newspaper?<__media__>"
69add_test_vision "second-state/Llava-v1.5-7B-GGUF:Q2_K" --chat-template vicuna
70add_test_vision "cjpais/llava-1.6-mistral-7b-gguf:Q3_K_M" --chat-template vicuna
71add_test_vision "ibm-research/granite-vision-3.2-2b-GGUF:Q4_K_M"
72add_test_vision "second-state/MiniCPM-Llama3-V-2_5-GGUF:Q2_K" # model from openbmb is corrupted
73add_test_vision "openbmb/MiniCPM-V-2_6-gguf:Q2_K"
74add_test_vision "openbmb/MiniCPM-o-2_6-gguf:Q4_0"
75add_test_vision "bartowski/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M"
76add_test_vision "ggml-org/Qwen2.5-VL-3B-Instruct-GGUF:Q4_K_M"
77add_test_vision "ggml-org/InternVL2_5-1B-GGUF:Q8_0"
78add_test_vision "ggml-org/InternVL3-1B-Instruct-GGUF:Q8_0"
79add_test_vision "ggml-org/Qwen2.5-Omni-3B-GGUF:Q4_K_M"
80add_test_vision "ggml-org/LFM2-VL-450M-GGUF:Q8_0"
81add_test_vision "ggml-org/granite-docling-258M-GGUF:Q8_0"
82add_test_vision "ggml-org/LightOnOCR-1B-1025-GGUF:Q8_0"
83
84add_test_audio "ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF:Q8_0"
85add_test_audio "ggml-org/Qwen2.5-Omni-3B-GGUF:Q4_K_M"
86add_test_audio "ggml-org/Voxtral-Mini-3B-2507-GGUF:Q4_K_M"
87add_test_audio "ggml-org/LFM2-Audio-1.5B-GGUF:Q8_0"
88
# to test the big models, run: ./tests.sh big
if [ "$RUN_BIG_TESTS" = true ]; then
    add_test_vision "ggml-org/pixtral-12b-GGUF:Q4_K_M"
    # no :QUANT suffix — the repo's default quant is used
    add_test_vision "ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF" --chat-template mistral-v7
    add_test_vision "ggml-org/Qwen2-VL-2B-Instruct-GGUF:Q4_K_M"
    add_test_vision "ggml-org/Qwen2-VL-7B-Instruct-GGUF:Q4_K_M"
    add_test_vision "ggml-org/Qwen2.5-VL-3B-Instruct-GGUF:Q4_K_M"
    add_test_vision "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M"
    add_test_vision "ggml-org/Qwen3-VL-2B-Instruct-GGUF:Q8_0"
    add_test_vision "ggml-org/InternVL3-8B-Instruct-GGUF:Q4_K_M"
    add_test_vision "ggml-org/InternVL3-14B-Instruct-GGUF:Q4_K_M"
    add_test_vision "ggml-org/Qwen2.5-Omni-7B-GGUF:Q4_K_M"
    # add_test_vision "ggml-org/Qwen2.5-VL-32B-Instruct-GGUF:Q4_K_M" # does not work on my mac M3 Ultra
    # add_test_vision "ggml-org/Kimi-VL-A3B-Thinking-2506-GGUF:Q4_K_M" # not always working

    add_test_audio  "ggml-org/ultravox-v0_5-llama-3_1-8b-GGUF:Q4_K_M"
    add_test_audio  "ggml-org/Qwen2.5-Omni-7B-GGUF:Q4_K_M"
fi
107
# to test the huge models, run: ./tests.sh huge
# this will run both the big and huge models
# huge models are > 32B parameters
if [ "$RUN_HUGE_TESTS" = true ]; then
    add_test_vision "ggml-org/Qwen2.5-VL-72B-Instruct-GGUF:Q4_K_M"
    add_test_vision "ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF:IQ1_S"
fi

# these models always give the wrong answer, not sure why
# add_test_vision "ggml-org/SmolVLM-Instruct-GGUF:Q4_K_M"
# add_test_vision "ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0"
# add_test_vision "ggml-org/SmolVLM2-256M-Video-Instruct-GGUF:Q8_0"

# these models have broken chat templates, not usable
# add_test_vision "cmp-nct/Yi-VL-6B-GGUF:Q5_K"
# add_test_vision "guinmoon/MobileVLM-3B-GGUF:Q4_K_M" "deepseek"
124
###############

# Compile the single CLI binary that every test case below invokes.
cmake --build build -j --target llama-mtmd-cli

# One colored OK/FAIL summary line per test, printed at the very end.
declare -a arr_res=()
130
# Run every registered test case, streaming output live and grading it.
for i in "${!arr_hf[@]}"; do
    bin="llama-mtmd-cli"
    prefix="${arr_prefix[$i]}"
    hf="${arr_hf[$i]}"
    extra_args="${arr_extra_args[$i]}"
    inp_file="${arr_file[$i]}"

    echo "Running test with binary: $bin and HF model: $hf"
    echo ""
    echo ""

    # The command is built as a string (eval'd below) so the pre-quoted
    # extra_args can carry multi-word values; everything else is %q-quoted.
    cmd="$(printf %q "$PROJ_ROOT/build/bin/$bin") \
        -hf $(printf %q "$hf") \
        --image $(printf %q "$SCRIPT_DIR/$inp_file") \
        --temp 0 -n 128 \
        ${extra_args}"

    # if extra_args does not contain -p, we add a default prompt
    # NOTE(review): this is a plain substring match, so any extra arg that
    # merely contains "-p" would suppress the default prompt — none of the
    # currently registered args do.
    if ! [[ "$extra_args" =~ "-p" ]]; then
        cmd+=" -p \"what is the publisher name of the newspaper?\""
    fi

    # tee to /dev/tty so progress is visible while the transcript is captured
    output=$(eval "$cmd" 2>&1 | tee /dev/tty)

    # save the full transcript; '/' in the model ref is not path-safe, so
    # replace it — the target is quoted so paths with spaces cannot split
    echo "$output" > "$SCRIPT_DIR/output/$bin-$(echo "$hf" | tr '/' '-').log"

    # either contains "new york" or both "men" and "walk"
    if echo "$output" | grep -iq "new york" \
        || (echo "$output" | grep -iq "men" && echo "$output" | grep -iq "walk")
    then
        result="$prefix \033[32mOK\033[0m: $hf"
    else
        result="$prefix \033[31mFAIL\033[0m: $hf"
    fi
    echo -e "$result"
    arr_res+=("$result")

    echo ""
    echo ""
    echo ""
    echo "#################################################"
    echo "#################################################"
    echo ""
    echo ""
done
176
# Disable command tracing for the final, human-readable summary.
set +x

# Print one OK/FAIL line per test; '%b' interprets the embedded \033 color
# escapes, matching what `echo -e` would do.
for idx in "${!arr_res[@]}"; do
    printf '%b\n' "${arr_res[idx]}"
done
echo ""
echo "Output logs are saved in $SCRIPT_DIR/output"