1import pytest
2import subprocess
3import sys
4
# On-device staging directory (standard writable tmp location on Android).
tmp_path='/data/local/tmp'
# Layout of the pushed llama.cpp package on the device.
pkg_path=f'{tmp_path}/llama.cpp'
lib_path=f'{pkg_path}/lib'
bin_path=f'{pkg_path}/bin'

# Host-side path to the test model, relative to the directory pytest runs from.
model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf'
# Shell prefix executed on the device: cd into the package, then point both the
# dynamic loader and ADSP_LIBRARY_PATH (Qualcomm DSP loader — used by the HTP0
# device below) at the bundled libs before invoking a binary from bin_path.
cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}'
12
13
def run_cmd(cmd):
    """Run *cmd*, echo its combined output, and fail the test on non-zero exit.

    Args:
        cmd: argv list handed to subprocess.run (shell=False, so no quoting
             pitfalls on the host side; device-side quoting is the caller's job).
    """
    # Merge stderr into stdout so device-side errors appear in the test log
    # in the order they were produced.
    p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
    sys.stdout.write(p.stdout)
    # Include the command and exit code in the failure message; a bare assert
    # on returncode tells you nothing about *which* adb step failed.
    assert p.returncode == 0, f'command {cmd} failed with exit code {p.returncode}'
18
19
@pytest.mark.dependency()
def test_install():
    """Push the llama.cpp package to the device and mark its binaries executable."""
    install_steps = (
        ['adb', 'push', 'llama.cpp', f'{tmp_path}'],
        ['adb', 'shell', f'chmod 755 {bin_path}/*'],
    )
    for step in install_steps:
        run_cmd(step)
24
25
26## Basic cli tests
## Basic cli tests
def run_llama_cli(dev, opts):
    """Run llama-cli on the device against *dev* with extra options *opts*.

    A fixed seed, batch size and token budget are prepended so runs are
    comparable across backends.
    """
    prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.'
    full_opts = f'--batch-size 128 -n 128 -no-cnv --seed 42 {opts}'
    remote_cmd = f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {full_opts} -p "{prompt}"'
    run_cmd(['adb', 'shell', remote_cmd])
31
32
@pytest.mark.dependency(depends=['test_install'])
def test_llama_cli_cpu():
    """CLI smoke test on the CPU backend with quantized KV cache and flash attention."""
    cpu_opts = '-ctk q8_0 -ctv q8_0 -fa on'
    run_llama_cli('none', cpu_opts)
36
37
@pytest.mark.dependency(depends=['test_install'])
def test_llama_cli_gpu():
    """CLI smoke test on the OpenCL GPU backend with flash attention."""
    gpu_opts = '-fa on'
    run_llama_cli('GPUOpenCL', gpu_opts)
41
42
@pytest.mark.dependency(depends=['test_install'])
def test_llama_cli_npu():
    """CLI smoke test on the NPU (HTP0) with quantized KV cache and flash attention."""
    npu_opts = '-ctk q8_0 -ctv q8_0 -fa on'
    run_llama_cli('HTP0', npu_opts)
46
47
48## Basic bench tests
## Basic bench tests
def run_llama_bench(dev):
    """Run a short llama-bench pass (128 prefill / 32 decode tokens) on *dev*."""
    remote_cmd = f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32'
    run_cmd(['adb', 'shell', remote_cmd])
51
52
@pytest.mark.dependency(depends=['test_install'])
def test_llama_bench_cpu():
    """Benchmark throughput on the CPU backend."""
    device = 'none'
    run_llama_bench(device)
56
57
# NOTE: dependency marker added for consistency with the other device tests —
# without it this bench would still run (and fail confusingly) when the
# install step failed.
@pytest.mark.dependency(depends=['test_install'])
def test_llama_bench_gpu():
    """Benchmark throughput on the OpenCL GPU backend."""
    run_llama_bench('GPUOpenCL')
60
61
# NOTE: dependency marker added for consistency with the other device tests —
# without it this bench would still run (and fail confusingly) when the
# install step failed.
@pytest.mark.dependency(depends=['test_install'])
def test_llama_bench_npu():
    """Benchmark throughput on the NPU (HTP0 backend)."""
    run_llama_bench('HTP0')