diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/examples/sycl | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/examples/sycl')
| -rw-r--r-- | llama.cpp/examples/sycl/CMakeLists.txt | 9 | ||||
| -rw-r--r-- | llama.cpp/examples/sycl/README.md | 41 | ||||
| -rwxr-xr-x | llama.cpp/examples/sycl/build.sh | 23 | ||||
| -rw-r--r-- | llama.cpp/examples/sycl/ls-sycl-device.cpp | 13 | ||||
| -rwxr-xr-x | llama.cpp/examples/sycl/run-llama2.sh | 31 | ||||
| -rwxr-xr-x | llama.cpp/examples/sycl/test.sh | 130 | ||||
| -rw-r--r-- | llama.cpp/examples/sycl/win-build-sycl.bat | 31 | ||||
| -rw-r--r-- | llama.cpp/examples/sycl/win-run-llama2.bat | 11 | ||||
| -rw-r--r-- | llama.cpp/examples/sycl/win-test.bat | 11 |
9 files changed, 300 insertions, 0 deletions
diff --git a/llama.cpp/examples/sycl/CMakeLists.txt b/llama.cpp/examples/sycl/CMakeLists.txt new file mode 100644 index 0000000..e4d5083 --- /dev/null +++ b/llama.cpp/examples/sycl/CMakeLists.txt @@ -0,0 +1,9 @@ +# MIT license +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: MIT + +set(TARGET llama-ls-sycl-device) +add_executable(${TARGET} ls-sycl-device.cpp) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/llama.cpp/examples/sycl/README.md b/llama.cpp/examples/sycl/README.md new file mode 100644 index 0000000..8819d87 --- /dev/null +++ b/llama.cpp/examples/sycl/README.md @@ -0,0 +1,41 @@ +# llama.cpp/examples/sycl + +This example program provides the tools for llama.cpp for SYCL on Intel GPU. + +## Tool + +|Tool Name| Function|Status| +|-|-|-| +|llama-ls-sycl-device| List all SYCL devices with ID, compute capability, max work group size, etc.|Support| + +### llama-ls-sycl-device + +List all SYCL devices with ID, compute capability, max work group size, etc. + +1. Build the llama.cpp for SYCL for the specified target *(using GGML_SYCL_TARGET)*. + +2. Enable oneAPI running environment *(if GGML_SYCL_TARGET is set to INTEL -default-)* + +``` +source /opt/intel/oneapi/setvars.sh +``` + +3. 
Execute + +``` +./build/bin/llama-ls-sycl-device +``` + +Check the ID in startup log, like: + +``` +found 2 SYCL devices: +| | | | |Max | |Max |Global | | +| | | | |compute|Max work|sub |mem | | +|ID| Device Type| Name|Version|units |group |group|size | Driver version| +|--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------| +| 0| [level_zero:gpu:0]| Intel Arc A770 Graphics| 1.3| 512| 1024| 32| 16225M| 1.3.29138| +| 1| [level_zero:gpu:1]| Intel UHD Graphics 750| 1.3| 32| 512| 32| 62631M| 1.3.29138| + +``` + diff --git a/llama.cpp/examples/sycl/build.sh b/llama.cpp/examples/sycl/build.sh new file mode 100755 index 0000000..635e74f --- /dev/null +++ b/llama.cpp/examples/sycl/build.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# MIT license +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: MIT + +mkdir -p build +cd build +source /opt/intel/oneapi/setvars.sh + +#for FP16 +#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DLLAMA_OPENSSL=OFF # faster for long-prompt inference + +#for FP32 +cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_OPENSSL=OFF + +#build example/main +#cmake --build . --config Release --target main + +#build example/llama-bench +#cmake --build . --config Release --target llama-bench + +#build all binary +cmake --build . 
--config Release -j -v diff --git a/llama.cpp/examples/sycl/ls-sycl-device.cpp b/llama.cpp/examples/sycl/ls-sycl-device.cpp new file mode 100644 index 0000000..74a8b7f --- /dev/null +++ b/llama.cpp/examples/sycl/ls-sycl-device.cpp @@ -0,0 +1,13 @@ +// +// MIT license +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: MIT +// + + +#include "ggml-sycl.h" + +int main() { + ggml_backend_sycl_print_sycl_devices(); + return 0; +} diff --git a/llama.cpp/examples/sycl/run-llama2.sh b/llama.cpp/examples/sycl/run-llama2.sh new file mode 100755 index 0000000..d33f82f --- /dev/null +++ b/llama.cpp/examples/sycl/run-llama2.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +# MIT license +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: MIT +export ONEAPI_DEVICE_SELECTOR="level_zero:0" +source /opt/intel/oneapi/setvars.sh + +#export GGML_SYCL_DEBUG=1 + +#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. + +INPUT_PROMPT="Building a website can be done in 10 simple steps:\nStep 1:" +MODEL_FILE=models/llama-2-7b.Q4_0.gguf +NGL=99 +CONTEXT=4096 + +#support malloc device memory more than 4GB. 
+export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1 + +LOAD_MODE='--mmap' +if [ $# -gt 0 ]; then + GGML_SYCL_DEVICE=$1 + echo "use $GGML_SYCL_DEVICE as main GPU" + #use single GPU only + ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none ${LOAD_MODE} + +else + #use multiple GPUs with same max compute units + ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} ${LOAD_MODE} +fi diff --git a/llama.cpp/examples/sycl/test.sh b/llama.cpp/examples/sycl/test.sh new file mode 100755 index 0000000..140c191 --- /dev/null +++ b/llama.cpp/examples/sycl/test.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +# MIT license +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: MIT + +Help() { + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +This script processes files with specified options. + +Options: + -h, --help Display this help message and exit. + -c, --context <value> Set context length. Bigger need more memory. + -p, --promote <value> Prompt to start generation with. + -m, --model <value> Full model file path. + -mg,--main-gpu <value> Set main GPU ID (0 - n) for single GPU mode. + -sm,--split-mode <value> How to split the model across multiple GPUs, one of: + - none: use one GPU only + - layer (default): split layers and KV across GPUs + - row: split rows across GPUs + -ngl,--n-gpu-layers <value> Max. number of layers to store in VRAM (default: -1) + -lv,--log-verbosity <value> Set the verbosity threshold. Messages with a higher verbosity will be + ignored. 
Values: + - 0: generic output + - 1: error + - 2: warning + - 3: info + - 4: debug + + +EOF +} + +BIN_FILE=./build/bin/llama-completion +SEED=0 +GPUS_SETTING="" + +INPUT_PROMPT="Building a website can be done in 10 simple steps:\nStep 1:" +MODEL_FILE=models/llama-2-7b.Q4_0.gguf +NGL=99 +CONTEXT=4096 +GGML_SYCL_DEVICE=-1 +SPLIT_MODE=layer +LOG_VERBOSE=3 +while [[ $# -gt 0 ]]; do + case "$1" in + -c|--context) + CONTEXT=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -p|--promote) + # Option that takes the prompt text as its value argument + INPUT_PROMPT="$2" + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -m|--model) + MODEL_FILE="$2" + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -mg|--main-gpu) + GGML_SYCL_DEVICE=$2 + SPLIT_MODE=none + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -sm|--split-mode) + SPLIT_MODE=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -ngl|--n-gpu-layers) + NGL=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -lv|--log-verbosity) + LOG_VERBOSE=$2 + # Shift twice to consume both the option flag and its value + shift + shift + ;; + -h|--help) + Help + exit 0 + ;; + *) + # Handle unknown options or stop processing options + echo "Invalid option: $1" + # Optional: exit script or shift to treat remaining as positional args + exit 1 + ;; + esac +done + + + +source /opt/intel/oneapi/setvars.sh + +#export GGML_SYCL_DEBUG=1 + +#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. + +#support malloc device memory more than 4GB. 
+export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1 +echo "UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=${UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS}" + +if [ $GGML_SYCL_DEVICE -ne -1 ]; then + echo "Use $GGML_SYCL_DEVICE as main GPU" + #use single GPU only + GPUS_SETTING="-mg $GGML_SYCL_DEVICE -sm ${SPLIT_MODE}" + export ONEAPI_DEVICE_SELECTOR="level_zero:${GGML_SYCL_DEVICE}" + echo "ONEAPI_DEVICE_SELECTOR=${ONEAPI_DEVICE_SELECTOR}" +else + echo "Use all Intel GPUs, including iGPU & dGPU" + fi + +echo "run cmd: ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap " +ZES_ENABLE_SYSMAN=1 ${BIN_FILE} -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s ${SEED} -c ${CONTEXT} ${GPUS_SETTING} -lv ${LOG_VERBOSE} --mmap + diff --git a/llama.cpp/examples/sycl/win-build-sycl.bat b/llama.cpp/examples/sycl/win-build-sycl.bat new file mode 100644 index 0000000..fc8b33b --- /dev/null +++ b/llama.cpp/examples/sycl/win-build-sycl.bat @@ -0,0 +1,31 @@ + +:: MIT license +:: Copyright (C) 2024 Intel Corporation +:: SPDX-License-Identifier: MIT + + +IF not exist build (mkdir build) +cd build +if %errorlevel% neq 0 goto ERROR + +@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force +if %errorlevel% neq 0 goto ERROR + +:: for FP16 +:: faster for long-prompt inference +:: cmake -G "MinGW Makefiles" .. -DLLAMA_OPENSSL=OFF -DGGML_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON + +:: for FP32 +cmake -G "Ninja" .. -DLLAMA_OPENSSL=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release +if %errorlevel% neq 0 goto ERROR + +:: build all binary +cmake --build . -j +if %errorlevel% neq 0 goto ERROR + +cd .. 
+exit /B 0 + +:ERROR +echo command error: %errorlevel% +exit /B %errorlevel% diff --git a/llama.cpp/examples/sycl/win-run-llama2.bat b/llama.cpp/examples/sycl/win-run-llama2.bat new file mode 100644 index 0000000..1f2dab8 --- /dev/null +++ b/llama.cpp/examples/sycl/win-run-llama2.bat @@ -0,0 +1,11 @@ +:: MIT license +:: Copyright (C) 2024 Intel Corporation +:: SPDX-License-Identifier: MIT + +set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" +@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force + +:: support malloc device memory more than 4GB. +set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1 +set LOAD_MODE="--mmap" +.\build\bin\llama-completion.exe -m models\llama-2-7b.Q4_0.gguf -no-cnv -p %INPUT2% -n 400 -e -ngl 99 -s 0 %LOAD_MODE% diff --git a/llama.cpp/examples/sycl/win-test.bat b/llama.cpp/examples/sycl/win-test.bat new file mode 100644 index 0000000..1f2dab8 --- /dev/null +++ b/llama.cpp/examples/sycl/win-test.bat @@ -0,0 +1,11 @@ +:: MIT license +:: Copyright (C) 2024 Intel Corporation +:: SPDX-License-Identifier: MIT + +set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" +@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force + +:: support malloc device memory more than 4GB. +set UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1 +set LOAD_MODE="--mmap" +.\build\bin\llama-completion.exe -m models\llama-2-7b.Q4_0.gguf -no-cnv -p %INPUT2% -n 400 -e -ngl 99 -s 0 %LOAD_MODE% |
