#!/bin/bash

#  MIT license
#  Copyright (C) 2024 Intel Corporation
#  SPDX-License-Identifier: MIT
  6
  7Help() {
  8  cat << EOF
  9Usage: $(basename "$0") [OPTIONS]
 10
 11This script processes files with specified options.
 12
 13Options:
 14  -h, --help    Display this help message and exit.
 15  -c, --context <value>    Set context length. Bigger need more memory.
 16  -p, --promote <value>    Prompt to start generation with.
 17  -m, --model   <value>    Full model file path.
 18  -mg,--main-gpu <value>   Set main GPU ID (0 - n) for single GPU mode.
 19  -sm,--split-mode <value> How to split the model across multiple GPUs, one of:
 20                            - none: use one GPU only
 21                            - layer (default): split layers and KV across GPUs
 22                            - row: split rows across GPUs
 23  -ngl,--n-gpu-layers <value>  Max. number of layers to store in VRAM (default: -1)
 24  -lv,--log-verbosity <value>  Set the verbosity threshold. Messages with a higher verbosity will be
 25                               ignored. Values:
 26                                - 0: generic output
 27                                - 1: error
 28                                - 2: warning
 29                                - 3: info
 30                                - 4: debug
 31
 32
 33EOF
 34}
 35
 36BIN_FILE=./build/bin/llama-completion
 37SEED=0
 38GPUS_SETTING=""
 39
 40INPUT_PROMPT="Building a website can be done in 10 simple steps:\nStep 1:"
 41MODEL_FILE=models/llama-2-7b.Q4_0.gguf
 42NGL=99
 43CONTEXT=4096
 44GGML_SYCL_DEVICE=-1
 45SPLIT_MODE=layer
 46LOG_VERBOSE=3
 47while [[ $# -gt 0 ]]; do
 48    case "$1" in
 49        -c|--context)
 50            CONTEXT=$2
 51            # Shift twice to consume both the option flag and its value
 52            shift
 53            shift
 54            ;;
 55        -p|--promote)
 56            # Option that is a simple flag (boolean)
 57            INPUT_PROMPT="$2"
 58            # Shift once to consume the option flag
 59            shift
 60            shift
 61            ;;
 62        -m|--model)
 63            MODEL_FILE="$2"
 64            # Shift twice to consume both the option flag and its value
 65            shift
 66            shift
 67            ;;
 68        -mg|--main-gpu)
 69            GGML_SYCL_DEVICE=$2
 70            SPLIT_MODE=none
 71            # Shift twice to consume both the option flag and its value
 72            shift
 73            shift
 74            ;;
 75        -sm|--split-mode)
 76            SPLIT_MODE=$2
 77            # Shift twice to consume both the option flag and its value
 78            shift
 79            shift
 80            ;;
 81        -ngl|--n-gpu-layers)
 82            NGL=$2
 83            # Shift twice to consume both the option flag and its value
 84            shift
 85            shift
 86            ;;
 87        -lv|--log-verbosity)
 88            LOG_VERBOSE=$2
 89            # Shift twice to consume both the option flag and its value
 90            shift
 91            shift
 92            ;;
 93        -h|--help)
 94            Help
 95            exit 0
 96            ;;
 97        *)
 98            # Handle unknown options or stop processing options
 99            echo "Invalid option: $1"
100            # Optional: exit script or shift to treat remaining as positional args
101            exit 1
102            ;;
103    esac
104done
105
106
107
# Load the oneAPI environment. A missing file is reported but not fatal,
# because the environment may already have been set up by the caller.
if [[ -f /opt/intel/oneapi/setvars.sh ]]; then
    source /opt/intel/oneapi/setvars.sh
else
    echo "Warning: /opt/intel/oneapi/setvars.sh not found; assuming the oneAPI environment is already set" >&2
fi

#export GGML_SYCL_DEBUG=1

# ZES_ENABLE_SYSMAN=1 enables querying the free GPU memory through
# sycl::aspect::ext_intel_free_memory. Recommended when --split-mode is layer.

# Allow device memory allocations larger than 4GB.
export UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=1
echo "UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS=${UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS}"

# Translate the GPU choice into arguments for the binary.
# Globals read:    GGML_SYCL_DEVICE (-1 or unset: use all GPUs), SPLIT_MODE
# Globals written: GPUS_SETTING (array of extra arguments); in single-GPU mode
#                  ONEAPI_DEVICE_SELECTOR is exported as well.
# Outputs: progress messages on stdout, errors on stderr.
# Returns: 0 on success, 1 when GGML_SYCL_DEVICE is not an integer.
select_gpus() {
    local device=${GGML_SYCL_DEVICE:--1}
    local split_mode=${SPLIT_MODE:-layer}

    if ! [[ "${device}" =~ ^-?[0-9]+$ ]]; then
        echo "Invalid main GPU ID: ${device}" >&2
        return 1
    fi

    # `[` compares plain decimal integers; `[[ ]]` and `(( ))` would evaluate
    # the value as an arithmetic expression and read a leading 0 as octal.
    if [ "${device}" -ne -1 ]; then
        echo "Use ${device} as main GPU"
        # use a single GPU only
        GPUS_SETTING=(-mg "${device}" -sm "${split_mode}")
        export ONEAPI_DEVICE_SELECTOR="level_zero:${device}"
        echo "ONEAPI_DEVICE_SELECTOR=${ONEAPI_DEVICE_SELECTOR}"
    else
        echo "Use all Intel GPUs, including iGPU & dGPU"
        # Forward the split mode here too, so that --split-mode takes effect
        # even when no main GPU was chosen.
        GPUS_SETTING=(-sm "${split_mode}")
    fi
}

select_gpus || exit 1

# Build the command as an array so that values containing spaces (model path,
# prompt) stay single arguments.
cmd=("${BIN_FILE}" -m "${MODEL_FILE}" -no-cnv -p "${INPUT_PROMPT}" -n 400 -e
     -ngl "${NGL}" -s "${SEED}" -c "${CONTEXT}" "${GPUS_SETTING[@]}"
     -lv "${LOG_VERBOSE}" --mmap)

# %q prints every argument shell-quoted, so the logged line can be pasted back
# into a shell unchanged.
printf 'run cmd: ZES_ENABLE_SYSMAN=1'
printf ' %q' "${cmd[@]}"
printf '\n'
ZES_ENABLE_SYSMAN=1 "${cmd[@]}"
130