name: Release

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
    # '**/*.cmake' (previously '**/.cmake', which only matches a file literally
    # named ".cmake") so edits to cmake helper/toolchain scripts retrigger releases.
    paths: ['.github/workflows/release.yml', '**/CMakeLists.txt', '**/*.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']

# One release run per branch at a time; a new push cancels the in-flight run.
# For non-branch events github.run_id keeps every run in its own group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Common CMake flags appended by most build jobs below.
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"

jobs:
  # Native Apple Silicon build (Metal enabled), packaged as a tar.gz artifact.
  macOS-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Full history so ./.github/actions/get-tag-name can derive the tag.
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-arm64
          evict-old-files: 1d

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='@loader_path' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DLLAMA_BUILD_BORINGSSL=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DGGML_RPC=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          # BSD tar: -s rewrites "./" so files land under llama-<tag>/ inside the archive.
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
          name: llama-bin-macos-arm64.tar.gz
 70
  # Intel macOS build; Metal is off (see comment in the Build step).
  macOS-x64:
    runs-on: macos-15-intel

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Full history so ./.github/actions/get-tag-name can derive the tag.
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: macOS-latest-cmake-x64
          evict-old-files: 1d

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          # Metal is disabled due to intermittent failures with Github runners not having a GPU:
          # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='@loader_path' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DLLAMA_BUILD_BORINGSSL=ON \
            -DGGML_METAL=OFF \
            -DGGML_RPC=ON \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          # BSD tar: -s rewrites "./" so files land under llama-<tag>/ inside the archive.
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz
          name: llama-bin-macos-x64.tar.gz
118
119  ubuntu-22-cpu:
120    strategy:
121      matrix:
122        include:
123          - build: 'x64'
124            os: ubuntu-22.04
125          - build: 's390x'
126            os: ubuntu-24.04-s390x
127          # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
128          # - build: 'arm64'
129          #   os: ubuntu-22.04-arm
130
131    runs-on: ${{ matrix.os }}
132
133    steps:
134      - name: Clone
135        id: checkout
136        uses: actions/checkout@v6
137        with:
138          fetch-depth: 0
139
140      - name: ccache
141        uses: ggml-org/ccache-action@v1.2.16
142        with:
143          key: ubuntu-cpu-cmake-${{ matrix.build }}
144          evict-old-files: 1d
145
146      - name: Dependencies
147        id: depends
148        run: |
149          sudo apt-get update
150          sudo apt-get install build-essential libssl-dev
151
152      - name: Build
153        id: cmake_build
154        run: |
155          cmake -B build \
156            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
157            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
158            -DGGML_BACKEND_DL=ON \
159            -DGGML_NATIVE=OFF \
160            -DGGML_CPU_ALL_VARIANTS=ON \
161            -DLLAMA_FATAL_WARNINGS=ON \
162            ${{ env.CMAKE_ARGS }}
163          cmake --build build --config Release -j $(nproc)
164
165      - name: Determine tag name
166        id: tag
167        uses: ./.github/actions/get-tag-name
168
169      - name: Pack artifacts
170        id: pack_artifacts
171        run: |
172          cp LICENSE ./build/bin/
173          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .
174
175      - name: Upload artifacts
176        uses: actions/upload-artifact@v6
177        with:
178          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
179          name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
180
  # Linux Vulkan build on top of the dispatched CPU variants.
  ubuntu-22-vulkan:
    runs-on: ubuntu-22.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Full history so ./.github/actions/get-tag-name can derive the tag.
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: ubuntu-22-cmake-vulkan
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        run: |
          # NOTE(review): apt-key is deprecated on modern Ubuntu; consider the
          # signed-by/keyring mechanism when bumping the runner image.
          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
          sudo apt-get update -y
          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libssl-dev

      - name: Build
        id: cmake_build
        run: |
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_BACKEND_DL=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DGGML_VULKAN=ON \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          # GNU tar: --transform places files under llama-<tag>/ inside the archive.
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
          name: llama-bin-ubuntu-vulkan-x64.tar.gz
233
  # Windows CPU builds (x64 native, arm64 cross) with the LLVM toolchain.
  # These zips are later merged into the GPU-backend zips by the release job.
  windows-cpu:
    runs-on: windows-2025

    strategy:
      matrix:
        include:
          - arch: 'x64'
          - arch: 'arm64'

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-cpu-${{ matrix.arch }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Ninja
        run: |
          choco install ninja

      - name: Build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DGGML_NATIVE=OFF ^
            -DGGML_BACKEND_DL=ON ^
            -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^
            -DGGML_OPENMP=ON ^
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          # Bundle the LLVM OpenMP runtime next to the binaries.
          # NOTE(review): the MSVC redist version (14.44.35112) is pinned to the
          # current windows-2025 image and will break when the image updates.
          Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
          7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-bin-win-cpu-${{ matrix.arch }}.zip
          name: llama-bin-win-cpu-${{ matrix.arch }}.zip
285
286  windows:
287    runs-on: windows-2025
288
289    env:
290      OPENBLAS_VERSION: 0.3.23
291      VULKAN_VERSION: 1.4.313.2
292
293    strategy:
294      matrix:
295        include:
296          - backend: 'vulkan'
297            arch: 'x64'
298            defines: '-DGGML_VULKAN=ON'
299            target: 'ggml-vulkan'
300          - backend: 'opencl-adreno'
301            arch: 'arm64'
302            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
303            target: 'ggml-opencl'
304
305    steps:
306      - name: Clone
307        id: checkout
308        uses: actions/checkout@v6
309
310      - name: ccache
311        uses: ggml-org/ccache-action@v1.2.16
312        with:
313          key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }}
314          variant: ccache
315          evict-old-files: 1d
316
317      - name: Install Vulkan SDK
318        id: get_vulkan
319        if: ${{ matrix.backend == 'vulkan' }}
320        run: |
321          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
322          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
323          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
324          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
325
326      - name: Install Ninja
327        id: install_ninja
328        run: |
329          choco install ninja
330
331      - name: Install OpenCL Headers and Libs
332        id: install_opencl
333        if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }}
334        run: |
335          git clone https://github.com/KhronosGroup/OpenCL-Headers
336          cd OpenCL-Headers
337          cmake -B build `
338            -DBUILD_TESTING=OFF `
339            -DOPENCL_HEADERS_BUILD_TESTING=OFF `
340            -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
341            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
342          cmake --build build --target install
343          git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
344          cd OpenCL-ICD-Loader
345          cmake -B build-arm64-release `
346            -A arm64 `
347            -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
348            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
349          cmake --build build-arm64-release --target install --config release
350
351      - name: Build
352        id: cmake_build
353        run: |
354          cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_BUILD_BORINGSSL=ON
355          cmake --build build --config Release --target ${{ matrix.target }}
356
357      - name: Pack artifacts
358        id: pack_artifacts
359        run: |
360          7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
361
362      - name: Upload artifacts
363        uses: actions/upload-artifact@v6
364        with:
365          path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
366          name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
367
  # CUDA backend DLL builds plus a separate CUDA runtime redistributable zip.
  windows-cuda:
    runs-on: windows-2022

    strategy:
      matrix:
        cuda: ['12.4', '13.1']

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Install ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-cuda-${{ matrix.cuda }}
          variant: ccache
          evict-old-files: 1d

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        id: install_ninja
        run: |
          choco install ninja

      - name: Build
        id: cmake_build
        shell: cmd
        # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DGGML_BACKEND_DL=ON ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CPU=OFF ^
            -DGGML_CUDA=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DGGML_CUDA_CUB_3DOT2=ON
          rem Leave one core free to keep the runner responsive.
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

      - name: Copy and pack Cuda runtime
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          # CTK versions place the runtime DLLs in different directories,
          # so all three locations are tried (robocopy tolerates misses).
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v6
        with:
          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
438
  # SYCL (Intel oneAPI) backend build; ships the oneAPI runtime DLLs in the zip.
  windows-sycl:
    runs-on: windows-2022

    defaults:
      run:
        shell: bash

    env:
      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
      ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-sycl
          variant: ccache
          evict-old-files: 1d

      - name: Install
        run:  |
          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL

      - name: Build
        id: cmake_build
        shell: cmd
        run: |
          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
          cmake -G "Ninja" -B build ^
            -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^
            -DCMAKE_BUILD_TYPE=Release ^
            -DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^
            -DGGML_CPU=OFF -DGGML_SYCL=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON
          cmake --build build --target ggml-sycl -j

      - name: Build the release package
        id: pack_artifacts
        run: |
          echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"

          # MKL runtime
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin

          # Unified Runtime adapters and loader
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero_v2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin

          # SYCL runtime, compiler support libraries and the sycl-ls utility
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-fallback-bfloat16.spv" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-native-bfloat16.spv" ./build/bin

          # oneDNN and TBB
          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin

          # Thread composability manager and unified memory framework
          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/tcm.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/libhwloc-15.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin

          echo "cp oneAPI running time dll files to ./build/bin done"
          7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*

      - name: Upload the release package
        uses: actions/upload-artifact@v6
        with:
          path: llama-bin-win-sycl-x64.zip
          name: llama-bin-win-sycl-x64.zip
518
  # HIP/ROCm backend build for Radeon GPUs; bundles the ROCm BLAS runtime.
  windows-hip:
    runs-on: windows-2022

    env:
      HIPSDK_INSTALLER_VERSION: "25.Q3"

    strategy:
      matrix:
        include:
          - name: "radeon"
            gpu_targets: "gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Grab rocWMMA package
        id: grab_rocwmma
        run: |
          # The .deb is only used as a header archive; it is unpacked into ./opt
          # and picked up via CMAKE_CXX_FLAGS in the Build step below.
          curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.0.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.0.0.70001-42~24.04_amd64.deb"
          7z x rocwmma.deb
          7z x data.tar

      - name: Cache ROCm Installation
        id: cache-rocm
        uses: actions/cache@v5
        with:
          path: C:\Program Files\AMD\ROCm
          key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.16
        with:
          key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
          evict-old-files: 1d

      - name: Install ROCm
        if: steps.cache-rocm.outputs.cache-hit != 'true'
        id: depends
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          # The installer can hang; wait at most 10 minutes before failing the job.
          $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
          $completed = $proc.WaitForExit(600000)
          if (-not $completed) {
              Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
              $proc.Kill()
              exit 1
          }
          if ($proc.ExitCode -ne 0) {
              Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
              exit 1
          }
          write-host "Completed AMD HIP SDK installation"

      - name: Verify ROCm
        id: verify
        run: |
          # Find and test ROCm installation
          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
          if (-not $clangPath) {
            Write-Error "ROCm installation not found"
            exit 1
          }
          & $clangPath.FullName --version

      - name: Build
        id: cmake_build
        run: |
          # Derive HIP_PATH from wherever the versioned SDK landed.
          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
          # NOTE(review): AMDGPU_TARGETS is the older spelling (newer ROCm CMake
          # prefers GPU_TARGETS) - confirm it is still honored when bumping the SDK.
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.0.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
            -DCMAKE_BUILD_TYPE=Release `
            -DGGML_BACKEND_DL=ON `
            -DGGML_NATIVE=OFF `
            -DGGML_CPU=OFF `
            -DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" `
            -DGGML_HIP_ROCWMMA_FATTN=ON `
            -DGGML_HIP=ON `
            -DLLAMA_BUILD_BORINGSSL=ON
          cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
          # Ship the ROCm BLAS runtime and its kernel libraries next to the DLL.
          md "build\bin\rocblas\library\"
          md "build\bin\hipblaslt\library"
          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
          cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\"

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-bin-win-hip-${{ matrix.name }}-x64.zip
          name: llama-bin-win-hip-${{ matrix.name }}-x64.zip
624
  # iOS build plus the Swift Package Manager xcframework artifact.
  ios-xcode-build:
    runs-on: macos-15

    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          # Full history so ./.github/actions/get-tag-name can derive the tag.
          fetch-depth: 0

      - name: Setup Xcode
        run: |
          sudo xcode-select -s /Applications/Xcode_16.4.app

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build -G Xcode \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_OPENSSL=OFF \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            -DLLAMA_BUILD_TOOLS=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_SYSTEM_NAME=iOS \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

      - name: xcodebuild for swift package
        id: xcodebuild
        run: |
          ./build-xcframework.sh

      - name: Build Xcode project
        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          # Zip file is required for Swift Package Manager, which does not support tar.gz for binary targets.
          # For more details, see https://developer.apple.com/documentation/xcode/distributing-binary-frameworks-as-swift-packages
          zip -r -y llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
          name: llama-${{ steps.tag.outputs.name }}-xcframework.zip
679
680
  # Ascend CANN builds inside openEuler containers (x86 and aarch64,
  # 910b and 310p chip variants).
  openEuler-cann:
    strategy:
      matrix:
        include:
          # 910b with aclgraph (both architectures)
          - arch: x86
            chip_type: '910b'
            build: 'Release'
            use_acl_graph: 'on'
          - arch: aarch64
            chip_type: '910b'
            build: 'Release'
            use_acl_graph: 'on'
          # 310p without aclgraph (both architectures)
          - arch: x86
            chip_type: '310p'
            build: 'Release'
            use_acl_graph: 'off'
          - arch: aarch64
            chip_type: '310p'
            build: 'Release'
            use_acl_graph: 'off'
    runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          # Full history so ./.github/actions/get-tag-name can derive the tag.
          fetch-depth: 0

      - name: Free up disk space
        uses: ggml-org/free-disk-space@v1.3.1
        with:
          tool-cache: true

      - name: Set container image
        id: cann-image
        run: |
          # Pick the CANN image matching the chip type in the matrix.
          image="ascendai/cann:${{ matrix.chip_type == '910b' &&  '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
          echo "image=${image}" >> "${GITHUB_OUTPUT}"

      - name: Pull container image
        run: docker pull "${{ steps.cann-image.outputs.image }}"

      - name: Build
        env:
          BUILD_TYPE: ${{ matrix.build }}
          SOC_TYPE: ascend${{ matrix.chip_type }}
          USE_ACL_GRAPH: ${{ matrix.use_acl_graph }}
        run: |
          HOST_UID=$(id -u)
          HOST_GID=$(id -g)

          docker run --rm \
            -v "${PWD}:/workspace" \
            -w /workspace \
            -e SOC_TYPE=${SOC_TYPE} \
            -e BUILD_TYPE=${BUILD_TYPE} \
            -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \
            "${{ steps.cann-image.outputs.image }}" \
            bash -lc '
              set -e
              yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
              yum clean all && rm -rf /var/cache/yum
              git config --global --add safe.directory "/workspace"
              export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
              cmake -S . -B build \
                  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
                  -DGGML_CANN=on \
                  -DSOC_TYPE=${SOC_TYPE} \
                  -DUSE_ACL_GRAPH=${USE_ACL_GRAPH}
              cmake --build build -j $(nproc)

              # The quote breaks below let the HOST shell expand HOST_UID and
              # HOST_GID before the script reaches the container, so build
              # output is owned by the runner user, not root.
              chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
            '

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
          name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
770
771  release:
772    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
773
774    # Fine-grant permission
775    # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
776    permissions:
777        contents: write # for creating release
778
779    runs-on: ubuntu-latest
780
781    needs:
782      - windows
783      - windows-cpu
784      - windows-cuda
785      - windows-sycl
786      - windows-hip
787      - ubuntu-22-cpu
788      - ubuntu-22-vulkan
789      - macOS-arm64
790      - macOS-x64
791      - ios-xcode-build
792      - openEuler-cann
793
794    steps:
795      - name: Clone
796        id: checkout
797        uses: actions/checkout@v6
798        with:
799          fetch-depth: 0
800
801      - name: Determine tag name
802        id: tag
803        uses: ./.github/actions/get-tag-name
804
805      - name: Download artifacts
806        id: download-artifact
807        uses: actions/download-artifact@v7
808        with:
809          path: ./artifact
810          merge-multiple: true
811
812      - name: Move artifacts
813        id: move_artifacts
814        run: |
815          mkdir -p release
816
817          echo "Adding CPU backend files to existing zips..."
818          for arch in x64 arm64; do
819            cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip"
820            temp_dir=$(mktemp -d)
821            echo "Extracting CPU backend for $arch..."
822            unzip "$cpu_zip" -d "$temp_dir"
823
824            echo "Adding CPU files to $arch zips..."
825            for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
826              if [[ "$target_zip" == "$cpu_zip" ]]; then
827                continue
828              fi
829              echo "Adding CPU backend to $(basename "$target_zip")"
830              realpath_target_zip=$(realpath "$target_zip")
831              (cd "$temp_dir" && zip -r "$realpath_target_zip" .)
832            done
833
834            rm -rf "$temp_dir"
835          done
836
837          echo "Renaming and moving zips to release..."
838          for zip_file in artifact/llama-bin-win-*.zip; do
839            base_name=$(basename "$zip_file" .zip)
840            zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
841            echo "Moving $zip_file to release/$zip_name"
842            mv "$zip_file" "release/$zip_name"
843          done
844
845          echo "Moving other artifacts..."
846          mv -v artifact/*.zip release
847          mv -v artifact/*.tar.gz release
848
849      - name: Create release
850        id: create_release
851        uses: ggml-org/action-create-release@v1
852        env:
853          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
854        with:
855          tag_name: ${{ steps.tag.outputs.name }}
856          body: |
857            <details open>
858
859            ${{ github.event.head_commit.message }}
860
861            </details>
862
863            **macOS/iOS:**
864            - [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)
865            - [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz)
866            - [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.zip)
867
868            **Linux:**
869            - [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
870            - [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
871            - [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
872
873            **Windows:**
874            - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
875            - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
876            - [Windows x64 (CUDA 12)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
877            - [Windows x64 (CUDA 13)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.1-x64.zip) - [CUDA 13.1 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.1-x64.zip)
878            - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
879            - [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip)
880            - [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)
881
882            **openEuler:**
883            - [openEuler x86 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-x86.tar.gz)
884            - [openEuler x86 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86-aclgraph.tar.gz)
885            - [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz)
886            - [openEuler aarch64 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64-aclgraph.tar.gz)
887
888      - name: Upload release
889        id: upload_release
890        uses: actions/github-script@v8
891        with:
892          github-token: ${{secrets.GITHUB_TOKEN}}
893          script: |
894            const path = require('path');
895            const fs = require('fs');
896            const release_id = '${{ steps.create_release.outputs.id }}';
897            for (let file of await fs.readdirSync('./release')) {
898              if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
899                console.log('uploadReleaseAsset', file);
900                await github.rest.repos.uploadReleaseAsset({
901                  owner: context.repo.owner,
902                  repo: context.repo.repo,
903                  release_id: release_id,
904                  name: file,
905                  data: await fs.readFileSync(`./release/${file}`)
906                });
907              }
908            }