From b333b06772c89d96aacb5490d6a219fba7c09cc6 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Thu, 12 Feb 2026 20:57:17 +0100 Subject: Engage! --- .../.github/ISSUE_TEMPLATE/010-bug-compilation.yml | 88 + .../.github/ISSUE_TEMPLATE/011-bug-results.yml | 115 ++ llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml | 103 + .../.github/ISSUE_TEMPLATE/020-enhancement.yml | 51 + llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml | 52 + llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml | 28 + llama.cpp/.github/ISSUE_TEMPLATE/config.yml | 11 + llama.cpp/.github/actions/get-tag-name/action.yml | 22 + llama.cpp/.github/actions/install-exe/action.yml | 36 + .../actions/linux-setup-spacemit/action.yml | 20 + .../.github/actions/linux-setup-vulkan/action.yml | 20 + llama.cpp/.github/actions/unarchive-tar/action.yml | 27 + .../.github/actions/windows-setup-cuda/action.yml | 98 + .../.github/actions/windows-setup-rocm/action.yml | 15 + llama.cpp/.github/labeler.yml | 106 + llama.cpp/.github/pull_request_template.md | 1 + llama.cpp/.github/workflows/bench.yml.disabled | 304 +++ llama.cpp/.github/workflows/build-cache.yml | 89 + llama.cpp/.github/workflows/build-cmake-pkg.yml | 51 + llama.cpp/.github/workflows/build-linux-cross.yml | 298 +++ llama.cpp/.github/workflows/build.yml | 2138 ++++++++++++++++++++ llama.cpp/.github/workflows/check-vendor.yml | 52 + llama.cpp/.github/workflows/close-issue.yml | 28 + .../.github/workflows/copilot-setup-steps.yml | 57 + llama.cpp/.github/workflows/docker.yml | 226 +++ llama.cpp/.github/workflows/editorconfig.yml | 29 + llama.cpp/.github/workflows/gguf-publish.yml | 44 + llama.cpp/.github/workflows/labeler.yml | 17 + .../.github/workflows/pre-tokenizer-hashes.yml | 45 + .../workflows/python-check-requirements.yml | 33 + llama.cpp/.github/workflows/python-lint.yml | 30 + llama.cpp/.github/workflows/python-type-check.yml | 38 + llama.cpp/.github/workflows/release.yml | 908 +++++++++ llama.cpp/.github/workflows/server-metal.yml | 73 + llama.cpp/.github/workflows/server-webui.yml | 99 + llama.cpp/.github/workflows/server.yml | 147 ++ llama.cpp/.github/workflows/update-ops-docs.yml | 42 + llama.cpp/.github/workflows/winget.yml | 44 + 38 files changed, 5585 insertions(+) create mode 100644 llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml create mode 100644 llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml create mode 100644 llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml create mode 100644 llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml create mode 100644 llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml create mode 100644 llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml create mode 100644 llama.cpp/.github/ISSUE_TEMPLATE/config.yml create mode 100644 llama.cpp/.github/actions/get-tag-name/action.yml create mode 100644 llama.cpp/.github/actions/install-exe/action.yml create mode 100644 llama.cpp/.github/actions/linux-setup-spacemit/action.yml create mode 100644 llama.cpp/.github/actions/linux-setup-vulkan/action.yml create mode 100644 llama.cpp/.github/actions/unarchive-tar/action.yml create mode 100644 llama.cpp/.github/actions/windows-setup-cuda/action.yml create mode 100644 llama.cpp/.github/actions/windows-setup-rocm/action.yml create mode 100644 llama.cpp/.github/labeler.yml create mode 100644 llama.cpp/.github/pull_request_template.md create mode 100644 llama.cpp/.github/workflows/bench.yml.disabled create mode 100644 llama.cpp/.github/workflows/build-cache.yml create mode 100644 llama.cpp/.github/workflows/build-cmake-pkg.yml create mode 100644 
llama.cpp/.github/workflows/build-linux-cross.yml create mode 100644 llama.cpp/.github/workflows/build.yml create mode 100644 llama.cpp/.github/workflows/check-vendor.yml create mode 100644 llama.cpp/.github/workflows/close-issue.yml create mode 100644 llama.cpp/.github/workflows/copilot-setup-steps.yml create mode 100644 llama.cpp/.github/workflows/docker.yml create mode 100644 llama.cpp/.github/workflows/editorconfig.yml create mode 100644 llama.cpp/.github/workflows/gguf-publish.yml create mode 100644 llama.cpp/.github/workflows/labeler.yml create mode 100644 llama.cpp/.github/workflows/pre-tokenizer-hashes.yml create mode 100644 llama.cpp/.github/workflows/python-check-requirements.yml create mode 100644 llama.cpp/.github/workflows/python-lint.yml create mode 100644 llama.cpp/.github/workflows/python-type-check.yml create mode 100644 llama.cpp/.github/workflows/release.yml create mode 100644 llama.cpp/.github/workflows/server-metal.yml create mode 100644 llama.cpp/.github/workflows/server-webui.yml create mode 100644 llama.cpp/.github/workflows/server.yml create mode 100644 llama.cpp/.github/workflows/update-ops-docs.yml create mode 100644 llama.cpp/.github/workflows/winget.yml (limited to 'llama.cpp/.github') diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml b/llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml new file mode 100644 index 0000000..c106f47 --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml @@ -0,0 +1,88 @@ +name: Bug (compilation) +description: Something goes wrong when trying to compile llama.cpp. +title: "Compile bug: " +labels: ["bug-unconfirmed", "compilation"] +body: + - type: markdown + attributes: + value: > + Thanks for taking the time to fill out this bug report! + This issue template is intended for bug reports where the compilation of llama.cpp fails. + Before opening an issue, please confirm that the compilation still fails + after recreating the CMake build directory and with `-DGGML_CCACHE=OFF`. + If the compilation succeeds with ccache disabled, you should be able to permanently fix the issue + by clearing `~/.cache/ccache` (on Linux). + - type: textarea + id: commit + attributes: + label: Git commit + description: Which commit are you trying to compile? + placeholder: | + $git rev-parse HEAD + 84a07a17b1b08cf2b9747c633a2372782848a27f + validations: + required: true + - type: dropdown + id: operating-system + attributes: + label: Operating systems + description: Which operating systems do you know to be affected? + multiple: true + options: + - Linux + - Mac + - Windows + - BSD + - Other? (Please let us know in description) + validations: + required: true + - type: dropdown + id: backends + attributes: + label: GGML backends + description: Which GGML backends do you know to be affected? + options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN] + multiple: true + validations: + required: true + - type: textarea + id: info + attributes: + label: Problem description & steps to reproduce + description: > + Please give us a summary of the problem and tell us how to reproduce it. + If you can narrow down the bug to specific compile flags, we would very much appreciate that information. + placeholder: > + I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY. + Here are the exact commands that I used: ...
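+ # A hypothetical example of the clean re-check suggested above (build directory and flags are illustrative, not prescriptive): + # rm -rf build + # cmake -B build -DGGML_CCACHE=OFF + # cmake --build build --config Release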
+ validations: + required: true + - type: textarea + id: first_bad_commit + attributes: + label: First Bad Commit + description: > + If the bug was not present on an earlier version: when did it start appearing? + If possible, please do a git bisect and identify the exact commit that introduced the bug. + validations: + required: false + - type: textarea + id: command + attributes: + label: Compile command + description: > + Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true + - type: textarea + id: logs + attributes: + label: Relevant log output + description: > + Please copy and paste any relevant log output, including any generated text. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml b/llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml new file mode 100644 index 0000000..31202df --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml @@ -0,0 +1,115 @@ +name: Bug (model use) +description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module). +title: "Eval bug: " +labels: ["bug-unconfirmed", "model evaluation"] +body: + - type: markdown + attributes: + value: > + Thanks for taking the time to fill out this bug report! + This issue template is intended for bug reports where the model evaluation results + (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation. + If you encountered the issue while using an external UI (e.g. ollama), + please reproduce your issue using one of the examples/binaries in this repository. + The `llama-completion` binary can be used for simple and reproducible model inference. + - type: textarea + id: version + attributes: + label: Name and Version + description: Which version of our software are you running? (use `--version` to get a version string) + placeholder: | + $./llama-cli --version + version: 2999 (42b4109e) + built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu + validations: + required: true + - type: dropdown + id: operating-system + attributes: + label: Operating systems + description: Which operating systems do you know to be affected? + multiple: true + options: + - Linux + - Mac + - Windows + - BSD + - Other? (Please let us know in description) + validations: + required: true + - type: dropdown + id: backends + attributes: + label: GGML backends + description: Which GGML backends do you know to be affected? + options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN] + multiple: true + validations: + required: true + - type: textarea + id: hardware + attributes: + label: Hardware + description: Which CPUs/GPUs are you using? + placeholder: > + e.g. Ryzen 5950X + 2x RTX 4090 + validations: + required: true + - type: textarea + id: model + attributes: + label: Models + description: > + Which model(s) at which quantization were you using when encountering the bug? + If you downloaded a GGUF file off of Huggingface, please provide a link. + placeholder: > + e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M + validations: + required: false + - type: textarea + id: info + attributes: + label: Problem description & steps to reproduce + description: > + Please give us a summary of the problem and tell us how to reproduce it. 
+ If you can narrow down the bug to specific hardware, compile flags, or command line arguments, + we would very much appreciate that information. + + If possible, please try to reproduce the issue using `llama-completion` with `-fit off`. + If you can only reproduce the issue with `-fit on`, please provide logs both with and without `--verbose`. + placeholder: > + e.g. when I run llama-completion with `-fa on` I get garbled outputs for very long prompts. + With short prompts or `-fa off` it works correctly. + Here are the exact commands that I used: ... + validations: + required: true + - type: textarea + id: first_bad_commit + attributes: + label: First Bad Commit + description: > + If the bug was not present on an earlier version: when did it start appearing? + If possible, please do a git bisect and identify the exact commit that introduced the bug. + validations: + required: false + - type: textarea + id: logs + attributes: + label: Relevant log output + description: > + Please copy and paste any relevant log output, including the command that you entered and any generated text. + For very long logs (thousands of lines), preferably upload them as files instead. + On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command. + value: |
+ <details> + <summary>Logs</summary> + + ```console + + ``` + + </details>
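+ # For example, a hypothetical invocation capturing a full session (binary, model path, and prompt are placeholders): + # ./llama-completion -m ./models/model.gguf -p "test prompt" > llama.log 2>&1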
+ + + validations: + required: true diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml b/llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml new file mode 100644 index 0000000..8e867e7 --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml @@ -0,0 +1,103 @@ +name: Bug (misc.) +description: Something is not working the way it should (and it's not covered by any of the above cases). +title: "Misc. bug: " +labels: ["bug-unconfirmed"] +body: + - type: markdown + attributes: + value: > + Thanks for taking the time to fill out this bug report! + This issue template is intended for miscellaneous bugs that don't fit into any other category. + If you encountered the issue while using an external UI (e.g. ollama), + please reproduce your issue using one of the examples/binaries in this repository. + - type: textarea + id: version + attributes: + label: Name and Version + description: Which version of our software is affected? (You can use `--version` to get a version string.) + placeholder: | + $./llama-cli --version + version: 2999 (42b4109e) + built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu + validations: + required: true + - type: dropdown + id: operating-system + attributes: + label: Operating systems + description: Which operating systems do you know to be affected? + multiple: true + options: + - Linux + - Mac + - Windows + - BSD + - Other? (Please let us know in description) + validations: + required: false + - type: dropdown + id: module + attributes: + label: Which llama.cpp modules do you know to be affected? + multiple: true + options: + - Documentation/Github + - libllama (core library) + - llama-cli + - llama-server + - llama-bench + - llama-quantize + - Python/Bash scripts + - Test code + - Other (Please specify in the next section) + validations: + required: false + - type: textarea + id: command + attributes: + label: Command line + description: > + Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc. + This will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: false + - type: textarea + id: info + attributes: + label: Problem description & steps to reproduce + description: > + Please give us a summary of the problem and tell us how to reproduce it (if applicable). + validations: + required: true + - type: textarea + id: first_bad_commit + attributes: + label: First Bad Commit + description: > + If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing? + If possible, please do a git bisect and identify the exact commit that introduced the bug. + validations: + required: false + - type: textarea + id: logs + attributes: + label: Relevant log output + description: > + If applicable, please copy and paste any relevant log output, including any generated text. + If you are encountering problems specifically with the `llama_params_fit` module, always upload `--verbose` logs as well. + For very long logs (thousands of lines), please upload them as files instead. + On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command. + value: | +
+ <details> + <summary>Logs</summary> + + ```console + + ``` + + </details>
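+ # e.g. a hypothetical verbose capture for a llama-server report (model path is a placeholder): + # ./llama-server -m ./models/model.gguf --verbose > llama.log 2>&1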
+ + + validations: + required: false diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml b/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml new file mode 100644 index 0000000..cee1446 --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml @@ -0,0 +1,51 @@ +name: Enhancement +description: Used to request enhancements for llama.cpp. +title: "Feature Request: " +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + Please post your idea first in [Discussions: Ideas](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas) if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed need to be implemented. + + - type: checkboxes + id: prerequisites + attributes: + label: Prerequisites + description: Please confirm the following before submitting your enhancement request. + options: + - label: I am running the latest code. Mention the version if possible as well. + required: true + - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md). + required: true + - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed). + required: true + - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share. + required: true + + - type: textarea + id: feature-description + attributes: + label: Feature Description + description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement. + placeholder: Detailed description of the enhancement + validations: + required: true + + - type: textarea + id: motivation + attributes: + label: Motivation + description: Please provide a detailed written description of the reasons why this feature is necessary and how it is useful to `llama.cpp` users. + placeholder: Explanation of why this feature is needed and its benefits + validations: + required: true + + - type: textarea + id: possible-implementation + attributes: + label: Possible Implementation + description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better. + placeholder: Detailed description of potential implementation + validations: + required: false diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml b/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml new file mode 100644 index 0000000..e774550 --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml @@ -0,0 +1,52 @@ +name: Research +description: Track a new technical research area. +title: "Research: " +labels: ["research 🔬"] +body: + - type: markdown + attributes: + value: | + Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22) + + - type: checkboxes + id: research-stage + attributes: + label: Research Stage + description: Track the general state of this research ticket + options: + - label: Background Research (Let's try to avoid reinventing the wheel) + - label: Hypothesis Formed (How do you think this will work and what will its effect be?)
+ - label: Strategy / Implementation Forming + - label: Analysis of results + - label: Debrief / Documentation (So people in the future can learn from us) + + - type: textarea + id: background + attributes: + label: Previous existing literature and research + description: What's the current state of the art and what's the motivation for this research? + + - type: textarea + id: hypothesis + attributes: + label: Hypothesis + description: How do you think this will work and what will its effect be? + + - type: textarea + id: implementation + attributes: + label: Implementation + description: Got an approach? e.g. a PR ready to go? + + - type: textarea + id: analysis + attributes: + label: Analysis + description: How does the proposed implementation behave? + + - type: textarea + id: logs + attributes: + label: Relevant log output + description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. + render: shell diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml b/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml new file mode 100644 index 0000000..2fe94e2 --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml @@ -0,0 +1,28 @@ +name: Refactor (Maintainers) +description: Used to track refactoring opportunities. +title: "Refactor: " +labels: ["refactor"] +body: + - type: markdown + attributes: + value: | + Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered. + You may also want to check the [refactor pull request label](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates. + + - type: textarea + id: background-description + attributes: + label: Background Description + description: Please provide a detailed written description of the pain points you are trying to solve. + placeholder: Detailed description of the motivation behind your refactor request + validations: + required: true + + - type: textarea + id: possible-approaches + attributes: + label: Possible Refactor Approaches + description: If you have some idea of possible approaches to solve this problem, describe them here. You may want to make it a todo list. + placeholder: Your idea of possible refactoring opportunities/approaches + validations: + required: false diff --git a/llama.cpp/.github/ISSUE_TEMPLATE/config.yml b/llama.cpp/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..0d24653 --- /dev/null +++ b/llama.cpp/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,11 @@ +blank_issues_enabled: true +contact_links: + - name: Got an idea? + url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas + about: Pop it there. It may then become an enhancement ticket. + - name: Got a question? + url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a + about: Ask a question there! + - name: Want to contribute?
+ url: https://github.com/ggml-org/llama.cpp/wiki/contribute + about: Head to the contribution guide page of the wiki for areas you can help with diff --git a/llama.cpp/.github/actions/get-tag-name/action.yml b/llama.cpp/.github/actions/get-tag-name/action.yml new file mode 100644 index 0000000..7ace23b --- /dev/null +++ b/llama.cpp/.github/actions/get-tag-name/action.yml @@ -0,0 +1,22 @@ +name: "Determine tag name" +description: "Determine the tag name to use for a release" +outputs: + name: + description: "The name of the tag" + value: ${{ steps.tag.outputs.name }} + +runs: + using: "composite" + steps: + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi diff --git a/llama.cpp/.github/actions/install-exe/action.yml b/llama.cpp/.github/actions/install-exe/action.yml new file mode 100644 index 0000000..002bec8 --- /dev/null +++ b/llama.cpp/.github/actions/install-exe/action.yml @@ -0,0 +1,36 @@ +name: "Install exe" +description: "Download and install exe" +inputs: + url: + description: "URL of the exe installer" + required: true + args: + description: "Installer arguments" + required: true + timeout: + description: "Timeout (in ms)" + required: false + default: "600000" + +runs: + using: "composite" + steps: + - name: Install EXE + shell: pwsh + run: | + $ErrorActionPreference = "Stop" + write-host "Downloading Installer EXE" + Invoke-WebRequest -Uri "${{ inputs.url }}" -OutFile "${env:RUNNER_TEMP}\temp-install.exe" + write-host "Installing" + $proc = Start-Process "${env:RUNNER_TEMP}\temp-install.exe" -ArgumentList '${{ inputs.args }}' -NoNewWindow -PassThru + $completed = $proc.WaitForExit(${{ inputs.timeout }}) + if (-not $completed) { + Write-Error "Installer timed out. 
Killing the process" + $proc.Kill() + exit 1 + } + if ($proc.ExitCode -ne 0) { + Write-Error "Installer failed with exit code $($proc.ExitCode)" + exit 1 + } + write-host "Completed installation" diff --git a/llama.cpp/.github/actions/linux-setup-spacemit/action.yml b/llama.cpp/.github/actions/linux-setup-spacemit/action.yml new file mode 100644 index 0000000..e2193e8 --- /dev/null +++ b/llama.cpp/.github/actions/linux-setup-spacemit/action.yml @@ -0,0 +1,20 @@ +name: "Linux - Setup SpacemiT Toolchain" +description: "Setup SpacemiT Toolchain for Linux" +inputs: + path: + description: "Installation path" + required: true + version: + description: "SpacemiT toolchain version" + required: true + +runs: + using: "composite" + steps: + - name: Setup SpacemiT Toolchain + id: setup + uses: ./.github/actions/unarchive-tar + with: + url: https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v${{ inputs.version }}.tar.xz + path: ${{ inputs.path }} + strip: 1 diff --git a/llama.cpp/.github/actions/linux-setup-vulkan/action.yml b/llama.cpp/.github/actions/linux-setup-vulkan/action.yml new file mode 100644 index 0000000..4d29837 --- /dev/null +++ b/llama.cpp/.github/actions/linux-setup-vulkan/action.yml @@ -0,0 +1,20 @@ +name: "Linux - Setup Vulkan SDK" +description: "Setup Vulkan SDK for Linux" +inputs: + path: + description: "Installation path" + required: true + version: + description: "Vulkan SDK version" + required: true + +runs: + using: "composite" + steps: + - name: Setup Vulkan SDK + id: setup + uses: ./.github/actions/unarchive-tar + with: + url: https://sdk.lunarg.com/sdk/download/${{ inputs.version }}/linux/vulkan_sdk.tar.xz + path: ${{ inputs.path }} + strip: 1 diff --git a/llama.cpp/.github/actions/unarchive-tar/action.yml b/llama.cpp/.github/actions/unarchive-tar/action.yml new file mode 100644 index 0000000..b97e402 --- /dev/null +++ b/llama.cpp/.github/actions/unarchive-tar/action.yml @@ -0,0 +1,27 @@ +name: "Unarchive tar" +description: "Download and unarchive tar into directory" +inputs: + url: + description: "URL of the tar archive" + required: true + path: + description: "Directory to unarchive into" + required: true + type: + description: "Compression type (tar option)" + required: false + default: "J" + strip: + description: "Strip components" + required: false + default: "0" + +runs: + using: "composite" + steps: + - name: Unarchive into directory + shell: bash + run: | + mkdir -p ${{ inputs.path }} + cd ${{ inputs.path }} + curl --no-progress-meter ${{ inputs.url }} | tar -${{ inputs.type }}x --strip-components=${{ inputs.strip }} diff --git a/llama.cpp/.github/actions/windows-setup-cuda/action.yml b/llama.cpp/.github/actions/windows-setup-cuda/action.yml new file mode 100644 index 0000000..6ad6158 --- /dev/null +++ b/llama.cpp/.github/actions/windows-setup-cuda/action.yml @@ -0,0 +1,98 @@ +name: "Windows - Setup CUDA Toolkit" +description: "Setup CUDA Toolkit for Windows" +inputs: + cuda_version: + description: "CUDA toolkit version" + required: true + +runs: + using: "composite" + steps: + - name: Install Cuda Toolkit 11.7 + if: ${{ inputs.cuda_version == '11.7' }} + shell: pwsh + run: | + mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" + choco install unzip -y + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip" + curl -O 
"https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip" + unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + + - name: Install Cuda Toolkit 12.4 + if: ${{ inputs.cuda_version == '12.4' }} + shell: pwsh + run: | + mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" + choco install unzip -y + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip" + curl -O 
"https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip" + unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + + - name: Install Cuda Toolkit 13.1 + if: ${{ inputs.cuda_version == '13.1' }} + shell: 
pwsh + run: | + mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" + choco install unzip -y + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_crt/windows-x86_64/cuda_crt-windows-x86_64-13.1.80-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-13.1.80-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-13.1.80-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-13.1.80-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-13.2.0.9-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libnvvm/windows-x86_64/libnvvm-windows-x86_64-13.1.80-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-13.1.68-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-13.1.80-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-13.1.68-archive.zip" + curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-13.1.78-archive.zip" + unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_crt-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cudart-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvcc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvrtc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libcublas-windows-x86_64-13.2.0.9-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libnvvm-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvtx-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_profiler_api-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\visual_studio_integration-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cccl-windows-x86_64-13.1.78-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y + echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\bin" | Out-File 
-FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 + echo "CUDA_PATH_V13_1=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 diff --git a/llama.cpp/.github/actions/windows-setup-rocm/action.yml b/llama.cpp/.github/actions/windows-setup-rocm/action.yml new file mode 100644 index 0000000..b83e6e2 --- /dev/null +++ b/llama.cpp/.github/actions/windows-setup-rocm/action.yml @@ -0,0 +1,15 @@ +name: "Windows - Setup ROCm" +description: "Setup ROCm for Windows" +inputs: + version: + description: "ROCm version" + required: true + +runs: + using: "composite" + steps: + - name: Setup ROCm + uses: ./.github/actions/install-exe + with: + url: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ inputs.version }}-WinSvr2022-For-HIP.exe + args: -install diff --git a/llama.cpp/.github/labeler.yml b/llama.cpp/.github/labeler.yml new file mode 100644 index 0000000..08cfd7e --- /dev/null +++ b/llama.cpp/.github/labeler.yml @@ -0,0 +1,106 @@ +# https://github.com/actions/labeler +Apple Metal: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-metal.h + - ggml/src/ggml-metal/** + - README-metal.md +SYCL: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-sycl.h + - ggml/src/ggml-sycl/** + - docs/backend/SYCL.md + - examples/sycl/** +Nvidia GPU: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-cuda.h + - ggml/src/ggml-cuda/** +Vulkan: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-vulkan.h + - ggml/src/ggml-vulkan/** +IBM zDNN: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-zdnn.h + - ggml/src/ggml-zdnn/** +documentation: + - changed-files: + - any-glob-to-any-file: + - docs/** + - media/** +testing: + - changed-files: + - any-glob-to-any-file: + - tests/** +build: + - changed-files: + - any-glob-to-any-file: + - cmake/** + - CMakeLists.txt + - CMakePresets.json +examples: + - changed-files: + - any-glob-to-any-file: + - examples/** + - tools/** +devops: + - changed-files: + - any-glob-to-any-file: + - .devops/** + - .github/** + - ci/** +python: + - changed-files: + - any-glob-to-any-file: + - "**/*.py" + - requirements/** + - gguf-py/** + - .flake8 +script: + - changed-files: + - any-glob-to-any-file: + - scripts/** +android: + - changed-files: + - any-glob-to-any-file: + - examples/llama.android/** +server: + - changed-files: + - any-glob-to-any-file: + - tools/server/** +ggml: + - changed-files: + - any-glob-to-any-file: + - ggml/** +model: + - changed-files: + - any-glob-to-any-file: + - src/models/** +nix: + - changed-files: + - any-glob-to-any-file: + - "**/*.nix" + - .github/workflows/nix-*.yml + - .devops/nix/nixpkgs-instances.nix +embedding: + - changed-files: + - any-glob-to-any-file: examples/embedding/ +jinja parser: + - changed-files: + - any-glob-to-any-file: + - common/jinja/** +Ascend NPU: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-cann.h + - ggml/src/ggml-cann/** + - docs/backend/CANN.md +OpenCL: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-opencl.h + - ggml/src/ggml-opencl/** diff --git a/llama.cpp/.github/pull_request_template.md b/llama.cpp/.github/pull_request_template.md new file mode 100644 index 0000000..d0bdd73 --- /dev/null +++ b/llama.cpp/.github/pull_request_template.md @@ -0,0 +1 @@ +*Make sure to read the [contributing 
guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR* diff --git a/llama.cpp/.github/workflows/bench.yml.disabled b/llama.cpp/.github/workflows/bench.yml.disabled new file mode 100644 index 0000000..f2d7e16 --- /dev/null +++ b/llama.cpp/.github/workflows/bench.yml.disabled @@ -0,0 +1,304 @@ +# TODO: there have been some issues with the workflow, so disabling for now +# https://github.com/ggml-org/llama.cpp/issues/7893 +# +# Benchmark +name: Benchmark + +on: + workflow_dispatch: + inputs: + gpu-series: + description: 'Azure GPU series to run with' + required: true + type: choice + options: + - Standard_NC4as_T4_v3 + - Standard_NC24ads_A100_v4 + - Standard_NC80adis_H100_v5 + sha: + description: 'Commit SHA1 to build' + required: false + type: string + duration: + description: 'Duration of the bench' + type: string + default: 10m + + push: + branches: + - master + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp'] + pull_request_target: + types: [opened, synchronize, reopened] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp'] + schedule: + - cron: '04 2 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }} + cancel-in-progress: true + +jobs: + bench-server-baseline: + runs-on: Standard_NC4as_T4_v3 + env: + RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it + N_USERS: 8 + DURATION: 10m + + strategy: + matrix: + model: [phi-2] + ftype: [q4_0, q8_0, f16] + include: + - model: phi-2 + ftype: q4_0 + pr_comment_enabled: "true" + + if: | + inputs.gpu-series == 'Standard_NC4as_T4_v3' + || github.event_name == 'pull_request_target' + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Install python env + id: pipenv + run: | + cd tools/server/bench + python3 -m venv venv + source venv/bin/activate + pip install -r requirements.txt + + - name: Prometheus + id: install_prometheus + run: | + wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz + tar xzf prometheus*.tar.gz --strip-components=1 + ./prometheus --config.file=tools/server/bench/prometheus.yml & + while ! 
nc -z localhost 9090; do + sleep 0.1 + done + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Install k6 and xk6-sse + id: k6_installation + run: | + cd tools/server/bench + go install go.k6.io/xk6/cmd/xk6@latest + xk6 build master \ + --with github.com/phymbert/xk6-sse + + - name: Build + id: cmake_build + run: | + set -eux + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_CUBLAS=ON \ + -DCUDAToolkit_ROOT=/usr/local/cuda \ + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ + -DCMAKE_CUDA_ARCHITECTURES=75 \ + -DLLAMA_FATAL_WARNINGS=OFF \ + -DLLAMA_ALL_WARNINGS=OFF \ + -DCMAKE_BUILD_TYPE=Release; + cmake --build build --config Release -j $(nproc) --target llama-server + + - name: Download the dataset + id: download_dataset + run: | + cd tools/server/bench + wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json + + - name: Server bench + id: server_bench + env: + HEAD_REF: ${{ github.head_ref || github.ref_name }} + run: | + set -eux + + cd tools/server/bench + source venv/bin/activate + python bench.py \ + --runner-label ${{ env.RUNNER_LABEL }} \ + --name ${{ github.job }} \ + --branch $HEAD_REF \ + --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \ + --scenario script.js \ + --duration ${{ github.event.inputs.duration || env.DURATION }} \ + --hf-repo ggml-org/models \ + --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \ + --model-path-prefix /models \ + --parallel ${{ env.N_USERS }} \ + -ngl 33 \ + --batch-size 2048 \ + --ubatch-size 256 \ + --ctx-size 16384 \ + --n-prompts 1000 \ + --max-prompt-tokens 1024 \ + --max-tokens 2048 + + cat results.github.env >> $GITHUB_ENV + + # Remove dataset as we do not want it in the artefact + rm ShareGPT_V3_unfiltered_cleaned_split.json + + - uses: actions/upload-artifact@v4 + with: + name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} + compression-level: 9 + path: | + tools/server/bench/*.jpg + tools/server/bench/*.json + tools/server/bench/*.log + + - name: Commit status + uses: Sibz/github-status-action@v1 + with: + authToken: ${{secrets.GITHUB_TOKEN}} + sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }} + context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} + description: | + ${{ env.BENCH_RESULTS }} + state: 'success' + + - name: Upload benchmark images + uses: devicons/public-upload-to-imgur@v2.2.2 + continue-on-error: true # Important as it looks unstable: 503 + id: imgur_step + with: + client_id: ${{secrets.IMGUR_CLIENT_ID}} + path: | + tools/server/bench/prompt_tokens_seconds.jpg + tools/server/bench/predicted_tokens_seconds.jpg + tools/server/bench/kv_cache_usage_ratio.jpg + tools/server/bench/requests_processing.jpg + + - name: Extract mermaid + id: set_mermaid + run: | + set -eux + + cd tools/server/bench + PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid) + echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV + echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid) + echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV + echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid) + echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV + echo "$KV_CACHE_USAGE_RATIO"
>> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + REQUESTS_PROCESSING=$(cat requests_processing.mermaid) + echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV + echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + - name: Extract image url + id: extract_image_url + continue-on-error: true + run: | + set -eux + + echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV + echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV + echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV + echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV + + - name: Comment PR + uses: mshick/add-pr-comment@v2 + id: comment_pr + if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }} + with: + message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} + message: |

+ <p align="center">
+
+ 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+
+ </p>
+
+ <details>
+
+ <summary>Expand details for performance related PR only</summary>
+
+ - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
+ - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+ - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
+ - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
+ - ${{ env.BENCH_GRAPH_XLABEL }}
+
+ <p align="center">
+
+ <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
+
+ <details>
+ <summary>More</summary>
+
+ ```mermaid
+ ${{ env.PROMPT_TOKENS_SECONDS }}
+ ```
+
+ </details>
+
+ <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds" />
+
+ <details>
+ <summary>More</summary>
+
+ ```mermaid
+ ${{ env.PREDICTED_TOKENS_SECONDS }}
+ ```
+
+ </details>
+
+ </p>
+
+ <details>
+
+ <summary>Details</summary>
+
+ <p align="center">
+
+ <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
+
+ <details>
+ <summary>More</summary>
+
+ ```mermaid
+ ${{ env.KV_CACHE_USAGE_RATIO }}
+ ```
+
+ </details>
+
+ <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing" />
+
+ <details>
+ <summary>More</summary>
+
+ ```mermaid
+ ${{ env.REQUESTS_PROCESSING }}
+ ```
+
+ </details>
+
+ </p>
+
+ </details>
+ </details>
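The `Extract mermaid` step above relies on the multi-line `$GITHUB_ENV` syntax: a `VAR<<EOF` line opens a value and a matching `EOF` line closes it, since a plain `echo "VAR=value"` only supports single-line values. A minimal sketch of that pattern, reusing one variable and file name from the step above (the brace grouping is just an equivalent way to bundle the redirects):

```sh
# Open a multi-line environment value with a heredoc-style delimiter,
# emit the body, then close it with the matching delimiter line.
{
  echo "PROMPT_TOKENS_SECONDS<<EOF"
  cat prompt_tokens_seconds.mermaid
  echo "EOF"
} >> "$GITHUB_ENV"
```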
diff --git a/llama.cpp/.github/workflows/build-cache.yml b/llama.cpp/.github/workflows/build-cache.yml new file mode 100644 index 0000000..3de0be9 --- /dev/null +++ b/llama.cpp/.github/workflows/build-cache.yml @@ -0,0 +1,89 @@ +name: Build Actions Cache + +on: + workflow_dispatch: # allows manual triggering + schedule: + - cron: '0 * * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + ubuntu-24-vulkan-cache: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Get latest Vulkan SDK version + id: vulkan_sdk_version + run: | + echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" + + - name: Setup Cache + uses: actions/cache@v5 + id: cache-sdk + with: + path: ./vulkan_sdk + key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} + + - name: Setup Vulkan SDK + if: steps.cache-sdk.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-vulkan + with: + path: ./vulkan_sdk + version: ${{ env.VULKAN_SDK_VERSION }} + + ubuntu-24-spacemit-cache: + runs-on: ubuntu-24.04 + + env: + # Make sure this is in sync with build-linux-cross.yml + SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2" + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Setup Cache + uses: actions/cache@v5 + id: cache-toolchain + with: + path: ./spacemit_toolchain + key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} + + - name: Setup SpacemiT Toolchain + if: steps.cache-toolchain.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-spacemit + with: + path: ./spacemit_toolchain + version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }} + + windows-2022-rocm-cache: + runs-on: windows-2022 + + env: + # Make sure this is in sync with build.yml + HIPSDK_INSTALLER_VERSION: "25.Q3" + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Setup Cache + uses: actions/cache@v5 + id: cache-rocm + with: + path: C:\Program Files\AMD\ROCm + key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }} + + - name: Setup ROCm + if: steps.cache-rocm.outputs.cache-hit != 'true' + uses: ./.github/actions/windows-setup-rocm + with: + version: ${{ env.HIPSDK_INSTALLER_VERSION }} diff --git a/llama.cpp/.github/workflows/build-cmake-pkg.yml b/llama.cpp/.github/workflows/build-cmake-pkg.yml new file mode 100644 index 0000000..259efa4 --- /dev/null +++ b/llama.cpp/.github/workflows/build-cmake-pkg.yml @@ -0,0 +1,51 @@ +name: Build relocatable cmake package +on: + workflow_dispatch: + workflow_call: + +jobs: + linux: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y build-essential tcl + + - name: Build + run: | + PREFIX="$(pwd)"/inst + cmake -S . 
-B build -DCMAKE_PREFIX_PATH="$PREFIX" \ + -DLLAMA_OPENSSL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release + cmake --build build --config Release + cmake --install build --prefix "$PREFIX" --config Release + + export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake + tclsh <<'EOF' + set build(commit) [string trim [exec git rev-parse --short HEAD]] + set build(number) [string trim [exec git rev-list --count HEAD]] + set build(version) "0.0.$build(number)" + + set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]] + set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \ + "set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \ + "set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"] + + puts -nonewline "Checking llama-config.cmake version... " + foreach check $checks { + if {![regexp -expanded -- $check $llamaconfig]} { + puts "\"$check\" failed!" + exit 1 + } + } + puts "success." + EOF + + cd examples/simple-cmake-pkg + cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake + cmake --build build diff --git a/llama.cpp/.github/workflows/build-linux-cross.yml b/llama.cpp/.github/workflows/build-linux-cross.yml new file mode 100644 index 0000000..8b6ebaf --- /dev/null +++ b/llama.cpp/.github/workflows/build-linux-cross.yml @@ -0,0 +1,298 @@ +name: Build on Linux using cross-compiler +on: + workflow_dispatch: + workflow_call: + +jobs: + # ubuntu-24-riscv64-cpu-cross: + # runs-on: ubuntu-24.04 + + # steps: + # - uses: actions/checkout@v6 + # - name: Setup Riscv + # run: | + # sudo dpkg --add-architecture riscv64 + + # # Add arch-specific repositories for non-amd64 architectures + # cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe + # EOF + + # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
+ + # sudo apt-get install -y --no-install-recommends \ + # build-essential \ + # gcc-14-riscv64-linux-gnu \ + # g++-14-riscv64-linux-gnu + + # - name: Build + # run: | + # cmake -B build -DLLAMA_OPENSSL=OFF \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DGGML_OPENMP=OFF \ + # -DLLAMA_BUILD_EXAMPLES=ON \ + # -DLLAMA_BUILD_TOOLS=ON \ + # -DLLAMA_BUILD_TESTS=OFF \ + # -DCMAKE_SYSTEM_NAME=Linux \ + # -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ + # -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + # -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + # -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ + # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + # cmake --build build --config Release -j $(nproc) + + # ubuntu-24-riscv64-vulkan-cross: + # runs-on: ubuntu-24.04 + + # steps: + # - uses: actions/checkout@v6 + # - name: Setup Riscv + # run: | + # sudo dpkg --add-architecture riscv64 + + # # Add arch-specific repositories for non-amd64 architectures + # cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe + # EOF + + # sudo apt-get update || true ;# Prevent failure due to missing URLs. + + # sudo apt-get install -y --no-install-recommends \ + # build-essential \ + # glslc \ + # gcc-14-riscv64-linux-gnu \ + # g++-14-riscv64-linux-gnu \ + # libvulkan-dev:riscv64 + + # - name: Build + # run: | + # cmake -B build -DLLAMA_OPENSSL=OFF \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DGGML_VULKAN=ON \ + # -DGGML_OPENMP=OFF \ + # -DLLAMA_BUILD_EXAMPLES=ON \ + # -DLLAMA_BUILD_TOOLS=ON \ + # -DLLAMA_BUILD_TESTS=OFF \ + # -DCMAKE_SYSTEM_NAME=Linux \ + # -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ + # -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + # -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + # -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ + # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + # cmake --build build --config Release -j $(nproc) + + # ubuntu-24-arm64-vulkan-cross: + # runs-on: ubuntu-24.04 + + # steps: + # - uses: actions/checkout@v6 + # - name: Setup Arm64 + # run: | + # sudo dpkg --add-architecture arm64 + + # # Add arch-specific repositories for non-amd64 architectures + # cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe + # EOF + + # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
+ + # sudo apt-get install -y --no-install-recommends \ + # build-essential \ + # glslc \ + # crossbuild-essential-arm64 \ + # libvulkan-dev:arm64 + + # - name: Build + # run: | + # cmake -B build -DLLAMA_OPENSSL=OFF \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DGGML_VULKAN=ON \ + # -DGGML_OPENMP=OFF \ + # -DLLAMA_BUILD_EXAMPLES=ON \ + # -DLLAMA_BUILD_TOOLS=ON \ + # -DLLAMA_BUILD_TESTS=OFF \ + # -DCMAKE_SYSTEM_NAME=Linux \ + # -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + # -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \ + # -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \ + # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + # -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \ + # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + # cmake --build build --config Release -j $(nproc) + + debian-13-loongarch64-cpu-cross: + runs-on: ubuntu-24.04 + container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671 + + steps: + - uses: actions/checkout@v6 + - name: Setup LoongArch + run: | + rm -f /etc/apt/sources.list.d/* + cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list + deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main + EOF + ( echo 'quiet "true";'; \ + echo 'APT::Get::Assume-Yes "true";'; \ + echo 'APT::Install-Recommends "false";'; \ + echo 'Acquire::Check-Valid-Until "false";'; \ + echo 'Acquire::Retries "5";'; \ + ) > /etc/apt/apt.conf.d/99snapshot-repos + + apt-get update + apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip + dpkg --add-architecture loong64 + + # Add arch-specific repositories for non-amd64 architectures + cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list + deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main + EOF + + apt-get update || true ;# Prevent failure due to missing URLs. 
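+          # gcc-14/g++-14 below are the loongarch64 cross compilers from the
+          # pinned Debian snapshot; target libraries are resolved under
+          # /usr/lib/loongarch64-linux-gnu via CMAKE_FIND_ROOT_PATH in the
+          # Build step.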
+ + apt-get install -y --no-install-recommends \ + build-essential \ + gcc-14-loongarch64-linux-gnu \ + g++-14-loongarch64-linux-gnu + + - name: Build + run: | + cmake -B build -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=loongarch64 \ + -DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) + + debian-13-loongarch64-vulkan-cross: + runs-on: ubuntu-24.04 + container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671 + + steps: + - uses: actions/checkout@v6 + - name: Setup LoongArch + run: | + rm -f /etc/apt/sources.list.d/* + cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list + deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main + EOF + ( echo 'quiet "true";'; \ + echo 'APT::Get::Assume-Yes "true";'; \ + echo 'APT::Install-Recommends "false";'; \ + echo 'Acquire::Check-Valid-Until "false";'; \ + echo 'Acquire::Retries "5";'; \ + ) > /etc/apt/apt.conf.d/99snapshot-repos + + apt-get update + apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip + dpkg --add-architecture loong64 + + # Add arch-specific repositories for non-amd64 architectures + cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list + deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main + EOF + + apt-get update || true ;# Prevent failure due to missing URLs. 
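+          # glslc runs on the build host to compile the Vulkan shaders,
+          # while libvulkan-dev:loong64 provides the target-side loader and
+          # headers needed to cross-link with -DGGML_VULKAN=ON.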
+ + apt-get install -y --no-install-recommends \ + build-essential \ + glslc \ + gcc-14-loongarch64-linux-gnu \ + g++-14-loongarch64-linux-gnu \ + libvulkan-dev:loong64 + + - name: Build + run: | + cmake -B build -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_VULKAN=ON \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=loongarch64 \ + -DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \ + -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + cmake --build build --config Release -j $(nproc) + + ubuntu-24-riscv64-cpu-spacemit-ime-cross: + runs-on: ubuntu-24.04 + + env: + # Make sure this is in sync with build-cache.yml + SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2" + + steps: + - uses: actions/checkout@v6 + + - name: Use SpacemiT Toolchain Cache + uses: actions/cache@v5 + id: cache-toolchain + with: + path: ./spacemit_toolchain + key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }} + + - name: Setup SpacemiT Toolchain + if: steps.cache-toolchain.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-spacemit + with: + path: ./spacemit_toolchain + version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }} + + - name: Build + run: | + export RISCV_ROOT_PATH=${PWD}/spacemit_toolchain + cmake -B build -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DGGML_CPU_RISCV64_SPACEMIT=ON \ + -DGGML_RVV=ON \ + -DGGML_RV_ZFH=ON \ + -DGGML_RV_ZICBOP=ON \ + -DGGML_RV_ZIHINTPAUSE=ON \ + -DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \ + -DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake + + cmake --build build --config Release -j $(nproc) diff --git a/llama.cpp/.github/workflows/build.yml b/llama.cpp/.github/workflows/build.yml new file mode 100644 index 0000000..6c7ab71 --- /dev/null +++ b/llama.cpp/.github/workflows/build.yml @@ -0,0 +1,2138 @@ +name: CI + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + paths: [ + '.github/workflows/build.yml', + '.github/workflows/build-linux-cross.yml', + '.github/workflows/build-cmake-pkg.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.cuh', + '**/*.swift', + '**/*.m', + '**/*.metal', + '**/*.comp', + '**/*.glsl', + '**/*.wgsl' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + '.github/workflows/build.yml', + '.github/workflows/build-linux-cross.yml', + '.github/workflows/build-cmake-pkg.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.cuh', + '**/*.swift', + '**/*.m', + '**/*.metal', + '**/*.comp', + '**/*.glsl', + '**/*.wgsl' + ] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_LOG_COLORS: 1 + LLAMA_LOG_PREFIX: 1 + LLAMA_LOG_TIMESTAMPS: 1 + +jobs: + macOS-latest-cmake-arm64: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: 
ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64 + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build \ + -DCMAKE_BUILD_RPATH="@loader_path" \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_BUILD_BORINGSSL=ON \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=OFF \ + -DGGML_METAL_SHADER_DEBUG=ON \ + -DGGML_RPC=ON + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1 + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + macOS-latest-cmake-x64: + runs-on: macos-15-intel + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-x64 + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + # Metal is disabled due to intermittent failures with Github runners not having a GPU: + # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 + cmake -B build \ + -DCMAKE_BUILD_RPATH="@loader_path" \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_BUILD_BORINGSSL=ON \ + -DGGML_METAL=OFF \ + -DGGML_RPC=ON \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3 + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + macOS-latest-cmake-arm64-webgpu: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64-webgpu + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dawn Dependency + id: dawn-depends + run: | + DAWN_VERSION="v2.0.0" + DAWN_OWNER="reeselevine" + DAWN_REPO="dawn" + DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release" + echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" + curl -L -o artifact.zip \ + "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" + mkdir dawn + unzip artifact.zip + tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1 + + - name: Build + id: cmake_build + run: | + export CMAKE_PREFIX_PATH=dawn + cmake -B build -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + ubuntu-cpu-cmake: + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-22.04 + - build: 'arm64' + os: ubuntu-22.04-arm + - build: 's390x' + os: ubuntu-24.04-s390x + - build: 'ppc64le' + os: ubuntu-24.04-ppc64le + + runs-on: ${{ matrix.os }} + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-cpu-cmake-${{ matrix.build }} + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build Dependencies + id: build_depends + run: | + sudo 
apt-get update + sudo apt-get install -y --no-install-recommends \ + python3 python3-pip python3-dev \ + libjpeg-dev build-essential libssl-dev \ + git-lfs + + - name: Python Dependencies + id: python_depends + run: | + python3 -m pip install --upgrade pip + pip3 install ./gguf-py + + - name: Swap Endianness + id: endianness + if: ${{ matrix.build == 's390x' }} + run: | + for f in models/*.gguf; do + echo YES | python3 gguf-py/gguf/scripts/gguf_convert_endian.py $f big + done + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DGGML_RPC=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + - name: Test llama2c conversion + id: llama2c_test + if: ${{ matrix.build != 's390x' }} + run: | + cd build + echo "Fetch tokenizer" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin + echo "Fetch llama2c model" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin + ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf + ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + + - name: Test llama2c (s390x) + id: llama2c_test_s390x + if: ${{ matrix.build == 's390x' }} + run: | + cd build + echo "Fetch llama2c big-endian model" + wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf + ./bin/llama-completion -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + + ubuntu-latest-cmake-sanitizer: + runs-on: ubuntu-latest + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + if: ${{ matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=OFF + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + ubuntu-latest-llguidance: + runs-on: ubuntu-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_LLGUIDANCE=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build 
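+          # `-L main` restricts ctest to the tests labeled "main".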
+ ctest -L main --verbose --timeout 900 + + ubuntu-latest-cmake-rpc: + runs-on: ubuntu-latest + + continue-on-error: true + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + # - name: ccache + # uses: ggml-org/ccache-action@v1.2.16 + # with: + # key: ubuntu-latest-cmake-rpc + # evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DGGML_RPC=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose + + ubuntu-24-cmake-vulkan-deb: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-24-cmake-vulkan-deb + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get install -y glslc libvulkan-dev libssl-dev + + - name: Configure + id: cmake_configure + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DGGML_VULKAN=ON + + - name: Build + id: cmake_build + run: | + cmake --build build -j $(nproc) + + ubuntu-24-cmake-vulkan: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-24-cmake-vulkan + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo add-apt-repository -y ppa:kisak/kisak-mesa + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev + + - name: Get latest Vulkan SDK version + id: vulkan_sdk_version + run: | + echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" + + - name: Use Vulkan SDK Cache + uses: actions/cache@v5 + id: cache-sdk + with: + path: ./vulkan_sdk + key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} + + - name: Setup Vulkan SDK + if: steps.cache-sdk.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-vulkan + with: + path: ./vulkan_sdk + version: ${{ env.VULKAN_SDK_VERSION }} + + - name: Build + id: cmake_build + run: | + source ./vulkan_sdk/setup-env.sh + cmake -B build \ + -DGGML_VULKAN=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + export GGML_VK_VISIBLE_DEVICES=0 + export GGML_VK_DISABLE_F16=1 + # This is using llvmpipe and runs slower than other backends + ctest -L main --verbose --timeout 4800 + + ubuntu-24-cmake-webgpu: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-24-cmake-webgpu + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo add-apt-repository -y ppa:kisak/kisak-mesa + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev + + - name: Get latest Vulkan SDK version + id: vulkan_sdk_version + run: | + echo "VULKAN_SDK_VERSION=$(curl 
https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" + + - name: Use Vulkan SDK Cache + uses: actions/cache@v5 + id: cache-sdk + with: + path: ./vulkan_sdk + key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} + + - name: Setup Vulkan SDK + if: steps.cache-sdk.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-vulkan + with: + path: ./vulkan_sdk + version: ${{ env.VULKAN_SDK_VERSION }} + + - name: Dawn Dependency + id: dawn-depends + run: | + sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev + DAWN_VERSION="v2.0.0" + DAWN_OWNER="reeselevine" + DAWN_REPO="dawn" + DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-ubuntu-latest-Release" + echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" + curl -L -o artifact.zip \ + "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" + mkdir dawn + unzip artifact.zip + tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1 + + - name: Build + id: cmake_build + run: | + export Dawn_DIR=dawn/lib64/cmake/Dawn + cmake -B build \ + -DGGML_WEBGPU=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + # This is using llvmpipe and runs slower than other backends + ctest -L main --verbose --timeout 3600 + + ubuntu-24-wasm-webgpu: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-latest-wasm-webgpu + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Install Emscripten + run: | + git clone https://github.com/emscripten-core/emsdk.git + cd emsdk + ./emsdk install latest + ./emsdk activate latest + + - name: Fetch emdawnwebgpu + run: | + DAWN_TAG="v20251027.212519" + EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip" + echo "Downloading ${EMDAWN_PKG}" + curl -L -o emdawn.zip \ + "https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}" + unzip emdawn.zip + + - name: Build WASM WebGPU + run: | + source emsdk/emsdk_env.sh + emcmake cmake -B build-wasm \ + -DGGML_WEBGPU=ON \ + -DLLAMA_OPENSSL=OFF \ + -DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg + + cmake --build build-wasm --target test-backend-ops -j $(nproc) + + ubuntu-22-cmake-hip: + runs-on: ubuntu-22.04 + container: rocm/dev-ubuntu-22.04:6.1.2 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libssl-dev rocwmma-dev + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-hip + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build with native CMake HIP support + id: cmake_build + run: | + cmake -B build -S . 
\ + -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \ + -DGGML_HIP_ROCWMMA_FATTN=ON \ + -DGGML_HIP=ON + cmake --build build --config Release -j $(nproc) + + ubuntu-22-cmake-musa: + runs-on: ubuntu-22.04 + container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + apt-get update + apt-get install -y build-essential git cmake libssl-dev + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-musa + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build with native CMake MUSA support + id: cmake_build + run: | + cmake -B build -S . \ + -DGGML_MUSA=ON + cmake --build build --config Release -j $(nproc) + + ubuntu-22-cmake-sycl: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v6 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp libssl-dev + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + cmake -B build \ + -DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx + cmake --build build --config Release -j $(nproc) + + ubuntu-22-cmake-sycl-fp16: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v6 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp libssl-dev + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl-fp16 + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + cmake -B build \ + -DGGML_SYCL=ON \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=icpx \ + -DGGML_SYCL_F16=ON + cmake --build build --config Release -j $(nproc) + + build-linux-cross: + uses: ./.github/workflows/build-linux-cross.yml + + build-cmake-pkg: + uses: ./.github/workflows/build-cmake-pkg.yml + + macOS-latest-cmake-ios: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-ios + 
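+          # evict-old-files prunes cache entries unused for the given age
+          # (here one day); save (below) limits cache writes to pushes on
+          # master.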
evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-cmake-tvos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-tvos + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-cmake-visionos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=visionOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + macOS-latest-swift: + runs-on: macos-latest + needs: ios-xcode-build + + strategy: + matrix: + destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-swift + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Download xcframework artifact + uses: actions/download-artifact@v7 + with: + name: llama-xcframework + path: build-apple/llama.xcframework/ + + - name: Build llama.cpp with CMake + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + windows-msys2: + runs-on: windows-2025 + + strategy: + fail-fast: false + matrix: + include: + - { sys: UCRT64, env: ucrt-x86_64, build: Release } + - { sys: CLANG64, env: clang-x86_64, build: Release } + + steps: + - name: Clone + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-msys2 + variant: ccache + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 
'refs/heads/master' }} + + - name: Setup ${{ matrix.sys }} + uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{matrix.sys}} + install: >- + base-devel + git + mingw-w64-${{matrix.env}}-toolchain + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-openblas + + - name: Build using CMake + shell: msys2 {0} + run: | + cmake -B build + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + - name: Clean after building using CMake + shell: msys2 {0} + run: | + rm -rf build + + - name: Build using CMake w/ OpenBLAS + shell: msys2 {0} + run: | + cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + windows-latest-cmake: + runs-on: windows-2025 + + env: + OPENBLAS_VERSION: 0.3.23 + SDE_VERSION: 9.33.0-2024-01-07 + VULKAN_VERSION: 1.4.313.2 + + strategy: + matrix: + include: + - build: 'cpu-x64 (static)' + arch: 'x64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF' + - build: 'openblas-x64' + arch: 'x64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' + - build: 'vulkan-x64' + arch: 'x64' + defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON' + - build: 'llvm-arm64' + arch: 'arm64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' + - build: 'llvm-arm64-opencl-adreno' + arch: 'arm64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-${{ matrix.build }} + variant: ccache + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Download OpenBLAS + id: get_openblas + if: ${{ matrix.build == 'openblas-x64' }} + run: | + curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" + curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" + mkdir $env:RUNNER_TEMP/openblas + tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas + $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) + $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) + $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe') + & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll + + - name: Install Vulkan SDK + id: get_vulkan + if: ${{ matrix.build == 'vulkan-x64' }} + run: | + curl.exe -o 
$env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Install OpenCL Headers and Libs + id: install_opencl + if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }} + run: | + git clone https://github.com/KhronosGroup/OpenCL-Headers + cd OpenCL-Headers + cmake -B build ` + -DBUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build --target install + git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader + cd OpenCL-ICD-Loader + cmake -B build-arm64-release ` + -A arm64 ` + -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build-arm64-release --target install --config release + + - name: Build + id: cmake_build + run: | + cmake -S . -B build ${{ matrix.defines }} ` + -DLLAMA_BUILD_BORINGSSL=ON + cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} + + - name: Add libopenblas.dll + id: add_libopenblas_dll + if: ${{ matrix.build == 'openblas-x64' }} + run: | + cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll + cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt + + - name: Test + id: cmake_test + if: ${{ matrix.arch == 'x64' }} + run: | + cd build + ctest -L main -C Release --verbose --timeout 900 + + # TODO: disabled for now, consider adding tests for all CPU variants instead + # - name: Test (Intel SDE) + # id: cmake_test_sde + # if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation + # run: | + # curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" + # # for some weird reason windows tar doesn't like sde tar.xz + # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz + # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar + # $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) + # cd build + # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 + # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 + + ubuntu-latest-cmake-cuda: + runs-on: ubuntu-latest + container: nvidia/cuda:12.6.2-devel-ubuntu24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install dependencies + env: + DEBIAN_FRONTEND: noninteractive + run: | + apt update + apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-cuda + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build with CMake + # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project + run: | + cmake -S . 
-B build -G Ninja \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_ARCHITECTURES=89-real \ + -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ + -DGGML_NATIVE=OFF \ + -DGGML_CUDA=ON \ + -DGGML_CUDA_CUB_3DOT2=ON + cmake --build build + + windows-2022-cmake-cuda: + runs-on: windows-2022 + + strategy: + matrix: + cuda: ['12.4'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-cuda-${{ matrix.cuda }} + variant: ccache + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Install Cuda Toolkit + uses: ./.github/actions/windows-setup-cuda + with: + cuda_version: ${{ matrix.cuda }} + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Build + id: cmake_build + shell: cmd + # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project + run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + cmake -S . -B build -G "Ninja Multi-Config" ^ + -DLLAMA_BUILD_SERVER=ON ^ + -DLLAMA_BUILD_BORINGSSL=ON ^ + -DGGML_NATIVE=OFF ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_CPU_ALL_VARIANTS=ON ^ + -DGGML_CUDA=ON ^ + -DGGML_RPC=ON ^ + -DGGML_CUDA_CUB_3DOT2=ON + set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 + cmake --build build --config Release -j %NINJA_JOBS% -t ggml + cmake --build build --config Release + + windows-latest-cmake-sycl: + runs-on: windows-2022 + + defaults: + run: + shell: bash + + env: + WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe + WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel + ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-sycl + variant: ccache + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Install + run: | + scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL + + # TODO: add ssl support ; we will also need to modify win-build-sycl.bat to accept user-specified args + + - name: Build + id: cmake_build + run: examples/sycl/win-build-sycl.bat + + windows-latest-cmake-hip: + runs-on: windows-2022 + + env: + # The ROCm version must correspond to the version used in the HIP SDK. 
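+      # (the rocWMMA .deb fetched below comes from the repo.radeon.com pool
+      # for this same ROCm version, so the two have to be bumped together)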
+ ROCM_VERSION: "6.4.2" + # Make sure this is in sync with build-cache.yml + HIPSDK_INSTALLER_VERSION: "25.Q3" + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Grab rocWMMA package + id: grab_rocwmma + run: | + curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }}/pool/main/r/rocwmma-dev/rocwmma-dev_1.7.0.60402-120~24.04_amd64.deb" + 7z x rocwmma.deb + 7z x data.tar + + - name: Use ROCm Installation Cache + uses: actions/cache@v5 + id: cache-rocm + with: + path: C:\Program Files\AMD\ROCm + key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }} + + - name: Setup ROCm + if: steps.cache-rocm.outputs.cache-hit != 'true' + uses: ./.github/actions/windows-setup-rocm + with: + version: ${{ env.HIPSDK_INSTALLER_VERSION }} + + - name: Verify ROCm + id: verify + run: | + # Find and test ROCm installation + $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1 + if (-not $clangPath) { + Write-Error "ROCm installation not found" + exit 1 + } + & $clangPath.FullName --version + + - name: Install ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ${{ github.job }} + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build + id: cmake_build + run: | + $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) + $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" + cmake -G "Unix Makefiles" -B build -S . ` + -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` + -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-${{ env.ROCM_VERSION }}/include/" ` + -DCMAKE_BUILD_TYPE=Release ` + -DLLAMA_BUILD_BORINGSSL=ON ` + -DROCM_DIR="${env:HIP_PATH}" ` + -DGGML_HIP=ON ` + -DGGML_HIP_ROCWMMA_FATTN=ON ` + -DGGML_RPC=ON + cmake --build build -j ${env:NUMBER_OF_PROCESSORS} + + ios-xcode-build: + runs-on: macos-latest + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Setup Xcode + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: latest-stable + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + - name: xcodebuild for swift package + id: xcodebuild + run: | + ./build-xcframework.sh + + - name: Upload xcframework artifact + uses: actions/upload-artifact@v6 + with: + name: llama-xcframework + path: build-apple/llama.xcframework/ + retention-days: 1 + + - name: Build Xcode project + run: | + xcodebuild -downloadPlatform iOS + xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + + android-build: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v6 + + # Disabled due to size (400MB) and always 0 cache hits + # - name: ccache + # uses: ggml-org/ccache-action@v1.2.16 + # with: + # key: android-build + # evict-old-files: 1d + + - name: Set up JDK + uses: actions/setup-java@v5 + with: + 
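+          # JDK 17 (Zulu) for the Gradle build of examples/llama.android below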
java-version: 17 + distribution: zulu + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + with: + log-accepted-android-sdk-licenses: false + + - name: Build + run: | + cd examples/llama.android + ./gradlew build --no-daemon + + android-ndk-build: + runs-on: ubuntu-latest + + env: + OPENCL_VERSION: 2025.07.22 + + strategy: + matrix: + include: + - build: 'arm64-cpu' + defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF' + - build: 'arm64-snapdragon' + defines: '--preset arm64-android-snapdragon-release' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install OpenCL Headers and Libs + id: install_opencl + if: ${{ matrix.build == 'arm64-snapdragon' }} + run: | + mkdir opencl + curl -L -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + curl -L -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + curl -L -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz + tar -xaf opencl/headers.tar.gz -C opencl + tar -xaf opencl/clhpp.tar.gz -C opencl + tar -xaf opencl/icd-loader.tar.gz -C opencl + sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include + sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL + cd opencl/OpenCL-ICD-Loader-${OPENCL_VERSION} + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=31 -DANDROID_STL=c++_shared + cmake --build build + sudo cp build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android + rm -rf opencl + + - name: Install Hexagon SDK + id: install_hexsdk + if: ${{ matrix.build == 'arm64-snapdragon' }} + env: + HEXSDK_VER: 6.4.0.2 + HEXTLS_VER: 19.0.04 + run: | + curl -L -o hex-sdk.tar.gz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz + mkdir hex-sdk + tar -xaf hex-sdk.tar.gz -C hex-sdk + ls -l hex-sdk + sudo mv hex-sdk /opt/hexagon + echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV" + echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV" + echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV" + echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV" + echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV" + echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV" + + - name: Update CMake presets + id: update_presets + if: ${{ matrix.build == 'arm64-snapdragon' }} + run: | + cp docs/backend/snapdragon/CMakeUserPresets.json . 
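+          # The presets file copied above defines the
+          # arm64-android-snapdragon-release preset that this matrix entry
+          # passes to cmake; a rough local equivalent, assuming the same
+          # NDK/Hexagon SDK environment as set up above, would be:
+          #   cmake --preset arm64-android-snapdragon-release -B build
+          #   cmake --build build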
+ + - name: Build + id: ndk_build + run: | + cmake ${{ matrix.defines }} -B build + cmake --build build + cmake --install build --prefix pkg-adb/llama.cpp + + - name: Test + id: cmake_test + run: | + echo "FIXME: test on devices" + + openEuler-latest-cmake-cann: + defaults: + run: + shell: bash -el {0} + strategy: + matrix: + arch: [x86, aarch64] + chip_type: ['910b', '310p'] + build: ['Release'] + use_acl_graph: ['on', 'off'] + exclude: + # 310P does not support USE_ACL_GRAPH=on + - chip_type: '310p' + use_acl_graph: 'on' + runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Free up disk space + uses: ggml-org/free-disk-space@v1.3.1 + with: + tool-cache: true + + - name: Set container image + id: cann-image + run: | + image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}" + echo "image=${image}" >> "${GITHUB_OUTPUT}" + + - name: Pull container image + run: docker pull "${{ steps.cann-image.outputs.image }}" + + - name: Build + env: + BUILD_TYPE: ${{ matrix.build }} + SOC_TYPE: ascend${{ matrix.chip_type }} + USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} + run: | + HOST_UID=$(id -u) + HOST_GID=$(id -g) + + docker run --rm \ + -v "${PWD}:/workspace" \ + -w /workspace \ + -e SOC_TYPE=${SOC_TYPE} \ + -e BUILD_TYPE=${BUILD_TYPE} \ + -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ + "${{ steps.cann-image.outputs.image }}" \ + bash -lc ' + set -e + yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel + yum clean all && rm -rf /var/cache/yum + git config --global --add safe.directory "/workspace" + export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} + cmake -S . 
-B build \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DGGML_CANN=on \ + -DSOC_TYPE=${SOC_TYPE} \ + -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} + cmake --build build -j $(nproc) + + chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build + ' + +# TODO: simplify the following workflows using a matrix +# TODO: run lighter CI on PRs and the full CI only on master (if needed) + ggml-ci-x64-cpu-low-perf: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-x64-cpu-low-perf + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-arm64-cpu-low-perf: + runs-on: ubuntu-22.04-arm + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-cpu-low-perf + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-x64-cpu-high-perf: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-x64-cpu-high-perf + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-arm64-cpu-high-perf: + runs-on: ubuntu-22.04-arm + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-cpu-high-perf + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-arm64-cpu-high-perf-sve: + runs-on: ubuntu-22.04-arm + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-cpu-high-perf-sve + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ggml-ci-x64-nvidia-cuda: + runs-on: [self-hosted, Linux, X64, NVIDIA] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + nvidia-smi + GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp 
/mnt/llama.cpp + + ggml-ci-x64-nvidia-vulkan-cm: + runs-on: [self-hosted, Linux, X64, NVIDIA] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + vulkaninfo --summary + GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + + ggml-ci-x64-nvidia-vulkan-cm2: + runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + vulkaninfo --summary + GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + + ggml-ci-x64-cpu-amx: + runs-on: [self-hosted, Linux, X64, CPU, AMX] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + + # ggml-ci-x64-amd-vulkan: + # runs-on: [self-hosted, Linux, X64, AMD] + + # steps: + # - name: Clone + # id: checkout + # uses: actions/checkout@v6 + + # - name: Test + # id: ggml-ci + # run: | + # vulkaninfo --summary + # GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + + # ggml-ci-x64-amd-rocm: + # runs-on: [self-hosted, Linux, X64, AMD] + + # steps: + # - name: Clone + # id: checkout + # uses: actions/checkout@v6 + + # - name: Test + # id: ggml-ci + # run: | + # amd-smi static + # GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + + ggml-ci-mac-metal: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + + ggml-ci-mac-webgpu: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dawn Dependency + id: dawn-depends + run: | + DAWN_VERSION="v2.0.0" + DAWN_OWNER="reeselevine" + DAWN_REPO="dawn" + DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release" + echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" + curl -L -o artifact.zip \ + "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip" + mkdir dawn + unzip artifact.zip + tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1 + + - name: Test + id: ggml-ci + run: | + GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \ + bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + + ggml-ci-mac-vulkan: + runs-on: [self-hosted, macOS, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + vulkaninfo --summary + GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + + ggml-ci-arm64-cpu-kleidiai: + runs-on: ubuntu-22.04-arm + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-cpu-kleidiai + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install -y build-essential + + - name: Test + id: ggml-ci + run: | + GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + + ubuntu-cpu-cmake-riscv64-native: + runs-on: RISCV64 + + steps: + - name: Install dependencies + 
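+        # Native RISC-V runner: the script below promotes gcc-14/g++-14 to
+        # the default compilers via update-alternatives before configuring.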
run: | + sudo apt-get update + + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs + + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ + + # Install Rust stable version + rustup install stable + rustup default stable + + git lfs install + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Check environment + run: | + uname -a + gcc --version + g++ --version + ldd --version + cmake --version + rustc --version + + - name: Setup ccache + run: | + # Set unique cache directory for this job + export CCACHE_DIR="$HOME/.ccache/cpu-cmake-rv64-native" + mkdir -p "$CCACHE_DIR" + + # Configure ccache for optimal performance + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + + # Enable more aggressive caching + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + # Export for subsequent steps + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=ON \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DGGML_RPC=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + - name: Test llama2c conversion + id: llama2c_test + run: | + cd build + echo "Fetch tokenizer" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin + echo "Fetch llama2c model" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin + ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf + ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + + ubuntu-cmake-sanitizer-riscv64-native: + runs-on: RISCV64 + + continue-on-error: true + + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug] + + steps: + - name: Install dependencies + run: | + sudo apt-get update + + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs + + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ + + # Install Rust stable version + rustup install stable + rustup default stable + + git lfs install + + - name: GCC version check + run: | + gcc --version + g++ --version + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Setup ccache + run: | + # Unique cache directory per matrix combination + export 
CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}" + mkdir -p "$CCACHE_DIR" + + # Configure ccache + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + # Export for subsequent steps + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + + - name: Build + id: cmake_build + if: ${{ matrix.sanitizer != 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=ON \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Build (no OpenMP) + id: cmake_build_no_openmp + if: ${{ matrix.sanitizer == 'THREAD' }} + run: | + cmake -B build \ + -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + + ubuntu-llguidance-riscv64-native: + runs-on: RISCV64 + steps: + - name: Install dependencies + run: | + sudo apt-get update + + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs + + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ + + # Install Rust stable version + rustup install stable + rustup default stable + + git lfs install + + - name: GCC version check + run: | + gcc --version + g++ --version + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Setup ccache + run: | + export CCACHE_DIR="$HOME/.ccache/llguidance-riscv64" + mkdir -p "$CCACHE_DIR" + + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_OPENSSL=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DLLAMA_LLGUIDANCE=ON \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + 
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 + + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + + ubuntu-cmake-rpc-riscv64-native: + runs-on: RISCV64 + + continue-on-error: true + + steps: + - name: Install dependencies + run: | + sudo apt-get update + + # Install necessary packages + sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential libssl-dev wget ccache git-lfs + + # Set gcc-14 and g++-14 as the default compilers + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100 + sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc + sudo ln -sf /usr/bin/g++-14 /usr/bin/g++ + + # Install Rust stable version + rustup install stable + rustup default stable + + git lfs install + + - name: GCC version check + run: | + gcc --version + g++ --version + + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Setup ccache + run: | + export CCACHE_DIR="$HOME/.ccache/rpc-riscv64" + mkdir -p "$CCACHE_DIR" + + ccache --set-config=max_size=5G + ccache --set-config=compression=true + ccache --set-config=compression_level=6 + ccache --set-config=cache_dir="$CCACHE_DIR" + ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime + ccache --set-config=hash_dir=false + + echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV + echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_TESTS=ON \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + -DGGML_RPC=ON + + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose + + ggml-ci-arm64-graviton4-kleidiai: + runs-on: ah-ubuntu_22_04-c8g_8x + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Dependencies + id: depends + run: | + set -euxo pipefail + sudo apt-get update + sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \ + apt-get install -y \ + build-essential \ + python3-venv \ + gpg \ + wget \ + time \ + git-lfs + + git lfs install + + # install the latest cmake + sudo install -d /usr/share/keyrings + wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \ + | gpg --dearmor \ + | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null + echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \ + | sudo tee /etc/apt/sources.list.d/kitware.list + sudo apt-get update + sudo apt-get install -y cmake + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ggml-ci-arm64-graviton4-kleidiai + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Test + id: ggml-ci + run: | + GG_BUILD_KLEIDIAI=1 \ + GG_BUILD_EXTRA_TESTS_0=1 \ + bash ./ci/run.sh ./tmp/results ./tmp/mnt diff --git a/llama.cpp/.github/workflows/check-vendor.yml b/llama.cpp/.github/workflows/check-vendor.yml new file mode 100644 index 0000000..1671ed7 --- /dev/null +++ b/llama.cpp/.github/workflows/check-vendor.yml @@ -0,0 +1,52 @@ +name: Check vendor + +on: + workflow_dispatch: # allows manual 
triggering + push: + branches: + - master + paths: [ + 'vendor/**', + 'scripts/sync_vendor.py' + ] + + pull_request: + types: [opened, synchronize, reopened] + paths: [ + 'vendor/**', + 'scripts/sync_vendor.py' + ] + +jobs: + check-vendor: + runs-on: ubuntu-slim + + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: '3.x' + + - name: Run vendor sync + run: | + set -euo pipefail + python3 scripts/sync_vendor.py + + - name: Check for changes + run: | + set -euo pipefail + # detect modified or untracked files + changed=$(git status --porcelain --untracked-files=all || true) + if [ -n "$changed" ]; then + echo "Vendor sync modified files:" + echo "$changed" | awk '{ print $2 }' | sed '/^$/d' + echo "Failing because vendor files are out of sync. Please update scripts/sync_vendor.py" + exit 1 + else + echo "Vendor files are up-to-date." + fi diff --git a/llama.cpp/.github/workflows/close-issue.yml b/llama.cpp/.github/workflows/close-issue.yml new file mode 100644 index 0000000..ec3df08 --- /dev/null +++ b/llama.cpp/.github/workflows/close-issue.yml @@ -0,0 +1,28 @@ +name: Close inactive issues +on: + schedule: + - cron: "42 0 * * *" + +# Fine-grained permission +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token +permissions: + issues: write + +jobs: + close-issues: + runs-on: ubuntu-slim + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v10 + with: + exempt-issue-labels: "refactoring,help wanted,good first issue,research 🔬,bug,roadmap" + days-before-issue-stale: 30 + days-before-issue-close: 14 + stale-issue-label: "stale" + close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." + days-before-pr-stale: -1 + days-before-pr-close: -1 + operations-per-run: 10000 + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/llama.cpp/.github/workflows/copilot-setup-steps.yml b/llama.cpp/.github/workflows/copilot-setup-steps.yml new file mode 100644 index 0000000..fc3cec5 --- /dev/null +++ b/llama.cpp/.github/workflows/copilot-setup-steps.yml @@ -0,0 +1,57 @@ +name: "Copilot Setup Steps" + +# Automatically run the setup steps when they are changed to allow for easy validation, and +# allow manual testing through the repository's "Actions" tab +on: + workflow_dispatch: + push: + paths: + - .github/workflows/copilot-setup-steps.yml + pull_request: + paths: + - .github/workflows/copilot-setup-steps.yml + +jobs: + # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot. + copilot-setup-steps: + runs-on: ubuntu-latest + + # Set the permissions to the lowest level needed for your steps. + # Copilot will be given its own token for its operations. + permissions: + # If you want to clone the repository as part of your setup steps, for example to install dependencies, you'll need the `contents: read` permission. If you don't clone the repository in your setup steps, Copilot will do this for you automatically after the steps complete. + contents: read + + # You can define any steps you want, and they will run before the agent starts. + # If you do not check out your code, Copilot will do this for you. 
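 + # Note: every `run:` step below executes in its own shell, so activating a
 + # virtualenv in one step does not carry over to the next. A minimal sketch of
 + # the portable pattern (the .venv path is illustrative) is to put the venv on
 + # PATH for all later steps:
 + #
 + #   python3 -m venv .venv
 + #   echo "$PWD/.venv/bin" >> "$GITHUB_PATH"   # subsequent steps resolve pip/python from the venv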
+ steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: copilot-setup-steps + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + # Install git-clang-format script for formatting only changed code + wget -O /tmp/git-clang-format https://raw.githubusercontent.com/llvm/llvm-project/release/18.x/clang/tools/clang-format/git-clang-format + sudo cp /tmp/git-clang-format /usr/local/bin/git-clang-format + sudo chmod +x /usr/local/bin/git-clang-format + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + python3 -m venv .venv + source .venv/bin/activate + pip install -r requirements/requirements-all.txt -r tools/server/tests/requirements.txt + pip install flake8 pyright pre-commit diff --git a/llama.cpp/.github/workflows/docker.yml b/llama.cpp/.github/workflows/docker.yml new file mode 100644 index 0000000..8062177 --- /dev/null +++ b/llama.cpp/.github/workflows/docker.yml @@ -0,0 +1,226 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. + +name: Publish Docker image + +on: + workflow_dispatch: # allows manual triggering + schedule: + # Rebuild daily rather than on every push because it is expensive + - cron: '12 4 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +# Fine-grained permission +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token +permissions: + packages: write + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + + runs-on: ${{ matrix.config.runs_on }} + env: + COMMIT_SHA: ${{ github.sha }} + strategy: + fail-fast: false + matrix: + config: + # Multi-stage build + # Note: the arm64 images are failing, which prevents the amd64 images from being built + # https://github.com/ggml-org/llama.cpp/issues/11888 + #- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false } + - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" } + - { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "12.4.0", ubuntu_version: "22.04" } + - { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "13.1.0", ubuntu_version: "24.04" } + - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } + - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, 
runs_on: "ubuntu-22.04" } + - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" } + - { tag: "s390x", dockerfile: ".devops/s390x.Dockerfile", platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04-s390x" } + - { tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } + steps: + - name: Check out the repo + uses: actions/checkout@v6 + with: + fetch-depth: 0 # preserve git history, so we can determine the build number + + - name: Set up QEMU + if: ${{ matrix.config.tag != 's390x' }} + uses: docker/setup-qemu-action@v3 + with: + image: tonistiigi/binfmt:qemu-v7.0.0-28 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Determine source tag name + id: srctag + uses: ./.github/actions/get-tag-name + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + + - name: Determine image tag name + id: tag + shell: bash + run: | + REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case + REPO_NAME="${{ github.event.repository.name }}" + PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" + + # list all tags possible + tags="${{ matrix.config.tag }}" + for tag in $tags; do + if [[ "$tag" == "cpu" ]]; then + TYPE="" + else + TYPE="-$tag" + fi + CACHETAGS="${PREFIX}buildcache${TYPE}" + FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}" + LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}" + SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}" + done + echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT + echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT + echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT + echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT + echo "cache_output_tags=$CACHETAGS" # print out for debugging + echo "full_output_tags=$FULLTAGS" # print out for debugging + echo "light_output_tags=$LIGHTTAGS" # print out for debugging + echo "server_output_tags=$SERVERTAGS" # print out for debugging + env: + GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' + + - name: Free Disk Space (Ubuntu) + if: ${{ matrix.config.free_disk_space == true }} + uses: ggml-org/free-disk-space@v1.3.1 + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + + - name: Build and push Full Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }} + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.full_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: full + provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} + # using github experimental cache + #cache-from: type=gha + #cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + # using registry cache (no storage limit) + cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }} + cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max + + - name: Build and push Light Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }} + uses: docker/build-push-action@v6 + with: + context: . + push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.light_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: light + provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} + # using github experimental cache + #cache-from: type=gha + #cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + # using registry cache (no storage limit) + cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }} + cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max + + - name: Build and push Server Docker image (tagged + versioned) + if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }} + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + platforms: ${{ matrix.config.platforms }} + # tag list is generated from step above + tags: ${{ steps.tag.outputs.server_output_tags }} + file: ${{ matrix.config.dockerfile }} + target: server + provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} + # using github experimental cache + #cache-from: type=gha + #cache-to: type=gha,mode=max + # return to this if the experimental github cache is having issues + #cache-to: type=local,dest=/tmp/.buildx-cache + #cache-from: type=local,src=/tmp/.buildx-cache + # using registry cache (no storage limit) + cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }} + cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max + + create_tag: + name: Create and push git tag + runs-on: ubuntu-22.04 + permissions: + contents: write + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Determine source tag name + id: srctag + uses: ./.github/actions/get-tag-name + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + + - name: Create and push git tag + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git tag ${{ steps.srctag.outputs.name }} || exit 0 + git push origin ${{ steps.srctag.outputs.name }} || exit 0 diff --git a/llama.cpp/.github/workflows/editorconfig.yml b/llama.cpp/.github/workflows/editorconfig.yml new file mode 100644 index 0000000..702dc89 --- /dev/null +++ b/llama.cpp/.github/workflows/editorconfig.yml @@ -0,0 +1,29 @@ +name: EditorConfig Checker + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + branches: + - master + pull_request: + branches: + - master + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + editorconfig: + runs-on: ubuntu-slim + steps: + - uses: actions/checkout@v6 + - uses: editorconfig-checker/action-editorconfig-checker@v2 + with: + version: v3.0.3 + - run: editorconfig-checker diff --git a/llama.cpp/.github/workflows/gguf-publish.yml b/llama.cpp/.github/workflows/gguf-publish.yml new file mode 100644 index 0000000..0e95766 --- /dev/null +++ b/llama.cpp/.github/workflows/gguf-publish.yml @@ -0,0 +1,44 @@ +# This workflow will upload a Python Package using Twine when a GGUF release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# See `gguf-py/README.md` for how to make a release. + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
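 + # The workflow below publishes gguf-py when a matching tag is pushed; a short
 + # sketch of cutting a release (the version number is illustrative):
 + #
 + #   git tag gguf-v0.17.0
 + #   git push origin gguf-v0.17.0   # matches the 'gguf-v*' pattern under on.push.tags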
+ +name: Upload Python Package + +on: + workflow_dispatch: + push: + # Pattern matched against refs/tags + tags: + - 'gguf-v*' # Push events to every version tag + + +jobs: + deploy: + + runs-on: ubuntu-slim + + steps: + - uses: actions/checkout@v6 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.9.x' + - name: Install dependencies + run: | + cd gguf-py + python -m pip install poetry + poetry install + + - name: Build package + run: cd gguf-py && poetry build + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + packages-dir: gguf-py/dist diff --git a/llama.cpp/.github/workflows/labeler.yml b/llama.cpp/.github/workflows/labeler.yml new file mode 100644 index 0000000..eab20c6 --- /dev/null +++ b/llama.cpp/.github/workflows/labeler.yml @@ -0,0 +1,17 @@ +name: "Pull Request Labeler" +on: +- pull_request_target + +jobs: + labeler: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-slim + steps: + - uses: actions/checkout@v6 + with: + repository: "ggml-org/llama.cpp" + - uses: actions/labeler@v6 + with: + configuration-path: '.github/labeler.yml' diff --git a/llama.cpp/.github/workflows/pre-tokenizer-hashes.yml b/llama.cpp/.github/workflows/pre-tokenizer-hashes.yml new file mode 100644 index 0000000..7126b62 --- /dev/null +++ b/llama.cpp/.github/workflows/pre-tokenizer-hashes.yml @@ -0,0 +1,45 @@ +name: Check Pre-Tokenizer Hashes + +on: + push: + paths: + - 'convert_hf_to_gguf.py' + - 'convert_hf_to_gguf_update.py' + pull_request: + paths: + - 'convert_hf_to_gguf.py' + - 'convert_hf_to_gguf_update.py' + +jobs: + pre-tokenizer-hashes: + runs-on: ubuntu-slim + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: Install Python dependencies + run: | + python3 -m venv .venv + .venv/bin/pip install -r requirements/requirements-convert_hf_to_gguf_update.txt + + - name: Update pre-tokenizer hashes + run: | + cp convert_hf_to_gguf.py /tmp + .venv/bin/python convert_hf_to_gguf_update.py --check-missing + + - name: Check if committed pre-tokenizer hashes matches generated version + run: | + if ! diff -q convert_hf_to_gguf.py /tmp/convert_hf_to_gguf.py; then + echo "Model pre-tokenizer hashes (in convert_hf_to_gguf.py) do not match generated hashes (from convert_hf_to_gguf_update.py)." + echo "To fix: run ./convert_hf_to_gguf_update.py and commit the updated convert_hf_to_gguf.py along with your changes" + echo "Differences found:" + diff convert_hf_to_gguf.py /tmp/convert_hf_to_gguf.py || true + exit 1 + fi + echo "Model pre-tokenizer hashes are up to date." 
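+# The job above boils down to "regenerate and diff"; roughly the same check can
+# be run locally with a sketch like this (the venv name is illustrative):
+#
+#   python3 -m venv .venv
+#   .venv/bin/pip install -r requirements/requirements-convert_hf_to_gguf_update.txt
+#   cp convert_hf_to_gguf.py /tmp
+#   .venv/bin/python convert_hf_to_gguf_update.py --check-missing
+#   diff -u /tmp/convert_hf_to_gguf.py convert_hf_to_gguf.py   # an empty diff means the hashes are current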
diff --git a/llama.cpp/.github/workflows/python-check-requirements.yml b/llama.cpp/.github/workflows/python-check-requirements.yml new file mode 100644 index 0000000..1219b87 --- /dev/null +++ b/llama.cpp/.github/workflows/python-check-requirements.yml @@ -0,0 +1,33 @@ +name: Python check requirements.txt + +on: + push: + paths: + - '.github/workflows/python-check-requirements.yml' + - 'scripts/check-requirements.sh' + - 'convert*.py' + - '**/requirements*.txt' + pull_request: + paths: + - '.github/workflows/python-check-requirements.yml' + - 'scripts/check-requirements.sh' + - 'convert*.py' + - '**/requirements*.txt' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + python-check-requirements: + runs-on: ubuntu-slim + name: check-requirements + steps: + - name: Check out source repository + uses: actions/checkout@v6 + - name: Set up Python environment + uses: actions/setup-python@v6 + with: + python-version: "3.11" + - name: Run check-requirements.sh script + run: bash scripts/check-requirements.sh diff --git a/llama.cpp/.github/workflows/python-lint.yml b/llama.cpp/.github/workflows/python-lint.yml new file mode 100644 index 0000000..8d1dd7a --- /dev/null +++ b/llama.cpp/.github/workflows/python-lint.yml @@ -0,0 +1,30 @@ +name: flake8 Lint + +on: + push: + branches: + - master + paths: ['.github/workflows/python-lint.yml', '**/*.py'] + pull_request: + types: [opened, synchronize, reopened] + paths: ['.github/workflows/python-lint.yml', '**/*.py'] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + flake8-lint: + runs-on: ubuntu-slim + name: Lint + steps: + - name: Check out source repository + uses: actions/checkout@v6 + - name: Set up Python environment + uses: actions/setup-python@v6 + with: + python-version: "3.11" + - name: flake8 Lint + uses: py-actions/flake8@v2 + with: + plugins: "flake8-no-print" diff --git a/llama.cpp/.github/workflows/python-type-check.yml b/llama.cpp/.github/workflows/python-type-check.yml new file mode 100644 index 0000000..e801a9f --- /dev/null +++ b/llama.cpp/.github/workflows/python-type-check.yml @@ -0,0 +1,38 @@ +name: Python Type-Check + +on: + push: + paths: + - '.github/workflows/python-type-check.yml' + - 'pyrightconfig.json' + - '**.py' + - '**/requirements*.txt' + pull_request: + paths: + - '.github/workflows/python-type-check.yml' + - 'pyrightconfig.json' + - '**.py' + - '**/requirements*.txt' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + python-type-check: + runs-on: ubuntu-latest + name: pyright type-check + steps: + - name: Check out source repository + uses: actions/checkout@v6 + - name: Set up Python environment + uses: actions/setup-python@v6 + with: + python-version: "3.11" + pip-install: -r requirements/requirements-all.txt + - name: Type-check with Pyright + uses: jakebailey/pyright-action@v2 + with: + version: 1.1.382 + level: warning + warnings: true diff --git a/llama.cpp/.github/workflows/release.yml b/llama.cpp/.github/workflows/release.yml new file mode 100644 index 0000000..1914c08 --- /dev/null +++ b/llama.cpp/.github/workflows/release.yml @@ -0,0 +1,908 @@ +name: Release + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + branches: + - master + paths: 
['.github/workflows/release.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON" + +jobs: + macOS-arm64: + runs-on: macos-14 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64 + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build \ + -DCMAKE_INSTALL_RPATH='@loader_path' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_BUILD_BORINGSSL=ON \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DGGML_RPC=ON \ + ${{ env.CMAKE_ARGS }} + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Pack artifacts + id: pack_artifacts + run: | + cp LICENSE ./build/bin/ + tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin . + + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz + name: llama-bin-macos-arm64.tar.gz + + macOS-x64: + runs-on: macos-15-intel + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-x64 + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + sysctl -a + # Metal is disabled due to intermittent failures with Github runners not having a GPU: + # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 + cmake -B build \ + -DCMAKE_INSTALL_RPATH='@loader_path' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_BUILD_BORINGSSL=ON \ + -DGGML_METAL=OFF \ + -DGGML_RPC=ON \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3 + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Pack artifacts + id: pack_artifacts + run: | + cp LICENSE ./build/bin/ + tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -s ",./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin . 
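 + # Note: the -s ",./,...," flag above is BSD tar's path-rewrite syntax; the
 + # Linux jobs below get the same effect with GNU tar's --transform. Sketch of
 + # the equivalence (PREFIX is a placeholder):
 + #
 + #   tar -czf out.tar.gz -s ",./,PREFIX/," -C ./build/bin .             # BSD tar (macOS)
 + #   tar -czf out.tar.gz --transform "s,./,PREFIX/," -C ./build/bin .   # GNU tar (Linux)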
+ + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz + name: llama-bin-macos-x64.tar.gz + + ubuntu-22-cpu: + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-22.04 + - build: 's390x' + os: ubuntu-24.04-s390x + # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm + # - build: 'arm64' + # os: ubuntu-22.04-arm + + runs-on: ${{ matrix.os }} + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-cpu-cmake-${{ matrix.build }} + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libssl-dev + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DCMAKE_INSTALL_RPATH='$ORIGIN' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ + -DGGML_BACKEND_DL=ON \ + -DGGML_NATIVE=OFF \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DLLAMA_FATAL_WARNINGS=ON \ + ${{ env.CMAKE_ARGS }} + cmake --build build --config Release -j $(nproc) + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Pack artifacts + id: pack_artifacts + run: | + cp LICENSE ./build/bin/ + tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin . + + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz + name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz + + ubuntu-22-vulkan: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-vulkan + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libssl-dev + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DCMAKE_INSTALL_RPATH='$ORIGIN' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ + -DGGML_BACKEND_DL=ON \ + -DGGML_NATIVE=OFF \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DGGML_VULKAN=ON \ + ${{ env.CMAKE_ARGS }} + cmake --build build --config Release -j $(nproc) + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Pack artifacts + id: pack_artifacts + run: | + cp LICENSE ./build/bin/ + tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin . 
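 + # With GGML_BACKEND_DL=ON the CPU variants and the Vulkan backend are built as
 + # separate shared libraries that ggml loads at runtime, so a quick sanity check
 + # on the packed tree can look like this (filenames illustrative; --list-devices
 + # is assumed to be available in recent builds):
 + #
 + #   ls ./build/bin/libggml-*.so        # expect one library per backend / CPU variant
 + #   ./build/bin/llama-server --list-devices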
+ + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz + name: llama-bin-ubuntu-vulkan-x64.tar.gz + + windows-cpu: + runs-on: windows-2025 + + strategy: + matrix: + include: + - arch: 'x64' + - arch: 'arm64' + + steps: + - name: Clone + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-cpu-${{ matrix.arch }} + variant: ccache + evict-old-files: 1d + + - name: Install Ninja + run: | + choco install ninja + + - name: Build + shell: cmd + run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }} + cmake -S . -B build -G "Ninja Multi-Config" ^ + -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^ + -DLLAMA_BUILD_BORINGSSL=ON ^ + -DGGML_NATIVE=OFF ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^ + -DGGML_OPENMP=ON ^ + ${{ env.CMAKE_ARGS }} + cmake --build build --config Release + + - name: Pack artifacts + id: pack_artifacts + run: | + Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\ + 7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\* + + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-bin-win-cpu-${{ matrix.arch }}.zip + name: llama-bin-win-cpu-${{ matrix.arch }}.zip + + windows: + runs-on: windows-2025 + + env: + OPENBLAS_VERSION: 0.3.23 + VULKAN_VERSION: 1.4.313.2 + + strategy: + matrix: + include: + - backend: 'vulkan' + arch: 'x64' + defines: '-DGGML_VULKAN=ON' + target: 'ggml-vulkan' + - backend: 'opencl-adreno' + arch: 'arm64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' + target: 'ggml-opencl' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }} + variant: ccache + evict-old-files: 1d + + - name: Install Vulkan SDK + id: get_vulkan + if: ${{ matrix.backend == 'vulkan' }} + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Install OpenCL Headers and Libs + id: install_opencl + if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }} + run: | + git clone https://github.com/KhronosGroup/OpenCL-Headers + cd OpenCL-Headers + cmake -B build ` + -DBUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build --target install + git clone 
https://github.com/KhronosGroup/OpenCL-ICD-Loader + cd OpenCL-ICD-Loader + cmake -B build-arm64-release ` + -A arm64 ` + -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build-arm64-release --target install --config release + + - name: Build + id: cmake_build + run: | + cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_BUILD_BORINGSSL=ON + cmake --build build --config Release --target ${{ matrix.target }} + + - name: Pack artifacts + id: pack_artifacts + run: | + 7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll + + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip + name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip + + windows-cuda: + runs-on: windows-2022 + + strategy: + matrix: + cuda: ['12.4', '13.1'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-cuda-${{ matrix.cuda }} + variant: ccache + evict-old-files: 1d + + - name: Install Cuda Toolkit + uses: ./.github/actions/windows-setup-cuda + with: + cuda_version: ${{ matrix.cuda }} + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Build + id: cmake_build + shell: cmd + # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project + run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + cmake -S . -B build -G "Ninja Multi-Config" ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_NATIVE=OFF ^ + -DGGML_CPU=OFF ^ + -DGGML_CUDA=ON ^ + -DLLAMA_BUILD_BORINGSSL=ON ^ + -DGGML_CUDA_CUB_3DOT2=ON + set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 + cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda + + - name: Pack artifacts + id: pack_artifacts + run: | + 7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll + + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip + name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip + + - name: Copy and pack Cuda runtime + run: | + echo "Cuda install location: ${{ env.CUDA_PATH }}" + $dst='.\build\bin\cudart\' + robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + 7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\* + + - name: Upload Cuda runtime + uses: actions/upload-artifact@v6 + with: + path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip + name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip + + windows-sycl: + runs-on: windows-2022 + + defaults: + run: + shell: bash + + env: + WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe + WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel + ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - 
name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-sycl + variant: ccache + evict-old-files: 1d + + - name: Install + run: | + scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL + + - name: Build + id: cmake_build + shell: cmd + run: | + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force + cmake -G "Ninja" -B build ^ + -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^ + -DCMAKE_BUILD_TYPE=Release ^ + -DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^ + -DGGML_CPU=OFF -DGGML_SYCL=ON ^ + -DLLAMA_BUILD_BORINGSSL=ON + cmake --build build --target ggml-sycl -j + + - name: Build the release package + id: pack_artifacts + run: | + echo "cp oneAPI runtime dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin" + + cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin + + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero_v2.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin + + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-fallback-bfloat16.spv" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-native-bfloat16.spv" ./build/bin + + cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin + + cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/tcm.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/libhwloc-15.dll" ./build/bin + cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin + + echo "cp oneAPI runtime dll files to ./build/bin done" + 7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/* + + - name: Upload the release package + uses: actions/upload-artifact@v6 + with: + path: llama-bin-win-sycl-x64.zip + name: llama-bin-win-sycl-x64.zip + + windows-hip: + runs-on: windows-2022 + + env: + HIPSDK_INSTALLER_VERSION: "25.Q3" + + strategy: + matrix: + include: + - name: "radeon" + gpu_targets: "gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032" + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Grab rocWMMA package + id: grab_rocwmma + run: | + curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.0.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.0.0.70001-42~24.04_amd64.deb" + 7z x rocwmma.deb + 7z x data.tar + + - name: Cache ROCm Installation + id: cache-rocm + uses: actions/cache@v5 + with: + path: C:\Program Files\AMD\ROCm + key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }} + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 + evict-old-files: 1d + + - name: Install ROCm + if: 
steps.cache-rocm.outputs.cache-hit != 'true' + id: depends + run: | + $ErrorActionPreference = "Stop" + write-host "Downloading AMD HIP SDK Installer" + Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" + write-host "Installing AMD HIP SDK" + $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru + $completed = $proc.WaitForExit(600000) + if (-not $completed) { + Write-Error "ROCm installation timed out after 10 minutes. Killing the process" + $proc.Kill() + exit 1 + } + if ($proc.ExitCode -ne 0) { + Write-Error "ROCm installation failed with exit code $($proc.ExitCode)" + exit 1 + } + write-host "Completed AMD HIP SDK installation" + + - name: Verify ROCm + id: verify + run: | + # Find and test ROCm installation + $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1 + if (-not $clangPath) { + Write-Error "ROCm installation not found" + exit 1 + } + & $clangPath.FullName --version + + - name: Build + id: cmake_build + run: | + $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) + $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" + cmake -G "Unix Makefiles" -B build -S . ` + -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` + -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.0.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" ` + -DCMAKE_BUILD_TYPE=Release ` + -DGGML_BACKEND_DL=ON ` + -DGGML_NATIVE=OFF ` + -DGGML_CPU=OFF ` + -DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" ` + -DGGML_HIP_ROCWMMA_FATTN=ON ` + -DGGML_HIP=ON ` + -DLLAMA_BUILD_BORINGSSL=ON + cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS} + md "build\bin\rocblas\library\" + md "build\bin\hipblaslt\library" + cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" + cp "${env:HIP_PATH}\bin\hipblaslt.dll" "build\bin\" + cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" + cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" + cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\" + + - name: Pack artifacts + id: pack_artifacts + run: | + 7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\* + + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-bin-win-hip-${{ matrix.name }}-x64.zip + name: llama-bin-win-hip-${{ matrix.name }}-x64.zip + + ios-xcode-build: + runs-on: macos-15 + + steps: + - name: Checkout code + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Setup Xcode + run: | + sudo xcode-select -s /Applications/Xcode_16.4.app + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build -G Xcode \ + -DGGML_METAL_USE_BF16=ON \ + -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + + - name: xcodebuild for swift package + id: xcodebuild + run: | + ./build-xcframework.sh + + - name: Build Xcode project + run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos 
CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Pack artifacts + id: pack_artifacts + run: | + # Zip file is required for Swift Package Manager, which does not support tar.gz for binary targets. + # For more details, see https://developer.apple.com/documentation/xcode/distributing-binary-frameworks-as-swift-packages + zip -r -y llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework + + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-${{ steps.tag.outputs.name }}-xcframework.zip + name: llama-${{ steps.tag.outputs.name }}-xcframework.zip + + + openEuler-cann: + strategy: + matrix: + include: + # 910b with aclgraph (both architectures) + - arch: x86 + chip_type: '910b' + build: 'Release' + use_acl_graph: 'on' + - arch: aarch64 + chip_type: '910b' + build: 'Release' + use_acl_graph: 'on' + # 310p without aclgraph (both architectures) + - arch: x86 + chip_type: '310p' + build: 'Release' + use_acl_graph: 'off' + - arch: aarch64 + chip_type: '310p' + build: 'Release' + use_acl_graph: 'off' + runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Free up disk space + uses: ggml-org/free-disk-space@v1.3.1 + with: + tool-cache: true + + - name: Set container image + id: cann-image + run: | + image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}" + echo "image=${image}" >> "${GITHUB_OUTPUT}" + + - name: Pull container image + run: docker pull "${{ steps.cann-image.outputs.image }}" + + - name: Build + env: + BUILD_TYPE: ${{ matrix.build }} + SOC_TYPE: ascend${{ matrix.chip_type }} + USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} + run: | + HOST_UID=$(id -u) + HOST_GID=$(id -g) + + docker run --rm \ + -v "${PWD}:/workspace" \ + -w /workspace \ + -e SOC_TYPE=${SOC_TYPE} \ + -e BUILD_TYPE=${BUILD_TYPE} \ + -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ + "${{ steps.cann-image.outputs.image }}" \ + bash -lc ' + set -e + yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel + yum clean all && rm -rf /var/cache/yum + git config --global --add safe.directory "/workspace" + export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DGGML_CANN=on \ + -DSOC_TYPE=${SOC_TYPE} \ + -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} + cmake --build build -j $(nproc) + + chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build + ' + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Pack artifacts + run: | + cp LICENSE ./build/bin/ + tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin . 
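 + # The chown quoting in the docker step above deliberately closes the
 + # single-quoted script so that HOST_UID/HOST_GID expand in the host shell
 + # before docker runs; a reduced sketch (image name illustrative):
 + #
 + #   docker run --rm -v "${PWD}:/w" alpine sh -c 'touch /w/f && chown '"${HOST_UID}"':'"${HOST_GID}"' /w/f'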
+ + - name: Upload artifacts + uses: actions/upload-artifact@v6 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz + name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz + + release: + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + + # Fine-grained permission + # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token + permissions: + contents: write # for creating release + + runs-on: ubuntu-latest + + needs: + - windows + - windows-cpu + - windows-cuda + - windows-sycl + - windows-hip + - ubuntu-22-cpu + - ubuntu-22-vulkan + - macOS-arm64 + - macOS-x64 + - ios-xcode-build + - openEuler-cann + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Download artifacts + id: download-artifact + uses: actions/download-artifact@v7 + with: + path: ./artifact + merge-multiple: true + + - name: Move artifacts + id: move_artifacts + run: | + mkdir -p release + + echo "Adding CPU backend files to existing zips..." + for arch in x64 arm64; do + cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip" + temp_dir=$(mktemp -d) + echo "Extracting CPU backend for $arch..." + unzip "$cpu_zip" -d "$temp_dir" + + echo "Adding CPU files to $arch zips..." + for target_zip in artifact/llama-bin-win-*-${arch}.zip; do + if [[ "$target_zip" == "$cpu_zip" ]]; then + continue + fi + echo "Adding CPU backend to $(basename "$target_zip")" + realpath_target_zip=$(realpath "$target_zip") + (cd "$temp_dir" && zip -r "$realpath_target_zip" .) + done + + rm -rf "$temp_dir" + done + + echo "Renaming and moving zips to release..." + for zip_file in artifact/llama-bin-win-*.zip; do + base_name=$(basename "$zip_file" .zip) + zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip" + echo "Moving $zip_file to release/$zip_name" + mv "$zip_file" "release/$zip_name" + done + + echo "Moving other artifacts..." + mv -v artifact/*.zip release + mv -v artifact/*.tar.gz release + + - name: Create release + id: create_release + uses: ggml-org/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.tag.outputs.name }} + body: | +
+ + ${{ github.event.head_commit.message }} + +
+ + **macOS/iOS:** + - [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz) + - [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz) + - [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.zip) + + **Linux:** + - [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz) + - [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz) + - [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz) + + **Windows:** + - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip) + - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip) + - [Windows x64 (CUDA 12)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip) + - [Windows x64 (CUDA 13)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.1-x64.zip) - [CUDA 13.1 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.1-x64.zip) + - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip) + - [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip) + - [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip) + + **openEuler:** + - [openEuler x86 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-x86.tar.gz) + - [openEuler x86 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86-aclgraph.tar.gz) + - [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz) + - [openEuler aarch64 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64-aclgraph.tar.gz) + + - name: Upload release + id: upload_release + uses: actions/github-script@v8 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const path = require('path'); + const 
diff --git a/llama.cpp/.github/workflows/server-metal.yml b/llama.cpp/.github/workflows/server-metal.yml
new file mode 100644
index 0000000..1d707be
--- /dev/null
+++ b/llama.cpp/.github/workflows/server-metal.yml
@@ -0,0 +1,73 @@
+name: Server-Metal
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      sha:
+        description: 'Commit SHA1 to build'
+        required: false
+        type: string
+      slow_tests:
+        description: 'Run slow tests'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/server-metal.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
+
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  server-metal:
+    runs-on: [self-hosted, macOS, ARM64]
+
+    name: server-metal (${{ matrix.wf_name }})
+    strategy:
+      matrix:
+        build_type: [Release]
+        wf_name: ["GPUx1"]
+        include:
+          - build_type: Release
+            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
+            wf_name: "GPUx1, backend-sampling"
+          - build_type: Release
+            extra_args: "GGML_METAL_DEVICES=2"
+            wf_name: "GPUx2"
+          - build_type: Release
+            extra_args: "GGML_METAL_DEVICES=2 LLAMA_ARG_BACKEND_SAMPLING=1"
+            wf_name: "GPUx2, backend-sampling"
+      fail-fast: false
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build -DGGML_SCHED_NO_REALLOC=ON
+          cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
+        run: |
+          cd tools/server/tests
+          python3 -m venv venv
+          source venv/bin/activate
+          pip install -r requirements.txt
+          export ${{ matrix.extra_args }}
+          pytest -v -x -m "not slow"
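To reproduce a single cell of the matrix outside CI, the Build and Tests steps above translate to roughly the following shell session; a sketch for the "GPUx2, backend-sampling" entry, assuming a macOS host with CMake and Python 3 available:

```bash
# Mirrors the Build + Tests steps of server-metal.yml for one matrix cell.
cmake -B build -DGGML_SCHED_NO_REALLOC=ON
cmake --build build --config Release -j "$(sysctl -n hw.logicalcpu)" --target llama-server

cd tools/server/tests
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
export GGML_METAL_DEVICES=2 LLAMA_ARG_BACKEND_SAMPLING=1   # this cell's extra_args
pytest -v -x -m "not slow"
```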
diff --git a/llama.cpp/.github/workflows/server-webui.yml b/llama.cpp/.github/workflows/server-webui.yml
new file mode 100644
index 0000000..94899c9
--- /dev/null
+++ b/llama.cpp/.github/workflows/server-webui.yml
@@ -0,0 +1,99 @@
+# Server WebUI build and tests
+name: Server WebUI
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      sha:
+        description: 'Commit SHA1 to build'
+        required: false
+        type: string
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/server-webui.yml', 'tools/server/webui/**.*', 'tools/server/tests/**.*', 'tools/server/public/**']
+
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  webui-check:
+    name: WebUI Checks
+    runs-on: ubuntu-latest
+    continue-on-error: true
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Setup Node.js
+        id: node
+        uses: actions/setup-node@v6
+        with:
+          node-version: "22"
+          cache: "npm"
+          cache-dependency-path: "tools/server/webui/package-lock.json"
+
+      - name: Install dependencies
+        id: setup
+        if: ${{ steps.node.conclusion == 'success' }}
+        run: npm ci
+        working-directory: tools/server/webui
+
+      - name: Run type checking
+        if: ${{ always() && steps.setup.conclusion == 'success' }}
+        run: npm run check
+        working-directory: tools/server/webui
+
+      - name: Run linting
+        if: ${{ always() && steps.setup.conclusion == 'success' }}
+        run: npm run lint
+        working-directory: tools/server/webui
+
+      - name: Build application
+        if: ${{ always() && steps.setup.conclusion == 'success' }}
+        run: npm run build
+        working-directory: tools/server/webui
+
+      - name: Install Playwright browsers
+        id: playwright
+        if: ${{ always() && steps.setup.conclusion == 'success' }}
+        run: npx playwright install --with-deps
+        working-directory: tools/server/webui
+
+      - name: Build Storybook
+        if: ${{ always() && steps.playwright.conclusion == 'success' }}
+        run: npm run build-storybook
+        working-directory: tools/server/webui
+
+      - name: Run Client tests
+        if: ${{ always() && steps.playwright.conclusion == 'success' }}
+        run: npm run test:client
+        working-directory: tools/server/webui
+
+      - name: Run Unit tests
+        if: ${{ always() && steps.playwright.conclusion == 'success' }}
+        run: npm run test:unit
+        working-directory: tools/server/webui
+
+      - name: Run UI tests
+        if: ${{ always() && steps.playwright.conclusion == 'success' }}
+        run: npm run test:ui -- --testTimeout=60000
+        working-directory: tools/server/webui
+
+      - name: Run E2E tests
+        if: ${{ always() && steps.playwright.conclusion == 'success' }}
+        run: npm run test:e2e
+        working-directory: tools/server/webui
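Since every step after `npm ci` is an ordinary npm script, the whole job can be approximated locally before pushing; a sketch assuming Node.js 22 and the repository root as the starting directory:

```bash
# Runs the same checks as the webui-check job, in the same order.
cd tools/server/webui
npm ci
npm run check                     # type checking
npm run lint
npm run build
npx playwright install --with-deps
npm run build-storybook
npm run test:client
npm run test:unit
npm run test:ui -- --testTimeout=60000
npm run test:e2e
```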
diff --git a/llama.cpp/.github/workflows/server.yml b/llama.cpp/.github/workflows/server.yml
new file mode 100644
index 0000000..99d0522
--- /dev/null
+++ b/llama.cpp/.github/workflows/server.yml
@@ -0,0 +1,147 @@
+# Server build and tests
+name: Server
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      sha:
+        description: 'Commit SHA1 to build'
+        required: false
+        type: string
+      slow_tests:
+        description: 'Run slow tests'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
+
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  server:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, UNDEFINED] # THREAD is very slow
+        build_type: [RelWithDebInfo]
+        include:
+          - build_type: Release
+            sanitizer: ""
+            extra_args: ""
+          - build_type: Release
+            sanitizer: ""
+            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
+      fail-fast: false
+
+    steps:
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install \
+            build-essential \
+            xxd \
+            git \
+            cmake \
+            curl \
+            wget \
+            language-pack-en \
+            libssl-dev
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build \
+            -DLLAMA_BUILD_BORINGSSL=ON \
+            -DGGML_SCHED_NO_REALLOC=ON \
+            -DGGML_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \
+            -DGGML_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \
+            -DGGML_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }} \
+            -DLLAMA_SANITIZE_ADDRESS=${{ matrix.sanitizer == 'ADDRESS' }} \
+            -DLLAMA_SANITIZE_THREAD=${{ matrix.sanitizer == 'THREAD' }} \
+            -DLLAMA_SANITIZE_UNDEFINED=${{ matrix.sanitizer == 'UNDEFINED' }}
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.11'
+          pip-install: -r tools/server/tests/requirements.txt
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
+        run: |
+          cd tools/server/tests
+          export ${{ matrix.extra_args }}
+          pytest -v -x -m "not slow"
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
+        run: |
+          cd tools/server/tests
+          export ${{ matrix.extra_args }}
+          SLOW_TESTS=1 pytest -v -x
+
+  server-windows:
+    runs-on: windows-2022
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build -DLLAMA_BUILD_BORINGSSL=ON -DGGML_SCHED_NO_REALLOC=ON
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.11'
+          pip-install: -r tools/server/tests/requirements.txt
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
+        run: |
+          cd tools/server/tests
+          $env:PYTHONIOENCODING = ":replace"
+          pytest -v -x -m "not slow"
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
+        run: |
+          cd tools/server/tests
+          $env:SLOW_TESTS = "1"
+          pytest -v -x
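Each sanitizer row of the matrix simply flips the matching GGML/LLAMA CMake toggles. A sketch of the ADDRESS cell on a Linux host, assuming the apt packages from the Dependencies step are already installed:

```bash
# Mirrors the sanitizer=ADDRESS, build_type=RelWithDebInfo matrix cell.
cmake -B build \
  -DLLAMA_BUILD_BORINGSSL=ON \
  -DGGML_SCHED_NO_REALLOC=ON \
  -DGGML_SANITIZE_ADDRESS=ON \
  -DLLAMA_SANITIZE_ADDRESS=ON
cmake --build build --config RelWithDebInfo -j "$(nproc)" --target llama-server

cd tools/server/tests
pip install -r requirements.txt    # the workflow does this via setup-python
pytest -v -x -m "not slow"
```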
diff --git a/llama.cpp/.github/workflows/update-ops-docs.yml b/llama.cpp/.github/workflows/update-ops-docs.yml
new file mode 100644
index 0000000..2ab06eb
--- /dev/null
+++ b/llama.cpp/.github/workflows/update-ops-docs.yml
@@ -0,0 +1,42 @@
+name: Update Operations Documentation
+
+on:
+  push:
+    paths:
+      - 'docs/ops.md'
+      - 'docs/ops/**'
+      - 'scripts/create_ops_docs.py'
+  pull_request:
+    paths:
+      - 'docs/ops.md'
+      - 'docs/ops/**'
+      - 'scripts/create_ops_docs.py'
+
+jobs:
+  update-ops-docs:
+    runs-on: ubuntu-slim
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.x'
+
+      - name: Generate operations documentation to temporary file
+        run: |
+          mkdir -p /tmp/ops_check
+          ./scripts/create_ops_docs.py /tmp/ops_check/ops.md
+
+      - name: Check if docs/ops.md matches generated version
+        run: |
+          if ! diff -q docs/ops.md /tmp/ops_check/ops.md; then
+            echo "Operations documentation (docs/ops.md) is not up to date with the backend CSV files."
+            echo "To fix: run ./scripts/create_ops_docs.py and commit the updated docs/ops.md along with your changes"
+            echo "Differences found:"
+            diff docs/ops.md /tmp/ops_check/ops.md || true
+            exit 1
+          fi
+          echo "Operations documentation is up to date."
diff --git a/llama.cpp/.github/workflows/winget.yml b/llama.cpp/.github/workflows/winget.yml
new file mode 100644
index 0000000..2047c27
--- /dev/null
+++ b/llama.cpp/.github/workflows/winget.yml
@@ -0,0 +1,44 @@
+name: Update Winget Package
+
+on:
+  workflow_dispatch: # allows manual triggering
+  schedule:
+    - cron: '28 5 * * *' # Update every day at 5:28 UTC
+
+jobs:
+  update:
+    name: Update Winget Package
+    runs-on: ubuntu-latest
+    if: github.repository_owner == 'ggml-org'
+
+    steps:
+      - name: Install cargo binstall
+        uses: cargo-bins/cargo-binstall@268643a6b5ea099f5718ee5cd3ff7dc89a5eb49b
+
+      - name: Install komac
+        run: |
+          cargo binstall komac@2.11.2 -y
+
+      - name: Find latest release
+        id: find_latest_release
+        uses: actions/github-script@v8
+        with:
+          script: |
+            const { data: releases } = await github.rest.repos.listReleases({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+            });
+            const { tag_name: version, assets: assets } = releases.find(({assets}) => assets.find(asset => asset.name.includes('win-vulkan')));
+            const { browser_download_url: asset_url } = assets.find(asset => asset.name.includes('win-vulkan'));
+            console.log("Latest release:", version);
+            core.setOutput('VERSION', version);
+            core.setOutput('ASSETURL', asset_url);
+
+      - name: Update manifest
+        run: |
+          echo "Updating manifest..."
+          komac update --version ${{ steps.find_latest_release.outputs.VERSION }} \
+            --urls "${{ steps.find_latest_release.outputs.ASSETURL }}" \
+            --token ${{ secrets.WINGET_GITHUB_TOKEN }} \
+            --submit \
+            ggml.llamacpp
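The find_latest_release step scans recent releases for one that ships a win-vulkan asset and exposes its tag and download URL to the komac step. The same lookup can be approximated from a shell with the GitHub CLI and jq; a sketch, assuming an authenticated `gh`:

```bash
# Print the tag and win-vulkan asset URL of the newest release carrying one,
# mirroring the find_latest_release github-script step above.
gh api repos/ggml-org/llama.cpp/releases --jq '
  [ .[] | select(any(.assets[]; .name | contains("win-vulkan"))) ][0]
  | .tag_name,
    first(.assets[] | select(.name | contains("win-vulkan")) | .browser_download_url)'
```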