name: Server-Metal

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/server-metal.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']

env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1
  LLAMA_LOG_VERBOSITY: 10

# One run per ref at a time; a new push cancels the in-flight run for the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  server-metal:
    runs-on: [self-hosted, macOS, ARM64]

    name: server-metal (${{ matrix.wf_name }})
    strategy:
      matrix:
        # Base entry: single GPU, no extra env (extra_args intentionally unset).
        build_type: [Release]
        wf_name: ["GPUx1"]
        include:
          # extra_args are exported as environment variables before running pytest.
          - build_type: Release
            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx1, backend-sampling"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2"
            wf_name: "GPUx2"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2 LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx2, backend-sampling"
      fail-fast: false

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          # Manual-dispatch SHA wins; otherwise fall back through PR head / push SHA / ref.
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Build
        id: cmake_build
        run: |
          # -DCMAKE_BUILD_TYPE is required for single-config generators (Makefiles/Ninja);
          # --config alone only affects multi-config generators and is otherwise ignored.
          cmake -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_SCHED_NO_REALLOC=ON
          cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server

      - name: Tests
        id: server_integration_tests
        # NOTE(review): no matrix entry defines disabled_on_pr, so this guard is
        # currently always true; kept so entries can opt out of PR runs later.
        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
        run: |
          cd tools/server/tests
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt
          # Guard: a bare `export` (empty extra_args on the base matrix entry)
          # would dump the whole environment into the log.
          if [ -n "${{ matrix.extra_args }}" ]; then
            export ${{ matrix.extra_args }}
          fi
          # Honor the workflow_dispatch slow_tests toggle; push events have no
          # inputs, so they keep the fast "not slow" selection.
          if [ "${{ inputs.slow_tests }}" = "true" ]; then
            pytest -v -x
          else
            pytest -v -x -m "not slow"
          fi