# syntax=docker/dockerfile:1
# The syntax directive pins the BuildKit dockerfile frontend; the
# `RUN --mount=type=cache` instructions below require it.

# Build-time parameters for the base images (only visible to FROM lines).
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04
  3
  4### Build Llama.cpp stage
  5FROM gcc:${GCC_VERSION} AS build
  6
  7RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
  8    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
  9    apt update -y && \
 10    apt upgrade -y && \
 11    apt install -y --no-install-recommends \
 12        git cmake ccache ninja-build \
 13        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
 14        libopenblas-dev libssl-dev && \
 15    rm -rf /var/lib/apt/lists/*
 16
 17WORKDIR /app
 18COPY . .
 19
 20RUN --mount=type=cache,target=/root/.ccache \
 21    --mount=type=cache,target=/app/build \
 22    cmake -S . -B build -G Ninja \
 23        -DCMAKE_BUILD_TYPE=Release \
 24        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
 25        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
 26        -DLLAMA_BUILD_TESTS=OFF \
 27        -DGGML_NATIVE=OFF \
 28        -DGGML_BACKEND_DL=ON \
 29        -DGGML_CPU_ALL_VARIANTS=ON \
 30        -DGGML_BLAS=ON \
 31        -DGGML_BLAS_VENDOR=OpenBLAS && \
 32    cmake --build build --config Release -j $(nproc) && \
 33    cmake --install build --prefix /opt/llama.cpp
 34
 35COPY *.py             /opt/llama.cpp/bin
 36COPY .devops/tools.sh /opt/llama.cpp/bin
 37
 38COPY gguf-py          /opt/llama.cpp/gguf-py
 39COPY requirements.txt /opt/llama.cpp/gguf-py
 40COPY requirements     /opt/llama.cpp/gguf-py/requirements
 41
 42
### Collect all llama.cpp binaries, libraries and distro libraries
# A `scratch` staging area: it holds nothing except the artifacts produced
# by the build stage, so the runtime stages below can COPY from a single,
# layout-stable source instead of reaching into the build image directly.
FROM scratch AS collector

# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin     /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib     /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
 50
 51
 52### Base image
 53FROM ubuntu:${UBUNTU_VERSION} AS base
 54
 55RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
 56    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
 57    apt update -y && \
 58    apt install -y --no-install-recommends \
 59        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
 60        # See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
 61        curl libgomp1 libopenblas-dev && \
 62    apt autoremove -y && \
 63    apt clean -y && \
 64    rm -rf /tmp/* /var/tmp/* && \
 65    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
 66    find /var/cache -type f -delete
 67
 68# Copy llama.cpp libraries
 69COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
 70
 71
 72### Full
 73FROM base AS full
 74
 75ENV PATH="/root/.cargo/bin:${PATH}"
 76WORKDIR /app
 77
 78RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
 79    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
 80    apt update -y && \
 81    apt install -y \
 82        git cmake libjpeg-dev \
 83        python3 python3-pip python3-dev && \
 84    apt autoremove -y && \
 85    apt clean -y && \
 86    rm -rf /tmp/* /var/tmp/* && \
 87    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
 88    find /var/cache -type f -delete
 89
 90RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
 91
 92COPY --from=collector /llama.cpp/bin /app
 93COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
 94
 95RUN pip install --no-cache-dir --break-system-packages \
 96        -r /app/gguf-py/requirements.txt
 97
 98ENTRYPOINT [ "/app/tools.sh" ]
 99
100
### CLI Only
FROM base AS light

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries. A glob or multi-file source
# requires the destination to be spelled as a directory (trailing slash),
# otherwise the COPY is rejected when more than one file matches.
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin/
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin/

ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
111
112
### Server
FROM base AS server

# Make llama-server listen on all interfaces so the published container
# port is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries. The glob source requires the
# destination to be spelled as a directory (trailing slash); the same is
# done for the single binary for consistency.
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin/
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin/

# Documentation only — publish with `-p 8080:8080` at run time.
EXPOSE 8080

ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]