# syntax=docker/dockerfile:1
# This file relies on BuildKit-only features (RUN --mount), so pin the
# Dockerfile frontend explicitly with the syntax directive above.

# Build-time pins for the compiler image and the runtime base image.
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04
3
### Build Llama.cpp stage
FROM gcc:${GCC_VERSION} AS build

# The apt metadata and package archives live in build-host cache mounts, so
# they never end up in an image layer and must NOT be rm'd here — deleting
# them only destroys the shared cache for the next build.
# Use apt-get (the stable scripting CLI, hadolint DL3027) and avoid a blanket
# `upgrade` (DL3005) — bump GCC_VERSION instead to pick up fixed packages.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        ccache cmake git ninja-build \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        libopenblas-dev libssl-dev

WORKDIR /app
COPY . .
19
# Configure, build and install llama.cpp.
# - ccache + a cache-mounted build tree make incremental rebuilds fast; only
#   the installed tree under /opt/llama.cpp is carried into later stages.
# - GGML_NATIVE=OFF avoids -march=native so binaries are portable across CPUs;
#   combined with GGML_BACKEND_DL=ON and GGML_CPU_ALL_VARIANTS=ON this
#   presumably builds multiple CPU backend variants selected at runtime —
#   NOTE(review): confirm against ggml build docs.
# - OpenBLAS provides the BLAS backend (see WARNING about the package choice
#   in the install step above).
RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
    -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
    -DLLAMA_BUILD_TESTS=OFF \
    -DGGML_NATIVE=OFF \
    -DGGML_BACKEND_DL=ON \
    -DGGML_CPU_ALL_VARIANTS=ON \
    -DGGML_BLAS=ON \
    -DGGML_BLAS_VENDOR=OpenBLAS && \
    cmake --build build --config Release -j $(nproc) && \
    cmake --install build --prefix /opt/llama.cpp
34
# Ship the Python conversion scripts and the tool dispatcher next to the
# compiled binaries. Destinations end with "/" — BuildKit requires the
# destination to be a directory (trailing slash) when a COPY source is a
# wildcard that can match multiple files.
COPY *.py             /opt/llama.cpp/bin/
COPY .devops/tools.sh /opt/llama.cpp/bin/

# gguf-py package plus its requirement manifests, consumed by the `full` image.
COPY gguf-py          /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py/
COPY requirements     /opt/llama.cpp/gguf-py/requirements
41
42
### Collect all llama.cpp binaries, libraries and distro libraries
# A bare `scratch` stage acts as a staging area: later stages cherry-pick
# exactly the files they need with a single `COPY --from=collector`, keeping
# the runtime images minimal.
FROM scratch AS collector

# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
50
51
### Base image
FROM ubuntu:${UBUNTU_VERSION} AS base

# apt metadata and archives live in build-host cache mounts, so they never
# reach an image layer; cleaning them here (apt clean / find-delete) would
# only defeat the cache without shrinking the image. Only /tmp and /var/tmp
# need in-layer cleanup. apt-get is used instead of apt (hadolint DL3027).
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        # WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
        # See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
        curl libgomp1 libopenblas-dev && \
    rm -rf /tmp/* /var/tmp/*

# Copy llama.cpp libraries
# NOTE(review): the destination hardcodes the s390x multiarch triplet, so this
# image is s390x-only by construction — confirm before reusing for other arches.
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
70
71
### Full
FROM base AS full

# rustup installs into /root/.cargo; expose it so cargo-built Python wheel
# dependencies can find the toolchain.
# NOTE(review): this stage runs as root with no USER directive — confirm
# whether tools.sh can run under a dedicated non-root user.
ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app

# Toolchain for building Python dependencies from source. Same cache-mount
# discipline as the base stage: no in-layer apt cleanup needed, and
# --no-install-recommends keeps this consistent with the other stages.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        cmake git libjpeg-dev \
        python3 python3-dev python3-pip && \
    rm -rf /tmp/* /var/tmp/*

# Rust toolchain for wheel builds. --proto/--tlsv1.2 harden the transport;
# NOTE(review): the rustup bootstrap is still unpinned (`curl | bash`) —
# consider pinning a rustup-init version and checksum for reproducibility.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y

COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py

# --break-system-packages: Ubuntu 24.04's system Python is marked externally
# managed (PEP 668); this single-purpose image installs into it deliberately.
RUN pip install --no-cache-dir --break-system-packages \
    -r /app/gguf-py/requirements.txt

ENTRYPOINT [ "/app/tools.sh" ]
99
100
### CLI Only
FROM base AS light

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries.
# Trailing "/" on the destination is mandatory when a COPY source is a
# wildcard (or list) that can match multiple files.
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin/
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin/

ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
111
112
### Server
FROM base AS server

# Bind on all interfaces so the server is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0

WORKDIR /llama.cpp/bin

# Copy llama.cpp binaries and libraries.
# Trailing "/" on the destination is mandatory for multi-match wildcard COPYs.
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin/
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin/

EXPOSE 8080

# curl is installed in the base stage; probe the server's health endpoint so
# orchestrators can detect a wedged container.
# NOTE(review): assumes the default port 8080 — adjust if LLAMA_ARG_PORT is set.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]