# syntax=docker/dockerfile:1
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=7.0
ARG AMDGPU_VERSION=7.0

# Target the ROCm build image.
# NOTE: ARGs declared before FROM are only visible in FROM lines; they must be
# redeclared inside a stage to be used there.
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
  9
### Build image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102 are not officially supported;
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html

ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
# Uncomment (and comment the line above) for a single-arch build, e.g. Strix Halo:
#ARG ROCM_DOCKER_ARCH='gfx1151'

# Set ROCm architectures for the HIP toolchain. The same list is also passed
# explicitly to CMake below via -DAMDGPU_TARGETS.
# NOTE(review): this ENV only persists in the build stage; the runtime stages
# derive from a fresh FROM and do not inherit it.
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
 24
 25RUN apt-get update \
 26    && apt-get install -y \
 27    build-essential \
 28    cmake \
 29    git \
 30    libssl-dev \
 31    curl \
 32    libgomp1
 33
WORKDIR /app

# NOTE(review): COPY . . invalidates the cache on any source change; make sure
# the build context has a .dockerignore excluding .git and local build output.
COPY . .
 37
# Configure and build with the ROCm HIP toolchain.
#   HIPCXX  — the clang bundled with ROCm (hipconfig -l prints its bin dir)
#   HIP_PATH — the ROCm installation root (hipconfig -R)
# GGML_HIP_ROCWMMA_FATTN enables the rocWMMA flash-attention path;
# GGML_BACKEND_DL + GGML_CPU_ALL_VARIANTS build runtime-loadable CPU backend
# variants; AMDGPU_TARGETS pins the GPU arch list declared above.
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -S . -B build \
        -DGGML_HIP=ON \
        -DGGML_HIP_ROCWMMA_FATTN=ON \
        -DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
        -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
        -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
    && cmake --build build --config Release -j$(nproc)
 46
 47RUN mkdir -p /app/lib \
 48    && find build -name "*.so*" -exec cp -P {} /app/lib \;
 49
 50RUN mkdir -p /app/full \
 51    && cp build/bin/* /app/full \
 52    && cp *.py /app/full \
 53    && cp -r gguf-py /app/full \
 54    && cp -r requirements /app/full \
 55    && cp requirements.txt /app/full \
 56    && cp .devops/tools.sh /app/full/tools.sh
 57
 58## Base image
 59FROM ${BASE_ROCM_DEV_CONTAINER} AS base
 60
 61RUN apt-get update \
 62    && apt-get install -y libgomp1 curl\
 63    && apt autoremove -y \
 64    && apt clean -y \
 65    && rm -rf /tmp/* /var/tmp/* \
 66    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
 67    && find /var/cache -type f -delete
 68
 69COPY --from=build /app/lib/ /app
 70
 71### Full
 72FROM base AS full
 73
 74COPY --from=build /app/full /app
 75
 76WORKDIR /app
 77
 78RUN apt-get update \
 79    && apt-get install -y \
 80    git \
 81    python3-pip \
 82    python3 \
 83    python3-wheel\
 84    && pip install --break-system-packages --upgrade setuptools \
 85    && pip install --break-system-packages -r requirements.txt \
 86    && apt autoremove -y \
 87    && apt clean -y \
 88    && rm -rf /tmp/* /var/tmp/* \
 89    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
 90    && find /var/cache -type f -delete
 91
 92ENTRYPOINT ["/app/tools.sh"]
 93
### Light, CLI only
FROM base AS light

# Only the CLI binaries; their shared libraries were already placed in /app by
# the base stage.
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]
102
### Server, Server only
FROM base AS server

# Bind to all interfaces so the port can be published from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

# Probe the server's built-in /health endpoint. --start-period gives model
# loading time before failed probes count against the retry budget; without it
# a slow model load can mark the container unhealthy before it ever serves.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5m --retries=3 \
    CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]