1# ==============================================================================
2# ARGUMENTS
3# ==============================================================================
4
# Ascend chip variant; it is interpolated into the CANN image tag below.
ARG CHIP_TYPE=910b
# CANN base image reference, kept in a single ARG so the toolkit version is easy to bump.
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
8
9# ==============================================================================
10# BUILD STAGE
11# Compile all binary files and libraries
12# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS build

# -- Install build dependencies --
# Package caches are removed in the same layer so they never reach the image.
RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set the working directory --
WORKDIR /app

# -- Copy project files --
# The whole build context lands in /app; the later `cp` steps use paths relative to it.
COPY . .

# -- Set CANN environment variables (required for compilation) --
# ENV values stay in effect for every following instruction of this stage,
# unlike variables exported by `source` inside a single RUN.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
# Prepend the toolkit's runtime/lib64/stub directory; it ends up ahead of lib64 on the search path.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
# Only the core variables are listed here; add any further CANN variables you need below.

# -- Build llama.cpp --
# CHIP_TYPE is declared before the first FROM, so it must be re-declared to be usable in this stage.
ARG CHIP_TYPE
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build \
        -DGGML_CANN=ON \
        -DCMAKE_BUILD_TYPE=Release \
        -DSOC_TYPE=ascend${CHIP_TYPE} \
        -DUSE_ACL_GRAPH=ON \
        . && \
    cmake --build build --config Release -j$(nproc)

# -- Organize build artifacts for copying in later stages --
# Gather every shared library from the build tree into /app/lib (cp -P keeps symlinks as symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Gather executables, Python scripts and requirement files into /app/full.
# The `full` target uses /app/tools.sh as its ENTRYPOINT, so .devops/tools.sh is staged
# here whenever the build context provides it. The copy is conditional so that a context
# without the script still builds exactly as before.
RUN mkdir -p /app/full && \
    cp build/bin/* /app/full/ && \
    cp *.py /app/full/ && \
    cp -r gguf-py /app/full/ && \
    cp -r requirements /app/full/ && \
    cp requirements.txt /app/full/ && \
    if [ -f .devops/tools.sh ]; then cp .devops/tools.sh /app/full/tools.sh; fi
63
64# ==============================================================================
65# BASE STAGE
66# Create a minimal base image with CANN runtime and common libraries
67# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS base

# -- Runtime packages (caches are dropped in the same layer) --
RUN yum install -y libgomp curl \
    && yum clean all \
    && rm -rf /var/cache/yum

# -- CANN environment needed at runtime --
# ASCEND_TOOLKIT_HOME gets its own instruction because the values below expand it.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
# /app comes first on the library path: that is where the compiled .so files are copied below.
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH} \
    PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH} \
    ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
# Add further CANN variables here if your workload needs them.

WORKDIR /app

# Shared libraries gathered by the build stage
COPY --from=build /app/lib/ /app
86
87# ==============================================================================
88# FINAL STAGES (TARGETS)
89# ==============================================================================
90
91### Target: full
92# Complete image with all tools, Python bindings, and dependencies
93# ==============================================================================
FROM base AS full

# Everything the build stage gathered under /app/full (binaries, scripts, requirements)
COPY --from=build /app/full /app

# Python tooling and the project's requirements; requirements.txt is resolved
# relative to the WORKDIR inherited from the base stage (/app).
RUN yum install -y git python3 python3-pip \
    && pip3 install --no-cache-dir --upgrade pip setuptools wheel \
    && pip3 install --no-cache-dir -r requirements.txt \
    && yum clean all \
    && rm -rf /var/cache/yum

# This entrypoint requires a tools.sh script to be present in /app
ENTRYPOINT ["/app/tools.sh"]
# Without a tools.sh, the server can be used as the default entrypoint instead:
# ENTRYPOINT ["/app/llama-server"]
109
110### Target: light
111# Lightweight image containing only llama-cli and llama-completion
112# ==============================================================================
FROM base AS light

# Two sources are copied at once, so the destination must be a directory written with a
# trailing slash; without it the legacy builder rejects the instruction.
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app/

# llama-cli is the default program; llama-completion is shipped alongside it in /app.
ENTRYPOINT ["/app/llama-cli"]
118
119### Target: server
120# Dedicated server image containing only llama-server
121# ==============================================================================
FROM base AS server

# Only the server binary is shipped in this target
COPY --from=build /app/full/llama-server /app

# Presumably read by llama-server as its bind address (all interfaces) - confirm against the server docs
ENV LLAMA_ARG_HOST=0.0.0.0

# Probe the local /health endpoint every five minutes; curl -f fails on HTTP error statuses
HEALTHCHECK --interval=5m \
    CMD ["curl", "-f", "http://localhost:8080/health"]

ENTRYPOINT ["/app/llama-server"]