{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  useVulkan ? false,
  useRpc ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}:
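# A minimal sketch (hypothetical file path and attribute name) of how a
# consumer might toggle the backend flags above via `callPackage`:
#
#   llama-cpp-vulkan = pkgs.callPackage ./.devops/nix/package.nix {
#     useVulkan = true;
#   };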

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionalAttrs
    optionals
    strings
    ;

  # Shadow `stdenv` so that accidental uses below fail loudly; builds must go
  # through `effectiveStdenv` (see the note in the arguments above).
  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix = strings.optionalString (
    suffices != [ ]
  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
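  # For example, with useCuda and useMpi enabled, `suffices` evaluates to
  # [ "CUDA" "MPI" ], so the package is named "llama-cpp-cuda-mpi" and its
  # description ends in ", accelerated with CUDA, MPI".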

  # Impurely link the host's /usr/bin/xcrun into the sandbox so the Metal
  # compiler can be located (see the __noChroot note below).
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cudart
    cuda_cccl # <nv/target>
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in

effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;

  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        noneOf = builtins.all (x: !x);
        baseName = baseNameOf name;
      in
      noneOf [
        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
        (lib.hasPrefix "." baseName) # Skip hidden files and directories
        (baseName == "flake.lock")
      ];
    src = lib.cleanSource ../../.;
  };
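  # Illustrative paths: "flake.nix", "README.md", and ".github/workflows/ci.yml"
  # are all excluded from the source (and hence from the output hash), while
  # e.g. "src/llama.cpp" is kept.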

  postPatch = ''
  '';

  # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
  # `default.metallib` may be compiled with the Metal compiler from Xcode,
  # and we need to escape the sandbox on macOS to access that compiler.
  # `xcrun` is used to find the path of the Metal compiler, which varies
  # per installation and is not on $PATH;
  # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

  nativeBuildInputs =
    [
      cmake
      ninja
      pkg-config
      git
    ]
    ++ optionals useCuda [
      cudaPackages.cuda_nvcc

      autoAddDriverRunpath
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

  buildInputs =
    optionals effectiveStdenv.isDarwin darwinBuildInputs
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useMpi [ mpi ]
    ++ optionals useRocm rocmBuildInputs
    ++ optionals useBlas [ blas ]
    ++ optionals useVulkan vulkanBuildInputs;

  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIP" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
      (cmakeBool "GGML_RPC" useRpc)
    ]
    ++ optionals useCuda [
      (
        with cudaPackages.flags;
        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
        )
      )
    ]
    ++ optionals useRocm [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
    ]
    ++ optionals useMetalKit [
      (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];
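  # Illustrative rendering: `cmakeBool` and `cmakeFeature` produce typed -D
  # defines, so a CUDA build would roughly pass flags along the lines of
  #   -DGGML_CUDA:BOOL=TRUE -DCMAKE_CUDA_ARCHITECTURES:STRING=75;86
  # where `dropDot` has turned a capability like "7.5" into "75".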

  # Environment variables needed for ROCm
  env = optionalAttrs useRocm {
    ROCM_PATH = "${rocmPackages.clr}";
    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  };

  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  '';
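  # A hypothetical CMake-side resolution of the TODO above would be something
  # along the lines of
  #   install(FILES include/llama.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  # in the project's CMakeLists.txt, making this postInstall unnecessary.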

  meta = {
    # Configurations we don't want even the CI to evaluate. Results in
    # "unsupported platform" messages. This is mostly a no-op, because
    # cudaPackages would've refused to evaluate anyway.
    badPlatforms = optionals useCuda lib.platforms.darwin;

    # Configurations that are known to result in build failures. Can be
    # overridden by importing Nixpkgs with `allowBroken = true`.
    broken = (useMetalKit && !effectiveStdenv.isDarwin);

    description = "Inference of LLaMA models in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggml-org/llama.cpp/";
    license = lib.licenses.mit;

    # Accommodates `nix run` and `lib.getExe`
    mainProgram = "llama-cli";
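    # (Both will therefore resolve this package to $out/bin/llama-cli.)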

    # These people might respond, on a best-effort basis, if you ping them
    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.

    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
    # an attrset following the same format as in
    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];

    # Extend `badPlatforms` instead
    platforms = lib.platforms.all;
  };
})