{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  # Fall back to BLAS acceleration only when no GPU backend is selected and a
  # BLAS implementation is actually available on this platform.
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  useVulkan ? false,
  useRpc ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionalAttrs
    optionals
    strings
    ;

  # Shadow the raw stdenv so any accidental use below fails loudly at eval
  # time; everything in this file must go through effectiveStdenv.
  stdenv = throw "Use effectiveStdenv instead";

  # Human-readable names of the enabled acceleration backends; used to build
  # the pname and description suffixes below.
  suffices =
    optionals useBlas [ "BLAS" ]
    ++ optionals useCuda [ "CUDA" ]
    ++ optionals useMetalKit [ "MetalKit" ]
    ++ optionals useMpi [ "MPI" ]
    ++ optionals useRocm [ "ROCm" ]
    ++ optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix = strings.optionalString (
    suffices != [ ]
  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

  # Impure wrapper exposing the system `xcrun` (used to locate the Metal
  # compiler); only works together with `__noChroot` below.
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cudart
    cuda_cccl # <nv/target>
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in

effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;

  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        noneOf = builtins.all (x: !x);
        baseName = baseNameOf name;
      in
      noneOf [
        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
        (lib.hasPrefix "." baseName) # Skip hidden files and directories
        (baseName == "flake.lock")
      ];
    src = lib.cleanSource ../../.;
  };

  postPatch = ''
  '';

  # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
  # `default.metallib` may be compiled with Metal compiler from XCode
  # and we need to escape sandbox on MacOS to access Metal compiler.
  # `xcrun` is used to find the path of the Metal compiler, which is variable
  # and not on $PATH
  # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

  nativeBuildInputs =
    [
      cmake
      ninja
      pkg-config
      git
    ]
    ++ optionals useCuda [
      cudaPackages.cuda_nvcc

      autoAddDriverRunpath
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

  buildInputs =
    optionals effectiveStdenv.isDarwin darwinBuildInputs
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useMpi [ mpi ]
    ++ optionals useRocm rocmBuildInputs
    ++ optionals useBlas [ blas ]
    ++ optionals useVulkan vulkanBuildInputs;

  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIP" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
      (cmakeBool "GGML_RPC" useRpc)
    ]
    ++ optionals useCuda [
      (
        with cudaPackages.flags;
        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
        )
      )
    ]
    ++ optionals useRocm [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
    ]
    ++ optionals useMetalKit [
      (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];

  # Environment variables needed for ROCm
  env = optionalAttrs useRocm {
    ROCM_PATH = "${rocmPackages.clr}";
    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  };

  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  '';

  meta = {
    # Configurations we don't want even the CI to evaluate. Results in the
    # "unsupported platform" messages. This is mostly a no-op, because
    # cudaPackages would've refused to evaluate anyway.
    badPlatforms = optionals useCuda lib.platforms.darwin;

    # Configurations that are known to result in build failures. Can be
    # overridden by importing Nixpkgs with `allowBroken = true`.
    broken = (useMetalKit && !effectiveStdenv.isDarwin);

    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggml-org/llama.cpp/";
    license = lib.licenses.mit;

    # Accommodates `nix run` and `lib.getExe`
    mainProgram = "llama-cli";

    # These people might respond, on the best effort basis, if you ping them
    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.

    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
    # an attrset following the same format as in
    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];

    # Extend `badPlatforms` instead
    platforms = lib.platforms.all;
  };
})