aboutsummaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt')
-rw-r--r--llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt689
1 files changed, 689 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
new file mode 100644
index 0000000..7622d0b
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
@@ -0,0 +1,689 @@
1function(ggml_add_cpu_backend_features cpu_name arch)
2 # The feature detection code is compiled as a separate target so that
3 # it can be built without the architecture flags
4 # Since multiple variants of the CPU backend may be included in the same
5 # build, using set_source_files_properties() to set the arch flags is not possible
6 set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
7 add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
8 target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . ../include)
9 target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
10 target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
11 set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
12 target_link_libraries(${cpu_name} PRIVATE ${GGML_CPU_FEATS_NAME})
13endfunction()
14
15function(ggml_add_cpu_backend_variant_impl tag_name)
16 if (tag_name)
17 set(GGML_CPU_NAME ggml-cpu-${tag_name})
18 else()
19 set(GGML_CPU_NAME ggml-cpu)
20 endif()
21
22 ggml_add_backend_library(${GGML_CPU_NAME})
23
24 list (APPEND GGML_CPU_SOURCES
25 ggml-cpu/ggml-cpu.c
26 ggml-cpu/ggml-cpu.cpp
27 ggml-cpu/repack.cpp
28 ggml-cpu/repack.h
29 ggml-cpu/hbm.cpp
30 ggml-cpu/hbm.h
31 ggml-cpu/quants.c
32 ggml-cpu/quants.h
33 ggml-cpu/traits.cpp
34 ggml-cpu/traits.h
35 ggml-cpu/amx/amx.cpp
36 ggml-cpu/amx/amx.h
37 ggml-cpu/amx/mmq.cpp
38 ggml-cpu/amx/mmq.h
39 ggml-cpu/ggml-cpu-impl.h
40 ggml-cpu/common.h
41 ggml-cpu/binary-ops.h
42 ggml-cpu/binary-ops.cpp
43 ggml-cpu/unary-ops.h
44 ggml-cpu/unary-ops.cpp
45 ggml-cpu/simd-mappings.h
46 ggml-cpu/vec.h
47 ggml-cpu/vec.cpp
48 ggml-cpu/ops.h
49 ggml-cpu/ops.cpp
50 )
51
52 target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
53 target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
54
55 if (APPLE AND GGML_ACCELERATE)
56 find_library(ACCELERATE_FRAMEWORK Accelerate)
57 if (ACCELERATE_FRAMEWORK)
58 message(STATUS "Accelerate framework found")
59
60 target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
61 target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
62 target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
63
64 target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
65 else()
66 message(WARNING "Accelerate framework not found")
67 endif()
68 endif()
69
70 if (GGML_OPENMP)
71 find_package(OpenMP)
72 if (OpenMP_FOUND)
73 set(GGML_OPENMP_ENABLED "ON" CACHE INTERNAL "")
74 target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
75
76 target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
77 else()
78 set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
79 message(WARNING "OpenMP not found")
80 endif()
81 endif()
82
83 if (GGML_LLAMAFILE)
84 target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)
85
86 list(APPEND GGML_CPU_SOURCES
87 ggml-cpu/llamafile/sgemm.cpp
88 ggml-cpu/llamafile/sgemm.h)
89 endif()
90
91 if (GGML_CPU_HBM)
92 find_library(memkind memkind REQUIRED)
93
94 message(STATUS "Using memkind for CPU HBM")
95
96 target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
97
98 target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
99 endif()
100
101 if (GGML_SYSTEM_ARCH STREQUAL "ARM")
102 message(STATUS "ARM detected")
103 list(APPEND GGML_CPU_SOURCES
104 ggml-cpu/arch/arm/quants.c
105 ggml-cpu/arch/arm/repack.cpp
106 )
107
108 if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
109 message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
110 else()
111 check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
112 if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
113 list(APPEND ARCH_FLAGS -mfp16-format=ieee)
114 endif()
115
116 if (GGML_NATIVE)
117 # -mcpu=native does not always enable all the features in some compilers,
118 # so we check for them manually and enable them if available
119
120 execute_process(
121 COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
122 INPUT_FILE "/dev/null"
123 OUTPUT_QUIET
124 ERROR_VARIABLE ARM_MCPU
125 RESULT_VARIABLE ARM_MCPU_RESULT
126 )
127 if (NOT ARM_MCPU_RESULT)
128 string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
129 string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")
130
131 # on some old GCC we need to read -march=
132 if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
133 set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
134 elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
135 set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
136 endif()
137 endif()
138
139 if ("${ARM_NATIVE_FLAG}" STREQUAL "")
140 set(ARM_NATIVE_FLAG -mcpu=native)
141 message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
142 else()
143 message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
144 endif()
145
146 include(CheckCXXSourceRuns)
147
148 macro(check_arm_feature tag feature code)
149 set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
150 set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
151 check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
152 if (GGML_MACHINE_SUPPORTS_${tag})
153 set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}")
154 else()
155 set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
156 check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
157 if (GGML_MACHINE_SUPPORTS_no${tag})
158 set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}")
159 list(APPEND ARCH_FLAGS -U__ARM_FEATURE_${feature})
160 endif()
161 endif()
162 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
163 endmacro()
164
165 check_arm_feature(dotprod DOTPROD "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
166 check_arm_feature(i8mm MATMUL_INT8 "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
167 check_arm_feature(sve SVE "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
168 check_arm_feature(sme SME "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")
169
170 list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
171 else()
172 if (GGML_CPU_ARM_ARCH)
173 list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
174 elseif(GGML_CPU_ALL_VARIANTS)
175 # Begin with the lowest baseline
176 set(ARM_MCPU "armv8-a")
177 set(ARCH_TAGS "")
178 set(ARCH_DEFINITIONS "")
179
180 # When a feature is selected, bump the MCPU to the first
181 # version that supported it
182 if (GGML_INTERNAL_DOTPROD)
183 set(ARM_MCPU "armv8.2-a")
184 set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
185 list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
186 endif()
187 if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
188 set(ARM_MCPU "armv8.2-a")
189 set(ARCH_TAGS "${ARCH_TAGS}+fp16")
190 list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
191 endif()
192 if (GGML_INTERNAL_SVE)
193 set(ARM_MCPU "armv8.2-a")
194 set(ARCH_TAGS "${ARCH_TAGS}+sve")
195 list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
196 endif()
197 if (GGML_INTERNAL_MATMUL_INT8)
198 set(ARM_MCPU "armv8.6-a")
199 set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
200 list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
201 endif()
202 if (GGML_INTERNAL_SVE2)
203 set(ARM_MCPU "armv8.6-a")
204 set(ARCH_TAGS "${ARCH_TAGS}+sve2")
205 list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
206 endif()
207 if (GGML_INTERNAL_NOSVE)
208 set(ARCH_TAGS "${ARCH_TAGS}+nosve")
209 endif()
210 if (GGML_INTERNAL_SME)
211 set(ARM_MCPU "armv9.2-a")
212 set(ARCH_TAGS "${ARCH_TAGS}+sme")
213 list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
214 endif()
215 list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
216 ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
217 endif()
218 endif()
219
220 message(STATUS "Checking for ARM features using flags:")
221 foreach(flag IN LISTS ARCH_FLAGS)
222 message(STATUS " ${flag}")
223 endforeach()
224
225 include(CheckCXXSourceCompiles)
226 set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
227 string(REPLACE ";" " " ARCH_FLAGS_STR "${ARCH_FLAGS}")
228 set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS_STR}")
229 foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
230 set(ARM_FEATURE "HAVE_${feature}")
231 check_cxx_source_compiles(
232 "
233 #if !defined(__ARM_FEATURE_${feature})
234 # error \"Feature ${feature} is not defined\"
235 #endif
236 int main() { return 0; }
237 "
238 ${ARM_FEATURE}
239 )
240 endforeach()
241 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
242 endif()
243 elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
244 message(STATUS "x86 detected")
245 list(APPEND GGML_CPU_SOURCES
246 ggml-cpu/arch/x86/quants.c
247 ggml-cpu/arch/x86/repack.cpp
248 )
249
250 if (MSVC)
251 # instruction set detection for MSVC only
252 if (GGML_NATIVE)
253 include(ggml-cpu/cmake/FindSIMD.cmake)
254 endif ()
255 if (GGML_AVX512)
256 list(APPEND ARCH_FLAGS /arch:AVX512)
257 # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
258 # MSVC has no compile-time flags enabling specific
259 # AVX512 extensions, neither it defines the
260 # macros corresponding to the extensions.
261 # Do it manually.
262 list(APPEND ARCH_DEFINITIONS GGML_AVX512)
263 if (GGML_AVX512_VBMI)
264 list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
265 if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
266 list(APPEND ARCH_FLAGS -mavx512vbmi)
267 endif()
268 endif()
269 if (GGML_AVX512_VNNI)
270 list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
271 if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
272 list(APPEND ARCH_FLAGS -mavx512vnni)
273 endif()
274 endif()
275 if (GGML_AVX512_BF16)
276 list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
277 if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
278 list(APPEND ARCH_FLAGS -mavx512bf16)
279 endif()
280 endif()
281 if (GGML_AMX_TILE)
282 list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
283 endif()
284 if (GGML_AMX_INT8)
285 list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
286 endif()
287 if (GGML_AMX_BF16)
288 list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
289 endif()
290 elseif (GGML_AVX2)
291 list(APPEND ARCH_FLAGS /arch:AVX2)
292 list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
293 elseif (GGML_AVX)
294 list(APPEND ARCH_FLAGS /arch:AVX)
295 list(APPEND ARCH_DEFINITIONS GGML_AVX)
296 elseif (GGML_SSE42)
297 list(APPEND ARCH_FLAGS /arch:SSE4.2)
298 list(APPEND ARCH_DEFINITIONS GGML_SSE42)
299 endif()
300 if (GGML_AVX_VNNI)
301 list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
302 endif()
303 if (GGML_BMI2)
304 # MSVC does not define macro __BMI2__
305 list(APPEND ARCH_DEFINITIONS __BMI2__ GGML_BMI2)
306 endif()
307 else ()
308 if (GGML_NATIVE)
309 list(APPEND ARCH_FLAGS -march=native)
310 else ()
311 if (GGML_SSE42)
312 list(APPEND ARCH_FLAGS -msse4.2)
313 list(APPEND ARCH_DEFINITIONS GGML_SSE42)
314 endif()
315 if (GGML_F16C)
316 list(APPEND ARCH_FLAGS -mf16c)
317 list(APPEND ARCH_DEFINITIONS GGML_F16C)
318 endif()
319 if (GGML_FMA)
320 list(APPEND ARCH_FLAGS -mfma)
321 list(APPEND ARCH_DEFINITIONS GGML_FMA)
322 endif()
323 if (GGML_BMI2)
324 list(APPEND ARCH_FLAGS -mbmi2)
325 list(APPEND ARCH_DEFINITIONS GGML_BMI2)
326 endif()
327 if (GGML_AVX)
328 list(APPEND ARCH_FLAGS -mavx)
329 list(APPEND ARCH_DEFINITIONS GGML_AVX)
330 endif()
331 if (GGML_AVX2)
332 list(APPEND ARCH_FLAGS -mavx2)
333 list(APPEND ARCH_DEFINITIONS GGML_AVX2)
334 endif()
335 if (GGML_AVX_VNNI)
336 list(APPEND ARCH_FLAGS -mavxvnni)
337 list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
338 endif()
339 if (GGML_AVX512)
340 list(APPEND ARCH_FLAGS -mavx512f)
341 list(APPEND ARCH_FLAGS -mavx512cd)
342 list(APPEND ARCH_FLAGS -mavx512vl)
343 list(APPEND ARCH_FLAGS -mavx512dq)
344 list(APPEND ARCH_FLAGS -mavx512bw)
345 list(APPEND ARCH_DEFINITIONS GGML_AVX512)
346 endif()
347 if (GGML_AVX512_VBMI)
348 list(APPEND ARCH_FLAGS -mavx512vbmi)
349 list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
350 endif()
351 if (GGML_AVX512_VNNI)
352 list(APPEND ARCH_FLAGS -mavx512vnni)
353 list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
354 endif()
355 if (GGML_AVX512_BF16)
356 list(APPEND ARCH_FLAGS -mavx512bf16)
357 list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
358 endif()
359 if (GGML_AMX_TILE)
360 list(APPEND ARCH_FLAGS -mamx-tile)
361 list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
362 endif()
363 if (GGML_AMX_INT8)
364 list(APPEND ARCH_FLAGS -mamx-int8)
365 list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
366 endif()
367 if (GGML_AMX_BF16)
368 list(APPEND ARCH_FLAGS -mamx-bf16)
369 list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
370 endif()
371 endif()
372 endif()
373
374 if (GGML_BACKEND_DL)
375 if (GGML_NATIVE)
376 # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
377 message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
378 endif()
379 ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
380 endif()
381 elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
382 message(STATUS "PowerPC detected")
383 list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
384 if (GGML_NATIVE)
385 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
386 file(READ "/proc/cpuinfo" POWER10_M)
387 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc")
388 execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
389 endif()
390
391 string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
392 string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M_UPPER}")
393 string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
394
395 if (EXTRACTED_NUMBER GREATER_EQUAL 10)
396 list(APPEND ARCH_FLAGS -mcpu=power10)
397 elseif (EXTRACTED_NUMBER EQUAL 9)
398 list(APPEND ARCH_FLAGS -mcpu=power9)
399 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
400 list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
401 else()
402 list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
403 endif()
404 elseif(GGML_CPU_ALL_VARIANTS)
405 # Begin with the lowest baseline
406 set(ARCH_DEFINITIONS "")
407
408 # When a feature is selected, bump the MCPU to the first
409 # version that supported it
410 foreach(PVER RANGE 7 11)
411 if(DEFINED GGML_INTERNAL_POWER${PVER})
412 set(POWERPC_MCPU "power${PVER}")
413 list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
414 endif()
415 endforeach()
416 if (GGML_INTERNAL_VSX)
417 list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
418 list(APPEND ARCH_FLAGS -mvsx)
419 endif()
420
421 if (DEFINED POWERPC_MCPU)
422 list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
423 endif()
424 ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
425 else()
426 if (GGML_CPU_POWERPC_CPUTYPE)
427 list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
428 endif()
429 endif()
430 elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
431 message(STATUS "loongarch64 detected")
432 list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
433
434 list(APPEND ARCH_FLAGS -march=loongarch64)
435 if (GGML_LASX)
436 list(APPEND ARCH_FLAGS -mlasx)
437 endif()
438 if (GGML_LSX)
439 list(APPEND ARCH_FLAGS -mlsx)
440 endif()
441 elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
442 message(STATUS "riscv64 detected")
443 list(APPEND GGML_CPU_SOURCES
444 ggml-cpu/arch/riscv/quants.c
445 ggml-cpu/arch/riscv/repack.cpp
446 )
447 if (GGML_CPU_RISCV64_SPACEMIT)
448 target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT ${RISCV64_SPACEMIT_IME_SPEC})
449 list(APPEND GGML_CPU_SOURCES
450 ggml-cpu/spacemit/ime.cpp
451 ggml-cpu/spacemit/ime.h
452 ggml-cpu/spacemit/ime1_kernels.cpp
453 ggml-cpu/spacemit/ime_kernels.h
454 )
455 endif()
456 if(NOT GGML_CPU_ALL_VARIANTS)
457 set(MARCH_STR "rv64gc")
458 if (GGML_RV_ZFH)
459 string(APPEND MARCH_STR "_zfh")
460 endif()
461
462 if (GGML_XTHEADVECTOR)
463 string(APPEND MARCH_STR "_xtheadvector")
464 elseif (GGML_RVV)
465 string(APPEND MARCH_STR "_v")
466 if (GGML_RV_ZVFH)
467 string(APPEND MARCH_STR "_zvfh")
468 endif()
469 if (GGML_RV_ZVFBFWMA)
470 string(APPEND MARCH_STR "_zvfbfwma")
471 endif()
472 endif()
473 if (GGML_RV_ZICBOP)
474 string(APPEND MARCH_STR "_zicbop")
475 endif()
476 if (GGML_RV_ZIHINTPAUSE)
477 string(APPEND MARCH_STR "_zihintpause")
478 endif()
479 list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
480 else()
481 # Begin with the lowest baseline
482 set(ARCH_DEFINITIONS "")
483
484 if (GGML_INTERNAL_RVV)
485 message(STATUS "RVV enabled")
486 list(APPEND ARCH_DEFINITIONS GGML_USE_RVV)
487 list(APPEND ARCH_FLAGS -march=rv64gc_v -mabi=lp64d)
488 endif()
489
490 ggml_add_cpu_backend_features(${GGML_CPU_NAME} riscv ${ARCH_DEFINITIONS})
491 endif()
492 elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
493 message(STATUS "s390x detected")
494 list(APPEND GGML_CPU_SOURCES
495 ggml-cpu/arch/s390/quants.c)
496
497 # for native compilation
498 if (GGML_NATIVE)
499 # check machine level to determine target
500 file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
501 string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
502
503 # TODO: Separation to determine activation of VX/VXE/VXE2
504 if (${S390X_M} MATCHES "8561|8562")
505 message(STATUS "z15 target")
506 list(APPEND ARCH_FLAGS -march=z15)
507 elseif (${S390X_M} MATCHES "3931")
508 message(STATUS "z16 target")
509 list(APPEND ARCH_FLAGS -march=z16)
510 elseif (${S390X_M} MATCHES "9175|9176")
511 # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
512 # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
513 message(STATUS "z17 target")
514 list(APPEND ARCH_FLAGS -march=arch15)
515 else()
516 message(STATUS "Unknown target")
517 message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
518 list(APPEND ARCH_FLAGS -march=native -mtune=native)
519 endif()
520 # for cross-compilation
521 elseif(GGML_CPU_ALL_VARIANTS)
522 # range through IBM z15 to z17
523 # NOTE: update when a new hardware level is released
524 foreach (ZHW RANGE 15 17)
525 if(DEFINED GGML_INTERNAL_Z${ZHW})
526 message(STATUS "z${ZHW} cross-compile target")
527 list(APPEND ARCH_FLAGS -march=z${ZHW})
528 endif()
529 endforeach()
530 endif()
531
532 if (GGML_VXE OR GGML_INTERNAL_VXE2)
533 message(STATUS "VXE2 enabled")
534 list(APPEND ARCH_FLAGS -mvx -mzvector)
535 list(APPEND ARCH_DEFINITIONS GGML_USE_VXE2)
536 endif()
537
538 if (GGML_INTERNAL_NNPA)
539 message(STATUS "NNPA enabled")
540 list(APPEND ARCH_DEFINITIONS GGML_USE_NNPA)
541 endif()
542
543 ggml_add_cpu_backend_features(${GGML_CPU_NAME} s390 ${ARCH_DEFINITIONS})
544 elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
545 message(STATUS "Wasm detected")
546 list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
547 else()
548 message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
549 list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
550 endif()
551
552 if (GGML_CPU_REPACK)
553 target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
554 endif()
555
556 if (GGML_CPU_KLEIDIAI)
557 message(STATUS "Using KleidiAI optimized kernels if applicable")
558
559 # Disable the KleidiAI tests
560 set(KLEIDIAI_BUILD_TESTS OFF)
561
562 # Fetch KleidiAI sources:
563 include(FetchContent)
564 set(KLEIDIAI_COMMIT_TAG "v1.16.0")
565 set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
566 set(KLEIDIAI_ARCHIVE_MD5 "0a9e9008adb6031f9e8cf70dff4a3321")
567
568 if (POLICY CMP0135)
569 cmake_policy(SET CMP0135 NEW)
570 endif()
571
572 FetchContent_Declare(KleidiAI_Download
573 URL ${KLEIDIAI_DOWNLOAD_URL}
574 DOWNLOAD_EXTRACT_TIMESTAMP NEW
575 URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5})
576
577 FetchContent_MakeAvailable(KleidiAI_Download)
578 FetchContent_GetProperties(KleidiAI_Download
579 SOURCE_DIR KLEIDIAI_SRC
580 POPULATED KLEIDIAI_POPULATED)
581
582 if (NOT KLEIDIAI_POPULATED)
583 message(FATAL_ERROR "KleidiAI source downloaded failed.")
584 endif()
585
586 add_compile_definitions(GGML_USE_CPU_KLEIDIAI)
587
588 # Remove kleidiai target after fetching it
589 if (TARGET kleidiai)
590 set_target_properties(kleidiai PROPERTIES EXCLUDE_FROM_ALL TRUE)
591 endif()
592
593 list(APPEND GGML_CPU_SOURCES
594 ggml-cpu/kleidiai/kleidiai.cpp
595 ggml-cpu/kleidiai/kernels.cpp
596 ggml-cpu/kleidiai/kleidiai.h
597 ggml-cpu/kleidiai/kernels.h
598 )
599
600 # KleidiAI
601 include_directories(
602 ${KLEIDIAI_SRC}/
603 ${KLEIDIAI_SRC}/kai/
604 ${KLEIDIAI_SRC}/kai/ukernels/
605 ${KLEIDIAI_SRC}/kai/ukernels/matmul/
606 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
607 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/
608 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
609 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
610
611 set(ARCH_FLAGS_TEMP "${ARCH_FLAGS}")
612 if (NOT ARCH_FLAGS_TEMP)
613 string(REGEX MATCH "-march=[^ ]+" ARCH_FLAGS_TEMP "${CMAKE_C_FLAGS}")
614 endif()
615 string(FIND "${ARCH_FLAGS_TEMP}" "+dotprod" DOTPROD_ENABLED)
616 string(FIND "${ARCH_FLAGS_TEMP}" "+i8mm" I8MM_ENABLED)
617 string(FIND "${ARCH_FLAGS_TEMP}" "+sme" SME_ENABLED)
618 string(FIND "${ARCH_FLAGS_TEMP}" "+sve" SVE_ENABLED)
619
620 set(PRIVATE_ARCH_FLAGS ${ARCH_FLAGS_TEMP})
621
622 list(APPEND GGML_KLEIDIAI_SOURCES
623 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c
624 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.c
625 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
626 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
627 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
628 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c
629 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c)
630
631 if (NOT DOTPROD_ENABLED MATCHES -1)
632 list(APPEND GGML_KLEIDIAI_SOURCES
633 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
634 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
635 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
636 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c
637 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c
638 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c)
639 endif()
640
641 if (NOT I8MM_ENABLED MATCHES -1)
642 list(APPEND GGML_KLEIDIAI_SOURCES
643 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c
644 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c)
645 endif()
646
647 if (NOT SME_ENABLED MATCHES -1)
648 list(APPEND GGML_KLEIDIAI_SOURCES
649 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
650 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
651 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.c
652 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_asm.S
653 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.c
654 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_asm.S
655 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
656 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
657 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c
658 ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.c
659 ${KLEIDIAI_SRC}/kai/kai_common_sme_asm.S)
660 set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
661 endif()
662
663 if (NOT SVE_ENABLED MATCHES -1)
664 list(APPEND GGML_KLEIDIAI_SOURCES
665 ${KLEIDIAI_SRC}/kai/kai_common_sve_asm.S
666 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod_asm.S
667 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod.c
668 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm_asm.S
669 ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm.c)
670 endif()
671
672 set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
673 list(APPEND GGML_CPU_SOURCES ${GGML_KLEIDIAI_SOURCES})
674 endif()
675
676 message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
677 target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
678 target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
679 target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
680
681 if (EMSCRIPTEN)
682 set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
683 endif()
684
685 if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
686 # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
687 target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
688 endif()
689endfunction()