# Builds the CPU feature-detection code for the backend target `cpu_name`.
#
#   cpu_name - CPU backend target that links the detection objects
#   arch     - subdirectory of ggml-cpu/arch/ providing cpu-feats.cpp
#   ARGN     - additional compile definitions forwarded to the detection target
#
# The detection source is placed in its own OBJECT library (<cpu_name>-feats)
# so that it can be compiled without the architecture flags of the variant.
# set_source_files_properties() cannot be used for this purpose, because
# several variants of the CPU backend may be part of the same build.
function(ggml_add_cpu_backend_features cpu_name arch)
    set(feats_target "${cpu_name}-feats")

    add_library(${feats_target} OBJECT "ggml-cpu/arch/${arch}/cpu-feats.cpp")
    set_target_properties(${feats_target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_include_directories(${feats_target} PRIVATE . ../include)
    target_compile_definitions(${feats_target}
        PRIVATE
            ${ARGN}
            GGML_BACKEND_DL
            GGML_BACKEND_BUILD
            GGML_BACKEND_SHARED)

    # Pull the detection objects into the backend library itself
    target_link_libraries(${cpu_name} PRIVATE ${feats_target})
endfunction()
 14
 15function(ggml_add_cpu_backend_variant_impl tag_name)
 16    if (tag_name)
 17        set(GGML_CPU_NAME ggml-cpu-${tag_name})
 18    else()
 19        set(GGML_CPU_NAME ggml-cpu)
 20    endif()
 21
 22    ggml_add_backend_library(${GGML_CPU_NAME})
 23
 24    list (APPEND GGML_CPU_SOURCES
 25        ggml-cpu/ggml-cpu.c
 26        ggml-cpu/ggml-cpu.cpp
 27        ggml-cpu/repack.cpp
 28        ggml-cpu/repack.h
 29        ggml-cpu/hbm.cpp
 30        ggml-cpu/hbm.h
 31        ggml-cpu/quants.c
 32        ggml-cpu/quants.h
 33        ggml-cpu/traits.cpp
 34        ggml-cpu/traits.h
 35        ggml-cpu/amx/amx.cpp
 36        ggml-cpu/amx/amx.h
 37        ggml-cpu/amx/mmq.cpp
 38        ggml-cpu/amx/mmq.h
 39        ggml-cpu/ggml-cpu-impl.h
 40        ggml-cpu/common.h
 41        ggml-cpu/binary-ops.h
 42        ggml-cpu/binary-ops.cpp
 43        ggml-cpu/unary-ops.h
 44        ggml-cpu/unary-ops.cpp
 45        ggml-cpu/simd-mappings.h
 46        ggml-cpu/vec.h
 47        ggml-cpu/vec.cpp
 48        ggml-cpu/ops.h
 49        ggml-cpu/ops.cpp
 50        )
 51
 52    target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
 53    target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
 54
 55    if (APPLE AND GGML_ACCELERATE)
 56        find_library(ACCELERATE_FRAMEWORK Accelerate)
 57        if (ACCELERATE_FRAMEWORK)
 58            message(STATUS "Accelerate framework found")
 59
 60            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
 61            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
 62            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
 63
 64            target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
 65        else()
 66            message(WARNING "Accelerate framework not found")
 67        endif()
 68    endif()
 69
 70    if (GGML_OPENMP)
 71        find_package(OpenMP)
 72        if (OpenMP_FOUND)
 73            set(GGML_OPENMP_ENABLED "ON" CACHE INTERNAL "")
 74            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
 75
 76            target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
 77        else()
 78            set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
 79            message(WARNING "OpenMP not found")
 80        endif()
 81    endif()
 82
 83    if (GGML_LLAMAFILE)
 84        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)
 85
 86        list(APPEND GGML_CPU_SOURCES
 87                    ggml-cpu/llamafile/sgemm.cpp
 88                    ggml-cpu/llamafile/sgemm.h)
 89    endif()
 90
 91    if (GGML_CPU_HBM)
 92        find_library(memkind memkind REQUIRED)
 93
 94        message(STATUS "Using memkind for CPU HBM")
 95
 96        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
 97
 98        target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
 99    endif()
100
101    if (GGML_SYSTEM_ARCH STREQUAL "ARM")
102        message(STATUS "ARM detected")
103        list(APPEND GGML_CPU_SOURCES
104            ggml-cpu/arch/arm/quants.c
105            ggml-cpu/arch/arm/repack.cpp
106            )
107
108        if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
109            message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
110        else()
111            check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
112            if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
113                list(APPEND ARCH_FLAGS -mfp16-format=ieee)
114            endif()
115
116            if (GGML_NATIVE)
117                # -mcpu=native does not always enable all the features in some compilers,
118                # so we check for them manually and enable them if available
119
120                execute_process(
121                    COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
122                    INPUT_FILE "/dev/null"
123                    OUTPUT_QUIET
124                    ERROR_VARIABLE ARM_MCPU
125                    RESULT_VARIABLE ARM_MCPU_RESULT
126                )
127                if (NOT ARM_MCPU_RESULT)
128                    string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
129                    string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")
130
131                    # on some old GCC we need to read -march=
132                    if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
133                        set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
134                    elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
135                        set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
136                    endif()
137                endif()
138
139                if ("${ARM_NATIVE_FLAG}" STREQUAL "")
140                    set(ARM_NATIVE_FLAG -mcpu=native)
141                    message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
142                else()
143                    message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
144                endif()
145
146                include(CheckCXXSourceRuns)
147
148                macro(check_arm_feature tag feature code)
149                    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
150                    set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
151                    check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
152                    if (GGML_MACHINE_SUPPORTS_${tag})
153                        set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}")
154                    else()
155                        set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
156                        check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
157                        if (GGML_MACHINE_SUPPORTS_no${tag})
158                            set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}")
159                            list(APPEND ARCH_FLAGS -U__ARM_FEATURE_${feature})
160                        endif()
161                    endif()
162                    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
163                endmacro()
164
165                check_arm_feature(dotprod DOTPROD     "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
166                check_arm_feature(i8mm    MATMUL_INT8 "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
167                check_arm_feature(sve     SVE         "#include <arm_sve.h>\nint main()  { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
168                check_arm_feature(sme     SME         "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")
169
170                list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
171            else()
172                if (GGML_CPU_ARM_ARCH)
173                    list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
174                elseif(GGML_CPU_ALL_VARIANTS)
175                    # Begin with the lowest baseline
176                    set(ARM_MCPU "armv8-a")
177                    set(ARCH_TAGS "")
178                    set(ARCH_DEFINITIONS "")
179
180                    # When a feature is selected, bump the MCPU to the first
181                    # version that supported it
182                    if (GGML_INTERNAL_DOTPROD)
183                        set(ARM_MCPU "armv8.2-a")
184                        set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
185                        list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
186                    endif()
187                    if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
188                        set(ARM_MCPU "armv8.2-a")
189                        set(ARCH_TAGS "${ARCH_TAGS}+fp16")
190                        list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
191                    endif()
192                    if (GGML_INTERNAL_SVE)
193                        set(ARM_MCPU "armv8.2-a")
194                        set(ARCH_TAGS "${ARCH_TAGS}+sve")
195                        list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
196                    endif()
197                    if (GGML_INTERNAL_MATMUL_INT8)
198                        set(ARM_MCPU "armv8.6-a")
199                        set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
200                        list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
201                    endif()
202                    if (GGML_INTERNAL_SVE2)
203                        set(ARM_MCPU "armv8.6-a")
204                        set(ARCH_TAGS "${ARCH_TAGS}+sve2")
205                        list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
206                    endif()
207                    if (GGML_INTERNAL_NOSVE)
208                        set(ARCH_TAGS "${ARCH_TAGS}+nosve")
209                    endif()
210                    if (GGML_INTERNAL_SME)
211                        set(ARM_MCPU "armv9.2-a")
212                        set(ARCH_TAGS "${ARCH_TAGS}+sme")
213                        list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
214                    endif()
215                    list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
216                    ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
217                endif()
218            endif()
219
220            message(STATUS "Checking for ARM features using flags:")
221            foreach(flag IN LISTS ARCH_FLAGS)
222                message(STATUS "  ${flag}")
223            endforeach()
224
225            include(CheckCXXSourceCompiles)
226            set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
227            string(REPLACE ";" " " ARCH_FLAGS_STR "${ARCH_FLAGS}")
228            set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS_STR}")
229            foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
230                set(ARM_FEATURE "HAVE_${feature}")
231                check_cxx_source_compiles(
232                    "
233                    #if !defined(__ARM_FEATURE_${feature})
234                    #  error \"Feature ${feature} is not defined\"
235                    #endif
236                    int main() { return 0; }
237                    "
238                    ${ARM_FEATURE}
239                )
240            endforeach()
241            set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
242        endif()
243    elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
244        message(STATUS "x86 detected")
245        list(APPEND GGML_CPU_SOURCES
246            ggml-cpu/arch/x86/quants.c
247            ggml-cpu/arch/x86/repack.cpp
248            )
249
250        if (MSVC)
251            # instruction set detection for MSVC only
252            if (GGML_NATIVE)
253                include(ggml-cpu/cmake/FindSIMD.cmake)
254            endif ()
255            if (GGML_AVX512)
256                list(APPEND ARCH_FLAGS /arch:AVX512)
257                # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
258                # MSVC has no compile-time flags enabling specific
259                # AVX512 extensions, neither it defines the
260                # macros corresponding to the extensions.
261                # Do it manually.
262                list(APPEND ARCH_DEFINITIONS GGML_AVX512)
263                if (GGML_AVX512_VBMI)
264                    list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
265                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
266                        list(APPEND ARCH_FLAGS -mavx512vbmi)
267                    endif()
268                endif()
269                if (GGML_AVX512_VNNI)
270                    list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
271                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
272                        list(APPEND ARCH_FLAGS -mavx512vnni)
273                    endif()
274                endif()
275                if (GGML_AVX512_BF16)
276                    list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
277                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
278                        list(APPEND ARCH_FLAGS -mavx512bf16)
279                    endif()
280                endif()
281                if (GGML_AMX_TILE)
282                    list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
283                endif()
284                if (GGML_AMX_INT8)
285                    list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
286                endif()
287                if (GGML_AMX_BF16)
288                    list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
289                endif()
290            elseif (GGML_AVX2)
291                list(APPEND ARCH_FLAGS /arch:AVX2)
292                list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
293            elseif (GGML_AVX)
294                list(APPEND ARCH_FLAGS /arch:AVX)
295                list(APPEND ARCH_DEFINITIONS GGML_AVX)
296            elseif (GGML_SSE42)
297                list(APPEND ARCH_FLAGS /arch:SSE4.2)
298                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
299            endif()
300            if (GGML_AVX_VNNI)
301                list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
302            endif()
303            if (GGML_BMI2)
304                # MSVC does not define macro __BMI2__
305                list(APPEND ARCH_DEFINITIONS __BMI2__ GGML_BMI2)
306            endif()
307        else ()
308            if (GGML_NATIVE)
309                list(APPEND ARCH_FLAGS -march=native)
310            else ()
311                if (GGML_SSE42)
312                    list(APPEND ARCH_FLAGS -msse4.2)
313                    list(APPEND ARCH_DEFINITIONS GGML_SSE42)
314                endif()
315                if (GGML_F16C)
316                    list(APPEND ARCH_FLAGS -mf16c)
317                    list(APPEND ARCH_DEFINITIONS GGML_F16C)
318                endif()
319                if (GGML_FMA)
320                    list(APPEND ARCH_FLAGS -mfma)
321                    list(APPEND ARCH_DEFINITIONS GGML_FMA)
322                endif()
323                if (GGML_BMI2)
324                    list(APPEND ARCH_FLAGS -mbmi2)
325                    list(APPEND ARCH_DEFINITIONS GGML_BMI2)
326                endif()
327                if (GGML_AVX)
328                    list(APPEND ARCH_FLAGS -mavx)
329                    list(APPEND ARCH_DEFINITIONS GGML_AVX)
330                endif()
331                if (GGML_AVX2)
332                    list(APPEND ARCH_FLAGS -mavx2)
333                    list(APPEND ARCH_DEFINITIONS GGML_AVX2)
334                endif()
335                if (GGML_AVX_VNNI)
336                    list(APPEND ARCH_FLAGS -mavxvnni)
337                    list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
338                endif()
339                if (GGML_AVX512)
340                    list(APPEND ARCH_FLAGS -mavx512f)
341                    list(APPEND ARCH_FLAGS -mavx512cd)
342                    list(APPEND ARCH_FLAGS -mavx512vl)
343                    list(APPEND ARCH_FLAGS -mavx512dq)
344                    list(APPEND ARCH_FLAGS -mavx512bw)
345                    list(APPEND ARCH_DEFINITIONS GGML_AVX512)
346                endif()
347                if (GGML_AVX512_VBMI)
348                    list(APPEND ARCH_FLAGS -mavx512vbmi)
349                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
350                endif()
351                if (GGML_AVX512_VNNI)
352                    list(APPEND ARCH_FLAGS -mavx512vnni)
353                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
354                endif()
355                if (GGML_AVX512_BF16)
356                    list(APPEND ARCH_FLAGS -mavx512bf16)
357                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
358                endif()
359                if (GGML_AMX_TILE)
360                    list(APPEND ARCH_FLAGS -mamx-tile)
361                    list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
362                endif()
363                if (GGML_AMX_INT8)
364                    list(APPEND ARCH_FLAGS -mamx-int8)
365                    list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
366                endif()
367                if (GGML_AMX_BF16)
368                    list(APPEND ARCH_FLAGS -mamx-bf16)
369                    list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
370                endif()
371            endif()
372        endif()
373
374        if (GGML_BACKEND_DL)
375            if (GGML_NATIVE)
376                # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
377                message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
378            endif()
379            ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
380        endif()
381    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
382        message(STATUS "PowerPC detected")
383        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
384        if (GGML_NATIVE)
385            if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
386                file(READ "/proc/cpuinfo" POWER10_M)
387            elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc")
388                execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
389            endif()
390
391            string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
392            string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M_UPPER}")
393            string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")
394
395            if (EXTRACTED_NUMBER GREATER_EQUAL 10)
396                list(APPEND ARCH_FLAGS -mcpu=power10)
397            elseif (EXTRACTED_NUMBER EQUAL 9)
398                list(APPEND ARCH_FLAGS -mcpu=power9)
399            elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
400                list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
401            else()
402                list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
403            endif()
404        elseif(GGML_CPU_ALL_VARIANTS)
405            # Begin with the lowest baseline
406            set(ARCH_DEFINITIONS "")
407
408            # When a feature is selected, bump the MCPU to the first
409            # version that supported it
410            foreach(PVER RANGE 7 11)
411                if(DEFINED GGML_INTERNAL_POWER${PVER})
412                    set(POWERPC_MCPU "power${PVER}")
413                    list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
414                endif()
415            endforeach()
416            if (GGML_INTERNAL_VSX)
417                list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
418                list(APPEND ARCH_FLAGS -mvsx)
419            endif()
420
421            if (DEFINED POWERPC_MCPU)
422                list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
423            endif()
424            ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
425        else()
426            if (GGML_CPU_POWERPC_CPUTYPE)
427                list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
428            endif()
429        endif()
430    elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
431        message(STATUS "loongarch64 detected")
432        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
433
434        list(APPEND ARCH_FLAGS -march=loongarch64)
435        if (GGML_LASX)
436            list(APPEND ARCH_FLAGS -mlasx)
437        endif()
438        if (GGML_LSX)
439            list(APPEND ARCH_FLAGS -mlsx)
440        endif()
441    elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
442        message(STATUS "riscv64 detected")
443        list(APPEND GGML_CPU_SOURCES
444            ggml-cpu/arch/riscv/quants.c
445            ggml-cpu/arch/riscv/repack.cpp
446            )
447        if (GGML_CPU_RISCV64_SPACEMIT)
448            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT ${RISCV64_SPACEMIT_IME_SPEC})
449            list(APPEND GGML_CPU_SOURCES
450                ggml-cpu/spacemit/ime.cpp
451                ggml-cpu/spacemit/ime.h
452                ggml-cpu/spacemit/ime1_kernels.cpp
453                ggml-cpu/spacemit/ime_kernels.h
454            )
455        endif()
456        if(NOT GGML_CPU_ALL_VARIANTS)
457            set(MARCH_STR "rv64gc")
458            if (GGML_RV_ZFH)
459                string(APPEND MARCH_STR "_zfh")
460            endif()
461
462            if (GGML_XTHEADVECTOR)
463                string(APPEND MARCH_STR "_xtheadvector")
464            elseif (GGML_RVV)
465                string(APPEND MARCH_STR "_v")
466                if (GGML_RV_ZVFH)
467                    string(APPEND MARCH_STR "_zvfh")
468                endif()
469                if (GGML_RV_ZVFBFWMA)
470                    string(APPEND MARCH_STR "_zvfbfwma")
471                endif()
472            endif()
473            if (GGML_RV_ZICBOP)
474                string(APPEND MARCH_STR "_zicbop")
475            endif()
476            if (GGML_RV_ZIHINTPAUSE)
477                string(APPEND MARCH_STR "_zihintpause")
478            endif()
479            list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
480        else()
481            # Begin with the lowest baseline
482            set(ARCH_DEFINITIONS "")
483
484            if (GGML_INTERNAL_RVV)
485                message(STATUS "RVV enabled")
486                list(APPEND ARCH_DEFINITIONS GGML_USE_RVV)
487                list(APPEND ARCH_FLAGS -march=rv64gc_v -mabi=lp64d)
488            endif()
489
490            ggml_add_cpu_backend_features(${GGML_CPU_NAME} riscv ${ARCH_DEFINITIONS})
491        endif()
492    elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
493        message(STATUS "s390x detected")
494        list(APPEND GGML_CPU_SOURCES
495            ggml-cpu/arch/s390/quants.c)
496
497        # for native compilation
498        if (GGML_NATIVE)
499            # check machine level to determine target
500            file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
501            string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
502
503            # TODO: Separation to determine activation of VX/VXE/VXE2
504            if (${S390X_M} MATCHES "8561|8562")
505                message(STATUS "z15 target")
506                list(APPEND ARCH_FLAGS -march=z15)
507            elseif (${S390X_M} MATCHES "3931")
508                message(STATUS "z16 target")
509                list(APPEND ARCH_FLAGS -march=z16)
510            elseif (${S390X_M} MATCHES "9175|9176")
511                # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
512                #       binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
513                message(STATUS "z17 target")
514                list(APPEND ARCH_FLAGS -march=arch15)
515            else()
516                message(STATUS "Unknown target")
517                message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
518                list(APPEND ARCH_FLAGS -march=native -mtune=native)
519            endif()
520        # for cross-compilation
521        elseif(GGML_CPU_ALL_VARIANTS)
522            # range through IBM z15 to z17
523            # NOTE: update when a new hardware level is released
524            foreach (ZHW RANGE 15 17)
525                if(DEFINED GGML_INTERNAL_Z${ZHW})
526                    message(STATUS "z${ZHW} cross-compile target")
527                    list(APPEND ARCH_FLAGS -march=z${ZHW})
528                endif()
529            endforeach()
530        endif()
531
532        if (GGML_VXE OR GGML_INTERNAL_VXE2)
533            message(STATUS "VXE2 enabled")
534            list(APPEND ARCH_FLAGS -mvx -mzvector)
535            list(APPEND ARCH_DEFINITIONS GGML_USE_VXE2)
536        endif()
537
538        if (GGML_INTERNAL_NNPA)
539            message(STATUS "NNPA enabled")
540            list(APPEND ARCH_DEFINITIONS GGML_USE_NNPA)
541        endif()
542
543        ggml_add_cpu_backend_features(${GGML_CPU_NAME} s390 ${ARCH_DEFINITIONS})
544    elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
545        message(STATUS "Wasm detected")
546        list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
547    else()
548        message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
549        list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
550    endif()
551
552    if (GGML_CPU_REPACK)
553        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
554    endif()
555
556    if (GGML_CPU_KLEIDIAI)
557        message(STATUS "Using KleidiAI optimized kernels if applicable")
558
559        # Disable the KleidiAI tests
560        set(KLEIDIAI_BUILD_TESTS  OFF)
561
562        # Fetch KleidiAI sources:
563        include(FetchContent)
564        set(KLEIDIAI_COMMIT_TAG "v1.16.0")
565        set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
566        set(KLEIDIAI_ARCHIVE_MD5  "0a9e9008adb6031f9e8cf70dff4a3321")
567
568        if (POLICY CMP0135)
569            cmake_policy(SET CMP0135 NEW)
570        endif()
571
572        FetchContent_Declare(KleidiAI_Download
573            URL ${KLEIDIAI_DOWNLOAD_URL}
574            DOWNLOAD_EXTRACT_TIMESTAMP NEW
575            URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5})
576
577        FetchContent_MakeAvailable(KleidiAI_Download)
578        FetchContent_GetProperties(KleidiAI_Download
579            SOURCE_DIR  KLEIDIAI_SRC
580            POPULATED   KLEIDIAI_POPULATED)
581
582        if (NOT KLEIDIAI_POPULATED)
583            message(FATAL_ERROR "KleidiAI source downloaded failed.")
584        endif()
585
586        add_compile_definitions(GGML_USE_CPU_KLEIDIAI)
587
588        # Remove kleidiai target after fetching it
589        if (TARGET kleidiai)
590            set_target_properties(kleidiai PROPERTIES EXCLUDE_FROM_ALL TRUE)
591        endif()
592
593        list(APPEND GGML_CPU_SOURCES
594            ggml-cpu/kleidiai/kleidiai.cpp
595            ggml-cpu/kleidiai/kernels.cpp
596            ggml-cpu/kleidiai/kleidiai.h
597            ggml-cpu/kleidiai/kernels.h
598            )
599
600        # KleidiAI
601        include_directories(
602            ${KLEIDIAI_SRC}/
603            ${KLEIDIAI_SRC}/kai/
604            ${KLEIDIAI_SRC}/kai/ukernels/
605            ${KLEIDIAI_SRC}/kai/ukernels/matmul/
606            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
607            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/
608            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
609            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
610
611        set(ARCH_FLAGS_TEMP "${ARCH_FLAGS}")
612        if (NOT ARCH_FLAGS_TEMP)
613            string(REGEX MATCH "-march=[^ ]+" ARCH_FLAGS_TEMP "${CMAKE_C_FLAGS}")
614        endif()
615        string(FIND "${ARCH_FLAGS_TEMP}" "+dotprod" DOTPROD_ENABLED)
616        string(FIND "${ARCH_FLAGS_TEMP}" "+i8mm" I8MM_ENABLED)
617        string(FIND "${ARCH_FLAGS_TEMP}" "+sme" SME_ENABLED)
618        string(FIND "${ARCH_FLAGS_TEMP}" "+sve" SVE_ENABLED)
619
620        set(PRIVATE_ARCH_FLAGS ${ARCH_FLAGS_TEMP})
621
622        list(APPEND GGML_KLEIDIAI_SOURCES
623            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c
624            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.c
625            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
626            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
627            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
628            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c
629            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c)
630
631        if (NOT DOTPROD_ENABLED MATCHES -1)
632            list(APPEND GGML_KLEIDIAI_SOURCES
633                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
634                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
635                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
636                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c
637                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c
638                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c)
639        endif()
640
641        if (NOT I8MM_ENABLED MATCHES -1)
642            list(APPEND GGML_KLEIDIAI_SOURCES
643                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c
644                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c)
645        endif()
646
647        if (NOT SME_ENABLED MATCHES -1)
648            list(APPEND GGML_KLEIDIAI_SOURCES
649                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
650                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
651                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.c
652                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_asm.S
653                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.c
654                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_asm.S
655                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
656                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
657                ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c
658                ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.c
659                ${KLEIDIAI_SRC}/kai/kai_common_sme_asm.S)
660            set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
661        endif()
662
663        if (NOT SVE_ENABLED MATCHES -1)
664            list(APPEND GGML_KLEIDIAI_SOURCES
665                ${KLEIDIAI_SRC}/kai/kai_common_sve_asm.S
666                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod_asm.S
667                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod.c
668                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm_asm.S
669                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm.c)
670        endif()
671
672        set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
673        list(APPEND GGML_CPU_SOURCES ${GGML_KLEIDIAI_SOURCES})
674    endif()
675
676    message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
677    target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
678    target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
679    target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})
680
681    if (EMSCRIPTEN)
682        set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
683    endif()
684
685    if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
686        # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
687        target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
688    endif()
689endfunction()