diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt | 689 |
1 files changed, 689 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt new file mode 100644 index 0000000..7622d0b --- /dev/null +++ b/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt | |||
| @@ -0,0 +1,689 @@ | |||
# ggml_add_cpu_backend_features(<cpu_name> <arch> [<definition>...])
#
# Builds ggml-cpu/arch/<arch>/cpu-feats.cpp as the OBJECT library
# "<cpu_name>-feats" and links it privately into the target <cpu_name>.
# Every extra argument is added as a private compile definition of that
# object library.
#
# The feature detection code gets its own target so that it can be built
# without the architecture flags. Several variants of the CPU backend may be
# part of the same build, which rules out set_source_files_properties() as a
# way to set the arch flags.
function(ggml_add_cpu_backend_features cpu_name arch)
    set(feats_target "${cpu_name}-feats")

    add_library(${feats_target} OBJECT
        ggml-cpu/arch/${arch}/cpu-feats.cpp)
    target_include_directories(${feats_target} PRIVATE . ../include)
    target_compile_definitions(${feats_target} PRIVATE
        ${ARGN}
        GGML_BACKEND_DL
        GGML_BACKEND_BUILD
        GGML_BACKEND_SHARED)
    set_target_properties(${feats_target} PROPERTIES
        POSITION_INDEPENDENT_CODE ON)

    target_link_libraries(${cpu_name} PRIVATE ${feats_target})
endfunction()
| 14 | |||
# ggml_add_cpu_backend_variant_impl(<tag_name>)
#
# Creates one CPU backend library through ggml_add_backend_library().
# The target is named "ggml-cpu-<tag_name>", or "ggml-cpu" when <tag_name>
# is empty/false.
#
# The function
#   * collects the common CPU sources plus the sources of the architecture
#     selected by GGML_SYSTEM_ARCH,
#   * enables optional components (Accelerate, OpenMP, llamafile, memkind,
#     repack, KleidiAI) according to the matching GGML_* options,
#   * derives ARCH_FLAGS / ARCH_DEFINITIONS for the selected architecture and
#     applies them privately to the target.
#
# When a variant build is requested (GGML_CPU_ALL_VARIANTS / GGML_BACKEND_DL)
# the per-variant feature detection object library is attached with
# ggml_add_cpu_backend_features().
function(ggml_add_cpu_backend_variant_impl tag_name)
    if (tag_name)
        set(GGML_CPU_NAME ggml-cpu-${tag_name})
    else()
        set(GGML_CPU_NAME ggml-cpu)
    endif()

    ggml_add_backend_library(${GGML_CPU_NAME})

    list (APPEND GGML_CPU_SOURCES
        ggml-cpu/ggml-cpu.c
        ggml-cpu/ggml-cpu.cpp
        ggml-cpu/repack.cpp
        ggml-cpu/repack.h
        ggml-cpu/hbm.cpp
        ggml-cpu/hbm.h
        ggml-cpu/quants.c
        ggml-cpu/quants.h
        ggml-cpu/traits.cpp
        ggml-cpu/traits.h
        ggml-cpu/amx/amx.cpp
        ggml-cpu/amx/amx.h
        ggml-cpu/amx/mmq.cpp
        ggml-cpu/amx/mmq.h
        ggml-cpu/ggml-cpu-impl.h
        ggml-cpu/common.h
        ggml-cpu/binary-ops.h
        ggml-cpu/binary-ops.cpp
        ggml-cpu/unary-ops.h
        ggml-cpu/unary-ops.cpp
        ggml-cpu/simd-mappings.h
        ggml-cpu/vec.h
        ggml-cpu/vec.cpp
        ggml-cpu/ops.h
        ggml-cpu/ops.cpp
        )

    target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
    target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

    if (APPLE AND GGML_ACCELERATE)
        find_library(ACCELERATE_FRAMEWORK Accelerate)
        if (ACCELERATE_FRAMEWORK)
            message(STATUS "Accelerate framework found")

            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
        else()
            message(WARNING "Accelerate framework not found")
        endif()
    endif()

    if (GGML_OPENMP)
        find_package(OpenMP)
        if (OpenMP_FOUND)
            set(GGML_OPENMP_ENABLED "ON" CACHE INTERNAL "")
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
        else()
            set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
            message(WARNING "OpenMP not found")
        endif()
    endif()

    if (GGML_LLAMAFILE)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)

        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/llamafile/sgemm.cpp
            ggml-cpu/llamafile/sgemm.h)
    endif()

    if (GGML_CPU_HBM)
        find_library(memkind memkind REQUIRED)

        message(STATUS "Using memkind for CPU HBM")

        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)

        target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
    endif()

    if (GGML_SYSTEM_ARCH STREQUAL "ARM")
        message(STATUS "ARM detected")
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/arch/arm/quants.c
            ggml-cpu/arch/arm/repack.cpp
            )

        if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
            message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
        else()
            check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
            if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
                list(APPEND ARCH_FLAGS -mfp16-format=ieee)
            endif()

            if (GGML_NATIVE)
                # -mcpu=native does not always enable all the features in some compilers,
                # so we check for them manually and enable them if available

                execute_process(
                    COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
                    INPUT_FILE "/dev/null"
                    OUTPUT_QUIET
                    ERROR_VARIABLE ARM_MCPU
                    RESULT_VARIABLE ARM_MCPU_RESULT
                )
                if (NOT ARM_MCPU_RESULT)
                    string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
                    string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")

                    # on some old GCC we need to read -march=
                    if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
                        set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
                    elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
                        set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
                    endif()
                endif()

                if ("${ARM_NATIVE_FLAG}" STREQUAL "")
                    set(ARM_NATIVE_FLAG -mcpu=native)
                    message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
                else()
                    message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
                endif()

                include(CheckCXXSourceRuns)

                # check_arm_feature(<tag> <feature> <code>)
                # Runs <code> with "<native flag>+<tag>". On success "+<tag>" is
                # appended to ARM_NATIVE_FLAG_FIX. Otherwise, if the compiler accepts
                # "+no<tag>", that suffix is appended instead and
                # __ARM_FEATURE_<feature> is undefined through ARCH_FLAGS.
                macro(check_arm_feature tag feature code)
                    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
                    set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
                    check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
                    if (GGML_MACHINE_SUPPORTS_${tag})
                        set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}")
                    else()
                        set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
                        check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
                        if (GGML_MACHINE_SUPPORTS_no${tag})
                            set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}")
                            list(APPEND ARCH_FLAGS -U__ARM_FEATURE_${feature})
                        endif()
                    endif()
                    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
                endmacro()

                check_arm_feature(dotprod DOTPROD "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(i8mm MATMUL_INT8 "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(sve SVE "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
                check_arm_feature(sme SME "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")

                list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
            else()
                if (GGML_CPU_ARM_ARCH)
                    list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
                elseif(GGML_CPU_ALL_VARIANTS)
                    # Begin with the lowest baseline
                    set(ARM_MCPU "armv8-a")
                    set(ARCH_TAGS "")
                    set(ARCH_DEFINITIONS "")

                    # When a feature is selected, bump the MCPU to the first
                    # version that supported it
                    if (GGML_INTERNAL_DOTPROD)
                        set(ARM_MCPU "armv8.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
                    endif()
                    if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
                        set(ARM_MCPU "armv8.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+fp16")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
                    endif()
                    if (GGML_INTERNAL_SVE)
                        set(ARM_MCPU "armv8.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+sve")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
                    endif()
                    if (GGML_INTERNAL_MATMUL_INT8)
                        set(ARM_MCPU "armv8.6-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
                    endif()
                    if (GGML_INTERNAL_SVE2)
                        set(ARM_MCPU "armv8.6-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+sve2")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
                    endif()
                    if (GGML_INTERNAL_NOSVE)
                        set(ARCH_TAGS "${ARCH_TAGS}+nosve")
                    endif()
                    if (GGML_INTERNAL_SME)
                        set(ARM_MCPU "armv9.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+sme")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
                    endif()
                    list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
                    ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
                endif()
            endif()

            message(STATUS "Checking for ARM features using flags:")
            foreach(flag IN LISTS ARCH_FLAGS)
                message(STATUS "  ${flag}")
            endforeach()

            # Probe which __ARM_FEATURE_* macros the final flag set defines;
            # each result is stored in HAVE_<feature>.
            include(CheckCXXSourceCompiles)
            set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
            string(REPLACE ";" " " ARCH_FLAGS_STR "${ARCH_FLAGS}")
            set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS_STR}")
            foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
                set(ARM_FEATURE "HAVE_${feature}")
                check_cxx_source_compiles(
                    "
                    #if !defined(__ARM_FEATURE_${feature})
                    #   error \"Feature ${feature} is not defined\"
                    #endif
                    int main() { return 0; }
                    "
                    ${ARM_FEATURE}
                )
            endforeach()
            set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
        message(STATUS "x86 detected")
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/arch/x86/quants.c
            ggml-cpu/arch/x86/repack.cpp
            )

        if (MSVC)
            # instruction set detection for MSVC only
            if (GGML_NATIVE)
                include(ggml-cpu/cmake/FindSIMD.cmake)
            endif ()
            if (GGML_AVX512)
                list(APPEND ARCH_FLAGS /arch:AVX512)
                # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
                # MSVC has no compile-time flags enabling specific
                # AVX512 extensions, neither it defines the
                # macros corresponding to the extensions.
                # Do it manually.
                list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vbmi)
                    endif()
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vnni)
                    endif()
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512bf16)
                    endif()
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
                endif()
            elseif (GGML_AVX2)
                list(APPEND ARCH_FLAGS /arch:AVX2)
                list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
            elseif (GGML_AVX)
                list(APPEND ARCH_FLAGS /arch:AVX)
                list(APPEND ARCH_DEFINITIONS GGML_AVX)
            elseif (GGML_SSE42)
                list(APPEND ARCH_FLAGS /arch:SSE4.2)
                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
            endif()
            if (GGML_AVX_VNNI)
                list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
            endif()
            if (GGML_BMI2)
                # MSVC does not define macro __BMI2__
                list(APPEND ARCH_DEFINITIONS __BMI2__ GGML_BMI2)
            endif()
        else ()
            if (GGML_NATIVE)
                list(APPEND ARCH_FLAGS -march=native)
            else ()
                if (GGML_SSE42)
                    list(APPEND ARCH_FLAGS -msse4.2)
                    list(APPEND ARCH_DEFINITIONS GGML_SSE42)
                endif()
                if (GGML_F16C)
                    list(APPEND ARCH_FLAGS -mf16c)
                    list(APPEND ARCH_DEFINITIONS GGML_F16C)
                endif()
                if (GGML_FMA)
                    list(APPEND ARCH_FLAGS -mfma)
                    list(APPEND ARCH_DEFINITIONS GGML_FMA)
                endif()
                if (GGML_BMI2)
                    list(APPEND ARCH_FLAGS -mbmi2)
                    list(APPEND ARCH_DEFINITIONS GGML_BMI2)
                endif()
                if (GGML_AVX)
                    list(APPEND ARCH_FLAGS -mavx)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX)
                endif()
                if (GGML_AVX2)
                    list(APPEND ARCH_FLAGS -mavx2)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX2)
                endif()
                if (GGML_AVX_VNNI)
                    list(APPEND ARCH_FLAGS -mavxvnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
                endif()
                if (GGML_AVX512)
                    list(APPEND ARCH_FLAGS -mavx512f)
                    list(APPEND ARCH_FLAGS -mavx512cd)
                    list(APPEND ARCH_FLAGS -mavx512vl)
                    list(APPEND ARCH_FLAGS -mavx512dq)
                    list(APPEND ARCH_FLAGS -mavx512bw)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                endif()
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_FLAGS -mavx512vbmi)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_FLAGS -mavx512vnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_FLAGS -mavx512bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_FLAGS -mamx-tile)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_FLAGS -mamx-int8)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_FLAGS -mamx-bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
                endif()
            endif()
        endif()

        if (GGML_BACKEND_DL)
            if (GGML_NATIVE)
                # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
                message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
            endif()
            ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
        message(STATUS "PowerPC detected")
        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
        if (GGML_NATIVE)
            # Operands are quoted so an empty CMAKE_SYSTEM_PROCESSOR cannot
            # produce a malformed if() expression.
            if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64")
                file(READ "/proc/cpuinfo" POWER10_M)
            elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "powerpc")
                execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
            endif()

            # Take the number of the first "POWER<n>" occurrence as a single
            # value instead of a list with one entry per reported CPU.
            string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
            set(EXTRACTED_NUMBER "")
            if ("${POWER10_M_UPPER}" MATCHES "POWER *([0-9]+)")
                set(EXTRACTED_NUMBER "${CMAKE_MATCH_1}")
            endif()

            if (EXTRACTED_NUMBER GREATER_EQUAL 10)
                list(APPEND ARCH_FLAGS -mcpu=power10)
            elseif (EXTRACTED_NUMBER EQUAL 9)
                list(APPEND ARCH_FLAGS -mcpu=power9)
            elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64le")
                list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
            else()
                list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
            endif()
        elseif(GGML_CPU_ALL_VARIANTS)
            # Begin with the lowest baseline
            set(ARCH_DEFINITIONS "")

            # When a feature is selected, bump the MCPU to the first
            # version that supported it
            foreach(PVER RANGE 7 11)
                if(DEFINED GGML_INTERNAL_POWER${PVER})
                    set(POWERPC_MCPU "power${PVER}")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
                endif()
            endforeach()
            if (GGML_INTERNAL_VSX)
                list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
                list(APPEND ARCH_FLAGS -mvsx)
            endif()

            if (DEFINED POWERPC_MCPU)
                list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
            endif()
            ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
        else()
            if (GGML_CPU_POWERPC_CPUTYPE)
                list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
            endif()
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
        message(STATUS "loongarch64 detected")
        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)

        list(APPEND ARCH_FLAGS -march=loongarch64)
        if (GGML_LASX)
            list(APPEND ARCH_FLAGS -mlasx)
        endif()
        if (GGML_LSX)
            list(APPEND ARCH_FLAGS -mlsx)
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
        message(STATUS "riscv64 detected")
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/arch/riscv/quants.c
            ggml-cpu/arch/riscv/repack.cpp
            )
        if (GGML_CPU_RISCV64_SPACEMIT)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT ${RISCV64_SPACEMIT_IME_SPEC})
            list(APPEND GGML_CPU_SOURCES
                ggml-cpu/spacemit/ime.cpp
                ggml-cpu/spacemit/ime.h
                ggml-cpu/spacemit/ime1_kernels.cpp
                ggml-cpu/spacemit/ime_kernels.h
            )
        endif()
        if(NOT GGML_CPU_ALL_VARIANTS)
            set(MARCH_STR "rv64gc")
            if (GGML_RV_ZFH)
                string(APPEND MARCH_STR "_zfh")
            endif()

            if (GGML_XTHEADVECTOR)
                string(APPEND MARCH_STR "_xtheadvector")
            elseif (GGML_RVV)
                string(APPEND MARCH_STR "_v")
                if (GGML_RV_ZVFH)
                    string(APPEND MARCH_STR "_zvfh")
                endif()
                if (GGML_RV_ZVFBFWMA)
                    string(APPEND MARCH_STR "_zvfbfwma")
                endif()
            endif()
            if (GGML_RV_ZICBOP)
                string(APPEND MARCH_STR "_zicbop")
            endif()
            if (GGML_RV_ZIHINTPAUSE)
                string(APPEND MARCH_STR "_zihintpause")
            endif()
            list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
        else()
            # Begin with the lowest baseline
            set(ARCH_DEFINITIONS "")

            if (GGML_INTERNAL_RVV)
                message(STATUS "RVV enabled")
                list(APPEND ARCH_DEFINITIONS GGML_USE_RVV)
                list(APPEND ARCH_FLAGS -march=rv64gc_v -mabi=lp64d)
            endif()

            ggml_add_cpu_backend_features(${GGML_CPU_NAME} riscv ${ARCH_DEFINITIONS})
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
        message(STATUS "s390x detected")
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/arch/s390/quants.c)

        # for native compilation
        if (GGML_NATIVE)
            # check machine level to determine target
            file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
            # Keep only the number that follows "machine =", so that the model
            # checks below cannot match digits from unrelated cpuinfo fields.
            if ("${CPUINFO_CONTENTS}" MATCHES "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)")
                set(S390X_M "${CMAKE_MATCH_1}")
            else()
                set(S390X_M "")
            endif()

            # TODO: Separation to determine activation of VX/VXE/VXE2
            if ("${S390X_M}" MATCHES "8561|8562")
                message(STATUS "z15 target")
                list(APPEND ARCH_FLAGS -march=z15)
            elseif ("${S390X_M}" MATCHES "3931")
                message(STATUS "z16 target")
                list(APPEND ARCH_FLAGS -march=z16)
            elseif ("${S390X_M}" MATCHES "9175|9176")
                # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
                #       binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
                message(STATUS "z17 target")
                list(APPEND ARCH_FLAGS -march=arch15)
            else()
                message(STATUS "Unknown target")
                message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
                list(APPEND ARCH_FLAGS -march=native -mtune=native)
            endif()
        # for cross-compilation
        elseif(GGML_CPU_ALL_VARIANTS)
            # range through IBM z15 to z17
            # NOTE: update when a new hardware level is released
            foreach (ZHW RANGE 15 17)
                if(DEFINED GGML_INTERNAL_Z${ZHW})
                    message(STATUS "z${ZHW} cross-compile target")
                    list(APPEND ARCH_FLAGS -march=z${ZHW})
                endif()
            endforeach()
        endif()

        if (GGML_VXE OR GGML_INTERNAL_VXE2)
            message(STATUS "VXE2 enabled")
            list(APPEND ARCH_FLAGS -mvx -mzvector)
            list(APPEND ARCH_DEFINITIONS GGML_USE_VXE2)
        endif()

        if (GGML_INTERNAL_NNPA)
            message(STATUS "NNPA enabled")
            list(APPEND ARCH_DEFINITIONS GGML_USE_NNPA)
        endif()

        ggml_add_cpu_backend_features(${GGML_CPU_NAME} s390 ${ARCH_DEFINITIONS})
    elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
        message(STATUS "Wasm detected")
        list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
    else()
        message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
        list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
    endif()

    if (GGML_CPU_REPACK)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
    endif()

    if (GGML_CPU_KLEIDIAI)
        message(STATUS "Using KleidiAI optimized kernels if applicable")

        # Disable the KleidiAI tests
        set(KLEIDIAI_BUILD_TESTS OFF)

        # Fetch KleidiAI sources:
        include(FetchContent)
        set(KLEIDIAI_COMMIT_TAG "v1.16.0")
        set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
        set(KLEIDIAI_ARCHIVE_MD5 "0a9e9008adb6031f9e8cf70dff4a3321")

        if (POLICY CMP0135)
            cmake_policy(SET CMP0135 NEW)
        endif()

        FetchContent_Declare(KleidiAI_Download
            URL ${KLEIDIAI_DOWNLOAD_URL}
            DOWNLOAD_EXTRACT_TIMESTAMP NEW
            URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5})

        FetchContent_MakeAvailable(KleidiAI_Download)
        FetchContent_GetProperties(KleidiAI_Download
            SOURCE_DIR KLEIDIAI_SRC
            POPULATED KLEIDIAI_POPULATED)

        if (NOT KLEIDIAI_POPULATED)
            message(FATAL_ERROR "KleidiAI source downloaded failed.")
        endif()

        # Scoped to this backend target so the definition does not leak into
        # the other targets of the directory.
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_KLEIDIAI)

        # Remove kleidiai target after fetching it
        if (TARGET kleidiai)
            set_target_properties(kleidiai PROPERTIES EXCLUDE_FROM_ALL TRUE)
        endif()

        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/kleidiai/kleidiai.cpp
            ggml-cpu/kleidiai/kernels.cpp
            ggml-cpu/kleidiai/kleidiai.h
            ggml-cpu/kleidiai/kernels.h
            )

        # KleidiAI headers, visible to this backend target only
        target_include_directories(${GGML_CPU_NAME} PRIVATE
            ${KLEIDIAI_SRC}/
            ${KLEIDIAI_SRC}/kai/
            ${KLEIDIAI_SRC}/kai/ukernels/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)

        # Fall back to a -march= flag from CMAKE_C_FLAGS when no arch flags
        # were derived above.
        set(ARCH_FLAGS_TEMP "${ARCH_FLAGS}")
        if (NOT ARCH_FLAGS_TEMP)
            string(REGEX MATCH "-march=[^ ]+" ARCH_FLAGS_TEMP "${CMAKE_C_FLAGS}")
        endif()
        # string(FIND) yields -1 when the feature tag is absent
        string(FIND "${ARCH_FLAGS_TEMP}" "+dotprod" DOTPROD_ENABLED)
        string(FIND "${ARCH_FLAGS_TEMP}" "+i8mm" I8MM_ENABLED)
        string(FIND "${ARCH_FLAGS_TEMP}" "+sme" SME_ENABLED)
        string(FIND "${ARCH_FLAGS_TEMP}" "+sve" SVE_ENABLED)

        set(PRIVATE_ARCH_FLAGS ${ARCH_FLAGS_TEMP})

        list(APPEND GGML_KLEIDIAI_SOURCES
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c)

        if (NOT DOTPROD_ENABLED MATCHES -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c)
        endif()

        if (NOT I8MM_ENABLED MATCHES -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c)
        endif()

        if (NOT SME_ENABLED MATCHES -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.c
                ${KLEIDIAI_SRC}/kai/kai_common_sme_asm.S)
            set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
        endif()

        if (NOT SVE_ENABLED MATCHES -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/kai_common_sve_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p8x8_1x8_sve_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p8x8_16x8_sve_i8mm.c)
        endif()

        set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
        list(APPEND GGML_CPU_SOURCES ${GGML_KLEIDIAI_SOURCES})
    endif()

    message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
    target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
    target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
    target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})

    if (EMSCRIPTEN)
        set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
    endif()

    if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
        # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
        target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
    endif()
endfunction()
