diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt | 146 |
1 files changed, 146 insertions, 0 deletions
# llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt
#
# Build rules for the ggml OpenCL backend library (target `ggml-opencl`).
#
# Options read by this file (they are expected to be defined by a parent
# CMakeLists.txt; none of them is declared here):
#   GGML_OPENCL_PROFILING           - define GGML_OPENCL_PROFILING for the backend
#   GGML_OPENCL_TARGET_VERSION      - forwarded verbatim as a compile definition
#   GGML_OPENCL_USE_ADRENO_KERNELS  - define GGML_OPENCL_USE_ADRENO_KERNELS
#   GGML_OPENCL_EMBED_KERNELS       - convert each kernel into a generated header
#                                     instead of copying the .cl file next to the
#                                     binaries

find_package(OpenCL REQUIRED)
find_package(Python3 REQUIRED)

set(TARGET_NAME ggml-opencl)

# ggml_add_backend_library() is provided by an enclosing CMakeLists.txt.
ggml_add_backend_library(${TARGET_NAME}
                         ggml-opencl.cpp
                         ../../include/ggml-opencl.h)
target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCL_LIBRARIES})
target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_INCLUDE_DIRS})

# All definitions below are attached to the backend target itself rather than
# to the directory scope, so they cannot leak into unrelated targets.
if (GGML_OPENCL_PROFILING)
    message(STATUS "OpenCL profiling enabled (increases CPU overhead)")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_PROFILING)
endif ()

target_compile_definitions(${TARGET_NAME} PRIVATE
    GGML_OPENCL_SOA_Q
    GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})

if (GGML_OPENCL_USE_ADRENO_KERNELS)
    message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_USE_ADRENO_KERNELS)
endif ()

if (GGML_OPENCL_EMBED_KERNELS)
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_EMBED_KERNELS)

    set(EMBED_KERNEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")

    # Generated headers live in the build tree and are made visible to the backend.
    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
    target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
endif ()

# ggml_opencl_add_kernel(<KNAME>)
#
# Registers the OpenCL kernel `kernels/<KNAME>.cl` with the backend.
#
#   * GGML_OPENCL_EMBED_KERNELS on:  adds a build rule that runs
#     kernels/embed_kernel.py to produce `autogenerated/<KNAME>.cl.h` in the
#     build tree and adds that header to the backend target's sources.
#   * GGML_OPENCL_EMBED_KERNELS off: copies the .cl file unchanged into
#     CMAKE_RUNTIME_OUTPUT_DIRECTORY at configure time.
#
# Example: ggml_opencl_add_kernel(add)
function(ggml_opencl_add_kernel KNAME)
    set(KERN_HDR "${CMAKE_CURRENT_BINARY_DIR}/autogenerated/${KNAME}.cl.h")
    set(KERN_SRC "${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KNAME}.cl")

    if (GGML_OPENCL_EMBED_KERNELS)
        message(STATUS "opencl: embedding kernel ${KNAME}")

        # Python must be accessible from command line
        add_custom_command(
            OUTPUT  "${KERN_HDR}"
            COMMAND "${Python3_EXECUTABLE}" "${EMBED_KERNEL_SCRIPT}" "${KERN_SRC}" "${KERN_HDR}"
            DEPENDS "${KERN_SRC}" "${EMBED_KERNEL_SCRIPT}"
            COMMENT "Generate ${KERN_HDR}"
            VERBATIM
        )

        target_sources(${TARGET_NAME} PRIVATE "${KERN_HDR}")
    else ()
        message(STATUS "opencl: adding kernel ${KNAME}")

        # CMAKE_RUNTIME_OUTPUT_DIRECTORY is not set by this file. If the
        # enclosing project leaves it empty, the destination would collapse to
        # "/<KNAME>.cl" (filesystem root), so fall back to this directory's
        # build tree.
        # NOTE(review): confirm where the backend looks for .cl files at run
        # time when the fallback is taken.
        if (CMAKE_RUNTIME_OUTPUT_DIRECTORY)
            set(KERN_DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        else ()
            set(KERN_DST_DIR "${CMAKE_CURRENT_BINARY_DIR}")
        endif ()

        configure_file("${KERN_SRC}" "${KERN_DST_DIR}/${KNAME}.cl" COPYONLY)
    endif ()
endfunction()

# Kernel base names; each entry corresponds to kernels/<name>.cl.
set(GGML_OPENCL_KERNELS
    add
    add_id
    argsort
    tri
    fill
    clamp
    cpy
    cvt
    diag_mask_inf
    div
    gelu
    gemv_noshuffle_general
    gemv_noshuffle
    get_rows
    glu
    group_norm
    solve_tri
    im2col_f32
    im2col_f16
    mean
    mul_mat_Ab_Bi_8x4
    mul_mv_f16_f16
    mul_mv_f16_f32_1row
    mul_mv_f16_f32_l4
    mul_mv_f16_f32
    mul_mv_f32_f32
    mul_mv_q4_0_f32
    mul_mv_q4_0_f32_v
    mul_mv_q4_0_f32_8x_flat
    mul_mv_q4_0_f32_1d_8x_flat
    mul_mv_q4_0_f32_1d_16x_flat
    mul_mv_q4_k_f32
    mul_mv_q6_k_f32
    mul_mv_q6_k_f32_flat
    mul_mv_q8_0_f32
    mul_mv_q8_0_f32_flat
    mul_mv_mxfp4_f32
    mul_mv_mxfp4_f32_flat
    mul_mv_id_q4_0_f32_8x_flat
    mul_mv_id_q8_0_f32
    mul_mv_id_q8_0_f32_flat
    mul_mv_id_mxfp4_f32
    mul_mv_id_mxfp4_f32_flat
    gemm_moe_mxfp4_f32
    gemv_moe_mxfp4_f32
    mul_mm_f32_f32_l4_lm
    mul_mm_f16_f32_l4_lm
    mul_mm_q8_0_f32_l4_lm
    mul_mm_q6_k_f32_l4_lm
    mul_mm_q8_0_f32_8x4
    gemv_noshuffle_general_q8_0_f32
    mul
    norm
    relu
    rms_norm
    rope
    scale
    set_rows
    sigmoid
    silu
    softmax_4_f32
    softmax_4_f16
    softmax_f32
    softmax_f16
    sqr
    sqrt
    ssm_conv
    sub
    sum_rows
    transpose
    concat
    tsembd
    upscale
    tanh
    expm1
    softplus
    pad
    repeat
    mul_mat_f16_f32
    mul_mm_f16_f32_kq_kqv
    conv2d
    conv2d_f16_f32
    flash_attn_f32_f16
    flash_attn_f16
    flash_attn_f32
)

# Register every kernel listed above with the backend target.
foreach (K ${GGML_OPENCL_KERNELS})
    ggml_opencl_add_kernel(${K})
endforeach ()
