# Build configuration for the ggml OpenCL backend library.

find_package(OpenCL REQUIRED)
# The Python interpreter runs kernels/embed_kernel.py when kernels are embedded
# into the library (GGML_OPENCL_EMBED_KERNELS).
find_package(Python3 REQUIRED)

set(TARGET_NAME ggml-opencl)

ggml_add_backend_library(${TARGET_NAME}
                         ggml-opencl.cpp
                         ../../include/ggml-opencl.h)
target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCL_LIBRARIES})
target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_INCLUDE_DIRS})

# All preprocessor definitions below are attached to the backend target with
# PRIVATE visibility instead of being added at directory scope, so they cannot
# leak into unrelated targets defined in (or below) this directory.

if (GGML_OPENCL_PROFILING)
    message(STATUS "OpenCL profiling enabled (increases CPU overhead)")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_PROFILING)
endif ()

# Always-on definitions. GGML_OPENCL_TARGET_VERSION is forwarded verbatim from
# the CMake variable of the same name, which is expected to be set by an
# enclosing CMakeLists.txt.
target_compile_definitions(${TARGET_NAME} PRIVATE
    GGML_OPENCL_SOA_Q
    GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})

if (GGML_OPENCL_USE_ADRENO_KERNELS)
    message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_USE_ADRENO_KERNELS)
endif ()

if (GGML_OPENCL_EMBED_KERNELS)
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_EMBED_KERNELS)

    # Script that converts a .cl source file into an includable header.
    set(EMBED_KERNEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")
    # Generated headers live in the build tree; create the directory up front
    # and make it visible to the backend sources.
    file(MAKE_DIRECTORY     "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")

    target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
endif ()
 33
 34function(ggml_opencl_add_kernel KNAME)
 35    set(KERN_HDR ${CMAKE_CURRENT_BINARY_DIR}/autogenerated/${KNAME}.cl.h)
 36    set(KERN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KNAME}.cl)
 37
 38    if (GGML_OPENCL_EMBED_KERNELS)
 39        message(STATUS "opencl: embedding kernel ${KNAME}")
 40
 41        # Python must be accessible from command line
 42        add_custom_command(
 43            OUTPUT ${KERN_HDR}
 44            COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT} ${KERN_SRC} ${KERN_HDR}
 45            DEPENDS ${KERN_SRC} ${EMBED_KERNEL_SCRIPT}
 46            COMMENT "Generate ${KERN_HDR}"
 47        )
 48
 49        target_sources(${TARGET_NAME} PRIVATE ${KERN_HDR})
 50    else ()
 51        message(STATUS "opencl: adding kernel ${KNAME}")
 52        configure_file(${KERN_SRC} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${KNAME}.cl COPYONLY)
 53    endif ()
 54endfunction()
 55
 56set(GGML_OPENCL_KERNELS
 57    add
 58    add_id
 59    argsort
 60    tri
 61    fill
 62    clamp
 63    cpy
 64    cvt
 65    diag_mask_inf
 66    div
 67    gelu
 68    gemv_noshuffle_general
 69    gemv_noshuffle
 70    get_rows
 71    glu
 72    group_norm
 73    solve_tri
 74    im2col_f32
 75    im2col_f16
 76    mean
 77    mul_mat_Ab_Bi_8x4
 78    mul_mv_f16_f16
 79    mul_mv_f16_f32_1row
 80    mul_mv_f16_f32_l4
 81    mul_mv_f16_f32
 82    mul_mv_f32_f32
 83    mul_mv_q4_0_f32
 84    mul_mv_q4_0_f32_v
 85    mul_mv_q4_0_f32_8x_flat
 86    mul_mv_q4_0_f32_1d_8x_flat
 87    mul_mv_q4_0_f32_1d_16x_flat
 88    mul_mv_q4_k_f32
 89    mul_mv_q6_k_f32
 90    mul_mv_q6_k_f32_flat
 91    mul_mv_q8_0_f32
 92    mul_mv_q8_0_f32_flat
 93    mul_mv_mxfp4_f32
 94    mul_mv_mxfp4_f32_flat
 95    mul_mv_id_q4_0_f32_8x_flat
 96    mul_mv_id_q8_0_f32
 97    mul_mv_id_q8_0_f32_flat
 98    mul_mv_id_mxfp4_f32
 99    mul_mv_id_mxfp4_f32_flat
100    gemm_moe_mxfp4_f32
101    gemv_moe_mxfp4_f32
102    mul_mm_f32_f32_l4_lm
103    mul_mm_f16_f32_l4_lm
104    mul_mm_q8_0_f32_l4_lm
105    mul_mm_q6_k_f32_l4_lm
106    mul_mm_q8_0_f32_8x4
107    gemv_noshuffle_general_q8_0_f32
108    mul
109    norm
110    relu
111    rms_norm
112    rope
113    scale
114    set_rows
115    sigmoid
116    silu
117    softmax_4_f32
118    softmax_4_f16
119    softmax_f32
120    softmax_f16
121    sqr
122    sqrt
123    ssm_conv
124    sub
125    sum_rows
126    transpose
127    concat
128    tsembd
129    upscale
130    tanh
131    expm1
132    softplus
133    pad
134    repeat
135    mul_mat_f16_f32
136    mul_mm_f16_f32_kq_kqv
137    conv2d
138    conv2d_f16_f32
139    flash_attn_f32_f16
140    flash_attn_f16
141    flash_attn_f32
142)
143
144foreach (K ${GGML_OPENCL_KERNELS})
145    ggml_opencl_add_kernel(${K})
146endforeach()