1find_package(OpenCL REQUIRED)
2find_package(Python3 REQUIRED)
3
4set(TARGET_NAME ggml-opencl)
5
6ggml_add_backend_library(${TARGET_NAME}
7 ggml-opencl.cpp
8 ../../include/ggml-opencl.h)
9target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCL_LIBRARIES})
10target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_INCLUDE_DIRS})
11
12if (GGML_OPENCL_PROFILING)
13 message(STATUS "OpenCL profiling enabled (increases CPU overhead)")
14 add_compile_definitions(GGML_OPENCL_PROFILING)
15endif ()
16
17add_compile_definitions(GGML_OPENCL_SOA_Q)
18add_compile_definitions(GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})
19
20if (GGML_OPENCL_USE_ADRENO_KERNELS)
21 message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
22 add_compile_definitions(GGML_OPENCL_USE_ADRENO_KERNELS)
23endif ()
24
25if (GGML_OPENCL_EMBED_KERNELS)
26 add_compile_definitions(GGML_OPENCL_EMBED_KERNELS)
27
28 set(EMBED_KERNEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")
29 file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
30
31 target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
32endif ()
33
34function(ggml_opencl_add_kernel KNAME)
35 set(KERN_HDR ${CMAKE_CURRENT_BINARY_DIR}/autogenerated/${KNAME}.cl.h)
36 set(KERN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KNAME}.cl)
37
38 if (GGML_OPENCL_EMBED_KERNELS)
39 message(STATUS "opencl: embedding kernel ${KNAME}")
40
41 # Python must be accessible from command line
42 add_custom_command(
43 OUTPUT ${KERN_HDR}
44 COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT} ${KERN_SRC} ${KERN_HDR}
45 DEPENDS ${KERN_SRC} ${EMBED_KERNEL_SCRIPT}
46 COMMENT "Generate ${KERN_HDR}"
47 )
48
49 target_sources(${TARGET_NAME} PRIVATE ${KERN_HDR})
50 else ()
51 message(STATUS "opencl: adding kernel ${KNAME}")
52 configure_file(${KERN_SRC} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${KNAME}.cl COPYONLY)
53 endif ()
54endfunction()
55
56set(GGML_OPENCL_KERNELS
57 add
58 add_id
59 argsort
60 tri
61 fill
62 clamp
63 cpy
64 cvt
65 diag_mask_inf
66 div
67 gelu
68 gemv_noshuffle_general
69 gemv_noshuffle
70 get_rows
71 glu
72 group_norm
73 solve_tri
74 im2col_f32
75 im2col_f16
76 mean
77 mul_mat_Ab_Bi_8x4
78 mul_mv_f16_f16
79 mul_mv_f16_f32_1row
80 mul_mv_f16_f32_l4
81 mul_mv_f16_f32
82 mul_mv_f32_f32
83 mul_mv_q4_0_f32
84 mul_mv_q4_0_f32_v
85 mul_mv_q4_0_f32_8x_flat
86 mul_mv_q4_0_f32_1d_8x_flat
87 mul_mv_q4_0_f32_1d_16x_flat
88 mul_mv_q4_k_f32
89 mul_mv_q6_k_f32
90 mul_mv_q6_k_f32_flat
91 mul_mv_q8_0_f32
92 mul_mv_q8_0_f32_flat
93 mul_mv_mxfp4_f32
94 mul_mv_mxfp4_f32_flat
95 mul_mv_id_q4_0_f32_8x_flat
96 mul_mv_id_q8_0_f32
97 mul_mv_id_q8_0_f32_flat
98 mul_mv_id_mxfp4_f32
99 mul_mv_id_mxfp4_f32_flat
100 gemm_moe_mxfp4_f32
101 gemv_moe_mxfp4_f32
102 mul_mm_f32_f32_l4_lm
103 mul_mm_f16_f32_l4_lm
104 mul_mm_q8_0_f32_l4_lm
105 mul_mm_q6_k_f32_l4_lm
106 mul_mm_q8_0_f32_8x4
107 gemv_noshuffle_general_q8_0_f32
108 mul
109 norm
110 relu
111 rms_norm
112 rope
113 scale
114 set_rows
115 sigmoid
116 silu
117 softmax_4_f32
118 softmax_4_f16
119 softmax_f32
120 softmax_f16
121 sqr
122 sqrt
123 ssm_conv
124 sub
125 sum_rows
126 transpose
127 concat
128 tsembd
129 upscale
130 tanh
131 expm1
132 softplus
133 pad
134 repeat
135 mul_mat_f16_f32
136 mul_mm_f16_f32_kq_kqv
137 conv2d
138 conv2d_f16_f32
139 flash_attn_f32_f16
140 flash_attn_f16
141 flash_attn_f32
142)
143
144foreach (K ${GGML_OPENCL_KERNELS})
145 ggml_opencl_add_kernel(${K})
146endforeach()