summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt
blob: b6094fb68b0a322e71de577d9214b0e8c25aa7ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Build configuration for the ggml OpenCL backend.
#
# OpenCL supplies the headers/libraries the backend links against; the Python3
# interpreter is used by the kernel embedding rule (see ggml_opencl_add_kernel).
find_package(OpenCL REQUIRED)
find_package(Python3 REQUIRED)

set(TARGET_NAME ggml-opencl)

ggml_add_backend_library(${TARGET_NAME}
                         ggml-opencl.cpp
                         ../../include/ggml-opencl.h)
target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCL_LIBRARIES})
target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_INCLUDE_DIRS})

# All preprocessor definitions below are attached to the backend target only
# (PRIVATE) instead of the whole directory, so they cannot leak into any other
# target that may be defined in this directory or its subdirectories.

if (GGML_OPENCL_PROFILING)
    message(STATUS "OpenCL profiling enabled (increases CPU overhead)")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_PROFILING)
endif ()

# Always-on definitions: GGML_OPENCL_SOA_Q is defined unconditionally, and the
# target version is forwarded from the GGML_OPENCL_TARGET_VERSION variable.
target_compile_definitions(${TARGET_NAME} PRIVATE
    GGML_OPENCL_SOA_Q
    GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})

if (GGML_OPENCL_USE_ADRENO_KERNELS)
    message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_USE_ADRENO_KERNELS)
endif ()

if (GGML_OPENCL_EMBED_KERNELS)
    target_compile_definitions(${TARGET_NAME} PRIVATE GGML_OPENCL_EMBED_KERNELS)

    # Script that turns a .cl source into an includable header, and the
    # binary-dir folder that receives the generated headers.
    set(EMBED_KERNEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")
    file(MAKE_DIRECTORY     "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")

    # Lets the backend sources #include the generated headers.
    target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
endif ()

# ggml_opencl_add_kernel(<KNAME>)
#
# Registers the OpenCL kernel source kernels/<KNAME>.cl with the backend.
#
#   KNAME  Kernel base name, without directory or the .cl extension.
#
# When GGML_OPENCL_EMBED_KERNELS is enabled, a build rule is added that runs
# ${EMBED_KERNEL_SCRIPT} with the found Python3 interpreter to produce
# autogenerated/<KNAME>.cl.h in the current binary dir; the header is attached
# to ${TARGET_NAME} as a source so the rule is part of the target's build.
#
# Otherwise the .cl file is copied verbatim at configure time into
# CMAKE_RUNTIME_OUTPUT_DIRECTORY, or into the current binary dir when that
# variable is not set.
#
# Reads TARGET_NAME and EMBED_KERNEL_SCRIPT from the calling scope.
function(ggml_opencl_add_kernel KNAME)
    set(KERN_HDR "${CMAKE_CURRENT_BINARY_DIR}/autogenerated/${KNAME}.cl.h")
    set(KERN_SRC "${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KNAME}.cl")

    if (GGML_OPENCL_EMBED_KERNELS)
        message(STATUS "opencl: embedding kernel ${KNAME}")

        # Paths are quoted and the rule is VERBATIM so that directories
        # containing spaces or shell metacharacters are passed through intact
        # on every generator/platform.
        add_custom_command(
            OUTPUT  "${KERN_HDR}"
            COMMAND "${Python3_EXECUTABLE}" "${EMBED_KERNEL_SCRIPT}" "${KERN_SRC}" "${KERN_HDR}"
            DEPENDS "${KERN_SRC}" "${EMBED_KERNEL_SCRIPT}"
            COMMENT "Generate ${KERN_HDR}"
            VERBATIM
        )

        target_sources(${TARGET_NAME} PRIVATE "${KERN_HDR}")
    else ()
        message(STATUS "opencl: adding kernel ${KNAME}")

        # An unset CMAKE_RUNTIME_OUTPUT_DIRECTORY would make the destination
        # "/<KNAME>.cl" (filesystem root); fall back to the current binary dir.
        if ("${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" STREQUAL "")
            set(KERN_DST_DIR "${CMAKE_CURRENT_BINARY_DIR}")
        else ()
            set(KERN_DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        endif ()

        configure_file("${KERN_SRC}" "${KERN_DST_DIR}/${KNAME}.cl" COPYONLY)
    endif ()
endfunction()

# Base names of the OpenCL kernels handled by this backend's build.
# Each entry <name> refers to kernels/<name>.cl in this directory and is passed
# to ggml_opencl_add_kernel() in list order. To add a kernel, place its .cl file
# under kernels/ and append its base name (no extension) here.
set(GGML_OPENCL_KERNELS
    add
    add_id
    argsort
    tri
    fill
    clamp
    cpy
    cvt
    diag_mask_inf
    div
    gelu
    gemv_noshuffle_general
    gemv_noshuffle
    get_rows
    glu
    group_norm
    solve_tri
    im2col_f32
    im2col_f16
    mean
    mul_mat_Ab_Bi_8x4
    mul_mv_f16_f16
    mul_mv_f16_f32_1row
    mul_mv_f16_f32_l4
    mul_mv_f16_f32
    mul_mv_f32_f32
    mul_mv_q4_0_f32
    mul_mv_q4_0_f32_v
    mul_mv_q4_0_f32_8x_flat
    mul_mv_q4_0_f32_1d_8x_flat
    mul_mv_q4_0_f32_1d_16x_flat
    mul_mv_q4_k_f32
    mul_mv_q6_k_f32
    mul_mv_q6_k_f32_flat
    mul_mv_q8_0_f32
    mul_mv_q8_0_f32_flat
    mul_mv_mxfp4_f32
    mul_mv_mxfp4_f32_flat
    mul_mv_id_q4_0_f32_8x_flat
    mul_mv_id_q8_0_f32
    mul_mv_id_q8_0_f32_flat
    mul_mv_id_mxfp4_f32
    mul_mv_id_mxfp4_f32_flat
    gemm_moe_mxfp4_f32
    gemv_moe_mxfp4_f32
    mul_mm_f32_f32_l4_lm
    mul_mm_f16_f32_l4_lm
    mul_mm_q8_0_f32_l4_lm
    mul_mm_q6_k_f32_l4_lm
    mul_mm_q8_0_f32_8x4
    gemv_noshuffle_general_q8_0_f32
    mul
    norm
    relu
    rms_norm
    rope
    scale
    set_rows
    sigmoid
    silu
    softmax_4_f32
    softmax_4_f16
    softmax_f32
    softmax_f16
    sqr
    sqrt
    ssm_conv
    sub
    sum_rows
    transpose
    concat
    tsembd
    upscale
    tanh
    expm1
    softplus
    pad
    repeat
    mul_mat_f16_f32
    mul_mm_f16_f32_kq_kqv
    conv2d
    conv2d_f16_f32
    flash_attn_f32_f16
    flash_attn_f16
    flash_attn_f32
)

# Register every kernel named in GGML_OPENCL_KERNELS, preserving list order.
foreach (KERNEL_NAME IN LISTS GGML_OPENCL_KERNELS)
    ggml_opencl_add_kernel("${KERNEL_NAME}")
endforeach ()