summary refs log tree commit diff
path: root/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl
diff options
context:
space:
mode:
author Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
commit b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl
download llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl')
-rw-r--r-- llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl 109
1 files changed, 109 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl b/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl
new file mode 100644
index 0000000..2c4887a
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl
@@ -0,0 +1,109 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+kernel void kernel_tanh_f32(    // dst[i] = tanh(src0[i]) with i = get_global_id(0)
+    global const float * src0,  // input floats, read after shifting the base by offset0 bytes
+    ulong offset0,              // byte offset applied to src0
+    global float * dst,         // output floats, written after shifting the base by offsetd bytes
+    ulong offsetd               // byte offset applied to dst
+) {
+    const size_t gid = get_global_id(0);  // index of the single element this work-item handles
+    global const float * in  = (global const float *)((global const char *)src0 + offset0);
+    global float       * out = (global float *)((global char *)dst + offsetd);
+    out[gid] = tanh(in[gid]);  // no bounds check: the launch must not exceed the element count
+}
+
+kernel void kernel_tanh_f32_4(   // dst[i] = tanh(src0[i]) on float4 elements, i = get_global_id(0)
+    global const float4 * src0,  // input float4 vectors, read after shifting the base by offset0 bytes
+    ulong offset0,               // byte offset applied to src0
+    global float4 * dst,         // output float4 vectors, written after shifting the base by offsetd bytes
+    ulong offsetd                // byte offset applied to dst
+) {
+    const size_t gid = get_global_id(0);  // index of the float4 this work-item handles
+    global const float4 * in  = (global const float4 *)((global const char *)src0 + offset0);
+    global float4       * out = (global float4 *)((global char *)dst + offsetd);
+    out[gid] = tanh(in[gid]);  // built-in tanh is applied to each of the four components
+}
+
+kernel void kernel_tanh_f16(    // dst[i] = tanh(src0[i]) on half elements, i = get_global_id(0)
+    global const half * src0,   // input halves, read after shifting the base by offset0 bytes
+    ulong offset0,              // byte offset applied to src0
+    global half * dst,          // output halves, written after shifting the base by offsetd bytes
+    ulong offsetd               // byte offset applied to dst
+) {
+    const size_t gid = get_global_id(0);  // index of the single element this work-item handles
+    global const half * in  = (global const half *)((global const char *)src0 + offset0);
+    global half       * out = (global half *)((global char *)dst + offsetd);
+    out[gid] = tanh(in[gid]);  // half-precision tanh; relies on cl_khr_fp16 being enabled
+}
+
+kernel void kernel_tanh_f16_4(  // dst[i] = tanh(src0[i]) on half4 elements, i = get_global_id(0)
+    global const half4 * src0,  // input half4 vectors, read after shifting the base by offset0 bytes
+    ulong offset0,              // byte offset applied to src0
+    global half4 * dst,         // output half4 vectors, written after shifting the base by offsetd bytes
+    ulong offsetd               // byte offset applied to dst
+) {
+    const size_t gid = get_global_id(0);  // index of the half4 this work-item handles
+    global const half4 * in  = (global const half4 *)((global const char *)src0 + offset0);
+    global half4       * out = (global half4 *)((global char *)dst + offsetd);
+    out[gid] = tanh(in[gid]);  // component-wise half tanh; relies on cl_khr_fp16 being enabled
+}
+
+kernel void kernel_tanh_f32_nc(  // tanh over float elements addressed through explicit byte strides
+    global const char * src0,    // input base, addressed in bytes
+    ulong offset0,               // byte offset applied to src0
+    global char * dst,           // output base, addressed in bytes
+    ulong offsetd,               // byte offset applied to dst
+    int ne00,                    // number of elements walked along the innermost index
+    ulong nb00,                  // input byte stride per innermost index step
+    ulong nb01,                  // input byte stride per get_group_id(0) step
+    ulong nb02,                  // input byte stride per get_group_id(1) step
+    ulong nb03,                  // input byte stride per get_group_id(2) step
+    ulong nb0,                   // output byte stride per innermost index step
+    ulong nb1,                   // output byte stride per get_group_id(0) step
+    ulong nb2,                   // output byte stride per get_group_id(1) step
+    ulong nb3                    // output byte stride per get_group_id(2) step
+) {
+    // The three group ids select which run of ne00 elements this work-group processes.
+    const int g1 = get_group_id(0);
+    const int g2 = get_group_id(1);
+    const int g3 = get_group_id(2);
+
+    global const char * src_base = src0 + offset0 + g3*nb03 + g2*nb02 + g1*nb01;
+    global char       * dst_base = dst + offsetd + g3*nb3 + g2*nb2 + g1*nb1;
+
+    // Work-items of the group share the run: each starts at its local id and steps by the group size.
+    for (int e = get_local_id(0); e < ne00; e += get_local_size(0)) {
+        const float v = *(global const float *)(src_base + e*nb00);
+        *(global float *)(dst_base + e*nb0) = tanh(v);
+    }
+}
+
+kernel void kernel_tanh_f16_nc(  // tanh over half elements addressed through explicit byte strides
+    global const char * src0,    // input base, addressed in bytes
+    ulong offset0,               // byte offset applied to src0
+    global char * dst,           // output base, addressed in bytes
+    ulong offsetd,               // byte offset applied to dst
+    int ne00,                    // number of elements walked along the innermost index
+    ulong nb00,                  // input byte stride per innermost index step
+    ulong nb01,                  // input byte stride per get_group_id(0) step
+    ulong nb02,                  // input byte stride per get_group_id(1) step
+    ulong nb03,                  // input byte stride per get_group_id(2) step
+    ulong nb0,                   // output byte stride per innermost index step
+    ulong nb1,                   // output byte stride per get_group_id(0) step
+    ulong nb2,                   // output byte stride per get_group_id(1) step
+    ulong nb3                    // output byte stride per get_group_id(2) step
+) {
+    // The three group ids select which run of ne00 elements this work-group processes.
+    const int g1 = get_group_id(0);
+    const int g2 = get_group_id(1);
+    const int g3 = get_group_id(2);
+
+    global const char * src_base = src0 + offset0 + g3*nb03 + g2*nb02 + g1*nb01;
+    global char       * dst_base = dst + offsetd + g3*nb3 + g2*nb2 + g1*nb1;
+
+    // Work-items of the group share the run: each starts at its local id and steps by the group size.
+    for (int e = get_local_id(0); e < ne00; e += get_local_size(0)) {
+        const half v = *(global const half *)(src_base + e*nb00);
+        *(global half *)(dst_base + e*nb0) = tanh(v);
+    }
+}