summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-opencl/kernels/expm1.cl
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-opencl/kernels/expm1.cl
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-opencl/kernels/expm1.cl')
-rw-r--r--llama.cpp/ggml/src/ggml-opencl/kernels/expm1.cl82
1 files changed, 82 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-opencl/kernels/expm1.cl b/llama.cpp/ggml/src/ggml-opencl/kernels/expm1.cl
new file mode 100644
index 0000000..126298a
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-opencl/kernels/expm1.cl
@@ -0,0 +1,82 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+//------------------------------------------------------------------------------
+// expm1
+//------------------------------------------------------------------------------
+kernel void kernel_expm1_f32_nd(
+ global void * p_src0_base,
+ ulong off_src0_abs,
+ global void * p_dst_base,
+ ulong off_dst_abs,
+ int ne00,
+ int ne01,
+ int ne02,
+ int ne03,
+ ulong nb00,
+ ulong nb01,
+ ulong nb02,
+ ulong nb03,
+ int ne10,
+ int ne11,
+ int ne12,
+ int ne13,
+ ulong nb10,
+ ulong nb11,
+ ulong nb12,
+ ulong nb13
+) {
+ int i0 = get_global_id(0);
+ int i1 = get_global_id(1);
+ int i2 = get_global_id(2);
+
+ if (i0 < ne10 && i1 < ne11 && i2 < ne12) {
+ for (int i3 = 0; i3 < ne13; ++i3) {
+ ulong src_offset_in_tensor = (ulong)i0*nb00 + (ulong)i1*nb01 + (ulong)i2*nb02 + (ulong)i3*nb03;
+ global const float *src_val_ptr = (global const float *)((global char *)p_src0_base + off_src0_abs + src_offset_in_tensor);
+
+ ulong dst_offset_in_tensor = (ulong)i0*nb10 + (ulong)i1*nb11 + (ulong)i2*nb12 + (ulong)i3*nb13;
+ global float *dst_val_ptr = (global float *)((global char *)p_dst_base + off_dst_abs + dst_offset_in_tensor);
+
+ *dst_val_ptr = exp(*src_val_ptr) - 1;
+ }
+ }
+}
+
+kernel void kernel_expm1_f16_nd(
+ global void * p_src0_base,
+ ulong off_src0_abs,
+ global void * p_dst_base,
+ ulong off_dst_abs,
+ int ne00,
+ int ne01,
+ int ne02,
+ int ne03,
+ ulong nb00,
+ ulong nb01,
+ ulong nb02,
+ ulong nb03,
+ int ne10,
+ int ne11,
+ int ne12,
+ int ne13,
+ ulong nb10,
+ ulong nb11,
+ ulong nb12,
+ ulong nb13
+) {
+ int i0 = get_global_id(0);
+ int i1 = get_global_id(1);
+ int i2 = get_global_id(2);
+
+ if (i0 < ne10 && i1 < ne11 && i2 < ne12) {
+ for (int i3 = 0; i3 < ne13; ++i3) {
+ ulong src_offset_in_tensor = (ulong)i0*nb00 + (ulong)i1*nb01 + (ulong)i2*nb02 + (ulong)i3*nb03;
+ global const half *src_val_ptr = (global const half *)((global char *)p_src0_base + off_src0_abs + src_offset_in_tensor);
+
+ ulong dst_offset_in_tensor = (ulong)i0*nb10 + (ulong)i1*nb11 + (ulong)i2*nb12 + (ulong)i3*nb13;
+ global half *dst_val_ptr = (global half *)((global char *)p_dst_base + off_dst_abs + dst_offset_in_tensor);
+
+ *dst_val_ptr = exp(*src_val_ptr) - 1;
+ }
+ }
+}