summary | refs | log | tree | commit | diff
path: root/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl')
-rw-r--r-- llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl | 38
1 files changed, 38 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl b/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl
new file mode 100644
index 0000000..53951a5
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl
@@ -0,0 +1,38 @@
+// Repeat (tile) an f32 tensor: the destination element at (i0, i1, i2, i3) is copied
+// from the source element at (i0 % ne00, i1 % ne01, i2 % ne02, i3 % ne03).
+// The work-group id selects the destination row (i1, i2, i3) = (group 0, 1, 2); the
+// work-items of that group step through i0 in [0, ne0) by the local size.
+// i1..i3 are not range-checked here - presumably the host launches one group per row.
+//   src0, offset0   source buffer and the byte offset added to it
+//   dst,  offsetd   destination buffer and the byte offset added to it
+//   ne00..ne03      source extents, used as the modulus for i0..i3
+//   nb00..nb03      source byte strides for dimensions 0..3
+//   ne0             loop bound for i0 (destination extent along dimension 0)
+//   nb0..nb3        destination byte strides for dimensions 0..3
+kernel void kernel_repeat_f32(
+    global const char * src0, ulong offset0,
+    global       char * dst,  ulong offsetd,
+    int   ne00, int   ne01, int   ne02, int   ne03,
+    ulong nb00, ulong nb01, ulong nb02, ulong nb03,
+    int   ne0,
+    ulong nb0,  ulong nb1,  ulong nb2,  ulong nb3
+) {
+    src0 += offset0;
+    dst  += offsetd;
+
+    // The work-group id picks which destination row this group fills.
+    const int i1 = get_group_id(0);
+    const int i2 = get_group_id(1);
+    const int i3 = get_group_id(2);
+
+    // Wrap the row coordinates into the source extents; all strides are in bytes.
+    global const char * src_row = src0 + (i3 % ne03)*nb03 + (i2 % ne02)*nb02 + (i1 % ne01)*nb01;
+    global       char * dst_row = dst  + i3*nb3 + i2*nb2 + i1*nb1;
+
+    for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) {
+        // Load through a const-qualified pointer so the cast does not drop src0's const.
+        global const float * src_elem = (global const float *)(src_row + (i0 % ne00)*nb00);
+        global       float * dst_elem = (global       float *)(dst_row + i0*nb0);
+        *dst_elem = *src_elem;
+    }
+}