diff options
Diffstat (limited to 'llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl | 38 |
1 files changed, 38 insertions, 0 deletions
// Kernel from llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl
//
// kernel_repeat_f32: fills dst with float values read from src0. Every
// destination index is wrapped modulo the matching source extent
// (ne00..ne03), so the source contents are tiled across the destination.
//
// Work distribution, as written below:
//   - get_group_id(0), (1) and (2) select the destination indices i1, i2, i3.
//     The destination extents along those axes are not passed in, so the
//     launch grid is presumably sized to them by the host (confirm at the
//     call site).
//   - The work-items of a group cover i0 in [0, ne0): each one starts at
//     get_local_id(0) and advances by get_local_size(0).
//
// Parameters:
//   src0, offset0   source buffer and the byte offset added to it
//   dst,  offsetd   destination buffer and the byte offset added to it
//   ne00..ne03      source extents per axis; used as modulo divisors, so
//                   each must be non-zero
//   nb00..nb03      source strides per axis, in bytes
//   ne0             destination extent along axis 0
//   nb0..nb3        destination strides per axis, in bytes
//
// NOTE(review): elements are accessed through float pointers, so every
// computed address is assumed to be suitably aligned for float - confirm
// against the host-side buffer layout.
kernel void kernel_repeat_f32(
        global const char * src0,
        ulong               offset0,
        global char       * dst,
        ulong               offsetd,
        int                 ne00,
        int                 ne01,
        int                 ne02,
        int                 ne03,
        ulong               nb00,
        ulong               nb01,
        ulong               nb02,
        ulong               nb03,
        int                 ne0,
        ulong               nb0,
        ulong               nb1,
        ulong               nb2,
        ulong               nb3
) {
    src0 = src0 + offset0;
    dst  = dst  + offsetd;

    // One work-group per (i1, i2, i3) position in the destination.
    const int i3 = get_group_id(2);
    const int i2 = get_group_id(1);
    const int i1 = get_group_id(0);

    // Wrap the destination indices into the source extents.
    const int i03 = i3 % ne03;
    const int i02 = i2 % ne02;
    const int i01 = i1 % ne01;

    global const char * src0_ptr = src0 + i03*nb03 + i02*nb02 + i01*nb01;
    global       char * dst_ptr  = dst  +  i3*nb3  +  i2*nb2  +  i1*nb1;

    for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) {
        const int i00 = i0 % ne00;

        // Keep the const qualifier on the read side: src0 is declared
        // const, and the kernel only ever loads from it.
        global const float * src_elem = (global const float *)(src0_ptr + i00*nb00);
        global       float * dst_elem = (global       float *)(dst_ptr  +  i0*nb0);

        *dst_elem = *src_elem;
    }
}
