/*
 * kernel_repeat_f32 - fill dst with float values read from src0, wrapping the
 * source index in every dimension.
 *
 * Work distribution:
 *   - The work-group id in dimensions 0, 1 and 2 selects the destination
 *     indices i1, i2 and i3 respectively (one work-group per destination row).
 *   - The work-items of a group walk the row index i0 over [0, ne0), starting
 *     at their local id and advancing by the local size.
 *
 * For each destination element (i0, i1, i2, i3) the source element read is
 * (i0 % ne00, i1 % ne01, i2 % ne02, i3 % ne03).
 *
 * Parameters:
 *   src0, offset0    source buffer and byte offset added to it before use
 *   dst,  offsetd    destination buffer and byte offset added to it before use
 *   ne00..ne03       source extents used to wrap the destination indices;
 *                    must be non-zero because they are modulo divisors
 *   nb00..nb03       byte strides applied to the source indices
 *   ne0              number of elements written along dimension 0 of dst
 *   nb0..nb3         byte strides applied to the destination indices
 *
 * Each element is accessed as a float at a byte address computed from the
 * strides above.
 */
kernel void kernel_repeat_f32(
        global const char * src0,
        ulong               offset0,
        global char       * dst,
        ulong               offsetd,
        int                 ne00,
        int                 ne01,
        int                 ne02,
        int                 ne03,
        ulong               nb00,
        ulong               nb01,
        ulong               nb02,
        ulong               nb03,
        int                 ne0,
        ulong               nb0,
        ulong               nb1,
        ulong               nb2,
        ulong               nb3
) {
    // Apply the caller-supplied byte offsets to both buffers.
    src0 = src0 + offset0;
    dst  = dst  + offsetd;

    // Destination indices for dimensions 3, 2, 1 come from the work-group id.
    const int i3 = get_group_id(2);
    const int i2 = get_group_id(1);
    const int i1 = get_group_id(0);

    // Wrap the destination indices into the source extents.
    const int i03 = i3 % ne03;
    const int i02 = i2 % ne02;
    const int i01 = i1 % ne01;

    // Base byte addresses of the source and destination rows.
    global const char * src0_ptr = src0 + i03*nb03 + i02*nb02 + i01*nb01;
    global       char * dst_ptr  = dst  + i3*nb3   + i2*nb2   + i1*nb1;

    // Loop-invariant: first index handled by this work-item and its stride.
    const int i0_first = get_local_id(0);
    const int i0_step  = get_local_size(0);

    for (int i0 = i0_first; i0 < ne0; i0 += i0_step) {
        // Wrap the destination index along dimension 0 into the source extent.
        const int i00 = i0 % ne00;

        // The source is read-only, so keep const on the load.
        *((global float *)(dst_ptr + i0*nb0)) =
            *((global const float *)(src0_ptr + i00*nb00));
    }
}