1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
/*
 * kernel_im2col_f32
 *
 * Each work-item whose dimension-0 global id is below `pelements` writes
 * exactly one float into `dst`: either a value gathered from `src1` or 0.0f
 * when the computed input coordinate falls outside [0, IH) x [0, IW).
 *
 * Parameters:
 *   src1, offset1   input buffer and a byte offset added to it before use.
 *   dst, offsetd    output buffer and a byte offset added to it before use.
 *   batch_offset    multiplied by the batch index when addressing src1
 *                   (counted in floats, since it indexes a float pointer).
 *   delta_offset    multiplied by the channel index when addressing src1
 *                   (counted in floats).
 *   IW, IH          bounds used for the input column / row range check;
 *                   IW is also the input row pitch in floats.
 *   IC              divisor used to split the dimension-2 group id into
 *                   (batch, channel).
 *   OW, OH          output extents used to build the destination row index.
 *   KW, KH          kernel extents used to build the destination column index.
 *   pelements       number of active work-items along dimension 0.
 *   CHW             multiplier applied to the destination row index
 *                   (presumably IC * KH * KW -- confirm against the host code).
 *   s0, s1          multipliers on the output x / y position
 *                   (presumably strides -- confirm).
 *   p0, p1          subtracted from the input x / y coordinate
 *                   (presumably padding -- confirm).
 *   d0, d1          multipliers on the kernel x / y position
 *                   (presumably dilation -- confirm).
 *
 * NOTE(review): the output row and the (batch, channel) pair are taken from
 * get_group_id(1) / get_group_id(2), not from global ids; this presumably
 * relies on the host launching with a local size of 1 in those dimensions --
 * confirm against the enqueue call.
 */
kernel void kernel_im2col_f32(
        global float * src1,
        ulong offset1,
        global float * dst,
        ulong offsetd,
        ulong batch_offset,
        ulong delta_offset,
        long IW,
        long IH,
        long IC,
        long OW,
        long OH,
        long KW,
        long KH,
        long pelements,
        long CHW,
        int s0,
        int s1,
        int p0,
        int p1,
        int d0,
        int d1
) {
    const long gid = get_global_id(0);
    if (gid >= pelements) {
        return;
    }

    // Apply the byte offsets to obtain the effective base pointers.
    global const float * in  = (global const float *)((global const char *)src1 + offset1);
    global float *       out = (global float *)((global char *)dst + offsetd);

    // Decompose the linear id: kx is the quotient by OW * KH, ky comes from
    // the remainder divided by OW, and ix is the id modulo OW.
    const long span = OW * KH;
    const long kx   = gid / span;
    const long ky   = (gid % span) / OW;
    const long ix   = gid % OW;

    // Output row and the packed (batch, channel) index come from the group ids.
    const long oh    = get_group_id(1);
    const long batch = get_group_id(2) / IC;
    const long ic    = get_group_id(2) % IC;

    // Input coordinate sampled by this (output position, kernel tap) pair.
    const long iih = oh * s1 + ky * d1 - p1;
    const long iiw = ix * s0 + kx * d0 - p0;

    // Destination index: one row per (batch, oh, ix), one column per (ic, ky, kx).
    const long out_row = (batch * OH + oh) * OW + ix;
    const long out_col = ic * (KW * KH) + ky * KW + kx;
    const long out_idx = out_row * CHW + out_col;

    if (iih >= 0 && iih < IH && iiw >= 0 && iiw < IW) {
        // Kept as a signed long intermediate so the final index arithmetic
        // is carried out in signed 64-bit.
        const long plane = ic * delta_offset + batch * batch_offset;
        out[out_idx] = in[plane + iih * IW + iiw];
    } else {
        // Coordinate lies outside the input: emit zero.
        out[out_idx] = 0.0f;
    }
}