1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 2
 3//------------------------------------------------------------------------------
 4// add_id
 5//------------------------------------------------------------------------------
 6kernel void kernel_add_id(
 7    global char * src0,
 8    ulong         offset0,
 9    global char * src1,
10    ulong         offset1,
11    global char * src2,
12    ulong         offset2,
13    global char * dst,
14    ulong         offsetd,
15    ulong         nb01,
16    ulong         nb02,
17    ulong         nb11,
18    ulong         nb21,
19    int           ne0,
20    int           ne1
21) {
22    src0 = (global char*)((global char*)src0 + offset0);
23    src1 = (global char*)((global char*)src1 + offset1);
24    src2 = (global char*)((global char*)src2 + offset2);
25    dst  = (global char*)((global char*)dst  + offsetd);
26
27    int i1 = get_group_id(0);
28    int i2 = get_group_id(1);
29
30    const int i11 = *((global const int *) (src2 + i1*sizeof(int) + i2*nb21));
31
32    const size_t nb1 = ne0 * sizeof(float);
33    const size_t nb2 = ne1 * nb1;
34
35    global float * dst_row  = (global float *)((global char *)dst  + i1*nb1 + i2*nb2);
36    global float * src0_row = (global float *)((global char *)src0 + i1*nb01 + i2*nb02);
37    global float * src1_row = (global float *)((global char *)src1 + i11*nb11);
38
39    for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) {
40        dst_row[i0] = src0_row[i0] + src1_row[i0];
41    }
42}