1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
3//------------------------------------------------------------------------------
4// add_id
5//------------------------------------------------------------------------------
// Adds one float row of src1, selected through an int index read from src2,
// to a float row of src0 and stores the sum in dst.
//
// Each work-group handles a single output row identified by
// (get_group_id(0), get_group_id(1)); the work-items of the group stride over
// the ne0 elements of that row.
//
//   src0, offset0 : first addend buffer and its starting byte offset
//   src1, offset1 : second addend buffer and its starting byte offset
//   src2, offset2 : int index buffer and its starting byte offset
//   dst,  offsetd : output buffer and its starting byte offset
//   nb01, nb02    : src0 byte strides for group dimensions 0 and 1
//   nb11          : src1 byte stride per selected row
//   nb21          : src2 byte stride for group dimension 1 (entries along
//                   group dimension 0 are read sizeof(int) bytes apart)
//   ne0           : number of floats processed per row
//   ne1           : row count used to derive the dst stride for dimension 1
//
// dst strides are not passed in: they are computed here as ne0*sizeof(float)
// per row and ne1 times that per step of group dimension 1.
kernel void kernel_add_id(
        global char * src0,
        ulong         offset0,
        global char * src1,
        ulong         offset1,
        global char * src2,
        ulong         offset2,
        global char * dst,
        ulong         offsetd,
        ulong         nb01,
        ulong         nb02,
        ulong         nb11,
        ulong         nb21,
        int           ne0,
        int           ne1
) {
    // Work-group coordinates pick the output row.
    const int row   = get_group_id(0);
    const int slice = get_group_id(1);

    // Shift every buffer to its real start.
    global const char * a_bytes   = src0 + offset0;
    global const char * b_bytes   = src1 + offset1;
    global const char * ids_bytes = src2 + offset2;
    global char       * out_bytes = dst  + offsetd;

    // Byte strides of the output, derived from the element counts.
    const size_t out_row_stride   = ne0 * sizeof(float);
    const size_t out_slice_stride = ne1 * out_row_stride;

    // Row of src1 to add, looked up in the index buffer.
    const int picked = *((global const int *)(ids_bytes + row*sizeof(int) + slice*nb21));

    global const float * a   = (global const float *)(a_bytes + row*nb01 + slice*nb02);
    global const float * b   = (global const float *)(b_bytes + picked*nb11);
    global float       * out = (global float *)(out_bytes + row*out_row_stride + slice*out_slice_stride);

    // Work-items of the group cooperatively cover the row.
    const int    first = get_local_id(0);
    const size_t step  = get_local_size(0);

    for (int col = first; col < ne0; col += step) {
        out[col] = a[col] + b[col];
    }
}