1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 2
 3//------------------------------------------------------------------------------
 4// tri
 5//------------------------------------------------------------------------------
 6__kernel void kernel_tri_f32(
 7        global float * src0,
 8        ulong offset0,
 9        global float * dst,
10        ulong offsetd,
11        int n,
12        int ne0,
13        int ne1,
14        int tri_type
15) {
16    src0 = (global float*)((global char*)src0 + offset0);
17    dst = (global float*)((global char*)dst + offsetd);
18
19    int idx = get_global_id(0);
20    if (idx >= n) return;
21
22    int i0 = idx % ne0;
23    int i1 = (idx / ne0) % ne1;
24
25    int keep = 0;
26    if (tri_type == 0) keep = (i0 >= i1);
27    else if (tri_type == 1) keep = (i0 >  i1);
28    else if (tri_type == 2) keep = (i0 <= i1);
29    else                    keep = (i0 <  i1);
30
31    dst[idx] = keep ? src0[idx] : 0.0f;
32}