1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
3//------------------------------------------------------------------------------
4// tri
5//------------------------------------------------------------------------------
// Element-wise triangular mask over a flat f32 buffer.
//
// Each work-item handles one flat element index `gid` (dimension 0 of the
// NDRange). The index is decomposed as
//     col = gid % ne0            (fastest-varying axis, extent ne0)
//     row = (gid / ne0) % ne1    (next axis, extent ne1)
// and the element is either copied from src0 to dst or replaced by 0.0f,
// depending on how col compares to row:
//     tri_type == 0   keep when col >= row
//     tri_type == 1   keep when col >  row
//     tri_type == 2   keep when col <= row
//     anything else   keep when col <  row
//
// Parameters:
//   src0, offset0  input buffer and a byte offset added to it
//   dst,  offsetd  output buffer and a byte offset added to it
//   n              number of elements; work-items with gid >= n do nothing
//   ne0, ne1       extents used to split the flat index into (col, row)
//   tri_type       selects the comparison, see table above
//                  NOTE(review): presumably mirrors a host-side enum - confirm
//                  the numeric values against the caller.
__kernel void kernel_tri_f32(
    global float * src0,
    ulong offset0,
    global float * dst,
    ulong offsetd,
    int n,
    int ne0,
    int ne1,
    int tri_type
) {
    // Offsets are expressed in bytes, so step through char pointers first.
    global float * in  = (global float *)((global char *)src0 + offset0);
    global float * out = (global float *)((global char *)dst  + offsetd);

    const int gid = get_global_id(0);
    if (gid >= n) {
        // Surplus work-item beyond the element count.
        return;
    }

    const int col = gid % ne0;
    const int row = (gid / ne0) % ne1;

    int keep;
    switch (tri_type) {
        case 0:
            keep = (col >= row);
            break;
        case 1:
            keep = (col > row);
            break;
        case 2:
            keep = (col <= row);
            break;
        default:
            // Every other value selects the strict "col < row" comparison.
            keep = (col < row);
            break;
    }

    // The source is only read for elements that are kept.
    if (keep) {
        out[gid] = in[gid];
    } else {
        out[gid] = 0.0f;
    }
}