1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
// Element-wise square root over a float buffer.
//
// src0    : base pointer of the input buffer
// offset0 : byte offset added to src0 before any element is read
// dst     : base pointer of the output buffer
// offsetd : byte offset added to dst before any element is written
//
// Each work-item handles the single element whose index equals its global id
// in dimension 0. Indexing is linear, so the data is presumably contiguous
// (per the `cont` in the name). No bounds check is performed here, so the
// launch range must not exceed the number of elements available.
kernel void kernel_sqrt_cont_f32(
    global float * src0,
    ulong offset0,
    global float * dst,
    ulong offsetd
) {
    // Offsets are expressed in bytes, so step through char pointers first.
    global float * in  = (global float *)((global char *)src0 + offset0);
    global float * out = (global float *)((global char *)dst + offsetd);

    const uint idx = get_global_id(0);
    const float x = in[idx];

    out[idx] = sqrt(x);
}
15
// Element-wise square root over a float buffer, four lanes at a time.
//
// src0    : base pointer of the input buffer, viewed as float4 vectors
// offset0 : byte offset added to src0 before any vector is read
// dst     : base pointer of the output buffer, viewed as float4 vectors
// offsetd : byte offset added to dst before any vector is written
//
// Each work-item handles one float4 whose vector index equals its global id
// in dimension 0. No bounds check is performed here, so the launch range must
// not exceed the number of float4 vectors available.
kernel void kernel_sqrt_cont_f32_4(
    global float4 * src0,
    ulong offset0,
    global float4 * dst,
    ulong offsetd
) {
    // Offsets are expressed in bytes, so step through char pointers first.
    global float4 * in  = (global float4 *)((global char *)src0 + offset0);
    global float4 * out = (global float4 *)((global char *)dst + offsetd);

    const uint idx = get_global_id(0);
    const float4 v = in[idx];

    out[idx] = sqrt(v);
}
28
// Element-wise square root over a half-precision buffer.
//
// src0    : base pointer of the input buffer
// offset0 : byte offset added to src0 before any element is read
// dst     : base pointer of the output buffer
// offsetd : byte offset added to dst before any element is written
//
// Each work-item handles the single element whose index equals its global id
// in dimension 0. The value is widened to float, the square root is taken in
// float, and the result is converted back to half. No bounds check is
// performed here, so the launch range must not exceed the element count.
kernel void kernel_sqrt_cont_f16(
    global half * src0,
    ulong offset0,
    global half * dst,
    ulong offsetd
) {
    // Offsets are expressed in bytes, so step through char pointers first.
    global half * in  = (global half *)((global char *)src0 + offset0);
    global half * out = (global half *)((global char *)dst + offsetd);

    const uint idx = get_global_id(0);
    const float x = convert_float(in[idx]);
    const float root = sqrt(x);

    out[idx] = convert_half(root);
}
41
// Element-wise square root over a half-precision buffer, four lanes at a time.
//
// src0    : base pointer of the input buffer, viewed as half4 vectors
// offset0 : byte offset added to src0 before any vector is read
// dst     : base pointer of the output buffer, viewed as half4 vectors
// offsetd : byte offset added to dst before any vector is written
//
// Each work-item handles one half4 whose vector index equals its global id
// in dimension 0. The vector is widened to float4, the square root is taken
// in float, and the result is converted back to half4. No bounds check is
// performed here, so the launch range must not exceed the vector count.
kernel void kernel_sqrt_cont_f16_4(
    global half4 * src0,
    ulong offset0,
    global half4 * dst,
    ulong offsetd
) {
    // Offsets are expressed in bytes, so step through char pointers first.
    global half4 * in  = (global half4 *)((global char *)src0 + offset0);
    global half4 * out = (global half4 *)((global char *)dst + offsetd);

    const uint idx = get_global_id(0);
    const float4 v = convert_float4(in[idx]);
    const float4 root = sqrt(v);

    out[idx] = convert_half4(root);
}