1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 2
 3kernel void kernel_sqrt_cont_f32(
 4    global float * src0,
 5    ulong          offset0,
 6    global float * dst,
 7    ulong          offsetd
 8) {
 9    src0 = (global float*)((global char*)src0 + offset0);
10    dst  = (global float*)((global char*)dst + offsetd);
11
12    uint gid = get_global_id(0);
13    dst[gid] = sqrt(src0[gid]);
14}
15
// Vectorized element-wise square root of f32 data, four lanes at a time:
// dst[i] = sqrt(src0[i]) where each element is a float4.
//
// Each work-item handles exactly one float4, selected by get_global_id(0).
// There is no bounds check, so the launch must not cover more work-items
// than there are float4 elements.
//
// src0    - input buffer viewed as float4 (only read)
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer viewed as float4
// offsetd - byte offset added to dst before indexing
kernel void kernel_sqrt_cont_f32_4(
    global const float4 * src0,
    ulong                 offset0,
    global float4       * dst,
    ulong                 offsetd
) {
    // Offsets are expressed in bytes, so apply them through char pointers
    // before re-typing to the vector element type.
    src0 = (global const float4*)((global const char*)src0 + offset0);
    dst  = (global float4*)((global char*)dst + offsetd);

    // get_global_id() returns size_t; keep the full width so the index is
    // not truncated to 32 bits on devices with 64-bit size_t.
    const size_t gid = get_global_id(0);
    dst[gid] = sqrt(src0[gid]);
}
28
// Element-wise square root of f16 data: dst[i] = sqrt(src0[i]).
//
// The square root itself is evaluated in float: each half input is widened
// with convert_float, passed to sqrt, and the result is narrowed back with
// convert_half.
//
// Each work-item handles exactly one half, selected by get_global_id(0).
// There is no bounds check, so the launch must not cover more work-items
// than there are elements.
//
// src0    - input buffer (only read)
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer
// offsetd - byte offset added to dst before indexing
kernel void kernel_sqrt_cont_f16(
    global const half * src0,
    ulong               offset0,
    global half       * dst,
    ulong               offsetd
) {
    // Offsets are expressed in bytes, so apply them through char pointers
    // before re-typing to the element type.
    src0 = (global const half*)((global const char*)src0 + offset0);
    dst  = (global half*)((global char*)dst + offsetd);

    // get_global_id() returns size_t; keep the full width so the index is
    // not truncated to 32 bits on devices with 64-bit size_t.
    const size_t gid = get_global_id(0);
    dst[gid] = convert_half(sqrt(convert_float(src0[gid])));
}
41
// Vectorized element-wise square root of f16 data, four lanes at a time:
// dst[i] = sqrt(src0[i]) where each element is a half4.
//
// The square root itself is evaluated in float: each half4 input is widened
// with convert_float4, passed to sqrt, and the result is narrowed back with
// convert_half4.
//
// Each work-item handles exactly one half4, selected by get_global_id(0).
// There is no bounds check, so the launch must not cover more work-items
// than there are half4 elements.
//
// src0    - input buffer viewed as half4 (only read)
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer viewed as half4
// offsetd - byte offset added to dst before indexing
kernel void kernel_sqrt_cont_f16_4(
    global const half4 * src0,
    ulong                offset0,
    global half4       * dst,
    ulong                offsetd
) {
    // Offsets are expressed in bytes, so apply them through char pointers
    // before re-typing to the vector element type.
    src0 = (global const half4*)((global const char*)src0 + offset0);
    dst  = (global half4*)((global char*)dst + offsetd);

    // get_global_id() returns size_t; keep the full width so the index is
    // not truncated to 32 bits on devices with 64-bit size_t.
    const size_t gid = get_global_id(0);
    dst[gid] = convert_half4(sqrt(convert_float4(src0[gid])));
}