summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl
diff options
context:
space:
mode:
author Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
commit b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl
download llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl')
-rw-r--r-- llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl 57
1 files changed, 57 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl b/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl
new file mode 100644
index 0000000..cf6cdaa
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl
@@ -0,0 +1,57 @@
1#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
/*
 * im2col gather: copy one float sample from src1 into dst as a half,
 * or store zero when the sample coordinate lies outside the source plane.
 *
 * Work decomposition (one work-item writes exactly one dst element):
 *   - get_global_id(0) is a flat index below pelements that is split into
 *     kx (index / (OW * KH)), ky and ix (index % OW);
 *   - get_group_id(1) is used as the output row oh;
 *   - get_group_id(2) is split into batch (/ IC) and channel ic (% IC).
 * NOTE(review): dimensions 1 and 2 use the group id, not the global id, so
 * this presumably expects a local size of 1 there -- confirm with host code.
 *
 * Parameters:
 *   src1, offset1   source buffer and a byte offset added to it
 *   dst, offsetd    destination buffer and a byte offset added to it
 *   batch_offset    per-batch step into src1, in float elements
 *   delta_offset    per-channel step into src1, in float elements
 *   IW, IH          exclusive upper bounds for the source column / row
 *   IC              divisor separating batch from channel in dimension 2
 *   OW, OH          output extents used to linearise (batch, oh, ix)
 *   KW, KH          kernel extents used to linearise (ic, ky, kx)
 *   pelements       number of valid flat indices in dimension 0
 *   CHW             dst elements per (batch, oh, ix) position
 *                   (presumably IC * KH * KW -- confirm with host code)
 *   s0, d0, p0      source column = ix * s0 + kx * d0 - p0
 *   s1, d1, p1      source row    = oh * s1 + ky * d1 - p1
 *                   (stride, dilation, padding by the usual im2col naming)
 */
kernel void kernel_im2col_f16(
        global float * src1,
        ulong offset1,
        global half  * dst,
        ulong offsetd,
        ulong batch_offset,
        ulong delta_offset,
        long IW,
        long IH,
        long IC,
        long OW,
        long OH,
        long KW,
        long KH,
        long pelements,
        long CHW,
        int  s0,
        int  s1,
        int  p0,
        int  p1,
        int  d0,
        int  d1
) {
    const long idx = get_global_id(0);
    if (idx >= pelements) {
        return;
    }

    // The offsets are in bytes, hence the detour through char pointers.
    global float * src  = (global float *)((global char *)src1 + offset1);
    global half  * dest = (global half  *)((global char *)dst  + offsetd);

    // Split the flat index into kernel column, kernel row and output column.
    const long span = OW * KH;
    const long kx   = idx / span;
    const long ky   = (idx - kx * span) / OW;
    const long ix   = idx % OW;

    // Output row, batch and channel come from the work-group ids.
    const long oh    = get_group_id(1);
    const long batch = get_group_id(2) / IC;
    const long ic    = get_group_id(2) % IC;

    // Source coordinate addressed by this (output position, kernel tap) pair.
    const long iiw = ix * s0 + kx * d0 - p0;
    const long iih = oh * s1 + ky * d1 - p1;

    // Destination element: one row of CHW values per (batch, oh, ix).
    const long patch_row = (batch * OH + oh) * OW + ix;
    const long patch_col = ic * (KW * KH) + ky * KW + kx;
    const long out_idx   = patch_row * CHW + patch_col;

    if (iih >= 0 && iih < IH && iiw >= 0 && iiw < IW) {
        const long plane = ic * delta_offset + batch * batch_offset;
        dest[out_idx] = src[plane + iih * IW + iiw];
    } else {
        // Coordinate falls outside the source plane: emit zero.
        dest[out_idx] = 0.0f;
    }
}