diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl | 57 |
1 file changed, 57 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl b/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl new file mode 100644 index 0000000..cf6cdaa --- /dev/null +++ b/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl | |||
| @@ -0,0 +1,57 @@ | |||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

/*
 * kernel_im2col_f16
 *
 * Every work-item writes exactly one half element of `dst`. The value is
 * read from the float buffer `src1`, or is zero when the computed source
 * coordinate lies outside the [0, IW) x [0, IH) range.
 *
 * Work-item mapping, as used by the code below:
 *   - get_global_id(0) : flat index, decoded into (kx, ky, ix); work-items
 *                        whose index is >= pelements return immediately.
 *   - get_group_id(1)  : used directly as the output row `oh`.
 *   - get_group_id(2)  : combined index, split into batch (/ IC) and
 *                        channel (% IC).
 *
 * Parameters:
 *   src1, offset1   source buffer and a byte offset added to it before use.
 *   dst, offsetd    destination buffer and a byte offset added to it.
 *   batch_offset    multiplied by the batch index to locate the source
 *                   plane (in float elements).
 *   delta_offset    multiplied by the channel index to locate the source
 *                   plane (in float elements).
 *   IW, IH          bounds used for the source coordinate check; IW is also
 *                   the source row pitch in elements.
 *   IC              divisor used to split get_group_id(2).
 *   OW, OH          output extents used in the index decode and in the
 *                   destination index.
 *   KW, KH          kernel extents used in the index decode and in the
 *                   destination index.
 *   pelements       number of valid indices along global dimension 0.
 *   CHW             per-output-position pitch of `dst`, in elements.
 *   s0, s1          multipliers applied to ix / oh.
 *   d0, d1          multipliers applied to kx / ky.
 *   p0, p1          values subtracted from the x / y source coordinate.
 *                   NOTE(review): presumably stride, dilation and padding;
 *                   confirm against the host-side launch code.
 */
kernel void kernel_im2col_f16(
        global float * src1,
        ulong offset1,
        global half * dst,
        ulong offsetd,
        ulong batch_offset,
        ulong delta_offset,
        long IW,
        long IH,
        long IC,
        long OW,
        long OH,
        long KW,
        long KH,
        long pelements,
        long CHW,
        int s0,
        int s1,
        int p0,
        int p1,
        int d0,
        int d1
) {
    const long gid = get_global_id(0);
    if (gid >= pelements) {
        return;
    }

    // Apply the byte offsets to obtain the effective base pointers.
    global float * in  = (global float *)((global char *)src1 + offset1);
    global half  * out = (global half  *)((global char *)dst  + offsetd);

    // Decode the flat index: kx is the slowest-varying part, ix the fastest.
    const long span = OW * KH;
    const long kx   = gid / span;
    const long ky   = (gid - kx * span) / OW;
    const long ix   = gid % OW;

    const long oh    = get_group_id(1);
    const long batch = get_group_id(2) / IC;
    const long ic    = get_group_id(2) % IC;

    // Source coordinate addressed by this work-item (may be out of range).
    const long in_x = ix * s0 + kx * d0 - p0;
    const long in_y = oh * s1 + ky * d1 - p1;

    const long dst_index =
        ((batch * OH + oh) * OW + ix) * CHW +
        (ic * (KW * KH) + ky * KW + kx);

    const bool inside = in_y >= 0 && in_y < IH && in_x >= 0 && in_x < IW;

    // Out-of-range coordinates contribute zero; otherwise copy the source
    // value. The float is converted to half by the store below.
    float value = 0.0f;
    if (inside) {
        const long src_base = ic * delta_offset + batch * batch_offset;
        value = in[src_base + in_y * IW + in_x];
    }
    out[dst_index] = value;
}
