1#version 450
2
3#include "types.glsl"
4
5#extension GL_EXT_shader_16bit_storage : require
6
// Push-constant parameters of the 2D pooling shader.
// Member order and types determine the push-constant layout and are unchanged.
layout(push_constant) uniform parameter {
    uint IW;         // input width: row length used when indexing data_a
    uint IH;         // input height: rows per input plane
    uint OW;         // output width
    uint OH;         // output height
    uint OC;         // not read by the visible shader code; presumably a channel count - confirm against host code
    uint pelements;  // number of output elements; invocations with index >= pelements exit early
    uint op;         // pooling operator selector, compared against OP_POOL_MAX / OP_POOL_AVG
    int k0;          // window extent applied along the height axis by main()
    int k1;          // window extent applied along the width axis by main()
    int s0;          // stride multiplied with the output row index
    int s1;          // stride multiplied with the output column index
    int p0;          // padding subtracted from the window start along the height axis
    int p1;          // padding subtracted from the window start along the width axis
} p;
17
// Number of invocations per workgroup along x (used by the local_size_x qualifier).
#define BLOCK_SIZE  512
// Largest finite 32-bit float; its negation seeds the running maximum for max pooling.
#define FLT_MAX     3.402823466e+38F
// Values of the push-constant `op` field that main() recognises.
#define OP_POOL_MAX 0u
#define OP_POOL_AVG 1u
22
// One-dimensional workgroup: BLOCK_SIZE invocations along x, 1 along y and z.
layout(
    local_size_x = BLOCK_SIZE,
    local_size_y = 1,
    local_size_z = 1
) in;

// Binding 0 is the read-only pooling input, binding 1 the write-only pooling output.
// A_TYPE and D_TYPE are not defined in this file; they are expected to be supplied
// by the include or the build.
layout(binding = 0) readonly buffer X {
    A_TYPE data_a[];
};
layout(binding = 1) writeonly buffer D {
    D_TYPE data_d[];
};
27
// 2D pooling: each invocation computes exactly one output element.
//
// The flat invocation index is decomposed into (plane, output row, output column).
// The pooling window for that output position is clamped to the input extent and
// reduced with either a running maximum (OP_POOL_MAX) or a scaled sum (OP_POOL_AVG).
// The average always divides by the full window size k0 * k1, so positions that
// fall into the padding contribute zero to the average.
//
// NOTE(review): k0/s0/p0 are applied along the height axis and k1/s1/p1 along the
// width axis here; confirm this matches the host-side convention for non-square
// windows.
void main() {
    const uint idx = gl_GlobalInvocationID.x;
    if (idx >= p.pelements) {
        return;
    }

    // Unknown operators produce no output; decide this before doing any index math.
    const bool is_avg = (p.op == OP_POOL_AVG);
    if (!is_avg && p.op != OP_POOL_MAX) {
        return;
    }

    const uint O_HW = p.OW * p.OH;

    const uint nc     = idx / O_HW;   // index of the 2D plane this element belongs to
    const uint in_hw  = idx % O_HW;   // offset of the element inside its output plane
    const uint cur_oh = in_hw / p.OW;
    const uint cur_ow = in_hw % p.OW;

    // Window bounds are kept signed on purpose. Mixing int and uint in min() would
    // implicitly convert a negative end bound (window entirely inside the leading
    // padding) to a huge unsigned value, which then clamps to the full input extent
    // and makes the loops scan the whole dimension instead of an empty window.
    const int start_h = int(cur_oh) * p.s0 - p.p0;
    const int bh = max(start_h, 0);
    const int eh = min(start_h + p.k0, int(p.IH));

    const int start_w = int(cur_ow) * p.s1 - p.p1;
    const int bw = max(start_w, 0);
    const int ew = min(start_w + p.k1, int(p.IW));

    const float scale = 1.0 / float(p.k0 * p.k1);

    // An empty window leaves these seeds untouched: 0 for average, -FLT_MAX for max.
    float res = is_avg ? 0.0 : -FLT_MAX;

    // First element of the input plane that corresponds to this output plane.
    const uint plane_base = nc * p.IH * p.IW;

    #pragma unroll
    for (int i = bh; i < eh; i++) {
        // i and j are non-negative here because bh and bw are clamped to 0.
        const uint row_base = plane_base + uint(i) * p.IW;

        #pragma unroll
        for (int j = bw; j < ew; j++) {
            // Accumulate in float regardless of the storage types.
            const float cur = float(data_a[row_base + uint(j)]);

            if (is_avg) {
                res += cur * scale;
            } else {
                res = max(res, cur);
            }
        }
    }

    // idx == nc * O_HW + cur_oh * p.OW + cur_ow by construction of the decomposition.
    data_d[idx] = D_TYPE(res);
}