1#version 450
 2
 3#include "types.glsl"
 4
 5#extension GL_EXT_shader_16bit_storage : require
 6
 7layout(push_constant) uniform parameter {
 8    uint IW; uint IH;
 9    uint OW; uint OH;
10    uint OC;
11    uint pelements;
12    uint op;
13    int k0; int k1;
14    int s0; int s1;
15    int p0; int p1;
16} p;
17
// Pooling operation codes matched against p.op.
#define OP_POOL_MAX 0u
#define OP_POOL_AVG 1u

// Largest finite value used here as a float bound; its negation seeds the
// max-pooling reduction.
#define FLT_MAX 3.402823466e+38F

// Number of invocations per workgroup along x; the dispatch is 1-D.
#define BLOCK_SIZE 512

layout (local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in;

// Input tensor, read-only. A_TYPE is not defined in this file
// (NOTE(review): presumably supplied by types.glsl or the build - confirm).
layout (binding = 0) readonly buffer X
{
    A_TYPE data_a[];
};

// Output tensor, write-only. D_TYPE is likewise defined outside this file.
layout (binding = 1) writeonly buffer D
{
    D_TYPE data_d[];
};
27
// 2D pooling: each invocation produces exactly one output element.
//
// The flat invocation index is decomposed into (plane, output row, output
// column), where a plane is one IH x IW slice of the input and one OH x OW
// slice of the output. The pooling window for that output element is clamped
// to the input plane and reduced with either max or average, selected by p.op.
//
// Average pooling scales every sample by 1 / (k0 * k1), i.e. the full kernel
// area, so positions that fall into the padding contribute zero.
// A window that lies entirely in the padding yields 0 for average pooling and
// -FLT_MAX for max pooling. Invocations past p.pelements, or with an unknown
// p.op, return without writing anything.
void main() {
    const uint idx = gl_GlobalInvocationID.x;
    if (idx >= p.pelements) {
        return;
    }

    // Unknown operation: bail out before doing any work.
    if (p.op != OP_POOL_AVG && p.op != OP_POOL_MAX) {
        return;
    }
    const bool is_avg = (p.op == OP_POOL_AVG);

    const uint O_HW = p.OW * p.OH;

    const uint nc     = idx / O_HW;   // plane index
    const uint in_hw  = idx % O_HW;   // offset inside the output plane
    const uint cur_oh = in_hw / p.OW;
    const uint cur_ow = in_hw % p.OW;

    // Clamp the window in *signed* arithmetic. Mixing the signed window end
    // with the unsigned plane extent would convert a negative end (window
    // entirely inside the leading padding) to a huge unsigned value, which
    // then clamps to the full plane extent instead of producing an empty range.
    const int start_h = int(cur_oh) * p.s0 - p.p0;
    const int bh = max(start_h, 0);
    const int eh = min(start_h + p.k0, int(p.IH));

    const int start_w = int(cur_ow) * p.s1 - p.p1;
    const int bw = max(start_w, 0);
    const int ew = min(start_w + p.k1, int(p.IW));

    const float scale = 1.0 / float(p.k0 * p.k1);
    float res = is_avg ? 0.0 : -FLT_MAX;

    // Offset of the first element of this invocation's input plane.
    const uint plane_base = nc * p.IH * p.IW;

    #pragma unroll
    for (int i = bh; i < eh; i++) {
        const uint row_base = plane_base + uint(i) * p.IW;

        #pragma unroll
        for (int j = bw; j < ew; j++) {
            // Accumulate in float regardless of the storage types.
            const float cur = float(data_a[row_base + uint(j)]);

            if (is_avg) {
                res += cur * scale;
            } else {
                res = max(res, cur);
            }
        }
    }

    // nc * O_HW + cur_oh * OW + cur_ow is idx by construction.
    data_d[idx] = D_TYPE(res);
}