// Binding 0: input values scanned by main(). WGSL itself has no preprocessor,
// so the #ifdef/#define lines must be resolved before the shader is compiled.
// With VEC4 defined the buffer is viewed as vec4<f32> and VEC_SIZE is 4 (four
// scalars per load); otherwise it is a plain f32 array and VEC_SIZE is 1.
@group(0) @binding(0)
#ifdef VEC4
var<storage, read_write> src: array<vec4<f32>>;
#define VEC_SIZE 4
#else
var<storage, read_write> src: array<f32>;
#define VEC_SIZE 1
#endif

// Binding 1: output buffer. main() stores one i32 column index per workgroup.
@group(0) @binding(1)
var<storage, read_write> dst: array<i32>;
12
// Per-dispatch parameters read by main().
struct Params {
    offset_src: u32, // in elements; added to wid.x * ne0 to locate a row in src
    offset_dst: u32, // in elements; added to wid.x to locate the result slot in dst
    ne0: u32,        // number of scalar elements scanned per row (one row per workgroup)
};

// Binding 2: uniform block holding the parameters above.
@group(0) @binding(2)
var<uniform> params: Params;
21
// Value used to seed each invocation's running maximum in main().
// NOTE(review): -1.0e9 is far above the lowest finite f32, so it is not a
// true lower bound for the input values.
const FLOAT_MIN: f32 = -1.0e9;

// A candidate maximum: the value and the column index it was read from.
// main() uses index -1 for "no element seen".
struct Pair {
    value: f32,
    index: i32
};

// One candidate per invocation of the workgroup; main() reduces this array in
// place so that slot 0 ends up holding the workgroup-wide result.
// WG_SIZE is not defined in this file; it has to be supplied before compilation.
var<workgroup> shared_max: array<Pair, WG_SIZE>;
30
// Row-wise argmax. Workgroup `wid.x` scans the params.ne0 values of row
// `wid.x` in `src` and writes the column index of the largest one to
// dst[params.offset_dst + wid.x].
//
// * Ties resolve to the LARGEST column index: each invocation visits its
//   columns in ascending order and accepts on `>=`, and the reduction prefers
//   the higher index on equal values.
//   NOTE(review): confirm this matches the tie rule of the reference backend.
// * "Nothing accepted yet" is tracked by index < 0 rather than by comparing
//   against FLOAT_MIN, so rows whose values are all below -1.0e9 still yield a
//   valid index instead of -1.
// * A row with no comparable element (e.g. ne0 == 0) yields -1.
// * The reduction halves `offset` each round, which only visits every slot
//   when WG_SIZE is a power of two — assumed; verify where WG_SIZE is defined.
@compute @workgroup_size(WG_SIZE)
fn main(@builtin(workgroup_id) wid: vec3<u32>,
        @builtin(local_invocation_id) lid: vec3<u32>) {
    // Element offset of this workgroup's row inside src.
    let row_idx = params.offset_src + wid.x * params.ne0;

    // index < 0 marks an empty candidate; its value is only a placeholder.
    var local_pair = Pair(FLOAT_MIN, -1);
#ifdef VEC4
    // Each invocation handles every WG_SIZE-th vec4 of the row.
    // Assumes ne0 and row_idx are multiples of VEC_SIZE; a remainder would be
    // skipped by the integer divisions below — TODO confirm the host guarantees it.
    for (var col = lid.x; col < params.ne0 / VEC_SIZE; col += WG_SIZE) {
        let vec_val = src[row_idx / VEC_SIZE + col];
        for (var v = 0u; v < VEC_SIZE; v++) {
            let val = vec_val[v];
            // Accept on >= (later index wins ties). While the candidate is
            // still empty, also accept any value that orders below the
            // placeholder; a value that compares neither way is skipped.
            if (val >= local_pair.value || (local_pair.index < 0 && val < local_pair.value)) {
                local_pair = Pair(val, i32(col * VEC_SIZE + v));
            }
        }
    }
#else
    // Each invocation handles every WG_SIZE-th scalar of the row.
    for (var col = lid.x; col < params.ne0; col += WG_SIZE) {
        let val = src[row_idx + col]; // single load instead of reading src twice
        // Same acceptance rule as the VEC4 path.
        if (val >= local_pair.value || (local_pair.index < 0 && val < local_pair.value)) {
            local_pair = Pair(val, i32(col));
        }
    }
#endif

    // Publish the per-invocation candidate, then tree-reduce in shared memory.
    shared_max[lid.x] = local_pair;
    workgroupBarrier();

    var offset: u32 = WG_SIZE >> 1;
    while (offset > 0u) {
        if (lid.x < offset) {
            let a = shared_max[lid.x];
            let b = shared_max[lid.x + offset];
            // b replaces a when b is a real candidate and either a is empty,
            // b is larger, or they tie and b has the higher column index.
            let b_wins = b.value > a.value || (b.value == a.value && b.index > a.index);
            if (b.index >= 0 && (a.index < 0 || b_wins)) {
                shared_max[lid.x] = b;
            }
        }
        // Every invocation reaches this barrier each round (uniform control flow).
        workgroupBarrier();
        offset >>= 1;
    }

    // Slot 0 now holds the winner for the whole row.
    if (lid.x == 0u) {
        dst[params.offset_dst + wid.x] = shared_max[0].index;
    }
}