From b333b06772c89d96aacb5490d6a219fba7c09cc6 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Thu, 12 Feb 2026 20:57:17 +0100 Subject: Engage! --- .../ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl | 66 ++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl (limited to 'llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl') diff --git a/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl b/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl new file mode 100644 index 0000000..e622552 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-webgpu/wgsl-shaders/cumsum.wgsl @@ -0,0 +1,66 @@ +@group(0) @binding(0) +var src: array; + +@group(0) @binding(1) +var dst: array; + +struct Params { + offset_src: u32, // in elements + offset_dst: u32, // in elements + ne0: u32, +}; + +@group(0) @binding(2) +var params: Params; + +var shared_sum: array; + +@compute @workgroup_size(WG_SIZE) +fn main(@builtin(workgroup_id) wid: vec3, + @builtin(local_invocation_id) lid: vec3) { + let row_idx = params.offset_src + wid.x * params.ne0; + let elems = (params.ne0 + WG_SIZE - 1) / WG_SIZE; + var local_sum: f32 = 0.0; + for (var col = lid.x * elems; col < (lid.x + 1) * elems && col < params.ne0; col ++) { + local_sum += src[row_idx + col]; + } + shared_sum[lid.x] = local_sum; + workgroupBarrier(); + + // upsweep + var offset = 1u; + while (offset < WG_SIZE) { + let idx = (lid.x + 1) * offset * 2 - 1; + if (idx < WG_SIZE) { + shared_sum[idx] = shared_sum[idx] + shared_sum[idx - offset]; + } + workgroupBarrier(); + offset <<= 1; + } + + // set last to 0 for exclusive sum + if (lid.x == 0) { + shared_sum[WG_SIZE - 1] = 0.0; + } + workgroupBarrier(); + + // downsweep + offset = WG_SIZE >> 1; + while (offset > 0) { + let idx = (lid.x + 1) * offset * 2 - 1; + if (idx < WG_SIZE) { + let t = shared_sum[idx - offset]; + shared_sum[idx - offset] = shared_sum[idx]; + shared_sum[idx] = shared_sum[idx] + t; + } + workgroupBarrier(); + offset = offset >> 1; + } + + // shared_sum[lid] is exclusive prefix sum up to this thread. + var running_sum = shared_sum[lid.x]; + for (var col = lid.x * elems; col < (lid.x + 1) * elems && col < params.ne0; col ++) { + running_sum += src[row_idx + col]; + dst[params.offset_dst + wid.x * params.ne0 + col] = running_sum; + } +} -- cgit v1.2.3