llmnpc - llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp

Path: llmnpc / llama.cpp / ggml / src / ggml-vulkan / vulkan-shaders / upscale.comp (raw)
  1#version 450
  2
  3layout (push_constant) uniform parameter
  4{
  5    uint ne; uint a_offset; uint d_offset;
  6    uint ne00; uint ne01;
  7    uint nb00; uint nb01; uint nb02; uint nb03;
  8    uint ne10; uint ne11; uint ne12; uint ne13;
  9    float sf0; float sf1; float sf2; float sf3;
 10    float pixel_offset;
 11} p;
 12
 13#include "types.glsl"
 14
 15layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
 16
 17layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
 18layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
 19
 20// from ggml.h: enum ggml_scale_mode, enum ggml_scale_flag
 21#define NEAREST  0
 22#define BILINEAR 1
 23#define BICUBIC  2
 24#define BILINEAR_ANTIALIAS 513
 25
 26layout (constant_id = 0) const uint scale_mode = 0;
 27
 28float fetch_nearest(uint i10, uint i11, uint i12, uint i13) {
 29    const uint i00 = uint(i10 / p.sf0);
 30    const uint i01 = uint(i11 / p.sf1);
 31    const uint i02 = uint(i12 / p.sf2);
 32    const uint i03 = uint(i13 / p.sf3);
 33
 34    return data_a[p.a_offset + i03 * p.nb03 + i02 * p.nb02 + i01 * p.nb01 + i00 * p.nb00];
 35}
 36
 37float fetch_bilinear(ivec2 c0, ivec2 c1, vec2 d, uint i12, uint i13) {
 38    const uint i02 = uint(i12 / p.sf2);
 39    const uint i03 = uint(i13 / p.sf3);
 40    const uint base = p.a_offset + i03 * p.nb03 + i02 * p.nb02;
 41
 42    const float v00 = data_a[base + c0.y * p.nb01 + c0.x * p.nb00];
 43    const float v01 = data_a[base + c0.y * p.nb01 + c1.x * p.nb00];
 44    const float v10 = data_a[base + c1.y * p.nb01 + c0.x * p.nb00];
 45    const float v11 = data_a[base + c1.y * p.nb01 + c1.x * p.nb00];
 46
 47    return
 48        v00 * (1.0-d.x) * (1.0-d.y) +
 49        v01 * d.x       * (1.0-d.y) +
 50        v10 * (1.0-d.x) * d.y +
 51        v11 * d.x       * d.y;
 52}
 53
 54float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
 55    const ivec2 ne0 = ivec2(p.ne00, p.ne01);
 56
 57    const vec2 c = (vec2(i10, i11) + p.pixel_offset) / vec2(p.sf0, p.sf1) - p.pixel_offset;
 58    const vec2 c0f = floor(c);
 59    const vec2 d = c - c0f;
 60    const ivec2 c0 = max(ivec2(c0f), 0);
 61    const ivec2 c1 = min(ivec2(c0f + 1), ne0 - 1);
 62
 63    return fetch_bilinear(c0, c1, d, i12, i13);
 64}
 65
 66float triangle_filter(float x) {
 67    return max(1.0f - abs(x), 0.0f);
 68}
 69
 70float interpolate_bilinear_antialias(uint i10, uint i11, uint i12, uint i13) {
 71    const float support1  = max(1.0f, 1.0f / p.sf1);
 72    const float invscale1 = 1.0f / support1;
 73    const float support0  = max(1.0f, 1.0f / p.sf0);
 74    const float invscale0 = 1.0f / support0;
 75
 76    const uint i02 = uint(i12 / p.sf2);
 77    const uint i03 = uint(i13 / p.sf3);
 78
 79    const float y = (float(i11) + p.pixel_offset) / p.sf1;
 80    const float x = (float(i10) + p.pixel_offset) / p.sf0;
 81
 82    // the range of source pixels that contribute
 83    const int x_min = max(int(x - support0 + p.pixel_offset), 0);
 84    const int x_max = min(int(x + support0 + p.pixel_offset), int(p.ne00));
 85    const int y_min = max(int(y - support1 + p.pixel_offset), 0);
 86    const int y_max = min(int(y + support1 + p.pixel_offset), int(p.ne01));
 87
 88    // bilinear filter with antialiasing
 89    float val = 0.0f;
 90    float total_weight = 0.0f;
 91
 92    for (int sy = y_min; sy < y_max; sy++) {
 93        const float weight_y = triangle_filter((sy - y + p.pixel_offset) * invscale1);
 94
 95        for (int sx = x_min; sx < x_max; sx++) {
 96            const float weight_x = triangle_filter((sx - x + p.pixel_offset) * invscale0);
 97            const float weight = weight_x * weight_y;
 98
 99            if (weight <= 0.0f) {
100                continue;
101            }
102
103            const float pixel = data_a[p.a_offset + i03 * p.nb03 + i02 * p.nb02 + sy * p.nb01 + sx * p.nb00];
104            val += pixel * weight;
105            total_weight += weight;
106        }
107    }
108
109    if (total_weight > 0.0f) {
110        val /= total_weight;
111    }
112
113    return val;
114}
115
116// Bicubic interpolation with alpha = -0.75
117// https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
118const vec4 bcoeffs1 = vec4( 1.25, -2.25,  0.0, 1.0);
119const vec4 bcoeffs2 = vec4(-0.75,  3.75, -6.0, 3.0);
120vec4 powers(float x) { return vec4(x*x*x, x*x, x, 1); }
121
122float bicubic(float p0, float p1, float p2, float p3, float x) {
123    return p0 * dot(bcoeffs2, powers(x + 1)) +
124           p1 * dot(bcoeffs1, powers(x    )) +
125           p2 * dot(bcoeffs1, powers(1 - x)) +
126           p3 * dot(bcoeffs2, powers(2 - x));
127}
128
129#define FETCH(a,b) data_a[base + clamp(i.x+(a), 0, res.x) * p.nb00 + clamp(i.y+(b), 0, res.y) * p.nb01]
130
131float interpolate_bicubic(uint i10, uint i11, uint i12, uint i13) {
132    const ivec2 res = ivec2(p.ne00 - 1, p.ne01 - 1);
133
134    const vec2 coord = (vec2(i10, i11) + p.pixel_offset) / vec2(p.sf0, p.sf1) - p.pixel_offset;
135    const vec2 d = fract(coord);
136    const ivec2 i = ivec2(floor(coord));
137
138    const uint i02 = uint(i12 / p.sf2);
139    const uint i03 = uint(i13 / p.sf3);
140    const uint base = p.a_offset + i03 * p.nb03 + i02 * p.nb02;
141
142    return bicubic(
143        bicubic(FETCH(-1,-1), FETCH(0,-1), FETCH(1,-1), FETCH(2,-1), d.x),
144        bicubic(FETCH(-1, 0), FETCH(0, 0), FETCH(1, 0), FETCH(2, 0), d.x),
145        bicubic(FETCH(-1, 1), FETCH(0, 1), FETCH(1, 1), FETCH(2, 1), d.x),
146        bicubic(FETCH(-1, 2), FETCH(0, 2), FETCH(1, 2), FETCH(2, 2), d.x), d.y);
147}
148
149void main() {
150    const uint idx = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
151
152    if (idx >= p.ne) {
153        return;
154    }
155
156    const uint i10 = idx % p.ne10;
157    const uint i11 = (idx / p.ne10) % p.ne11;
158    const uint i12 = (idx / (p.ne10 * p.ne11)) % p.ne12;
159    const uint i13 = (idx / (p.ne10 * p.ne11 * p.ne12)) % p.ne13;
160
161    float result;
162    switch (scale_mode) {
163        case NEAREST:
164            result = fetch_nearest(i10, i11, i12, i13);
165            break;
166        case BILINEAR:
167            result = interpolate_bilinear(i10, i11, i12, i13);
168            break;
169        case BICUBIC:
170            result = interpolate_bicubic(i10, i11, i12, i13);
171            break;
172        case BILINEAR_ANTIALIAS:
173            result = interpolate_bilinear_antialias(i10, i11, i12, i13);
174            break;
175    }
176
177    data_d[p.d_offset + idx] = D_TYPE(result);
178}