From b96b82fc852b83ea6f58ffcdc264308f60f79211 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Fri, 26 Dec 2025 10:00:57 -0600 Subject: [PATCH] vulkan: Support UPSCALE w/antialias (#18327) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 14 +++-- .../ggml-vulkan/vulkan-shaders/upscale.comp | 54 +++++++++++++++++++ tests/test-backend-ops.cpp | 32 ++++++----- 3 files changed, 84 insertions(+), 16 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index e7ce518fba..7ae2e38356 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -651,7 +651,7 @@ struct vk_device_struct { vk_pipeline pipeline_add_id_f32; vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32; - vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32; + vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32, pipeline_upscale_bilinear_antialias_f32; vk_pipeline pipeline_scale_f32; vk_pipeline pipeline_sqr_f32; vk_pipeline pipeline_sqrt_f32; @@ -3956,6 +3956,7 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1); ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1); ggml_vk_create_pipeline(device, device->pipeline_upscale_bicubic_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BICUBIC}, 1); + ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_antialias_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, 
sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS}, 1); ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); @@ -8433,7 +8434,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const return nullptr; case GGML_OP_UPSCALE: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - ggml_scale_mode mode = (ggml_scale_mode)(ggml_get_op_params_i32(dst, 0) & 0xFF); + uint32_t mode = (ggml_get_op_params_i32(dst, 0) & (0xFF | GGML_SCALE_FLAG_ANTIALIAS)); switch (mode) { case GGML_SCALE_MODE_NEAREST: return ctx->device->pipeline_upscale_nearest_f32; @@ -8441,6 +8442,8 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const return ctx->device->pipeline_upscale_bilinear_f32; case GGML_SCALE_MODE_BICUBIC: return ctx->device->pipeline_upscale_bicubic_f32; + case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS: + return ctx->device->pipeline_upscale_bilinear_antialias_f32; default: return nullptr; } @@ -14341,7 +14344,12 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm } return true; case GGML_OP_UPSCALE: - return op->src[0]->type == GGML_TYPE_F32 && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS); + if (op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS) { + if ((op->op_params[0] & 0xFF) != GGML_SCALE_MODE_BILINEAR) { + return false; + } + } + return op->src[0]->type == GGML_TYPE_F32; case GGML_OP_ACC: return op->src[0]->type == GGML_TYPE_F32; case GGML_OP_CONCAT: diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp index 037ab0c78f..f7d12a8dda 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp @@ -21,6 +21,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];}; #define NEAREST 0 
#define BILINEAR 1 #define BICUBIC 2 +#define BILINEAR_ANTIALIAS 513 layout (constant_id = 0) const uint scale_mode = 0; @@ -62,6 +63,56 @@ float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) { return fetch_bilinear(c0, c1, d, i12, i13); }
+// Tent kernel: 1 at x == 0, falling linearly to 0 for |x| >= 1.
+float triangle_filter(float x) {
+    return max(1.0f - abs(x), 0.0f);
+}
+
+// Antialiased bilinear sample: weighted mean of the source pixels under a triangle
+// filter whose support grows to 1/sf along an axis when that axis is downscaled (sf < 1).
+float interpolate_bilinear_antialias(uint i10, uint i11, uint i12, uint i13) {
+    const float support1 = max(1.0f, 1.0f / p.sf1);
+    const float invscale1 = 1.0f / support1;
+    const float support0 = max(1.0f, 1.0f / p.sf0);
+    const float invscale0 = 1.0f / support0;
+
+    const uint i02 = uint(i12 / p.sf2);
+    const uint i03 = uint(i13 / p.sf3);
+    const uint plane_base = p.a_offset + i03 * p.nb03 + i02 * p.nb02;
+    // filter center in source coordinates
+    const float y = (float(i11) + p.pixel_offset) / p.sf1;
+    const float x = (float(i10) + p.pixel_offset) / p.sf0;
+
+    // the range of source pixels that contribute
+    const int x_min = max(int(x - support0 + p.pixel_offset), 0);
+    const int x_max = min(int(x + support0 + p.pixel_offset), int(p.ne00));
+    const int y_min = max(int(y - support1 + p.pixel_offset), 0);
+    const int y_max = min(int(y + support1 + p.pixel_offset), int(p.ne01));
+
+    float val = 0.0f;
+    float total_weight = 0.0f;
+    for (int sy = y_min; sy < y_max; sy++) {
+        const float weight_y = triangle_filter((sy - y + p.pixel_offset) * invscale1);
+        if (weight_y <= 0.0f) {
+            continue; // every product in this row would be <= 0, so skip the row
+        }
+        const uint row_base = plane_base + sy * p.nb01;
+        for (int sx = x_min; sx < x_max; sx++) {
+            const float weight = triangle_filter((sx - x + p.pixel_offset) * invscale0) * weight_y;
+            if (weight <= 0.0f) {
+                continue;
+            }
+            val += data_a[row_base + sx * p.nb00] * weight;
+            total_weight += weight;
+        }
+    }
+    // normalize; val is left as accumulated (0) when no pixel had a positive weight
+    if (total_weight > 0.0f) {
+        val /= total_weight;
+    }
+    return val;
+}
+
// Bicubic interpolation with alpha = -0.75 // https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm const vec4 bcoeffs1 = vec4( 1.25, -2.25, 0.0, 1.0); @@ -118,6 +169,9 @@ void main() { 
case BICUBIC: result = interpolate_bicubic(i10, i11, i12, i13); break; + case BILINEAR_ANTIALIAS: + result = interpolate_bilinear_antialias(i10, i11, i12, i13); + break; } data_d[p.d_offset + idx] = D_TYPE(result); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index a801455f16..0b981b1788 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -402,12 +402,20 @@ static std::string var_to_str(ggml_op_pool pool) { }
 static std::string var_to_str(ggml_scale_mode mode) {
-    switch (mode) {
-        case GGML_SCALE_MODE_NEAREST: return "nearest";
-        case GGML_SCALE_MODE_BILINEAR: return "bilinear";
-        case GGML_SCALE_MODE_BICUBIC: return "bicubic";
-        default: return std::to_string(mode);
+    std::string str; // base mode name (low 8 bits), then "|<flag>" for each flag bit that is set
+    switch (mode & 0xFF) {
+        case GGML_SCALE_MODE_NEAREST: str = "nearest"; break;
+        case GGML_SCALE_MODE_BILINEAR: str = "bilinear"; break;
+        case GGML_SCALE_MODE_BICUBIC: str = "bicubic"; break;
+        default: str = std::to_string(mode & 0xFF); break; // unknown base mode: print its number only, flags are appended below
     }
+    if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+        str += "|align_corners";
+    }
+    if (mode & GGML_SCALE_FLAG_ANTIALIAS) {
+        str += "|antialias";
+    }
+    return str;
 }
#define VAR_TO_STR(x) (#x "=" + var_to_str(x)) @@ -5535,18 +5543,16 @@ struct test_interpolate : public test_case { const ggml_type type; const std::array ne; const std::array ne_tgt; - const uint32_t mode = GGML_SCALE_MODE_NEAREST; + const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST; std::string vars() override { - ggml_scale_mode mode = (ggml_scale_mode)(this->mode & 0xFF); - std::string flags = (this->mode & GGML_SCALE_FLAG_ALIGN_CORNERS) ? 
"align_corners" : "none"; - return VARS_TO_STR5(type, ne, ne_tgt, mode, flags); + return VARS_TO_STR4(type, ne, ne_tgt, mode); } test_interpolate(ggml_type type = GGML_TYPE_F32, std::array ne = {2, 5, 7, 11}, std::array ne_tgt = {5, 7, 11, 13}, - uint32_t mode = GGML_SCALE_MODE_NEAREST) + ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST) : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {} ggml_tensor * build_graph(ggml_context * ctx) override { @@ -7883,9 +7889,9 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode)); } for (ggml_scale_mode mode : {GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) { - test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS)); - test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS)); - test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS)); + test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS))); + test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS))); + test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS))); } test_cases.emplace_back(new test_sum());