From 873c825611d9cb76427931b5e74642bade4853dd Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Mon, 13 Apr 2026 07:14:58 +0530 Subject: [PATCH] sycl: disable Q1_0 in backend and cleanup unused variables (#21807) --- ggml/src/ggml-sycl/convert.cpp | 2 +- ggml/src/ggml-sycl/dequantize.hpp | 1 + ggml/src/ggml-sycl/element_wise.cpp | 2 +- ggml/src/ggml-sycl/gated_delta_net.cpp | 10 ++++------ ggml/src/ggml-sycl/ggml-sycl.cpp | 7 +++++++ ggml/src/ggml-sycl/upscale.cpp | 8 ++++---- 6 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-sycl/convert.cpp b/ggml/src/ggml-sycl/convert.cpp index d7f60cbc9e..f12419426a 100644 --- a/ggml/src/ggml-sycl/convert.cpp +++ b/ggml/src/ggml-sycl/convert.cpp @@ -488,7 +488,7 @@ static void dequantize_row_nvfp4_sycl(const void * vx, dst_t * y, const int64_t const int nb = k / QK_NVFP4; stream->parallel_for( sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> /*item_ct1*/) { dequantize_block_nvfp4(vx, y, k); }); } diff --git a/ggml/src/ggml-sycl/dequantize.hpp b/ggml/src/ggml-sycl/dequantize.hpp index f992db33b2..68c3db3061 100644 --- a/ggml/src/ggml-sycl/dequantize.hpp +++ b/ggml/src/ggml-sycl/dequantize.hpp @@ -14,6 +14,7 @@ #define GGML_SYCL_DEQUANTIZE_HPP #include "common.hpp" +#include "convert.hpp" typedef void (*dequantize_kernel_t)(const void * vx, const int64_t ib, const int iqs, dfloat2 & v); typedef void (*dequantize_kernel_t_reorder)(const void *d, const int64_t ib, const void *qs, diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index ec0247528c..249e80c826 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -355,7 +355,7 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst, const int num_blocks = (n_elements + SYCL_ACC_BLOCK_SIZE - 1) / SYCL_ACC_BLOCK_SIZE; stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> /*item_ct1*/) { acc_f32(x, y, dst, n_elements, ne10, ne11, ne12, ne13, s1, s2, s3, offset); }); } diff --git a/ggml/src/ggml-sycl/gated_delta_net.cpp b/ggml/src/ggml-sycl/gated_delta_net.cpp index 648455c134..ebc587524b 100644 --- a/ggml/src/ggml-sycl/gated_delta_net.cpp +++ b/ggml/src/ggml-sycl/gated_delta_net.cpp @@ -176,14 +176,12 @@ static void launch_gated_delta_net(const float * q_d, const sycl::uint3 neqk1_magic = init_fastdiv_values(neqk1); const sycl::uint3 rq3_magic = init_fastdiv_values(rq3); - int cc = ggml_sycl_info().devices[ggml_sycl_get_device()].cc; - switch (S_v) { case 16: { constexpr int sv = 16; stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + [=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { gated_delta_net_sycl(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale); @@ -194,7 +192,7 @@ static void launch_gated_delta_net(const float * q_d, { constexpr int sv = 32; stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + [=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { gated_delta_net_sycl(q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale); @@ -205,7 +203,7 @@ static void launch_gated_delta_net(const float * q_d, { constexpr int sv = 64; stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + [=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { gated_delta_net_sycl( q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale); @@ -217,7 +215,7 @@ static void launch_gated_delta_net(const float * q_d, { constexpr int sv = 128; stream->parallel_for(sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + [=](sycl::nd_item<3> /*item_ct1*/) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { gated_delta_net_sycl( q_d, k_d, v_d, g_d, b_d, s_d, dst_d, H, n_tokens, n_seqs, sq1, sq2, sq3, sv1, sv2, sv3, sb1, sb2, sb3, neqk1_magic, rq3_magic, scale); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 989c91a6ab..ea79d2538c 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -4727,12 +4727,19 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g struct ggml_tensor * a = op->src[0]; struct ggml_tensor * b = op->src[1]; + // disable Q1_0 until implementation + if (a->type == GGML_TYPE_Q1_0 || b->type == GGML_TYPE_Q1_0) { + return false; + } + if (a->ne[3] != b->ne[3]) { return false; } ggml_type src0_type = op->src[0]->type; + + // TODO: The configuration below needs more work to be supported with oneDNN if (ggml_is_permuted(a) && !ggml_is_contiguous(a) && a->ne[2] > 1 && a->ne[3] > 1 && src0_type == GGML_TYPE_F16) { diff --git a/ggml/src/ggml-sycl/upscale.cpp b/ggml/src/ggml-sycl/upscale.cpp index 18c743de44..e42cb419d8 100644 --- a/ggml/src/ggml-sycl/upscale.cpp +++ b/ggml/src/ggml-sycl/upscale.cpp @@ -272,7 +272,7 @@ static void upscale_f32_sycl(const float * x, sycl::nd_range<3>( sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> /*item_ct1*/) { upscale_f32(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3); }); } @@ -304,7 +304,7 @@ static void upscale_f32_bilinear_sycl(const float * x, sycl::nd_range<3>( sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> /*item_ct1*/) { upscale_f32_bilinear_antialias( x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset); @@ -314,7 +314,7 @@ static void upscale_f32_bilinear_sycl(const float * x, sycl::nd_range<3>( sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> /*item_ct1*/) { upscale_f32_bilinear( x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset); @@ -349,7 +349,7 @@ static void upscale_f32_bicubic_sycl(const float * x, sycl::nd_range<3>( sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { + [=](sycl::nd_item<3> /*item_ct1*/) { upscale_f32_bicubic( x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);