experimenting subgroup change for specific kernels only

This commit is contained in:
Nakasaka, Masato 2025-11-26 00:22:21 -08:00
parent 4a796af20c
commit 710f84862f
1 changed files with 7 additions and 3 deletions

View File

@ -2656,10 +2656,13 @@ static const std::unordered_map<std::string, uint32_t> rdna2_pipelines = {
static constexpr uint32_t RDNA_DEFAULT_SUBGROUP_SIZE = 32;
// Per-pipeline table for Intel Xe2-onward GPUs, keyed by pipeline name.
// NOTE(review): the mapped value is presumably the subgroup size to use for that
// pipeline (16 for both entries) in place of the architecture-wide default —
// confirm against the code that consumes this table.
static const std::unordered_map<std::string, uint32_t> xe2_onward_pipelines = {
{"matmul_id_subgroup_q4_k_f32_f16acc_aligned_m", 16},
{"matmul_id_subgroup_q6_k_f32_f16acc_aligned_m", 16},
};
// Intel GPUs can use a subgroup size of 8, 16, or 32 depending on the architecture.
// Pre-Xe2 is 8, 16, or 32 and Xe2 onward is 16 or 32. 32 is the default if nothing is specified.
// We are using 16 as current default since we see better compute utilization.
static constexpr uint32_t INTEL_DEFAULT_SUBGROUP_SIZE = 16;
// Pre-Xe2 is 8, 16, or 32. Xe2 onward is 16 or 32. 32 is the default if nothing is specified.
static constexpr uint32_t INTEL_DEFAULT_SUBGROUP_SIZE = 32;
// Define configurations for different GPUs.
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
@ -2686,6 +2689,7 @@ static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
{
vk_device_architecture::INTEL_XE2_ONWARD,
{
xe2_onward_pipelines,
},
INTEL_DEFAULT_SUBGROUP_SIZE
},