Only apply subgroup size change to M size kernels
This commit is contained in:
parent
7e05215f9a
commit
fe8a3db76a
|
|
@ -2889,7 +2889,7 @@ static constexpr uint32_t RDNA_DEFAULT_SUBGROUP_SIZE = 32;
|
||||||
|
|
||||||
static std::vector<uint32_t> calc_specialization_constant_intel_xe2_onward(const PipelineConfigParameter& config, const std::vector<uint32_t>& current) {
|
static std::vector<uint32_t> calc_specialization_constant_intel_xe2_onward(const PipelineConfigParameter& config, const std::vector<uint32_t>& current) {
|
||||||
std::vector<uint32_t> output = current;
|
std::vector<uint32_t> output = current;
|
||||||
// replacing subgroup_size_8 with new value for l_warptile_mmq and m_warptile_mmq
|
// replacing subgroup_size_8 with current subgroup size for m_warptile_mmq
|
||||||
output[4] = config.subgroup_size;
|
output[4] = config.subgroup_size;
|
||||||
output[10] = config.subgroup_size;
|
output[10] = config.subgroup_size;
|
||||||
return output;
|
return output;
|
||||||
|
|
@ -2897,14 +2897,12 @@ static std::vector<uint32_t> calc_specialization_constant_intel_xe2_onward(const
|
||||||
|
|
||||||
static const std::unordered_map<std::string, PipelineConfigParameter> xe2_onward_pipelines = {
|
static const std::unordered_map<std::string, PipelineConfigParameter> xe2_onward_pipelines = {
|
||||||
{"matmul_id_subgroup_q4_k_f32_f16acc_aligned_m", {16, {}, calc_specialization_constant_intel_xe2_onward}},
|
{"matmul_id_subgroup_q4_k_f32_f16acc_aligned_m", {16, {}, calc_specialization_constant_intel_xe2_onward}},
|
||||||
{"matmul_id_subgroup_q4_k_f32_f16acc_aligned_l", {16, {}, calc_specialization_constant_intel_xe2_onward}},
|
|
||||||
{"matmul_id_subgroup_q6_k_f32_f16acc_aligned_m", {16, {}, calc_specialization_constant_intel_xe2_onward}},
|
{"matmul_id_subgroup_q6_k_f32_f16acc_aligned_m", {16, {}, calc_specialization_constant_intel_xe2_onward}},
|
||||||
{"matmul_id_subgroup_q6_k_f32_f16acc_aligned_l", {16, {}, calc_specialization_constant_intel_xe2_onward}},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Intel GPU can use subgroup 8, 16, or 32 depending on architecture.
|
// Intel GPU can use subgroup 8, 16, or 32 depending on architecture.
|
||||||
// Pre-Xe2 is 8, 16, or 32. Xe2 onward is 16 or 32. 32 is the default if nothing is specified.
|
// Pre-Xe2 is 8, 16, or 32. Xe2 onward is 16 or 32. 32 is the default if nothing is specified.
|
||||||
static constexpr uint32_t INTEL_DEFAULT_SUBGROUP_SIZE = 16;
|
static constexpr uint32_t INTEL_DEFAULT_SUBGROUP_SIZE = 32;
|
||||||
|
|
||||||
// Define configurations for different GPUs.
|
// Define configurations for different GPUs.
|
||||||
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
|
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue