HIP: fix RDNA3 FP16/BF16 matrix multiplication (#17817)
parent a28e3c7567
commit f334b79494
@@ -160,9 +160,9 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const
         case GGML_TYPE_F32:
             return ampere_mma_available(cc);
         case GGML_TYPE_F16:
-            return volta_mma_available(cc) || turing_mma_available(cc) || amd_wmma_available(cc);
+            return volta_mma_available(cc) || turing_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc));
         case GGML_TYPE_BF16:
-            return ampere_mma_available(cc) || amd_wmma_available(cc);
+            return ampere_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc));
         default:
             return false;
     }
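The change tightens the AMD branch of the kernel-selection predicate: `amd_wmma_available(cc)` alone previously enabled the MMF path on RDNA3, and the predicate now additionally requires `GGML_CUDA_CC_IS_RDNA4(cc)`, so RDNA3 devices no longer take this path for F16/BF16 and instead fall back to other matrix-multiplication kernels, which is presumably how the commit fixes RDNA3 results. Below is a minimal, self-contained sketch of the resulting dispatch; the helper functions and cc thresholds are hypothetical stand-ins, not ggml's real compute-capability encoding, and only the boolean structure mirrors the diff above.

// Minimal sketch of the new dispatch predicate. The helpers and cc values
// here are hypothetical stand-ins, NOT ggml's real encoding; only the
// boolean structure mirrors the change in this commit.
#include <cstdio>

enum ggml_type { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16 };

// Assumed stand-ins: pretend cc >= 1100 means WMMA-capable (RDNA3+) and
// cc >= 1200 means RDNA4. NVIDIA paths are stubbed out to focus on AMD.
static bool amd_wmma_available(int cc) { return cc >= 1100; }
static bool is_rdna4(int cc)           { return cc >= 1200; }
static bool ampere_mma_available(int)  { return false; }
static bool volta_mma_available(int)   { return false; }
static bool turing_mma_available(int)  { return false; }

// After this commit: F16/BF16 select the MMF path on AMD only when the
// device is RDNA4; RDNA3 (WMMA-capable but not RDNA4) returns false and
// falls back to other matrix-multiplication kernels.
static bool should_use_mmf(ggml_type type, int cc) {
    switch (type) {
        case GGML_TYPE_F32:
            return ampere_mma_available(cc);
        case GGML_TYPE_F16:
            return volta_mma_available(cc) || turing_mma_available(cc)
                || (amd_wmma_available(cc) && is_rdna4(cc));
        case GGML_TYPE_BF16:
            return ampere_mma_available(cc)
                || (amd_wmma_available(cc) && is_rdna4(cc));
        default:
            return false;
    }
}

int main() {
    // RDNA3-like device: MMF is now skipped for F16/BF16 (the fix).
    printf("cc=1100 F16  -> %d\n", should_use_mmf(GGML_TYPE_F16,  1100)); // 0
    printf("cc=1100 BF16 -> %d\n", should_use_mmf(GGML_TYPE_BF16, 1100)); // 0
    // RDNA4-like device: MMF is still selected.
    printf("cc=1200 F16  -> %d\n", should_use_mmf(GGML_TYPE_F16,  1200)); // 1
    return 0;
}

Pairing the feature check with the architecture check keeps RDNA4 on the WMMA-backed MMF path while routing RDNA3 to the fallback kernels.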