hip: use fnuz fp8 for conversion on CDNA3 (#21040)

This commit is contained in:
uvos 2026-03-26 23:06:33 +01:00 committed by GitHub
parent 8c60b8a2be
commit 7ca0c9cca7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 6 additions and 0 deletions

View File

@ -802,7 +802,13 @@ static __device__ __forceinline__ float ggml_cuda_e8m0_to_fp32(uint8_t x) {
static __device__ __forceinline__ float ggml_cuda_ue4m3_to_fp32(uint8_t x) {
#ifdef FP8_AVAILABLE
const uint32_t bits = x * (x != 0x7F && x != 0xFF); // Convert NaN to 0.0f to match CPU implementation.
#if defined(GGML_USE_HIP) && defined(CDNA3)
// ROCm dose not support fp8 in software on devices with fp8 hardware,
// but CDNA3 supports only e4m3_fnuz (no inf).
const __hip_fp8_e4m3_fnuz xf = *reinterpret_cast<const __hip_fp8_e4m3_fnuz *>(&bits);
#else
const __nv_fp8_e4m3 xf = *reinterpret_cast<const __nv_fp8_e4m3 *>(&bits);
#endif // defined(GGML_USE_HIP) && defined(GGML_USE_HIP)
return static_cast<float>(xf) / 2;
#else
NO_DEVICE_CODE;