diff --git a/ggml/src/ggml-cuda/convert.cuh b/ggml/src/ggml-cuda/convert.cuh index c429583661..c22282139f 100644 --- a/ggml/src/ggml-cuda/convert.cuh +++ b/ggml/src/ggml-cuda/convert.cuh @@ -62,8 +62,8 @@ template #else return {x.x, x.y}; #endif // GGML_USE_HIP - } else if constexpr (std::is_same_v) { -#if defined(__CUDA_ARCH__) + } else if constexpr (std::is_same_v && std::is_same_v) { + #if defined(__CUDA_ARCH__) #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && defined(CUDART_VERSION) && CUDART_VERSION >= 12050 // This matches cuda_fp8.h's version gate. // This uses the same fp8 conversion that __nv_fp8_e4m3 uses internally. __half h = __half(__nv_cvt_fp8_to_halfraw((__nv_fp8_storage_t) x.x, __NV_E4M3));