From 73357da45b4a965e6f74e0c23c48df8f6346c033 Mon Sep 17 00:00:00 2001 From: hogeheer499 Date: Thu, 12 Mar 2026 23:09:41 +0100 Subject: [PATCH] ggml-cuda: fix UMA memory detection for HIP/ROCm on AMD APUs AMD APUs report prop.integrated=1 which triggers the UMA memory path from #17368. This overrides hipMemGetInfo() (accurate) with /proc/meminfo MemAvailable (too low), losing ~30 GiB on a 128GB Strix Halo system. For HIP builds, only enter the UMA path when GGML_CUDA_ENABLE_UNIFIED_MEMORY is explicitly set. This preserves correct behavior for both cases: - Default: hipMemGetInfo() reports accurate TTM-backed memory - GGML_CUDA_ENABLE_UNIFIED_MEMORY=1: /proc/meminfo is used (system RAM mode) Tested on AMD Ryzen AI MAX+ 395, Radeon 8060S (gfx1151), 128GB, ROCm 7.1. Fixes: ggml-org#18159 --- ggml/src/ggml-cuda/ggml-cuda.cu | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 9d2aacf4b2..97151481f1 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4569,7 +4569,14 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * // Check if UMA is explicitly enabled via environment variable bool uma_env = getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr; + +#if defined(GGML_USE_HIP) + // On AMD APUs, prop.integrated is true but hipMemGetInfo() already returns + // the correct TTM-backed memory. Only use the UMA path when explicitly requested. + bool is_uma = uma_env; +#else bool is_uma = prop.integrated > 0 || uma_env; +#endif if (is_uma) { // For UMA systems (like DGX Spark), use system memory info