From 73357da45b4a965e6f74e0c23c48df8f6346c033 Mon Sep 17 00:00:00 2001 From: hogeheer499 Date: Thu, 12 Mar 2026 23:09:41 +0100 Subject: [PATCH 1/2] ggml-cuda: fix UMA memory detection for HIP/ROCm on AMD APUs AMD APUs report prop.integrated=1 which triggers the UMA memory path from #17368. This overrides hipMemGetInfo() (accurate) with /proc/meminfo MemAvailable (too low), losing ~30 GiB on a 128GB Strix Halo system. For HIP builds, only enter the UMA path when GGML_CUDA_ENABLE_UNIFIED_MEMORY is explicitly set. This preserves correct behavior for both cases: - Default: hipMemGetInfo() reports accurate TTM-backed memory - GGML_CUDA_ENABLE_UNIFIED_MEMORY=1: /proc/meminfo is used (system RAM mode) Tested on AMD Ryzen AI MAX+ 395, Radeon 8060S (gfx1151), 128GB, ROCm 7.1. Fixes: ggml-org#18159 --- ggml/src/ggml-cuda/ggml-cuda.cu | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 9d2aacf4b2..97151481f1 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4569,7 +4569,14 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * // Check if UMA is explicitly enabled via environment variable bool uma_env = getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr; + +#if defined(GGML_USE_HIP) + // On AMD APUs, prop.integrated is true but hipMemGetInfo() already returns + // the correct TTM-backed memory. Only use the UMA path when explicitly requested. + bool is_uma = uma_env; +#else bool is_uma = prop.integrated > 0 || uma_env; +#endif if (is_uma) { // For UMA systems (like DGX Spark), use system memory info From e0dace50d06159bd927289401afaeb26b132ae5b Mon Sep 17 00:00:00 2001 From: hogeheer499-commits Date: Fri, 13 Mar 2026 17:37:40 +0100 Subject: [PATCH 2/2] Update ggml/src/ggml-cuda/ggml-cuda.cu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Johannes Gäßler --- ggml/src/ggml-cuda/ggml-cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 97151481f1..ed9af0c409 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4576,7 +4576,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * bool is_uma = uma_env; #else bool is_uma = prop.integrated > 0 || uma_env; -#endif +#endif // defined(GGML_USE_HIP) if (is_uma) { // For UMA systems (like DGX Spark), use system memory info