From 97ae46e4601d474b9fb6d9d262ee74a3ea644775 Mon Sep 17 00:00:00 2001 From: hogeheer499-commits Date: Thu, 19 Mar 2026 20:31:52 +0100 Subject: [PATCH] use max of hipMemGetInfo and /proc/meminfo for UMA free memory instead of always overwriting with /proc/meminfo, take whichever is higher. This way, systems where hipMemGetInfo already reports TTM-backed memory correctly (like Strix Halo 128GB) keep their value, while systems where /proc/meminfo is higher still get the full amount. This removes the HIP-specific #ifdef since the max approach works for both CUDA and HIP. --- ggml/src/ggml-cuda/ggml-cuda.cu | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 335d695f38..a94f445777 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4608,21 +4608,19 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * // Check if UMA is explicitly enabled via environment variable bool uma_env = getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr; -#if defined(GGML_USE_HIP) - // On AMD APUs, prop.integrated is true but hipMemGetInfo() already returns - // the correct TTM-backed memory. Only use the UMA path when explicitly requested. 
- bool is_uma = uma_env; -#else bool is_uma = prop.integrated > 0 || uma_env; -#endif // defined(GGML_USE_HIP) if (is_uma) { - // For UMA systems (like DGX Spark), use system memory info long available_memory_kb = 0; long free_swap_kb = 0; if (ggml_backend_cuda_get_available_uma_memory(&available_memory_kb, &free_swap_kb) && available_memory_kb > 0) { - *free = (size_t)available_memory_kb * 1024; + // use whichever value is higher — on some AMD APUs hipMemGetInfo already + // accounts for TTM-backed memory and returns more than /proc/meminfo + size_t proc_free = (size_t)available_memory_kb * 1024; + if (proc_free > *free) { + *free = proc_free; + } } else { GGML_LOG_ERROR("%s: /proc/meminfo reading failed, using cudaMemGetInfo\n", __func__); }