use max of hipMemGetInfo and /proc/meminfo for UMA free memory

instead of always overwriting with /proc/meminfo, take whichever
value is higher. This way, systems where hipMemGetInfo already
reports TTM-backed memory correctly (like Strix Halo 128GB) keep
their value, while systems where /proc/meminfo reports more still
get the full amount.

removes the HIP-specific #ifdef since the max approach works for
both CUDA and HIP.
This commit is contained in:
hogeheer499-commits 2026-03-19 20:31:52 +01:00
parent 40b234d6b4
commit 97ae46e460
1 changed file with 6 additions and 8 deletions

View File

@ -4608,21 +4608,19 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
// Check if UMA is explicitly enabled via environment variable
bool uma_env = getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr;
#if defined(GGML_USE_HIP)
// On AMD APUs, prop.integrated is true but hipMemGetInfo() already returns
// the correct TTM-backed memory. Only use the UMA path when explicitly requested.
bool is_uma = uma_env;
#else
bool is_uma = prop.integrated > 0 || uma_env;
#endif // defined(GGML_USE_HIP)
if (is_uma) {
// For UMA systems (like DGX Spark), use system memory info
long available_memory_kb = 0;
long free_swap_kb = 0;
if (ggml_backend_cuda_get_available_uma_memory(&available_memory_kb, &free_swap_kb) && available_memory_kb > 0) {
*free = (size_t)available_memory_kb * 1024;
// use whichever value is higher — on some AMD APUs hipMemGetInfo already
// accounts for TTM-backed memory and returns more than /proc/meminfo
size_t proc_free = (size_t)available_memory_kb * 1024;
if (proc_free > *free) {
*free = proc_free;
}
} else {
GGML_LOG_ERROR("%s: /proc/meminfo reading failed, using cudaMemGetInfo\n", __func__);
}