use max of hipMemGetInfo and /proc/meminfo for UMA free memory
Instead of always overwriting with /proc/meminfo, take whichever value is higher. This way, systems where hipMemGetInfo already reports TTM-backed memory correctly (like Strix Halo 128GB) keep their value, while systems where /proc/meminfo reports more still get the full amount. This also removes the HIP-specific #ifdef, since the max approach works for both CUDA and HIP.
This commit is contained in:
parent
40b234d6b4
commit
97ae46e460
|
|
@@ -4608,21 +4608,19 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
|
|||
// Check if UMA is explicitly enabled via environment variable
|
||||
bool uma_env = getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr;
|
||||
|
||||
#if defined(GGML_USE_HIP)
|
||||
// On AMD APUs, prop.integrated is true but hipMemGetInfo() already returns
|
||||
// the correct TTM-backed memory. Only use the UMA path when explicitly requested.
|
||||
bool is_uma = uma_env;
|
||||
#else
|
||||
bool is_uma = prop.integrated > 0 || uma_env;
|
||||
#endif // defined(GGML_USE_HIP)
|
||||
|
||||
if (is_uma) {
|
||||
// For UMA systems (like DGX Spark), use system memory info
|
||||
long available_memory_kb = 0;
|
||||
long free_swap_kb = 0;
|
||||
|
||||
if (ggml_backend_cuda_get_available_uma_memory(&available_memory_kb, &free_swap_kb) && available_memory_kb > 0) {
|
||||
*free = (size_t)available_memory_kb * 1024;
|
||||
// use whichever value is higher — on some AMD APUs hipMemGetInfo already
|
||||
// accounts for TTM-backed memory and returns more than /proc/meminfo
|
||||
size_t proc_free = (size_t)available_memory_kb * 1024;
|
||||
if (proc_free > *free) {
|
||||
*free = proc_free;
|
||||
}
|
||||
} else {
|
||||
GGML_LOG_ERROR("%s: /proc/meminfo reading failed, using cudaMemGetInfo\n", __func__);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue