vulkan: iGPU memory reporting fix (#17110)
* vulkan: use all device-local heaps for memory availability reporting Co-authored-by: Giuseppe Scrivano <gscrivan@redhat.com> * use all available heaps for iGPU memory reporting * Allow multiple memory types per buffer request for devices with split heaps --------- Co-authored-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
parent
8a3519b708
commit
7f3e9d339c
|
|
@ -2159,17 +2159,18 @@ static void ggml_vk_queue_command_pools_cleanup(vk_device& device) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::vector<uint32_t> ggml_vk_find_memory_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) {
|
||||||
|
std::vector<uint32_t> indices;
|
||||||
|
|
||||||
static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) {
|
|
||||||
for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) {
|
for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) {
|
||||||
vk::MemoryType memory_type = mem_props->memoryTypes[i];
|
vk::MemoryType memory_type = mem_props->memoryTypes[i];
|
||||||
if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) &&
|
if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) &&
|
||||||
(flags & memory_type.propertyFlags) == flags &&
|
(flags & memory_type.propertyFlags) == flags &&
|
||||||
mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) {
|
mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) {
|
||||||
return static_cast<int32_t>(i);
|
indices.push_back(i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return UINT32_MAX;
|
return indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std::initializer_list<vk::MemoryPropertyFlags> & req_flags_list) {
|
static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std::initializer_list<vk::MemoryPropertyFlags> & req_flags_list) {
|
||||||
|
|
@ -2212,22 +2213,24 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std
|
||||||
for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) {
|
for (auto it = req_flags_list.begin(); it != req_flags_list.end(); it++) {
|
||||||
const auto & req_flags = *it;
|
const auto & req_flags = *it;
|
||||||
|
|
||||||
uint32_t memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
|
const std::vector<uint32_t> memory_type_indices = ggml_vk_find_memory_properties(&mem_props, &mem_req, req_flags);
|
||||||
|
|
||||||
if (memory_type_index == UINT32_MAX) {
|
if (memory_type_indices.empty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
buf->memory_property_flags = req_flags;
|
buf->memory_property_flags = req_flags;
|
||||||
|
|
||||||
try {
|
for (auto mtype_it = memory_type_indices.begin(); mtype_it != memory_type_indices.end(); mtype_it++) {
|
||||||
buf->device_memory = device->device.allocateMemory({ mem_req.size, memory_type_index, &mem_flags_info });
|
try {
|
||||||
break;
|
buf->device_memory = device->device.allocateMemory({ mem_req.size, *mtype_it, &mem_flags_info });
|
||||||
} catch (const vk::SystemError& e) {
|
break;
|
||||||
// loop and retry
|
} catch (const vk::SystemError& e) {
|
||||||
// during last attempt throw the exception
|
// loop and retry
|
||||||
if (it + 1 == req_flags_list.end()) {
|
// during last attempt throw the exception
|
||||||
device->device.destroyBuffer(buf->buffer);
|
if (it + 1 == req_flags_list.end() && mtype_it + 1 == memory_type_indices.end()) {
|
||||||
throw e;
|
device->device.destroyBuffer(buf->buffer);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -13204,25 +13207,28 @@ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total
|
||||||
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
||||||
vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
|
vk::PhysicalDeviceMemoryBudgetPropertiesEXT budgetprops;
|
||||||
vk::PhysicalDeviceMemoryProperties2 memprops = {};
|
vk::PhysicalDeviceMemoryProperties2 memprops = {};
|
||||||
bool membudget_supported = vk_instance.device_supports_membudget[device];
|
const bool membudget_supported = vk_instance.device_supports_membudget[device];
|
||||||
|
const bool is_integrated_gpu = vkdev.getProperties().deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
|
||||||
|
|
||||||
if (membudget_supported) {
|
if (membudget_supported) {
|
||||||
memprops.pNext = &budgetprops;
|
memprops.pNext = &budgetprops;
|
||||||
}
|
}
|
||||||
vkdev.getMemoryProperties2(&memprops);
|
vkdev.getMemoryProperties2(&memprops);
|
||||||
|
|
||||||
|
*total = 0;
|
||||||
|
*free = 0;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
|
for (uint32_t i = 0; i < memprops.memoryProperties.memoryHeapCount; ++i) {
|
||||||
const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
|
const vk::MemoryHeap & heap = memprops.memoryProperties.memoryHeaps[i];
|
||||||
|
|
||||||
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
if (is_integrated_gpu || (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
|
||||||
*total = heap.size;
|
*total += heap.size;
|
||||||
|
|
||||||
if (membudget_supported && i < budgetprops.heapUsage.size()) {
|
if (membudget_supported && i < budgetprops.heapUsage.size()) {
|
||||||
*free = budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
|
*free += budgetprops.heapBudget[i] - budgetprops.heapUsage[i];
|
||||||
} else {
|
} else {
|
||||||
*free = heap.size;
|
*free += heap.size;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue