From 31f6032f1bdcc891d4dabe33a688da7a668f825c Mon Sep 17 00:00:00 2001 From: Simon Redman Date: Thu, 29 Jan 2026 22:34:25 -0500 Subject: [PATCH] Refactor vk_semaphore to be a shared_ptr object. This resolves a bug with semaphore creation. Previously, we would try to return the pointer to the vector's memory. This is prone to change if the vector resized which, indeed, it is prone to do. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 39 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 3852867c29..5552f9d6a1 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -905,10 +905,15 @@ struct vk_event { vk::Fence fence; }; -struct vk_semaphore { - vk::Semaphore s; - uint64_t value; +struct vk_semaphore_struct { + vk::Semaphore s; // Underlying semaphore object + uint64_t value; // The value used when signaling/waiting (only for timeline semaphores) + const vk_device device; // The device this semaphore was created on (needed for cleanup) + + vk_semaphore_struct(vk::Semaphore semaphore, uint64_t initial_value, vk_device dev) + : s(semaphore), value(initial_value), device(dev) {} }; +typedef std::shared_ptr vk_semaphore; struct vk_submission { vk::CommandBuffer buffer; @@ -2271,12 +2276,12 @@ static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) { tl_signal_semaphores.push_back({}); for (size_t i = 0; i < submission.wait_semaphores.size(); i++) { stage_flags[idx].push_back(ctx->p->q->stage_flags); - tl_wait_vals[idx].push_back(submission.wait_semaphores[i].value); - tl_wait_semaphores[idx].push_back(submission.wait_semaphores[i].s); + tl_wait_vals[idx].push_back(submission.wait_semaphores[i]->value); + tl_wait_semaphores[idx].push_back(submission.wait_semaphores[i]->s); } for (size_t i = 0; i < submission.signal_semaphores.size(); i++) { - tl_signal_vals[idx].push_back(submission.signal_semaphores[i].value); - tl_signal_semaphores[idx].push_back(submission.signal_semaphores[i].s); + tl_signal_vals[idx].push_back(submission.signal_semaphores[i]->value); + tl_signal_semaphores[idx].push_back(submission.signal_semaphores[i]->s); } tl_submit_infos.push_back({ (uint32_t) submission.wait_semaphores.size(), @@ -2381,26 +2386,28 @@ static vk_context ggml_vk_create_temporary_context(vk_command_pool& p) { return result; } -static vk_semaphore * ggml_vk_create_binary_semaphore(ggml_backend_vk_context * ctx) { - VK_LOG_DEBUG("ggml_vk_create_timeline_semaphore()"); +static vk_semaphore ggml_vk_create_binary_semaphore(ggml_backend_vk_context * ctx) { + VK_LOG_DEBUG("ggml_vk_create_binary_semaphore()"); vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eBinary, 0 }; vk::SemaphoreCreateInfo ci{}; ci.setPNext(&tci); vk::Semaphore semaphore = ctx->device->device.createSemaphore(ci); - ctx->gc.semaphores.push_back({ semaphore, 0 }); - return &ctx->gc.semaphores[ctx->gc.semaphores.size() - 1]; + vk_semaphore sem = std::make_shared(semaphore, 0, ctx->device); + ctx->gc.semaphores.push_back(sem); + return sem; } -static vk_semaphore * ggml_vk_create_timeline_semaphore(ggml_backend_vk_context * ctx) { +static vk_semaphore ggml_vk_create_timeline_semaphore(ggml_backend_vk_context * ctx) { VK_LOG_DEBUG("ggml_vk_create_timeline_semaphore()"); if (ctx->semaphore_idx >= ctx->gc.tl_semaphores.size()) { vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eTimeline, 0 }; vk::SemaphoreCreateInfo ci{}; ci.setPNext(&tci); vk::Semaphore semaphore = ctx->device->device.createSemaphore(ci); - ctx->gc.tl_semaphores.push_back({ semaphore, 0 }); + vk_semaphore sem = std::make_shared(semaphore, 0, ctx->device); + ctx->gc.tl_semaphores.push_back(sem); } - return &ctx->gc.tl_semaphores[ctx->semaphore_idx++]; + return ctx->gc.tl_semaphores[ctx->semaphore_idx++]; } static vk::Event ggml_vk_create_event(ggml_backend_vk_context * ctx) { @@ -12795,12 +12802,12 @@ static void ggml_vk_graph_cleanup(ggml_backend_vk_context * ctx) { ggml_vk_command_pool_cleanup(ctx->device, ctx->compute_cmd_pool); for (size_t i = 0; i < ctx->gc.semaphores.size(); i++) { - ctx->device->device.destroySemaphore({ ctx->gc.semaphores[i].s }); + ctx->gc.semaphores[i]->device->device.destroySemaphore({ ctx->gc.semaphores[i]->s }); } ctx->gc.semaphores.clear(); for (size_t i = 0; i < ctx->gc.tl_semaphores.size(); i++) { - ctx->device->device.destroySemaphore({ ctx->gc.tl_semaphores[i].s }); + ctx->gc.tl_semaphores[i]->device->device.destroySemaphore({ ctx->gc.tl_semaphores[i]->s }); } ctx->gc.tl_semaphores.clear(); ctx->semaphore_idx = 0;