From 58deae173e9ca14a3dc50cb252cbcb3204df4482 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Fri, 13 Mar 2026 13:40:53 +0100
Subject: [PATCH 1/9] vulkan: fix event wait submission, event command buffer
 reset

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 3c81805b84..2551a7cdce 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -14881,8 +14881,6 @@ static void ggml_backend_vk_event_wait(ggml_backend_t backend, ggml_backend_even
     vk_context compute_ctx = ggml_vk_get_compute_ctx(ctx);
 
     ggml_vk_wait_events(compute_ctx, {vkev->event});
-    ggml_vk_ctx_end(compute_ctx);
-    ctx->compute_ctx.reset();
 }
 
 // TODO: enable async and synchronize
@@ -15705,6 +15703,7 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
     // Finished using current command buffer so we flag for reuse
     if (vkev->cmd_buffer) {
         vkev->cmd_buffer->in_use = false;
+        vkev->cmd_buffer = nullptr;
     }
 }
 

From c0d100e0fc5f0d21159a41aff7ed3c2f5af706db Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Fri, 13 Mar 2026 13:49:31 +0100
Subject: [PATCH 2/9] fix event command buffer reset validation error

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 2551a7cdce..5da61845b0 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -191,6 +191,7 @@ struct vk_queue;
 
 struct vk_command_buffer {
     vk::CommandBuffer buf;
+    uint64_t use_counter = 0;
     bool in_use = false;
 };
 
@@ -946,6 +947,7 @@ struct vk_event {
     vk::Event event;
     vk::Fence fence;
     vk_command_buffer* cmd_buffer = nullptr;
+    uint64_t cmd_buffer_use_counter = 0;
 };
 
 struct vk_semaphore {
@@ -2319,7 +2321,7 @@ static vk_command_buffer* ggml_vk_create_cmd_buffer(vk_device& device, vk_comman
         vk::CommandBufferLevel::ePrimary,
         1);
     const std::vector<vk::CommandBuffer> cmd_buffers = device->device.allocateCommandBuffers(command_buffer_alloc_info);
-    p.cmd_buffers.push_back({ cmd_buffers.front(), true });
+    p.cmd_buffers.push_back({ cmd_buffers.front(), 0, true });
     return &p.cmd_buffers[p.cmd_buffers.size()-1];
 }
 
@@ -6392,6 +6394,7 @@ static vk_subbuffer ggml_vk_tensor_subbuffer(
 static vk_command_buffer* ggml_vk_get_or_create_cmd_buffer(vk_device& device, vk_command_pool& pool) {
     for (auto& cmd_buffer : pool.cmd_buffers) {
         if (!cmd_buffer.in_use) {
+            cmd_buffer.use_counter++;
             cmd_buffer.in_use = true;
             return &cmd_buffer;
         }
@@ -14870,6 +14873,7 @@ static void ggml_backend_vk_event_record(ggml_backend_t backend, ggml_backend_ev
     ggml_vk_submit(compute_ctx, {vkev->fence});
     ctx->submit_pending = true;
     vkev->cmd_buffer = cmd_buf;
+    vkev->cmd_buffer_use_counter = cmd_buf->use_counter;
     ctx->compute_ctx.reset();
 }
 
@@ -15702,7 +15706,10 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
     VK_CHECK(device->device.waitForFences({ vkev->fence }, true, UINT64_MAX), "event_synchronize");
     // Finished using current command buffer so we flag for reuse
     if (vkev->cmd_buffer) {
-        vkev->cmd_buffer->in_use = false;
+        // Only flag for reuse if it hasn't been reused already
+        if (vkev->cmd_buffer_use_counter == vkev->cmd_buffer->use_counter) {
+            vkev->cmd_buffer->in_use = false;
+        }
         vkev->cmd_buffer = nullptr;
     }
 }

From 2204bcedc8b07b957e2f93d99d6397394d59f10b Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Fri, 13 Mar 2026 13:53:23 +0100
Subject: [PATCH 3/9] also reset command buffers before reuse

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 5da61845b0..613e266a76 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13800,6 +13800,7 @@ static void ggml_vk_synchronize(ggml_backend_vk_context * ctx) {
         ctx->submit_pending = false;
         if (cmd_buf) {
             cmd_buf->in_use = false;
+            cmd_buf->buf.reset();
         }
     }
 
@@ -15709,6 +15710,7 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
         // Only flag for reuse if it hasn't been reused already
         if (vkev->cmd_buffer_use_counter == vkev->cmd_buffer->use_counter) {
             vkev->cmd_buffer->in_use = false;
+            vkev->cmd_buffer->buf.reset();
         }
         vkev->cmd_buffer = nullptr;
     }

From 08a4ba6f03448cec5c994dcc5ddfed048cb6c380 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Fri, 13 Mar 2026 16:02:51 +0100
Subject: [PATCH 4/9] use timeline semaphores instead of fences for
 event_synchronize

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 43 +++++++++++++++++++---------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 613e266a76..baf125a297 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -939,22 +939,22 @@ struct vk_subbuffer {
     }
 };
 
+struct vk_semaphore {
+    vk::Semaphore s;
+    uint64_t value;
+};
+
 // vk_event is used for the event-related backend interfaces. It uses 'event' for
-// event_wait and 'fence' for event_synchronize. Polling on an event for
+// event_wait and a timeline semaphore for event_synchronize. Polling on an event for
 // event_synchronize wouldn't be sufficient to wait for command buffers to complete,
 // and would lead to validation errors.
 struct vk_event {
     vk::Event event;
-    vk::Fence fence;
+    vk_semaphore tl_semaphore;
     vk_command_buffer* cmd_buffer = nullptr;
     uint64_t cmd_buffer_use_counter = 0;
 };
 
-struct vk_semaphore {
-    vk::Semaphore s;
-    uint64_t value;
-};
-
 struct vk_submission {
     vk_command_buffer* buffer = nullptr;
     std::vector<vk_semaphore> wait_semaphores;
@@ -2790,6 +2790,15 @@ static void ggml_vk_sync_buffers(ggml_backend_vk_context* ctx, vk_context& subct
     );
 }
 
+static void ggml_vk_reset_event(vk_context& ctx, vk::Event& event) { // records a reset of `event` into ctx's current command buffer
+    VK_LOG_DEBUG("ggml_vk_reset_event()");
+
+    ctx->s->buffer->buf.resetEvent( // command-buffer reset (executes on the GPU timeline), not a host-side device.resetEvent
+        event,
+        ctx->p->q->stage_flags // pipeline stages of the owning queue used as the reset's stage mask
+    );
+}
+
 static void ggml_vk_set_event(vk_context& ctx, vk::Event& event) {
     VK_LOG_DEBUG("ggml_vk_set_event()");
 
@@ -14864,14 +14873,14 @@ static void ggml_backend_vk_event_record(ggml_backend_t backend, ggml_backend_ev
 
     // the backend interface doesn't have an explicit reset, so reset it here
     // before we record the command to set it
-    ctx->device->device.resetEvent(vkev->event);
-    ctx->device->device.resetFences({ vkev->fence });
-
+    ggml_vk_reset_event(compute_ctx, vkev->event);
     ggml_vk_set_event(compute_ctx, vkev->event);
 
+    vkev->tl_semaphore.value++;
+    compute_ctx->s->signal_semaphores.push_back(vkev->tl_semaphore);
     ggml_vk_ctx_end(compute_ctx);
 
-    ggml_vk_submit(compute_ctx, {vkev->fence});
+    ggml_vk_submit(compute_ctx, {});
     ctx->submit_pending = true;
     vkev->cmd_buffer = cmd_buf;
     vkev->cmd_buffer_use_counter = cmd_buf->use_counter;
@@ -15677,9 +15686,13 @@ static ggml_backend_event_t ggml_backend_vk_device_event_new(ggml_backend_dev_t
 
     // The event/fence is expected to initially be in the signaled state.
     vkev->event = device->device.createEvent({});
-    vkev->fence = device->device.createFence({vk::FenceCreateFlagBits::eSignaled});
     device->device.setEvent(vkev->event);
 
+    vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eTimeline, 0 };
+    vk::SemaphoreCreateInfo ci{};
+    ci.setPNext(&tci);
+    vkev->tl_semaphore = { device->device.createSemaphore(ci), 0 };
+
     return new ggml_backend_event {
         /* .device  = */ dev,
         /* .context = */ vkev,
@@ -15692,7 +15705,7 @@ static void ggml_backend_vk_device_event_free(ggml_backend_dev_t dev, ggml_backe
 
     vk_event *vkev = (vk_event *)event->context;
 
-    device->device.destroyFence(vkev->fence);
+    device->device.destroySemaphore(vkev->tl_semaphore.s);
     device->device.destroyEvent(vkev->event);
     delete vkev;
     delete event;
@@ -15704,7 +15717,9 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
     auto device = ggml_vk_get_device(ctx->device);
     vk_event *vkev = (vk_event *)event->context;
 
-    VK_CHECK(device->device.waitForFences({ vkev->fence }, true, UINT64_MAX), "event_synchronize");
+    vk::SemaphoreWaitInfo swi{vk::SemaphoreWaitFlags{}, { vkev->tl_semaphore.s }, { vkev->tl_semaphore.value }};
+    VK_CHECK(device->device.waitSemaphores(swi, UINT64_MAX), "event_synchronize");
+
     // Finished using current command buffer so we flag for reuse
     if (vkev->cmd_buffer) {
         // Only flag for reuse if it hasn't been reused already

From eebf21c3e9d18faad9382d33dc513e02505fed29 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Fri, 13 Mar 2026 17:14:47 +0100
Subject: [PATCH 5/9] don't use initializer list for semaphore wait info

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index baf125a297..26305b48d5 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -15717,7 +15717,9 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
     auto device = ggml_vk_get_device(ctx->device);
     vk_event *vkev = (vk_event *)event->context;
 
-    vk::SemaphoreWaitInfo swi{vk::SemaphoreWaitFlags{}, { vkev->tl_semaphore.s }, { vkev->tl_semaphore.value }};
+    vk::Semaphore sem = vkev->tl_semaphore.s;
+    uint64_t val = vkev->tl_semaphore.value;
+    vk::SemaphoreWaitInfo swi{vk::SemaphoreWaitFlags{}, sem, val};
     VK_CHECK(device->device.waitSemaphores(swi, UINT64_MAX), "event_synchronize");
 
     // Finished using current command buffer so we flag for reuse

From 4374b5ab9a49c768d08e69bae9e9a5e19d1c50ea Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Sat, 14 Mar 2026 06:39:20 +0100
Subject: [PATCH 6/9] use multiple events to avoid reset issues

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 63 ++++++++++++++++++----------
 1 file changed, 41 insertions(+), 22 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 26305b48d5..52765e0760 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -944,12 +944,13 @@ struct vk_semaphore {
     uint64_t value;
 };
 
-// vk_event is used for the event-related backend interfaces. It uses 'event' for
+// vk_event is used for the event-related backend interfaces. It uses vk::Events for
 // event_wait and a timeline semaphore for event_synchronize. Polling on an event for
 // event_synchronize wouldn't be sufficient to wait for command buffers to complete,
 // and would lead to validation errors.
 struct vk_event {
-    vk::Event event;
+    std::vector<vk::Event> events;
+    size_t next_event_idx;
     vk_semaphore tl_semaphore;
     vk_command_buffer* cmd_buffer = nullptr;
     uint64_t cmd_buffer_use_counter = 0;
@@ -14871,10 +14872,14 @@ static void ggml_backend_vk_event_record(ggml_backend_t backend, ggml_backend_ev
     vk_context compute_ctx = ggml_vk_get_compute_ctx(ctx);
     auto* cmd_buf = compute_ctx->s->buffer; // retrieve pointer before it gets reset
 
-    // the backend interface doesn't have an explicit reset, so reset it here
-    // before we record the command to set it
-    ggml_vk_reset_event(compute_ctx, vkev->event);
-    ggml_vk_set_event(compute_ctx, vkev->event);
+    // Grab the next event and record it, create one if necessary
+    if (vkev->next_event_idx == vkev->events.size()) {
+        vkev->events.push_back(ctx->device->device.createEvent({}));
+    }
+
+    vk::Event& cur_event = vkev->events[vkev->next_event_idx];
+    vkev->next_event_idx++;
+    ggml_vk_set_event(compute_ctx, cur_event);
 
     vkev->tl_semaphore.value++;
     compute_ctx->s->signal_semaphores.push_back(vkev->tl_semaphore);
@@ -14894,7 +14899,11 @@ static void ggml_backend_vk_event_wait(ggml_backend_t backend, ggml_backend_even
 
     vk_context compute_ctx = ggml_vk_get_compute_ctx(ctx);
 
-    ggml_vk_wait_events(compute_ctx, {vkev->event});
+    if (vkev->next_event_idx > 0) {
+        // Wait for latest event
+        vk::Event& cur_event = vkev->events[vkev->next_event_idx - 1];
+        ggml_vk_wait_events(compute_ctx, { cur_event });
+    }
 }
 
 // TODO: enable async and synchronize
@@ -15684,9 +15693,8 @@ static ggml_backend_event_t ggml_backend_vk_device_event_new(ggml_backend_dev_t
         return nullptr;
     }
 
-    // The event/fence is expected to initially be in the signaled state.
-    vkev->event = device->device.createEvent({});
-    device->device.setEvent(vkev->event);
+    // No events initially, they get created on demand
+    vkev->next_event_idx = 0;
 
     vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eTimeline, 0 };
     vk::SemaphoreCreateInfo ci{};
@@ -15706,7 +15714,9 @@ static void ggml_backend_vk_device_event_free(ggml_backend_dev_t dev, ggml_backe
     vk_event *vkev = (vk_event *)event->context;
 
     device->device.destroySemaphore(vkev->tl_semaphore.s);
-    device->device.destroyEvent(vkev->event);
+    for (auto& event : vkev->events) {
+        device->device.destroyEvent(event);
+    }
     delete vkev;
     delete event;
 }
@@ -15717,19 +15727,28 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
     auto device = ggml_vk_get_device(ctx->device);
     vk_event *vkev = (vk_event *)event->context;
 
-    vk::Semaphore sem = vkev->tl_semaphore.s;
-    uint64_t val = vkev->tl_semaphore.value;
-    vk::SemaphoreWaitInfo swi{vk::SemaphoreWaitFlags{}, sem, val};
-    VK_CHECK(device->device.waitSemaphores(swi, UINT64_MAX), "event_synchronize");
+    // Only do something if the event has actually been used
+    if (vkev->next_event_idx > 0) {
+        vk::Semaphore sem = vkev->tl_semaphore.s;
+        uint64_t val = vkev->tl_semaphore.value;
+        vk::SemaphoreWaitInfo swi{vk::SemaphoreWaitFlags{}, sem, val};
+        VK_CHECK(device->device.waitSemaphores(swi, UINT64_MAX), "event_synchronize");
 
-    // Finished using current command buffer so we flag for reuse
-    if (vkev->cmd_buffer) {
-        // Only flag for reuse if it hasn't been reused already
-        if (vkev->cmd_buffer_use_counter == vkev->cmd_buffer->use_counter) {
-            vkev->cmd_buffer->in_use = false;
-            vkev->cmd_buffer->buf.reset();
+        // Reset all events and flag for for reuse
+        for (size_t i = 0; i < vkev->next_event_idx; i++) {
+            device->device.resetEvent(vkev->events[i]);
+        }
+        vkev->next_event_idx = 0;
+
+        // Finished using current command buffer so we flag for reuse
+        if (vkev->cmd_buffer) {
+            // Only flag for reuse if it hasn't been reused already
+            if (vkev->cmd_buffer_use_counter == vkev->cmd_buffer->use_counter) {
+                vkev->cmd_buffer->in_use = false;
+                vkev->cmd_buffer->buf.reset();
+            }
+            vkev->cmd_buffer = nullptr;
         }
-        vkev->cmd_buffer = nullptr;
     }
 }
 

From 9adf514ae4b8b9f37d1620d6924e755652009acd Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Sat, 14 Mar 2026 09:03:29 +0100
Subject: [PATCH 7/9] fix event reuse issue with multiple vectors

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 60 +++++++++++++++++++---------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 52765e0760..1cec66d8f3 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -949,8 +949,12 @@ struct vk_semaphore {
 // event_synchronize wouldn't be sufficient to wait for command buffers to complete,
 // and would lead to validation errors.
 struct vk_event {
-    std::vector<vk::Event> events;
-    size_t next_event_idx;
+    std::vector<vk::Event> events_free; // Events available for reuse
+    std::vector<vk::Event> events_pending; // Events that may still be waited on
+    std::vector<vk::Event> events_submitted; // Events that are fully submitted and can be reused on next synchronize
+    vk::Event event;
+    bool has_event;
+
     vk_semaphore tl_semaphore;
     vk_command_buffer* cmd_buffer = nullptr;
     uint64_t cmd_buffer_use_counter = 0;
@@ -14872,14 +14876,25 @@ static void ggml_backend_vk_event_record(ggml_backend_t backend, ggml_backend_ev
     vk_context compute_ctx = ggml_vk_get_compute_ctx(ctx);
     auto* cmd_buf = compute_ctx->s->buffer; // retrieve pointer before it gets reset
 
-    // Grab the next event and record it, create one if necessary
-    if (vkev->next_event_idx == vkev->events.size()) {
-        vkev->events.push_back(ctx->device->device.createEvent({}));
+    if (vkev->has_event) {
+        // Move pending to submitted
+        vkev->events_submitted.insert(vkev->events_submitted.end(), vkev->events_pending.begin(), vkev->events_pending.end());
+        vkev->events_pending.clear();
+        // Move existing event into pending
+        vkev->events_pending.push_back(vkev->event);
     }
 
-    vk::Event& cur_event = vkev->events[vkev->next_event_idx];
-    vkev->next_event_idx++;
-    ggml_vk_set_event(compute_ctx, cur_event);
+    // Grab the next event and record it, create one if necessary
+    if (vkev->events_free.empty()) {
+        vkev->event = ctx->device->device.createEvent({});
+    } else {
+        vkev->event = vkev->events_free.back();
+        vkev->events_free.pop_back();
+    }
+
+    vkev->has_event = true;
+
+    ggml_vk_set_event(compute_ctx, vkev->event);
 
     vkev->tl_semaphore.value++;
     compute_ctx->s->signal_semaphores.push_back(vkev->tl_semaphore);
@@ -14899,10 +14914,9 @@ static void ggml_backend_vk_event_wait(ggml_backend_t backend, ggml_backend_even
 
     vk_context compute_ctx = ggml_vk_get_compute_ctx(ctx);
 
-    if (vkev->next_event_idx > 0) {
+    if (vkev->has_event) {
         // Wait for latest event
-        vk::Event& cur_event = vkev->events[vkev->next_event_idx - 1];
-        ggml_vk_wait_events(compute_ctx, { cur_event });
+        ggml_vk_wait_events(compute_ctx, { vkev->event });
     }
 }
 
@@ -15694,7 +15708,7 @@ static ggml_backend_event_t ggml_backend_vk_device_event_new(ggml_backend_dev_t
     }
 
     // No events initially, they get created on demand
-    vkev->next_event_idx = 0;
+    vkev->has_event = false;
 
     vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eTimeline, 0 };
     vk::SemaphoreCreateInfo ci{};
@@ -15714,9 +15728,18 @@ static void ggml_backend_vk_device_event_free(ggml_backend_dev_t dev, ggml_backe
     vk_event *vkev = (vk_event *)event->context;
 
     device->device.destroySemaphore(vkev->tl_semaphore.s);
-    for (auto& event : vkev->events) {
+    for (auto& event : vkev->events_free) {
         device->device.destroyEvent(event);
     }
+    for (auto& event : vkev->events_pending) {
+        device->device.destroyEvent(event);
+    }
+    for (auto& event : vkev->events_submitted) {
+        device->device.destroyEvent(event);
+    }
+    if (vkev->has_event) {
+        device->device.destroyEvent(vkev->event);
+    }
     delete vkev;
     delete event;
 }
@@ -15728,17 +15751,18 @@ static void ggml_backend_vk_device_event_synchronize(ggml_backend_dev_t dev, ggm
     vk_event *vkev = (vk_event *)event->context;
 
     // Only do something if the event has actually been used
-    if (vkev->next_event_idx > 0) {
+    if (vkev->has_event) {
         vk::Semaphore sem = vkev->tl_semaphore.s;
         uint64_t val = vkev->tl_semaphore.value;
         vk::SemaphoreWaitInfo swi{vk::SemaphoreWaitFlags{}, sem, val};
         VK_CHECK(device->device.waitSemaphores(swi, UINT64_MAX), "event_synchronize");
 
-        // Reset all events and flag for for reuse
-        for (size_t i = 0; i < vkev->next_event_idx; i++) {
-            device->device.resetEvent(vkev->events[i]);
+        // Reset and move submitted events
+        for (auto& event : vkev->events_submitted) {
+            device->device.resetEvent(event);
         }
-        vkev->next_event_idx = 0;
+        vkev->events_free.insert(vkev->events_free.end(), vkev->events_submitted.begin(), vkev->events_submitted.end());
+        vkev->events_submitted.clear();
 
         // Finished using current command buffer so we flag for reuse
         if (vkev->cmd_buffer) {

From d287bbbb8a6faf62ed744fa28f4e7fd1e3175e79 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Sat, 14 Mar 2026 15:54:43 +0100
Subject: [PATCH 8/9] add semaphore wait condition also if compute_ctx already
 exists

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 1cec66d8f3..09f11746ff 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -6513,15 +6513,16 @@ static void ggml_vk_ctx_begin(vk_device& device, vk_context& subctx) {
 }
 
 static vk_context ggml_vk_get_compute_ctx(ggml_backend_vk_context * ctx) {
+    vk_context result;
     if (!ctx->compute_ctx.expired()) {
-        return ctx->compute_ctx.lock();
+        result = ctx->compute_ctx.lock();
+    } else {
+        result = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
+
+        ctx->compute_ctx = result;
+        ggml_vk_ctx_begin(ctx->device, result);
     }
 
-    vk_context result = ggml_vk_create_context(ctx, ctx->compute_cmd_pool);
-
-    ctx->compute_ctx = result;
-    ggml_vk_ctx_begin(ctx->device, result);
-
     if (ctx->device->async_use_transfer_queue && ctx->transfer_semaphore_last_submitted < ctx->transfer_semaphore.value) {
         result->s->wait_semaphores.push_back(ctx->transfer_semaphore);
         ctx->transfer_semaphore_last_submitted = ctx->transfer_semaphore.value;

From a338a1e206309bf1c98a237a73feacea1fba3b7e Mon Sep 17 00:00:00 2001
From: Ruben Ortlam <rortlam@redhat.com>
Date: Sun, 15 Mar 2026 08:59:39 +0100
Subject: [PATCH 9/9] remove event pending stage

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 09f11746ff..42f4576fd9 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -950,7 +950,6 @@ struct vk_semaphore {
 // and would lead to validation errors.
 struct vk_event {
     std::vector<vk::Event> events_free; // Events available for reuse
-    std::vector<vk::Event> events_pending; // Events that may still be waited on
     std::vector<vk::Event> events_submitted; // Events that are fully submitted and can be reused on next synchronize
     vk::Event event;
     bool has_event;
@@ -14878,11 +14877,8 @@ static void ggml_backend_vk_event_record(ggml_backend_t backend, ggml_backend_ev
     auto* cmd_buf = compute_ctx->s->buffer; // retrieve pointer before it gets reset
 
     if (vkev->has_event) {
-        // Move pending to submitted
-        vkev->events_submitted.insert(vkev->events_submitted.end(), vkev->events_pending.begin(), vkev->events_pending.end());
-        vkev->events_pending.clear();
-        // Move existing event into pending
-        vkev->events_pending.push_back(vkev->event);
+        // Move existing event into submitted
+        vkev->events_submitted.push_back(vkev->event);
     }
 
     // Grab the next event and record it, create one if necessary
@@ -15732,9 +15728,6 @@ static void ggml_backend_vk_device_event_free(ggml_backend_dev_t dev, ggml_backe
     for (auto& event : vkev->events_free) {
         device->device.destroyEvent(event);
     }
-    for (auto& event : vkev->events_pending) {
-        device->device.destroyEvent(event);
-    }
     for (auto& event : vkev->events_submitted) {
         device->device.destroyEvent(event);
     }