From 4aa45880424501ddcfeef578969a3edeab8d7e31 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 2 Jan 2026 19:26:15 +0200
Subject: [PATCH] cont : fix reserve

---
 src/llama-context.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index e505ef40c2..16d81c95be 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -351,6 +351,8 @@ llama_context::~llama_context() {
 void llama_context::reserve() {
     LLAMA_LOG_INFO("%s: reserving ...\n", __func__);
 
+    synchronize();
+
     const uint32_t n_seqs   = cparams.n_seq_max;
     const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);
 
@@ -497,6 +499,10 @@ void llama_context::reserve() {
 }
 
 void llama_context::synchronize() {
+    if (!sched) {
+        return;
+    }
+
     ggml_backend_sched_synchronize(sched.get());
 
     // FIXME: if multiple single tokens are evaluated without a synchronization,
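
A minimal sketch of the pattern this patch applies, using simplified hypothetical types rather than the actual llama.cpp classes: `synchronize()` early-returns when the scheduler has not been constructed yet, and `reserve()` flushes any in-flight backend work before buffers are (re)allocated.

```cpp
// Sketch only: `scheduler` and `context` are stand-ins for the real
// ggml scheduler and llama_context; names and behavior are assumptions.
#include <cstdio>
#include <memory>

struct scheduler {
    void synchronize() { std::puts("waiting for in-flight backend work"); }
};

struct context {
    std::unique_ptr<scheduler> sched;

    void synchronize() {
        // Guard: reserve() can run before the scheduler exists, so a
        // null check avoids dereferencing an empty unique_ptr.
        if (!sched) {
            return;
        }
        sched->synchronize();
    }

    void reserve() {
        // Ensure no pending work still references the old buffers
        // before they are measured and reallocated.
        synchronize();
        // ... reserve compute buffers ...
    }
};

int main() {
    context ctx;
    ctx.reserve();                         // safe even with no scheduler yet
    ctx.sched = std::make_unique<scheduler>();
    ctx.reserve();                         // now actually synchronizes first
}
```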