From 459b7ae7b9a6885672576db141caf1c460901481 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Fri, 28 Nov 2025 13:46:51 +0100
Subject: [PATCH] squash! sampling : support intermixed backend/cpu samplers

Fix llama-save-load-state, which currently fails: handle the case where
batch.logits is nullptr (e.g. when loading state) by allocating space
for all outputs as CPU logits.
---
 src/llama-context.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index f74248127c..b8c5accff8 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1676,6 +1676,10 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba
                 }
             }
         }
+    } else {
+        // When batch.logits is nullptr (when loading state with a dummy batch),
+        // allocate CPU logits.
+        batch_needs_cpu_logits = true;
     }
 
     size_t backend_float_count = 0;