From 459b7ae7b9a6885672576db141caf1c460901481 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 28 Nov 2025 13:46:51 +0100 Subject: [PATCH] squash! sampling : support intermixed backend/cpu samplers Fix llama-save-load-state, which currently fails, by handling the case where batch.logits is nullptr (e.g. when loading state): in that case, allocate space for all outputs as CPU logits. --- src/llama-context.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/llama-context.cpp b/src/llama-context.cpp index f74248127c..b8c5accff8 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -1676,6 +1676,10 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba } } } + } else { + // When batch.logits is nullptr (when loading state with a dummy batch), + // allocate CPU logits. + batch_needs_cpu_logits = true; } size_t backend_float_count = 0;