llama_context: synchronize before reallocating output buffer (#17974)

This commit is contained in:
Jeff Bolz 2025-12-13 09:19:51 -06:00 committed by GitHub
parent 4d5ae24c0a
commit 5266379bca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 1 additions and 0 deletions

View File

@ -1318,6 +1318,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
// This doesn't happen often, but may be annoying in some cases (like the HellaSwag benchmark)
LLAMA_LOG_INFO("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
#endif
synchronize();
buf_output = nullptr;
logits = nullptr;
embd = nullptr;