llama_context: synchronize before reallocating output buffer (#17974)
This commit is contained in:
parent
4d5ae24c0a
commit
5266379bca
|
|
@ -1318,6 +1318,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
|
|||
// This doesn't happen often, but may be annoying in some cases (like the HellaSwag benchmark)
|
||||
LLAMA_LOG_INFO("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
||||
#endif
|
||||
synchronize();
|
||||
buf_output = nullptr;
|
||||
logits = nullptr;
|
||||
embd = nullptr;
|
||||
|
|
|
|||
Loading…
Reference in New Issue