llama_context: synchronize before reallocating output buffer (#17974)
This commit is contained in:
parent
4d5ae24c0a
commit
5266379bca
|
|
@@ -1318,6 +1318,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
|
||||||
// This doesn't happen often, but may be annoying in some cases (like the HellaSwag benchmark)
|
// This doesn't happen often, but may be annoying in some cases (like the HellaSwag benchmark)
|
||||||
LLAMA_LOG_INFO("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
LLAMA_LOG_INFO("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
||||||
#endif
|
#endif
|
||||||
|
synchronize();
|
||||||
buf_output = nullptr;
|
buf_output = nullptr;
|
||||||
logits = nullptr;
|
logits = nullptr;
|
||||||
embd = nullptr;
|
embd = nullptr;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue