From cf0e1475c59e3e609e6cce89deb626a20f2682bf Mon Sep 17 00:00:00 2001
From: Daniel Bevenius <daniel.bevenius@gmail.com>
Date: Mon, 1 Dec 2025 09:13:47 +0100
Subject: [PATCH] sampling : lower log level for output buffer reallocations
 [no ci]

This commit changes the logging level for output buffer reallocations
in the llama_context::output_reserve function from INFO to DEBUG.

The motivation for this is that it currently logs to info and when
enabling verbose logging for llama-cli this will get mixed with the
output, for example:

```console
What is the capital of Sweden?output_reserve: reallocating output buffer from size 0.58 MiB to 1.74 MiB
 1. Stockholm
2\. Helsinki
Based are the options
1. Stockholm
Explanation: Stockholm is the capital of
...
```
---
 src/llama-context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 7f7b838e14..78f12011c4 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1718,7 +1718,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba
         if (buf_output) {
 #ifndef NDEBUG
             // This doesn't happen often, but may be annoying in some cases (like the HellaSwag benchmark)
-            LLAMA_LOG_INFO("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
+            LLAMA_LOG_DEBUG("%s: reallocating output buffer from size %.02f MiB to %.02f MiB\n", __func__, prev_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif
             buf_output = nullptr;
             logits = nullptr;