context : fix reserve token padding to n_seqs (#18536)

This commit is contained in:
Georgi Gerganov 2026-01-03 15:45:34 +02:00 committed by GitHub
parent 0f2e42ca1d
commit a554a1ecc7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 1 additions and 1 deletions

View File

@ -1458,7 +1458,7 @@ ggml_cgraph * llama_context::graph_reserve(
if (n_tokens % n_seqs != 0) {
n_tokens = ((n_tokens + (n_seqs - 1)) / n_seqs) * n_seqs; // round to next multiple of n_seqs
n_outputs = std::min(n_outputs, n_tokens);
n_outputs = std::max(n_outputs, n_tokens);
LLAMA_LOG_DEBUG("%s: making n_tokens a multiple of n_seqs - n_tokens = %u, n_seqs = %u, n_outputs = %u\n", __func__, n_tokens, n_seqs, n_outputs);
}