From 5d770b9db815c8662e190050842d36bc8db174cb Mon Sep 17 00:00:00 2001
From: 65a <10104049+65a@users.noreply.github.com>
Date: Fri, 16 Jan 2026 17:23:56 -0800
Subject: [PATCH] Handle n_ctx 0 for models that entirely fit with n_ctx_train

---
 src/llama.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index f1096d960e..ec74084a76 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -81,7 +81,11 @@ static std::vector llama_get_device_memory_data(
         throw std::runtime_error("failed to load model");
     }
 
-    llama_context * ctx = llama_init_from_model(model, *cparams);
+    llama_context_params cparams_copy = *cparams;
+    if (cparams_copy.n_ctx == 0)
+        cparams_copy.n_ctx = model->hparams.n_ctx_train;
+
+    llama_context * ctx = llama_init_from_model(model, cparams_copy);
     if (ctx == nullptr) {
         llama_model_free(model);
         llama_log_set(ud.original_logger.callback, ud.original_logger.user_data);
@@ -236,6 +240,8 @@ static void llama_params_fit_impl(
         if (projected_free_per_device[0] >= margins[0]) {
             LLAMA_LOG_INFO("%s: will leave %" PRId64 " >= %" PRId64 " MiB of free device memory, no changes needed\n",
                 __func__, projected_free_per_device[0]/MiB, margins[0]/MiB);
+            if (cparams->n_ctx == 0)
+                cparams->n_ctx = hp_nct;
             return;
         }
     } else {
@@ -248,6 +254,8 @@ static void llama_params_fit_impl(
         }
         if (!changes_needed) {
             LLAMA_LOG_INFO("%s: targets for free memory can be met on all devices, no changes needed\n", __func__);
+            if (cparams->n_ctx == 0)
+                cparams->n_ctx = hp_nct;
             return;
         }
     }
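
For reference, a minimal caller-side sketch of the convention this patch makes
the params-fit path honor: in the public header, cparams.n_ctx == 0 already
means "derive the context size from the model". The sketch below uses only the
public C API (llama_get_device_memory_data and llama_params_fit_impl are
internal to this branch), and the model path is a placeholder, not part of the
patch:

    #include "llama.h"
    #include <cstdio>

    int main(void) {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();
        // "model.gguf" is a placeholder path.
        llama_model * model = llama_model_load_from_file("model.gguf", mparams);
        if (model == NULL) {
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        cparams.n_ctx = 0; // 0 = use the model's training context (n_ctx_train)

        llama_context * ctx = llama_init_from_model(model, cparams);
        if (ctx == NULL) {
            llama_model_free(model);
            return 1;
        }

        // With this patch, the fitting code resolves n_ctx == 0 to n_ctx_train
        // up front, so the memory it projects matches the context created here.
        printf("n_ctx = %u, n_ctx_train = %d\n",
               llama_n_ctx(ctx), llama_model_n_ctx_train(model));

        llama_free(ctx);
        llama_model_free(model);
        llama_backend_free();
        return 0;
    }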