From 1b60f08d67fdfc9369704443a6805a156d35d086 Mon Sep 17 00:00:00 2001 From: Pop Flamingo Date: Sat, 28 Feb 2026 14:04:29 +0100 Subject: [PATCH 1/2] Re-enable manual LoRA adapter free --- include/llama-cpp.h | 4 +--- include/llama.h | 5 ++--- src/llama-adapter.cpp | 15 ++++++++++++--- src/llama-adapter.h | 4 +++- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/llama-cpp.h b/include/llama-cpp.h index 807e77f628..8f6368177d 100644 --- a/include/llama-cpp.h +++ b/include/llama-cpp.h @@ -21,9 +21,7 @@ struct llama_sampler_deleter { }; struct llama_adapter_lora_deleter { - void operator()(llama_adapter_lora *) { - // llama_adapter_lora_free is deprecated - } + void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); } }; typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr; diff --git a/include/llama.h b/include/llama.h index 077f66dc65..0626483a85 100644 --- a/include/llama.h +++ b/include/llama.h @@ -647,9 +647,8 @@ extern "C" { LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size); // Manually free a LoRA adapter - // NOTE: loaded adapters will be free when the associated model is deleted - LLAMA_API DEPRECATED(void llama_adapter_lora_free(struct llama_adapter_lora * adapter), - "adapters are now freed together with the associated model"); + // NOTE: loaded adapters that are not manually freed will be freed when the associated model is deleted + LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter); // Get the invocation tokens if the current lora is an alora LLAMA_API uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter); diff --git a/src/llama-adapter.cpp b/src/llama-adapter.cpp index d6a5800e63..2f2cc12af0 100644 --- a/src/llama-adapter.cpp +++ b/src/llama-adapter.cpp @@ -418,7 +418,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_ }
llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) { - llama_adapter_lora * adapter = new llama_adapter_lora(); + llama_adapter_lora * adapter = new llama_adapter_lora(model); try { llama_adapter_lora_init_impl(*model, path_lora, *adapter); @@ -471,8 +471,17 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter, return snprintf(buf, buf_size, "%s", it->second.c_str()); } -void llama_adapter_lora_free(llama_adapter_lora *) { - // deprecated: adapters are freed by llama_model's destructor +void llama_adapter_lora_free(llama_adapter_lora * adapter) { + if (adapter == nullptr) { + return; + } + + if (adapter->model != nullptr) { + adapter->model->loras.erase(adapter); + adapter->model = nullptr; + } + + delete adapter; } uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) { diff --git a/src/llama-adapter.h b/src/llama-adapter.h index aa3ab63ad7..f0b1e50f81 100644 --- a/src/llama-adapter.h +++ b/src/llama-adapter.h @@ -61,6 +61,8 @@ struct llama_adapter_lora_weight { }; struct llama_adapter_lora { + llama_model * model = nullptr; + // map tensor name to lora_a_b std::unordered_map<std::string, llama_adapter_lora_weight> ab_map; @@ -75,7 +77,7 @@ struct llama_adapter_lora { // activated lora (aLoRA) std::vector<llama_token> alora_invocation_tokens; - llama_adapter_lora() = default; + explicit llama_adapter_lora(llama_model * model) : model(model) {} ~llama_adapter_lora() = default; llama_adapter_lora_weight * get_weight(ggml_tensor * w); From 662cda0788afe4b45fc33c7b1055b87c01bc91a4 Mon Sep 17 00:00:00 2001 From: Pop Flamingo Date: Mon, 2 Mar 2026 16:43:49 +0100 Subject: [PATCH 2/2] Remove stale "all adapters must be loaded before context creation" comments --- common/common.cpp | 2 +- include/llama.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 53bddc4ef2..5db251051b 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1067,7 +1067,7 @@
common_init_result::common_init_result(common_params & params) : const llama_vocab * vocab = llama_model_get_vocab(model); - // load and optionally apply lora adapters (must be loaded before context creation) + // load and optionally apply lora adapters for (auto & la : params.lora_adapters) { llama_adapter_lora_ptr lora; lora.reset(llama_adapter_lora_init(model, la.path.c_str())); diff --git a/include/llama.h b/include/llama.h index 0626483a85..4cac194d0d 100644 --- a/include/llama.h +++ b/include/llama.h @@ -623,7 +623,6 @@ extern "C" { // Load a LoRA adapter from file // The adapter is valid as long as the associated model is not freed - // All adapters must be loaded before context creation LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init( struct llama_model * model, const char * path_lora);