Re-enable manual LoRA adapter free

2026-02-28 14:04:29 +01:00 · 2026-02-28 14:04:29 +01:00 · 1b60f08d67
parent 4720819d45
commit 1b60f08d67
4 changed files with 18 additions and 10 deletions
--- a/include/llama-cpp.h
+++ b/include/llama-cpp.h
@@ -21,9 +21,7 @@ struct llama_sampler_deleter {
 };

 struct llama_adapter_lora_deleter {
-    void operator()(llama_adapter_lora *) {
-        // llama_adapter_lora_free is deprecated
-    }
+    void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
 };

 typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
--- a/include/llama.h
+++ b/include/llama.h
@@ -647,9 +647,8 @@ extern "C" {
    LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);

    // Manually free a LoRA adapter
-    // NOTE: loaded adapters will be free when the associated model is deleted
-    LLAMA_API DEPRECATED(void llama_adapter_lora_free(struct llama_adapter_lora * adapter),
-            "adapters are now freed together with the associated model");
+    // NOTE: loaded adapters that are not manually freed will be freed when the associated model is deleted
+    LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);

    // Get the invocation tokens if the current lora is an alora
    LLAMA_API uint64_t            llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter);
--- a/src/llama-adapter.cpp
+++ b/src/llama-adapter.cpp
@@ -418,7 +418,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 }

 llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
-    llama_adapter_lora * adapter = new llama_adapter_lora();
+    llama_adapter_lora * adapter = new llama_adapter_lora(model);

    try {
        llama_adapter_lora_init_impl(*model, path_lora, *adapter);
@@ -471,8 +471,17 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
    return snprintf(buf, buf_size, "%s", it->second.c_str());
 }

-void llama_adapter_lora_free(llama_adapter_lora *) {
-    // deprecated: adapters are freed by llama_model's destructor
+void llama_adapter_lora_free(llama_adapter_lora * adapter) {
+    if (adapter == nullptr) {
+        return;
+    }
+
+    if (adapter->model != nullptr) {
+        adapter->model->loras.erase(adapter);
+        adapter->model = nullptr;
+    }
+
+    delete adapter;
 }

 uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {
--- a/src/llama-adapter.h
+++ b/src/llama-adapter.h
@@ -61,6 +61,8 @@ struct llama_adapter_lora_weight {
 };

 struct llama_adapter_lora {
+    llama_model * model = nullptr;
+
    // map tensor name to lora_a_b
    std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;

@@ -75,7 +77,7 @@ struct llama_adapter_lora {
    // activated lora (aLoRA)
    std::vector<llama_token> alora_invocation_tokens;

-    llama_adapter_lora() = default;
+    explicit llama_adapter_lora(llama_model * model) : model(model) {}
    ~llama_adapter_lora() = default;

    llama_adapter_lora_weight * get_weight(ggml_tensor * w);