deprecate llama_adapter_lora_free

commit cb5e0f8734
parent f5e8bfddc3
Author: Xuan Son Nguyen
Date:   2025-12-31 12:07:07 +01:00
5 changed files with 12 additions and 18 deletions

include/llama-cpp.h

@@ -21,7 +21,9 @@ struct llama_sampler_deleter {
 };
 
 struct llama_adapter_lora_deleter {
-    void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
+    void operator()(llama_adapter_lora *) {
+        // llama_adapter_lora_free is deprecated
+    }
 };
 
 typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
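A side effect worth noting: the llama_adapter_lora_ptr typedef declared a few lines below this hunk still compiles everywhere, but it is now a non-owning handle, since the model frees its adapters itself. A minimal caller-side sketch, assuming placeholder .gguf paths and no error handling:

    #include "llama-cpp.h"  // llama_model_ptr, llama_adapter_lora_ptr

    void example() {
        llama_model_ptr model(llama_model_load_from_file("model.gguf", llama_model_default_params()));

        // Same ergonomics as before, but the deleter is now a no-op.
        llama_adapter_lora_ptr lora(llama_adapter_lora_init(model.get(), "lora.gguf"));

        // ... use lora.get() with llama_set_adapter_lora(ctx, ...) ...

        // Locals are destroyed in reverse order: `lora` runs its no-op deleter
        // first, then `model`'s deleter (llama_model_free) frees the adapter too.
    }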

include/llama.h

@@ -633,7 +633,8 @@ extern "C" {
 
     // Manually free a LoRA adapter
     // NOTE: loaded adapters will be free when the associated model is deleted
-    LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);
+    LLAMA_API DEPRECATED(void llama_adapter_lora_free(struct llama_adapter_lora * adapter),
+                         "adapters are now freed together with the associated model");
 
     // Get the invocation tokens if the current lora is an alora
     LLAMA_API uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter);
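For context, the DEPRECATED macro used above is the attribute wrapper defined near the top of llama.h, approximately:

    #ifdef __GNUC__
    #    define DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
    #elif defined(_MSC_VER)
    #    define DEPRECATED(func, hint) __declspec(deprecated(hint)) func
    #else
    #    define DEPRECATED(func, hint) func
    #endif

Existing callers keep compiling and linking; GCC/Clang merely emit a -Wdeprecated-declarations warning carrying the hint string.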

src/llama-adapter.cpp

@@ -146,11 +146,9 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     return nullptr;
 }
 
-static void llama_adapter_lora_init_impl(const char * path_lora, llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
-    llama_model & model = adapter.model;
-
     ggml_context * ctx_init;
     gguf_init_params meta_gguf_params = {
         /* .no_alloc = */ true,
@@ -420,10 +418,10 @@ static void llama_adapter_lora_init_impl(const char * path_lora, llama_adapter_lora & adapter) {
 }
 
 llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
-    llama_adapter_lora * adapter = new llama_adapter_lora(*model);
+    llama_adapter_lora * adapter = new llama_adapter_lora();
 
     try {
-        llama_adapter_lora_init_impl(path_lora, *adapter);
+        llama_adapter_lora_init_impl(*model, path_lora, *adapter);
         return adapter;
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
@@ -473,13 +471,8 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
     return snprintf(buf, buf_size, "%s", it->second.c_str());
 }
 
-void llama_adapter_lora_free(llama_adapter_lora * adapter) {
-    // remove adapter from associated model
-    auto & model = adapter->model;
-    GGML_ASSERT(model.loras.find(adapter) != model.loras.end());
-    model.loras.erase(adapter);
-
-    delete adapter;
+void llama_adapter_lora_free(llama_adapter_lora *) {
+    // deprecated: adapters are freed by llama_model's destructor
 }
 
 uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {
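For downstream code the migration is mechanical. A sketch, with a placeholder path and the surrounding code hypothetical:

    #include "llama.h"

    void load_and_use(llama_model * model) {
        llama_adapter_lora * lora = llama_adapter_lora_init(model, "lora.gguf");
        // ... use the adapter ...

        // Previously, callers paired the init with an explicit free:
        //     llama_adapter_lora_free(lora);
        // That call is now a warning-emitting no-op; simply drop it and let
        // llama_model_free(model) delete the adapter.
    }

One caution: once the model is freed, any retained adapter pointer dangles, so discard adapter handles no later than the model itself.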

src/llama-adapter.h

@@ -59,8 +59,6 @@ struct llama_adapter_lora_weight {
 };
 
 struct llama_adapter_lora {
-    llama_model & model;
-
     // map tensor name to lora_a_b
     std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
 
@@ -75,7 +73,7 @@ struct llama_adapter_lora {
     // activated lora (aLoRA)
     std::vector<llama_token> alora_invocation_tokens;
 
-    llama_adapter_lora(llama_model & model) : model(model) {}
+    llama_adapter_lora() = default;
     ~llama_adapter_lora() = default;
 
     llama_adapter_lora_weight * get_weight(ggml_tensor * w);
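Dropping the llama_model & back-reference is what makes llama_adapter_lora() = default; legal: a class with a reference member has its implicit default constructor deleted, so the old code had to bind the model at construction time. A minimal illustration (names hypothetical):

    struct model;

    struct adapter_with_ref {
        model & m;  // reference member: the implicit default constructor is deleted
        adapter_with_ref(model & mdl) : m(mdl) {}  // so it must be bound here
    };

    struct adapter_plain {
        adapter_plain() = default;  // fine: no reference to bind
    };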

src/llama-model.cpp

@@ -469,7 +469,7 @@ llama_model::llama_model(const llama_model_params & params) : params(params), pi
 
 llama_model::~llama_model() {
     for (auto * lora : loras) {
-        llama_adapter_lora_free(lora);
+        delete lora;
     }
 }
 
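Taken together, the model is now the single owner: loras holds the raw pointers registered at load time, and the destructor deletes them. End to end, the intended lifecycle looks roughly like this (paths are placeholders, error handling elided):

    #include "llama.h"

    int main() {
        llama_backend_init();

        llama_model * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
        llama_adapter_lora * lora = llama_adapter_lora_init(model, "lora.gguf");

        llama_context * ctx = llama_init_from_model(model, llama_context_default_params());
        llama_set_adapter_lora(ctx, lora, 1.0f);  // apply with scale 1.0

        // ... inference ...

        llama_free(ctx);
        llama_model_free(model);  // also deletes `lora`; no explicit free needed

        llama_backend_free();
        return 0;
    }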