llama : re-enable manual LoRA adapter free (#19983)
* Re-enable manual LoRA adapter free
* Remove stale "all adapters must be loaded before context creation" comments
parent f4049ad735
commit 312cf03328
@@ -1067,7 +1067,7 @@ common_init_result::common_init_result(common_params & params) :
 
     const llama_vocab * vocab = llama_model_get_vocab(model);
 
-    // load and optionally apply lora adapters (must be loaded before context creation)
+    // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
         llama_adapter_lora_ptr lora;
         lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
@@ -21,9 +21,7 @@ struct llama_sampler_deleter {
 };
 
 struct llama_adapter_lora_deleter {
-    void operator()(llama_adapter_lora *) {
-        // llama_adapter_lora_free is deprecated
-    }
+    void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
 };
 
 typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
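With the deleter wired back to llama_adapter_lora_free, the llama_adapter_lora_ptr smart pointer once again releases an adapter as soon as it goes out of scope instead of leaving cleanup to the model. A minimal sketch of the resulting RAII usage (file names are placeholders, error handling omitted):

    #include "llama.h"
    #include "llama-cpp.h"

    int main() {
        llama_model * model = llama_model_load_from_file("model.gguf", llama_model_default_params());

        {
            // the unique_ptr deleter now calls llama_adapter_lora_free at end of scope
            llama_adapter_lora_ptr lora(llama_adapter_lora_init(model, "adapter.gguf"));
        }

        llama_model_free(model);
        return 0;
    }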
@@ -636,7 +636,6 @@ extern "C" {
 
     // Load a LoRA adapter from file
     // The adapter is valid as long as the associated model is not freed
-    // All adapters must be loaded before context creation
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
                      struct llama_model * model,
                      const char * path_lora);
@@ -660,9 +659,8 @@ extern "C" {
     LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
 
     // Manually free a LoRA adapter
-    // NOTE: loaded adapters will be free when the associated model is deleted
-    LLAMA_API DEPRECATED(void llama_adapter_lora_free(struct llama_adapter_lora * adapter),
-        "adapters are now freed together with the associated model");
+    // NOTE: loaded adapters that are not manually freed will be freed when the associated model is deleted
+    LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);
 
     // Get the invocation tokens if the current lora is an alora
     LLAMA_API uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter);
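Because llama_adapter_lora_free is no longer deprecated, an adapter can be reclaimed the moment it is no longer needed, rather than lingering until the model itself is freed. A hedged sketch of dropping an adapter mid-session (the adapter path is a placeholder; the adapter is detached from the context before being freed):

    void apply_and_drop(llama_model * model, llama_context * ctx) {
        llama_adapter_lora * lora = llama_adapter_lora_init(model, "adapter-a.gguf");
        llama_set_adapter_lora(ctx, lora, 1.0f);

        // ... decode with the adapter applied ...

        llama_rm_adapter_lora(ctx, lora);
        llama_adapter_lora_free(lora); // memory is returned immediately
    }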
@@ -418,7 +418,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 }
 
 llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
-    llama_adapter_lora * adapter = new llama_adapter_lora();
+    llama_adapter_lora * adapter = new llama_adapter_lora(model);
 
     try {
         llama_adapter_lora_init_impl(*model, path_lora, *adapter);
@@ -471,8 +471,17 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
     return snprintf(buf, buf_size, "%s", it->second.c_str());
 }
 
-void llama_adapter_lora_free(llama_adapter_lora *) {
-    // deprecated: adapters are freed by llama_model's destructor
+void llama_adapter_lora_free(llama_adapter_lora * adapter) {
+    if (adapter == nullptr) {
+        return;
+    }
+
+    if (adapter->model != nullptr) {
+        adapter->model->loras.erase(adapter);
+        adapter->model = nullptr;
+    }
+
+    delete adapter;
 }
 
 uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {
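Unregistering the adapter from model->loras before deleting it is what lets the manual free coexist with model-owned cleanup: an adapter freed by hand is no longer in the set the model tears down, so neither path double-frees. Illustratively (assuming, per the old deprecation note, that the model still frees whatever remains registered when it is destroyed):

    llama_adapter_lora * a = llama_adapter_lora_init(model, "a.gguf");
    llama_adapter_lora * b = llama_adapter_lora_init(model, "b.gguf");

    llama_adapter_lora_free(a); // erased from model->loras, then deleted
    llama_model_free(model);    // frees b, which is still registered; a is not touched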
@@ -61,6 +61,8 @@ struct llama_adapter_lora_weight {
 };
 
 struct llama_adapter_lora {
+    llama_model * model = nullptr;
+
     // map tensor name to lora_a_b
     std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
 
@@ -75,7 +77,7 @@ struct llama_adapter_lora {
     // activated lora (aLoRA)
     std::vector<llama_token> alora_invocation_tokens;
 
-    llama_adapter_lora() = default;
+    explicit llama_adapter_lora(llama_model * model) : model(model) {}
     ~llama_adapter_lora() = default;
 
     llama_adapter_lora_weight * get_weight(ggml_tensor * w);