From 67f99e605fe4897baac76e04948e73313f5c0e1a Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 13 Feb 2026 08:19:31 +0200
Subject: [PATCH] cont : alternative lora API

---
 common/common.cpp     |   6 +--
 include/llama.h       |  24 +++------
 src/llama-context.cpp | 115 ++++++++++++------------------------------
 src/llama-context.h   |  15 ++----
 4 files changed, 43 insertions(+), 117 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 2f4cab1a86..a13bce80a3 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1242,7 +1242,7 @@ common_init_result_ptr common_init_from_params(common_params & params) {
             return res;
         }
 
-        int err = llama_apply_adapter_cvec(
+        int err = llama_set_adapter_cvec(
                 lctx,
                 cvec.data.data(),
                 cvec.data.size(),
@@ -1344,7 +1344,7 @@ std::string get_model_endpoint() {
 }
 
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
-    std::vector loras;
+    std::vector loras;
     std::vector<float> scales;
 
     for (auto & la: lora) {
@@ -1352,7 +1352,7 @@ void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adap
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
-        if (it->second == scale) {
-            return;
-        }
+    if (adapters_lora_are_same(adapters, n_adapters, scales)) {
+        return;
     }
 
-    loras[adapter] = scale;
+    loras.clear();
+
+    for (size_t i = 0; i < n_adapters; i ++) {
+        if (scales[i] != 0.0f) {
+            loras[adapters[i]] = scales[i];
+        }
+    }
 
     sched_need_reserve = true;
 }
 
-bool llama_context::rm_adapter_lora(
-        llama_adapter_lora * adapter) {
-    LLAMA_LOG_DEBUG("%s: adapter = %p\n", __func__, (void *) adapter);
-
-    auto it = loras.find(adapter);
-    if (it != loras.end()) {
-        loras.erase(it);
-
-        sched_need_reserve = true;
-
-        return true;
-    }
-
-    return false;
-}
-
-void llama_context::put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
+bool llama_context::adapters_lora_are_same(llama_adapter_lora ** adapters, size_t n_adapters, float * scales) {
     LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
 
-    if (are_adapter_loras_same(num_adapters, adapters, scales)) {
-        return;
-    }
-
-    clear_adapter_lora();
-
-    for (size_t i = 0; i < num_adapters; i ++) {
-        if (scales[i] != 0.0f) {
-            set_adapter_lora(adapters[i], scales[i]);
-        }
-    }
-}
-
-bool llama_context::are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
-    LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
-
-    if (num_adapters != loras.size()) {
+    if (n_adapters != loras.size()) {
         return false;
     }
 
-    for (size_t i = 0; i < num_adapters; i ++) {
+    for (size_t i = 0; i < n_adapters; i ++) {
         auto it = loras.find(adapters[i]);
 
         if (it == loras.end() || it->second != scales[i]) {
@@ -1127,19 +1097,7 @@ bool llama_context::are_adapter_loras_same(size_t num_adapters, llama_adapter_lo
     return true;
 }
 
-void llama_context::clear_adapter_lora() {
-    LLAMA_LOG_DEBUG("%s: call\n", __func__);
-
-    if (loras.empty()) {
-        return;
-    }
-
-    loras.clear();
-
-    sched_need_reserve = true;
-}
-
-bool llama_context::apply_adapter_cvec(
+bool llama_context::set_adapter_cvec(
         const float * data,
         size_t len,
         int32_t n_embd,
@@ -3256,39 +3214,28 @@ uint32_t llama_get_sampled_probs_count_ith(llama_context * ctx, int32_t i) {
 
 // llama adapter API
 
-int32_t llama_set_adapter_lora(
+int32_t llama_set_adapters_lora(
         llama_context * ctx,
-        llama_adapter_lora * adapter,
-        float scale) {
-    ctx->set_adapter_lora(adapter, scale);
+        llama_adapter_lora ** adapters,
+        size_t n_adapters,
+        float * scales) {
+    if (adapters == nullptr || scales == nullptr) {
+        GGML_ASSERT(n_adapters == 0 && "invalid llama_set_adapters_lora call");
+    }
+
+    ctx->set_adapters_lora(adapters, n_adapters, scales);
 
     return 0;
 }
 
-int32_t llama_rm_adapter_lora(
-        llama_context * ctx,
-        llama_adapter_lora * adapter) {
-    bool res = ctx->rm_adapter_lora(adapter);
-
-    return res ? 0 : -1;
-}
-
-void llama_clear_adapter_lora(llama_context * ctx) {
-    ctx->clear_adapter_lora();
-}
-
-void llama_put_adapter_loras(llama_context * ctx, size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
-    ctx->put_adapter_loras(num_adapters, adapters, scales);
-}
-
-int32_t llama_apply_adapter_cvec(
+int32_t llama_set_adapter_cvec(
         llama_context * ctx,
-            const float * data,
-            size_t len,
-            int32_t n_embd,
-            int32_t il_start,
-            int32_t il_end) {
-    bool res = ctx->apply_adapter_cvec(data, len, n_embd, il_start, il_end);
+        const float * data,
+        size_t len,
+        int32_t n_embd,
+        int32_t il_start,
+        int32_t il_end) {
+    bool res = ctx->set_adapter_cvec(data, len, n_embd, il_start, il_end);
 
     return res ? 0 : -1;
 }
diff --git a/src/llama-context.h b/src/llama-context.h
index adcb79208f..6d795b5d88 100644
--- a/src/llama-context.h
+++ b/src/llama-context.h
@@ -104,20 +104,11 @@ struct llama_context {
     void set_causal_attn(bool value);
     void set_warmup(bool value);
 
-    void set_adapter_lora(
-            llama_adapter_lora * adapter,
-            float scale);
+    void set_adapters_lora(llama_adapter_lora ** adapters, size_t n_adapters, float * scales);
 
-    bool rm_adapter_lora(
-            llama_adapter_lora * adapter);
+    bool adapters_lora_are_same(llama_adapter_lora ** adapters, size_t n_adapters, float * scales);
 
-    void put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
-
-    bool are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
-
-    void clear_adapter_lora();
-
-    bool apply_adapter_cvec(
+    bool set_adapter_cvec(
            const float * data,
            size_t len,
            int32_t n_embd,