cont : alternative lora API

Georgi Gerganov 2026-02-13 08:19:31 +02:00
parent 3b82c3d3e7
commit 67f99e605f
4 changed files with 43 additions and 117 deletions

View File

@@ -1242,7 +1242,7 @@ common_init_result_ptr common_init_from_params(common_params & params) {
return res;
}
int err = llama_apply_adapter_cvec(
int err = llama_set_adapter_cvec(
lctx,
cvec.data.data(),
cvec.data.size(),
@@ -1344,7 +1344,7 @@ std::string get_model_endpoint() {
}
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
std::vector<llama_adapter_lora*> loras;
std::vector<llama_adapter_lora *> loras;
std::vector<float> scales;
for (auto & la: lora) {
@@ -1352,7 +1352,7 @@ void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adap
scales.push_back(la.scale);
}
llama_put_adapter_loras(ctx, loras.size(), loras.data(), scales.data());
llama_set_adapters_lora(ctx, loras.data(), loras.size(), scales.data());
}
struct llama_model_params common_model_params_to_llama(common_params & params) {
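For downstream callers nothing changes at the `common_set_adapter_lora` level; only the forwarded call does, with the adapter array now preceding the count. A minimal usage sketch (the `rebalance_loras` helper is illustrative, and it assumes the entries in `lora` were already resolved by `common_init_from_params`):

```cpp
#include <vector>

#include "common.h" // common_adapter_lora_info, common_set_adapter_lora
#include "llama.h"

// Halve the influence of every loaded adapter, then push the updated set to the
// context. The helper rebuilds the (adapters, scales) arrays and forwards them
// in the new argument order: (ctx, adapters, n_adapters, scales).
static void rebalance_loras(llama_context * lctx, std::vector<common_adapter_lora_info> & lora) {
    for (auto & la : lora) {
        la.scale *= 0.5f; // a scale of 0.0f detaches the adapter entirely
    }
    common_set_adapter_lora(lctx, lora);
}
```

Calling the helper again with unchanged scales is a no-op on the context side; see the `adapters_lora_are_same` check further down.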

View File

@@ -656,24 +656,12 @@ extern "C" {
// The following functions operate on a llama_context, hence the naming: llama_verb_...
// Add a loaded LoRA adapter to given context
// This will not modify model's weight
LLAMA_API int32_t llama_set_adapter_lora(
struct llama_context * ctx,
struct llama_adapter_lora * adapter,
float scale);
// Remove a specific LoRA adapter from given context
// Return -1 if the adapter is not present in the context
LLAMA_API int32_t llama_rm_adapter_lora(
struct llama_context * ctx,
struct llama_adapter_lora * adapter);
// Remove all LoRA adapters from given context
LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
// Set LoRA adapters on the context. Will only modify if the adapters currently in context are different.
LLAMA_API void llama_put_adapter_loras(struct llama_context * ctx, size_t num_adapters, struct llama_adapter_lora ** adapters, float * scales);
LLAMA_API int32_t llama_set_adapters_lora(
struct llama_context * ctx,
struct llama_adapter_lora ** adapters,
size_t n_adapters,
float * scales);
// Apply a loaded control vector to a llama_context, or if data is NULL, clear
// the currently loaded vector.
@@ -681,7 +669,7 @@ extern "C" {
// to an n_embd x n_layers buffer starting from layer 1.
// il_start and il_end are the layer range the vector should apply to (both inclusive)
// See llama_control_vector_load in common to load a control vector.
LLAMA_API int32_t llama_apply_adapter_cvec(
LLAMA_API int32_t llama_set_adapter_cvec(
struct llama_context * ctx,
const float * data,
size_t len,
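Taken together, the header now exposes a single batched entry point in place of the removed set/rm/clear trio. A minimal sketch of driving it directly, assuming the adapters were loaded beforehand (e.g. with the existing `llama_adapter_lora_init`) and with error handling omitted:

```cpp
#include "llama.h"

// Apply two adapters at different strengths, then detach both.
static void apply_two_loras(llama_context * ctx,
                            llama_adapter_lora * lora_a,
                            llama_adapter_lora * lora_b) {
    llama_adapter_lora * adapters[2] = { lora_a, lora_b };
    float                scales[2]   = { 1.0f, 0.25f };

    // one call replaces the old per-adapter set/rm/clear sequence; the context
    // only rebuilds its state if this set differs from what is currently applied
    llama_set_adapters_lora(ctx, adapters, 2, scales);

    // ... run inference ...

    // detach everything: the implementation accepts a null array when n_adapters == 0
    llama_set_adapters_lora(ctx, nullptr, 0, nullptr);
}
```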

View File

@@ -1061,62 +1061,32 @@ bool llama_context::set_sampler(llama_seq_id seq_id, llama_sampler * sampler) {
return true;
}
void llama_context::set_adapter_lora(
llama_adapter_lora * adapter,
float scale) {
LLAMA_LOG_DEBUG("%s: adapter = %p, scale = %f\n", __func__, (void *) adapter, scale);
void llama_context::set_adapters_lora(llama_adapter_lora ** adapters, size_t n_adapters, float * scales) {
LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
if (auto it = loras.find(adapter); it != loras.end()) {
if (it->second == scale) {
return;
}
if (adapters_lora_are_same(adapters, n_adapters, scales)) {
return;
}
loras[adapter] = scale;
loras.clear();
for (size_t i = 0; i < n_adapters; i ++) {
if (scales[i] != 0.0f) {
loras[adapters[i]] = scales[i];
}
}
sched_need_reserve = true;
}
bool llama_context::rm_adapter_lora(
llama_adapter_lora * adapter) {
LLAMA_LOG_DEBUG("%s: adapter = %p\n", __func__, (void *) adapter);
auto it = loras.find(adapter);
if (it != loras.end()) {
loras.erase(it);
sched_need_reserve = true;
return true;
}
return false;
}
void llama_context::put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
bool llama_context::adapters_lora_are_same(llama_adapter_lora ** adapters, size_t n_adapters, float * scales) {
LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
if (are_adapter_loras_same(num_adapters, adapters, scales)) {
return;
}
clear_adapter_lora();
for (size_t i = 0; i < num_adapters; i ++) {
if (scales[i] != 0.0f) {
set_adapter_lora(adapters[i], scales[i]);
}
}
}
bool llama_context::are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
if (num_adapters != loras.size()) {
if (n_adapters != loras.size()) {
return false;
}
for (size_t i = 0; i < num_adapters; i ++) {
for (size_t i = 0; i < n_adapters; i ++) {
auto it = loras.find(adapters[i]);
if (it == loras.end() || it->second != scales[i]) {
@@ -1127,19 +1097,7 @@ bool llama_context::are_adapter_loras_same(size_t num_adapters, llama_adapter_lo
return true;
}
void llama_context::clear_adapter_lora() {
LLAMA_LOG_DEBUG("%s: call\n", __func__);
if (loras.empty()) {
return;
}
loras.clear();
sched_need_reserve = true;
}
bool llama_context::apply_adapter_cvec(
bool llama_context::set_adapter_cvec(
const float * data,
size_t len,
int32_t n_embd,
@@ -3256,39 +3214,28 @@ uint32_t llama_get_sampled_probs_count_ith(llama_context * ctx, int32_t i) {
// llama adapter API
int32_t llama_set_adapter_lora(
int32_t llama_set_adapters_lora(
llama_context * ctx,
llama_adapter_lora * adapter,
float scale) {
ctx->set_adapter_lora(adapter, scale);
llama_adapter_lora ** adapters,
size_t n_adapters,
float * scales) {
if (adapters == nullptr || scales == nullptr) {
GGML_ASSERT(n_adapters == 0 && "invalid llama_set_adapters_lora call");
}
ctx->set_adapters_lora(adapters, n_adapters, scales);
return 0;
}
int32_t llama_rm_adapter_lora(
llama_context * ctx,
llama_adapter_lora * adapter) {
bool res = ctx->rm_adapter_lora(adapter);
return res ? 0 : -1;
}
void llama_clear_adapter_lora(llama_context * ctx) {
ctx->clear_adapter_lora();
}
void llama_put_adapter_loras(llama_context * ctx, size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
ctx->put_adapter_loras(num_adapters, adapters, scales);
}
int32_t llama_apply_adapter_cvec(
int32_t llama_set_adapter_cvec(
llama_context * ctx,
const float * data,
size_t len,
int32_t n_embd,
int32_t il_start,
int32_t il_end) {
bool res = ctx->apply_adapter_cvec(data, len, n_embd, il_start, il_end);
const float * data,
size_t len,
int32_t n_embd,
int32_t il_start,
int32_t il_end) {
bool res = ctx->set_adapter_cvec(data, len, n_embd, il_start, il_end);
return res ? 0 : -1;
}
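The part of the new implementation worth highlighting is the change-detection step: `set_adapters_lora` compares the requested set against the current `loras` map and only clears, rebuilds, and flags `sched_need_reserve` when something actually differs. A self-contained sketch of that pattern (deliberately simplified, not the real `llama_context` code):

```cpp
#include <cstddef>
#include <cstdio>
#include <map>

// Rebuild the adapter -> scale map only when the requested set differs from the
// stored one, so repeated identical calls do not invalidate the scheduler reservation.
struct ctx_sketch {
    std::map<void *, float> loras;
    bool sched_need_reserve = false;

    bool same(void ** adapters, size_t n, const float * scales) const {
        if (n != loras.size()) {
            return false;
        }
        for (size_t i = 0; i < n; i++) {
            auto it = loras.find(adapters[i]);
            if (it == loras.end() || it->second != scales[i]) {
                return false;
            }
        }
        return true;
    }

    void set(void ** adapters, size_t n, const float * scales) {
        if (same(adapters, n, scales)) {
            return; // identical request -> no-op
        }
        loras.clear();
        for (size_t i = 0; i < n; i++) {
            if (scales[i] != 0.0f) {
                loras[adapters[i]] = scales[i]; // zero-scale entries are dropped, not stored
            }
        }
        sched_need_reserve = true;
    }
};

int main() {
    int a = 0, b = 0; // stand-ins for adapter handles
    void * adapters[2] = { &a, &b };
    float  scales[2]   = { 1.0f, 0.5f };

    ctx_sketch ctx;
    ctx.set(adapters, 2, scales);
    std::printf("first call:  need_reserve = %d\n", ctx.sched_need_reserve); // 1

    ctx.sched_need_reserve = false;
    ctx.set(adapters, 2, scales); // same set, same scales
    std::printf("second call: need_reserve = %d\n", ctx.sched_need_reserve); // 0
    return 0;
}
```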

View File

@@ -104,20 +104,11 @@ struct llama_context {
void set_causal_attn(bool value);
void set_warmup(bool value);
void set_adapter_lora(
llama_adapter_lora * adapter,
float scale);
void set_adapters_lora(llama_adapter_lora ** adapters, size_t n_adapters, float * scales);
bool rm_adapter_lora(
llama_adapter_lora * adapter);
bool adapters_lora_are_same(llama_adapter_lora ** adapters, size_t n_adapters, float * scales);
void put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
bool are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
void clear_adapter_lora();
bool apply_adapter_cvec(
bool set_adapter_cvec(
const float * data,
size_t len,
int32_t n_embd,