Refactoring to use new llama_put_adapter_loras
This commit is contained in:
parent
0dfcd3b607
commit
3b82c3d3e7
|
|
@ -1344,12 +1344,15 @@ std::string get_model_endpoint() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
|
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
|
||||||
llama_clear_adapter_lora(ctx);
|
std::vector<llama_adapter_lora*> loras;
|
||||||
for (auto & la : lora) {
|
std::vector<float> scales;
|
||||||
if (la.scale != 0.0f) {
|
|
||||||
llama_set_adapter_lora(ctx, la.ptr, la.scale);
|
for (auto & la: lora) {
|
||||||
}
|
loras.push_back(la.ptr);
|
||||||
|
scales.push_back(la.scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llama_put_adapter_loras(ctx, loras.size(), loras.data(), scales.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
struct llama_model_params common_model_params_to_llama(common_params & params) {
|
struct llama_model_params common_model_params_to_llama(common_params & params) {
|
||||||
|
|
|
||||||
|
|
@ -672,6 +672,9 @@ extern "C" {
|
||||||
// Remove all LoRA adapters from given context
|
// Remove all LoRA adapters from given context
|
||||||
LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
|
LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
|
||||||
|
|
||||||
|
    // Set LoRA adapters on the context. This is a no-op if the given adapters and scales already match the ones currently active on the context.
|
||||||
|
LLAMA_API void llama_put_adapter_loras(struct llama_context * ctx, size_t num_adapters, struct llama_adapter_lora ** adapters, float * scales);
|
||||||
|
|
||||||
// Apply a loaded control vector to a llama_context, or if data is NULL, clear
|
// Apply a loaded control vector to a llama_context, or if data is NULL, clear
|
||||||
// the currently loaded vector.
|
// the currently loaded vector.
|
||||||
// n_embd should be the size of a single layer's control, and data should point
|
// n_embd should be the size of a single layer's control, and data should point
|
||||||
|
|
|
||||||
|
|
@ -1093,6 +1093,40 @@ bool llama_context::rm_adapter_lora(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void llama_context::put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
|
||||||
|
LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
|
||||||
|
|
||||||
|
if (are_adapter_loras_same(num_adapters, adapters, scales)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
clear_adapter_lora();
|
||||||
|
|
||||||
|
for (size_t i = 0; i < num_adapters; i ++) {
|
||||||
|
if (scales[i] != 0.0f) {
|
||||||
|
set_adapter_lora(adapters[i], scales[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool llama_context::are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
|
||||||
|
LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters);
|
||||||
|
|
||||||
|
if (num_adapters != loras.size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < num_adapters; i ++) {
|
||||||
|
auto it = loras.find(adapters[i]);
|
||||||
|
|
||||||
|
if (it == loras.end() || it->second != scales[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void llama_context::clear_adapter_lora() {
|
void llama_context::clear_adapter_lora() {
|
||||||
LLAMA_LOG_DEBUG("%s: call\n", __func__);
|
LLAMA_LOG_DEBUG("%s: call\n", __func__);
|
||||||
|
|
||||||
|
|
@ -3243,6 +3277,10 @@ void llama_clear_adapter_lora(llama_context * ctx) {
|
||||||
ctx->clear_adapter_lora();
|
ctx->clear_adapter_lora();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C API entry point: forwards to llama_context::put_adapter_loras(), which
// only modifies the context when the requested adapters/scales differ from
// the currently active set.
void llama_put_adapter_loras(llama_context * ctx, size_t num_adapters, llama_adapter_lora ** adapters, float * scales) {
    ctx->put_adapter_loras(num_adapters, adapters, scales);
}
|
||||||
|
|
||||||
int32_t llama_apply_adapter_cvec(
|
int32_t llama_apply_adapter_cvec(
|
||||||
llama_context * ctx,
|
llama_context * ctx,
|
||||||
const float * data,
|
const float * data,
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,10 @@ struct llama_context {
|
||||||
bool rm_adapter_lora(
|
bool rm_adapter_lora(
|
||||||
llama_adapter_lora * adapter);
|
llama_adapter_lora * adapter);
|
||||||
|
|
||||||
|
void put_adapter_loras(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
|
||||||
|
|
||||||
|
bool are_adapter_loras_same(size_t num_adapters, llama_adapter_lora ** adapters, float * scales);
|
||||||
|
|
||||||
void clear_adapter_lora();
|
void clear_adapter_lora();
|
||||||
|
|
||||||
bool apply_adapter_cvec(
|
bool apply_adapter_cvec(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue