Validate if imatrix contains activations
parent 9adae08789
commit 017945a3b2
@@ -35,6 +35,7 @@ static bool is_iq(const enum ggml_type t) {
             return false;
     }
 }
+
 static enum ggml_type fallback_type(const enum ggml_type new_type) {
     switch (new_type) {
         case GGML_TYPE_TQ1_0:
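For orientation: only the tail of is_iq is visible above. Judging by the name, it reports whether a type belongs to ggml's IQ (importance-aware) quant family, which generally needs imatrix data to quantize well. A minimal sketch of that shape (the exact case list is an assumption, not taken from this commit):

// Sketch only: the real is_iq may cover a different set of cases.
static bool is_iq_sketch(const enum ggml_type t) {
    switch (t) {
        case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ1_M:
        case GGML_TYPE_IQ2_XXS:
        case GGML_TYPE_IQ2_XS:
        case GGML_TYPE_IQ2_S:
        case GGML_TYPE_IQ3_XXS:
        case GGML_TYPE_IQ3_S:
        case GGML_TYPE_IQ4_NL:
        case GGML_TYPE_IQ4_XS:
            return true;
        default:
            return false;
    }
}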
@@ -61,6 +62,7 @@ static enum ggml_type fallback_type(const enum ggml_type new_type) {
             return new_type;
     }
 }
+
 static void zeros(std::ofstream & file, size_t n) {
     char zero = 0;
     for (size_t i = 0; i < n; ++i) {
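The zeros helper above writes n zero bytes one at a time; the quantizer uses it to pad tensor data out to the GGUF alignment boundary. A plausible call site, assuming an open std::ofstream named fout and a freshly written tensor of new_size bytes (GGML_PAD is ggml's real rounding macro; the variable names are illustrative):

// Pad so the next tensor starts on an align-byte boundary.
const size_t align = GGUF_DEFAULT_ALIGNMENT;
zeros(fout, GGML_PAD(new_size, align) - new_size);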
@@ -135,6 +137,7 @@ struct quantize_state_impl {
     int n_fallback = 0;

     bool has_imatrix = false;
+    bool has_activations = false;

     // used to figure out if a model shares tok_embd with the output weight
     bool has_output = false;
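has_activations sits next to has_imatrix so later logic can distinguish between having importance weights and also having activation statistics. This commit only sets the flag; a hypothetical consumer, built from names that do appear in this diff, might look like:

// Hypothetical, not part of this commit: fall back from an IQ type when
// the statistics it would rely on were not provided.
static enum ggml_type choose_type(quantize_state_impl & qs, enum ggml_type new_type) {
    if (is_iq(new_type) && !qs.has_activations) {
        new_type = fallback_type(new_type);
        ++qs.n_fallback;
    }
    return new_type;
}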
@@ -652,14 +655,15 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     if (params->only_copy) {
         ftype = ml.ftype;
     }
-    const std::unordered_map<std::string, std::vector<float>> * imatrix_data = nullptr;
+    const std::unordered_map<std::string, std::vector<float>> * values_data = nullptr;
+    const std::unordered_map<std::string, std::vector<float>> * activations_data = nullptr;
     if (params->imatrix) {
-        imatrix_data = static_cast<const std::unordered_map<std::string, std::vector<float>>*>(params->imatrix);
-        if (imatrix_data) {
-            LLAMA_LOG_INFO("================================ Have weights data with %d entries\n",int(imatrix_data->size()));
+        values_data = static_cast<const std::unordered_map<std::string, std::vector<float>>*>(params->imatrix);
+        if (values_data) {
+            LLAMA_LOG_INFO("================================ Have weights data with %d entries\n",int(values_data->size()));
             qs.has_imatrix = true;
             // check imatrix for nans or infs
-            for (const auto & kv : *imatrix_data) {
+            for (const auto & kv : *values_data) {
                 for (float f : kv.second) {
                     if (!std::isfinite(f)) {
                         throw std::runtime_error(format("imatrix contains non-finite value %f\n", f));
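Both params->imatrix and the new params->activations arrive as opaque void pointers and are cast back to string-to-vector maps here. A minimal sketch of the caller's side, assuming the llama.cpp C API (the activations field is taken from this diff; the tensor name and sizes are made up):

#include <string>
#include <unordered_map>
#include <vector>
#include "llama.h"

std::unordered_map<std::string, std::vector<float>> values;
std::unordered_map<std::string, std::vector<float>> activations;
// One float per input column (times n_expert for MoE tensors).
values["blk.0.attn_q.weight"]      = std::vector<float>(4096, 1.0f);
activations["blk.0.attn_q.weight"] = std::vector<float>(4096, 0.5f);

llama_model_quantize_params qparams = llama_model_quantize_default_params();
qparams.imatrix     = &values;       // read back via static_cast above
qparams.activations = &activations;  // the field this commit validates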
@@ -668,8 +672,22 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
             }
         }
     }
+    if (params->activations) {
+        activations_data = static_cast<const std::unordered_map<std::string, std::vector<float>>*>(params->activations);
+        if (activations_data) {
+            LLAMA_LOG_INFO("================================ Have activations data with %d entries\n",int(activations_data->size()));
+            qs.has_activations = true;
+            // check activations for nans or infs
+            for (const auto & kv : *activations_data) {
+                for (float f : kv.second) {
+                    if (!std::isfinite(f)) {
+                        throw std::runtime_error(format("activations contain non-finite value %f\n", f));
+                    }
+                }
+            }
+        }
+    }

-    const size_t align = GGUF_DEFAULT_ALIGNMENT;
     gguf_context_ptr ctx_out { gguf_init_empty() };

     std::vector<int> prune_list = {};
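The activations check is a line-for-line copy of the imatrix check above it, with only the log text and error message changed. If the duplication ever needs cleaning up, both loops could share a helper; a self-contained sketch, not part of this commit:

#include <cmath>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

// Throw if any value in the name-to-stats map is NaN or +/-infinity.
static void validate_finite(const std::unordered_map<std::string, std::vector<float>> & data,
                            const std::string & what) {
    for (const auto & kv : data) {
        for (float f : kv.second) {
            if (!std::isfinite(f)) {
                throw std::runtime_error(what + " contains non-finite value for tensor " + kv.first);
            }
        }
    }
}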
@@ -846,6 +864,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     const auto tn = LLM_TN(model.arch);
     new_ofstream(0);
     for (const auto * it : tensors) {
+        const size_t align = GGUF_DEFAULT_ALIGNMENT;
         const auto & weight = *it;
         ggml_tensor * tensor = weight.tensor;
         if (weight.idx != cur_split && params->keep_split) {
@@ -864,10 +883,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         ml.load_data_for(tensor);

         LLAMA_LOG_INFO("[%4d/%4d] %36s - [%s], type = %6s, ",
-               ++idx, ml.n_tensors,
-               ggml_get_name(tensor),
-               llama_format_tensor_shape(tensor).c_str(),
-               ggml_type_name(tensor->type));
+               ++idx, ml.n_tensors, ggml_get_name(tensor), llama_format_tensor_shape(tensor).c_str(), ggml_type_name(tensor->type));

         // This used to be a regex, but <regex> has an extreme cost to compile times.
         bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?
@@ -967,9 +983,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
             const int64_t nelements = ggml_nelements(tensor);

             const float * imatrix = nullptr;
-            if (imatrix_data) {
-                auto it = imatrix_data->find(remap_imatrix(tensor->name, mapped));
-                if (it == imatrix_data->end()) {
+            if (values_data) {
+                auto it = values_data->find(remap_imatrix(tensor->name, mapped));
+                if (it == values_data->end()) {
                     LLAMA_LOG_INFO("\n====== %s: did not find weights for %s\n", __func__, tensor->name);
                 } else {
                     if (it->second.size() == (size_t)tensor->ne[0]*tensor->ne[2]) {
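The size check on the last visible line expects one stored value per input column (ne[0]) for each expert (ne[2], which is 1 for non-MoE tensors). The hunk is cut off here; in upstream llama.cpp the matching branch simply takes the data pointer, roughly as follows (this fork may differ):

if (it->second.size() == (size_t)tensor->ne[0]*tensor->ne[2]) {
    imatrix = it->second.data();
} else {
    LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n",
            __func__, int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
}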