Avoid division by zero if truncation occurs
This commit is contained in:
parent ee05d6bc0b
commit f22b3097eb
|
|
@ -790,28 +790,24 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scale for the rows we didn't sample in this expert: multiply by stride-ish factor
|
// Scale for the rows we didn't sample in this expert: multiply by stride-ish factor
|
||||||
const float scale_rows = rows_per_expert / std::max<int64_t>(1, rs);
|
const float scale_rows = (float)rows_per_expert / std::max(1.0f, (float)rs);
|
||||||
total_err *= scale_rows;
|
total_err *= scale_rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
return total_err;
|
return total_err;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Produce per-tensor candidate lists
|
|
||||||
std::vector<tensor_info> all;
|
std::vector<tensor_info> all;
|
||||||
all.reserve(tensors.size());
|
all.reserve(tensors.size());
|
||||||
|
|
||||||
for (const auto * tw : tensors) {
|
for (const auto * tw : tensors) {
|
||||||
// Temporary workers for dequantization
|
|
||||||
std::vector<std::thread> workers;
|
std::vector<std::thread> workers;
|
||||||
workers.reserve(std::max(1, nthread));
|
workers.reserve(std::max(1, nthread));
|
||||||
|
|
||||||
ggml_tensor * t = tw->tensor;
|
ggml_tensor * t = tw->tensor;
|
||||||
const std::string name = ggml_get_name(t);
|
const std::string name = ggml_get_name(t);
|
||||||
|
|
||||||
if (!can_quantize(t)) {
|
if (!can_quantize(t)) { continue; }
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
LLAMA_LOG_INFO("\t%s: - processing tensor %45s \t(%12d elements)\n", __func__, name.c_str(), (int)ggml_nelements(t));
|
LLAMA_LOG_INFO("\t%s: - processing tensor %45s \t(%12d elements)\n", __func__, name.c_str(), (int)ggml_nelements(t));
|
||||||
if (!ml.use_mmap) {
|
if (!ml.use_mmap) {
|
||||||
|
|
@ -820,7 +816,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
}
|
}
|
||||||
ml.load_data_for(t);
|
ml.load_data_for(t);
|
||||||
|
|
||||||
// Prepare f32 weights for error estimates
|
|
||||||
const int64_t nelem = ggml_nelements(t);
|
const int64_t nelem = ggml_nelements(t);
|
||||||
std::vector<no_init<float>> f32_conv_buf;
|
std::vector<no_init<float>> f32_conv_buf;
|
||||||
float * f32_data = nullptr;
|
float * f32_data = nullptr;
|
||||||
|
|
@ -955,13 +950,13 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
if (ti.choice >= (int)ti.candidate.size() - 1) { continue; }
|
if (ti.choice >= (int)ti.candidate.size() - 1) { continue; }
|
||||||
|
|
||||||
int j = next_distinct_idx(ti);
|
int j = next_distinct_idx(ti);
|
||||||
if (j < 0) { continue; } // no larger-size candidate remains
|
if (j < 0) { continue; }
|
||||||
|
|
||||||
const auto &cur = ti.candidate[ti.choice];
|
const auto &cur = ti.candidate[ti.choice];
|
||||||
const auto &nxt = ti.candidate[j];
|
const auto &nxt = ti.candidate[j];
|
||||||
|
|
||||||
size_t delta_bytes = nxt.bytes - cur.bytes;
|
size_t delta_bytes = nxt.bytes - cur.bytes;
|
||||||
if (delta_bytes == 0) { continue; } // should not happen after dedup, but be safe
|
if (delta_bytes == 0) { continue; }
|
||||||
|
|
||||||
double err = (double)cur.error - (double)nxt.error;
|
double err = (double)cur.error - (double)nxt.error;
|
||||||
err = std::max(err, 0.0); // do not penalize due to sampling noise
|
err = std::max(err, 0.0); // do not penalize due to sampling noise
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue