quants : Add memsets and other fixes for IQ quants (#19861)
* Add memsets and other fixes for IQ quants * Make memset unconditional, change Laux back to L * Move another memset
This commit is contained in:
parent
7463687161
commit
649f06481e
|
|
@ -3104,6 +3104,11 @@ static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML
|
|||
}
|
||||
float scale = make_qp_quants(32, kMaxQ+1, xval, (uint8_t*)L, weight);
|
||||
float eff_max = scale*kMaxQ;
|
||||
if (eff_max <= 0) {
|
||||
scales[ib] = 0;
|
||||
memset(L, 0, 32);
|
||||
continue;
|
||||
}
|
||||
float best = 0;
|
||||
for (int is = -6; is <= 6; ++is) {
|
||||
float id = (2*kMaxQ-1+is*0.1f)/eff_max;
|
||||
|
|
@ -3273,9 +3278,9 @@ static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_
|
|||
}
|
||||
float max = xval[0];
|
||||
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
|
||||
memset(L, 0, 16);
|
||||
if (max < GROUP_MAX_EPS) {
|
||||
scales[ib] = 0;
|
||||
memset(L, 0, 16);
|
||||
continue;
|
||||
}
|
||||
float best = 0;
|
||||
|
|
@ -3714,9 +3719,9 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT
|
|||
}
|
||||
float max = xval[0];
|
||||
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
|
||||
memset(L, 0, 32);
|
||||
if (max < GROUP_MAX_EPS_IQ3_XXS) {
|
||||
scales[ib] = 0;
|
||||
memset(L, 0, 32);
|
||||
continue;
|
||||
}
|
||||
float best = 0;
|
||||
|
|
@ -3922,6 +3927,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT
|
|||
}
|
||||
float max = xval[0];
|
||||
for (int i = 1; i < block_size; ++i) max = MAX(max, xval[i]);
|
||||
memset(L, 0, block_size);
|
||||
if (!max) {
|
||||
scales[ib] = 0;
|
||||
continue;
|
||||
|
|
@ -4245,6 +4251,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
|
|||
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
|
||||
if (max < GROUP_MAX_EPS_IQ1_S) {
|
||||
scales[ib] = 0;
|
||||
shifts[ib] = 1;
|
||||
memset(L, 1, block_size);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -4285,7 +4292,12 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
|
|||
}
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
|
||||
if (besti1 < 0 || besti2 < 0 || best_shift == 0) {
|
||||
scales[ib] = 0;
|
||||
shifts[ib] = 1;
|
||||
memset(L, 1, block_size);
|
||||
continue;
|
||||
}
|
||||
for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
|
||||
for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
|
||||
for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
|
||||
|
|
@ -4429,6 +4441,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
|
|||
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
|
||||
if (max < GROUP_MAX_EPS_IQ1_M) {
|
||||
scales[ib] = 0;
|
||||
shifts[ib] = 0;
|
||||
memset(L, 1, block_size);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -4527,7 +4540,12 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
|
|||
}
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_k >= 0);
|
||||
if (besti1 < 0 || besti2 < 0 || best_k < 0) {
|
||||
scales[ib] = 0;
|
||||
shifts[ib] = 0;
|
||||
memset(L, 1, block_size);
|
||||
continue;
|
||||
}
|
||||
for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
|
||||
for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
|
||||
for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
|
||||
|
|
@ -4874,6 +4892,7 @@ static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_R
|
|||
}
|
||||
float max = xval[0];
|
||||
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
|
||||
memset(L, 0, 16);
|
||||
if (max < GROUP_MAX_EPS_IQ2_S) {
|
||||
scales[ib] = 0;
|
||||
continue;
|
||||
|
|
|
|||
Loading…
Reference in New Issue