quants : Add memsets and other fixes for IQ quants (#19861)

* Add memsets and other fixes for IQ quants

* Make memset unconditional, change Laux back to L

* Move another memset
This commit is contained in:
Bartowski 2026-03-06 16:06:56 -05:00 committed by GitHub
parent 7463687161
commit 649f06481e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 23 additions and 4 deletions

View File

@ -3104,6 +3104,11 @@ static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML
}
float scale = make_qp_quants(32, kMaxQ+1, xval, (uint8_t*)L, weight);
float eff_max = scale*kMaxQ;
if (eff_max <= 0) {
scales[ib] = 0;
memset(L, 0, 32);
continue;
}
float best = 0;
for (int is = -6; is <= 6; ++is) {
float id = (2*kMaxQ-1+is*0.1f)/eff_max;
@ -3273,9 +3278,9 @@ static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
memset(L, 0, 16);
if (max < GROUP_MAX_EPS) {
scales[ib] = 0;
memset(L, 0, 16);
continue;
}
float best = 0;
@ -3714,9 +3719,9 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT
}
float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
memset(L, 0, 32);
if (max < GROUP_MAX_EPS_IQ3_XXS) {
scales[ib] = 0;
memset(L, 0, 32);
continue;
}
float best = 0;
@ -3922,6 +3927,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT
}
float max = xval[0];
for (int i = 1; i < block_size; ++i) max = MAX(max, xval[i]);
memset(L, 0, block_size);
if (!max) {
scales[ib] = 0;
continue;
@ -4245,6 +4251,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
if (max < GROUP_MAX_EPS_IQ1_S) {
scales[ib] = 0;
shifts[ib] = 1;
memset(L, 1, block_size);
continue;
}
@ -4285,7 +4292,12 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
}
}
}
GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
if (besti1 < 0 || besti2 < 0 || best_shift == 0) {
scales[ib] = 0;
shifts[ib] = 1;
memset(L, 1, block_size);
continue;
}
for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
@ -4429,6 +4441,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
if (max < GROUP_MAX_EPS_IQ1_M) {
scales[ib] = 0;
shifts[ib] = 0;
memset(L, 1, block_size);
continue;
}
@ -4527,7 +4540,12 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
}
}
}
GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_k >= 0);
if (besti1 < 0 || besti2 < 0 || best_k < 0) {
scales[ib] = 0;
shifts[ib] = 0;
memset(L, 1, block_size);
continue;
}
for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
@ -4874,6 +4892,7 @@ static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_R
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
memset(L, 0, 16);
if (max < GROUP_MAX_EPS_IQ2_S) {
scales[ib] = 0;
continue;