Merge b1366757cf into 18ddaea2ae
This commit is contained in:
commit
424f5aa298
|
|
@@ -1630,11 +1630,10 @@ static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block
|
|||
GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
|
||||
constexpr int nrows_interleaved = 4;
|
||||
|
||||
block_q4_0x4 * dst = (block_q4_0x4 *)t->data;
|
||||
const block_q4_0 * src = (const block_q4_0 *)data;
|
||||
block_q4_0 dst_tmp[4];
|
||||
int nrow = ggml_nrows(t);
|
||||
int nblocks = t->ne[0] / QK4_0;
|
||||
block_q4_0x4 * dst_base = (block_q4_0x4 *)t->data;
|
||||
const block_q4_0 * src_base = (const block_q4_0 *)data;
|
||||
const int nrow = ggml_nrows(t);
|
||||
const int nblocks = t->ne[0] / QK4_0;
|
||||
|
||||
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q4_0));
|
||||
|
||||
|
|
@@ -1642,14 +1641,23 @@ static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (int b = 0; b < nrow; b += nrows_interleaved) {
|
||||
const int n_row_groups = nrow / nrows_interleaved;
|
||||
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int bg = 0; bg < n_row_groups; bg++) {
|
||||
const int b = bg * nrows_interleaved;
|
||||
const block_q4_0 * src = src_base + b * nblocks;
|
||||
block_q4_0x4 * dst = dst_base + bg * nblocks;
|
||||
block_q4_0 dst_tmp[4];
|
||||
|
||||
for (int64_t x = 0; x < nblocks; x++) {
|
||||
for (int i = 0; i < nrows_interleaved; i++) {
|
||||
dst_tmp[i] = src[x + i * nblocks];
|
||||
}
|
||||
*dst++ = make_block_q4_0x4(dst_tmp, interleave_block);
|
||||
dst[x] = make_block_q4_0x4(dst_tmp, interleave_block);
|
||||
}
|
||||
src += nrows_interleaved * nblocks;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
|
@@ -1661,11 +1669,10 @@ static int repack_q4_K_to_q4_K_8_bl(struct ggml_tensor * t, int interleave_block
|
|||
GGML_ASSERT(interleave_block == 8 || interleave_block == 4);
|
||||
constexpr int nrows_interleaved = 8;
|
||||
|
||||
block_q4_Kx8 * dst = (block_q4_Kx8*)t->data;
|
||||
const block_q4_K * src = (const block_q4_K*) data;
|
||||
block_q4_K dst_tmp[8];
|
||||
int nrow = ggml_nrows(t);
|
||||
int nblocks = t->ne[0] / QK_K;
|
||||
block_q4_Kx8 * dst_base = (block_q4_Kx8*)t->data;
|
||||
const block_q4_K * src_base = (const block_q4_K*) data;
|
||||
const int nrow = ggml_nrows(t);
|
||||
const int nblocks = t->ne[0] / QK_K;
|
||||
|
||||
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q4_K));
|
||||
|
||||
|
|
@@ -1673,14 +1680,23 @@ static int repack_q4_K_to_q4_K_8_bl(struct ggml_tensor * t, int interleave_block
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (int b = 0; b < nrow; b += nrows_interleaved) {
|
||||
const int n_row_groups = nrow / nrows_interleaved;
|
||||
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int bg = 0; bg < n_row_groups; bg++) {
|
||||
const int b = bg * nrows_interleaved;
|
||||
const block_q4_K * src = src_base + b * nblocks;
|
||||
block_q4_Kx8 * dst = dst_base + bg * nblocks;
|
||||
block_q4_K dst_tmp[8];
|
||||
|
||||
for (int64_t x = 0; x < nblocks; x++) {
|
||||
for (int i = 0; i < nrows_interleaved; i++ ) {
|
||||
for (int i = 0; i < nrows_interleaved; i++) {
|
||||
dst_tmp[i] = src[x + i * nblocks];
|
||||
}
|
||||
*dst++ = make_block_q4_Kx8(dst_tmp, interleave_block);
|
||||
dst[x] = make_block_q4_Kx8(dst_tmp, interleave_block);
|
||||
}
|
||||
src += nrows_interleaved * nblocks;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
|
@@ -1692,11 +1708,10 @@ static int repack_q2_K_to_q2_K_8_bl(struct ggml_tensor * t, int interleave_block
|
|||
GGML_ASSERT(interleave_block == 8);
|
||||
constexpr int nrows_interleaved = 8;
|
||||
|
||||
block_q2_Kx8 * dst = (block_q2_Kx8*)t->data;
|
||||
const block_q2_K * src = (const block_q2_K*) data;
|
||||
block_q2_K dst_tmp[8];
|
||||
int nrow = ggml_nrows(t);
|
||||
int nblocks = t->ne[0] / QK_K;
|
||||
block_q2_Kx8 * dst_base = (block_q2_Kx8*)t->data;
|
||||
const block_q2_K * src_base = (const block_q2_K*) data;
|
||||
const int nrow = ggml_nrows(t);
|
||||
const int nblocks = t->ne[0] / QK_K;
|
||||
|
||||
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q2_K));
|
||||
|
||||
|
|
@@ -1704,14 +1719,23 @@ static int repack_q2_K_to_q2_K_8_bl(struct ggml_tensor * t, int interleave_block
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (int b = 0; b < nrow; b += nrows_interleaved) {
|
||||
const int n_row_groups = nrow / nrows_interleaved;
|
||||
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int bg = 0; bg < n_row_groups; bg++) {
|
||||
const int b = bg * nrows_interleaved;
|
||||
const block_q2_K * src = src_base + b * nblocks;
|
||||
block_q2_Kx8 * dst = dst_base + bg * nblocks;
|
||||
block_q2_K dst_tmp[8];
|
||||
|
||||
for (int64_t x = 0; x < nblocks; x++) {
|
||||
for (int i = 0; i < nrows_interleaved; i++ ) {
|
||||
for (int i = 0; i < nrows_interleaved; i++) {
|
||||
dst_tmp[i] = src[x + i * nblocks];
|
||||
}
|
||||
*dst++ = make_block_q2_Kx8(dst_tmp, interleave_block);
|
||||
dst[x] = make_block_q2_Kx8(dst_tmp, interleave_block);
|
||||
}
|
||||
src += nrows_interleaved * nblocks;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
|
@@ -1723,11 +1747,10 @@ static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor * t, int interleave_block
|
|||
GGML_ASSERT(interleave_block == 8);
|
||||
constexpr int nrows_interleaved = 8;
|
||||
|
||||
block_q4_0x8 * dst = (block_q4_0x8*)t->data;
|
||||
const block_q4_0 * src = (const block_q4_0*) data;
|
||||
block_q4_0 dst_tmp[8];
|
||||
int nrow = ggml_nrows(t);
|
||||
int nblocks = t->ne[0] / QK4_0;
|
||||
block_q4_0x8 * dst_base = (block_q4_0x8*)t->data;
|
||||
const block_q4_0 * src_base = (const block_q4_0*) data;
|
||||
const int nrow = ggml_nrows(t);
|
||||
const int nblocks = t->ne[0] / QK4_0;
|
||||
|
||||
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q4_0));
|
||||
|
||||
|
|
@@ -1735,14 +1758,23 @@ static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor * t, int interleave_block
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (int b = 0; b < nrow; b += nrows_interleaved) {
|
||||
const int n_row_groups = nrow / nrows_interleaved;
|
||||
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int bg = 0; bg < n_row_groups; bg++) {
|
||||
const int b = bg * nrows_interleaved;
|
||||
const block_q4_0 * src = src_base + b * nblocks;
|
||||
block_q4_0x8 * dst = dst_base + bg * nblocks;
|
||||
block_q4_0 dst_tmp[8];
|
||||
|
||||
for (int64_t x = 0; x < nblocks; x++) {
|
||||
for (int i = 0; i < nrows_interleaved; i++ ) {
|
||||
for (int i = 0; i < nrows_interleaved; i++) {
|
||||
dst_tmp[i] = src[x + i * nblocks];
|
||||
}
|
||||
*dst++ = make_block_q4_0x8(dst_tmp, interleave_block);
|
||||
dst[x] = make_block_q4_0x8(dst_tmp, interleave_block);
|
||||
}
|
||||
src += nrows_interleaved * nblocks;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
|
@@ -1820,14 +1852,12 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
|
|||
GGML_ASSERT(t->type == GGML_TYPE_IQ4_NL);
|
||||
GGML_ASSERT(interleave_block == 4);
|
||||
|
||||
const block_iq4_nl * src = (const block_iq4_nl *)data;
|
||||
block_iq4_nlx4 * dst = ( block_iq4_nlx4 *)t->data;
|
||||
const block_iq4_nl * src_base = (const block_iq4_nl *)data;
|
||||
block_iq4_nlx4 * dst_base = (block_iq4_nlx4 *)t->data;
|
||||
|
||||
block_iq4_nl dst_tmp[4];
|
||||
|
||||
int nrow = ggml_nrows(t);
|
||||
int nrows_interleaved = 4;
|
||||
int nblocks = t->ne[0] / QK4_NL;
|
||||
const int nrow = ggml_nrows(t);
|
||||
const int nrows_interleaved = 4;
|
||||
const int nblocks = t->ne[0] / QK4_NL;
|
||||
|
||||
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_iq4_nl));
|
||||
|
||||
|
|
@@ -1835,14 +1865,23 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (int b = 0; b < nrow; b += nrows_interleaved) {
|
||||
const int n_row_groups = nrow / nrows_interleaved;
|
||||
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int bg = 0; bg < n_row_groups; bg++) {
|
||||
const int b = bg * nrows_interleaved;
|
||||
const block_iq4_nl * src = src_base + b * nblocks;
|
||||
block_iq4_nlx4 * dst = dst_base + bg * nblocks;
|
||||
block_iq4_nl dst_tmp[4];
|
||||
|
||||
for (int64_t x = 0; x < nblocks; x++) {
|
||||
for (int i = 0; i < nrows_interleaved; i++) {
|
||||
dst_tmp[i] = src[x + i * nblocks];
|
||||
}
|
||||
*dst++ = make_block_iq4_nlx4(dst_tmp, interleave_block);
|
||||
dst[x] = make_block_iq4_nlx4(dst_tmp, interleave_block);
|
||||
}
|
||||
src += nrows_interleaved * nblocks;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
|
@@ -1877,14 +1916,12 @@ static int repack_iq4_nl_to_iq4_nl_8_bl(struct ggml_tensor * t, int interleave_b
|
|||
GGML_ASSERT(t->type == GGML_TYPE_IQ4_NL);
|
||||
GGML_ASSERT(interleave_block == 8);
|
||||
|
||||
const block_iq4_nl * src = (const block_iq4_nl *)data;
|
||||
block_iq4_nlx8 * dst = ( block_iq4_nlx8 *)t->data;
|
||||
const block_iq4_nl * src_base = (const block_iq4_nl *)data;
|
||||
block_iq4_nlx8 * dst_base = (block_iq4_nlx8 *)t->data;
|
||||
|
||||
block_iq4_nl dst_tmp[8];
|
||||
|
||||
int nrow = ggml_nrows(t);
|
||||
int nrows_interleaved = 8;
|
||||
int nblocks = t->ne[0] / QK4_NL;
|
||||
const int nrow = ggml_nrows(t);
|
||||
const int nrows_interleaved = 8;
|
||||
const int nblocks = t->ne[0] / QK4_NL;
|
||||
|
||||
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_iq4_nl));
|
||||
|
||||
|
|
@@ -1892,14 +1929,23 @@ static int repack_iq4_nl_to_iq4_nl_8_bl(struct ggml_tensor * t, int interleave_b
|
|||
return -1;
|
||||
}
|
||||
|
||||
for (int b = 0; b < nrow; b += nrows_interleaved) {
|
||||
const int n_row_groups = nrow / nrows_interleaved;
|
||||
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int bg = 0; bg < n_row_groups; bg++) {
|
||||
const int b = bg * nrows_interleaved;
|
||||
const block_iq4_nl * src = src_base + b * nblocks;
|
||||
block_iq4_nlx8 * dst = dst_base + bg * nblocks;
|
||||
block_iq4_nl dst_tmp[8];
|
||||
|
||||
for (int64_t x = 0; x < nblocks; x++) {
|
||||
for (int i = 0; i < nrows_interleaved; i++) {
|
||||
dst_tmp[i] = src[x + i * nblocks];
|
||||
}
|
||||
*dst++ = make_block_iq4_nlx8(dst_tmp, interleave_block);
|
||||
dst[x] = make_block_iq4_nlx8(dst_tmp, interleave_block);
|
||||
}
|
||||
src += nrows_interleaved * nblocks;
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue