diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index 4cb7cdeb07..f94426ddd7 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -1916,9 +1916,10 @@ static block_q4_Kx8 make_block_q4_Kx8(block_q4_K * in, unsigned int blck_size_in int src_offset = (i / 8) * blck_size_interleave; int dst_offset = i * blck_size_interleave; + // buffer large enough for the max interleave block size (8 bytes) uint64_t elems; - memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t)); - memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t)); + memcpy(&elems, &in[src_id].qs[src_offset], blck_size_interleave); + memcpy(&out.qs[dst_offset], &elems, blck_size_interleave); } // The below logic is designed so as to unpack and rearrange scales and mins values in Q4_K