ggml-cpu: Fix gcc 15 ICE on ppc64le (#20083) (#20130)

This patch addresses an Internal Compiler Error (Segmentation fault)
observed with gcc 15 by replacing the "call the intrinsic, then cast the
result" pattern with a cast on the data pointer first, followed by the
intrinsic call. This bypasses the buggy compiler path while maintaining
identical instruction selection.

Performance Verification:
Assembly analysis on RHEL 9 (GCC 15.1.1) confirms that both the original
code and this fix generate the identical Power10 prefixed load instruction:
    `plxv 40, 2(14)`

This ensures zero performance regression while unblocking builds on
newer toolchains.

Reproduced on:
- Alpine Linux + GCC 15.2.0-r2
- RHEL 9 + GCC 15.1.1 (gcc-toolset-15)

Signed-off-by: Shalini Salomi Bodapati <Shalini.Salomi.Bodapati@ibm.com>
This commit is contained in:
shalinib-ibm 2026-03-06 20:52:39 +05:30 committed by GitHub
parent 1e38a7a6fa
commit c6980ff29d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 16 additions and 16 deletions

View File

@ -2497,7 +2497,7 @@ class tinyBLAS_Q0_PPC {
for (int r = 0; r < 8; r++) {
const block_q4_0 * current_blk = rows_base[r] + blk;
vector float v_scale = vec_extract_fp32_from_shorth(vec_splats(current_blk->d));
vector signed char v_qs = reinterpret_cast<vector signed char>(vec_xl(0, current_blk->qs));
vector signed char v_qs = vec_xl(0, (const vector signed char *)current_blk->qs);
vector signed char c1, c2;
unpack_q4_to_q8(v_qs, c1, c2);
convert_and_scale_q8(c1, v_scale, hp_res[r][0], hp_res[r][1]);
@ -2611,14 +2611,14 @@ class tinyBLAS_Q0_PPC {
i = (cols >> 2);
if (i > 0) {
do {
c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
c4[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset4->qs));
c5[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset5->qs));
c6[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset6->qs));
c7[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset7->qs));
c8[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset8->qs));
c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs);
c5[1] = vec_xl(0, (const vector signed char *)aoffset5->qs);
c6[1] = vec_xl(0, (const vector signed char *)aoffset6->qs);
c7[1] = vec_xl(0, (const vector signed char *)aoffset7->qs);
c8[1] = vec_xl(0, (const vector signed char *)aoffset8->qs);
process_q4_elements(c1, & comparray[0]);
process_q4_elements(c2, & comparray[1]);
@ -2657,10 +2657,10 @@ class tinyBLAS_Q0_PPC {
i = (cols >> 2);
if (i > 0) {
do {
c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
c4[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset4->qs));
c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs);
process_q4_elements(c1, & comparray[0]);
process_q4_elements(c2, & comparray[1]);
@ -2686,9 +2686,9 @@ class tinyBLAS_Q0_PPC {
if (i > 0) {
do {
switch(rows) {
case 3: c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
case 2: c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
case 1: c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
case 3: c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
case 2: c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
case 1: c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
break;
}
process_q4_elements(c1, & comparray[0]);