This patch addresses an Internal Compiler Error (segmentation fault)
observed with GCC 15. Instead of calling the intrinsic and then casting its
result, it casts the data pointer first and then calls the intrinsic. This
bypasses the buggy compiler path while maintaining identical instruction selection.
Performance Verification:
Assembly analysis on RHEL 9 (GCC 15.1.1) confirms that both the original
code and this fix generate the identical Power10 prefixed load instruction:
`plxv 40, 2(14)`
This ensures zero performance regression while unblocking builds on
newer toolchains.
Reproduced on:
- Alpine Linux + GCC 15.2.0-r2
- RHEL 9 + GCC 15.1.1 (gcc-toolset-15)
Signed-off-by: Shalini Salomi Bodapati <Shalini.Salomi.Bodapati@ibm.com>
This commit is contained in:
parent
1e38a7a6fa
commit
c6980ff29d
|
|
@ -2497,7 +2497,7 @@ class tinyBLAS_Q0_PPC {
|
|||
for (int r = 0; r < 8; r++) {
|
||||
const block_q4_0 * current_blk = rows_base[r] + blk;
|
||||
vector float v_scale = vec_extract_fp32_from_shorth(vec_splats(current_blk->d));
|
||||
vector signed char v_qs = reinterpret_cast<vector signed char>(vec_xl(0, current_blk->qs));
|
||||
vector signed char v_qs = vec_xl(0, (const vector signed char *)current_blk->qs);
|
||||
vector signed char c1, c2;
|
||||
unpack_q4_to_q8(v_qs, c1, c2);
|
||||
convert_and_scale_q8(c1, v_scale, hp_res[r][0], hp_res[r][1]);
|
||||
|
|
@ -2611,14 +2611,14 @@ class tinyBLAS_Q0_PPC {
|
|||
i = (cols >> 2);
|
||||
if (i > 0) {
|
||||
do {
|
||||
c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
|
||||
c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
|
||||
c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
|
||||
c4[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset4->qs));
|
||||
c5[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset5->qs));
|
||||
c6[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset6->qs));
|
||||
c7[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset7->qs));
|
||||
c8[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset8->qs));
|
||||
c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
|
||||
c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
|
||||
c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
|
||||
c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs);
|
||||
c5[1] = vec_xl(0, (const vector signed char *)aoffset5->qs);
|
||||
c6[1] = vec_xl(0, (const vector signed char *)aoffset6->qs);
|
||||
c7[1] = vec_xl(0, (const vector signed char *)aoffset7->qs);
|
||||
c8[1] = vec_xl(0, (const vector signed char *)aoffset8->qs);
|
||||
|
||||
process_q4_elements(c1, & comparray[0]);
|
||||
process_q4_elements(c2, & comparray[1]);
|
||||
|
|
@ -2657,10 +2657,10 @@ class tinyBLAS_Q0_PPC {
|
|||
i = (cols >> 2);
|
||||
if (i > 0) {
|
||||
do {
|
||||
c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
|
||||
c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
|
||||
c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
|
||||
c4[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset4->qs));
|
||||
c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
|
||||
c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
|
||||
c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
|
||||
c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs);
|
||||
|
||||
process_q4_elements(c1, & comparray[0]);
|
||||
process_q4_elements(c2, & comparray[1]);
|
||||
|
|
@ -2686,9 +2686,9 @@ class tinyBLAS_Q0_PPC {
|
|||
if (i > 0) {
|
||||
do {
|
||||
switch(rows) {
|
||||
case 3: c3[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset3->qs));
|
||||
case 2: c2[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset2->qs));
|
||||
case 1: c1[1] = reinterpret_cast<vector signed char>(vec_xl(0, aoffset1->qs));
|
||||
case 3: c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs);
|
||||
case 2: c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs);
|
||||
case 1: c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs);
|
||||
break;
|
||||
}
|
||||
process_q4_elements(c1, & comparray[0]);
|
||||
|
|
|
|||
Loading…
Reference in New Issue