This commit is contained in:
Hongrui Chen 2025-12-25 13:57:17 +08:00
parent 2058f28b3e
commit 5e3db77f6e
1 changed file with 3 additions and 3 deletions

View File

@@ -915,7 +915,7 @@ static void vec_dot_f16_f32(const int n, float * restrict s, const void * restri
uint32_t nv0 = n / VLEN_FP16; // num full fp16 hvx vectors
uint32_t nv1 = n % VLEN_FP16; // leftover elements
const HVX_Vector zero = Q6_Vh_vsplat_R(0x3C00); // 1.0 in fp16
const HVX_Vector one = Q6_Vh_vsplat_R(0x3C00); // 1.0 in fp16
HVX_Vector rsum = Q6_V_vsplat_R(0);
uint32_t i = 0;
@@ -923,7 +923,7 @@ static void vec_dot_f16_f32(const int n, float * restrict s, const void * restri
for (i = 0; i < nv0; i++) {
HVX_VectorPair yp = vy[i];
HVX_Vector x = vx[i];
HVX_VectorPair xp = Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(x), zero); // mul by 1.0
HVX_VectorPair xp = Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(x), one); // mul by 1.0
HVX_Vector hi = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_hi_W(xp)), Q6_V_hi_W(yp));
HVX_Vector lo = Q6_Vqf32_vmpy_VsfVsf(Q6_Vsf_equals_Vqf32(Q6_V_lo_W(xp)), Q6_V_lo_W(yp));
@@ -935,7 +935,7 @@ static void vec_dot_f16_f32(const int n, float * restrict s, const void * restri
if (nv1) {
HVX_VectorPair yp = vy[i];
HVX_Vector x = vx[i];
HVX_VectorPair xp = Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(x), zero); // mul by 1.0
HVX_VectorPair xp = Q6_Wqf32_vmpy_VhfVhf(Q6_Vh_vshuff_Vh(x), one); // mul by 1.0
HVX_Vector l_x;
HVX_Vector l_y;