From 76a81ac2d6fdf7cc6a5ca5809573fee20a8f1961 Mon Sep 17 00:00:00 2001
From: Jan Wassenberg
Date: Fri, 28 Mar 2025 11:24:53 -0700
Subject: [PATCH] Fix unaligned buffer causing crash on GCC.

Thanks @ufownl, fixes #508

PiperOrigin-RevId: 741590339
---
 ops/matmul-inl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ops/matmul-inl.h b/ops/matmul-inl.h
index 782c3e7..2ff959d 100644
--- a/ops/matmul-inl.h
+++ b/ops/matmul-inl.h
@@ -111,7 +111,7 @@ class MMStoreHorizontalSumsIntoC {
                              VF C30, VF C31, VF C32, VF C33,  //
                              const size_t row_c, const size_t col_c,
                              const MMArgs& args, const RowPtr& C) const {
-    float buf[16 * hn::MaxLanes(df)];
+    HWY_ALIGN float buf[16 * hn::MaxLanes(df)];
     const size_t N = hn::Lanes(df);
     // Horizontal reductions (`ReduceSum`) are rather expensive, entailing
     // log(N) operations for vectors of length N. Because `kNR` == 4, we
@@ -226,7 +226,7 @@ class MMAddHorizontalSumsIntoPartial {
     static_assert(HWY_HAVE_FLOAT64, "Disable Armv7 NEON: we require fp64");
 
     const hn::Repartition<double, DF> dd;
-    double buf[16 * hn::MaxLanes(dd)];
+    HWY_ALIGN double buf[16 * hn::MaxLanes(dd)];
     using VD = hn::Vec<decltype(dd)>;
     const size_t ND = hn::Lanes(dd);
     VD C00 = SumOfPromotedPairs(dd, F00);