mirror of https://github.com/google/gemma.cpp.git
Use Highway (hwy) SIMD for the RMSNorm(float, bfloat16, float) calculation
This commit is contained in:
parent
bb9b023502
commit
5f016fb433
28
ops.h
28
ops.h
|
|
@@ -362,12 +362,30 @@ static HWY_NOINLINE HWY_MAYBE_UNUSED void RMSNorm(
|
||||||
static HWY_NOINLINE HWY_MAYBE_UNUSED void RMSNorm(
|
static HWY_NOINLINE HWY_MAYBE_UNUSED void RMSNorm(
|
||||||
const float* HWY_RESTRICT x, const hwy::bfloat16_t* HWY_RESTRICT weight,
|
const float* HWY_RESTRICT x, const hwy::bfloat16_t* HWY_RESTRICT weight,
|
||||||
float* HWY_RESTRICT out, size_t size) {
|
float* HWY_RESTRICT out, size_t size) {
|
||||||
|
namespace hn = hwy::HWY_NAMESPACE;
|
||||||
|
|
||||||
constexpr float eps = 1e-6f;
|
constexpr float eps = 1e-6f;
|
||||||
float ss = SquaredL2(x, size);
|
constexpr size_t unroll_size = 2;
|
||||||
ss = 1.0f / sqrtf(ss / StaticCast<float>(size) + eps);
|
|
||||||
for (size_t j = 0; j < size; j++) {
|
const hn::ScalableTag<hwy::bfloat16_t> dbf;
|
||||||
// Note 1.0f centering here
|
const hn::Repartition<float, decltype(dbf)> df32;
|
||||||
out[j] = (1.0f + hwy::F32FromBF16(weight[j])) * (ss * x[j]);
|
const size_t N32 = hn::Lanes(df32);
|
||||||
|
|
||||||
|
const float ss = SquaredL2(x, size);
|
||||||
|
const auto vss =
|
||||||
|
hn::Set(df32, 1.0f / sqrtf(ss / StaticCast<float>(size) + eps));
|
||||||
|
|
||||||
|
HWY_DASSERT(size % (unroll_size * MaxLanes(df32)) == 0);
|
||||||
|
for (size_t i = 0; i < size; i += unroll_size * N32) {
|
||||||
|
const hn::Vec<decltype(dbf)> w16 = hn::LoadU(dbf, weight + i);
|
||||||
|
const auto w0 = hn::PromoteLowerTo(df32, w16);
|
||||||
|
const auto w1 = hn::PromoteUpperTo(df32, w16);
|
||||||
|
const auto m0 = hn::Mul(vss, hn::LoadU(df32, x + i));
|
||||||
|
const auto m1 = hn::Mul(vss, hn::LoadU(df32, x + i + N32));
|
||||||
|
|
||||||
|
// (1+weight) * m = m + weight*m = one FMA.
|
||||||
|
hn::StoreU(hn::MulAdd(m0, w0, m0), df32, out + i);
|
||||||
|
hn::StoreU(hn::MulAdd(m1, w1, m1), df32, out + i + N32);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue