// Copyright 2023 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef HWY_DISABLED_TARGETS // Exclude HWY_SCALAR due to 2x bf16 -> f32. #define HWY_DISABLED_TARGETS HWY_SCALAR #endif #include #include #include #include #include #include #include "compression/compress.h" #include "hwy/aligned_allocator.h" #include "hwy/base.h" #include "hwy/contrib/thread_pool/thread_pool.h" // clang-format off #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "ops/gemma_matvec_test.cc" // NOLINT // clang-format on #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" // After highway.h #include "ops/matvec-inl.h" #include "ops/ops-inl.h" // MulByConst HWY_BEFORE_NAMESPACE(); namespace gcpp { namespace HWY_NAMESPACE { template hwy::AlignedFreeUniquePtr SimpleMatVecAdd( const CompressedArray& mat, const hwy::AlignedFreeUniquePtr& vec, const hwy::AlignedFreeUniquePtr& add) { hwy::AlignedFreeUniquePtr uncompressed_mat = hwy::AllocateAligned(kOuter * kInner); hwy::AlignedFreeUniquePtr out = hwy::AllocateAligned(kOuter); HWY_ASSERT(uncompressed_mat && out); Decompress(mat, 0, uncompressed_mat.get(), kOuter * kInner); MulByConst(mat.scale(), uncompressed_mat.get(), kOuter * kInner); for (size_t idx_row = 0; idx_row < kOuter; idx_row++) { out[idx_row] = add[idx_row]; for (size_t idx_col = 0; idx_col < kInner; idx_col++) { out[idx_row] += uncompressed_mat[kInner * idx_row + idx_col] * vec[idx_col]; } } return out; } template CompressedArray GenerateMat(size_t offset, hwy::ThreadPool& pool) { gcpp::CompressWorkingSet ws; CompressedArray mat; std::array content; const float scale = 1.0f / kInner; pool.Run(0, kOuter, [&](const size_t i, size_t /*thread*/) { for (size_t j = 0; j < kInner; j++) { content[i * kInner + j] = static_cast((i * kInner + j + offset) * scale); } }); Compress(content, ws, mat, pool); mat.set_scale(1.9f); // Arbitrary value, different from 1. return mat; } template hwy::AlignedFreeUniquePtr GenerateVec(size_t offset) { hwy::AlignedFreeUniquePtr vec = hwy::AllocateAligned(length); HWY_ASSERT(vec); for (size_t idx = 0; idx < length; idx++) { vec[idx] = static_cast(idx + offset); } return vec; } template void AssertClose(const hwy::AlignedFreeUniquePtr& a, const hwy::AlignedFreeUniquePtr& b) { for (size_t idx = 0; idx < length; idx++) { const float rel_abs_delta = std::abs(a[idx] - b[idx]) / std::max(std::abs(a[idx]), std::abs(b[idx])); EXPECT_LT(rel_abs_delta, 2e-6) << "a[" << idx << "]=" << a[idx] << ", b[" << idx << "]=" << b[idx]; } } void TestMatVecAdd() { hwy::ThreadPool pool(hwy::ThreadPool::MaxThreads()); constexpr size_t kOuter = 128 * 3; constexpr size_t kInner = 128 * 5; CompressedArray mat = GenerateMat(0, pool); hwy::AlignedFreeUniquePtr vec = GenerateVec(0); hwy::AlignedFreeUniquePtr add = GenerateVec(0); hwy::AlignedFreeUniquePtr even_odd = hwy::AllocateAligned(kInner * pool.NumWorkers()); hwy::AlignedFreeUniquePtr expected_out = SimpleMatVecAdd(mat, vec, add); hwy::AlignedFreeUniquePtr actual_out = hwy::AllocateAligned(kOuter); HWY_ASSERT(vec && add && even_odd && expected_out && actual_out); MatVecAdd(mat, 0, vec.get(), add.get(), even_odd.get(), actual_out.get(), pool); AssertClose(actual_out, expected_out); } void TestTwoMatVecAdd() { hwy::ThreadPool pool(hwy::ThreadPool::MaxThreads()); constexpr size_t kOuter = 128 * 3; constexpr size_t kInner = 128 * 5; CompressedArray mat0 = GenerateMat(0, pool); CompressedArray mat1 = GenerateMat(1, pool); hwy::AlignedFreeUniquePtr vec = GenerateVec(0); hwy::AlignedFreeUniquePtr add0 = GenerateVec(0); hwy::AlignedFreeUniquePtr add1 = GenerateVec(1); hwy::AlignedFreeUniquePtr expected_out0 = SimpleMatVecAdd(mat0, vec, add0); hwy::AlignedFreeUniquePtr expected_out1 = SimpleMatVecAdd(mat1, vec, add1); hwy::AlignedFreeUniquePtr actual_out0 = hwy::AllocateAligned(kOuter); hwy::AlignedFreeUniquePtr actual_out1 = hwy::AllocateAligned(kOuter); HWY_ASSERT(vec && add0 && add1 && expected_out0 && actual_out0 && expected_out1 && actual_out1); TwoMatVecAdd(mat0, mat1, 0, vec.get(), add0.get(), add1.get(), actual_out0.get(), actual_out1.get(), pool); AssertClose(actual_out0, expected_out0); AssertClose(actual_out1, expected_out1); } void TestTwoOfsMatVecAddLoop() { hwy::ThreadPool pool(hwy::ThreadPool::MaxThreads()); constexpr size_t kOuter = 128 * 3; constexpr size_t kInner = 128 * 5; CompressedArray mat = GenerateMat(0, pool); hwy::AlignedFreeUniquePtr vec = GenerateVec(0); hwy::AlignedFreeUniquePtr add0 = GenerateVec(0); hwy::AlignedFreeUniquePtr add1 = GenerateVec(1); hwy::AlignedFreeUniquePtr expected_out0 = SimpleMatVecAdd(mat, vec, add0); hwy::AlignedFreeUniquePtr expected_out1 = SimpleMatVecAdd(mat, vec, add1); hwy::AlignedFreeUniquePtr actual_out0 = hwy::AllocateAligned(kOuter); hwy::AlignedFreeUniquePtr actual_out1 = hwy::AllocateAligned(kOuter); HWY_ASSERT(vec && add0 && add1 && expected_out0 && actual_out0 && expected_out1 && actual_out1); TwoOfsMatVecAddLoop(mat, 0, 0, vec.get(), add0.get(), add1.get(), actual_out0.get(), actual_out1.get()); AssertClose(actual_out0, expected_out0); AssertClose(actual_out1, expected_out1); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace gcpp HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace gcpp { HWY_BEFORE_TEST(MatVecTest); HWY_EXPORT_AND_TEST_P(MatVecTest, TestMatVecAdd); HWY_EXPORT_AND_TEST_P(MatVecTest, TestTwoMatVecAdd); HWY_EXPORT_AND_TEST_P(MatVecTest, TestTwoOfsMatVecAddLoop); HWY_AFTER_TEST(); } // namespace gcpp #endif