From a939b5fc9fa38f593b071b38e3c48399ee3bb07d Mon Sep 17 00:00:00 2001 From: Jan Wassenberg Date: Wed, 17 Apr 2024 01:43:32 -0700 Subject: [PATCH] Update distortion.h to weighted average, add distortion_test. More thorough checks in sfp_test and nuq_test. nuq_test: use deterministic input generator. PiperOrigin-RevId: 625602019 --- compression/BUILD | 24 +++- compression/distortion.h | 217 ++++++++++++++++++++++++++------- compression/distortion_test.cc | 99 +++++++++++++++ compression/nuq_test.cc | 143 +++++++++++++--------- compression/sfp_test.cc | 76 +++++++++--- compression/stats.h | 2 +- compression/test_util.h | 24 +++- 7 files changed, 456 insertions(+), 129 deletions(-) create mode 100644 compression/distortion_test.cc diff --git a/compression/BUILD b/compression/BUILD index 5b525a5..b0c8431 100644 --- a/compression/BUILD +++ b/compression/BUILD @@ -12,12 +12,8 @@ package( cc_library( name = "blob_store", - srcs = [ - "blob_store.cc", - ], - hdrs = [ - "blob_store.h", - ], + srcs = ["blob_store.cc"], + hdrs = ["blob_store.h"], deps = [ "@hwy//:hwy", "@hwy//:thread_pool", @@ -39,7 +35,23 @@ cc_library( name = "distortion", hdrs = ["distortion.h"], deps = [ + ":stats", "@hwy//:hwy", + "@hwy//hwy/contrib/sort:vqsort", + ], +) + +cc_test( + name = "distortion_test", + size = "small", + srcs = ["distortion_test.cc"], + deps = [ + ":distortion", + ":test_util", + "@googletest//:gtest_main", + "@hwy//:hwy", + "@hwy//:hwy_test_util", + "@hwy//:nanobenchmark", # Unpredictable1 ], ) diff --git a/compression/distortion.h b/compression/distortion.h index f272b52..67736dd 100644 --- a/compression/distortion.h +++ b/compression/distortion.h @@ -15,85 +15,214 @@ #ifndef THIRD_PARTY_GEMMA_CPP_COMPRESSION_DISTORTION_H_ #define THIRD_PARTY_GEMMA_CPP_COMPRESSION_DISTORTION_H_ + #include // pow #include +#include -#include "hwy/base.h" // ScalarAbs +#include + +#include "compression/stats.h" +#include "hwy/aligned_allocator.h" // HWY_ALIGNMENT +#include "hwy/base.h" // ScalarAbs +#include "hwy/contrib/sort/vqsort.h" namespace gcpp { +// Returns `sum` and `err` such that `sum + err` is exactly equal to `a + b`, +// despite floating-point rounding. `sum` is already the best estimate, so do +// not actually add `err` to it. Knuth98/Moller65. Unlike Fast2Sum [Dekker71], +// this does not require any relative ordering of the exponents of a and b. +template +static inline T TwoSum(T a, T b, T& err) { + const T sum = a + b; + const T a2 = sum - b; + const T b2 = sum - a2; + const T err_a = a - a2; + const T err_b = b - b2; + err = err_a + err_b; + return sum; +} + +// Accumulates numbers with about twice the precision of T using 7 * n FLOPS. +// Rump/Ogita/Oishi08, Algorithm 6.11 in Handbook of Floating-Point Arithmetic. +template +class CascadedSummation { + public: + void Notify(T t) { + T err; + sum_ = TwoSum(sum_, t, err); + sum_err_ += err; + } + + void Assimilate(const CascadedSummation& other) { + Notify(other.sum_); + sum_err_ += other.sum_err_; + } + + // Allows users to observe how much difference the extra precision made. + T Err() const { return sum_err_; } + + // Returns the sum of all `t` passed to `Notify`. + T Total() const { return sum_ + sum_err_; } + + private: + T sum_ = T{0}; + T sum_err_ = T{0}; +}; + +// Summarizes the error of a distortion (e.g. quantization) applied to a series +// of numbers. +// Users should check all four resulting metrics (NumExact, NumRoundedToZero, +// GeomeanValueDivL1, WeightedAverageL1) because each covers different aspects. class DistortionStats { public: void Notify(float original, float distorted) { (void)padding_; // prevent unused member warning - const double l1 = hwy::ScalarAbs(original - distorted); + const bool rounded_to_zero = (original != 0.0f) && (distorted == 0.0f); + // We expect original == 0 is not distorted (can be exactly represented). + HWY_ASSERT(original != 0.0f || distorted == 0.0f); - if (l1 > max_l1_) { - max_l1_ = l1; - max_idx_ = n_; + s_original_.Notify(original); + const float l1f = hwy::ScalarAbs(original - distorted); + const double l1 = static_cast(l1f); + s_l1_.Notify(l1f); + b_l1_.Notify(HWY_MIN(99, static_cast(l1f * 1E4))); + if (l1f != 0.0f) { + l1_.push_back(l1f); + } + sum_l1_.Notify(l1f); + if (rounded_to_zero) sum_l1_rounded_.Notify(l1f); + + // Event counts + { + n_ += 1; + // Rounding (small) negative numbers to 0 does not influence dot products + // as much as an actual sign flip, so do not count them. + n_sign_flip_ += + ((original < 0.0f) != (distorted < 0.0f)) && !rounded_to_zero; + n_exact_ += (l1f == 0.0f); + n_rounded_to_zero += rounded_to_zero; } - const double pow3 = l1 * l1 * l1; - sum_pow3_ += pow3; - sum_pow6_ += pow3 * pow3; - n_ += 1; - - // Avoid division by zero, which happens when there is no error. NumExact() - // reports the number of times this happens. - if (l1 != 0.0) { - const double rel = 1.0 + hwy::ScalarAbs(original) / l1; - // Logarithm is required to prevent overflow. A hierarchical geomean + // Signal to noise ratio (Shannon's channel capacity, NOT the L2-based and + // logarithmic PSNR) to estimate the ratios of original to the L1 norm. + if (l1f != 0.0) { // prevent division by zero + const double snr = + 1.0 + static_cast(hwy::ScalarAbs(original)) / l1; + // For numerical purposes (prevents overflow). A hierarchical geomean // could also work, but that is more complex and not necessarily better. - sum_log_rel_ += log(rel); - num_rel_ += 1; + // We will return exp() of the arithmetic mean. + sum_log_snr_ += log(snr); + num_snr_ += 1; } } void Assimilate(const DistortionStats& other) { - if (other.max_l1_ > max_l1_) { - max_l1_ = other.max_l1_; - max_idx_ = other.max_idx_; - } + s_original_.Assimilate(other.s_original_); + s_l1_.Assimilate(other.s_l1_); + b_l1_.Assimilate(other.b_l1_); + sum_l1_.Assimilate(other.sum_l1_); + sum_l1_rounded_.Assimilate(other.sum_l1_rounded_); + l1_.insert(l1_.end(), other.l1_.begin(), other.l1_.end()); - sum_pow3_ += other.sum_pow3_; - sum_pow6_ += other.sum_pow6_; n_ += other.n_; + n_sign_flip_ += other.n_sign_flip_; + n_exact_ += other.n_exact_; + n_rounded_to_zero += other.n_rounded_to_zero; - sum_log_rel_ += other.sum_log_rel_; - num_rel_ += other.num_rel_; + sum_log_snr_ += other.sum_log_snr_; + num_snr_ += other.num_snr_; } - size_t NumExact() const { return n_ - num_rel_; } + size_t NumExact() const { return n_exact_; } + size_t NumSignFlip() const { return n_sign_flip_; } + size_t NumRoundedToZero() const { return n_rounded_to_zero; } + // Total absolute error. + double SumL1() const { return sum_l1_.Total(); } + // Total absolute error for numbers that were rounded to zero. + double SumL1Rounded() const { return sum_l1_rounded_.Total(); } + // Returns geomean of 1 + S/N (Shannon channel capacity). This is computed via + // the ratio of input magnitude to nonzero L1 norms. Higher is better. double GeomeanValueDivL1() const { - if (num_rel_ == 0) return 0.0; - return exp(sum_log_rel_ / static_cast(num_rel_)); + if (num_snr_ == 0) return 0.0; + return exp(sum_log_snr_ / static_cast(num_snr_)); } - double PNorm() const { - // p-norms are a compromise between max-norm (penalizes the largest error - // without dilution, but does not notice any other errors) and L1 (all - // errors contribute, but large errors are diluted by smaller ones). - const double norm3 = pow(sum_pow3_ / static_cast(n_), 1.0 / 3); - const double norm6 = pow(sum_pow6_ / static_cast(n_), 1.0 / 6); - return 0.5 * (norm3 + norm6); + // Returns weighted average of nonzero L1 norms. Those further from the median + // L1 norm are much more heavily weighted, such that this behaves more like + // the L-infinity norm, but still includes all differences, not just the max. + // Lower is better, magnitude depends on the input magnitude. + double WeightedAverageL1() const { + if (l1_.empty()) return 0.0f; // all exact + + std::vector weights(l1_); // copy so we can modify + const float median = [&weights]() { + const size_t mid = weights.size() / 2; + // We just want the median; partial sort is faster if available (v1.2). +#if HWY_MAJOR > 1 || HWY_MINOR >= 2 + hwy::VQSelect(weights.data(), weights.size(), mid, hwy::SortAscending()); +#else + hwy::VQSort(weights.data(), weights.size(), hwy::SortAscending()); +#endif + return weights[mid]; + }(); + weights = l1_; // restore original order + + // Replace with distance from median (might have too few samples for mode). + float max_abs = -1.0f; + for (float& d : weights) { + d = hwy::ScalarAbs(d - median); + max_abs = HWY_MAX(max_abs, d); + } + HWY_ASSERT(max_abs >= 0.0f); + // All equal - return the distance value to prevent division by zero. + if (max_abs == 0.0f) return median; + + // Normalize to max difference and exponentiate. + const double inv_max = 1.0 / static_cast(max_abs); + double sum_weights = 0.0; + for (float& w : weights) { + const double normalized = static_cast(w) * inv_max; + const double amplified = exp(4.0 * normalized * normalized); + sum_weights += amplified; + w = static_cast(amplified); + } + // At least 1.0 per weight, plus more for at least one weight because we + // verified via max_abs that not all are equal. + HWY_ASSERT(sum_weights > static_cast(weights.size())); + + // Return weighted average. + double weighted_sum = 0.0; + for (size_t i = 0; i < weights.size(); ++i) { + weighted_sum += l1_[i] * weights[i]; + } + return weighted_sum / sum_weights; } - size_t MaxIndex() const { return max_idx_; } - double MaxL1() const { return max_l1_; } + Stats& L1() { return s_l1_; } + Stats& Original() { return s_original_; } private: + Stats s_original_; + Stats s_l1_; + Bins<100> b_l1_; + CascadedSummation sum_l1_; // all + CascadedSummation sum_l1_rounded_; // only if rounded_to_zero + std::vector l1_; + + // Event counts size_t n_ = 0; - size_t max_idx_ = 0; // index that had l1 = max_l1_. - double max_l1_ = -1.0; + size_t n_sign_flip_ = 0; + size_t n_exact_ = 0; + size_t n_rounded_to_zero = 0; - double sum_pow3_ = 0.0; - double sum_pow6_ = 0.0; + double sum_log_snr_ = 0.0; + size_t num_snr_ = 0; - double sum_log_rel_ = 0.0; - size_t num_rel_ = 0; - double padding_; // prevents false sharing + uint8_t padding_[HWY_ALIGNMENT]; // prevents false sharing }; } // namespace gcpp diff --git a/compression/distortion_test.cc b/compression/distortion_test.cc new file mode 100644 index 0000000..0889c41 --- /dev/null +++ b/compression/distortion_test.cc @@ -0,0 +1,99 @@ +// Copyright 2024 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "compression/distortion.h" + +#include + +#include "compression/test_util.h" +#include "hwy/nanobenchmark.h" +#include "hwy/tests/hwy_gtest.h" +#include "hwy/tests/test_util-inl.h" // HWY_ASSERT_EQ + +namespace gcpp { +namespace { + +#if !HWY_TEST_STANDALONE +class DistortionTest : public testing::Test {}; +#endif + +TEST(DistortionTest, TestCascadedSummation) { + CascadedSummation cs; + // Example from Priest92. Exact sum is 2. + const double kHuge = 9007199254740992.0 * hwy::Unpredictable1(); // 2^53 + const double kNeg = -4503599627370495.0 * hwy::Unpredictable1(); // -(2^52-1) + const double kIn[6] = {kHuge, kHuge - 2.0, kNeg, kNeg, kNeg, kNeg}; + for (double in : kIn) { + cs.Notify(in); + } + HWY_ASSERT_EQ(2.0, cs.Total()); +} + +// Number of exact and rounded-to-zero matches expectations. +TEST(DistortionTest, TestCounts) { + // Arbitrary positive/negative original, zero distorted. + DistortionStats stats; + for (size_t i = 1; i < 10; ++i) { + stats.Notify(i / 100.0f, 0.0f); + stats.Notify(i / -100.0f, 0.0f); + } + HWY_ASSERT(stats.NumExact() == 0); + HWY_ASSERT(stats.NumRoundedToZero() == 18); + + // Add some exact (same): + size_t num_exact = 0; + for (float x = 0.0f; x <= 1.5f; x += 0.25f) { + stats.Notify(x, x); + stats.Notify(-x, -x); + num_exact += 2; + } + HWY_ASSERT_EQ(num_exact, stats.NumExact()); + HWY_ASSERT(stats.NumRoundedToZero() == 18); // unchanged +} + +// Few large differences are diluted in SNR but not WeightedAverageL1. +TEST(DistortionTest, TestDilution) { + DistortionStats stats; + for (size_t i = 0; i < 100; ++i) { + stats.Notify(0.998f, 0.999f); // small + } + HWY_ASSERT(IsInside(900.0, 1000.0, stats.GeomeanValueDivL1())); + // All-equal WeightedSum is exact. + HWY_ASSERT(IsNear(0.001, stats.WeightedAverageL1())); + + // Now add a large difference: + stats.Notify(1.875f - 0.0625f, 1.875f); // max magnitude, 3-bit mantissa + // .. WeightedAverageL1 is closer to it. + HWY_ASSERT(IsInside(0.020, 0.025, stats.WeightedAverageL1())); + + // Add a small and large difference: + stats.Notify((1.75f - 0.125f) / 1024, 1.75f / 1024); // small, 2-bit mantissa + stats.Notify(-1.875f + 0.0625f, -1.875f); // larger negative + // .. SNR is still barely affected. + HWY_ASSERT(IsInside(890.0, 900.0, stats.GeomeanValueDivL1())); + // .. WeightedAverageL1 is higher after another large error. + HWY_ASSERT(IsInside(0.030, 0.035, stats.WeightedAverageL1())); + + // With these inputs, none are exact nor round to zero. + HWY_ASSERT(stats.NumExact() == 0); + HWY_ASSERT(stats.NumRoundedToZero() == 0); + HWY_ASSERT_EQ(0.0, stats.SumL1Rounded()); + HWY_ASSERT(IsInside(0.220, 0.23, stats.SumL1())); +} + +} // namespace +} // namespace gcpp + +HWY_TEST_MAIN(); diff --git a/compression/nuq_test.cc b/compression/nuq_test.cc index 1f8de40..3a95c1b 100644 --- a/compression/nuq_test.cc +++ b/compression/nuq_test.cc @@ -18,6 +18,8 @@ #define HWY_DISABLED_TARGETS HWY_SCALAR #endif +#include "compression/nuq.h" + #include #include #include @@ -25,8 +27,10 @@ #include // std::shuffle #include +#include "compression/test_util.h" #include "hwy/aligned_allocator.h" #include "hwy/base.h" +#include "hwy/tests/test_util.h" #include "hwy/timer.h" // clang-format off @@ -36,8 +40,6 @@ #include "hwy/foreach_target.h" // IWYU pragma: keep // Other headers that include Highway must come after foreach_target.h #include "compression/nuq-inl.h" -#include "compression/nuq.h" -#include "compression/test_util.h" #include "hwy/highway.h" #include "hwy/tests/hwy_gtest.h" #include "hwy/tests/test_util-inl.h" @@ -114,12 +116,16 @@ struct TestPlateaus { HWY_ASSERT(indices[i] < kClusters); stats.Notify(in[i], centers[indices[i]]); } - const float pnorm = stats.PNorm(); - const float snr = stats.GeomeanValueDivL1(); - fprintf(stderr, "p-norm %.3E snr %.2f @%zu = %.4E\n", pnorm, snr, - stats.MaxIndex(), stats.MaxL1()); - HWY_ASSERT(pnorm == 0.0f); - HWY_ASSERT(snr == 0.0f); + // Zero error. + HWY_ASSERT_EQ(kGroupSize, stats.NumExact()); + HWY_ASSERT_EQ(0, stats.NumSignFlip()); + HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); + HWY_ASSERT_EQ(0.0, stats.SumL1()); + HWY_ASSERT_EQ(0.0f, stats.GeomeanValueDivL1()); + HWY_ASSERT_EQ(0.0f, stats.WeightedAverageL1()); + // Input was symmetric and zero-mean. + HWY_ASSERT(gcpp::IsInside(-0.05, 0.05, stats.Original().Mean())); + HWY_ASSERT(gcpp::IsNear(0.0, stats.Original().Skewness())); } }; @@ -157,16 +163,19 @@ struct TestRamp { HWY_ASSERT(indices[i] < kClusters); stats.Notify(in[i], centers[indices[i]]); } - const float pnorm = stats.PNorm(); - const float snr = stats.GeomeanValueDivL1(); - fprintf(stderr, "p-norm %.3E snr %.2f @%zu = %.4E\n", pnorm, snr, - stats.MaxIndex(), stats.MaxL1()); - static_assert(kGroupSize == 128 || kGroupSize == 256, "Update expected"); - const float expected_pnorm = kGroupSize == 128 ? 2.08E-2f : 2.1E-2f; - const float expected_snr = kGroupSize == 128 ? 16.9f : 17.6f; - HWY_ASSERT(expected_pnorm <= pnorm && pnorm < 1.02f * expected_pnorm); - HWY_ASSERT(expected_snr <= snr && snr < 1.01f * expected_snr); + // Low error. + HWY_ASSERT_EQ(0, stats.NumExact()); + HWY_ASSERT(stats.NumSignFlip() < 10); + HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); + HWY_ASSERT_EQ(kGroupSize / kClusters / 4.0, stats.SumL1()); + HWY_ASSERT(gcpp::IsInside(17.0, 18.0, stats.GeomeanValueDivL1())); + HWY_ASSERT(gcpp::IsInside(0.005, 0.010, stats.WeightedAverageL1())); + HWY_ASSERT(stats.L1().Max() <= 0.04f); + // Input was symmetric about 0.05. + HWY_ASSERT(gcpp::IsNear(0.05, stats.Original().Mean(), 0.01)); + HWY_ASSERT(gcpp::IsNear(0.0, stats.Original().Skewness(), 1E-4)); + static_assert(kGroupSize == 256, "Update expected"); } }; @@ -207,15 +216,16 @@ struct TestNormal { HWY_ASSERT(indices[i] < kClusters); stats.Notify(in[i], centers[indices[i]]); } - const float pnorm = stats.PNorm(); - const float snr = stats.GeomeanValueDivL1(); - fprintf(stderr, "p-norm %.3E snr %.2f @%zu = %.4E\n", pnorm, snr, - stats.MaxIndex(), stats.MaxL1()); + + // Moderate error. + HWY_ASSERT_EQ(0, stats.NumExact()); + HWY_ASSERT(stats.NumSignFlip() < kGroupSize / kClusters); + HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); + HWY_ASSERT(gcpp::IsInside(5.0, 6.0, stats.SumL1())); + HWY_ASSERT(gcpp::IsInside(12.7, 12.8, stats.GeomeanValueDivL1())); + HWY_ASSERT(gcpp::IsInside(0.036, 0.037, stats.WeightedAverageL1())); + HWY_ASSERT(stats.L1().Max() <= 0.10f); static_assert(kGroupSize == 256, "Update expected"); - const float expected_pnorm = 3.68E-2f; - const float expected_snr = 12.7f; - HWY_ASSERT(expected_pnorm <= pnorm && pnorm < 1.02f * expected_pnorm); - HWY_ASSERT(expected_snr <= snr && snr < 1.01f * expected_snr); } }; @@ -235,10 +245,9 @@ struct TestOffset { auto nuq = hwy::AllocateAligned(NuqStream::PackedEnd(total)); HWY_ASSERT(in && dec1 && dec2 && nuq); - std::mt19937 rng(123); - std::normal_distribution dist{0.001f, 0.3f}; + hwy::RandomState rng; for (size_t i = 0; i < total; ++i) { - in[i] = dist(rng); + in[i] = static_cast(RandomGaussian(rng)); } // Encode + decode everything @@ -278,11 +287,13 @@ struct TestStream { auto nuq = hwy::AllocateAligned(NuqStream::PackedEnd(num)); HWY_ASSERT(in && out && nuq); - std::mt19937 rng(123); - std::normal_distribution dist{0.001f, 0.3f}; + hwy::RandomState rng; + Stats in_stats; for (size_t i = 0; i < num; ++i) { - in[i] = dist(rng); + in[i] = static_cast(RandomGaussian(rng)); + in_stats.Notify(in[i]); } + VerifyGaussian(in_stats); ClusterBuf buf; double elapsed = hwy::HighestValue(); @@ -311,15 +322,16 @@ struct TestStream { for (size_t i = 0; i < num; ++i) { stats.Notify(in[i], hwy::ConvertScalarTo(out[i])); } - const float pnorm = stats.PNorm(); - const float snr = stats.GeomeanValueDivL1(); - fprintf(stderr, "p-norm %.3E snr %.2f @%zu = %.4E\n", pnorm, snr, - stats.MaxIndex(), stats.MaxL1()); - static_assert(kGroupSize == 128 || kGroupSize == 256, "Update expected"); - const float expected_pnorm = kGroupSize == 128 ? 3.44E-2f : 3.88E-2f; - const float expected_snr = kGroupSize == 128 ? 15.0f : 13.3f; - HWY_ASSERT(expected_pnorm <= pnorm && pnorm < 1.02f * expected_pnorm); - HWY_ASSERT(expected_snr <= snr && snr < 1.01f * expected_snr); + + // Moderate error. + HWY_ASSERT_EQ(0, stats.NumExact()); + HWY_ASSERT(stats.NumSignFlip() < num / kClusters); + HWY_ASSERT_EQ(0, stats.NumRoundedToZero()); + HWY_ASSERT(gcpp::IsInside(23.0, 24.0, stats.SumL1())); + HWY_ASSERT(gcpp::IsInside(13.0, 13.3, stats.GeomeanValueDivL1())); + HWY_ASSERT(gcpp::IsInside(0.034, 0.035, stats.WeightedAverageL1())); + HWY_ASSERT(stats.L1().Max() <= 0.11f); + static_assert(kGroupSize == 256, "Update expected"); } }; @@ -348,9 +360,8 @@ struct TestDot { hwy::RandomState rng; Stats in_stats; for (size_t i = 0; i < num; ++i) { - const float r = static_cast(RandomGaussian(rng)); - in_stats.Notify(r); - in[i] = r; + in[i] = static_cast(RandomGaussian(rng)); + in_stats.Notify(in[i]); } for (size_t i = 0; i < num; ++i) { const float r = static_cast(RandomGaussian(rng)); @@ -365,7 +376,7 @@ struct TestDot { HWY_ASSERT(unused_clusters == 0); // Compute dot product without decompression. - double actual = 0.0; + float actual = 0.0f; double elapsed = hwy::HighestValue(); for (size_t rep = 0; rep < 20; ++rep) { hn::Vec sum0 = hn::Zero(df); @@ -386,8 +397,8 @@ struct TestDot { num * sizeof(in[0]) * 1E-6 / elapsed); // Exact and decompressed dot products for comparison. - double exact = 0.0; // using original input - double expected = 0.0; // using decoded NUQ + float exact = 0.0f; // using original input + float expected = 0.0f; // using decoded NUQ DistortionStats dec_stats; Stats ratios; for (size_t i = 0; i < num; ++i) { @@ -399,24 +410,42 @@ struct TestDot { ratios.Notify(exact / expected); } } + const bool isBF = sizeof(T) == 2; const double dec_snr = dec_stats.GeomeanValueDivL1(); + const double dec_wl1 = dec_stats.WeightedAverageL1(); const double dot_snr = 1.0 / hwy::ScalarAbs(1.0 - ratios.GeometricMean()); // exact and actual fluctuate due to the combination of NUQ imprecision, // and whether vec[i] is negative or positive, so this is quite loose. const float final_ratio = HWY_MIN(exact / actual, actual / exact); - fprintf(stderr, "ratios %s\n", ratios.ToString().c_str()); - fprintf(stderr, - "exact %.3f e2 %.4f actual %.4f final_ratio %.3f dec_snr %.2f " - "dot_snr %.2f\n", - exact, expected, actual, final_ratio, dec_snr, dot_snr); + if (HWY_ONCE) { + fprintf(stderr, "ratios %s\n", ratios.ToString().c_str()); + fprintf(stderr, + "exact %.3f e2 %.4f actual %.4f final_ratio %.3f dec_snr %.2f " + "dot_snr %.2f dec_wl1 %.4f\n", + exact, expected, actual, final_ratio, dec_snr, dot_snr, dec_wl1); + } // Final values are not too far apart. - HWY_ASSERT(0.88f <= final_ratio && final_ratio <= 1.0f); + HWY_ASSERT(gcpp::IsInside(0.88f, 1.0f, final_ratio)); // Decompressed and uncompressed dot should match exactly. - HWY_ASSERT(hwy::ScalarAbs(expected - actual) < 1E-4f); - // dec[] is close to in[], but we already check that in TestStream. - HWY_ASSERT(dec_snr >= 13.0); - // Geomean of ratios for each i is an approximation of the actual SNR. - HWY_ASSERT(dot_snr >= (sizeof(T) == 2 ? 17.0 : 14.0)); + HWY_ASSERT(gcpp::IsNear(expected, actual, 1E-4f)); + // Geomean of ratios for each i should be very close to one. + HWY_ASSERT(dot_snr >= (isBF ? 17.7 : 14.3)); + + // dec[] is close to in[], but we already check that in TestStream with the + // same input distribution. + HWY_ASSERT(gcpp::IsNear(13.1, dec_snr, 0.1)); + HWY_ASSERT(gcpp::IsNear(0.034, dec_wl1, 0.001)); + HWY_ASSERT(gcpp::IsNear(23.5, dec_stats.SumL1(), 0.1)); + HWY_ASSERT(dec_stats.NumSignFlip() < num / kClusters); + HWY_ASSERT_EQ(0, dec_stats.NumExact()); + HWY_ASSERT_EQ(0, dec_stats.NumRoundedToZero()); + HWY_ASSERT_EQ(0.0, dec_stats.SumL1Rounded()); + // Absolute decode errors are in [0, 0.11], and somewhat right-tailed. + HWY_ASSERT(gcpp::IsInside(0.0f, 2E-5f, dec_stats.L1().Min())); + HWY_ASSERT(gcpp::IsInside(0.09f, 0.11f, dec_stats.L1().Max())); + HWY_ASSERT(gcpp::IsInside(0.02, 0.03, dec_stats.L1().Mean())); + HWY_ASSERT(gcpp::IsInside(1.0, 1.1, dec_stats.L1().Skewness())); + HWY_ASSERT(gcpp::IsInside(4.0, 5.0, dec_stats.L1().Kurtosis())); static_assert(kGroupSize == 256, "Update expected*"); } }; diff --git a/compression/sfp_test.cc b/compression/sfp_test.cc index f7936a3..c08945e 100644 --- a/compression/sfp_test.cc +++ b/compression/sfp_test.cc @@ -26,6 +26,7 @@ #include +#include "compression/test_util.h" #include "hwy/aligned_allocator.h" #include "hwy/base.h" #include "hwy/timer.h" @@ -37,7 +38,6 @@ #include "hwy/foreach_target.h" // IWYU pragma: keep // Any highway.h must come after foreach_target.h #include "compression/sfp-inl.h" -#include "compression/test_util.h" #include "hwy/highway.h" #include "hwy/tests/hwy_gtest.h" #include "hwy/tests/test_util-inl.h" @@ -304,10 +304,37 @@ struct TestEncDec { sum += hwy::ConvertScalarTo(hwy::ScalarAbs(in[i])); stats.Notify(hwy::ConvertScalarTo(in[i]), out); } - const double avg = sum / num; - fprintf(stderr, "Avg magnitude %.3E, p-norm %.3E snr %.2f @%zu = %.4E\n", - avg, stats.PNorm(), stats.GeomeanValueDivL1(), stats.MaxIndex(), - stats.MaxL1()); + const double avg_in = sum / num; + const double snr = stats.GeomeanValueDivL1(); + const double wl1 = stats.WeightedAverageL1(); + if (false) { + fprintf(stderr, + "Num inputs %zu, avg %.3E, exact %zu round0 %zu (sum %E) snr " + "%.2f wL1 %f\n", + num, avg_in, stats.NumExact(), stats.NumRoundedToZero(), + stats.SumL1Rounded(), snr, wl1); + } + HWY_ASSERT(stats.Original().Count() == stats.L1().Count()); + // Inputs are in [-1.875, 1.875], symmetric, and heavy-tailed. + HWY_ASSERT(stats.Original().Min() == -1.875f); + HWY_ASSERT(stats.Original().Max() == 1.875f); + HWY_ASSERT(gcpp::IsInside(-1E-6, 1E-6, stats.Original().Mean())); + HWY_ASSERT(gcpp::IsInside(-1E-6, 1E-6, stats.Original().Skewness())); + HWY_ASSERT(gcpp::IsInside(80.0, 100.0, stats.Original().Kurtosis())); + // Absolute errors are in [0, 0.0625], and (heavy) right-tailed. + HWY_ASSERT(stats.L1().Min() == 0.0f); + HWY_ASSERT(stats.L1().Max() == 0.0625f); + HWY_ASSERT(gcpp::IsInside(4E-4, 5E-4, stats.L1().Mean())); + HWY_ASSERT(gcpp::IsInside(10.0, 15.0, stats.L1().Skewness())); + HWY_ASSERT(gcpp::IsInside(150.0, 200.0, stats.L1().Kurtosis())); + // SNR is low because many *tiny* numbers are rounded to zero. + HWY_ASSERT_EQ(3322, stats.NumRoundedToZero()); + HWY_ASSERT(gcpp::IsInside(5E-6, 6E-6, stats.SumL1Rounded())); + HWY_ASSERT(gcpp::IsInside(1.880, 1.885, stats.SumL1())); + HWY_ASSERT_EQ(256, stats.NumExact()); + HWY_ASSERT_EQ(0, stats.NumSignFlip()); + HWY_ASSERT(gcpp::IsInside(2.70, 2.75, snr)); + HWY_ASSERT(gcpp::IsInside(0.010, 0.011, wl1)); // = half of mean |x|. } } }; @@ -378,7 +405,7 @@ struct TestDot { SfpCodec::Enc(d, in.get(), num, sfp.get()); // Compute dot product without decompression. - double actual = 0.0; + float actual = 0.0f; double elapsed = hwy::HighestValue(); for (size_t rep = 0; rep < 200; ++rep) { hn::Vec sum0 = hn::Zero(df); @@ -414,24 +441,41 @@ struct TestDot { ratios.Notify(exact / expected); } } + const bool isBF = sizeof(T) == 2; const double dec_snr = dec_stats.GeomeanValueDivL1(); + const double dec_wl1 = dec_stats.WeightedAverageL1(); const double dot_snr = 1.0 / hwy::ScalarAbs(1.0 - ratios.GeometricMean()); // exact and actual fluctuate due to the combination of SFP imprecision, // and whether vec[i] is negative or positive, so this is quite loose. const float final_ratio = HWY_MIN(exact / actual, actual / exact); - fprintf(stderr, "ratios %s\n", ratios.ToString().c_str()); - fprintf(stderr, - "exact %.3f e2 %.4f actual %.4f final_ratio %.3f dec_snr %.2f " - "dot_snr %.2f\n", - exact, expected, actual, final_ratio, dec_snr, dot_snr); + if (HWY_ONCE) { + fprintf(stderr, "ratios %s\n", ratios.ToString().c_str()); + fprintf(stderr, + "exact %.3f e2 %.4f actual %.4f final_ratio %.3f dec_snr %.2f " + "dot_snr %.2f dec_wl1 %.5f\n", + exact, expected, actual, final_ratio, dec_snr, dot_snr, dec_wl1); + } // Final values are not too far apart. - HWY_ASSERT(0.87f <= final_ratio && final_ratio <= 1.0f); + HWY_ASSERT(gcpp::IsInside(0.87f, 1.0f, final_ratio)); // Decompressed and uncompressed dot should match exactly. - HWY_ASSERT(hwy::ScalarAbs(expected - actual) < 1E-4f); - // dec[] is close to in[], but we already check that in TestEncDec. - HWY_ASSERT(dec_snr >= 50.0); + HWY_ASSERT(gcpp::IsNear(expected, actual, 1E-4f)); // Geomean of ratios for each i should be very close to one. - HWY_ASSERT(dot_snr >= (sizeof(T) == 2 ? 70.0 : 1000.0)); + HWY_ASSERT(dot_snr >= (isBF ? 70.0 : 1000.0)); + + // dec[] is close to in[]. We also check that in TestEncDec, but for much + // smaller input magnitudes. + HWY_ASSERT(gcpp::IsNear(isBF ? 51.0 : 64.0, dec_snr, 1.0)); + HWY_ASSERT(gcpp::IsNear(isBF ? 0.013 : 0.012, dec_wl1, 0.001)); + HWY_ASSERT(gcpp::IsNear(isBF ? 6.2 : 6.3, dec_stats.SumL1(), 0.1)); + HWY_ASSERT_EQ(0, dec_stats.NumSignFlip()); + HWY_ASSERT_EQ(0, dec_stats.NumRoundedToZero()); + HWY_ASSERT_EQ(0.0, dec_stats.SumL1Rounded()); + // Absolute decode errors are in [0, 5E-2], and somewhat right-tailed. + HWY_ASSERT(gcpp::IsInside(0.0f, 2E-6f, dec_stats.L1().Min())); + HWY_ASSERT(gcpp::IsInside(3E-2f, 5E-2f, dec_stats.L1().Max())); + HWY_ASSERT(gcpp::IsInside(4E-3, 7E-3, dec_stats.L1().Mean())); + HWY_ASSERT(gcpp::IsInside(1.8, 1.9, dec_stats.L1().Skewness())); + HWY_ASSERT(gcpp::IsInside(6.0, 7.0, dec_stats.L1().Kurtosis())); } }; diff --git a/compression/stats.h b/compression/stats.h index 01f97f7..12985f4 100644 --- a/compression/stats.h +++ b/compression/stats.h @@ -45,7 +45,7 @@ class Bins { } } - void Print(const char* caption) { + void Print(const char* caption) const { fprintf(stderr, "\n%s [%zu]\n", caption, N); size_t last_nonzero = 0; for (size_t i = N - 1; i < N; --i) { diff --git a/compression/test_util.h b/compression/test_util.h index 0db9b2e..7be07e0 100644 --- a/compression/test_util.h +++ b/compression/test_util.h @@ -49,12 +49,26 @@ HWY_INLINE double RandomGaussian(hwy::RandomState& rng) { return plus_minus_1 * std::sqrt(kReps / 3.0); }; +// Returns true if val is inside [min, max]. +template +static inline bool IsInside(T expected_min, T expected_max, T val) { + return expected_min <= val && val <= expected_max; +} + +template +static inline bool IsNear(T expected, T val, T epsilon = T{1E-6}) { + return IsInside(expected - epsilon, expected + epsilon, val); +} + HWY_INLINE void VerifyGaussian(Stats& stats) { - const double stddev = stats.StandardDeviation(); - HWY_ASSERT(-0.01 <= stats.Mean() && stats.Mean() <= 0.01); - HWY_ASSERT(0.30 <= stddev && stddev <= 0.35); - HWY_ASSERT(-1.1 <= stats.Min() && stats.Min() <= -0.9); - HWY_ASSERT(0.9 <= stats.Max() && stats.Max() <= 1.1); + // Inputs are roughly [-1, 1] and symmetric about zero. + HWY_ASSERT(IsNear(-1.0f, stats.Min(), 0.10f)); + HWY_ASSERT(IsNear(+1.0f, stats.Max(), 0.10f)); + HWY_ASSERT(IsInside(-2E-3, 2E-3, stats.Mean())); + HWY_ASSERT(IsInside(-0.15, 0.15, stats.Skewness())); + // Near-Gaussian. + HWY_ASSERT(IsInside(0.30, 0.35, stats.StandardDeviation())); + HWY_ASSERT(IsNear(3.0, stats.Kurtosis(), 0.3)); } } // namespace gcpp