mirror of https://github.com/google/gemma.cpp.git
289 lines
11 KiB
C++
289 lines
11 KiB
C++
// Copyright 2023 Google LLC
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "compression/types.h"
|
|
#ifndef HWY_DISABLED_TARGETS
|
|
#define HWY_DISABLED_TARGETS GEMMA_DISABLED_TARGETS
|
|
#endif // HWY_DISABLED_TARGETS
|
|
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
|
|
#include "compression/compress.h"
|
|
#include "compression/distortion.h"
|
|
#include "util/test_util.h"
|
|
#include "hwy/aligned_allocator.h"
|
|
#include "hwy/base.h"
|
|
#include "hwy/contrib/thread_pool/thread_pool.h"
|
|
#include "hwy/tests/hwy_gtest.h"
|
|
// clang-format off
|
|
#undef HWY_TARGET_INCLUDE
|
|
#define HWY_TARGET_INCLUDE "compression/compress_test.cc" // NOLINT
|
|
// clang-format on
|
|
#include "hwy/foreach_target.h" // IWYU pragma: keep
|
|
#include "hwy/highway.h"
|
|
// After highway.h
|
|
#include "compression/test_util-inl.h"
|
|
|
|
HWY_BEFORE_NAMESPACE();
|
|
namespace gcpp {
|
|
namespace HWY_NAMESPACE {
|
|
namespace hn = hwy::HWY_NAMESPACE;
|
|
|
|
// Calls Compress and Decompress2 and verifies the distortion/error.
|
|
template <typename Packed>
|
|
struct TestDecompress2 {
|
|
template <typename T, class D>
|
|
HWY_INLINE void operator()(T /*unused*/, D d) {
|
|
const size_t N = hn::Lanes(d);
|
|
CompressWorkingSet work;
|
|
hwy::ThreadPool pool(0);
|
|
hwy::RandomState rng;
|
|
|
|
const size_t num = 2 * N;
|
|
const size_t packed_num = CompressedArrayElements<Packed>(num);
|
|
auto raw = hwy::AllocateAligned<float>(num); // Compress requires f32
|
|
auto packed = hwy::AllocateAligned<Packed>(packed_num);
|
|
auto dec = hwy::AllocateAligned<T>(num);
|
|
HWY_ASSERT(raw && packed && dec);
|
|
const auto packed_span = MakeSpan(packed.get(), packed_num);
|
|
|
|
hwy::Stats in_stats;
|
|
for (size_t i = 0; i < num; ++i) {
|
|
raw[i] = static_cast<float>(RandomGaussian(rng));
|
|
in_stats.Notify(raw[i]);
|
|
}
|
|
// Short inputs fail VerifyGaussian.
|
|
|
|
const size_t packed_ofs = 0;
|
|
Compress(raw.get(), num, work, packed_span, packed_ofs, pool);
|
|
hn::Vec<D> raw0, raw1;
|
|
Decompress2(d, MakeConst(packed_span), packed_ofs, raw0, raw1);
|
|
hn::Store(raw0, d, dec.get());
|
|
hn::Store(raw1, d, dec.get() + N);
|
|
|
|
DistortionStats stats;
|
|
for (size_t i = 0; i < num; ++i) {
|
|
stats.Notify(raw[i], hwy::ConvertScalarTo<float>(dec[i]));
|
|
}
|
|
|
|
if constexpr (true) { // leave enabled due to sporadic failures
|
|
fprintf(stderr,
|
|
"TypeName<Packed>() %s TypeName<T>() %s: num %zu: stats.SumL1() "
|
|
"%f stats.GeomeanValueDivL1() %f stats.WeightedAverageL1() %f "
|
|
"stats.L1().Max() %f\n",
|
|
TypeName<Packed>(), TypeName<T>(), num, stats.SumL1(),
|
|
stats.GeomeanValueDivL1(), stats.WeightedAverageL1(),
|
|
stats.L1().Max());
|
|
}
|
|
|
|
constexpr bool kFromFloat = hwy::IsSame<Packed, float>();
|
|
constexpr bool kToFloat = hwy::IsSame<T, float>();
|
|
if constexpr (kFromFloat && kToFloat) { // Lossless
|
|
HWY_ASSERT(stats.NumExact() == num);
|
|
HWY_ASSERT(stats.SumL1() == 0.0f);
|
|
HWY_ASSERT(stats.L1().Max() == 0.0f);
|
|
} else if constexpr (hwy::IsSame<Packed, BF16>() ||
|
|
(kFromFloat && hwy::IsSame<T, BF16>())) {
|
|
// Small roundoff error. BF16 to float is not lossless because the
|
|
// comparison is with float `raw`, prior to the Compress to BF16.
|
|
HWY_ASSERT(stats.L1().Max() <= 2E-3f);
|
|
HWY_ASSERT(IsInside(3E-4, 2E-3, stats.WeightedAverageL1()));
|
|
HWY_ASSERT(IsInside(600.0, 900.0, stats.GeomeanValueDivL1()));
|
|
} else if constexpr (hwy::IsSame<Packed, SfpStream>()) {
|
|
HWY_ASSERT(stats.SumL1() <= 0.4f);
|
|
HWY_ASSERT(stats.L1().Max() <= 0.04f);
|
|
HWY_ASSERT(IsInside(0.01, 0.03, stats.WeightedAverageL1()));
|
|
HWY_ASSERT(IsInside(48.0, 72.0, stats.GeomeanValueDivL1()));
|
|
} else if constexpr (hwy::IsSame<Packed, NuqStream>()) {
|
|
static_assert(NuqStream::kGroupSize == 256, "Update expected");
|
|
HWY_ASSERT(stats.SumL1() <= 1.2f);
|
|
HWY_ASSERT(stats.L1().Max() <= 0.08f);
|
|
HWY_ASSERT(IsInside(0.02, 0.05, stats.WeightedAverageL1()));
|
|
HWY_ASSERT(IsInside(18.0, 62.0, stats.GeomeanValueDivL1()));
|
|
} else {
|
|
HWY_ABORT("Unhandled type requested by ForeachPackedAndRawType");
|
|
}
|
|
}
|
|
};
|
|
|
|
// Test entry point: hands `TestDecompress2` to `ForeachPackedAndRawType`
// (presumably one instantiation per packed/raw type pair - see test_util-inl.h).
void TestAllDecompress2() {
  ForeachPackedAndRawType<TestDecompress2>();
}
|
|
|
|
// Calls Compress and DecompressAndZeroPad for all short lengths and verifies
|
|
// the distortion/error.
|
|
template <typename Packed>
|
|
struct TestShortLengths {
|
|
template <typename T, class D>
|
|
HWY_INLINE void operator()(T /*unused*/, D d) {
|
|
const size_t N = hn::Lanes(d);
|
|
CompressWorkingSet work;
|
|
hwy::ThreadPool pool(0);
|
|
hwy::RandomState rng;
|
|
|
|
for (size_t num = 1; num < 5 * hn::Lanes(d); ++num) {
|
|
const size_t packed_num = CompressedArrayElements<Packed>(num);
|
|
|
|
auto raw = hwy::AllocateAligned<float>(num); // Compress requires f32
|
|
auto packed = hwy::AllocateAligned<Packed>(packed_num);
|
|
auto dec = hwy::AllocateAligned<T>(hwy::RoundUpTo(num, N));
|
|
HWY_ASSERT(raw && packed && dec);
|
|
const auto packed_span = MakeSpan(packed.get(), packed_num);
|
|
|
|
hwy::Stats in_stats;
|
|
for (size_t i = 0; i < num; ++i) {
|
|
raw[i] = static_cast<float>(RandomGaussian(rng));
|
|
in_stats.Notify(raw[i]);
|
|
}
|
|
// Short inputs fail VerifyGaussian.
|
|
|
|
const size_t packed_ofs = 0;
|
|
Compress(raw.get(), num, work, packed_span, packed_ofs, pool);
|
|
DecompressAndZeroPad(d, MakeConst(packed_span), packed_ofs, dec.get(),
|
|
num);
|
|
|
|
DistortionStats stats;
|
|
for (size_t i = 0; i < num; ++i) {
|
|
stats.Notify(raw[i], hwy::ConvertScalarTo<float>(dec[i]));
|
|
}
|
|
|
|
if constexpr (true) {
|
|
fprintf(stderr, "%s %s: %zu: %f %f %f %f\n", TypeName<Packed>(),
|
|
TypeName<T>(), num, stats.SumL1(), stats.GeomeanValueDivL1(),
|
|
stats.WeightedAverageL1(), stats.L1().Max());
|
|
}
|
|
|
|
constexpr bool kFromFloat = hwy::IsSame<Packed, float>();
|
|
constexpr bool kToFloat = hwy::IsSame<T, float>();
|
|
if constexpr (kFromFloat && kToFloat) { // Lossless
|
|
HWY_ASSERT(stats.NumExact() == num);
|
|
HWY_ASSERT(stats.SumL1() == 0.0f);
|
|
HWY_ASSERT(stats.L1().Max() == 0.0f);
|
|
} else if (hwy::IsSame<Packed, BF16>() ||
|
|
(kFromFloat && hwy::IsSame<T, BF16>())) {
|
|
// Small roundoff error. BF16 to float is not lossless because the
|
|
// comparison is with float `raw`, prior to the Compress to BF16.
|
|
HWY_ASSERT(stats.L1().Max() <= 4E-3f);
|
|
HWY_ASSERT(IsInside(1E-5, 3E-3, stats.WeightedAverageL1()));
|
|
HWY_ASSERT(IsInside(300.0, 2200.0, stats.GeomeanValueDivL1()));
|
|
} else if (hwy::IsSame<Packed, SfpStream>()) {
|
|
HWY_ASSERT(stats.SumL1() <= 1.3f);
|
|
HWY_ASSERT(stats.L1().Max() <= 0.08f);
|
|
HWY_ASSERT(IsInside(7E-5, 0.05, stats.WeightedAverageL1()));
|
|
HWY_ASSERT(IsInside(28.0, 200.0, stats.GeomeanValueDivL1()));
|
|
} else if (hwy::IsSame<Packed, NuqStream>()) {
|
|
static_assert(NuqStream::kGroupSize == 256, "Update expected");
|
|
HWY_ASSERT(stats.SumL1() <= 4.6f);
|
|
HWY_ASSERT(stats.L1().Max() <= 0.14f);
|
|
HWY_ASSERT(IsInside(7E-5, 0.06, stats.WeightedAverageL1()));
|
|
HWY_ASSERT(IsInside(11.0, 180.0, stats.GeomeanValueDivL1()));
|
|
} else {
|
|
HWY_ABORT("Unhandled type requested by ForeachPackedAndRawType");
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
// Test entry point: hands `TestShortLengths` to `ForeachPackedAndRawType`
// (presumably one instantiation per packed/raw type pair - see test_util-inl.h).
void TestAllShortLengths() {
  ForeachPackedAndRawType<TestShortLengths>();
}
|
|
|
|
// Verifies the arguments and remainder handling of `DecompressAndCompress*`.
|
|
class TestDecompressAndCompress {
|
|
public:
|
|
template <typename T, class D>
|
|
HWY_INLINE void operator()(T /*unused*/, D d) {
|
|
ForeachActivationType3<Test>(d);
|
|
}
|
|
|
|
private:
|
|
struct Test {
|
|
template <typename T1, typename T2, typename T3, /*Deduced:*/ class D>
|
|
void operator()(T1, T2, T3, D d) {
|
|
hwy::RandomState rng;
|
|
using DF = hn::Repartition<float, D>;
|
|
using VF = hn::Vec<DF>;
|
|
const DF df;
|
|
|
|
for (size_t num = 1; num < 7 * hn::Lanes(d); ++num) {
|
|
auto p = hwy::AllocateAligned<T1>(num);
|
|
auto p1 = hwy::AllocateAligned<T2>(num);
|
|
auto p2 = hwy::AllocateAligned<T3>(num);
|
|
auto out = hwy::AllocateAligned<T1>(num);
|
|
auto expected1 = hwy::AllocateAligned<T1>(num);
|
|
auto expected2 = hwy::AllocateAligned<T1>(num);
|
|
auto expected3 = hwy::AllocateAligned<T1>(num);
|
|
HWY_ASSERT(p && p1 && p2 && out && expected1 && expected2 && expected3);
|
|
// Two bits each, totalling 6 bits which fit in the BF16 mantissa.
|
|
for (size_t i = 0; i < num; ++i) {
|
|
const size_t mod = i & 3;
|
|
p[i] = hwy::ConvertScalarTo<T1>(mod);
|
|
p1[i] = hwy::ConvertScalarTo<T2>(mod << 2);
|
|
p2[i] = hwy::ConvertScalarTo<T3>(mod << 4);
|
|
// For `Decompress1AndCompressInplace` to not overwrite `p`.
|
|
out[i] = p[i];
|
|
expected1[i] = hwy::ConvertScalarTo<T1>(mod);
|
|
expected2[i] = hwy::ConvertScalarTo<T1>((mod << 2) | mod);
|
|
expected3[i] =
|
|
hwy::ConvertScalarTo<T1>((mod << 4) | (mod << 2) | mod);
|
|
}
|
|
|
|
DecompressAndCompressInplace(df, p.get(), num,
|
|
[](DF, VF v) HWY_ATTR -> VF { return v; });
|
|
HWY_ASSERT_ARRAY_EQ(expected1.get(), p.get(), num);
|
|
|
|
// Uses `out` so as not to overwrite `p`.
|
|
Decompress1AndCompressInplace(
|
|
df, out.get(), num, p1.get(),
|
|
[](DF, VF v, VF v1) HWY_ATTR -> VF { return hn::Add(v, v1); });
|
|
HWY_ASSERT_ARRAY_EQ(expected2.get(), out.get(), num);
|
|
|
|
Decompress1AndCompressTo(df, out.get(), num, p.get(),
|
|
[](DF, VF v) HWY_ATTR -> VF { return v; });
|
|
HWY_ASSERT_ARRAY_EQ(expected1.get(), out.get(), num);
|
|
|
|
Decompress2AndCompressTo(df, out.get(), num, p.get(), p1.get(),
|
|
[](DF, VF v, VF v1)
|
|
HWY_ATTR -> VF { return hn::Add(v, v1); });
|
|
HWY_ASSERT_ARRAY_EQ(expected2.get(), out.get(), num);
|
|
|
|
Decompress3AndCompressTo(
|
|
df, out.get(), num, p.get(), p1.get(), p2.get(),
|
|
[](DF, VF v, VF v1, VF v2)
|
|
HWY_ATTR -> VF { return hn::Add(hn::Add(v, v1), v2); });
|
|
HWY_ASSERT_ARRAY_EQ(expected3.get(), out.get(), num);
|
|
}
|
|
}
|
|
};
|
|
};
|
|
|
|
void TestAllDecompressAndCompress() {
|
|
// The Highway Test interface (`ForGE128Vectors`) only supports a single type.
|
|
// We hard-code one here, and use `ForeachActivationType` internally.
|
|
hn::ForGE128Vectors<TestDecompressAndCompress>()(float());
|
|
}
|
|
|
|
// NOLINTNEXTLINE(google-readability-namespace-comments)
|
|
} // namespace HWY_NAMESPACE
|
|
} // namespace gcpp
|
|
HWY_AFTER_NAMESPACE();
|
|
|
|
// Test registration. Guarded by HWY_ONCE because this file is re-included via
// `HWY_TARGET_INCLUDE` / foreach_target.h, whereas the suite itself must be
// declared a single time.
#if HWY_ONCE
namespace gcpp {

HWY_BEFORE_TEST(CompressTest);
// One exported, parameterized test per `TestAll*` function defined above.
HWY_EXPORT_AND_TEST_P(CompressTest, TestAllDecompress2);
HWY_EXPORT_AND_TEST_P(CompressTest, TestAllShortLengths);
HWY_EXPORT_AND_TEST_P(CompressTest, TestAllDecompressAndCompress);
HWY_AFTER_TEST();

}  // namespace gcpp
#endif  // HWY_ONCE
|