mirror of https://github.com/google/gemma.cpp.git
Add ToFloatSlow, move RandomFloat to test_util
PiperOrigin-RevId: 837412290
This commit is contained in:
parent
c153d5255b
commit
ccb49bc82f
|
|
@ -171,6 +171,7 @@ cc_library(
|
|||
name = "test_util",
|
||||
hdrs = ["util/test_util.h"],
|
||||
deps = [
|
||||
":basics",
|
||||
"@highway//:hwy",
|
||||
"@highway//:hwy_test_util",
|
||||
"@highway//:stats",
|
||||
|
|
@ -575,6 +576,7 @@ cc_library(
|
|||
":mat",
|
||||
":matmul",
|
||||
":matmul_env",
|
||||
":test_util",
|
||||
":model_store",
|
||||
":ops",
|
||||
":threading",
|
||||
|
|
|
|||
|
|
@ -135,8 +135,8 @@ cc_test(
|
|||
# for test_suite.
|
||||
tags = ["hwy_ops_test"],
|
||||
deps = [
|
||||
":compress",
|
||||
":distortion",
|
||||
":sfp",
|
||||
"@googletest//:gtest_main", # buildcleaner: keep
|
||||
"//:test_util",
|
||||
"@highway//:hwy",
|
||||
|
|
@ -182,7 +182,6 @@ cc_library(
|
|||
"//:mat",
|
||||
"//:threading_context",
|
||||
"@highway//:hwy",
|
||||
"@highway//:nanobenchmark",
|
||||
"@highway//:profiler",
|
||||
"@highway//:stats",
|
||||
"@highway//:thread_pool",
|
||||
|
|
|
|||
|
|
@ -82,6 +82,8 @@ struct CompressTraits<float> {
|
|||
hn::StoreU(raw1, df, packed.ptr + packed_ofs + NF);
|
||||
}
|
||||
|
||||
// Scalar conversion of a single packed element to float. For this traits
// specialization the element is returned as-is (implicit conversion only).
static float ToFloatSlow(const Packed x) {
  const float value = x;
  return value;
}
|
||||
|
||||
template <class DBF16, HWY_IF_BF16_D(DBF16), class VBF16 = hn::Vec<DBF16>>
|
||||
static HWY_INLINE void Load2(DBF16 dbf16,
|
||||
const PackedSpan<const Packed>& packed,
|
||||
|
|
@ -254,6 +256,10 @@ struct CompressTraits<BF16> {
|
|||
packed.ptr + packed_ofs);
|
||||
}
|
||||
|
||||
static float ToFloatSlow(const Packed x) {
|
||||
return hwy::ConvertScalarTo<float>(x);
|
||||
}
|
||||
|
||||
template <class DBF16, HWY_IF_BF16_D(DBF16)>
|
||||
static HWY_INLINE void Load2(DBF16 dbf16,
|
||||
const PackedSpan<const Packed>& packed,
|
||||
|
|
@ -397,6 +403,27 @@ struct CompressTraits<SfpStream> {
|
|||
}
|
||||
}
|
||||
|
||||
// NOTE: this does not take into account the per-tensor scale.
|
||||
static float ToFloatSlow(const Packed x) {
|
||||
uint32_t sfp = x.byte;
|
||||
HWY_ASSERT(sfp != 0x80); // -0 is reserved
|
||||
|
||||
const uint32_t sign32 = (sfp & 0x80) << 24;
|
||||
sfp &= 0x7F;
|
||||
const bool large_e = sfp >= 64;
|
||||
const size_t m_bits = large_e ? 3 : 2;
|
||||
uint32_t m = sfp & ((1u << m_bits) - 1u);
|
||||
size_t e = sfp >> m_bits;
|
||||
if (sfp == 0) return 0.0f;
|
||||
const uint32_t e_bias = large_e ? 15 : 23;
|
||||
const uint32_t exp32 = static_cast<uint32_t>(127 + e - e_bias) << 23;
|
||||
const uint32_t mnt32 = m << (23 - m_bits);
|
||||
const uint32_t binary32 = sign32 | exp32 | mnt32;
|
||||
float result;
|
||||
hwy::CopySameSize(&binary32, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <class D> // Caller checks this is f32 or bf16
|
||||
static HWY_INLINE void Load2(D d, const PackedSpan<const Packed>& packed,
|
||||
const size_t packed_ofs, hn::Vec<D>& raw0,
|
||||
|
|
@ -437,6 +464,12 @@ struct CompressTraits<I8Stream> {
|
|||
IntCodec::Dec2(d, packed, packed_ofs, raw0, raw1);
|
||||
}
|
||||
|
||||
// Single-element decode is not available for this format; per the assertion
// message, decoding requires a stream. Trips HWY_DASSERT where that check is
// active and otherwise yields 0.0f.
static float ToFloatSlow(const Packed x) {
  constexpr float kUnsupportedResult = 0.0f;
  HWY_DASSERT(!"Not supported - requires a stream");
  return kUnsupportedResult;
}
|
||||
// Store2 is not yet implemented.
|
||||
|
||||
template <class D, typename Raw>
|
||||
static HWY_INLINE void DecompressAndZeroPad(
|
||||
D d, const PackedSpan<const Packed>& packed, const size_t packed_ofs,
|
||||
|
|
@ -483,6 +516,10 @@ struct CompressTraits<NuqStream> {
|
|||
NuqCodec::Dec2(d, packed, packed_ofs, raw0, raw1);
|
||||
}
|
||||
|
||||
// Placeholder: a lone packed element cannot be decoded for this format (the
// assertion message states a stream is required). Trips HWY_DASSERT where
// that check is active and otherwise yields 0.0f.
static float ToFloatSlow(const Packed x) {
  constexpr float kPlaceholder = 0.0f;
  HWY_DASSERT(!"Not supported - requires a stream");
  return kPlaceholder;
}
|
||||
// Store2 is not yet implemented.
|
||||
|
||||
template <class D, typename Raw>
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ cc_library(
|
|||
"//io",
|
||||
"//io:blob_store",
|
||||
"@highway//:hwy",
|
||||
"@highway//:thread_pool",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -37,37 +37,23 @@
|
|||
#include "hwy/foreach_target.h" // IWYU pragma: keep
|
||||
#include "hwy/highway.h"
|
||||
// After highway.h
|
||||
#include "compression/sfp-inl.h"
|
||||
#include "compression/compress-inl.h"
|
||||
#include "hwy/tests/test_util-inl.h"
|
||||
|
||||
HWY_BEFORE_NAMESPACE();
|
||||
namespace gcpp {
|
||||
namespace HWY_NAMESPACE {
|
||||
|
||||
// Decode
|
||||
float F32FromSFP8(uint32_t sfp) {
|
||||
HWY_ASSERT(sfp < 256);
|
||||
HWY_ASSERT(sfp != 0x80); // -0 is reserved
|
||||
HWY_INLINE_VAR constexpr bool kPrint = false;
|
||||
|
||||
const uint32_t sign32 = (sfp & 0x80) << 24;
|
||||
sfp &= 0x7F;
|
||||
const bool large_e = sfp >= 64;
|
||||
const size_t m_bits = large_e ? 3 : 2;
|
||||
uint32_t m = sfp & ((1u << m_bits) - 1u);
|
||||
size_t e = sfp >> m_bits;
|
||||
if (sfp == 0) return 0.0f;
|
||||
const uint32_t e_bias = large_e ? 15 : 23;
|
||||
const uint32_t exp32 = static_cast<uint32_t>(127 + e - e_bias) << 23;
|
||||
const uint32_t mnt32 = m << (23 - m_bits);
|
||||
const uint32_t binary32 = sign32 | exp32 | mnt32;
|
||||
float result;
|
||||
hwy::CopySameSize(&binary32, &result);
|
||||
return result;
|
||||
static float F32FromSFP8(uint32_t sfp) {
|
||||
return CompressTraits<SfpStream>::ToFloatSlow(
|
||||
SfpStream{static_cast<uint8_t>(sfp)});
|
||||
}
|
||||
|
||||
// Used for HWY_AVX3_DL and newer.
|
||||
void PrintTables() {
|
||||
if (HWY_ONCE && false) {
|
||||
if (HWY_ONCE && kPrint) {
|
||||
uint8_t hi[128];
|
||||
fprintf(stderr, "lo\n");
|
||||
for (uint32_t sfp = 0; sfp < 128; ++sfp) {
|
||||
|
|
@ -92,7 +78,7 @@ void TestAllUnique() {
|
|||
unique.insert(F32FromSFP8(sfp));
|
||||
}
|
||||
HWY_ASSERT_EQ(size_t{255}, unique.size());
|
||||
if (false) {
|
||||
if (kPrint) {
|
||||
for (float f : unique) {
|
||||
fprintf(stderr, "%e\n", f);
|
||||
}
|
||||
|
|
@ -163,7 +149,7 @@ HWY_INLINE uint32_t SFP8FromF32(float f) {
|
|||
if (m == 0) m = 1;
|
||||
}
|
||||
|
||||
if (false) {
|
||||
if (kPrint) {
|
||||
fprintf(stderr, "in %x round %x rounded %x e %d m %x large_e %d\n",
|
||||
org_binary32, round, rounded, e, m, large_e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -891,18 +891,6 @@ class DotStats {
|
|||
hwy::Stats s_times[kVariants];
|
||||
};
|
||||
|
||||
// Returns normalized value in [-1, 1).
|
||||
float RandomFloat(RngStream& rng) {
|
||||
const uint32_t exp = hwy::BitCastScalar<uint32_t>(1.0f);
|
||||
const uint32_t mantissa_mask = hwy::MantissaMask<float>();
|
||||
const uint32_t representation = exp | (rng() & mantissa_mask);
|
||||
const float f12 = hwy::BitCastScalar<float>(representation);
|
||||
HWY_DASSERT(1.0f <= f12 && f12 < 2.0f); // exponent is 2^0, only mantissa
|
||||
const float f = (2.0f * (f12 - 1.0f)) - 1.0f;
|
||||
HWY_DASSERT(-1.0f <= f && f < 1.0f);
|
||||
return f;
|
||||
}
|
||||
|
||||
// `raw` holds the decompressed values, so that the test measures only the
|
||||
// error from the Dot algorithms, not the compression.
|
||||
template <typename Packed>
|
||||
|
|
|
|||
|
|
@ -19,8 +19,10 @@
|
|||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm> // std::sort
|
||||
#include <cmath>
|
||||
|
||||
#include "util/basics.h" // RngStream
|
||||
#include "hwy/base.h"
|
||||
|
||||
// IWYU pragma: begin_exports
|
||||
|
|
@ -30,9 +32,35 @@
|
|||
|
||||
namespace gcpp {
|
||||
|
||||
// Excludes outliers; we might not have enough samples for a reliable mode.
//
// Sorts `seconds[0, num)` ascending IN PLACE, then returns the mean of the
// elements at indices [num / 4, num / 2) of the sorted array.
//
// `num` must be nonzero (checked via HWY_DASSERT before any element is read).
// With a single sample, that sample is returned; previously the averaged range
// was empty for `num == 1` and the result was 0/0 = NaN.
HWY_INLINE double TrimmedMean(double* seconds, size_t num) {
  HWY_DASSERT(num != 0);
  std::sort(seconds, seconds + num);

  const size_t begin = num / 4;
  size_t end = num / 2;
  // For num >= 2, end > begin always holds. Only num == 1 yields an empty
  // range, so widen it to cover the sole sample.
  if (num == 1) end = 1;

  double sum = 0;
  for (size_t i = begin; i < end; ++i) {
    sum += seconds[i];
  }
  return sum / static_cast<double>(end - begin);
}
|
||||
|
||||
// Returns normalized value in [-1, 1).
|
||||
HWY_INLINE float RandomFloat(RngStream& rng) {
|
||||
const uint32_t exp = hwy::BitCastScalar<uint32_t>(1.0f);
|
||||
const uint32_t mantissa_mask = hwy::MantissaMask<float>();
|
||||
const uint32_t representation = exp | (rng() & mantissa_mask);
|
||||
const float f12 = hwy::BitCastScalar<float>(representation);
|
||||
HWY_DASSERT(1.0f <= f12 && f12 < 2.0f); // exponent is 2^0, only mantissa
|
||||
const float f = (2.0f * (f12 - 1.0f)) - 1.0f;
|
||||
HWY_DASSERT(-1.0f <= f && f < 1.0f);
|
||||
return f;
|
||||
}
|
||||
|
||||
// Returns random Gaussian (mean=0, stddev=1/3 similar to expected weights)
|
||||
// using the central limit theorem. Avoid std::normal_distribution for
|
||||
// consistent cross-platform output.
|
||||
// TODO: use RngStream instead of RandomState.
|
||||
HWY_INLINE double RandomGaussian(hwy::RandomState& rng) {
|
||||
uint64_t sum = 0;
|
||||
constexpr int kReps = 40;
|
||||
|
|
|
|||
Loading…
Reference in New Issue