Add compression/ comments, especially on SFP range

PiperOrigin-RevId: 642238720
This commit is contained in:
Jan Wassenberg 2024-06-11 05:47:07 -07:00 committed by Copybara-Service
parent c557ad23a8
commit a0e808e341
3 changed files with 18 additions and 4 deletions

View File

@ -22,6 +22,7 @@
#include <stdio.h> #include <stdio.h>
#include <array> #include <array>
#include <cmath> // lroundf, only if COMPRESS_STATS
#include "compression/blob_store.h" #include "compression/blob_store.h"
#include "compression/compress.h" #include "compression/compress.h"
@ -55,6 +56,7 @@ namespace hn = hwy::HWY_NAMESPACE;
template <typename T> // primary, must specialize template <typename T> // primary, must specialize
struct CompressTraits {}; struct CompressTraits {};
// Useful for backprop/, where weights are currently f32.
template <> template <>
struct CompressTraits<float> { struct CompressTraits<float> {
using MatT = float; using MatT = float;
@ -267,11 +269,14 @@ struct CompressTraits<hwy::bfloat16_t> {
} }
}; };
// Switching floating point: 8-bit, 2..3 mantissa bits.
template <> template <>
struct CompressTraits<SfpStream> { struct CompressTraits<SfpStream> {
using MatT = SfpStream; using MatT = SfpStream;
static constexpr bool kSupportsEvenOdd = true; static constexpr bool kSupportsEvenOdd = true;
// Callers are responsible for scaling `in` such that its magnitudes do not
// exceed 1.875. See CompressedArray::scale().
template <class DF, HWY_IF_F32_D(DF)> template <class DF, HWY_IF_F32_D(DF)>
static HWY_INLINE void Compress(DF df, const float* HWY_RESTRICT in, static HWY_INLINE void Compress(DF df, const float* HWY_RESTRICT in,
size_t num, CompressPerThread& tls, size_t num, CompressPerThread& tls,
@ -351,6 +356,7 @@ struct CompressTraits<SfpStream> {
} }
}; };
// Nonuniform quantization, 4.5 bits per element, two separate streams.
template <> template <>
struct CompressTraits<NuqStream> { struct CompressTraits<NuqStream> {
using MatT = NuqStream; using MatT = NuqStream;
@ -525,12 +531,12 @@ HWY_INLINE float Dot(DF df, const CompressedArray<MatT, kCapacity>& compressed,
return compressed.scale() * dot_result; return compressed.scale() * dot_result;
} }
// Callback used by ForeachTensor. // Functor called for each tensor; compresses it and stores it, along with
// its scaling factor, to BlobStore.
class Compressor { class Compressor {
public: public:
explicit Compressor(hwy::ThreadPool& pool) : pool_(pool) {} explicit Compressor(hwy::ThreadPool& pool) : pool_(pool) {}
// Called for each tensor; compresses it and stores to the cache.
template <typename MatT, size_t kCapacity> template <typename MatT, size_t kCapacity>
void operator()(const char* name, const float* weights, void operator()(const char* name, const float* weights,
CompressedArray<MatT, kCapacity>& compressed) { CompressedArray<MatT, kCapacity>& compressed) {

View File

@ -79,6 +79,9 @@ class CompressedArray {
MatT* data() { return data_.data(); } MatT* data() { return data_.data(); }
const MatT* data() const { return data_.data(); } const MatT* data() const { return data_.data(); }
// Decoded elements should be multiplied by this to restore their original
// range. This is required because SfpStream can only encode a limited range
// of magnitudes.
float scale() const { return scale_[0]; } float scale() const { return scale_[0]; }
void set_scale(float scale) { scale_[0] = scale; } void set_scale(float scale) { scale_[0] = scale; }
@ -90,6 +93,7 @@ class CompressedArray {
private: private:
std::array<MatT, NumCompressed()> data_; std::array<MatT, NumCompressed()> data_;
// Blobs are at least kBlobAlign bytes anyway.
float scale_[kBlobAlign / sizeof(float)]; float scale_[kBlobAlign / sizeof(float)];
}; };
@ -172,6 +176,8 @@ hwy::uint128_t CacheKey(const char* name) {
return MakeKey((std::string(1, prefix) + name).c_str()); return MakeKey((std::string(1, prefix) + name).c_str());
} }
// Functor called for each tensor; loads it and its scaling factor
// from BlobStore.
class CacheLoader { class CacheLoader {
public: public:
explicit CacheLoader(const Path& blob_filename) { explicit CacheLoader(const Path& blob_filename) {

View File

@ -260,7 +260,8 @@ class SfpCodec {
hi = hn::BitwiseIfThenElse(k80, sign_in_msb, hn::ShiftRight<1>(biased_e)); hi = hn::BitwiseIfThenElse(k80, sign_in_msb, hn::ShiftRight<1>(biased_e));
} }
// Encodes `num` bf16 values from `in_bf` to `out_packed`. // Encodes `num` bf16 values from `in_bf` to `out_packed`. Their magnitude
// must be at most 1.875.
template <class DBF, HWY_IF_BF16_D(DBF)> template <class DBF, HWY_IF_BF16_D(DBF)>
static HWY_INLINE void Enc(DBF dbf, const hwy::bfloat16_t* HWY_RESTRICT in_bf, static HWY_INLINE void Enc(DBF dbf, const hwy::bfloat16_t* HWY_RESTRICT in_bf,
size_t num, SfpStream* HWY_RESTRICT out_packed) { size_t num, SfpStream* HWY_RESTRICT out_packed) {
@ -288,7 +289,8 @@ class SfpCodec {
} }
} }
// Encodes `num` f32 values from `in_f` to `packed`. // Encodes `num` f32 values from `in_f` to `packed`. Their magnitude
// must be at most 1.875.
template <class DF, HWY_IF_F32_D(DF)> template <class DF, HWY_IF_F32_D(DF)>
static HWY_INLINE void Enc(DF df, const float* HWY_RESTRICT in_f, size_t num, static HWY_INLINE void Enc(DF df, const float* HWY_RESTRICT in_f, size_t num,
SfpStream* HWY_RESTRICT out_packed) { SfpStream* HWY_RESTRICT out_packed) {