mirror of https://github.com/google/gemma.cpp.git
Add compression/ comments, especially on SFP range
PiperOrigin-RevId: 642238720
commit a0e808e341
parent c557ad23a8
@@ -22,6 +22,7 @@
 #include <stdio.h>
 
 #include <array>
+#include <cmath>  // lroundf, only if COMPRESS_STATS
 
 #include "compression/blob_store.h"
 #include "compression/compress.h"
@@ -55,6 +56,7 @@ namespace hn = hwy::HWY_NAMESPACE;
 template <typename T>  // primary, must specialize
 struct CompressTraits {};
 
+// Useful for backprop/, where weights are currently f32.
 template <>
 struct CompressTraits<float> {
   using MatT = float;
@@ -267,11 +269,14 @@ struct CompressTraits<hwy::bfloat16_t> {
   }
 };
 
+// Switching floating point: 8-bit, 2..3 mantissa bits.
 template <>
 struct CompressTraits<SfpStream> {
   using MatT = SfpStream;
   static constexpr bool kSupportsEvenOdd = true;
 
+  // Callers are responsible for scaling `in` such that its magnitudes do not
+  // exceed 1.875. See CompressedArray::scale().
   template <class DF, HWY_IF_F32_D(DF)>
   static HWY_INLINE void Compress(DF df, const float* HWY_RESTRICT in,
                                   size_t num, CompressPerThread& tls,
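The new comments make the caller responsible for keeping inputs inside the SFP range. A minimal sketch of what that implies on the caller side, using hypothetical helper names rather than gemma.cpp's actual entry points; only the 1.875 bound and the scale()/set_scale() accessors are taken from this diff:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical caller-side preparation before handing `weights` to
// CompressTraits<SfpStream>::Compress(). SFP can only encode magnitudes up to
// 1.875, so larger tensors must be pre-scaled and the factor remembered.
float PrepareForSfp(std::vector<float>& weights) {
  float max_abs = 0.0f;
  for (float w : weights) max_abs = std::max(max_abs, std::abs(w));
  // Scale so that the largest magnitude maps to at most 1.875.
  const float scale = (max_abs > 1.875f) ? (max_abs / 1.875f) : 1.0f;
  const float inv = 1.0f / scale;
  for (float& w : weights) w *= inv;
  return scale;  // Caller would store this via CompressedArray::set_scale().
}

// After decoding (or a fused dot product), multiply by the stored scale to
// restore the original range, mirroring `compressed.scale() * dot_result`.
float Restore(float decoded, float scale) { return decoded * scale; }
```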
@@ -351,6 +356,7 @@ struct CompressTraits<SfpStream> {
   }
 };
 
+// Nonuniform quantization, 4.5 bits per element, two separate streams.
 template <>
 struct CompressTraits<NuqStream> {
   using MatT = NuqStream;
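Where the 4.5 bits per element comes from is not spelled out in the diff. A back-of-the-envelope sketch, assuming groups of 256 values sharing a table of 16 one-byte cluster centers plus a 4-bit index per element (the group size and cluster count are assumptions, not taken from this commit):

```cpp
#include <cstdio>

int main() {
  // Assumed parameters for illustration: each group of 256 elements stores
  // 16 one-byte cluster centers plus one 4-bit index per element.
  constexpr int kGroupSize = 256;
  constexpr int kClusters = 16;
  constexpr double table_bits = kClusters * 8.0 / kGroupSize;  // 0.5 bits/elem
  constexpr double index_bits = 4.0;                           // 4 bits/elem
  printf("bits per element: %.1f\n", table_bits + index_bits);  // prints 4.5
  return 0;
}
```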
@@ -525,12 +531,12 @@ HWY_INLINE float Dot(DF df, const CompressedArray<MatT, kCapacity>& compressed,
   return compressed.scale() * dot_result;
 }
 
-// Callback used by ForeachTensor.
+// Functor called for each tensor, which compresses and stores them along with
+// their scaling factors to BlobStore.
 class Compressor {
  public:
   explicit Compressor(hwy::ThreadPool& pool) : pool_(pool) {}
 
-  // Called for each tensor; compresses it and stores to the cache.
   template <typename MatT, size_t kCapacity>
   void operator()(const char* name, const float* weights,
                   CompressedArray<MatT, kCapacity>& compressed) {
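The hunk above shows Dot applying the tensor's scale once to the accumulated result rather than to every decoded element. A scalar reference sketch of that pattern, where `decode1` stands in for the real per-format decompression and is not a gemma.cpp API:

```cpp
#include <cstddef>

// Hypothetical scalar reference: accumulate the dot product over decoded
// (still pre-scale) elements, then apply the per-tensor scale exactly once.
template <typename Compressed, typename Decode1>
float ScalarDot(const Compressed& compressed, Decode1 decode1,
                const float* vec, size_t num) {
  float sum = 0.0f;
  for (size_t i = 0; i < num; ++i) {
    sum += decode1(compressed, i) * vec[i];
  }
  return compressed.scale() * sum;  // matches compressed.scale() * dot_result
}
```

Deferring the multiply keeps the per-element loop free of an extra operation; the scale is exact to apply at the end because it is a common factor of every term.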
@@ -79,6 +79,9 @@ class CompressedArray {
   MatT* data() { return data_.data(); }
   const MatT* data() const { return data_.data(); }
 
+  // Decoded elements should be multiplied by this to restore their original
+  // range. This is required because SfpStream can only encode a limited range
+  // of magnitudes.
   float scale() const { return scale_[0]; }
   void set_scale(float scale) { scale_[0] = scale; }
 
@@ -90,6 +93,7 @@
 
  private:
   std::array<MatT, NumCompressed()> data_;
+  // Blobs are at least kBlobAlign bytes anyway.
   float scale_[kBlobAlign / sizeof(float)];
 };
 
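Padding the single scale out to kBlobAlign bytes means storing it as its own blob costs nothing extra. A sketch of the size relationship this relies on; the value of kBlobAlign is assumed here for illustration and is defined by the library, not by this snippet:

```cpp
#include <cstddef>

// Assumption for illustration only: some power-of-two blob alignment.
constexpr size_t kBlobAlign = 256;

// One aligned unit's worth of floats; only element [0] carries the scale.
struct ScaleBlob {
  float scale[kBlobAlign / sizeof(float)];
};
static_assert(sizeof(ScaleBlob) == kBlobAlign,
              "padded scale fills exactly one aligned blob unit");
```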
@@ -172,6 +176,8 @@ hwy::uint128_t CacheKey(const char* name) {
   return MakeKey((std::string(1, prefix) + name).c_str());
 }
 
+// Functor called for each tensor, which loads them and their scaling factors
+// from BlobStore.
 class CacheLoader {
  public:
   explicit CacheLoader(const Path& blob_filename) {
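Compressor and CacheLoader share the same shape: a functor whose templated operator() is invoked once per tensor by a ForEachTensor-style traversal. A minimal sketch of that pattern with a hypothetical visitor and stand-in types, not the actual gemma.cpp interfaces:

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for CompressedArray<MatT, kCapacity>.
template <typename MatT, size_t kCapacity>
struct FakeCompressed {
  float scale = 1.0f;
};

// Visitor in the style of Compressor/CacheLoader: one templated call per tensor.
struct PrintVisitor {
  template <typename MatT, size_t kCapacity>
  void operator()(const char* name, const float* weights,
                  FakeCompressed<MatT, kCapacity>& compressed) {
    printf("tensor %s: %zu compressed elements, scale %.3f\n", name, kCapacity,
           compressed.scale);
    (void)weights;
  }
};

int main() {
  // A ForEachTensor-style traversal would call the visitor for every tensor;
  // here we hand-roll two calls for illustration.
  float w[4] = {0.1f, -0.2f, 0.3f, -0.4f};
  FakeCompressed<unsigned char, 4> c_embedding;
  FakeCompressed<unsigned char, 4> c_attn;
  PrintVisitor visitor;
  visitor("embedding", w, c_embedding);
  visitor("attn_w", w, c_attn);
  return 0;
}
```

The real Compressor additionally fans work out to a hwy::ThreadPool and writes results plus scaling factors to BlobStore, which this sketch omits.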
@@ -260,7 +260,8 @@ class SfpCodec {
     hi = hn::BitwiseIfThenElse(k80, sign_in_msb, hn::ShiftRight<1>(biased_e));
   }
 
-  // Encodes `num` bf16 values from `in_bf` to `out_packed`.
+  // Encodes `num` bf16 values from `in_bf` to `out_packed`. Their magnitude
+  // must be at most 1.875.
   template <class DBF, HWY_IF_BF16_D(DBF)>
   static HWY_INLINE void Enc(DBF dbf, const hwy::bfloat16_t* HWY_RESTRICT in_bf,
                              size_t num, SfpStream* HWY_RESTRICT out_packed) {
@@ -288,7 +289,8 @@
     }
   }
 
-  // Encodes `num` f32 values from `in_f` to `packed`.
+  // Encodes `num` f32 values from `in_f` to `packed`. Their magnitude
+  // must be at most 1.875.
   template <class DF, HWY_IF_F32_D(DF)>
   static HWY_INLINE void Enc(DF df, const float* HWY_RESTRICT in_f, size_t num,
                              SfpStream* HWY_RESTRICT out_packed) {
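The repeated 1.875 bound is consistent with the "2..3 mantissa bits" description: with three mantissa bits and a top binade of [1, 2), the largest encodable value is binary 1.111 = 1 + 7/8. A one-line check, stated as an inference from the comments rather than read out of the encoder itself:

```cpp
// Inferred, not taken from the SFP encoder: three mantissa bits in the top
// binade give a largest representable magnitude of 1.111b = 1 + 7/8 = 1.875.
static_assert(1.0 + 7.0 / 8.0 == 1.875, "largest 3-mantissa-bit value below 2");
```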