Add compression/ comments, especially on SFP range

PiperOrigin-RevId: 642238720
2024-06-11 05:47:07 -07:00 · 2024-06-11 05:47:07 -07:00 · a0e808e341
parent c557ad23a8
commit a0e808e341
3 changed files with 18 additions and 4 deletions
--- a/compression/compress-inl.h
+++ b/compression/compress-inl.h
@@ -22,6 +22,7 @@
 #include <stdio.h>

 #include <array>
+#include <cmath>  // lroundf, only if COMPRESS_STATS

 #include "compression/blob_store.h"
 #include "compression/compress.h"
@@ -55,6 +56,7 @@ namespace hn = hwy::HWY_NAMESPACE;
 template <typename T>  // primary, must specialize
 struct CompressTraits {};

+// Useful for backprop/, where weights are currently f32.
 template <>
 struct CompressTraits<float> {
  using MatT = float;
@@ -267,11 +269,14 @@ struct CompressTraits<hwy::bfloat16_t> {
  }
 };

+// Switching floating point: 8-bit, 2..3 mantissa bits.
 template <>
 struct CompressTraits<SfpStream> {
  using MatT = SfpStream;
  static constexpr bool kSupportsEvenOdd = true;

+  // Callers are responsible for scaling `in` such that its magnitudes do not
+  // exceed 1.875. See CompressedArray::scale().
  template <class DF, HWY_IF_F32_D(DF)>
  static HWY_INLINE void Compress(DF df, const float* HWY_RESTRICT in,
                                  size_t num, CompressPerThread& tls,
@@ -351,6 +356,7 @@ struct CompressTraits<SfpStream> {
  }
 };

+// Nonuniform quantization, 4.5 bits per element, two separate streams.
 template <>
 struct CompressTraits<NuqStream> {
  using MatT = NuqStream;
@@ -525,12 +531,12 @@ HWY_INLINE float Dot(DF df, const CompressedArray<MatT, kCapacity>& compressed,
  return compressed.scale() * dot_result;
 }

-// Callback used by ForeachTensor.
+// Functor called for each tensor, which compresses and stores them along with
+// their scaling factors to BlobStore.
 class Compressor {
 public:
  explicit Compressor(hwy::ThreadPool& pool) : pool_(pool) {}

-  // Called for each tensor; compresses it and stores to the cache.
  template <typename MatT, size_t kCapacity>
  void operator()(const char* name, const float* weights,
                  CompressedArray<MatT, kCapacity>& compressed) {
--- a/compression/compress.h
+++ b/compression/compress.h
@@ -79,6 +79,9 @@ class CompressedArray {
  MatT* data() { return data_.data(); }
  const MatT* data() const { return data_.data(); }

+  // Decoded elements should be multiplied by this to restore their original
+  // range. This is required because SfpStream can only encode a limited range
+  // of magnitudes.
  float scale() const { return scale_[0]; }
  void set_scale(float scale) { scale_[0] = scale; }

@@ -90,6 +93,7 @@ class CompressedArray {

 private:
  std::array<MatT, NumCompressed()> data_;
+  // Blobs are at least kBlobAlign bytes anyway.
  float scale_[kBlobAlign / sizeof(float)];
 };

@@ -172,6 +176,8 @@ hwy::uint128_t CacheKey(const char* name) {
  return MakeKey((std::string(1, prefix) + name).c_str());
 }

+// Functor called for each tensor, which loads them and their scaling factors
+// from BlobStore.
 class CacheLoader {
 public:
  explicit CacheLoader(const Path& blob_filename) {
--- a/compression/sfp-inl.h
+++ b/compression/sfp-inl.h
@@ -260,7 +260,8 @@ class SfpCodec {
    hi = hn::BitwiseIfThenElse(k80, sign_in_msb, hn::ShiftRight<1>(biased_e));
  }

-  // Encodes `num` bf16 values from `in_bf` to `out_packed`.
+  // Encodes `num` bf16 values from `in_bf` to `out_packed`. Their magnitude
+  // must be at most 1.875.
  template <class DBF, HWY_IF_BF16_D(DBF)>
  static HWY_INLINE void Enc(DBF dbf, const hwy::bfloat16_t* HWY_RESTRICT in_bf,
                             size_t num, SfpStream* HWY_RESTRICT out_packed) {
@@ -288,7 +289,8 @@ class SfpCodec {
    }
  }

-  // Encodes `num` f32 values from `in_f` to `packed`.
+  // Encodes `num` f32 values from `in_f` to `packed`. Their magnitude
+  // must be at most 1.875.
  template <class DF, HWY_IF_F32_D(DF)>
  static HWY_INLINE void Enc(DF df, const float* HWY_RESTRICT in_f, size_t num,
                             SfpStream* HWY_RESTRICT out_packed) {