Fix mismatch between blob_store and compress interfaces (bytes)

PiperOrigin-RevId: 673027268
Jan Wassenberg 2024-09-10 10:58:27 -07:00 committed by Copybara-Service
parent 8c0a8834c1
commit 13a9f76f64
5 changed files with 61 additions and 52 deletions


@@ -49,7 +49,7 @@ namespace {
void EnqueueChunkRequests(uint64_t offset, uint64_t size, uint8_t* data,
std::vector<BlobIO>& requests) {
// Split into chunks for load-balancing even if blob sizes vary.
constexpr size_t kChunkSize = 4 * 1024 * 1024;
constexpr size_t kChunkSize = 4 * 1024 * 1024; // bytes
// Split into whole chunks and possibly one remainder.
uint64_t pos = 0;
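
For context on the hunk above: `EnqueueChunkRequests` works purely in bytes. A request of `size` bytes starting at `offset` is split into whole 4 MiB chunks plus at most one remainder, each queued as a `BlobIO` so the thread pool can balance I/O even when blob sizes vary. A minimal sketch of such a loop, assuming only the `BlobIO` fields shown later in this diff (not necessarily the exact implementation):

void EnqueueChunkRequestsSketch(uint64_t offset, uint64_t size, uint8_t* data,
                                std::vector<BlobIO>& requests) {
  constexpr size_t kChunkSize = 4 * 1024 * 1024;  // bytes
  uint64_t pos = 0;
  // Whole chunks first: all but the last request have equal cost.
  for (; pos + kChunkSize <= size; pos += kChunkSize) {
    requests.emplace_back(offset + pos, kChunkSize, data + pos, /*padding=*/0);
  }
  // At most one remainder smaller than kChunkSize.
  if (pos != size) {
    requests.emplace_back(offset + pos, size - pos, data + pos, /*padding=*/0);
  }
}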


@@ -48,12 +48,13 @@ using BlobError = int;
// 128), which can help performance.
static constexpr size_t kBlobAlign = 256;
// One I/O request, serviced by threads in a pool.
struct BlobIO {
BlobIO(uint64_t offset, size_t size, void* data, uint64_t padding)
: offset(offset), size(size), data(data), padding(padding) {}
uint64_t offset;
size_t size;
size_t size; // bytes
void* data;
uint64_t padding;
};
@@ -66,7 +67,8 @@ class BlobReader {
// Opens `filename` and reads its header.
BlobError Open(const Path& filename);
// Enqueues read requests if `key` is found and its size matches `size`.
// Enqueues read requests if `key` is found and its size matches `size`, which
// is in units of bytes.
BlobError Enqueue(hwy::uint128_t key, void* data, size_t size);
// Reads all enqueued requests.
@@ -80,6 +82,7 @@ class BlobReader {
class BlobWriter {
public:
// `size` is in bytes.
void Add(hwy::uint128_t key, const void* data, size_t size) {
keys_.push_back(key);
blobs_.emplace_back(static_cast<const uint8_t*>(data), size);
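
The `// bytes` annotations here are the crux of the fix: both `BlobWriter::Add` and `BlobReader::Enqueue` take sizes in bytes, so callers holding typed arrays must scale by `sizeof`. A hedged usage sketch, assuming a `BlobWriter writer`, a `BlobReader reader`, and a `std::vector<SfpStream> packed` already exist; the blob name is illustrative:

const size_t num_bytes = packed.size() * sizeof(SfpStream);
writer.Add(MakeKey("example_blob"), packed.data(), num_bytes);  // bytes, not elements

// Reading back: Enqueue only succeeds if `num_bytes` matches the stored
// blob's byte count; otherwise it reports an error.
BlobError err = reader.Enqueue(MakeKey("example_blob"), packed.data(), num_bytes);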


@@ -624,21 +624,22 @@ class Compressor {
explicit Compressor(hwy::ThreadPool& pool) : pool_(pool) {}
template <typename Packed, size_t kCapacity>
void operator()(const char* name, const float* weights,
void operator()(const char* name, const float* HWY_RESTRICT weights,
CompressedArray<Packed, kCapacity>& compressed) {
Insert(name, weights, kCapacity, work_, compressed.CompressedSize(),
compressed.data(), 0, pool_);
Insert(name, weights, kCapacity, work_, compressed.GetSpan(),
/*packed_ofs=*/0, pool_);
}
template <typename Packed>
void Insert(const char* name, const float* weights, size_t weights_count,
CompressWorkingSet& work, size_t out_capacity, Packed* packed,
size_t packed_ofs, hwy::ThreadPool& pool) {
fprintf(stderr, "Regenerating %s (%zuM), please wait\n", name,
weights_count / (1000 * 1000));
Compress(weights, weights_count, work_,
PackedSpan<Packed>{packed, weights_count}, 0, pool_);
writer_.Add(CacheKey<Packed>(name), packed, out_capacity);
void Insert(const char* name, const float* HWY_RESTRICT weights,
size_t num_weights, CompressWorkingSet& work,
const PackedSpan<Packed>& packed, size_t packed_ofs,
hwy::ThreadPool& pool) {
fprintf(stderr, "Compressing %s (%zuM), please wait\n", name,
num_weights / (1000 * 1000));
Compress(weights, num_weights, work_, packed, packed_ofs, pool_);
const size_t num_bytes = packed.num * sizeof(Packed);
writer_.Add(CacheKey<Packed>(name), packed.ptr, num_bytes);
}
void AddScales(const float* scales, size_t len) {
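
The interface change above in a nutshell: `Insert` no longer takes a raw pointer plus a separate byte capacity; it takes a `PackedSpan` whose `num` is an element count, and the byte size handed to `BlobWriter::Add` is derived once as `packed.num * sizeof(Packed)`. A hedged sketch of a direct call, where `compressor`, `weights`, `working_set`, `pool`, and `kNumWeights` are assumed to exist and the tensor name is illustrative:

const size_t num_packed = CompressedArrayElements<SfpStream>(kNumWeights);
auto storage = hwy::AllocateAligned<SfpStream>(num_packed);
const PackedSpan<SfpStream> span = MakeSpan(storage.get(), num_packed);
compressor.Insert("w_example", weights, kNumWeights, working_set, span,
                  /*packed_ofs=*/0, pool);
// Inside Insert: writer_.Add(CacheKey<SfpStream>("w_example"), span.ptr,
//                            span.num * sizeof(SfpStream));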


@@ -43,20 +43,20 @@ namespace gcpp {
// Compressed representation of floating-point elements. The array length may
// differ from the number of elements. Associated operations such as Dot are
// implemented in SIMD code and are thus non-member functions.
template <typename MatT, size_t kCapacity>
template <typename Packed, size_t kCapacity>
class CompressedArray {
public:
using value_type = MatT;
using value_type = Packed;
// Note that whenever you access data(), you have to consider a scale() that
// may be different from 1.0f.
MatT* data() { return data_.data(); }
const MatT* data() const { return data_.data(); }
Packed* data() { return data_.data(); }
const Packed* data() const { return data_.data(); }
// The const accessor data_scale1() asserts (!) that the scale is 1.0f, so
// calling it means "I am sure the scale is 1 and therefore ignore the scale".
// A scale of 0 indicates that the scale has likely never been set, so is
// "implicitly 1".
const MatT* data_scale1() const {
const Packed* data_scale1() const {
HWY_ASSERT(scale() == 1.f || scale() == 0.f);
return data_.data();
}
@@ -67,14 +67,17 @@ class CompressedArray {
float scale() const { return scale_[0]; }
void set_scale(float scale) { scale_[0] = scale; }
constexpr size_t size() const { return kCapacity; }
constexpr size_t NumElements() const { return kCapacity; }
constexpr size_t CompressedSize() const {
return data_.size() * sizeof(MatT);
// Returns a span over the packed elements, for `BlobReader::Enqueue` and
// `Compress`. Its length differs from `NumElements()` for `Packed=NuqStream`.
PackedSpan<Packed> GetSpan() { return MakeSpan(data(), data_.size()); }
PackedSpan<const Packed> GetSpan() const {
return MakeSpan(data(), data_.size());
}
private:
std::array<MatT, CompressedArrayElements<MatT>(kCapacity)> data_;
std::array<Packed, CompressedArrayElements<Packed>(kCapacity)> data_;
// Blobs are at least kBlobAlign bytes anyway.
float scale_[kBlobAlign / sizeof(float)];
};
@@ -146,13 +149,13 @@ struct CompressWorkingSet {
// Returns key for the given tensor name. Also encodes the type, so that
// changing the representation automatically invalidates prior cached files
// (the new blob name will not be found).
template <typename MatT>
template <typename Packed>
hwy::uint128_t CacheKey(const char* name) {
// Already used/retired: s, S, n, 1
const char prefix = hwy::IsSame<MatT, float>() ? 'F'
: hwy::IsSame<MatT, BF16>() ? 'B'
: hwy::IsSame<MatT, SfpStream>() ? '$'
: hwy::IsSame<MatT, NuqStream>() ? '2'
const char prefix = hwy::IsSame<Packed, float>() ? 'F'
: hwy::IsSame<Packed, BF16>() ? 'B'
: hwy::IsSame<Packed, SfpStream>() ? '$'
: hwy::IsSame<Packed, NuqStream>() ? '2'
: '?';
return MakeKey((std::string(1, prefix) + name).c_str());
@@ -173,17 +176,18 @@ class CacheLoader {
}
// Called for each tensor, enqueues read requests.
template <typename MatT, size_t kCapacity>
template <typename Packed, size_t kCapacity>
void operator()(const char* name, const float* null,
CompressedArray<MatT, kCapacity>& compressed) {
CompressedArray<Packed, kCapacity>& compressed) {
HWY_DASSERT(null == nullptr);
// Skip if reader_ is invalid or any load failed: we will regenerate
// everything because it's rare to update only a few tensors.
if (err_ != 0) return;
err_ = reader_.Enqueue(CacheKey<MatT>(name), compressed.data(),
compressed.CompressedSize());
const PackedSpan<Packed> span = compressed.GetSpan();
const size_t num_bytes = span.num * sizeof(Packed);
err_ = reader_.Enqueue(CacheKey<Packed>(name), span.ptr, num_bytes);
compressed.set_scale(1.0f);
if (err_ != 0) {
fprintf(stderr, "Failed to read cache %s (error %d)\n", name, err_);
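
Why `GetSpan` replaces `CompressedSize`: for most `Packed` types the packed element count equals `kCapacity`, but for `NuqStream` the packed storage is smaller than the logical weight count, so `data_.size()` (and hence `span.num`) differs from `NumElements()`. Reader and writer now recover the byte count the same way. A small sketch of that relationship (the array size is illustrative):

CompressedArray<NuqStream, 4096> nuq;                // 4096 logical weights
const PackedSpan<NuqStream> span = nuq.GetSpan();
// span.num == CompressedArrayElements<NuqStream>(4096), which is smaller than
// nuq.NumElements() because NuqStream packs several weights per stored element.
const size_t num_bytes = span.num * sizeof(NuqStream);
// num_bytes is what both CacheLoader (via BlobReader::Enqueue) and Compressor
// (via BlobWriter::Add) now pass, so the two sides agree again.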


@@ -4,6 +4,8 @@
#include <vector>
#include "compression/compress.h"
#include "compression/shared.h"
#include "hwy/aligned_allocator.h"
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE \
@@ -18,6 +20,7 @@
#ifndef GEMMA_ONCE
#define GEMMA_ONCE
#include "third_party/absl/types/span.h"
#include "compression/io.h"
#include "hwy/base.h"
#include "hwy/contrib/thread_pool/thread_pool.h"
@@ -47,33 +50,31 @@ namespace gcpp {
namespace HWY_NAMESPACE {
class SbsWriterImpl : public WriterInterface {
template <typename Packed>
hwy::AlignedFreeUniquePtr<Packed[]> AllocateAndCompress(
const std::string& name, absl::Span<const float> weights) {
const size_t num_packed = CompressedArrayElements<Packed>(weights.size());
auto packed = hwy::AllocateAligned<Packed>(num_packed);
PackedSpan<Packed> span = MakeSpan(packed.get(), num_packed);
compressor_.Insert(name.c_str(), weights.data(), weights.size(),
working_set_, span, /*packed_ofs=*/0, pool_);
return packed;
}
public:
SbsWriterImpl() : pool_(0), compressor_(pool_) {}
void Insert(std::string name, absl::Span<const float> weights) override {
const size_t out_size = CompressedArrayElements<SfpStream>(weights.size());
sfp_streams_.push_back(std::vector<SfpStream>(out_size));
compressor_.Insert<SfpStream>(name.data(), weights.data(), weights.size(),
working_set_, out_size,
sfp_streams_.back().data(), 0, pool_);
sfp_streams_.push_back(AllocateAndCompress<SfpStream>(name, weights));
}
void InsertNUQ(std::string name, absl::Span<const float> weights) override {
const size_t out_size = CompressedArrayElements<NuqStream>(weights.size());
nuq_streams_.push_back(std::vector<NuqStream>(out_size));
compressor_.Insert<NuqStream>(name.data(), weights.data(), weights.size(),
working_set_, out_size,
nuq_streams_.back().data(), 0, pool_);
nuq_streams_.push_back(AllocateAndCompress<NuqStream>(name, weights));
}
void InsertBfloat16(std::string name,
absl::Span<const float> weights) override {
const size_t out_size =
CompressedArrayElements<hwy::bfloat16_t>(weights.size());
bf16_streams_.push_back(std::vector<hwy::bfloat16_t>(out_size));
compressor_.Insert<hwy::bfloat16_t>(name.data(), weights.data(),
weights.size(), working_set_, out_size,
bf16_streams_.back().data(), 0, pool_);
bf16_streams_.push_back(AllocateAndCompress<BF16>(name, weights));
}
void AddScales(const std::vector<float>& scales) override {
@@ -89,9 +90,9 @@ class SbsWriterImpl : public WriterInterface {
hwy::ThreadPool pool_;
Compressor compressor_;
CompressWorkingSet working_set_;
std::vector<std::vector<SfpStream>> sfp_streams_;
std::vector<std::vector<NuqStream>> nuq_streams_;
std::vector<std::vector<hwy::bfloat16_t>> bf16_streams_;
std::vector<hwy::AlignedFreeUniquePtr<SfpStream[]>> sfp_streams_;
std::vector<hwy::AlignedFreeUniquePtr<NuqStream[]>> nuq_streams_;
std::vector<hwy::AlignedFreeUniquePtr<BF16[]>> bf16_streams_;
std::vector<float> scales_;
};
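
Putting the writer-side pieces together, a sketch of the flow after this change, assuming a `WriterInterface* writer` backed by `SbsWriterImpl`; the tensor name and the `LoadExampleWeights` helper are hypothetical:

std::vector<float> weights = LoadExampleWeights();
writer->Insert("w_example", absl::MakeConstSpan(weights));  // SfpStream path
// Insert -> AllocateAndCompress<SfpStream>: allocates
// CompressedArrayElements<SfpStream>(weights.size()) aligned packed elements,
// compresses into that PackedSpan, and keeps the buffer alive in sfp_streams_.
// Compressor::Insert then writes the blob with
// writer_.Add(CacheKey<SfpStream>(name), span.ptr, span.num * sizeof(SfpStream)),
// i.e. a byte count, which matches what CacheLoader/BlobReader expect on load.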