mirror of https://github.com/google/gemma.cpp.git
Add compression/ comments, especially on SFP range
PiperOrigin-RevId: 642238720
This commit is contained in:
parent
c557ad23a8
commit
a0e808e341
|
|
@ -22,6 +22,7 @@
|
|||
#include <stdio.h>
|
||||
|
||||
#include <array>
|
||||
#include <cmath> // lroundf, only if COMPRESS_STATS
|
||||
|
||||
#include "compression/blob_store.h"
|
||||
#include "compression/compress.h"
|
||||
|
|
@ -55,6 +56,7 @@ namespace hn = hwy::HWY_NAMESPACE;
|
|||
// Primary template: empty, so it provides no members. Each element type must
// supply its own explicit specialization; specializations for float,
// hwy::bfloat16_t, SfpStream and NuqStream appear in this file.
template <typename T> // primary, must specialize
|
||||
struct CompressTraits {};
|
||||
|
||||
// Useful for backprop/, where weights are currently f32.
|
||||
template <>
|
||||
struct CompressTraits<float> {
|
||||
using MatT = float;
|
||||
|
|
@ -267,11 +269,14 @@ struct CompressTraits<hwy::bfloat16_t> {
|
|||
}
|
||||
};
|
||||
|
||||
// Switching floating point: 8-bit, 2..3 mantissa bits.
|
||||
template <>
|
||||
struct CompressTraits<SfpStream> {
|
||||
using MatT = SfpStream;
|
||||
static constexpr bool kSupportsEvenOdd = true;
|
||||
|
||||
// Callers are responsible for scaling `in` such that its magnitudes do not
|
||||
// exceed 1.875. See CompressedArray::scale().
|
||||
template <class DF, HWY_IF_F32_D(DF)>
|
||||
static HWY_INLINE void Compress(DF df, const float* HWY_RESTRICT in,
|
||||
size_t num, CompressPerThread& tls,
|
||||
|
|
@ -351,6 +356,7 @@ struct CompressTraits<SfpStream> {
|
|||
}
|
||||
};
|
||||
|
||||
// Nonuniform quantization, 4.5 bits per element, two separate streams.
|
||||
template <>
|
||||
struct CompressTraits<NuqStream> {
|
||||
using MatT = NuqStream;
|
||||
|
|
@ -525,12 +531,12 @@ HWY_INLINE float Dot(DF df, const CompressedArray<MatT, kCapacity>& compressed,
|
|||
return compressed.scale() * dot_result;
|
||||
}
|
||||
|
||||
// Callback used by ForeachTensor.
|
||||
// Functor called for each tensor, which compresses and stores them along with
|
||||
// their scaling factors to BlobStore.
|
||||
class Compressor {
|
||||
public:
|
||||
explicit Compressor(hwy::ThreadPool& pool) : pool_(pool) {}
|
||||
|
||||
// Called for each tensor; compresses it and stores to the cache.
|
||||
template <typename MatT, size_t kCapacity>
|
||||
void operator()(const char* name, const float* weights,
|
||||
CompressedArray<MatT, kCapacity>& compressed) {
|
||||
|
|
|
|||
|
|
@ -79,6 +79,9 @@ class CompressedArray {
|
|||
// Returns a mutable pointer to the first element of data_, obtained via
// data_.data().
MatT* data() { return data_.data(); }
|
||||
// Const overload: returns a read-only pointer to the first element of data_.
const MatT* data() const { return data_.data(); }
|
||||
|
||||
// Decoded elements should be multiplied by this to restore their original
|
||||
// range. This is required because SfpStream can only encode a limited range
|
||||
// of magnitudes.
|
||||
// Returns scale_[0], the element written by set_scale().
float scale() const { return scale_[0]; }
|
||||
// Stores `scale` in scale_[0], the element that scale() returns.
void set_scale(float scale) { scale_[0] = scale; }
|
||||
|
||||
|
|
@ -90,6 +93,7 @@ class CompressedArray {
|
|||
|
||||
private:
|
||||
std::array<MatT, NumCompressed()> data_;
|
||||
// Blobs are at least kBlobAlign bytes anyway.
|
||||
float scale_[kBlobAlign / sizeof(float)];
|
||||
};
|
||||
|
||||
|
|
@ -172,6 +176,8 @@ hwy::uint128_t CacheKey(const char* name) {
|
|||
return MakeKey((std::string(1, prefix) + name).c_str());
|
||||
}
|
||||
|
||||
// Functor called for each tensor, which loads them and their scaling factors
|
||||
// from BlobStore.
|
||||
class CacheLoader {
|
||||
public:
|
||||
explicit CacheLoader(const Path& blob_filename) {
|
||||
|
|
|
|||
|
|
@ -260,7 +260,8 @@ class SfpCodec {
|
|||
hi = hn::BitwiseIfThenElse(k80, sign_in_msb, hn::ShiftRight<1>(biased_e));
|
||||
}
|
||||
|
||||
// Encodes `num` bf16 values from `in_bf` to `out_packed`.
|
||||
// Encodes `num` bf16 values from `in_bf` to `out_packed`. Their magnitude
|
||||
// must be at most 1.875.
|
||||
template <class DBF, HWY_IF_BF16_D(DBF)>
|
||||
static HWY_INLINE void Enc(DBF dbf, const hwy::bfloat16_t* HWY_RESTRICT in_bf,
|
||||
size_t num, SfpStream* HWY_RESTRICT out_packed) {
|
||||
|
|
@ -288,7 +289,8 @@ class SfpCodec {
|
|||
}
|
||||
}
|
||||
|
||||
// Encodes `num` f32 values from `in_f` to `packed`.
|
||||
// Encodes `num` f32 values from `in_f` to `packed`. Their magnitude
|
||||
// must be at most 1.875.
|
||||
template <class DF, HWY_IF_F32_D(DF)>
|
||||
static HWY_INLINE void Enc(DF df, const float* HWY_RESTRICT in_f, size_t num,
|
||||
SfpStream* HWY_RESTRICT out_packed) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue