From 3a5a6dbcadaf35588a8df9893281eae61367daa8 Mon Sep 17 00:00:00 2001
From: RangerUFO
Date: Sun, 9 Feb 2025 00:13:25 +0800
Subject: [PATCH] Fix the link error when building `compress_weights` with
 Clang on macOS

---
 gemma/weights.cc | 58 ++++++++++++++++++------------------------------
 gemma/weights.h  | 25 ++++++++++++++++++++-
 2 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/gemma/weights.cc b/gemma/weights.cc
index 161a65d..d281391 100644
--- a/gemma/weights.cc
+++ b/gemma/weights.cc
@@ -257,8 +257,8 @@ void ModelWeightsStorage::CreateForType(Type weight_type,
   }
 }
 
-template <typename Weight>
-void LayerWeightsPtrs<Weight>::Reshape(MatStorage* storage) {
+template <>
+void LayerWeightsPtrs<NuqStream>::Reshape(MatStorage* storage) {
   if (attn_vec_einsum_w.data() == nullptr) return;
 
   const size_t model_dim = layer_config.model_dim;
@@ -271,48 +271,34 @@ void LayerWeightsPtrs<Weight>::Reshape(MatStorage* storage) {
     att_weights.SetPtr(*storage);
   }
 
-  if (hwy::IsSame<Weight, NuqStream>()) {
-    const hwy::HWY_NAMESPACE::ScalableTag<float> df;
+  const hwy::HWY_NAMESPACE::ScalableTag<float> df;
 
-    hwy::AlignedFreeUniquePtr<float[]> attn_vec_einsum_w_tmp =
-        hwy::AllocateAligned<float>(model_dim * heads * qkv_dim);
-    hwy::AlignedFreeUniquePtr<float[]> att_weights_tmp =
-        hwy::AllocateAligned<float>(model_dim * heads * qkv_dim);
+  hwy::AlignedFreeUniquePtr<float[]> attn_vec_einsum_w_tmp =
+      hwy::AllocateAligned<float>(model_dim * heads * qkv_dim);
+  hwy::AlignedFreeUniquePtr<float[]> att_weights_tmp =
+      hwy::AllocateAligned<float>(model_dim * heads * qkv_dim);
 
-    HWY_NAMESPACE::DecompressAndZeroPad(
-        df, MakeSpan(attn_vec_einsum_w.data(), model_dim * heads * qkv_dim), 0,
-        attn_vec_einsum_w_tmp.get(), model_dim * heads * qkv_dim);
-
-    for (size_t m = 0; m < model_dim; ++m) {
-      float* HWY_RESTRICT out_row = att_weights_tmp.get() + m * heads * qkv_dim;
-      for (size_t h = 0; h < heads; ++h) {
-        hwy::CopyBytes(
-            attn_vec_einsum_w_tmp.get() + h * model_dim * qkv_dim + m * qkv_dim,
-            out_row + h * qkv_dim, qkv_dim * sizeof(float));
-      }
-    }
-
-    CompressWorkingSet work;
-    hwy::ThreadPool pool(0);
-
-    HWY_NAMESPACE::Compress(
-        att_weights_tmp.get(), model_dim * heads * qkv_dim, work,
-        MakeSpan(att_weights.data(), model_dim * heads * qkv_dim),
-        /*packed_ofs=*/0, pool);
-
-    att_weights.set_scale(attn_vec_einsum_w.scale());
-
-    return;
-  }
+  HWY_NAMESPACE::DecompressAndZeroPad(
+      df, MakeSpan(attn_vec_einsum_w.data(), model_dim * heads * qkv_dim), 0,
+      attn_vec_einsum_w_tmp.get(), model_dim * heads * qkv_dim);
 
   for (size_t m = 0; m < model_dim; ++m) {
-    Weight* HWY_RESTRICT out_row = att_weights.data() + m * heads * qkv_dim;
+    float* HWY_RESTRICT out_row = att_weights_tmp.get() + m * heads * qkv_dim;
     for (size_t h = 0; h < heads; ++h) {
       hwy::CopyBytes(
-          attn_vec_einsum_w.data() + h * model_dim * qkv_dim + m * qkv_dim,
-          out_row + h * qkv_dim, qkv_dim * sizeof(Weight));
+          attn_vec_einsum_w_tmp.get() + h * model_dim * qkv_dim + m * qkv_dim,
+          out_row + h * qkv_dim, qkv_dim * sizeof(float));
     }
   }
+
+  CompressWorkingSet work;
+  hwy::ThreadPool pool(0);
+
+  HWY_NAMESPACE::Compress(
+      att_weights_tmp.get(), model_dim * heads * qkv_dim, work,
+      MakeSpan(att_weights.data(), model_dim * heads * qkv_dim),
+      /*packed_ofs=*/0, pool);
+
   att_weights.set_scale(attn_vec_einsum_w.scale());
 }
 
diff --git a/gemma/weights.h b/gemma/weights.h
index 3e13226..1a71777 100644
--- a/gemma/weights.h
+++ b/gemma/weights.h
@@ -179,7 +179,30 @@ struct LayerWeightsPtrs {
   // Initializes att_weights from attn_vec_einsum_w, hence this must be called
   // after loading weights via ForEachTensor.
   // TODO: update compression/convert_weights to bake this in.
-  void Reshape(MatStorage* storage);
+  void Reshape(MatStorage* storage) {
+    static_assert(!hwy::IsSame<Weight, NuqStream>());
+
+    if (attn_vec_einsum_w.data() == nullptr) return;
+
+    const size_t model_dim = layer_config.model_dim;
+    const size_t heads = layer_config.heads;
+    const size_t qkv_dim = layer_config.qkv_dim;
+
+    // Reshape [kHeads, kModelDim, kQKVDim] to [kModelDim, kHeads * kQKVDim].
+    if (storage != nullptr) {
+      storage->Allocate();
+      att_weights.SetPtr(*storage);
+    }
+    for (size_t m = 0; m < model_dim; ++m) {
+      Weight* HWY_RESTRICT out_row = att_weights.data() + m * heads * qkv_dim;
+      for (size_t h = 0; h < heads; ++h) {
+        hwy::CopyBytes(
+            attn_vec_einsum_w.data() + h * model_dim * qkv_dim + m * qkv_dim,
+            out_row + h * qkv_dim, qkv_dim * sizeof(Weight));
+      }
+    }
+    att_weights.set_scale(attn_vec_einsum_w.scale());
+  }
 
   // Used by ForEachTensor for per-layer tensors.
 #define GEMMA_CALL_FUNC(member) \
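
For context, the shape of the fix: the generic `LayerWeightsPtrs<Weight>::Reshape` used to be defined only in weights.cc, so every other translation unit relied on that file emitting an instantiation for each `Weight`; presumably a missing instantiation is what Clang on macOS reported as an undefined symbol. The patch moves the generic body into weights.h, where each caller can instantiate it locally, and keeps only the NuqStream decompress/transpose/recompress path in weights.cc as a full specialization, which links like an ordinary non-template function. Below is a hypothetical single-file sketch of that split; the names `Holder` and `NuqLike` are illustrative and not from gemma.cpp.

#include <cstdio>
#include <type_traits>

struct NuqLike {};  // stand-in for NuqStream

template <typename Weight>
struct Holder {
  // Generic path, defined in the header: every translation unit that calls
  // it instantiates it locally, so no cross-TU symbol is needed at link
  // time. The static_assert (mirroring the one in the patch) rejects the
  // type that must use the specialization instead.
  void Reshape() {
    static_assert(!std::is_same_v<Weight, NuqLike>);
    std::puts("generic reshape");
  }
};

// Full specialization: declared here, defined once in the lone .cc file
// (weights.cc in the patch). A full specialization of a member function is
// an ordinary function with a single definition, so the linker sees exactly
// one symbol for it.
template <>
void Holder<NuqLike>::Reshape();

// --- this part would live in the .cc file ---
template <>
void Holder<NuqLike>::Reshape() {
  std::puts("special reshape (decompress, transpose, recompress)");
}

int main() {
  Holder<float>{}.Reshape();    // instantiated from the header
  Holder<NuqLike>{}.Reshape();  // resolves to the .cc definition
}

One caveat the sketch makes explicit: callers outside the .cc file only reach the specialization if a declaration of it is visible first; without one, the compiler would instantiate the inline generic, whose static_assert then fails at compile time.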