// Patch summary (reviewer notes; everything below this comment block is the
// unmodified patch text).
//
// - compression/fields.h: the forward declaration of IFields changes from
//   `class` to `struct`. Presumably this matches the class-key used by the
//   real definition -- TODO confirm; the definition is not part of this patch.
// - compression/nuq-inl.h: removes the local `k1` (hn::Set(di, 1)) in a
//   NuqClustering member. Presumably it was unused -- the rest of the function
//   is not visible here, verify before applying.
// - gemma/configs.h: adds the constant kMaxConv1DWidth = 4.
// - gemma/configs.cc: LayerConfigGriffin2B now HWY_DASSERTs that
//   config.conv1d_width (assigned 4 on the preceding line) is
//   <= kMaxConv1DWidth.
// - gemma/gemma-inl.h: GriffinRecurrent declares `cache` with the fixed bound
//   kMaxConv1DWidth instead of HWY_MAX(conv_1d_width, 1). The loop that fills
//   cache[i] still runs for i < conv_1d_width, so conv_1d_width must not exceed
//   kMaxConv1DWidth. VitTransformerLayer adds `(void)type;` right after the
//   HWY_DASSERT on `type`; presumably this avoids an unused-variable warning
//   when the assert compiles to nothing -- TODO confirm.
// - gemma/weights.h: in LayerWeightsPtrs::ForEachTensor and in a
//   ModelWeightsPtrs member, the array `MatPtr* tensors[ptrs.size()]` (bound
//   taken from the size of the `ptrs` vector) becomes a std::vector
//   initialized to nullptr, and the macro shown in the first hunk passes
//   `tensors.data()` to hwy::Span instead of `tensors`.
// - util/allocator.cc: in the HWY_OS_WIN branch of Allocator::AllocBytes, the
//   second (ignored) parameter of the `call_free` lambda changes from `void*`
//   to `size_t`. Presumably this matches the callback type Deleter expects,
//   since Deleter is constructed with the size_t `bytes` -- TODO confirm.
//
// NOTE(review): this copy of the patch appears to have lost its line breaks,
// indentation and the contents of angle brackets (e.g. `hn::Vec;`,
// `std::vector*>&`, `hwy::Span(`). Re-export it from the repository before
// trying to apply it.
diff --git a/compression/fields.h b/compression/fields.h index 33f3a65..a17b48c 100644 --- a/compression/fields.h +++ b/compression/fields.h @@ -50,7 +50,7 @@ namespace gcpp { // fields: not required for the intended use case of `ModelConfig`. // - support any other languages than C++ and Python (for the exporter). -class IFields; // breaks circular dependency +struct IFields; // breaks circular dependency // Visitors are internal-only, but their base class is visible to user code // because their `IFields::VisitFields` calls `visitor.operator()`. diff --git a/compression/nuq-inl.h b/compression/nuq-inl.h index d12a630..c2d26a0 100644 --- a/compression/nuq-inl.h +++ b/compression/nuq-inl.h @@ -252,7 +252,6 @@ class NuqClustering { using VF = hn::Vec; using MF = hn::Mask; using VI = hn::Vec; - const VI k1 = hn::Set(di, 1); const size_t N = hn::Lanes(df); HWY_DASSERT(kGroupSize % N == 0); diff --git a/gemma/configs.cc b/gemma/configs.cc index 62fcb51..2c4f887 100644 --- a/gemma/configs.cc +++ b/gemma/configs.cc @@ -207,6 +207,7 @@ static LayerConfig LayerConfigGriffin2B(size_t model_dim) { config.kv_heads = 1; config.qkv_dim = 256; config.conv1d_width = 4; + HWY_DASSERT(config.conv1d_width <= kMaxConv1DWidth); config.ff_biases = true; config.softmax_attn_output_biases = true; config.optimized_gating = false; diff --git a/gemma/configs.h b/gemma/configs.h index 1ef56dd..42693e6 100644 --- a/gemma/configs.h +++ b/gemma/configs.h @@ -44,6 +44,7 @@ namespace gcpp { static constexpr size_t kSeqLen = GEMMA_MAX_SEQLEN; static constexpr size_t kTopK = GEMMA_TOPK; static constexpr size_t kVocabSize = 256000; +static constexpr size_t kMaxConv1DWidth = 4; using EmbedderInputT = BF16; diff --git a/gemma/gemma-inl.h b/gemma/gemma-inl.h index 666e9d5..01b3930 100644 --- a/gemma/gemma-inl.h +++ b/gemma/gemma-inl.h @@ -109,7 +109,7 @@ HWY_NOINLINE void GriffinRecurrent(size_t batch_start, size_t num_tokens, const size_t layer_offset = layer * model_dim * (conv_1d_width - 1); // 
cache[i] = input at time t-i. - float* HWY_RESTRICT cache[HWY_MAX(conv_1d_width, 1)]; + float* HWY_RESTRICT cache[kMaxConv1DWidth]; cache[0] = x; for (size_t i = 1; i < conv_1d_width; i++) { cache[i] = @@ -887,6 +887,7 @@ HWY_NOINLINE void VitTransformerLayer(size_t num_tokens, size_t layer, const size_t model_dim = activations.weights_config.model_dim; auto type = layer_weights->layer_config.type; HWY_DASSERT(type == LayerAttentionType::kVit); + (void)type; auto& x = activations.x; HWY_DASSERT(x.BatchSize() == num_tokens); diff --git a/gemma/weights.h b/gemma/weights.h index 5ddea0d..3e13226 100644 --- a/gemma/weights.h +++ b/gemma/weights.h @@ -189,7 +189,7 @@ struct LayerWeightsPtrs { } \ if (tensors[0]->Ptr() != nullptr || fet != ForEachType::kIgnoreNulls) { \ func(ptrs[0]->member.CacheName(layer_idx, sep, sep_index).c_str(), \ - hwy::Span(tensors, ptrs.size())); \ + hwy::Span(tensors.data(), ptrs.size())); \ } \ } @@ -197,7 +197,7 @@ struct LayerWeightsPtrs { static void ForEachTensor(const std::vector*>& ptrs, int layer_idx, ForEachType fet, Func func, char sep = ' ', int sep_index = -1) { - MatPtr* tensors[ptrs.size()]; + std::vector tensors(ptrs.size(), nullptr); auto type = ptrs[0]->layer_config.type; if (type == LayerAttentionType::kVit) { // MHA. @@ -449,7 +449,7 @@ struct ModelWeightsPtrs { ForEachType fet, Func func) { std::vector*> layers(ptrs.size()); std::vector*> vit_layers(ptrs.size()); - MatPtr* tensors[ptrs.size()]; + std::vector tensors(ptrs.size(), nullptr); // Variables used by GEMMA_CALL_FUNC. 
int layer_idx = -1; char sep = ' '; diff --git a/util/allocator.cc b/util/allocator.cc index 3ff2b5a..f87ed50 100644 --- a/util/allocator.cc +++ b/util/allocator.cc @@ -177,7 +177,7 @@ Allocator::PtrAndDeleter Allocator::AllocBytes(size_t bytes) { }; return PtrAndDeleter{p, Deleter(call_munmap, bytes)}; #elif HWY_OS_WIN - const auto call_free = [](void* ptr, void*) { _aligned_free(ptr); }; + const auto call_free = [](void* ptr, size_t) { _aligned_free(ptr); }; const size_t alignment = HWY_MAX(vector_bytes_, line_bytes_); return PtrAndDeleter{_aligned_malloc(bytes, alignment), Deleter(call_free, bytes)};