diff --git a/gemma/flash_attention.cc b/gemma/flash_attention.cc index 55d0522..ce12a13 100644 --- a/gemma/flash_attention.cc +++ b/gemma/flash_attention.cc @@ -17,7 +17,9 @@ #include #include +#include #include +#include #include #include "compression/types.h" // GEMMA_DISABLED_TARGETS diff --git a/gemma/flash_attention.h b/gemma/flash_attention.h index 236c7dc..dd5d42a 100644 --- a/gemma/flash_attention.h +++ b/gemma/flash_attention.h @@ -60,6 +60,7 @@ namespace gcpp { size_t layer_idx, const MatPtr& query_norm_scale, \ AttentionActivationsPtrs& activations, QBatch& qbatch, \ ThreadingContext& ctx); \ + \ /* NOLINTNEXTLINE(google-readability-namespace-comments) */ \ } // namespace NAMESPACE diff --git a/gemma/kv_cache.cc b/gemma/kv_cache.cc index ded8df5..8948644 100644 --- a/gemma/kv_cache.cc +++ b/gemma/kv_cache.cc @@ -51,7 +51,6 @@ KVCache KVCache::Copy() { KVCache copy(kv_cache.Extents(), allocator_); CopyMat(kv_cache, copy.kv_cache); - return copy; } @@ -59,7 +58,9 @@ std::vector ToKVCachePtrs(const hwy::Span& kv_caches) { std::vector ptrs; ptrs.reserve(kv_caches.size()); for (size_t i = 0; i < kv_caches.size(); ++i) { - ptrs.push_back(KVCachePtr{.kv_cache = kv_caches[i].kv_cache}); + ptrs.push_back(KVCachePtr{ + .kv_cache = kv_caches[i].kv_cache, + }); } return ptrs; } diff --git a/gemma/kv_cache.h b/gemma/kv_cache.h index 37a4d0e..ebd7235 100644 --- a/gemma/kv_cache.h +++ b/gemma/kv_cache.h @@ -17,9 +17,11 @@ #define THIRD_PARTY_GEMMA_CPP_GEMMA_KV_CACHE_H_ #include + +#include #include -#include "gemma/configs.h" // ModelConfig +#include "gemma/configs.h" // ModelConfig #include "gemma/gemma_args.h" // InferenceArgs #include "util/basics.h" // BF16 #include "util/mat.h" @@ -31,12 +33,13 @@ using KV_t = float; struct KVCache { KVCache(const ModelConfig& config, const InferenceArgs& inference_args, const Allocator& allocator); - // Returns a deep copy of the KVCache. Use explicit function instead of // copy ctor to make the cost explicit. KVCache Copy(); - size_t SeqLen() const { return kv_cache.Rows(); } + size_t SeqLen() const { + return kv_cache.Rows(); + } MatStorageT kv_cache; // [seq_len, layers * kv_heads * qkv_dim * 2] @@ -49,7 +52,9 @@ struct KVCache { // A non-owning view of a KVCache. struct KVCachePtr { - size_t SeqLen() const { return kv_cache.Rows(); } + size_t SeqLen() const { + return kv_cache.Rows(); + } MatPtrT kv_cache; }; diff --git a/ops/ops-inl.h b/ops/ops-inl.h index 6eac06f..4291733 100644 --- a/ops/ops-inl.h +++ b/ops/ops-inl.h @@ -25,6 +25,7 @@ #include #include #include // std::enable_if_t +#include #include #include "ops/matmul.h"