# Patch summary (descriptive preamble; everything from the first "diff --git"
# header onward is the unmodified patch text).
#
# BUILD.bazel
#   Adds "@highway//:hwy" to a label list of a cc_test target, next to
#   ":kv_cache", ":threading_context" and the gunit_main label.
#
# gemma/kv_cache.cc
#   In ToKVCachePtrs, the loop body now appends kv_caches[i].ToPtr() instead of
#   building KVCachePtr{.kv_cache = kv_caches[i].kv_cache} inline.
#
# gemma/kv_cache.h
#   KVCache::ToPtr() is re-wrapped from a one-line body to a multi-line
#   designated initializer; it still sets only .kv_cache = kv_cache.
#
# gemma/kv_cache_test.cc
#   Adds two includes (one of them "hwy/aligned_allocator.h") and the test
#   TEST(KVCacheTest, KVCacheToPtrs). The test configures a ModelConfig with
#   two layers (kv_heads = 4, qkv_dim = 256), constructs two KVCache objects
#   (inference_args.seq_len 1024, then 512) with AttentionImpl::kFlash, calls
#   ToKVCachePtrs, and checks that two entries come back and that each entry's
#   kv_cache.Row(0) equals Row(0) of the corresponding cache.
#
# NOTE(review): this copy of the patch has its line breaks collapsed and
# appears to have lost angle-bracket contents (a bare "#include", and
# std::vector / hwy::Span / MatStorageT with no template arguments). It will
# presumably not apply as stored; regenerate it from the original commit
# rather than repairing it by hand.
# NOTE(review): ASSERT_EQ(ptrs.size(), 2) compares a size() result with an int
# literal; this may raise a sign-compare warning on some toolchains
# ("2u" would avoid it) -- confirm against the project's warning flags.
diff --git a/BUILD.bazel b/BUILD.bazel index 0e85a26..1a67f65 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -528,6 +528,7 @@ cc_test( ":kv_cache", ":threading_context", "//testing/base/public:gunit_main", + "@highway//:hwy", ], ) diff --git a/gemma/kv_cache.cc b/gemma/kv_cache.cc index 8948644..2fe6885 100644 --- a/gemma/kv_cache.cc +++ b/gemma/kv_cache.cc @@ -58,9 +58,7 @@ std::vector ToKVCachePtrs(const hwy::Span& kv_caches) { std::vector ptrs; ptrs.reserve(kv_caches.size()); for (size_t i = 0; i < kv_caches.size(); ++i) { - ptrs.push_back(KVCachePtr{ - .kv_cache = kv_caches[i].kv_cache, - }); + ptrs.push_back(kv_caches[i].ToPtr()); } return ptrs; } diff --git a/gemma/kv_cache.h b/gemma/kv_cache.h index 1d78f7d..66f94f2 100644 --- a/gemma/kv_cache.h +++ b/gemma/kv_cache.h @@ -53,7 +53,11 @@ struct KVCache { MatStorageT kv_cache; // [seq_len, layers * kv_heads * qkv_dim * 2] - KVCachePtr ToPtr() { return KVCachePtr{.kv_cache = kv_cache}; } + KVCachePtr ToPtr() { + return KVCachePtr{ + .kv_cache = kv_cache, + }; + } private: const Allocator& allocator_; diff --git a/gemma/kv_cache_test.cc b/gemma/kv_cache_test.cc index 2849cfe..bc1f8bc 100644 --- a/gemma/kv_cache_test.cc +++ b/gemma/kv_cache_test.cc @@ -1,11 +1,42 @@ #include "gemma/kv_cache.h" +#include + #include "gtest/gtest.h" #include "gemma/configs.h" #include "gemma/gemma_args.h" #include "util/threading_context.h" +#include "hwy/aligned_allocator.h" namespace gcpp { namespace { +TEST(KVCacheTest, KVCacheToPtrs) { + ModelConfig model_config; + model_config.max_seq_len = 1024; + model_config.num_layers = 2; + for (int i = 0; i < model_config.num_layers; ++i) { + model_config.layer_configs.push_back(LayerConfig()); + model_config.layer_configs.back().kv_heads = 4; + model_config.layer_configs.back().qkv_dim = 256; + } + InferenceArgs inference_args; + inference_args.seq_len = 1024; + RuntimeConfig runtime_config; + runtime_config.attention_impl = AttentionImpl::kFlash; + ThreadingArgs threading_args; + 
ThreadingContext ctx(threading_args); + std::vector caches; + caches.emplace_back(model_config, inference_args, runtime_config, + ctx.allocator); + inference_args.seq_len = 512; + caches.emplace_back(model_config, inference_args, runtime_config, + ctx.allocator); + + std::vector ptrs = ToKVCachePtrs({caches.data(), caches.size()}); + ASSERT_EQ(ptrs.size(), 2); + EXPECT_EQ(ptrs[0].kv_cache.Row(0), caches[0].kv_cache.Row(0)); + EXPECT_EQ(ptrs[1].kv_cache.Row(0), caches[1].kv_cache.Row(0)); +} + } // namespace } // namespace gcpp