Clean up ToKVCachePtrs to delegate to KVCache::ToPtr

PiperOrigin-RevId: 840214969
This commit is contained in:
Martin Stolle 2025-12-04 06:21:35 -08:00 committed by Copybara-Service
parent 2b4436beb6
commit 9348048885
4 changed files with 38 additions and 4 deletions

View File

@ -528,6 +528,7 @@ cc_test(
":kv_cache", ":kv_cache",
":threading_context", ":threading_context",
"//testing/base/public:gunit_main", "//testing/base/public:gunit_main",
"@highway//:hwy",
], ],
) )

View File

@ -58,9 +58,7 @@ std::vector<KVCachePtr> ToKVCachePtrs(const hwy::Span<KVCache>& kv_caches) {
std::vector<KVCachePtr> ptrs; std::vector<KVCachePtr> ptrs;
ptrs.reserve(kv_caches.size()); ptrs.reserve(kv_caches.size());
for (size_t i = 0; i < kv_caches.size(); ++i) { for (size_t i = 0; i < kv_caches.size(); ++i) {
ptrs.push_back(KVCachePtr{ ptrs.push_back(kv_caches[i].ToPtr());
.kv_cache = kv_caches[i].kv_cache,
});
} }
return ptrs; return ptrs;
} }

View File

@ -53,7 +53,11 @@ struct KVCache {
MatStorageT<KV_t> kv_cache; // [seq_len, layers * kv_heads * qkv_dim * 2] MatStorageT<KV_t> kv_cache; // [seq_len, layers * kv_heads * qkv_dim * 2]
KVCachePtr ToPtr() { return KVCachePtr{.kv_cache = kv_cache}; } KVCachePtr ToPtr() {
return KVCachePtr{
.kv_cache = kv_cache,
};
}
private: private:
const Allocator& allocator_; const Allocator& allocator_;

View File

@ -1,11 +1,42 @@
#include "gemma/kv_cache.h" #include "gemma/kv_cache.h"
#include <vector>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "gemma/configs.h" #include "gemma/configs.h"
#include "gemma/gemma_args.h" #include "gemma/gemma_args.h"
#include "util/threading_context.h" #include "util/threading_context.h"
#include "hwy/aligned_allocator.h"
namespace gcpp { namespace gcpp {
namespace { namespace {
// Checks that ToKVCachePtrs() returns one KVCachePtr per KVCache, in the same
// order, and that each returned kv_cache reports the same Row(0) as the
// kv_cache of the KVCache it was created from.
TEST(KVCacheTest, KVCacheToPtrs) {
  // Two-layer model; every layer uses the same KV head count and QKV dim.
  ModelConfig model_config;
  model_config.max_seq_len = 1024;
  model_config.num_layers = 2;
  for (int i = 0; i < model_config.num_layers; ++i) {
    model_config.layer_configs.push_back(LayerConfig());
    model_config.layer_configs.back().kv_heads = 4;
    model_config.layer_configs.back().qkv_dim = 256;
  }

  InferenceArgs inference_args;
  inference_args.seq_len = 1024;
  RuntimeConfig runtime_config;
  runtime_config.attention_impl = AttentionImpl::kFlash;
  ThreadingArgs threading_args;
  ThreadingContext ctx(threading_args);

  // Build two caches from the same model config but with different seq_len
  // (1024, then 512), so the two inputs are not configured identically.
  std::vector<KVCache> caches;
  caches.emplace_back(model_config, inference_args, runtime_config,
                      ctx.allocator);
  inference_args.seq_len = 512;
  caches.emplace_back(model_config, inference_args, runtime_config,
                      ctx.allocator);

  std::vector<KVCachePtr> ptrs = ToKVCachePtrs({caches.data(), caches.size()});

  // size() is unsigned; an unsigned literal avoids a signed/unsigned
  // comparison inside ASSERT_EQ. ASSERT (not EXPECT) because the indexing
  // below requires both elements to exist.
  ASSERT_EQ(ptrs.size(), 2u);
  EXPECT_EQ(ptrs[0].kv_cache.Row(0), caches[0].kv_cache.Row(0));
  EXPECT_EQ(ptrs[1].kv_cache.Row(0), caches[1].kv_cache.Row(0));
}
} // namespace } // namespace
} // namespace gcpp } // namespace gcpp