mirror of https://github.com/google/gemma.cpp.git
Clean up ToKVCachePtrs to delegate to KVCache::ToPtr
PiperOrigin-RevId: 840214969
This commit is contained in:
parent
2b4436beb6
commit
9348048885
|
|
@ -528,6 +528,7 @@ cc_test(
|
|||
":kv_cache",
|
||||
":threading_context",
|
||||
"//testing/base/public:gunit_main",
|
||||
"@highway//:hwy",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -58,9 +58,7 @@ std::vector<KVCachePtr> ToKVCachePtrs(const hwy::Span<KVCache>& kv_caches) {
|
|||
std::vector<KVCachePtr> ptrs;
|
||||
ptrs.reserve(kv_caches.size());
|
||||
for (size_t i = 0; i < kv_caches.size(); ++i) {
|
||||
ptrs.push_back(KVCachePtr{
|
||||
.kv_cache = kv_caches[i].kv_cache,
|
||||
});
|
||||
ptrs.push_back(kv_caches[i].ToPtr());
|
||||
}
|
||||
return ptrs;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,11 @@ struct KVCache {
|
|||
|
||||
MatStorageT<KV_t> kv_cache; // [seq_len, layers * kv_heads * qkv_dim * 2]
|
||||
|
||||
KVCachePtr ToPtr() { return KVCachePtr{.kv_cache = kv_cache}; }
|
||||
// Returns a KVCachePtr whose `kv_cache` member is initialized from this
// object's `kv_cache`.
KVCachePtr ToPtr() {
  KVCachePtr ptr{.kv_cache = kv_cache};
  return ptr;
}
|
||||
|
||||
private:
|
||||
const Allocator& allocator_;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,42 @@
|
|||
#include "gemma/kv_cache.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "gemma/configs.h"
|
||||
#include "gemma/gemma_args.h"
|
||||
#include "util/threading_context.h"
|
||||
#include "hwy/aligned_allocator.h"
|
||||
namespace gcpp {
|
||||
namespace {
|
||||
|
||||
// Checks that ToKVCachePtrs() yields exactly one KVCachePtr per KVCache, in
// the same order, and that row 0 of each pointer's `kv_cache` is the same
// address as row 0 of the KVCache it was built from.
TEST(KVCacheTest, KVCacheToPtrs) {
  // Single source of truth for the layer count, so `num_layers` and the
  // number of entries in `layer_configs` cannot drift apart. Comparing the
  // loop index against this int constant also avoids a possible
  // signed/unsigned comparison with the config field.
  constexpr int kNumLayers = 2;

  ModelConfig model_config;
  model_config.max_seq_len = 1024;
  model_config.num_layers = kNumLayers;
  for (int layer = 0; layer < kNumLayers; ++layer) {
    model_config.layer_configs.push_back(LayerConfig());
    model_config.layer_configs.back().kv_heads = 4;
    model_config.layer_configs.back().qkv_dim = 256;
  }

  InferenceArgs inference_args;
  inference_args.seq_len = 1024;
  RuntimeConfig runtime_config;
  runtime_config.attention_impl = AttentionImpl::kFlash;
  ThreadingArgs threading_args;
  ThreadingContext ctx(threading_args);

  // Build two caches that differ only in the requested seq_len.
  std::vector<KVCache> caches;
  caches.emplace_back(model_config, inference_args, runtime_config,
                      ctx.allocator);
  inference_args.seq_len = 512;
  caches.emplace_back(model_config, inference_args, runtime_config,
                      ctx.allocator);

  std::vector<KVCachePtr> ptrs = ToKVCachePtrs({caches.data(), caches.size()});
  // Compare two unsigned sizes instead of a size against the int literal 2:
  // no sign-compare mismatch, and the expectation follows the fixture.
  ASSERT_EQ(ptrs.size(), caches.size());
  EXPECT_EQ(ptrs[0].kv_cache.Row(0), caches[0].kv_cache.Row(0));
  EXPECT_EQ(ptrs[1].kv_cache.Row(0), caches[1].kv_cache.Row(0));
}
|
||||
|
||||
} // namespace
|
||||
} // namespace gcpp
|
||||
|
|
|
|||
Loading…
Reference in New Issue