From 2c5706f15922a1063d6b4820d755857078370543 Mon Sep 17 00:00:00 2001 From: Andrey Mikhaylov Date: Fri, 12 Apr 2024 11:29:29 +0000 Subject: [PATCH] Add comments regarding layers output usage. --- .vscode/settings.json | 6 ++++++ gemma/gemma.h | 10 +++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7330a09 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "cmake.configureOnOpen": false, + "files.associations": { + "array": "cpp" + } +} \ No newline at end of file diff --git a/gemma/gemma.h b/gemma/gemma.h index 7128440..4120bfa 100644 --- a/gemma/gemma.h +++ b/gemma/gemma.h @@ -32,7 +32,13 @@ namespace gcpp { using GemmaWeightT = GEMMA_WEIGHT_T; using EmbedderInputT = hwy::bfloat16_t; -using LayersOutputT = std::function; +// Will be called for layers output with: +// - position in the tokens sequence +// - name of the data, e.g. "tokens", "block.1", "final_norm" +// - pointer to the data array +// - size of the data array +using LayersOutputT = + std::function; constexpr size_t kPrefillBatchSize = 16; constexpr bool kSystemPrompt = false; @@ -93,6 +99,8 @@ KVCache CreateKVCache(size_t size_cache_pos, size_t seq_len, using StreamFunc = std::function; using AcceptFunc = std::function; +// layers_output is optional; if set - it will be called with the activations +// output after applying each layer. void GenerateGemma(Gemma& gemma, size_t max_tokens, size_t max_generated_tokens, float temperature, const std::vector& prompt, size_t start_pos, KVCache& kv_cache, hwy::ThreadPool& pool,