From 4283cfef30d196b59351ebd697e2d4cb82ea2bc8 Mon Sep 17 00:00:00 2001 From: Sascha Rogmann Date: Thu, 5 Feb 2026 23:02:14 +0100 Subject: [PATCH] spec : add n_call_begin, n_call_accept --- common/speculative.cpp | 12 ++++++++---- docs/speculative.md | 7 ++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/common/speculative.cpp b/common/speculative.cpp index 4edfadc7b2..24b2cf9eb8 100644 --- a/common/speculative.cpp +++ b/common/speculative.cpp @@ -113,8 +113,10 @@ static bool common_speculative_are_compatible( struct common_speculative_state { const enum common_speculative_type type; - // TODO: add n_call_begin, n_call_accept - size_t n_call_draft = 0; // number of times this implementation was called. + size_t n_call_begin = 0; // number of times this implementation was called for refresh. + size_t n_call_draft = 0; // number of times this implementation was called for generation. + size_t n_call_accept = 0; // number of times this implementation was called for accumulation. + size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation. size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model. size_t n_gen_tokens = 0; // number of tokens generated by this implementation. 
@@ -950,6 +952,7 @@ void common_speculative_begin(common_speculative * spec, const llama_tokens & pr for (auto & impl : spec->impls) { common_time_meas tm(impl->t_begin_us, !impl->gen_perf); impl->begin(prompt); + impl->n_call_begin++; } } @@ -1002,6 +1005,7 @@ void common_speculative_accept(common_speculative * spec, uint16_t n_accepted) { } impl->accept(n_accepted); + impl->n_call_accept++; } } @@ -1022,9 +1026,9 @@ void common_speculative_print_stats(const common_speculative * spec) { str_perf = ""; } - LOG_INF("statistics %s: #calls = %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n", + LOG_INF("statistics %s: #calls(b,g,a) = %zu %zu %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n", common_speculative_type_to_str(impl->type).c_str(), - impl->n_call_draft, + impl->n_call_begin, impl->n_call_draft, impl->n_call_accept, impl->n_gen_drafts, impl->n_acc_drafts, impl->n_gen_tokens, diff --git a/docs/speculative.md b/docs/speculative.md index 31856c157a..29da332875 100644 --- a/docs/speculative.md +++ b/docs/speculative.md @@ -169,7 +169,12 @@ draft acceptance rate = 0.70312 ( 90 accepted / 128 generated) statistics ngram_mod: #calls = 810, #gen drafts = 15, #acc drafts = 15, #gen tokens = 960, #acc tokens = 730, dur(b,g,a) = 0.149, 0.347, 0.005 ms ``` -- `#calls`: number of calls of this implementations +``` +statistics ngram_map_k: #calls(b,g,a) = 6 1690 26, #gen drafts = 26, #acc drafts = 26, #gen tokens = 1248, #acc tokens = 968, dur(b,g,a) = 2.234, 1.427, 0.016 ms +``` + + +- `#calls(b,g,a)`: number of calls of begin (new prompt), generation and accumulation of this implementation - `#gen drafts`: number of drafts generated by this implementation - `#acc drafts`: number of drafts accepted (partially) by the main model - `#gen tokens`: number of tokens generated by this implementation (including rejected tokens)