From 4283cfef30d196b59351ebd697e2d4cb82ea2bc8 Mon Sep 17 00:00:00 2001
From: Sascha Rogmann <github@rogmann.org>
Date: Thu, 5 Feb 2026 23:02:14 +0100
Subject: [PATCH] spec : add n_call_begin, n_call_accept

---
 common/speculative.cpp | 12 ++++++++----
 docs/speculative.md    |  7 ++++++-
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/common/speculative.cpp b/common/speculative.cpp
index 4edfadc7b2..24b2cf9eb8 100644
--- a/common/speculative.cpp
+++ b/common/speculative.cpp
@@ -113,8 +113,10 @@ static bool common_speculative_are_compatible(
 struct common_speculative_state {
     const enum common_speculative_type type;
 
-    // TODO: add n_call_begin, n_call_accept
-    size_t n_call_draft = 0; // number of times this implementation was called.
+    size_t n_call_begin  = 0; // number of times this implementation was called for begin (new prompt).
+    size_t n_call_draft  = 0; // number of times this implementation was called for generation.
+    size_t n_call_accept = 0; // number of times this implementation was called for accumulation.
+
     size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation.
     size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model.
     size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
@@ -950,6 +952,7 @@ void common_speculative_begin(common_speculative * spec, const llama_tokens & pr
     for (auto & impl : spec->impls) {
         common_time_meas tm(impl->t_begin_us, !impl->gen_perf);
         impl->begin(prompt);
+        impl->n_call_begin++;
     }
 }
 
@@ -1002,6 +1005,7 @@ void common_speculative_accept(common_speculative * spec, uint16_t n_accepted) {
         }
 
         impl->accept(n_accepted);
+        impl->n_call_accept++;
     }
 }
 
@@ -1022,9 +1026,9 @@ void common_speculative_print_stats(const common_speculative * spec) {
             str_perf = "";
         }
 
-        LOG_INF("statistics %s: #calls = %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n",
+        LOG_INF("statistics %s: #calls(b,g,a) = %zu %zu %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n",
                 common_speculative_type_to_str(impl->type).c_str(),
-                impl->n_call_draft,
+                impl->n_call_begin, impl->n_call_draft, impl->n_call_accept,
                 impl->n_gen_drafts,
                 impl->n_acc_drafts,
                 impl->n_gen_tokens,
diff --git a/docs/speculative.md b/docs/speculative.md
index 31856c157a..29da332875 100644
--- a/docs/speculative.md
+++ b/docs/speculative.md
@@ -169,7 +169,12 @@ draft acceptance rate = 0.70312 (   90 accepted /   128 generated)
 statistics ngram_mod: #calls = 810, #gen drafts = 15, #acc drafts = 15, #gen tokens = 960, #acc tokens = 730, dur(b,g,a) = 0.149, 0.347, 0.005 ms
 ```
 
-- `#calls`: number of calls of this implementations
+```
+statistics ngram_map_k: #calls(b,g,a) = 6 1690 26, #gen drafts = 26, #acc drafts = 26, #gen tokens = 1248, #acc tokens = 968, dur(b,g,a) = 2.234, 1.427, 0.016 ms
+```
+
+
+- `#calls(b,g,a)`: number of calls to begin (new prompt), generation and accumulation for this implementation
 - `#gen drafts`: number of drafts generated by this implementation
 - `#acc drafts`: number of drafts accepted (partially) by the main model
 - `#gen tokens`: number of tokens generated by this implementation (including rejected tokens)