spec : add n_call_begin, n_call_accept

2026-02-05 23:02:14 +01:00 · 2026-02-05 23:02:14 +01:00 · 4283cfef30
parent a5c174d971
commit 4283cfef30
2 changed files with 14 additions and 5 deletions
--- a/common/speculative.cpp
+++ b/common/speculative.cpp
@ -113,8 +113,10 @@ static bool common_speculative_are_compatible(
 struct common_speculative_state {
    const enum common_speculative_type type;
-    // TODO: add n_call_begin, n_call_accept
+    size_t n_call_begin  = 0; // number of times this implementation was called for refresh.
-    size_t n_call_draft = 0; // number of times this implementation was called.
+    size_t n_call_draft  = 0; // number of times this implementation was called for generation.
    size_t n_call_accept = 0; // number of times this implementation was called for accumulation.
    size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation.
    size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model.
    size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
@ -950,6 +952,7 @@ void common_speculative_begin(common_speculative * spec, const llama_tokens & pr
    for (auto & impl : spec->impls) {
        common_time_meas tm(impl->t_begin_us, !impl->gen_perf);
        impl->begin(prompt);
        impl->n_call_begin++;
    }
 }
@ -1002,6 +1005,7 @@ void common_speculative_accept(common_speculative * spec, uint16_t n_accepted) {
        }
        impl->accept(n_accepted);
        impl->n_call_accept++;
    }
 }
@ -1022,9 +1026,9 @@ void common_speculative_print_stats(const common_speculative * spec) {
            str_perf = "";
        }
-        LOG_INF("statistics %s: #calls = %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n",
+        LOG_INF("statistics %s: #calls(b,g,a) = %zu %zu %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n",
                common_speculative_type_to_str(impl->type).c_str(),
-                impl->n_call_draft,
+                impl->n_call_begin, impl->n_call_draft, impl->n_call_accept,
                impl->n_gen_drafts,
                impl->n_acc_drafts,
                impl->n_gen_tokens,
--- a/docs/speculative.md
+++ b/docs/speculative.md
@ -169,7 +169,12 @@ draft acceptance rate = 0.70312 (   90 accepted /   128 generated)
 statistics ngram_mod: #calls = 810, #gen drafts = 15, #acc drafts = 15, #gen tokens = 960, #acc tokens = 730, dur(b,g,a) = 0.149, 0.347, 0.005 ms
 ```
- `#calls`: number of calls of this implementations
+```
 statistics ngram_map_k: #calls(b,g,a) = 6 1690 26, #gen drafts = 26, #acc drafts = 26, #gen tokens = 1248, #acc tokens = 968, dur(b,g,a) = 2.234, 1.427, 0.016 ms
 ```
 - `#calls(b,g,a)`: number of calls to begin (new prompt), generation, and accumulation for this implementation
 - `#gen drafts`: number of drafts generated by this implementation
 - `#acc drafts`: number of drafts accepted (partially) by the main model
 - `#gen tokens`: number of tokens generated by this implementation (including rejected tokens)