spec : add n_call_begin, n_call_accept

This commit is contained in:
Sascha Rogmann 2026-02-05 23:02:14 +01:00
parent a5c174d971
commit 4283cfef30
2 changed files with 14 additions and 5 deletions

View File

@ -113,8 +113,10 @@ static bool common_speculative_are_compatible(
struct common_speculative_state { struct common_speculative_state {
const enum common_speculative_type type; const enum common_speculative_type type;
// TODO: add n_call_begin, n_call_accept size_t n_call_begin = 0; // number of times this implementation was called for refresh.
size_t n_call_draft = 0; // number of times this implementation was called. size_t n_call_draft = 0; // number of times this implementation was called for generation.
size_t n_call_accept = 0; // number of times this implementation was called for accumulation.
size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation. size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation.
size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model. size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model.
size_t n_gen_tokens = 0; // number of tokens generated by this implementation. size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
@ -950,6 +952,7 @@ void common_speculative_begin(common_speculative * spec, const llama_tokens & pr
for (auto & impl : spec->impls) { for (auto & impl : spec->impls) {
common_time_meas tm(impl->t_begin_us, !impl->gen_perf); common_time_meas tm(impl->t_begin_us, !impl->gen_perf);
impl->begin(prompt); impl->begin(prompt);
impl->n_call_begin++;
} }
} }
@ -1002,6 +1005,7 @@ void common_speculative_accept(common_speculative * spec, uint16_t n_accepted) {
} }
impl->accept(n_accepted); impl->accept(n_accepted);
impl->n_call_accept++;
} }
} }
@ -1022,9 +1026,9 @@ void common_speculative_print_stats(const common_speculative * spec) {
str_perf = ""; str_perf = "";
} }
LOG_INF("statistics %s: #calls = %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n", LOG_INF("statistics %s: #calls(b,g,a) = %zu %zu %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n",
common_speculative_type_to_str(impl->type).c_str(), common_speculative_type_to_str(impl->type).c_str(),
impl->n_call_draft, impl->n_call_begin, impl->n_call_draft, impl->n_call_accept,
impl->n_gen_drafts, impl->n_gen_drafts,
impl->n_acc_drafts, impl->n_acc_drafts,
impl->n_gen_tokens, impl->n_gen_tokens,

View File

@ -169,7 +169,12 @@ draft acceptance rate = 0.70312 ( 90 accepted / 128 generated)
statistics ngram_mod: #calls = 810, #gen drafts = 15, #acc drafts = 15, #gen tokens = 960, #acc tokens = 730, dur(b,g,a) = 0.149, 0.347, 0.005 ms statistics ngram_mod: #calls = 810, #gen drafts = 15, #acc drafts = 15, #gen tokens = 960, #acc tokens = 730, dur(b,g,a) = 0.149, 0.347, 0.005 ms
``` ```
- `#calls`: number of calls of this implementations ```
statistics ngram_map_k: #calls(b,g,a) = 6 1690 26, #gen drafts = 26, #acc drafts = 26, #gen tokens = 1248, #acc tokens = 968, dur(b,g,a) = 2.234, 1.427, 0.016 ms
```
- `#calls(b,g,a)`: number of calls to begin (new prompt), generation, and accumulation for this implementation
- `#gen drafts`: number of drafts generated by this implementation - `#gen drafts`: number of drafts generated by this implementation
- `#acc drafts`: number of drafts accepted (partially) by the main model - `#acc drafts`: number of drafts accepted (partially) by the main model
- `#gen tokens`: number of tokens generated by this implementation (including rejected tokens) - `#gen tokens`: number of tokens generated by this implementation (including rejected tokens)