spec : add n_call_begin, n_call_accept
This commit is contained in:
parent
a5c174d971
commit
4283cfef30
|
|
@ -113,8 +113,10 @@ static bool common_speculative_are_compatible(
|
||||||
struct common_speculative_state {
|
struct common_speculative_state {
|
||||||
const enum common_speculative_type type;
|
const enum common_speculative_type type;
|
||||||
|
|
||||||
// TODO: add n_call_begin, n_call_accept
|
size_t n_call_begin = 0; // number of times this implementation was called for refresh.
|
||||||
size_t n_call_draft = 0; // number of times this implementation was called.
|
size_t n_call_draft = 0; // number of times this implementation was called for generation.
|
||||||
|
size_t n_call_accept = 0; // number of times this implementation was called to accept drafted tokens.
|
||||||
|
|
||||||
size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation.
|
size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation.
|
||||||
size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model.
|
size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model.
|
||||||
size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
|
size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
|
||||||
|
|
@ -950,6 +952,7 @@ void common_speculative_begin(common_speculative * spec, const llama_tokens & pr
|
||||||
for (auto & impl : spec->impls) {
|
for (auto & impl : spec->impls) {
|
||||||
common_time_meas tm(impl->t_begin_us, !impl->gen_perf);
|
common_time_meas tm(impl->t_begin_us, !impl->gen_perf);
|
||||||
impl->begin(prompt);
|
impl->begin(prompt);
|
||||||
|
impl->n_call_begin++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1002,6 +1005,7 @@ void common_speculative_accept(common_speculative * spec, uint16_t n_accepted) {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl->accept(n_accepted);
|
impl->accept(n_accepted);
|
||||||
|
impl->n_call_accept++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1022,9 +1026,9 @@ void common_speculative_print_stats(const common_speculative * spec) {
|
||||||
str_perf = "";
|
str_perf = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INF("statistics %s: #calls = %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n",
|
LOG_INF("statistics %s: #calls(b,g,a) = %zu %zu %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n",
|
||||||
common_speculative_type_to_str(impl->type).c_str(),
|
common_speculative_type_to_str(impl->type).c_str(),
|
||||||
impl->n_call_draft,
|
impl->n_call_begin, impl->n_call_draft, impl->n_call_accept,
|
||||||
impl->n_gen_drafts,
|
impl->n_gen_drafts,
|
||||||
impl->n_acc_drafts,
|
impl->n_acc_drafts,
|
||||||
impl->n_gen_tokens,
|
impl->n_gen_tokens,
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,12 @@ draft acceptance rate = 0.70312 ( 90 accepted / 128 generated)
|
||||||
statistics ngram_mod: #calls = 810, #gen drafts = 15, #acc drafts = 15, #gen tokens = 960, #acc tokens = 730, dur(b,g,a) = 0.149, 0.347, 0.005 ms
|
statistics ngram_mod: #calls = 810, #gen drafts = 15, #acc drafts = 15, #gen tokens = 960, #acc tokens = 730, dur(b,g,a) = 0.149, 0.347, 0.005 ms
|
||||||
```
|
```
|
||||||
|
|
||||||
- `#calls`: number of calls of this implementations
|
```
|
||||||
|
statistics ngram_map_k: #calls(b,g,a) = 6 1690 26, #gen drafts = 26, #acc drafts = 26, #gen tokens = 1248, #acc tokens = 968, dur(b,g,a) = 2.234, 1.427, 0.016 ms
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
- `#calls(b,g,a)`: number of calls to begin (new prompt), generation, and acceptance for this implementation
|
||||||
- `#gen drafts`: number of drafts generated by this implementation
|
- `#gen drafts`: number of drafts generated by this implementation
|
||||||
- `#acc drafts`: number of drafts accepted (partially) by the main model
|
- `#acc drafts`: number of drafts accepted (partially) by the main model
|
||||||
- `#gen tokens`: number of tokens generated by this implementation (including rejected tokens)
|
- `#gen tokens`: number of tokens generated by this implementation (including rejected tokens)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue