diff --git a/common/speculative.cpp b/common/speculative.cpp index 67dd621f8c..4edfadc7b2 100644 --- a/common/speculative.cpp +++ b/common/speculative.cpp @@ -113,13 +113,12 @@ static bool common_speculative_are_compatible( struct common_speculative_state { const enum common_speculative_type type; - // TODO: rename to n_call_draft, n_gen_drafts, n_acc_drafts, n_gen_tokens, n_acc_tokens // TODO: add n_call_begin, n_call_accept - size_t drafts_call_count = 0; // number of times this implementation was called. - size_t drafts_generated_count = 0; // number of times a draft or part was generated by this implementation. - size_t drafts_accepted_count = 0; // number of times a draft or part was accepted by the target model. - size_t drafts_generated_tokens = 0; // number of tokens generated by this implementation. - size_t drafts_accepted_tokens = 0; // number of tokens accepted by the target model. + size_t n_call_draft = 0; // number of times this implementation was called. + size_t n_gen_drafts = 0; // number of times a draft or part was generated by this implementation. + size_t n_acc_drafts = 0; // number of times a draft or part was accepted by the target model. + size_t n_gen_tokens = 0; // number of tokens generated by this implementation. + size_t n_acc_tokens = 0; // number of tokens accepted by the target model. // TODO: track performance of most recent calls const bool gen_perf = true; // whether to generate performance stats. @@ -967,17 +966,17 @@ llama_tokens common_speculative_draft( { common_time_meas tm(impl->t_draft_us, !impl->gen_perf); impl->draft(params, prompt_tgt, id_last, result); - impl->drafts_call_count++; + impl->n_call_draft++; } if (!result.empty()) { LOG_DBG("%s: called impl %s, hist size = %zu, call_count = %zu, gen = %zu\n", __func__, common_speculative_type_to_str(impl.get()->type).c_str(), prompt_tgt.size(), - impl.get()->drafts_call_count, result.size()); + impl.get()->n_call_draft, result.size()); spec->curr_impl = impl.get(); // set current implementation for stats - impl->drafts_generated_count++; - impl->drafts_generated_tokens += result.size(); + impl->n_gen_drafts++; + impl->n_gen_tokens += result.size(); break; // We have a draft, so break out of the loop and return it. } @@ -998,8 +997,8 @@ void common_speculative_accept(common_speculative * spec, uint16_t n_accepted) { { common_time_meas tm(impl->t_accept_us, !impl->gen_perf); if (n_accepted > 0) { - impl->drafts_accepted_count++; - impl->drafts_accepted_tokens += n_accepted; + impl->n_acc_drafts++; + impl->n_acc_tokens += n_accepted; } impl->accept(n_accepted); @@ -1025,11 +1024,11 @@ void common_speculative_print_stats(const common_speculative * spec) { LOG_INF("statistics %s: #calls = %zu, #gen drafts = %zu, #acc drafts = %zu, #gen tokens = %zu, #acc tokens = %zu%s\n", common_speculative_type_to_str(impl->type).c_str(), - impl->drafts_call_count, - impl->drafts_generated_count, - impl->drafts_accepted_count, - impl->drafts_generated_tokens, - impl->drafts_accepted_tokens, + impl->n_call_draft, + impl->n_gen_drafts, + impl->n_acc_drafts, + impl->n_gen_tokens, + impl->n_acc_tokens, str_perf.c_str()); } }