Add TTFT (time to first token) to TimingInfo

PiperOrigin-RevId: 634378994
This commit is contained in:
Apoorv Reddy 2024-05-16 07:15:57 -07:00 committed by Copybara-Service
parent eb0b96e0a8
commit 8e641eb4cd
3 changed files with 7 additions and 1 deletion

View File

@ -1171,6 +1171,9 @@ void GenerateImpl(GemmaImpl<TConfig>& gemma,
if (!runtime_config.stream_token(token, activations.logits[token])) { if (!runtime_config.stream_token(token, activations.logits[token])) {
token = EOS_ID; token = EOS_ID;
} }
if (generate_pos == 0) {
timing_info.time_to_first_token = hwy::platform::Now() - gen_start;
}
} else { } else {
// We would take this branch if we were not doing Prefill but would // We would take this branch if we were not doing Prefill but would
// process the tokens of the prompt one at a time. // process the tokens of the prompt one at a time.

View File

@ -99,6 +99,7 @@ struct Gemma {
// Timing measurements for one generation call. All fields default to zero.
struct TimingInfo { struct TimingInfo {
// Prefill rate; the REPL prints it with the label "prefill tokens / sec".
double prefill_tok_sec = 0.0; double prefill_tok_sec = 0.0;
// Generation rate; the REPL prints it with the label "tokens / sec".
double gen_tok_sec = 0.0; double gen_tok_sec = 0.0;
// Elapsed time from gen_start to the point where generate_pos == 0 is
// streamed (hwy::platform::Now() - gen_start). Presumably in seconds, since
// the REPL multiplies by 1000 before printing milliseconds -- TODO confirm.
double time_to_first_token = 0;
}; };
KVCache CreateKVCache(Model type); // convenient workaround for now KVCache CreateKVCache(Model type); // convenient workaround for now

View File

@ -224,7 +224,9 @@ void ReplGemma(gcpp::Gemma& model, ModelTraining training,
<< "\n" << "\n"
<< timing_info.prefill_tok_sec << " prefill tokens / sec" << timing_info.prefill_tok_sec << " prefill tokens / sec"
<< "\n" << "\n"
<< timing_info.gen_tok_sec << " tokens / sec" << "\n"; << timing_info.gen_tok_sec << " tokens / sec" << "\n"
<< static_cast<int>(timing_info.time_to_first_token * 1000)
<< " milliseconds time to first token" << "\n";
} }
std::cout << "\n\n"; std::cout << "\n\n";
} }