mirror of https://github.com/google/gemma.cpp.git
parent
eb0b96e0a8
commit
8e641eb4cd
|
|
@ -1171,6 +1171,9 @@ void GenerateImpl(GemmaImpl<TConfig>& gemma,
|
||||||
if (!runtime_config.stream_token(token, activations.logits[token])) {
|
if (!runtime_config.stream_token(token, activations.logits[token])) {
|
||||||
token = EOS_ID;
|
token = EOS_ID;
|
||||||
}
|
}
|
||||||
|
if (generate_pos == 0) {
|
||||||
|
timing_info.time_to_first_token = hwy::platform::Now() - gen_start;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// We would take this branch if we were not doing Prefill but would
|
// We would take this branch if we were not doing Prefill but would
|
||||||
// process the tokens of the prompt one at a time.
|
// process the tokens of the prompt one at a time.
|
||||||
|
|
|
||||||
|
|
@ -99,6 +99,7 @@ struct Gemma {
|
||||||
struct TimingInfo {
|
struct TimingInfo {
|
||||||
double prefill_tok_sec = 0.0;
|
double prefill_tok_sec = 0.0;
|
||||||
double gen_tok_sec = 0.0;
|
double gen_tok_sec = 0.0;
|
||||||
|
double time_to_first_token = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
KVCache CreateKVCache(Model type); // convenient workaround for now
|
KVCache CreateKVCache(Model type); // convenient workaround for now
|
||||||
|
|
|
||||||
|
|
@ -224,7 +224,9 @@ void ReplGemma(gcpp::Gemma& model, ModelTraining training,
|
||||||
<< "\n"
|
<< "\n"
|
||||||
<< timing_info.prefill_tok_sec << " prefill tokens / sec"
|
<< timing_info.prefill_tok_sec << " prefill tokens / sec"
|
||||||
<< "\n"
|
<< "\n"
|
||||||
<< timing_info.gen_tok_sec << " tokens / sec" << "\n";
|
<< timing_info.gen_tok_sec << " tokens / sec" << "\n"
|
||||||
|
<< static_cast<int>(timing_info.time_to_first_token * 1000)
|
||||||
|
<< " milliseconds time to first token" << "\n";
|
||||||
}
|
}
|
||||||
std::cout << "\n\n";
|
std::cout << "\n\n";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue