mirror of https://github.com/google/gemma.cpp.git
commit
b27d8d6b92
|
|
@ -1057,11 +1057,14 @@ void GenerateImpl(GemmaImpl<TConfig>& gemma, size_t max_tokens,
|
|||
// In single-turn (non-chat) usage, pos and pos_offset start at 0 and are
|
||||
// always equal.
|
||||
size_t pos_offset = 0; // offset relative to pos
|
||||
|
||||
auto prefill_phase = [&]() HWY_ATTR {
|
||||
bool keep_on = true;
|
||||
const double prefill_start = hwy::platform::Now();
|
||||
|
||||
// Prefill stops before prompt_size - 1 since the last prompt token is the
|
||||
// first input token for generation.
|
||||
while (pos_offset < prompt_size - 1) {
|
||||
while (pos_offset < prompt_size - 1 && keep_on) {
|
||||
const size_t batch_size =
|
||||
std::min(kPrefillBatchSize, prompt_size - 1 - pos_offset);
|
||||
HWY_DASSERT(batch_size <= kPrefillBatchSize);
|
||||
|
|
@ -1070,7 +1073,10 @@ void GenerateImpl(GemmaImpl<TConfig>& gemma, size_t max_tokens,
|
|||
Prefill<kPrefillBatchSize>(batch_tokens, batch_size, pos, weights,
|
||||
prefill_activations, kv_cache, pool, inner_pool);
|
||||
for (size_t idx = 0; idx < batch_size; ++idx) {
|
||||
stream_token(batch_tokens[idx], 0.0f);
|
||||
keep_on = stream_token(batch_tokens[idx], 0.0f);
|
||||
if(!keep_on) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
pos += batch_size;
|
||||
pos_offset += batch_size;
|
||||
|
|
@ -1085,6 +1091,11 @@ void GenerateImpl(GemmaImpl<TConfig>& gemma, size_t max_tokens,
|
|||
std::cout << "\n[ Prefill tokens / sec = " << prefill_tok_sec << " ]";
|
||||
}
|
||||
|
||||
return keep_on;
|
||||
};
|
||||
|
||||
auto transform_phase = [&]() HWY_ATTR {
|
||||
|
||||
const double gen_start = hwy::platform::Now();
|
||||
|
||||
HWY_DASSERT(pos_offset == prompt_size - 1);
|
||||
|
|
@ -1132,6 +1143,11 @@ void GenerateImpl(GemmaImpl<TConfig>& gemma, size_t max_tokens,
|
|||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if(prefill_phase()) {
|
||||
transform_phase();
|
||||
}
|
||||
}
|
||||
|
||||
#define TOKEN(token_id) TokenString(gemma, token_id).c_str()
|
||||
|
|
|
|||
Loading…
Reference in New Issue