mirror of https://github.com/google/gemma.cpp.git
Address review feedback: Fix prefill_tbatch_size and variable placement issues
This commit is contained in:
parent
27c28cc938
commit
f55c321397
|
|
@@ -156,8 +156,7 @@ void ReplGemma(const ThreadingArgs& threading, const InferenceArgs& inference,
|
||||||
std::cout << token_text << std::flush;
|
std::cout << token_text << std::flush;
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
// Flag to check if we should exit after processing non-interactive prompt
|
|
||||||
bool exit_after_generation = !inference.prompt.empty();
|
|
||||||
while (true) { // Loop until user quits.
|
while (true) { // Loop until user quits.
|
||||||
tokens_generated_this_turn = 0;
|
tokens_generated_this_turn = 0;
|
||||||
|
|
||||||
|
|
@@ -224,7 +223,7 @@ void ReplGemma(const ThreadingArgs& threading, const InferenceArgs& inference,
|
||||||
std::cout << "\n\n";
|
std::cout << "\n\n";
|
||||||
|
|
||||||
// Break the loop if in non-interactive mode
|
// Break the loop if in non-interactive mode
|
||||||
if (exit_after_generation) {
|
if (!inference.prompt.empty()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue