mirror of https://github.com/google/gemma.cpp.git
Use benchmark_helper in py bindings (adds BOS)
Also remove thread clamp (OK to be zero or large). PiperOrigin-RevId: 648657155
This commit is contained in:
parent
e527e7662e
commit
b1c1ec1d59
|
|
@ -806,16 +806,16 @@ Activations<TConfig, kBatchSize>& GetActivations(
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
// Placeholder for internal test3, do not remove
|
bool StreamToken(size_t query_idx, size_t pos, int token, float prob,
|
||||||
|
|
||||||
bool StreamToken(size_t query_idx, size_t pos, int token, float weight,
|
|
||||||
const RuntimeConfig& runtime_config) {
|
const RuntimeConfig& runtime_config) {
|
||||||
if (runtime_config.batch_stream_token) {
|
if (runtime_config.batch_stream_token) {
|
||||||
return runtime_config.batch_stream_token(query_idx, pos, token, weight);
|
return runtime_config.batch_stream_token(query_idx, pos, token, prob);
|
||||||
}
|
}
|
||||||
return runtime_config.stream_token(token, weight);
|
return runtime_config.stream_token(token, prob);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Placeholder for internal test3, do not remove
|
||||||
|
|
||||||
template <class TConfig, size_t kQueryBatchSize>
|
template <class TConfig, size_t kQueryBatchSize>
|
||||||
void GenerateT(const ByteStorageT& weights_u8, const ByteStorageT& prefill_u8,
|
void GenerateT(const ByteStorageT& weights_u8, const ByteStorageT& prefill_u8,
|
||||||
const ByteStorageT& decode_u8,
|
const ByteStorageT& decode_u8,
|
||||||
|
|
|
||||||
|
|
@ -133,8 +133,7 @@ class AppArgs : public ArgsBase<AppArgs> {
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline size_t GetSupportedThreadCount() {
|
static inline size_t GetSupportedThreadCount() {
|
||||||
return std::clamp(hwy::ThreadPool::MaxThreads(), size_t{1},
|
return std::min(hwy::ThreadPool::MaxThreads(), kMaxThreads);
|
||||||
std::min(kMaxThreads, size_t{18}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Path log; // output
|
Path log; // output
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue