diff --git a/gemma.cc b/gemma.cc index 195b177..dcd87ed 100644 --- a/gemma.cc +++ b/gemma.cc @@ -295,7 +295,7 @@ HWY_NOINLINE void Attention(size_t batch_start, size_t batch_idx, size_t layer, static constexpr size_t kModelDim = gcpp::Activations::kModelDim; static constexpr size_t kHeads = TConfig::kHeads; - const float kQueryScale = 1.0 / sqrtf(static_cast(kQKVDim)); + static const float kQueryScale = static_cast(1.0 / sqrt(static_cast(kQKVDim))); pool.Run(0, kHeads, [&](const uint64_t head, size_t /*thread*/) HWY_ATTR { // linear projections to QKV @@ -418,7 +418,7 @@ HWY_NOINLINE void Prefill(const int* tokens, size_t num_tokens, size_t pos, hwy::ThreadPool& inner_pool) { PROFILER_ZONE("Gen.Prefill\\Att\\FFW"); static constexpr size_t kModelDim = TConfig::kModelDim; - static const float kEmbScaling = sqrtf(static_cast(kModelDim)); + static const float kEmbScaling = static_cast(sqrt(static_cast(kModelDim))); pool.Run( 0, num_tokens, [&](const uint64_t token_idx, size_t /*thread*/) HWY_ATTR { @@ -473,7 +473,7 @@ void Transformer(int token, size_t pos, static constexpr size_t kLayers = TConfig::kLayers; static constexpr size_t kModelDim = TConfig::kModelDim; - static const float kEmbScaling = sqrtf(static_cast(kModelDim)); + static const float kEmbScaling = static_cast(sqrt(static_cast(kModelDim))); Decompress(c_weights.c_embedder_input_embedding, token * kModelDim, activations.x.data(), kModelDim); diff --git a/run.cc b/run.cc index 50b9a24..71481f9 100644 --- a/run.cc +++ b/run.cc @@ -186,7 +186,7 @@ void ReplGemma(gcpp::Gemma& model, hwy::ThreadPool& pool, if (abs_pos > 0) { // Prepend "" token if this is a multi-turn dialogue // continuation. - prompt_string = "\n" + prompt_string; + prompt_string = "model\n" + prompt_string; } }