From c6587efe70ad2a657a991b971b35c1eb5f46f6cd Mon Sep 17 00:00:00 2001 From: Jan Wassenberg Date: Wed, 25 Feb 2026 13:10:20 -0800 Subject: [PATCH] Improve instrumentation for ViT parts PiperOrigin-RevId: 875302990 --- gemma/gemma.cc | 1 + gemma/vit.cc | 2 +- ops/ops-inl.h | 2 +- paligemma/image.cc | 2 ++ util/zones.cc | 3 +++ util/zones.h | 1 + 6 files changed, 9 insertions(+), 2 deletions(-) diff --git a/gemma/gemma.cc b/gemma/gemma.cc index 90bbca3..af2c447 100644 --- a/gemma/gemma.cc +++ b/gemma/gemma.cc @@ -726,6 +726,7 @@ void GenerateImageTokensT(const ModelConfig& config, const RuntimeConfig& runtime_config, size_t seq_len, const WeightsPtrs& weights, const Image& image, ImageTokens& image_tokens, MatMulEnv& env) { + GCPP_ZONE(env.ctx, hwy::Profiler::GlobalIdx(), Zones::kGenImageTokens); if (config.vit_config.layer_configs.empty()) { HWY_ABORT("Model does not support generating image tokens."); } diff --git a/gemma/vit.cc b/gemma/vit.cc index 31c6f0f..be14b12 100644 --- a/gemma/vit.cc +++ b/gemma/vit.cc @@ -76,7 +76,7 @@ class VitAttention { const size_t seq_len = static_cast(activations_.attention.div_seq_len.GetDivisor()); const float query_scale = 1.0f / sqrtf(static_cast(qkv_dim)); - PROFILER_ZONE("Gen.VitAttention.DotSoftmax"); + PROFILER_ZONE("Gen.VitAttention.DotSoftmaxMatrix"); MatPtrT& Q = activations_.attention.vit_Q; MatPtrT& K = activations_.attention.vit_K; diff --git a/ops/ops-inl.h b/ops/ops-inl.h index 5ff11a8..affde22 100644 --- a/ops/ops-inl.h +++ b/ops/ops-inl.h @@ -25,7 +25,6 @@ #include #include #include // std::enable_if_t -#include #include #include "ops/matmul.h" @@ -1869,6 +1868,7 @@ HWY_NOINLINE HWY_MAYBE_UNUSED TokenAndProb FusedSoftmaxAndSampleTopK( // Performs 4x4 average pooling across row vectors // Input has 4096 (64*64) rows, output has 256 (16*16) rows // Each output row is the average of a 4x4 block of input rows +// This is surprisingly inexpensive for small images (<1 ms). template MatStorageT AvgPool4x4(MatStorageT& input, const Allocator& allocator) { const Extents2D extents = input.Extents(); diff --git a/paligemma/image.cc b/paligemma/image.cc index d8b0cfc..821f9bf 100644 --- a/paligemma/image.cc +++ b/paligemma/image.cc @@ -100,6 +100,7 @@ bool Image::ReadPPM(const std::string& filename) { return ReadPPM(hwy::Span(content.data(), content.size())); } +// This is surprisingly inexpensive for small images (3 ms). bool Image::ReadPPM(const hwy::Span& buf) { const char* pos = CheckP6Format(buf.cbegin(), buf.cend()); if (!pos) { @@ -171,6 +172,7 @@ void Image::Set(int width, int height, const float* data) { } } +// This is surprisingly inexpensive for small images (2 ms). void Image::Resize(int new_width, int new_height) { std::vector new_data(new_width * new_height * 3); // TODO: go to bilinear interpolation, or antialias. diff --git a/util/zones.cc b/util/zones.cc index 6480b96..d1f9b8c 100644 --- a/util/zones.cc +++ b/util/zones.cc @@ -47,6 +47,8 @@ const char* ZoneName(Zones zone) { return "Gen.EmbeddingMatmul"; case Zones::kGenFFW: return "Gen.FFW"; + case Zones::kGenImageTokens: + return "Gen.ImageTokens"; case Zones::kGenSampleTop1: return "Gen.SampleTop1"; case Zones::kGenSampleTopK: @@ -111,6 +113,7 @@ hwy::ProfilerFlags ZoneFlags(Zones zone) { case Zones::kGenEmbed: case Zones::kGenEmbeddingMatmul: case Zones::kGenFFW: + case Zones::kGenImageTokens: return hwy::ProfilerFlags::kInclusive; default: return hwy::ProfilerFlags::kDefault; diff --git a/util/zones.h b/util/zones.h index ac96ad0..a6d40fa 100644 --- a/util/zones.h +++ b/util/zones.h @@ -29,6 +29,7 @@ enum class Zones { // Keep sorted kGenEmbed, kGenEmbeddingMatmul, kGenFFW, + kGenImageTokens, kGenSampleTop1, kGenSampleTopK, kGenStats,