Improve instrumentation for ViT parts

PiperOrigin-RevId: 875302990
This commit is contained in:
Jan Wassenberg 2026-02-25 13:10:20 -08:00 committed by Copybara-Service
parent df162ead7c
commit c6587efe70
6 changed files with 9 additions and 2 deletions

View File

@@ -726,6 +726,7 @@ void GenerateImageTokensT(const ModelConfig& config,
const RuntimeConfig& runtime_config, size_t seq_len,
const WeightsPtrs& weights, const Image& image,
ImageTokens& image_tokens, MatMulEnv& env) {
GCPP_ZONE(env.ctx, hwy::Profiler::GlobalIdx(), Zones::kGenImageTokens);
if (config.vit_config.layer_configs.empty()) {
HWY_ABORT("Model does not support generating image tokens.");
}

View File

@@ -76,7 +76,7 @@ class VitAttention {
const size_t seq_len =
static_cast<size_t>(activations_.attention.div_seq_len.GetDivisor());
const float query_scale = 1.0f / sqrtf(static_cast<float>(qkv_dim));
-PROFILER_ZONE("Gen.VitAttention.DotSoftmax");
+PROFILER_ZONE("Gen.VitAttention.DotSoftmaxMatrix");
MatPtrT<float>& Q = activations_.attention.vit_Q;
MatPtrT<float>& K = activations_.attention.vit_K;

View File

@@ -25,7 +25,6 @@
#include <cstdint>
#include <random>
#include <type_traits> // std::enable_if_t
-#include <utility>
#include <vector>
#include "ops/matmul.h"
@@ -1869,6 +1868,7 @@ HWY_NOINLINE HWY_MAYBE_UNUSED TokenAndProb FusedSoftmaxAndSampleTopK(
// Performs 4x4 average pooling across row vectors
// Input has 4096 (64*64) rows, output has 256 (16*16) rows
// Each output row is the average of a 4x4 block of input rows
// This is surprisingly inexpensive for small images (<1 ms).
template <typename T>
MatStorageT<T> AvgPool4x4(MatStorageT<T>& input, const Allocator& allocator) {
const Extents2D extents = input.Extents();

View File

@@ -100,6 +100,7 @@ bool Image::ReadPPM(const std::string& filename) {
return ReadPPM(hwy::Span<const char>(content.data(), content.size()));
}
// This is surprisingly inexpensive for small images (3 ms).
bool Image::ReadPPM(const hwy::Span<const char>& buf) {
const char* pos = CheckP6Format(buf.cbegin(), buf.cend());
if (!pos) {
@@ -171,6 +172,7 @@ void Image::Set(int width, int height, const float* data) {
}
}
// This is surprisingly inexpensive for small images (2 ms).
void Image::Resize(int new_width, int new_height) {
std::vector<float> new_data(new_width * new_height * 3);
// TODO: go to bilinear interpolation, or antialias.

View File

@@ -47,6 +47,8 @@ const char* ZoneName(Zones zone) {
return "Gen.EmbeddingMatmul";
case Zones::kGenFFW:
return "Gen.FFW";
case Zones::kGenImageTokens:
return "Gen.ImageTokens";
case Zones::kGenSampleTop1:
return "Gen.SampleTop1";
case Zones::kGenSampleTopK:
@@ -111,6 +113,7 @@ hwy::ProfilerFlags ZoneFlags(Zones zone) {
case Zones::kGenEmbed:
case Zones::kGenEmbeddingMatmul:
case Zones::kGenFFW:
case Zones::kGenImageTokens:
return hwy::ProfilerFlags::kInclusive;
default:
return hwy::ProfilerFlags::kDefault;

View File

@@ -29,6 +29,7 @@ enum class Zones { // Keep sorted
kGenEmbed,
kGenEmbeddingMatmul,
kGenFFW,
kGenImageTokens,
kGenSampleTop1,
kGenSampleTopK,
kGenStats,