mirror of https://github.com/google/gemma.cpp.git
Improve instrumentation for ViT parts
PiperOrigin-RevId: 875302990
This commit is contained in:
parent
df162ead7c
commit
c6587efe70
|
|
@ -726,6 +726,7 @@ void GenerateImageTokensT(const ModelConfig& config,
|
|||
const RuntimeConfig& runtime_config, size_t seq_len,
|
||||
const WeightsPtrs& weights, const Image& image,
|
||||
ImageTokens& image_tokens, MatMulEnv& env) {
|
||||
GCPP_ZONE(env.ctx, hwy::Profiler::GlobalIdx(), Zones::kGenImageTokens);
|
||||
if (config.vit_config.layer_configs.empty()) {
|
||||
HWY_ABORT("Model does not support generating image tokens.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ class VitAttention {
|
|||
const size_t seq_len =
|
||||
static_cast<size_t>(activations_.attention.div_seq_len.GetDivisor());
|
||||
const float query_scale = 1.0f / sqrtf(static_cast<float>(qkv_dim));
|
||||
PROFILER_ZONE("Gen.VitAttention.DotSoftmax");
|
||||
PROFILER_ZONE("Gen.VitAttention.DotSoftmaxMatrix");
|
||||
|
||||
MatPtrT<float>& Q = activations_.attention.vit_Q;
|
||||
MatPtrT<float>& K = activations_.attention.vit_K;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <type_traits> // std::enable_if_t
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ops/matmul.h"
|
||||
|
|
@ -1869,6 +1868,7 @@ HWY_NOINLINE HWY_MAYBE_UNUSED TokenAndProb FusedSoftmaxAndSampleTopK(
|
|||
// Performs 4x4 average pooling across row vectors
|
||||
// Input has 4096 (64*64) rows, output has 256 (16*16) rows
|
||||
// Each output row is the average of a 4x4 block of input rows
|
||||
// This is surprisingly inexpensive for small images (<1 ms).
|
||||
template <typename T>
|
||||
MatStorageT<T> AvgPool4x4(MatStorageT<T>& input, const Allocator& allocator) {
|
||||
const Extents2D extents = input.Extents();
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ bool Image::ReadPPM(const std::string& filename) {
|
|||
return ReadPPM(hwy::Span<const char>(content.data(), content.size()));
|
||||
}
|
||||
|
||||
// This is surprisingly inexpensive for small images (3 ms).
|
||||
bool Image::ReadPPM(const hwy::Span<const char>& buf) {
|
||||
const char* pos = CheckP6Format(buf.cbegin(), buf.cend());
|
||||
if (!pos) {
|
||||
|
|
@ -171,6 +172,7 @@ void Image::Set(int width, int height, const float* data) {
|
|||
}
|
||||
}
|
||||
|
||||
// This is surprisingly inexpensive for small images (2 ms).
|
||||
void Image::Resize(int new_width, int new_height) {
|
||||
std::vector<float> new_data(new_width * new_height * 3);
|
||||
// TODO: switch to bilinear interpolation, or add antialiasing.
|
||||
|
|
|
|||
|
|
@ -47,6 +47,8 @@ const char* ZoneName(Zones zone) {
|
|||
return "Gen.EmbeddingMatmul";
|
||||
case Zones::kGenFFW:
|
||||
return "Gen.FFW";
|
||||
case Zones::kGenImageTokens:
|
||||
return "Gen.ImageTokens";
|
||||
case Zones::kGenSampleTop1:
|
||||
return "Gen.SampleTop1";
|
||||
case Zones::kGenSampleTopK:
|
||||
|
|
@ -111,6 +113,7 @@ hwy::ProfilerFlags ZoneFlags(Zones zone) {
|
|||
case Zones::kGenEmbed:
|
||||
case Zones::kGenEmbeddingMatmul:
|
||||
case Zones::kGenFFW:
|
||||
case Zones::kGenImageTokens:
|
||||
return hwy::ProfilerFlags::kInclusive;
|
||||
default:
|
||||
return hwy::ProfilerFlags::kDefault;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ enum class Zones { // Keep sorted
|
|||
kGenEmbed,
|
||||
kGenEmbeddingMatmul,
|
||||
kGenFFW,
|
||||
kGenImageTokens,
|
||||
kGenSampleTop1,
|
||||
kGenSampleTopK,
|
||||
kGenStats,
|
||||
|
|
|
|||
Loading…
Reference in New Issue