Skip to content

Commit bbbd595

Browse files
jan-wassenberg and copybara-github
authored and committed
Improve instrumentation for ViT parts
PiperOrigin-RevId: 875227194
1 parent df162ea commit bbbd595

6 files changed

Lines changed: 9 additions & 2 deletions

File tree

gemma/gemma.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -726,6 +726,7 @@ void GenerateImageTokensT(const ModelConfig& config,
726726
const RuntimeConfig& runtime_config, size_t seq_len,
727727
const WeightsPtrs& weights, const Image& image,
728728
ImageTokens& image_tokens, MatMulEnv& env) {
729+
GCPP_ZONE(env.ctx, hwy::Profiler::GlobalIdx(), Zones::kGenImageTokens);
729730
if (config.vit_config.layer_configs.empty()) {
730731
HWY_ABORT("Model does not support generating image tokens.");
731732
}

gemma/vit.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ class VitAttention {
7676
const size_t seq_len =
7777
static_cast<size_t>(activations_.attention.div_seq_len.GetDivisor());
7878
const float query_scale = 1.0f / sqrtf(static_cast<float>(qkv_dim));
79-
PROFILER_ZONE("Gen.VitAttention.DotSoftmax");
79+
PROFILER_ZONE("Gen.VitAttention.DotSoftmaxMatrix");
8080

8181
MatPtrT<float>& Q = activations_.attention.vit_Q;
8282
MatPtrT<float>& K = activations_.attention.vit_K;

ops/ops-inl.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
#include <cstdint>
2626
#include <random>
2727
#include <type_traits> // std::enable_if_t
28-
#include <utility>
2928
#include <vector>
3029

3130
#include "ops/matmul.h"
@@ -1869,6 +1868,7 @@ HWY_NOINLINE HWY_MAYBE_UNUSED TokenAndProb FusedSoftmaxAndSampleTopK(
18691868
// Performs 4x4 average pooling across row vectors
18701869
// Input has 4096 (64*64) rows, output has 256 (16*16) rows
18711870
// Each output row is the average of a 4x4 block of input rows
1871+
// This is surprisingly inexpensive for small images (<1 ms).
18721872
template <typename T>
18731873
MatStorageT<T> AvgPool4x4(MatStorageT<T>& input, const Allocator& allocator) {
18741874
const Extents2D extents = input.Extents();

paligemma/image.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ bool Image::ReadPPM(const std::string& filename) {
100100
return ReadPPM(hwy::Span<const char>(content.data(), content.size()));
101101
}
102102

103+
// This is surprisingly inexpensive for small images (3 ms).
103104
bool Image::ReadPPM(const hwy::Span<const char>& buf) {
104105
const char* pos = CheckP6Format(buf.cbegin(), buf.cend());
105106
if (!pos) {
@@ -171,6 +172,7 @@ void Image::Set(int width, int height, const float* data) {
171172
}
172173
}
173174

175+
// This is surprisingly inexpensive for small images (2 ms).
174176
void Image::Resize(int new_width, int new_height) {
175177
std::vector<float> new_data(new_width * new_height * 3);
176178
// TODO: go to bilinear interpolation, or antialias.

util/zones.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@ const char* ZoneName(Zones zone) {
4747
return "Gen.EmbeddingMatmul";
4848
case Zones::kGenFFW:
4949
return "Gen.FFW";
50+
case Zones::kGenImageTokens:
51+
return "Gen.ImageTokens";
5052
case Zones::kGenSampleTop1:
5153
return "Gen.SampleTop1";
5254
case Zones::kGenSampleTopK:
@@ -111,6 +113,7 @@ hwy::ProfilerFlags ZoneFlags(Zones zone) {
111113
case Zones::kGenEmbed:
112114
case Zones::kGenEmbeddingMatmul:
113115
case Zones::kGenFFW:
116+
case Zones::kGenImageTokens:
114117
return hwy::ProfilerFlags::kInclusive;
115118
default:
116119
return hwy::ProfilerFlags::kDefault;

util/zones.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ enum class Zones { // Keep sorted
2929
kGenEmbed,
3030
kGenEmbeddingMatmul,
3131
kGenFFW,
32+
kGenImageTokens,
3233
kGenSampleTop1,
3334
kGenSampleTopK,
3435
kGenStats,

0 commit comments

Comments
 (0)