From 71406cf6d0347b5f4e855b1d7346d08a7ac70e49 Mon Sep 17 00:00:00 2001 From: Jan Wassenberg Date: Wed, 13 Aug 2025 03:15:07 -0700 Subject: [PATCH] More profiler interface fixes: hwy:: plus avoid ADD_ZONE PiperOrigin-RevId: 794493165 --- CMakeLists.txt | 2 +- MODULE.bazel | 2 +- examples/hello_world/CMakeLists.txt | 2 +- examples/simplified_gemma/CMakeLists.txt | 2 +- gemma/attention.cc | 2 +- gemma/gemma-inl.h | 2 +- ops/matmul-inl.h | 22 +++++++++++++--------- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b29a379..4e4bfd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64 EXCLUDE_FROM_ALL) +FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300 EXCLUDE_FROM_ALL) FetchContent_MakeAvailable(highway) ## Note: absl needs to be installed by sentencepiece. This will only happen if diff --git a/MODULE.bazel b/MODULE.bazel index 73ae1ec..b6b5f78 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -18,7 +18,7 @@ bazel_dep(name = "google_benchmark", version = "1.8.5") # Require a more recent version. git_override( module_name = "highway", - commit = "92d327e841d78e11ae888757a3e16d291951cf64", + commit = "1d16731233de45a365b43867f27d0a5f73925300", remote = "https://github.com/google/highway", ) diff --git a/examples/hello_world/CMakeLists.txt b/examples/hello_world/CMakeLists.txt index c466e1c..7a63ace 100644 --- a/examples/hello_world/CMakeLists.txt +++ b/examples/hello_world/CMakeLists.txt @@ -18,7 +18,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) include(FetchContent) -FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64) +FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300) FetchContent_MakeAvailable(highway) FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 9045b2f60fa2b323dfac0eaef8fc17565036f9f9) FetchContent_MakeAvailable(sentencepiece) diff --git a/examples/simplified_gemma/CMakeLists.txt b/examples/simplified_gemma/CMakeLists.txt index 4723852..da111cc 100644 --- a/examples/simplified_gemma/CMakeLists.txt +++ b/examples/simplified_gemma/CMakeLists.txt @@ -18,7 +18,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) include(FetchContent) -FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64) +FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300) FetchContent_MakeAvailable(highway) FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c) FetchContent_MakeAvailable(sentencepiece) diff --git a/gemma/attention.cc b/gemma/attention.cc index bd8917e..a04b868 100644 --- a/gemma/attention.cc +++ b/gemma/attention.cc @@ -345,7 +345,7 @@ void GemmaAttention(size_t num_tokens, const size_t layer_idx, AttentionActivations& activations, QBatch& qbatch, MatMulEnv& env, int flags) { static const auto zone = - env.ctx.profiler.AddZone("Gen.Attention", ProfilerFlags::kInclusive); + env.ctx.profiler.AddZone("Gen.Attention", hwy::ProfilerFlags::kInclusive); PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone); const LayerConfig& layer_config = layer.layer_config; diff --git a/gemma/gemma-inl.h b/gemma/gemma-inl.h index 9bda90f..c1ff722 100644 --- a/gemma/gemma-inl.h +++ b/gemma/gemma-inl.h @@ -114,7 +114,7 @@ void PostNorm(PostNormType post_norm, const MatPtr& weights, static inline void FFWNoVit(const LayerWeightsPtrs& layer, Activations& activations, MatMulEnv& env) { static const auto zone = - env.ctx.profiler.AddZone("Gen.FFW", ProfilerFlags::kInclusive); + env.ctx.profiler.AddZone("Gen.FFW", hwy::ProfilerFlags::kInclusive); PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone); const LayerConfig& layer_config = layer.layer_config; const size_t ffh_hidden_dim = layer_config.ff_hidden_dim; diff --git a/ops/matmul-inl.h b/ops/matmul-inl.h index bf85da3..4741759 100644 --- a/ops/matmul-inl.h +++ b/ops/matmul-inl.h @@ -912,7 +912,7 @@ class MMPerPackage { // Single M and K ranges, parallel N. Fills all of C directly. template HWY_INLINE void DoNT(const MatPtrT& B, RowPtrs C_rows) const { - static const auto zone = PROFILER_ADD_ZONE("MM.NT"); + static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT"); HWY_DASSERT(ranges_mc_.NumTasks() == 1); HWY_DASSERT(ranges_kc_.NumTasks() == 1); const IndexRange& range_M = ranges_mc_.Range(0); @@ -947,7 +947,7 @@ class MMPerPackage { // Single M range, parallel N, sequential K. Fills all of partial. template HWY_INLINE void DoNT_K(const MatPtrT& B, RowPtrs C_rows) const { - static const auto zone = PROFILER_ADD_ZONE("MM.NT_K"); + static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_K"); HWY_DASSERT(ranges_mc_.NumTasks() == 1); const IndexRange& range_mc = ranges_mc_.Range(0); @@ -991,12 +991,14 @@ class MMPerPackage { }); if (out_ == MMOut::kCopy) { - static const auto zone = PROFILER_ADD_ZONE("MM.NT_K.FillC.Copy"); + static const auto zone = + args_.env->ctx.profiler.AddZone("MM.NT_K.FillC.Copy"); MMZone fill_zone; fill_zone.MaybeEnter(0, zone, args_); MMScaleDemoteAdd::FillC(range_mc, range_np_, args_, C_rows); } else if (out_ == MMOut::kParM) { - static const auto zone = PROFILER_ADD_ZONE("MM.NT_K.FillC.ParM"); + static const auto zone = + args_.env->ctx.profiler.AddZone("MM.NT_K.FillC.ParM"); args_.env->parallel.ForRangeMC( range_mc, pkg_idx_, [&](size_t row_a, size_t worker) HWY_ATTR { MMZone fill_zone; @@ -1013,7 +1015,7 @@ class MMPerPackage { // Fills `mc x nc` sections of C directly, in parallel. template HWY_INLINE void DoNT_MT(const MatPtrT& B, RowPtrs C_rows) const { - static const auto zone = PROFILER_ADD_ZONE("MM.NT_MT"); + static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_MT"); HWY_DASSERT(ranges_kc_.NumTasks() == 1); const IndexRange& range_K = ranges_kc_.Range(0); const size_t K = range_K.Num(); @@ -1049,8 +1051,9 @@ class MMPerPackage { // Fills `mc x nc` sections of `partial`, then `C`, in parallel. template HWY_INLINE void DoNT_MT_K(const MatPtrT& B, RowPtrs C_rows) const { - static const auto zone = PROFILER_ADD_ZONE("MM.NT_MT_K"); - static const auto fill_zone = PROFILER_ADD_ZONE("MM.NT_MT_K.FillC"); + static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_MT_K"); + static const auto fill_zone = + args_.env->ctx.profiler.AddZone("MM.NT_MT_K.FillC"); const size_t kc_max = ranges_kc_.TaskSize(); HWY_DASSERT(kc_max <= MMStorage::kMaxKC); const size_t B_stride = @@ -1116,7 +1119,7 @@ class MMPerPackage { const size_t NBF = hn::Lanes(dbf); static_assert(hwy::IsSameEither(), "Can seek"); - static const auto zone = PROFILER_ADD_ZONE("MM.DecompressA"); + static const auto zone = args_.env->ctx.profiler.AddZone("MM.DecompressA"); const auto do_range = [&](const IndexRange& range_M, const IndexRange& range_K, @@ -1280,7 +1283,8 @@ struct MMImpl { RowPtrs C_rows, const MMArgs& args, const MMConfig& config) { PROFILER_ZONE("MM.DoMatMul"); - static const auto zone = PROFILER_ADD_ZONE("MM.DoMatMul.PerPkg"); + static const auto zone = + args.env->ctx.profiler.AddZone("MM.DoMatMul.PerPkg"); if constexpr (kMaxPackages > 1) { // Outermost loop: static NUMA-aware partition of B rows across packages.