More profiler interface fixes: qualify ProfilerFlags with hwy:: and replace PROFILER_ADD_ZONE with profiler.AddZone

PiperOrigin-RevId: 794493165
This commit is contained in:
Jan Wassenberg 2025-08-13 03:15:07 -07:00 committed by Copybara-Service
parent faa4102992
commit 71406cf6d0
7 changed files with 19 additions and 15 deletions

View File

@ -22,7 +22,7 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64 EXCLUDE_FROM_ALL) FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300 EXCLUDE_FROM_ALL)
FetchContent_MakeAvailable(highway) FetchContent_MakeAvailable(highway)
## Note: absl needs to be installed by sentencepiece. This will only happen if ## Note: absl needs to be installed by sentencepiece. This will only happen if

View File

@ -18,7 +18,7 @@ bazel_dep(name = "google_benchmark", version = "1.8.5")
# Require a more recent version. # Require a more recent version.
git_override( git_override(
module_name = "highway", module_name = "highway",
commit = "92d327e841d78e11ae888757a3e16d291951cf64", commit = "1d16731233de45a365b43867f27d0a5f73925300",
remote = "https://github.com/google/highway", remote = "https://github.com/google/highway",
) )

View File

@ -18,7 +18,7 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
include(FetchContent) include(FetchContent)
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64) FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300)
FetchContent_MakeAvailable(highway) FetchContent_MakeAvailable(highway)
FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 9045b2f60fa2b323dfac0eaef8fc17565036f9f9) FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 9045b2f60fa2b323dfac0eaef8fc17565036f9f9)
FetchContent_MakeAvailable(sentencepiece) FetchContent_MakeAvailable(sentencepiece)

View File

@ -18,7 +18,7 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
include(FetchContent) include(FetchContent)
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64) FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300)
FetchContent_MakeAvailable(highway) FetchContent_MakeAvailable(highway)
FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c) FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c)
FetchContent_MakeAvailable(sentencepiece) FetchContent_MakeAvailable(sentencepiece)

View File

@ -345,7 +345,7 @@ void GemmaAttention(size_t num_tokens, const size_t layer_idx,
AttentionActivations& activations, QBatch& qbatch, AttentionActivations& activations, QBatch& qbatch,
MatMulEnv& env, int flags) { MatMulEnv& env, int flags) {
static const auto zone = static const auto zone =
env.ctx.profiler.AddZone("Gen.Attention", ProfilerFlags::kInclusive); env.ctx.profiler.AddZone("Gen.Attention", hwy::ProfilerFlags::kInclusive);
PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone); PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone);
const LayerConfig& layer_config = layer.layer_config; const LayerConfig& layer_config = layer.layer_config;

View File

@ -114,7 +114,7 @@ void PostNorm(PostNormType post_norm, const MatPtr& weights,
static inline void FFWNoVit(const LayerWeightsPtrs& layer, static inline void FFWNoVit(const LayerWeightsPtrs& layer,
Activations& activations, MatMulEnv& env) { Activations& activations, MatMulEnv& env) {
static const auto zone = static const auto zone =
env.ctx.profiler.AddZone("Gen.FFW", ProfilerFlags::kInclusive); env.ctx.profiler.AddZone("Gen.FFW", hwy::ProfilerFlags::kInclusive);
PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone); PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone);
const LayerConfig& layer_config = layer.layer_config; const LayerConfig& layer_config = layer.layer_config;
const size_t ffh_hidden_dim = layer_config.ff_hidden_dim; const size_t ffh_hidden_dim = layer_config.ff_hidden_dim;

View File

@ -912,7 +912,7 @@ class MMPerPackage {
// Single M and K ranges, parallel N. Fills all of C directly. // Single M and K ranges, parallel N. Fills all of C directly.
template <typename TB, typename TC> template <typename TB, typename TC>
HWY_INLINE void DoNT(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const { HWY_INLINE void DoNT(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
static const auto zone = PROFILER_ADD_ZONE("MM.NT"); static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT");
HWY_DASSERT(ranges_mc_.NumTasks() == 1); HWY_DASSERT(ranges_mc_.NumTasks() == 1);
HWY_DASSERT(ranges_kc_.NumTasks() == 1); HWY_DASSERT(ranges_kc_.NumTasks() == 1);
const IndexRange& range_M = ranges_mc_.Range(0); const IndexRange& range_M = ranges_mc_.Range(0);
@ -947,7 +947,7 @@ class MMPerPackage {
// Single M range, parallel N, sequential K. Fills all of partial. // Single M range, parallel N, sequential K. Fills all of partial.
template <typename TB, typename TC> template <typename TB, typename TC>
HWY_INLINE void DoNT_K(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const { HWY_INLINE void DoNT_K(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
static const auto zone = PROFILER_ADD_ZONE("MM.NT_K"); static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_K");
HWY_DASSERT(ranges_mc_.NumTasks() == 1); HWY_DASSERT(ranges_mc_.NumTasks() == 1);
const IndexRange& range_mc = ranges_mc_.Range(0); const IndexRange& range_mc = ranges_mc_.Range(0);
@ -991,12 +991,14 @@ class MMPerPackage {
}); });
if (out_ == MMOut::kCopy) { if (out_ == MMOut::kCopy) {
static const auto zone = PROFILER_ADD_ZONE("MM.NT_K.FillC.Copy"); static const auto zone =
args_.env->ctx.profiler.AddZone("MM.NT_K.FillC.Copy");
MMZone fill_zone; MMZone fill_zone;
fill_zone.MaybeEnter(0, zone, args_); fill_zone.MaybeEnter(0, zone, args_);
MMScaleDemoteAdd::FillC(range_mc, range_np_, args_, C_rows); MMScaleDemoteAdd::FillC(range_mc, range_np_, args_, C_rows);
} else if (out_ == MMOut::kParM) { } else if (out_ == MMOut::kParM) {
static const auto zone = PROFILER_ADD_ZONE("MM.NT_K.FillC.ParM"); static const auto zone =
args_.env->ctx.profiler.AddZone("MM.NT_K.FillC.ParM");
args_.env->parallel.ForRangeMC( args_.env->parallel.ForRangeMC(
range_mc, pkg_idx_, [&](size_t row_a, size_t worker) HWY_ATTR { range_mc, pkg_idx_, [&](size_t row_a, size_t worker) HWY_ATTR {
MMZone fill_zone; MMZone fill_zone;
@ -1013,7 +1015,7 @@ class MMPerPackage {
// Fills `mc x nc` sections of C directly, in parallel. // Fills `mc x nc` sections of C directly, in parallel.
template <typename TB, typename TC> template <typename TB, typename TC>
HWY_INLINE void DoNT_MT(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const { HWY_INLINE void DoNT_MT(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
static const auto zone = PROFILER_ADD_ZONE("MM.NT_MT"); static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_MT");
HWY_DASSERT(ranges_kc_.NumTasks() == 1); HWY_DASSERT(ranges_kc_.NumTasks() == 1);
const IndexRange& range_K = ranges_kc_.Range(0); const IndexRange& range_K = ranges_kc_.Range(0);
const size_t K = range_K.Num(); const size_t K = range_K.Num();
@ -1049,8 +1051,9 @@ class MMPerPackage {
// Fills `mc x nc` sections of `partial`, then `C`, in parallel. // Fills `mc x nc` sections of `partial`, then `C`, in parallel.
template <typename TB, typename TC> template <typename TB, typename TC>
HWY_INLINE void DoNT_MT_K(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const { HWY_INLINE void DoNT_MT_K(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
static const auto zone = PROFILER_ADD_ZONE("MM.NT_MT_K"); static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_MT_K");
static const auto fill_zone = PROFILER_ADD_ZONE("MM.NT_MT_K.FillC"); static const auto fill_zone =
args_.env->ctx.profiler.AddZone("MM.NT_MT_K.FillC");
const size_t kc_max = ranges_kc_.TaskSize(); const size_t kc_max = ranges_kc_.TaskSize();
HWY_DASSERT(kc_max <= MMStorage::kMaxKC); HWY_DASSERT(kc_max <= MMStorage::kMaxKC);
const size_t B_stride = const size_t B_stride =
@ -1116,7 +1119,7 @@ class MMPerPackage {
const size_t NBF = hn::Lanes(dbf); const size_t NBF = hn::Lanes(dbf);
static_assert(hwy::IsSameEither<TA, BF16, float>(), "Can seek"); static_assert(hwy::IsSameEither<TA, BF16, float>(), "Can seek");
static const auto zone = PROFILER_ADD_ZONE("MM.DecompressA"); static const auto zone = args_.env->ctx.profiler.AddZone("MM.DecompressA");
const auto do_range = [&](const IndexRange& range_M, const auto do_range = [&](const IndexRange& range_M,
const IndexRange& range_K, const IndexRange& range_K,
@ -1280,7 +1283,8 @@ struct MMImpl {
RowPtrs<TC> C_rows, const MMArgs& args, RowPtrs<TC> C_rows, const MMArgs& args,
const MMConfig& config) { const MMConfig& config) {
PROFILER_ZONE("MM.DoMatMul"); PROFILER_ZONE("MM.DoMatMul");
static const auto zone = PROFILER_ADD_ZONE("MM.DoMatMul.PerPkg"); static const auto zone =
args.env->ctx.profiler.AddZone("MM.DoMatMul.PerPkg");
if constexpr (kMaxPackages > 1) { if constexpr (kMaxPackages > 1) {
// Outermost loop: static NUMA-aware partition of B rows across packages. // Outermost loop: static NUMA-aware partition of B rows across packages.