mirror of https://github.com/google/gemma.cpp.git
More profiler interface fixes: hwy:: plus avoid ADD_ZONE
PiperOrigin-RevId: 794493165
This commit is contained in:
parent
faa4102992
commit
71406cf6d0
|
|
@ -22,7 +22,7 @@ set(CMAKE_CXX_STANDARD 17)
|
|||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64 EXCLUDE_FROM_ALL)
|
||||
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300 EXCLUDE_FROM_ALL)
|
||||
FetchContent_MakeAvailable(highway)
|
||||
|
||||
## Note: absl needs to be installed by sentencepiece. This will only happen if
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ bazel_dep(name = "google_benchmark", version = "1.8.5")
|
|||
# Require a more recent version.
|
||||
git_override(
|
||||
module_name = "highway",
|
||||
commit = "92d327e841d78e11ae888757a3e16d291951cf64",
|
||||
commit = "1d16731233de45a365b43867f27d0a5f73925300",
|
||||
remote = "https://github.com/google/highway",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ set(CMAKE_CXX_STANDARD 17)
|
|||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64)
|
||||
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300)
|
||||
FetchContent_MakeAvailable(highway)
|
||||
FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 9045b2f60fa2b323dfac0eaef8fc17565036f9f9)
|
||||
FetchContent_MakeAvailable(sentencepiece)
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ set(CMAKE_CXX_STANDARD 17)
|
|||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 92d327e841d78e11ae888757a3e16d291951cf64)
|
||||
FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1d16731233de45a365b43867f27d0a5f73925300)
|
||||
FetchContent_MakeAvailable(highway)
|
||||
FetchContent_Declare(sentencepiece GIT_REPOSITORY https://github.com/google/sentencepiece GIT_TAG 53de76561cfc149d3c01037f0595669ad32a5e7c)
|
||||
FetchContent_MakeAvailable(sentencepiece)
|
||||
|
|
|
|||
|
|
@ -345,7 +345,7 @@ void GemmaAttention(size_t num_tokens, const size_t layer_idx,
|
|||
AttentionActivations& activations, QBatch& qbatch,
|
||||
MatMulEnv& env, int flags) {
|
||||
static const auto zone =
|
||||
env.ctx.profiler.AddZone("Gen.Attention", ProfilerFlags::kInclusive);
|
||||
env.ctx.profiler.AddZone("Gen.Attention", hwy::ProfilerFlags::kInclusive);
|
||||
PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone);
|
||||
|
||||
const LayerConfig& layer_config = layer.layer_config;
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ void PostNorm(PostNormType post_norm, const MatPtr& weights,
|
|||
static inline void FFWNoVit(const LayerWeightsPtrs& layer,
|
||||
Activations& activations, MatMulEnv& env) {
|
||||
static const auto zone =
|
||||
env.ctx.profiler.AddZone("Gen.FFW", ProfilerFlags::kInclusive);
|
||||
env.ctx.profiler.AddZone("Gen.FFW", hwy::ProfilerFlags::kInclusive);
|
||||
PROFILER_ZONE3(env.ctx.profiler, hwy::Profiler::Thread(), zone);
|
||||
const LayerConfig& layer_config = layer.layer_config;
|
||||
const size_t ffh_hidden_dim = layer_config.ff_hidden_dim;
|
||||
|
|
|
|||
|
|
@ -912,7 +912,7 @@ class MMPerPackage {
|
|||
// Single M and K ranges, parallel N. Fills all of C directly.
|
||||
template <typename TB, typename TC>
|
||||
HWY_INLINE void DoNT(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.NT");
|
||||
static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT");
|
||||
HWY_DASSERT(ranges_mc_.NumTasks() == 1);
|
||||
HWY_DASSERT(ranges_kc_.NumTasks() == 1);
|
||||
const IndexRange& range_M = ranges_mc_.Range(0);
|
||||
|
|
@ -947,7 +947,7 @@ class MMPerPackage {
|
|||
// Single M range, parallel N, sequential K. Fills all of partial.
|
||||
template <typename TB, typename TC>
|
||||
HWY_INLINE void DoNT_K(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.NT_K");
|
||||
static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_K");
|
||||
HWY_DASSERT(ranges_mc_.NumTasks() == 1);
|
||||
const IndexRange& range_mc = ranges_mc_.Range(0);
|
||||
|
||||
|
|
@ -991,12 +991,14 @@ class MMPerPackage {
|
|||
});
|
||||
|
||||
if (out_ == MMOut::kCopy) {
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.NT_K.FillC.Copy");
|
||||
static const auto zone =
|
||||
args_.env->ctx.profiler.AddZone("MM.NT_K.FillC.Copy");
|
||||
MMZone fill_zone;
|
||||
fill_zone.MaybeEnter(0, zone, args_);
|
||||
MMScaleDemoteAdd::FillC(range_mc, range_np_, args_, C_rows);
|
||||
} else if (out_ == MMOut::kParM) {
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.NT_K.FillC.ParM");
|
||||
static const auto zone =
|
||||
args_.env->ctx.profiler.AddZone("MM.NT_K.FillC.ParM");
|
||||
args_.env->parallel.ForRangeMC(
|
||||
range_mc, pkg_idx_, [&](size_t row_a, size_t worker) HWY_ATTR {
|
||||
MMZone fill_zone;
|
||||
|
|
@ -1013,7 +1015,7 @@ class MMPerPackage {
|
|||
// Fills `mc x nc` sections of C directly, in parallel.
|
||||
template <typename TB, typename TC>
|
||||
HWY_INLINE void DoNT_MT(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.NT_MT");
|
||||
static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_MT");
|
||||
HWY_DASSERT(ranges_kc_.NumTasks() == 1);
|
||||
const IndexRange& range_K = ranges_kc_.Range(0);
|
||||
const size_t K = range_K.Num();
|
||||
|
|
@ -1049,8 +1051,9 @@ class MMPerPackage {
|
|||
// Fills `mc x nc` sections of `partial`, then `C`, in parallel.
|
||||
template <typename TB, typename TC>
|
||||
HWY_INLINE void DoNT_MT_K(const MatPtrT<TB>& B, RowPtrs<TC> C_rows) const {
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.NT_MT_K");
|
||||
static const auto fill_zone = PROFILER_ADD_ZONE("MM.NT_MT_K.FillC");
|
||||
static const auto zone = args_.env->ctx.profiler.AddZone("MM.NT_MT_K");
|
||||
static const auto fill_zone =
|
||||
args_.env->ctx.profiler.AddZone("MM.NT_MT_K.FillC");
|
||||
const size_t kc_max = ranges_kc_.TaskSize();
|
||||
HWY_DASSERT(kc_max <= MMStorage::kMaxKC);
|
||||
const size_t B_stride =
|
||||
|
|
@ -1116,7 +1119,7 @@ class MMPerPackage {
|
|||
const size_t NBF = hn::Lanes(dbf);
|
||||
static_assert(hwy::IsSameEither<TA, BF16, float>(), "Can seek");
|
||||
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.DecompressA");
|
||||
static const auto zone = args_.env->ctx.profiler.AddZone("MM.DecompressA");
|
||||
|
||||
const auto do_range = [&](const IndexRange& range_M,
|
||||
const IndexRange& range_K,
|
||||
|
|
@ -1280,7 +1283,8 @@ struct MMImpl {
|
|||
RowPtrs<TC> C_rows, const MMArgs& args,
|
||||
const MMConfig& config) {
|
||||
PROFILER_ZONE("MM.DoMatMul");
|
||||
static const auto zone = PROFILER_ADD_ZONE("MM.DoMatMul.PerPkg");
|
||||
static const auto zone =
|
||||
args.env->ctx.profiler.AddZone("MM.DoMatMul.PerPkg");
|
||||
|
||||
if constexpr (kMaxPackages > 1) {
|
||||
// Outermost loop: static NUMA-aware partition of B rows across packages.
|
||||
|
|
|
|||
Loading…
Reference in New Issue