Fix empty enabled_lps in topology detection

Also expand the debug output.

PiperOrigin-RevId: 838832605
Jan Wassenberg 2025-12-01 10:23:14 -08:00 committed by Copybara-Service
parent 6e5e4123f1
commit 1564dd3111
1 changed file with 27 additions and 9 deletions


@@ -27,6 +27,7 @@ namespace gcpp {
 // Returns set of LPs available for use.
 static LPS EnabledLPs(const BoundedSlice& lp_slice) {
   LPS enabled_lps;
+  const size_t num_lps = hwy::TotalLogicalProcessors();
   // Thread-safe caching during the first call because subsequent pinning
   // overwrites the main thread's affinity.
@@ -35,6 +36,7 @@ static LPS EnabledLPs(const BoundedSlice& lp_slice) {
     if (!GetThreadAffinity(affinity)) affinity = LPS();
     return affinity;
   }();
+
   if (HWY_LIKELY(affinity.Any())) {
     // To honor taskset/numactl *and* the users's `lp_slice`, we interpret
     // the latter as a slice of the 1-bits of `enabled_lps`. Note that this
@@ -48,18 +50,32 @@ static LPS EnabledLPs(const BoundedSlice& lp_slice) {
       }
       ++enabled_idx;
     });
-  } else {
-    const size_t num_lps = hwy::TotalLogicalProcessors();
+  }
+
+  if (HWY_UNLIKELY(!enabled_lps.Any())) {
+    // First warn: either about unknown affinity, or no overlap with `lp_slice`.
+    if (!affinity.Any()) {
       // Do not warn on Apple, where affinity is not supported.
       if (!HWY_OS_APPLE) {
         HWY_WARN("unknown OS affinity, max %zu LPs and slice %zu.", num_lps,
                  lp_slice.Num(num_lps));
       }
+    } else {
+      HWY_WARN("LP slice [%zu, %zu) of initial affinity %zu is empty.",
+               lp_slice.Begin(), lp_slice.End(num_lps), affinity.Count());
+    }
+    // Set `enabled_lps` based only on `lp_slice` and total logical processors.
     for (size_t lp = 0; lp < num_lps; ++lp) {
       if (lp_slice.Contains(num_lps, lp)) {
         enabled_lps.Set(lp);
       }
     }
+    if (!enabled_lps.Any()) {
+      HWY_WARN("no enabled LPs of total %zu, slice [%zu, %zu).", num_lps,
+               lp_slice.Begin(), lp_slice.End(affinity.Count()));
+    }
   }
 
   // Without threading support, only keep the first enabled LP; it might still
@@ -72,6 +88,7 @@ static LPS EnabledLPs(const BoundedSlice& lp_slice) {
     HWY_WARN("Warning, threads not supported, using only the main thread.");
   }
+  HWY_ASSERT(enabled_lps.Any());
   return enabled_lps;
 }
@@ -225,11 +242,12 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
   });
   if (HWY_UNLIKELY(clusters_.empty())) {
     HWY_WARN(
-        "cluster_slice [%zu, %zu), tclusters %zu, tcores %zu, tLPs %zu does not"
-        "overlap with enabled_lps 0x%zx; #LPs: %zu",
+        "cluster_slice [%zu, %zu), tclusters %zu, tcores %zu, tLPs %zu, "
+        "#LPs: %zu does not overlap with %zu enabled LPs: ",
         cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
         tclusters.size(), max_tcluster_cores, max_tcluster_lps,
-        static_cast<size_t>(enabled_lps.Get64()), topology_.lps.size());
+        topology_.lps.size(), enabled_lps.Count());
+    enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
     return false;
   }
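
Below is a minimal, self-contained sketch of the control flow this commit establishes in EnabledLPs. It uses std::bitset as a stand-in for Highway's LPS and a made-up SliceRange struct in place of BoundedSlice; those names, the simplified signature, and the 256-LP cap are illustrative assumptions, not the actual gemma.cpp or Highway API. The point it demonstrates: slice the 1-bits of the OS affinity first, and if that yields no LPs (unknown affinity, or a slice that does not overlap the affinity), fall back to slicing all logical processors so the returned set is never empty.

#include <bitset>
#include <cstddef>
#include <cstdio>

using std::size_t;

// Stand-in for Highway's LPS (logical processor set); 256 is an arbitrary cap.
using LpSet = std::bitset<256>;

// Hypothetical stand-in for BoundedSlice: a [begin, begin + num) window,
// clamped to `total` elements.
struct SliceRange {
  size_t begin = 0;
  size_t num = ~size_t{0};  // default: "everything from begin onward"

  size_t End(size_t total) const {
    if (begin >= total) return total;
    const size_t remaining = total - begin;
    return begin + (num < remaining ? num : remaining);
  }
  bool Contains(size_t total, size_t idx) const {
    return idx >= begin && idx < End(total);
  }
};

// Mirrors the fixed control flow: first slice the 1-bits of the OS affinity;
// if that yields nothing (unknown affinity OR no overlap with the slice),
// fall back to slicing all `total_lps` logical processors.
LpSet EnabledLPs(const LpSet& affinity, const SliceRange& slice,
                 size_t total_lps) {
  LpSet enabled;
  if (affinity.any()) {
    size_t enabled_idx = 0;  // index among the 1-bits of `affinity`
    for (size_t lp = 0; lp < total_lps; ++lp) {
      if (!affinity.test(lp)) continue;
      if (slice.Contains(affinity.count(), enabled_idx)) enabled.set(lp);
      ++enabled_idx;
    }
  }
  if (!enabled.any()) {  // previously this could fall through as an empty set
    std::fprintf(stderr, "Empty overlap; slicing all %zu LPs instead.\n",
                 total_lps);
    for (size_t lp = 0; lp < total_lps; ++lp) {
      if (slice.Contains(total_lps, lp)) enabled.set(lp);
    }
  }
  return enabled;
}

int main() {
  LpSet affinity;  // e.g. taskset restricted the process to LPs 4..7
  for (size_t lp = 4; lp < 8; ++lp) affinity.set(lp);
  // Slice requesting enabled-LP indices [8, 12): no overlap with 4 affinity bits.
  const SliceRange slice{8, 4};
  const LpSet enabled = EnabledLPs(affinity, slice, /*total_lps=*/16);
  std::fprintf(stderr, "enabled LPs: %zu\n", enabled.count());
  return 0;
}

Compiled as C++17, this exercises exactly the case the commit title names: an affinity of LPs 4..7 combined with a slice starting at enabled index 8 used to produce an empty set; with the fallback, LPs 8..11 are selected and the situation is reported instead of silently returning nothing.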