mirror of https://github.com/google/gemma.cpp.git
Fix empty enabled_lps in topology detection
Also expand the debug output. PiperOrigin-RevId: 838832605
This commit is contained in:
parent
6e5e4123f1
commit
1564dd3111
|
|
@ -27,6 +27,7 @@ namespace gcpp {
|
||||||
// Returns set of LPs available for use.
|
// Returns set of LPs available for use.
|
||||||
static LPS EnabledLPs(const BoundedSlice& lp_slice) {
|
static LPS EnabledLPs(const BoundedSlice& lp_slice) {
|
||||||
LPS enabled_lps;
|
LPS enabled_lps;
|
||||||
|
const size_t num_lps = hwy::TotalLogicalProcessors();
|
||||||
|
|
||||||
// Thread-safe caching during the first call because subsequent pinning
|
// Thread-safe caching during the first call because subsequent pinning
|
||||||
// overwrites the main thread's affinity.
|
// overwrites the main thread's affinity.
|
||||||
|
|
@ -35,6 +36,7 @@ static LPS EnabledLPs(const BoundedSlice& lp_slice) {
|
||||||
if (!GetThreadAffinity(affinity)) affinity = LPS();
|
if (!GetThreadAffinity(affinity)) affinity = LPS();
|
||||||
return affinity;
|
return affinity;
|
||||||
}();
|
}();
|
||||||
|
|
||||||
if (HWY_LIKELY(affinity.Any())) {
|
if (HWY_LIKELY(affinity.Any())) {
|
||||||
// To honor taskset/numactl *and* the user's `lp_slice`, we interpret
|
// To honor taskset/numactl *and* the user's `lp_slice`, we interpret
|
||||||
// the latter as a slice of the 1-bits of `enabled_lps`. Note that this
|
// the latter as a slice of the 1-bits of `enabled_lps`. Note that this
|
||||||
|
|
@ -48,18 +50,32 @@ static LPS EnabledLPs(const BoundedSlice& lp_slice) {
|
||||||
}
|
}
|
||||||
++enabled_idx;
|
++enabled_idx;
|
||||||
});
|
});
|
||||||
} else {
|
}
|
||||||
const size_t num_lps = hwy::TotalLogicalProcessors();
|
|
||||||
|
if (HWY_UNLIKELY(!enabled_lps.Any())) {
|
||||||
|
// First warn: either about unknown affinity, or no overlap with `lp_slice`.
|
||||||
|
if (!affinity.Any()) {
|
||||||
// Do not warn on Apple, where affinity is not supported.
|
// Do not warn on Apple, where affinity is not supported.
|
||||||
if (!HWY_OS_APPLE) {
|
if (!HWY_OS_APPLE) {
|
||||||
HWY_WARN("unknown OS affinity, max %zu LPs and slice %zu.", num_lps,
|
HWY_WARN("unknown OS affinity, max %zu LPs and slice %zu.", num_lps,
|
||||||
lp_slice.Num(num_lps));
|
lp_slice.Num(num_lps));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
HWY_WARN("LP slice [%zu, %zu) of initial affinity %zu is empty.",
|
||||||
|
lp_slice.Begin(), lp_slice.End(num_lps), affinity.Count());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set `enabled_lps` based only on `lp_slice` and total logical processors.
|
||||||
for (size_t lp = 0; lp < num_lps; ++lp) {
|
for (size_t lp = 0; lp < num_lps; ++lp) {
|
||||||
if (lp_slice.Contains(num_lps, lp)) {
|
if (lp_slice.Contains(num_lps, lp)) {
|
||||||
enabled_lps.Set(lp);
|
enabled_lps.Set(lp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!enabled_lps.Any()) {
|
||||||
|
HWY_WARN("no enabled LPs of total %zu, slice [%zu, %zu).", num_lps,
|
||||||
|
lp_slice.Begin(), lp_slice.End(affinity.Count()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Without threading support, only keep the first enabled LP; it might still
|
// Without threading support, only keep the first enabled LP; it might still
|
||||||
|
|
@ -72,6 +88,7 @@ static LPS EnabledLPs(const BoundedSlice& lp_slice) {
|
||||||
HWY_WARN("Warning, threads not supported, using only the main thread.");
|
HWY_WARN("Warning, threads not supported, using only the main thread.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWY_ASSERT(enabled_lps.Any());
|
||||||
return enabled_lps;
|
return enabled_lps;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -225,11 +242,12 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
|
||||||
});
|
});
|
||||||
if (HWY_UNLIKELY(clusters_.empty())) {
|
if (HWY_UNLIKELY(clusters_.empty())) {
|
||||||
HWY_WARN(
|
HWY_WARN(
|
||||||
"cluster_slice [%zu, %zu), tclusters %zu, tcores %zu, tLPs %zu does not"
|
"cluster_slice [%zu, %zu), tclusters %zu, tcores %zu, tLPs %zu, "
|
||||||
"overlap with enabled_lps 0x%zx; #LPs: %zu",
|
"#LPs: %zu does not overlap with %zu enabled LPs: ",
|
||||||
cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
|
cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
|
||||||
tclusters.size(), max_tcluster_cores, max_tcluster_lps,
|
tclusters.size(), max_tcluster_cores, max_tcluster_lps,
|
||||||
static_cast<size_t>(enabled_lps.Get64()), topology_.lps.size());
|
topology_.lps.size(), enabled_lps.Count());
|
||||||
|
enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue