This commit is contained in:
copybara-service[bot] 2025-12-08 14:45:36 +00:00 committed by GitHub
commit 60b23bcc9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 73 additions and 45 deletions

View File

@ -66,6 +66,7 @@ cc_library(
srcs = ["util/topology.cc"], srcs = ["util/topology.cc"],
hdrs = ["util/topology.h"], hdrs = ["util/topology.h"],
deps = [ deps = [
"@highway//:bit_set",
"@highway//:hwy", "@highway//:hwy",
"@highway//:topology", "@highway//:topology",
], ],

View File

@ -21,6 +21,7 @@
#include <vector> #include <vector>
#include "hwy/base.h" #include "hwy/base.h"
#include "hwy/bit_set.h"
namespace gcpp { namespace gcpp {
@ -173,12 +174,13 @@ constexpr size_t kMaxLPsPerCluster = 6;
#if !GEMMA_DISABLE_TOPOLOGY #if !GEMMA_DISABLE_TOPOLOGY
static size_t CoresFromLPs(const LPS& lps, const hwy::Topology& topology) { // Returns number of distinct SMT (hyperthreads).
LPS cores; static size_t NumSMT(const hwy::Topology& topology) {
lps.Foreach([&](size_t lp) { hwy::BitSet64 smt;
if (topology.lps[lp].smt == 0) cores.Set(lp); for (const hwy::Topology::LP& lp : topology.lps) {
}); smt.Set(lp.smt);
return cores.Count(); }
return smt.Count();
} }
// tcluster is a modifiable copy of the first cluster in the package. // tcluster is a modifiable copy of the first cluster in the package.
@ -204,34 +206,66 @@ void BoundedTopology::SplitLargeCluster(const LPS& enabled_lps,
} }
} }
// Main part of ctor, called when topology is known. using TClusters = std::vector<hwy::Topology::Cluster>;
bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
const size_t tpkg_idx = package_slice_.Begin(); // Returns false if no cluster in `tclusters` has any enabled LPs.
HWY_ASSERT(tpkg_idx < topology_.packages.size()); static bool AnyEnabledLPs(const TClusters& tclusters, const LPS& enabled_lps) {
const hwy::Topology::Package& tpackage = topology_.packages[tpkg_idx];
const std::vector<hwy::Topology::Cluster>& tclusters = tpackage.clusters;
if (HWY_UNLIKELY(tclusters.empty())) { if (HWY_UNLIKELY(tclusters.empty())) {
HWY_WARN("Topology: no clusters found in package %zu.", tpkg_idx); HWY_WARN("Topology: no clusters found.");
return false; return false;
} }
size_t max_tcluster_cores = 0;
size_t max_tcluster_lps = 0;
for (const hwy::Topology::Cluster& tcluster : tclusters) { for (const hwy::Topology::Cluster& tcluster : tclusters) {
const size_t cores = CoresFromLPs(tcluster.lps, topology_); bool any_lp_enabled = false;
const size_t lps = tcluster.lps.Count(); tcluster.lps.Foreach(
max_tcluster_cores = HWY_MAX(max_tcluster_cores, cores); [&](size_t lp) { any_lp_enabled |= (enabled_lps.Get(lp)); });
max_tcluster_lps = HWY_MAX(max_tcluster_lps, lps); if (any_lp_enabled) return true;
} }
HWY_ASSERT(max_tcluster_cores != 0);
HWY_ASSERT(max_tcluster_lps >= max_tcluster_cores); // No warning: this can happen if OS affinity limits us to the second package.
return false;
}
// Selects the clusters of a package that still has LPs enabled by the OS
// affinity mask. Package `tpkg_idx` is tried first; if none of its clusters
// has an enabled LP, the package whose index differs in the lowest bit
// (`tpkg_idx ^ 1`) is tried, which covers the common case of at most two
// packages. Returns nullptr if neither attempt succeeds; a warning listing the
// enabled LPs is only emitted when a second package existed and also failed.
static const TClusters* GetPackageClusters(const hwy::Topology& topology,
                                           size_t tpkg_idx,
                                           const LPS& enabled_lps) {
  const size_t num_packages = topology.packages.size();
  HWY_ASSERT(tpkg_idx < num_packages);

  // Yields the package's clusters if any of them has an enabled LP.
  const auto clusters_if_enabled = [&](size_t pkg_idx) -> const TClusters* {
    const TClusters& candidate = topology.packages[pkg_idx].clusters;
    return AnyEnabledLPs(candidate, enabled_lps) ? &candidate : nullptr;
  };

  if (const TClusters* primary = clusters_if_enabled(tpkg_idx)) {
    return primary;
  }

  // Fall back to the sibling package. An index equal to the package count
  // means there is no sibling to try.
  const size_t other_idx = tpkg_idx ^ 1;
  if (other_idx == num_packages) return nullptr;

  if (const TClusters* fallback = clusters_if_enabled(other_idx)) {
    return fallback;
  }

  HWY_WARN(
      "Ignoring topology (%zu tpackages) because no clusters overlap with the "
      "OS affinity (%zu enabled LPs): ",
      num_packages, enabled_lps.Count());
  enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
  return nullptr;
}
// Main part of ctor, called when topology is known.
bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
const TClusters* maybe_tclusters =
GetPackageClusters(topology_, package_slice_.Begin(), enabled_lps);
if (!maybe_tclusters) return false;
const TClusters& tclusters = *maybe_tclusters;
// Populate `clusters` with the subset of clusters in `cluster_slice` that // Populate `clusters` with the subset of clusters in `cluster_slice` that
// have any enabled LPs. // have any enabled LPs.
clusters_.reserve(cluster_slice_.Num(tclusters.size())); clusters_.reserve(cluster_slice_.Num(tclusters.size()));
cluster_slice_.Foreach("cluster", tclusters.size(), [&](size_t cluster_idx) { cluster_slice_.Foreach("cluster", tclusters.size(), [&](size_t cluster_idx) {
const hwy::Topology::Cluster& tcluster = tpackage.clusters[cluster_idx]; Cluster cluster(enabled_lps, topology_.lps, tclusters[cluster_idx]);
Cluster cluster(enabled_lps, topology_.lps, tcluster);
// Skip if empty, i.e. too few `enabled_lps`. // Skip if empty, i.e. too few `enabled_lps`.
if (HWY_LIKELY(cluster.NumWorkers() != 0)) { if (HWY_LIKELY(cluster.NumWorkers() != 0)) {
@ -240,20 +274,10 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
nodes_.Set(cluster.Node()); nodes_.Set(cluster.Node());
} }
}); });
if (HWY_UNLIKELY(clusters_.empty())) {
HWY_WARN(
"cluster_slice [%zu, %zu), tclusters %zu, tcores %zu, tLPs %zu, "
"#LPs: %zu does not overlap with %zu enabled LPs: ",
cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
tclusters.size(), max_tcluster_cores, max_tcluster_lps,
topology_.lps.size(), enabled_lps.Count());
enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
return false;
}
if (kSplitLargeClusters && clusters_.size() == 1 && if (kSplitLargeClusters && clusters_.size() == 1 &&
enabled_lps.Count() >= 16) { enabled_lps.Count() >= 16) {
SplitLargeCluster(enabled_lps, tpackage.clusters[0]); SplitLargeCluster(enabled_lps, tclusters[0]);
} }
// Sort by descending 'size' so that users who only use one get the largest. // Sort by descending 'size' so that users who only use one get the largest.
@ -262,20 +286,23 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
return a.NumWorkers() > b.NumWorkers(); return a.NumWorkers() > b.NumWorkers();
}); });
// Largest number of enabled workers in any cluster, for `topology_string_`. // Happens if all LPs are HTs (we checked that at least some LPs are enabled).
// This may be less than `max_tcluster_cores` if `enabled_lps` excludes some. if (HWY_UNLIKELY(clusters_.empty())) {
size_t max_cluster_workers = 0; HWY_WARN(
for (const Cluster& c : clusters_) { "Ignoring topology - no usable clusters. cluster_slice [%zu, %zu), "
max_cluster_workers = HWY_MAX(max_cluster_workers, c.NumWorkers()); "%zu tclusters, %zu tLPs, %zu enabled LPs: ",
cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
tclusters.size(), topology_.lps.size(), enabled_lps.Count());
enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
return false;
} }
HWY_ASSERT(max_cluster_workers <= max_tcluster_cores);
// Do not warn about large clusters: GNR has 40.
const size_t num_smt = NumSMT(topology_);
snprintf(topology_string_, sizeof(topology_string_), snprintf(topology_string_, sizeof(topology_string_),
"%zuS %zuX %zuC %zuH, using %zuX %zuC (nodes=%zu)", "%zuS %zuX %zuC %zuH, using %zuX %zuC (nodes=%zu)",
topology_.packages.size(), tclusters.size(), max_tcluster_cores, topology_.packages.size(), tclusters.size(),
max_tcluster_lps / max_tcluster_cores, NumClusters(), tclusters[0].lps.Count() / num_smt, num_smt, NumClusters(),
max_cluster_workers, nodes_.Count()); clusters_[0].NumWorkers(), nodes_.Count());
return true; return true;
} }

View File

@ -93,7 +93,7 @@ class BoundedTopology {
class Cluster { class Cluster {
public: public:
Cluster(const LPS& lps); explicit Cluster(const LPS& lps);
Cluster(const LPS& enabled_lps, Cluster(const LPS& enabled_lps,
const std::vector<hwy::Topology::LP>& all_lps, const std::vector<hwy::Topology::LP>& all_lps,
const hwy::Topology::Cluster& tcluster); const hwy::Topology::Cluster& tcluster);