Merge 2b9245ad93 into b510ba2ab2

2025-12-08 14:45:36 +00:00 · 2025-12-08 14:45:36 +00:00 · 60b23bcc9e
parent b510ba2ab2 2b9245ad93
commit 60b23bcc9e
3 changed files with 73 additions and 45 deletions
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -66,6 +66,7 @@ cc_library(
    srcs = ["util/topology.cc"],
    hdrs = ["util/topology.h"],
    deps = [
        "@highway//:bit_set",
        "@highway//:hwy",
        "@highway//:topology",
    ],
--- a/util/topology.cc
+++ b/util/topology.cc
@ -21,6 +21,7 @@
 #include <vector>
 #include "hwy/base.h"
 #include "hwy/bit_set.h"
 namespace gcpp {
@ -173,12 +174,13 @@ constexpr size_t kMaxLPsPerCluster = 6;
 #if !GEMMA_DISABLE_TOPOLOGY
-static size_t CoresFromLPs(const LPS& lps, const hwy::Topology& topology) {
+// Returns number of distinct SMT (hyperthreads).
-  LPS cores;
+static size_t NumSMT(const hwy::Topology& topology) {
-  lps.Foreach([&](size_t lp) {
+  hwy::BitSet64 smt;
-    if (topology.lps[lp].smt == 0) cores.Set(lp);
+  for (const hwy::Topology::LP& lp : topology.lps) {
-  });
+    smt.Set(lp.smt);
-  return cores.Count();
+  }
  return smt.Count();
 }
 // tcluster is a modifiable copy of the first cluster in the package.
@ -204,34 +206,66 @@ void BoundedTopology::SplitLargeCluster(const LPS& enabled_lps,
  }
 }
-// Main part of ctor, called when topology is known.
+using TClusters = std::vector<hwy::Topology::Cluster>;
-bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
+
-  const size_t tpkg_idx = package_slice_.Begin();
+// Returns false if no cluster in `tclusters` has any enabled LPs.
-  HWY_ASSERT(tpkg_idx < topology_.packages.size());
+static bool AnyEnabledLPs(const TClusters& tclusters, const LPS& enabled_lps) {
  const hwy::Topology::Package& tpackage = topology_.packages[tpkg_idx];
  const std::vector<hwy::Topology::Cluster>& tclusters = tpackage.clusters;
  if (HWY_UNLIKELY(tclusters.empty())) {
-    HWY_WARN("Topology: no clusters found in package %zu.", tpkg_idx);
+    HWY_WARN("Topology: no clusters found.");
    return false;
  }
  size_t max_tcluster_cores = 0;
  size_t max_tcluster_lps = 0;
  for (const hwy::Topology::Cluster& tcluster : tclusters) {
-    const size_t cores = CoresFromLPs(tcluster.lps, topology_);
+    bool any_lp_enabled = false;
-    const size_t lps = tcluster.lps.Count();
+    tcluster.lps.Foreach(
-    max_tcluster_cores = HWY_MAX(max_tcluster_cores, cores);
+        [&](size_t lp) { any_lp_enabled |= (enabled_lps.Get(lp)); });
-    max_tcluster_lps = HWY_MAX(max_tcluster_lps, lps);
+    if (any_lp_enabled) return true;
  }
-  HWY_ASSERT(max_tcluster_cores != 0);
+
-  HWY_ASSERT(max_tcluster_lps >= max_tcluster_cores);
+  // No warning: this can happen if OS affinity limits us to the second package.
  return false;
 }
 // Returns a pointer to the clusters of a package for which `AnyEnabledLPs`
 // reports at least one LP in `enabled_lps`, or nullptr on failure. Tries
 // `tpkg_idx` first, then `tpkg_idx ^ 1` (equal to `1 - tpkg_idx` for indices
 // 0 and 1), which is suitable for the common case of up to two packages.
 //
 // `tpkg_idx` must be less than `topology.packages.size()` (asserted). The
 // returned pointer refers to a `clusters` member inside `topology`, so it is
 // only usable while `topology` is.
 static const TClusters* GetPackageClusters(const hwy::Topology& topology,
                                           size_t tpkg_idx,
                                           const LPS& enabled_lps) {
  const size_t num_packages = topology.packages.size();
  HWY_ASSERT(tpkg_idx < num_packages);
  // First choice: the package the caller asked for.
  {
    const TClusters& tclusters = topology.packages[tpkg_idx].clusters;
    if (AnyEnabledLPs(tclusters, enabled_lps)) return &tclusters;
  }
  // Retry with the other package, if any.
  // Flipping the low bit yields either `tpkg_idx - 1` or `tpkg_idx + 1`. Given
  // the assert above, the result is at most `num_packages`, and equality means
  // the sibling package does not exist. That case returns without the
  // "Ignoring topology" warning below.
  tpkg_idx ^= 1;
  if (tpkg_idx == num_packages) return nullptr;
  {
    const TClusters& tclusters = topology.packages[tpkg_idx].clusters;
    if (AnyEnabledLPs(tclusters, enabled_lps)) return &tclusters;
  }
  // Neither candidate package qualified: warn, then list the enabled LP
  // indices on stderr (comma-separated, no trailing newline is written here).
  HWY_WARN(
      "Ignoring topology (%zu tpackages) because no clusters overlap with the "
      "OS affinity (%zu enabled LPs): ",
      num_packages, enabled_lps.Count());
  enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
  return nullptr;
 }
 // Main part of ctor, called when topology is known.
 bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
  const TClusters* maybe_tclusters =
      GetPackageClusters(topology_, package_slice_.Begin(), enabled_lps);
  if (!maybe_tclusters) return false;
  const TClusters& tclusters = *maybe_tclusters;
  // Populate `clusters` with the subset of clusters in `cluster_slice` that
  // have any enabled LPs.
  clusters_.reserve(cluster_slice_.Num(tclusters.size()));
  cluster_slice_.Foreach("cluster", tclusters.size(), [&](size_t cluster_idx) {
-    const hwy::Topology::Cluster& tcluster = tpackage.clusters[cluster_idx];
+    Cluster cluster(enabled_lps, topology_.lps, tclusters[cluster_idx]);
    Cluster cluster(enabled_lps, topology_.lps, tcluster);
    // Skip if empty, i.e. too few `enabled_lps`.
    if (HWY_LIKELY(cluster.NumWorkers() != 0)) {
@ -240,20 +274,10 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
      nodes_.Set(cluster.Node());
    }
  });
  if (HWY_UNLIKELY(clusters_.empty())) {
    HWY_WARN(
        "cluster_slice [%zu, %zu), tclusters %zu, tcores %zu, tLPs %zu, "
        "#LPs: %zu does not overlap with %zu enabled LPs: ",
        cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
        tclusters.size(), max_tcluster_cores, max_tcluster_lps,
        topology_.lps.size(), enabled_lps.Count());
    enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
    return false;
  }
  if (kSplitLargeClusters && clusters_.size() == 1 &&
      enabled_lps.Count() >= 16) {
-    SplitLargeCluster(enabled_lps, tpackage.clusters[0]);
+    SplitLargeCluster(enabled_lps, tclusters[0]);
  }
  // Sort by descending 'size' so that users who only use one get the largest.
@ -262,20 +286,23 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
              return a.NumWorkers() > b.NumWorkers();
            });
-  // Largest number of enabled workers in any cluster, for `topology_string_`.
+  // Happens if all LPs are HTs (we checked that at least some LPs are enabled).
-  // This may be less than `max_tcluster_cores` if `enabled_lps` excludes some.
+  if (HWY_UNLIKELY(clusters_.empty())) {
-  size_t max_cluster_workers = 0;
+    HWY_WARN(
-  for (const Cluster& c : clusters_) {
+        "Ignoring topology - no usable clusters. cluster_slice [%zu, %zu), "
-    max_cluster_workers = HWY_MAX(max_cluster_workers, c.NumWorkers());
+        "%zu tclusters, %zu tLPs, %zu enabled LPs: ",
        cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
        tclusters.size(), topology_.lps.size(), enabled_lps.Count());
    enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
    return false;
  }
  HWY_ASSERT(max_cluster_workers <= max_tcluster_cores);
  // Do not warn about large clusters: GNR has 40.
  const size_t num_smt = NumSMT(topology_);
  snprintf(topology_string_, sizeof(topology_string_),
           "%zuS %zuX %zuC %zuH, using %zuX %zuC (nodes=%zu)",
-           topology_.packages.size(), tclusters.size(), max_tcluster_cores,
+           topology_.packages.size(), tclusters.size(),
-           max_tcluster_lps / max_tcluster_cores, NumClusters(),
+           tclusters[0].lps.Count() / num_smt, num_smt, NumClusters(),
-           max_cluster_workers, nodes_.Count());
+           clusters_[0].NumWorkers(), nodes_.Count());
  return true;
 }
--- a/util/topology.h
+++ b/util/topology.h
@ -93,7 +93,7 @@ class BoundedTopology {
  class Cluster {
   public:
-    Cluster(const LPS& lps);
+    explicit Cluster(const LPS& lps);
    Cluster(const LPS& enabled_lps,
            const std::vector<hwy::Topology::LP>& all_lps,
            const hwy::Topology::Cluster& tcluster);