Avoid warning when OS affinity limits us to the second socket

Also simplify NumSMT, detect from .smt field directly PiperOrigin-RevId: 841723362
2025-12-08 05:38:26 -08:00 · 2025-12-08 05:38:26 -08:00 · 2b9245ad93
parent b510ba2ab2
commit 2b9245ad93
3 changed files with 73 additions and 45 deletions
--- a/BUILD.bazel
+++ b/BUILD.bazel
@ -66,6 +66,7 @@ cc_library(
    srcs = ["util/topology.cc"],
    hdrs = ["util/topology.h"],
    deps = [
+        "@highway//:bit_set",
        "@highway//:hwy",
        "@highway//:topology",
    ],
--- a/util/topology.cc
+++ b/util/topology.cc
@ -21,6 +21,7 @@
 #include <vector>

 #include "hwy/base.h"
+#include "hwy/bit_set.h"

 namespace gcpp {

@ -173,12 +174,13 @@ constexpr size_t kMaxLPsPerCluster = 6;

 #if !GEMMA_DISABLE_TOPOLOGY

-static size_t CoresFromLPs(const LPS& lps, const hwy::Topology& topology) {
-  LPS cores;
-  lps.Foreach([&](size_t lp) {
-    if (topology.lps[lp].smt == 0) cores.Set(lp);
-  });
-  return cores.Count();
+// Returns number of distinct SMT (hyperthreads).
+static size_t NumSMT(const hwy::Topology& topology) {
+  hwy::BitSet64 smt;
+  for (const hwy::Topology::LP& lp : topology.lps) {
+    smt.Set(lp.smt);
+  }
+  return smt.Count();
 }

 // tcluster is a modifiable copy of the first cluster in the package.
@ -204,34 +206,66 @@ void BoundedTopology::SplitLargeCluster(const LPS& enabled_lps,
  }
 }

-// Main part of ctor, called when topology is known.
-bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
-  const size_t tpkg_idx = package_slice_.Begin();
-  HWY_ASSERT(tpkg_idx < topology_.packages.size());
-  const hwy::Topology::Package& tpackage = topology_.packages[tpkg_idx];
-  const std::vector<hwy::Topology::Cluster>& tclusters = tpackage.clusters;
+using TClusters = std::vector<hwy::Topology::Cluster>;
+
+// Returns false if no cluster in `tclusters` has any enabled LPs.
+static bool AnyEnabledLPs(const TClusters& tclusters, const LPS& enabled_lps) {
  if (HWY_UNLIKELY(tclusters.empty())) {
-    HWY_WARN("Topology: no clusters found in package %zu.", tpkg_idx);
+    HWY_WARN("Topology: no clusters found.");
    return false;
  }

-  size_t max_tcluster_cores = 0;
-  size_t max_tcluster_lps = 0;
  for (const hwy::Topology::Cluster& tcluster : tclusters) {
-    const size_t cores = CoresFromLPs(tcluster.lps, topology_);
-    const size_t lps = tcluster.lps.Count();
-    max_tcluster_cores = HWY_MAX(max_tcluster_cores, cores);
-    max_tcluster_lps = HWY_MAX(max_tcluster_lps, lps);
+    bool any_lp_enabled = false;
+    tcluster.lps.Foreach(
+        [&](size_t lp) { any_lp_enabled |= (enabled_lps.Get(lp)); });
+    if (any_lp_enabled) return true;
  }
-  HWY_ASSERT(max_tcluster_cores != 0);
-  HWY_ASSERT(max_tcluster_lps >= max_tcluster_cores);
+
+  // No warning: this can happen if OS affinity limits us to the second package.
+  return false;
+}
+
+// Returns nullptr on failure. Also attempts `1 - tpkg_idx`, which is suitable
+// for the common case of up to two packages.
+static const TClusters* GetPackageClusters(const hwy::Topology& topology,
+                                           size_t tpkg_idx,
+                                           const LPS& enabled_lps) {
+  const size_t num_packages = topology.packages.size();
+  HWY_ASSERT(tpkg_idx < num_packages);
+  {
+    const TClusters& tclusters = topology.packages[tpkg_idx].clusters;
+    if (AnyEnabledLPs(tclusters, enabled_lps)) return &tclusters;
+  }
+
+  // Retry with the other package, if any.
+  tpkg_idx ^= 1;
+  if (tpkg_idx == num_packages) return nullptr;
+  {
+    const TClusters& tclusters = topology.packages[tpkg_idx].clusters;
+    if (AnyEnabledLPs(tclusters, enabled_lps)) return &tclusters;
+  }
+
+  HWY_WARN(
+      "Ignoring topology (%zu tpackages) because no clusters overlap with the "
+      "OS affinity (%zu enabled LPs): ",
+      num_packages, enabled_lps.Count());
+  enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
+  return nullptr;
+}
+
+// Main part of ctor, called when topology is known.
+bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
+  const TClusters* maybe_tclusters =
+      GetPackageClusters(topology_, package_slice_.Begin(), enabled_lps);
+  if (!maybe_tclusters) return false;
+  const TClusters& tclusters = *maybe_tclusters;

  // Populate `clusters` with the subset of clusters in `cluster_slice` that
  // have any enabled LPs.
  clusters_.reserve(cluster_slice_.Num(tclusters.size()));
  cluster_slice_.Foreach("cluster", tclusters.size(), [&](size_t cluster_idx) {
-    const hwy::Topology::Cluster& tcluster = tpackage.clusters[cluster_idx];
-    Cluster cluster(enabled_lps, topology_.lps, tcluster);
+    Cluster cluster(enabled_lps, topology_.lps, tclusters[cluster_idx]);

    // Skip if empty, i.e. too few `enabled_lps`.
    if (HWY_LIKELY(cluster.NumWorkers() != 0)) {
@ -240,20 +274,10 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
      nodes_.Set(cluster.Node());
    }
  });
-  if (HWY_UNLIKELY(clusters_.empty())) {
-    HWY_WARN(
-        "cluster_slice [%zu, %zu), tclusters %zu, tcores %zu, tLPs %zu, "
-        "#LPs: %zu does not overlap with %zu enabled LPs: ",
-        cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
-        tclusters.size(), max_tcluster_cores, max_tcluster_lps,
-        topology_.lps.size(), enabled_lps.Count());
-    enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
-    return false;
-  }

  if (kSplitLargeClusters && clusters_.size() == 1 &&
      enabled_lps.Count() >= 16) {
-    SplitLargeCluster(enabled_lps, tpackage.clusters[0]);
+    SplitLargeCluster(enabled_lps, tclusters[0]);
  }

  // Sort by descending 'size' so that users who only use one get the largest.
@ -262,20 +286,23 @@ bool BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
              return a.NumWorkers() > b.NumWorkers();
            });

-  // Largest number of enabled workers in any cluster, for `topology_string_`.
-  // This may be less than `max_tcluster_cores` if `enabled_lps` excludes some.
-  size_t max_cluster_workers = 0;
-  for (const Cluster& c : clusters_) {
-    max_cluster_workers = HWY_MAX(max_cluster_workers, c.NumWorkers());
+  // Happens if all LPs are HTs (we checked that at least some LPs are enabled).
+  if (HWY_UNLIKELY(clusters_.empty())) {
+    HWY_WARN(
+        "Ignoring topology - no usable clusters. cluster_slice [%zu, %zu), "
+        "%zu tclusters, %zu tLPs, %zu enabled LPs: ",
+        cluster_slice_.Begin(), cluster_slice_.End(tclusters.size()),
+        tclusters.size(), topology_.lps.size(), enabled_lps.Count());
+    enabled_lps.Foreach([](size_t lp) { fprintf(stderr, "%zu, ", lp); });
+    return false;
  }
-  HWY_ASSERT(max_cluster_workers <= max_tcluster_cores);
-  // Do not warn about large clusters: GNR has 40.

+  const size_t num_smt = NumSMT(topology_);
  snprintf(topology_string_, sizeof(topology_string_),
           "%zuS %zuX %zuC %zuH, using %zuX %zuC (nodes=%zu)",
-           topology_.packages.size(), tclusters.size(), max_tcluster_cores,
-           max_tcluster_lps / max_tcluster_cores, NumClusters(),
-           max_cluster_workers, nodes_.Count());
+           topology_.packages.size(), tclusters.size(),
+           tclusters[0].lps.Count() / num_smt, num_smt, NumClusters(),
+           clusters_[0].NumWorkers(), nodes_.Count());
  return true;
 }

--- a/util/topology.h
+++ b/util/topology.h
@ -93,7 +93,7 @@ class BoundedTopology {

  class Cluster {
   public:
-    Cluster(const LPS& lps);
+    explicit Cluster(const LPS& lps);
    Cluster(const LPS& enabled_lps,
            const std::vector<hwy::Topology::LP>& all_lps,
            const hwy::Topology::Cluster& tcluster);