Fix thread name when skipping packages/clusters

PiperOrigin-RevId: 766054198
This commit is contained in:
Jan Wassenberg 2025-06-01 23:49:35 -07:00 committed by Copybara-Service
parent 0023ff8770
commit a3f7bf0991
3 changed files with 22 additions and 15 deletions

View File

@ -59,8 +59,8 @@ class Pinning {
// If want_pin_, tries to pin each worker in `pool` to an LP in `cluster`, // If want_pin_, tries to pin each worker in `pool` to an LP in `cluster`,
// and sets `any_error_` if any fails. // and sets `any_error_` if any fails.
void MaybePin(size_t pkg_idx, size_t cluster_idx, void MaybePin(const BoundedTopology& topology, size_t pkg_idx,
const BoundedTopology::Cluster& cluster, size_t cluster_idx, const BoundedTopology::Cluster& cluster,
hwy::ThreadPool& pool) { hwy::ThreadPool& pool) {
const std::vector<size_t> lps = cluster.LPVector(); const std::vector<size_t> lps = cluster.LPVector();
HWY_ASSERT(pool.NumWorkers() <= lps.size()); HWY_ASSERT(pool.NumWorkers() <= lps.size());
@ -68,9 +68,10 @@ class Pinning {
HWY_ASSERT(task == thread); // each worker has one task HWY_ASSERT(task == thread); // each worker has one task
char buf[16]; // Linux limitation char buf[16]; // Linux limitation
const int bytes_written = const int bytes_written = snprintf(
snprintf(buf, sizeof(buf), "P%zu X%02zu C%03d", pkg_idx, cluster_idx, buf, sizeof(buf), "P%zu X%02zu C%03d",
static_cast<int>(task)); topology.SkippedPackages() + pkg_idx,
topology.SkippedClusters() + cluster_idx, static_cast<int>(task));
HWY_ASSERT(bytes_written < static_cast<int>(sizeof(buf))); HWY_ASSERT(bytes_written < static_cast<int>(sizeof(buf)));
hwy::SetThreadName(buf, 0); // does not support varargs hwy::SetThreadName(buf, 0); // does not support varargs
@ -195,7 +196,7 @@ NestedPools::Package::Package(const BoundedTopology& topology,
allocator, CapIfNonZero(cluster.Size(), max_workers_per_cluster), allocator, CapIfNonZero(cluster.Size(), max_workers_per_cluster),
cluster.Node()); cluster.Node());
// Pin workers AND the calling thread from `all_clusters`. // Pin workers AND the calling thread from `all_clusters`.
GetPinning().MaybePin(pkg_idx, cluster_idx, cluster, GetPinning().MaybePin(topology, pkg_idx, cluster_idx, cluster,
*clusters_[cluster_idx]); *clusters_[cluster_idx]);
}); });
} }

View File

@ -83,12 +83,13 @@ static LPS EnabledLPs(const BoundedSlice& lp_slice) {
BoundedTopology::BoundedTopology(BoundedSlice package_slice, BoundedTopology::BoundedTopology(BoundedSlice package_slice,
BoundedSlice cluster_slice, BoundedSlice cluster_slice,
BoundedSlice lp_slice) { BoundedSlice lp_slice)
: package_slice_(package_slice), cluster_slice_(cluster_slice) {
const LPS enabled_lps = EnabledLPs(lp_slice); const LPS enabled_lps = EnabledLPs(lp_slice);
#if !GEMMA_DISABLE_TOPOLOGY #if !GEMMA_DISABLE_TOPOLOGY
if (HWY_LIKELY(!topology_.packages.empty())) { if (HWY_LIKELY(!topology_.packages.empty())) {
InitFromTopology(enabled_lps, package_slice, cluster_slice); InitFromTopology(enabled_lps);
} }
#endif #endif
@ -270,16 +271,14 @@ static void ScanTClusters(hwy::Topology& topology_, size_t& max_tclusters,
} }
// Main part of ctor, called when topology is known. // Main part of ctor, called when topology is known.
void BoundedTopology::InitFromTopology(const LPS& enabled_lps, void BoundedTopology::InitFromTopology(const LPS& enabled_lps) {
BoundedSlice package_slice,
BoundedSlice cluster_slice) {
size_t max_tclusters, max_tcluster_cores, max_tcluster_lps; size_t max_tclusters, max_tcluster_cores, max_tcluster_lps;
ScanTClusters(topology_, max_tclusters, max_tcluster_cores, max_tcluster_lps); ScanTClusters(topology_, max_tclusters, max_tcluster_cores, max_tcluster_lps);
// (Possibly empty) subset of `Topology` packages that have `enabled_lps`. // (Possibly empty) subset of `Topology` packages that have `enabled_lps`.
package_slice.Foreach( package_slice_.Foreach(
"package", topology_.packages.size(), [&](size_t pkg_idx) { "package", topology_.packages.size(), [&](size_t pkg_idx) {
Package package(enabled_lps, topology_, pkg_idx, cluster_slice); Package package(enabled_lps, topology_, pkg_idx, cluster_slice_);
// Skip if empty, i.e. too few `enabled_lps`. // Skip if empty, i.e. too few `enabled_lps`.
if (HWY_LIKELY(!package.clusters.empty())) { if (HWY_LIKELY(!package.clusters.empty())) {
packages_.push_back(std::move(package)); packages_.push_back(std::move(package));

View File

@ -148,6 +148,12 @@ class BoundedTopology {
const hwy::Topology& FullTopology() const { return topology_; } const hwy::Topology& FullTopology() const { return topology_; }
#endif #endif
// Offsets used when running with a subset of packages/clusters. Each accessor
// returns `Begin()` of the corresponding slice stored by the constructor.
// `Pinning::MaybePin` adds these to its `pkg_idx`/`cluster_idx` arguments when
// formatting the worker thread name, so that the names are distinct.
// Begin of the package slice; added to the package index in thread names.
size_t SkippedPackages() const { return package_slice_.Begin(); }
// Begin of the cluster slice; added to the cluster index in thread names.
size_t SkippedClusters() const { return cluster_slice_.Begin(); }
private: private:
struct Package { struct Package {
explicit Package(const LPS& enabled_lps); explicit Package(const LPS& enabled_lps);
@ -160,13 +166,14 @@ class BoundedTopology {
std::vector<Cluster> clusters; std::vector<Cluster> clusters;
}; // Package }; // Package
void InitFromTopology(const LPS& enabled_lps, BoundedSlice package_slice, void InitFromTopology(const LPS& enabled_lps);
BoundedSlice cluster_slice);
void InitFromLPs(const LPS& enabled_lps); void InitFromLPs(const LPS& enabled_lps);
#if !GEMMA_DISABLE_TOPOLOGY #if !GEMMA_DISABLE_TOPOLOGY
hwy::Topology topology_; hwy::Topology topology_;
#endif #endif
BoundedSlice package_slice_;
BoundedSlice cluster_slice_;
std::vector<Package> packages_; std::vector<Package> packages_;
char topology_string_[96]; char topology_string_[96];
LPS nodes_; LPS nodes_;