ggml-cpu: Respect cpumask settings (#16164)

2025-09-23 01:58:12 -07:00 · 2025-09-23 01:58:12 -07:00 · 4e29084ba4
parent f6b4af3d04
commit 4e29084ba4
1 changed file with 17 additions and 3 deletions
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -473,10 +473,10 @@ struct ggml_threadpool {
 struct ggml_compute_state {
 #ifndef GGML_USE_OPENMP
    ggml_thread_t thrd;
-    bool cpumask[GGML_MAX_N_THREADS];
    int  last_graph;
    bool pending;
 #endif
+    bool cpumask[GGML_MAX_N_THREADS];
    struct ggml_threadpool * threadpool;
    int ith;
 };
@@ -3081,7 +3081,14 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
    threadpool->workers = workers;
-#ifndef GGML_USE_OPENMP
+#ifdef GGML_USE_OPENMP
+    int32_t cpumask_iter = 0;
+    // Compute CPU masks for each thread
+    for (int j = 0; j < tpp->n_threads; j++) {
+        ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
+    }
+#else // GGML_USE_OPENMP
    ggml_mutex_init(&threadpool->mutex);
    ggml_cond_init(&threadpool->cond);
@@ -3154,7 +3161,14 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
                atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
            }
-            ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
+            // Apply thread CPU mask and priority
+            int ith = omp_get_thread_num();
+            ggml_thread_apply_priority(threadpool->prio);
+            if (ggml_thread_cpumask_is_valid(threadpool->workers[ith].cpumask)) {
+                ggml_thread_apply_affinity(threadpool->workers[ith].cpumask);
+            }
+            ggml_graph_compute_thread(&threadpool->workers[ith]);
        }
    } else {
        atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);