From 9c600bcd4b3b21f70c9d95cf8a938e43192eb492 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 25 Mar 2026 21:17:27 +0800 Subject: [PATCH] llama-bench: print `-n-cpu-moe` when offloaded layers > 1 (#20984) --- tools/llama-bench/llama-bench.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index 25beb369e6..0a23f69853 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -1807,7 +1807,7 @@ struct markdown_printer : public printer { if (!is_cpu_backend) { fields.emplace_back("n_gpu_layers"); } - if (params.n_cpu_moe.size() > 1) { + if (params.n_cpu_moe.size() > 1 || params.n_cpu_moe != cmd_params_defaults.n_cpu_moe) { fields.emplace_back("n_cpu_moe"); } if (params.n_threads.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) {