From 4164596c76cb4f62322fd1ff9d157cee5ba97b23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Wed, 17 Dec 2025 00:03:19 +0100
Subject: [PATCH] llama-fit-params: QoL impr. for prints/errors (#18089)

---
 tools/fit-params/fit-params.cpp | 36 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/tools/fit-params/fit-params.cpp b/tools/fit-params/fit-params.cpp
index fbf7a2eb37..2c113c453e 100644
--- a/tools/fit-params/fit-params.cpp
+++ b/tools/fit-params/fit-params.cpp
@@ -4,7 +4,11 @@
 #include "common.h"
 #include "log.h"
 
-#include <iostream>
+#include <chrono>
+#include <cinttypes>
+#include <thread>
+
+using namespace std::chrono_literals;
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -22,13 +26,17 @@ int main(int argc, char ** argv) {
     llama_numa_init(params.numa);
     auto mparams = common_model_params_to_llama(params);
     auto cparams = common_context_params_to_llama(params);
-    llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
+    const bool success = llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
         params.tensor_split, params.tensor_buft_overrides.data(),
         params.fit_params_target, params.fit_params_min_ctx,
         params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
+    if (!success) {
+        LOG_ERR("%s: failed to fit CLI arguments to free memory, exiting...\n", __func__);
+        exit(1);
+    }
 
-    LOG_INF("Printing fitted CLI arguments to stdout...\n");
-    std::cout << "-c " << cparams.n_ctx;
-    std::cout << " -ngl " << mparams.n_gpu_layers;
+    LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
+    std::this_thread::sleep_for(10ms); // to avoid a race between stderr and stdout
+    printf("-c %" PRIu32 " -ngl %" PRIu32, cparams.n_ctx, mparams.n_gpu_layers);
 
     size_t nd = llama_max_devices();
     while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
@@ -37,26 +45,22 @@
     if (nd > 1) {
         for (size_t id = 0; id < nd; id++) {
             if (id == 0) {
-                std::cout << " -ts ";
+                printf(" -ts ");
             }
-            if (id > 0) {
-                std::cout << ",";
-            }
-            std::cout << mparams.tensor_split[id];
+            printf("%s%" PRIu32, id > 0 ? "," : "", uint32_t(mparams.tensor_split[id]));
         }
     }
 
     const size_t ntbo = llama_max_tensor_buft_overrides();
+    bool any_tbo = false;
     for (size_t itbo = 0; itbo < ntbo && mparams.tensor_buft_overrides[itbo].pattern != nullptr; itbo++) {
         if (itbo == 0) {
-            std::cout << " -ot ";
+            printf(" -ot \"");
         }
-        if (itbo > 0) {
-            std::cout << ",";
-        }
-        std::cout << mparams.tensor_buft_overrides[itbo].pattern << "=" << ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft);
+        printf("%s%s=%s", itbo > 0 ? "," : "", mparams.tensor_buft_overrides[itbo].pattern, ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft));
+        any_tbo = true;
     }
-    std::cout << "\n";
+    printf("%s\n", any_tbo ? "\"" : "");
 
     return 0;
 }
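
Note on the 10ms sleep: the tool logs progress to stderr but emits the fitted arguments on stdout, and when both streams are attached to the same terminal the two can interleave. The following is a minimal standalone sketch of that pattern, not part of the patch; the "-c 4096 -ngl 99" payload is a made-up example value (the real values come from llama_params_fit()).

    // sketch.cpp: log to stderr, then emit a machine-readable payload on stdout.
    #include <chrono>
    #include <cstdio>
    #include <thread>

    using namespace std::chrono_literals;

    int main() {
        std::fprintf(stderr, "printing fitted CLI arguments to stdout...\n");
        std::fflush(stderr);               // push the log line out first
        std::this_thread::sleep_for(10ms); // give stderr a head start, mirroring the patch
        std::printf("-c 4096 -ngl 99\n");  // hypothetical payload on stdout
        return 0;
    }

The patch also wraps the -ot value in double quotes, presumably because tensor-buft-override patterns are regexes whose characters (backslashes, parentheses, dots) a shell would otherwise mangle when the printed arguments are pasted back into a command line.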