llama-fit-params: QoL impr. for prints/errors (#18089)

Author: Johannes Gäßler, 2025-12-17 00:03:19 +01:00 (committed by GitHub)
Parent: ef83fb8601
Commit: 4164596c76
Signature: GPG Key ID B5690EEEBB952194 (no known key found for this signature in database)
1 changed file with 20 additions and 16 deletions

@@ -4,7 +4,11 @@
 #include "common.h"
 #include "log.h"
-#include <iostream>
+#include <chrono>
+#include <cinttypes>
+#include <thread>
+
+using namespace std::chrono_literals;
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
@@ -22,13 +26,17 @@ int main(int argc, char ** argv) {
     llama_numa_init(params.numa);
     auto mparams = common_model_params_to_llama(params);
     auto cparams = common_context_params_to_llama(params);
-    llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
+    const bool success = llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
         params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx,
         params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
+    if (!success) {
+        LOG_ERR("%s: failed to fit CLI arguments to free memory, exiting...\n", __func__);
+        exit(1);
+    }
 
-    LOG_INF("Printing fitted CLI arguments to stdout...\n");
-    std::cout << "-c " << cparams.n_ctx;
-    std::cout << " -ngl " << mparams.n_gpu_layers;
+    LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
+    std::this_thread::sleep_for(10ms); // to avoid a race between stderr and stdout
+    printf("-c %" PRIu32 " -ngl %" PRIu32, cparams.n_ctx, mparams.n_gpu_layers);
 
     size_t nd = llama_max_devices();
     while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
@@ -37,26 +45,22 @@ int main(int argc, char ** argv) {
     if (nd > 1) {
         for (size_t id = 0; id < nd; id++) {
             if (id == 0) {
-                std::cout << " -ts ";
+                printf(" -ts ");
             }
-            if (id > 0) {
-                std::cout << ",";
-            }
-            std::cout << mparams.tensor_split[id];
+            printf("%s%" PRIu32, id > 0 ? "," : "", uint32_t(mparams.tensor_split[id]));
         }
     }
 
     const size_t ntbo = llama_max_tensor_buft_overrides();
+    bool any_tbo = false;
     for (size_t itbo = 0; itbo < ntbo && mparams.tensor_buft_overrides[itbo].pattern != nullptr; itbo++) {
         if (itbo == 0) {
-            std::cout << " -ot ";
+            printf(" -ot \"");
         }
-        if (itbo > 0) {
-            std::cout << ",";
-        }
-        std::cout << mparams.tensor_buft_overrides[itbo].pattern << "=" << ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft);
+        printf("%s%s=%s", itbo > 0 ? "," : "", mparams.tensor_buft_overrides[itbo].pattern, ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft));
+        any_tbo = true;
     }
-    std::cout << "\n";
+    printf("%s\n", any_tbo ? "\"" : "");
 
     return 0;
 }
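
Editor's note: for readers who want to see the new output path end to end, below is a minimal standalone sketch of the printing pattern this diff adopts. The fitted values, device count, and override pattern are made-up stand-ins; none of the llama.cpp calls (llama_params_fit, llama_max_devices, ggml_backend_buft_name) are involved.

// Standalone sketch of the commit's printing pattern, with hypothetical data.
#include <chrono>
#include <cinttypes>
#include <cstdio>
#include <thread>

using namespace std::chrono_literals;

int main() {
    const uint32_t n_ctx        = 8192;                 // hypothetical fitted context size
    const uint32_t n_gpu_layers = 33;                   // hypothetical fitted layer count
    const float    tensor_split[2] = {3.0f, 1.0f};      // per-device split, trailing zeros already trimmed
    const char *   overrides[1][2] = {{"blk\\.[0-9]+\\.ffn", "CPU"}}; // hypothetical -ot pattern=buffer pairs

    // Logs go to stderr; the result line goes to stdout.
    fprintf(stderr, "printing fitted CLI arguments to stdout...\n");
    std::this_thread::sleep_for(10ms); // same stderr/stdout interleaving workaround as in the diff

    printf("-c %" PRIu32 " -ngl %" PRIu32, n_ctx, n_gpu_layers);
    for (size_t id = 0; id < 2; id++) {
        // first element carries the flag, later elements a comma separator
        printf("%s%" PRIu32, id == 0 ? " -ts " : ",", uint32_t(tensor_split[id]));
    }
    bool any_tbo = false;
    for (size_t itbo = 0; itbo < 1; itbo++) {
        printf("%s%s=%s", itbo == 0 ? " -ot \"" : ",", overrides[itbo][0], overrides[itbo][1]);
        any_tbo = true;
    }
    printf("%s\n", any_tbo ? "\"" : ""); // close the quote only if -ot was actually printed
    return 0;
}

Compiled and run, this prints a single line such as -c 8192 -ngl 33 -ts 3,1 -ot "blk\.[0-9]+\.ffn=CPU", mirroring what llama-fit-params now emits on stdout; quoting the -ot argument and sleeping 10 ms before printing are the two QoL changes, keeping the stderr log line from interleaving with the stdout result when both streams share a terminal.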