From 41361c8599d04b5792edbcda22b168cc58212b17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Tue, 31 Mar 2026 12:53:41 +0200 Subject: [PATCH] common : move up common_init() and fix Windows UTF-8 logs (#21176) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build info is now only for debug, so we avoid the duplicate with `--version`. The UTF-8 setup at the beginning is needed to avoid logging garbage on Windows. Signed-off-by: Adrien Gallouët --- common/common.cpp | 7 ++++++- examples/batched/batched.cpp | 4 ++-- examples/debug/debug.cpp | 4 ++-- examples/diffusion/diffusion-cli.cpp | 3 ++- examples/embedding/embedding.cpp | 4 ++-- examples/eval-callback/eval-callback.cpp | 4 ++-- examples/idle/idle.cpp | 4 ++-- examples/lookahead/lookahead.cpp | 4 ++-- examples/lookup/lookup-create.cpp | 2 ++ examples/lookup/lookup-stats.cpp | 4 ++-- examples/lookup/lookup.cpp | 4 ++-- examples/parallel/parallel.cpp | 4 ++-- examples/passkey/passkey.cpp | 4 ++-- examples/retrieval/retrieval.cpp | 4 ++-- examples/save-load-state/save-load-state.cpp | 4 ++-- examples/speculative-simple/speculative-simple.cpp | 4 ++-- examples/speculative/speculative.cpp | 4 ++-- examples/training/finetune.cpp | 3 ++- tests/export-graph-ops.cpp | 4 ++-- tests/test-state-restore-fragmented.cpp | 4 ++-- tests/test-thread-safety.cpp | 4 ++-- tools/batched-bench/batched-bench.cpp | 4 ++-- tools/cli/cli.cpp | 4 ++-- tools/completion/completion.cpp | 4 ++-- tools/cvector-generator/cvector-generator.cpp | 2 ++ tools/export-lora/export-lora.cpp | 2 ++ tools/fit-params/fit-params.cpp | 3 ++- tools/imatrix/imatrix.cpp | 4 ++-- tools/mtmd/debug/mtmd-debug.cpp | 3 ++- tools/mtmd/mtmd-cli.cpp | 3 ++- tools/perplexity/perplexity.cpp | 4 ++-- tools/results/results.cpp | 4 +++- tools/server/server.cpp | 4 ++-- tools/tts/tts.cpp | 4 ++-- 34 files changed, 73 insertions(+), 55 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 
497cfaad5e..60396af1f8 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -359,6 +359,11 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD } void common_init() { +#if defined(_WIN32) + SetConsoleOutputCP(CP_UTF8); + SetConsoleCP(CP_UTF8); +#endif + llama_log_set(common_log_default_callback, NULL); #ifdef NDEBUG @@ -367,7 +372,7 @@ void common_init() { const char * build_type = " (debug)"; #endif - LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type); + LOG_DBG("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type); } std::string common_params_get_system_info(const common_params & params) { diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp index d2b2e336e7..830e45f5af 100644 --- a/examples/batched/batched.cpp +++ b/examples/batched/batched.cpp @@ -24,12 +24,12 @@ int main(int argc, char ** argv) { params.prompt = "Hello my name is"; params.n_predict = 32; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_BATCHED, print_usage)) { return 1; } - common_init(); - // number of parallel batches int n_parallel = params.n_parallel; diff --git a/examples/debug/debug.cpp b/examples/debug/debug.cpp index 88947acbd3..ec80be19ba 100644 --- a/examples/debug/debug.cpp +++ b/examples/debug/debug.cpp @@ -213,12 +213,12 @@ static bool run(llama_context * ctx, const common_params & params) { int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_DEBUG, print_usage)) { return 1; } - common_init(); - llama_backend_init(); llama_numa_init(params.numa); diff --git a/examples/diffusion/diffusion-cli.cpp b/examples/diffusion/diffusion-cli.cpp index d38bfe7f82..e9780407da 100644 --- a/examples/diffusion/diffusion-cli.cpp +++ b/examples/diffusion/diffusion-cli.cpp @@ -545,11 +545,12 @@ 
int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_DIFFUSION)) { return 1; } - common_init(); llama_backend_init(); llama_model_params model_params = llama_model_default_params(); diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 33ef2a7521..f6a20ef9d0 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -99,12 +99,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EMBEDDING)) { return 1; } - common_init(); - params.embedding = true; // get max number of sequences per batch diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp index 17d162d95d..902b0fdb56 100644 --- a/examples/eval-callback/eval-callback.cpp +++ b/examples/eval-callback/eval-callback.cpp @@ -37,12 +37,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) { return 1; } - common_init(); - llama_backend_init(); llama_numa_init(params.numa); diff --git a/examples/idle/idle.cpp b/examples/idle/idle.cpp index 000427143d..409fd25c18 100644 --- a/examples/idle/idle.cpp +++ b/examples/idle/idle.cpp @@ -19,12 +19,12 @@ static void print_usage(int /*argc*/, char ** argv) { int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON, print_usage)) { return 1; } - common_init(); - // init LLM llama_backend_init(); diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp index d5fde081c5..b7f5c6de86 100644 --- a/examples/lookahead/lookahead.cpp +++ b/examples/lookahead/lookahead.cpp @@ -43,12 +43,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, 
LLAMA_EXAMPLE_COMMON)) { return 1; } - common_init(); - const int W = 15; // lookahead window const int N = 5; // n-gram size const int G = 15; // max verification n-grams diff --git a/examples/lookup/lookup-create.cpp b/examples/lookup/lookup-create.cpp index 439e3f726e..6b8f258a4a 100644 --- a/examples/lookup/lookup-create.cpp +++ b/examples/lookup/lookup-create.cpp @@ -12,6 +12,8 @@ int main(int argc, char ** argv){ common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) { return 1; } diff --git a/examples/lookup/lookup-stats.cpp b/examples/lookup/lookup-stats.cpp index c3158281c7..847976ddc7 100644 --- a/examples/lookup/lookup-stats.cpp +++ b/examples/lookup/lookup-stats.cpp @@ -18,12 +18,12 @@ int main(int argc, char ** argv){ common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) { return 1; } - common_init(); - const int n_draft = params.speculative.n_max; // init llama.cpp diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp index bd216035c0..74272f17e7 100644 --- a/examples/lookup/lookup.cpp +++ b/examples/lookup/lookup.cpp @@ -18,12 +18,12 @@ int main(int argc, char ** argv){ common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) { return 1; } - common_init(); - // max. 
number of additional tokens to draft if match is found const int n_draft = params.speculative.n_max; diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index 1700ceefbf..a46400c5b9 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -163,12 +163,12 @@ int main(int argc, char ** argv) { params.n_predict = 128; params.n_junk = 1; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PARALLEL)) { return 1; } - common_init(); - // number of simultaneous "clients" to simulate const int32_t n_clients = params.n_parallel; diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp index 665191047a..8440a2bf77 100644 --- a/examples/passkey/passkey.cpp +++ b/examples/passkey/passkey.cpp @@ -25,12 +25,12 @@ int main(int argc, char ** argv) { params.n_keep = 32; params.i_pos = -1; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PASSKEY, print_usage)) { return 1; } - common_init(); - int n_junk = params.n_junk; int n_keep = params.n_keep; int n_grp = params.grp_attn_n; diff --git a/examples/retrieval/retrieval.cpp b/examples/retrieval/retrieval.cpp index 9e05fc2233..7d93ab1172 100644 --- a/examples/retrieval/retrieval.cpp +++ b/examples/retrieval/retrieval.cpp @@ -117,12 +117,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_RETRIEVAL, print_usage)) { return 1; } - common_init(); - // For BERT models, batch size must be equal to ubatch size params.n_ubatch = params.n_batch; params.embedding = true; diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index 174c8c7585..a26fd73cb3 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -17,6 +17,8 @@ int main(int argc, char ** argv) { const std::string_view state_file = "dump_state.bin"; + common_init(); + if 
(!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) { return 1; } @@ -27,8 +29,6 @@ int main(int argc, char ** argv) { params.kv_unified = true; } - common_init(); - if (params.n_predict < 0) { params.n_predict = 16; } diff --git a/examples/speculative-simple/speculative-simple.cpp b/examples/speculative-simple/speculative-simple.cpp index 8a1cbd96c2..a03dbce887 100644 --- a/examples/speculative-simple/speculative-simple.cpp +++ b/examples/speculative-simple/speculative-simple.cpp @@ -16,6 +16,8 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_SPECULATIVE)) { return 1; } @@ -25,8 +27,6 @@ int main(int argc, char ** argv) { return 1; } - common_init(); - if (params.speculative.mparams_dft.path.empty()) { LOG_ERR("%s: --model-draft is required\n", __func__); return 1; diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index 250c5b7c62..8f56a659b3 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -38,6 +38,8 @@ int main(int argc, char ** argv) { // needed to get candidate probs even for temp <= 0.0 params.sampling.n_probs = 128; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_SPECULATIVE)) { return 1; } @@ -47,8 +49,6 @@ int main(int argc, char ** argv) { return 1; } - common_init(); - if (params.speculative.mparams_dft.path.empty()) { LOG_ERR("%s: --model-draft is required\n", __func__); return 1; diff --git a/examples/training/finetune.cpp b/examples/training/finetune.cpp index e20f89488f..0a75ac110c 100644 --- a/examples/training/finetune.cpp +++ b/examples/training/finetune.cpp @@ -20,6 +20,8 @@ int main(int argc, char ** argv) { common_params params; params.escape = false; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_FINETUNE)) { return 1; } @@ -38,7 +40,6 @@ int main(int argc, char ** argv) { params.cache_type_v = 
GGML_TYPE_F32; } - common_init(); llama_backend_init(); llama_numa_init(params.numa); // load the model and apply lora adapter, if any diff --git a/tests/export-graph-ops.cpp b/tests/export-graph-ops.cpp index 754089d068..cac3ff628e 100644 --- a/tests/export-graph-ops.cpp +++ b/tests/export-graph-ops.cpp @@ -118,12 +118,12 @@ int main(int argc, char ** argv) { common_params params; params.out_file = "tests.txt"; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_GRAPH_OPS)) { return 1; } - common_init(); - // Load CPU-only ggml_backend_dev_t cpu_device = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); params.devices = { cpu_device, nullptr }; diff --git a/tests/test-state-restore-fragmented.cpp b/tests/test-state-restore-fragmented.cpp index 481b39d04c..8a9bfaf5dc 100644 --- a/tests/test-state-restore-fragmented.cpp +++ b/tests/test-state-restore-fragmented.cpp @@ -22,12 +22,12 @@ int main(int argc, char ** argv) { params.n_parallel = 3; params.n_ctx = 256; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) { return 1; } - common_init(); - // init common_init_result_ptr llama_init = common_init_from_params(params); diff --git a/tests/test-thread-safety.cpp b/tests/test-thread-safety.cpp index bcb86c35e6..acda4aa81e 100644 --- a/tests/test-thread-safety.cpp +++ b/tests/test-thread-safety.cpp @@ -16,12 +16,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) { return 1; } - common_init(); - llama_backend_init(); llama_numa_init(params.numa); diff --git a/tools/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp index 224f0e1f1c..3964ef2595 100644 --- a/tools/batched-bench/batched-bench.cpp +++ b/tools/batched-bench/batched-bench.cpp @@ -20,12 +20,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, 
LLAMA_EXAMPLE_BENCH, print_usage)) { return 1; } - common_init(); - int is_pp_shared = params.is_pp_shared; int is_tg_separate = params.is_tg_separate; diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp index c58fda83e2..b57d27762c 100644 --- a/tools/cli/cli.cpp +++ b/tools/cli/cli.cpp @@ -347,6 +347,8 @@ int main(int argc, char ** argv) { params.verbosity = LOG_LEVEL_ERROR; // by default, less verbose logs + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CLI)) { return 1; } @@ -357,8 +359,6 @@ int main(int argc, char ** argv) { console::error("please use llama-completion instead\n"); } - common_init(); - // struct that contains llama context and inference cli_context ctx_cli(params); diff --git a/tools/completion/completion.cpp b/tools/completion/completion.cpp index 813526a0ec..1dc5df1afa 100644 --- a/tools/completion/completion.cpp +++ b/tools/completion/completion.cpp @@ -90,12 +90,12 @@ int main(int argc, char ** argv) { common_params params; g_params = &params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMPLETION, print_usage)) { return 1; } - common_init(); - auto & sparams = params.sampling; // save choice to use color for later diff --git a/tools/cvector-generator/cvector-generator.cpp b/tools/cvector-generator/cvector-generator.cpp index dcce0e9841..fd6e5ddd2d 100644 --- a/tools/cvector-generator/cvector-generator.cpp +++ b/tools/cvector-generator/cvector-generator.cpp @@ -400,6 +400,8 @@ int main(int argc, char ** argv) { params.out_file = "control_vector.gguf"; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CVECTOR_GENERATOR, print_usage)) { return 1; } diff --git a/tools/export-lora/export-lora.cpp b/tools/export-lora/export-lora.cpp index 50774c59ba..e1bc4a2f31 100644 --- a/tools/export-lora/export-lora.cpp +++ b/tools/export-lora/export-lora.cpp @@ -418,6 +418,8 @@ int main(int argc, char ** argv) { params.out_file = "ggml-lora-merged-f16.gguf"; + 
common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage)) { return 1; } diff --git a/tools/fit-params/fit-params.cpp b/tools/fit-params/fit-params.cpp index 0176be06e7..3c0404ed30 100644 --- a/tools/fit-params/fit-params.cpp +++ b/tools/fit-params/fit-params.cpp @@ -17,11 +17,12 @@ using namespace std::chrono_literals; int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) { return 1; } - common_init(); llama_backend_init(); llama_numa_init(params.numa); auto mparams = common_model_params_to_llama(params); diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp index fa21f6c9d5..3f7f3a11df 100644 --- a/tools/imatrix/imatrix.cpp +++ b/tools/imatrix/imatrix.cpp @@ -1212,6 +1212,8 @@ int main(int argc, char ** argv) { params.n_ctx = 512; params.escape = false; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) { return 1; } @@ -1223,8 +1225,6 @@ int main(int argc, char ** argv) { return 0; } - common_init(); - const int32_t n_ctx = params.n_ctx; if (n_ctx <= 0) { diff --git a/tools/mtmd/debug/mtmd-debug.cpp b/tools/mtmd/debug/mtmd-debug.cpp index d42806ec3f..6e32b283aa 100644 --- a/tools/mtmd/debug/mtmd-debug.cpp +++ b/tools/mtmd/debug/mtmd-debug.cpp @@ -54,11 +54,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) { return 1; } - common_init(); mtmd_helper_log_set(common_log_default_callback, nullptr); if (params.mmproj.path.empty()) { diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp index ba00e08534..dd72dfb17c 100644 --- a/tools/mtmd/mtmd-cli.cpp +++ b/tools/mtmd/mtmd-cli.cpp @@ -281,11 +281,12 @@ int main(int argc, char ** argv) { common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, 
show_additional_info)) { return 1; } - common_init(); mtmd_helper_log_set(common_log_default_callback, nullptr); if (params.mmproj.path.empty()) { diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp index 0eb062f05d..9c49e18630 100644 --- a/tools/perplexity/perplexity.cpp +++ b/tools/perplexity/perplexity.cpp @@ -2012,12 +2012,12 @@ int main(int argc, char ** argv) { params.n_ctx = 512; params.escape = false; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PERPLEXITY)) { return 1; } - common_init(); - const int32_t n_ctx = params.n_ctx; if (n_ctx <= 0) { diff --git a/tools/results/results.cpp b/tools/results/results.cpp index e5c5df128e..f2179ed275 100644 --- a/tools/results/results.cpp +++ b/tools/results/results.cpp @@ -58,6 +58,9 @@ static std::vector get_logits( int main(int argc, char ** argv) { common_params params; params.escape = false; + + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_RESULTS)) { return 1; } @@ -65,7 +68,6 @@ int main(int argc, char ** argv) { LOG_ERR("%s: an output file must be specified", __func__); return 1; } - common_init(); llama_backend_init(); llama_numa_init(params.numa); common_init_result_ptr llama_init = common_init_from_params(params); diff --git a/tools/server/server.cpp b/tools/server/server.cpp index ef54a46b19..a7afa77438 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -75,6 +75,8 @@ int main(int argc, char ** argv) { // own arguments required by this example common_params params; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_SERVER)) { return 1; } @@ -100,8 +102,6 @@ int main(int argc, char ** argv) { params.model_alias.insert(params.model.name); } - common_init(); - // struct that contains llama context and inference server_context ctx_server; diff --git a/tools/tts/tts.cpp b/tools/tts/tts.cpp index dc2fa494b8..ce68213d14 100644 --- a/tools/tts/tts.cpp +++ b/tools/tts/tts.cpp @@ 
-551,6 +551,8 @@ int main(int argc, char ** argv) { params.sampling.top_k = 4; params.sampling.samplers = { COMMON_SAMPLER_TYPE_TOP_K, }; + common_init(); + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_TTS, print_usage)) { return 1; } @@ -558,8 +560,6 @@ int main(int argc, char ** argv) { const int n_parallel = params.n_parallel; const int n_predict = params.n_predict; - common_init(); - // init LLM llama_backend_init();