From 0f70e3e0cd90326b768bb6b212e47dd5987e20ce Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 1 Jan 2026 12:49:19 +0100 Subject: [PATCH 1/2] arg: support remote preset --- common/arg.cpp | 151 +++++++++++++++++++++++++++++--------------- common/download.cpp | 15 +++-- common/download.h | 6 ++ common/preset.cpp | 77 +++++++++++++++++++++- common/preset.h | 11 +++- docs/preset.md | 50 +++++++++++++++ 6 files changed, 253 insertions(+), 57 deletions(-) create mode 100644 docs/preset.md diff --git a/common/arg.cpp b/common/arg.cpp index 62d31393c4..4d4f91e96f 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -6,6 +6,7 @@ #include "log.h" #include "sampling.h" #include "download.h" +#include "preset.h" // fix problem with std::min and std::max #if defined(_WIN32) @@ -268,6 +269,42 @@ static void parse_tensor_buffer_overrides(const std::string & value, std::vector } } +static std::string clean_file_name(const std::string & fname) { + std::string clean_fname = fname; + string_replace_all(clean_fname, "\\", "/"); + string_replace_all(clean_fname, "/", ""); + return clean_fname; +} + +static bool common_params_handle_remote_preset(common_params & params, llama_example ex) { + GGML_ASSERT(!params.model.hf_repo.empty()); + + const bool offline = params.offline; + std::string model_endpoint = get_model_endpoint(); + auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini"; + + // prepare local path for caching + auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini"); + auto preset_path = fs_get_cache_file(preset_fname); + bool has_preset = common_download_file_single(preset_url, preset_path, params.hf_token, offline); + + // remote preset is optional, so we don't error out if not found + if (has_preset) { + LOG_INF("applying remote preset from %s\n", preset_url.c_str()); + common_preset_context ctx(ex, /* only_remote_allowed */ true); + common_preset global; // unused for now + auto remote_presets = ctx.load_from_ini(preset_path, global); + if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) { + common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME); + preset.apply_to_params(params); + } else { + throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section"); + } + } + + return has_preset; +} + struct handle_model_result { bool found_mmproj = false; common_params_model mmproj; @@ -309,9 +346,7 @@ static handle_model_result common_params_handle_model( // make sure model path is present (for caching purposes) if (model.path.empty()) { // this is to avoid different repo having same file name, or same file name in different subdirs - std::string filename = model.hf_repo + "_" + model.hf_file; - // to make sure we don't have any slashes in the filename - string_replace_all(filename, "/", "_"); + std::string filename = clean_file_name(model.hf_repo + "_" + model.hf_file); model.path = fs_get_cache_file(filename); } @@ -425,61 +460,75 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context } }; - std::set seen_args; + auto parse_cli_args = [&]() { + std::set seen_args; - for (int i = 1; i < argc; i++) { - const std::string arg_prefix = "--"; + for (int i = 1; i < argc; i++) { + const std::string arg_prefix = "--"; - std::string arg = argv[i]; - if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { - std::replace(arg.begin(), arg.end(), '_', '-'); - } - if (arg_to_options.find(arg) == arg_to_options.end()) { - throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); - } - if (!seen_args.insert(arg).second) { - LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str()); - } - auto & tmp = arg_to_options[arg]; - auto opt = *tmp.first; - bool is_positive = tmp.second; - if (opt.has_value_from_env()) { - fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); - } - try { - if (opt.handler_void) { - opt.handler_void(params); - continue; + std::string arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); } - if (opt.handler_bool) { - opt.handler_bool(params, is_positive); - continue; + if (arg_to_options.find(arg) == arg_to_options.end()) { + throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); } + if (!seen_args.insert(arg).second) { + LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str()); + } + auto & tmp = arg_to_options[arg]; + auto opt = *tmp.first; + bool is_positive = tmp.second; + if (opt.has_value_from_env()) { + fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); + } + try { + if (opt.handler_void) { + opt.handler_void(params); + continue; + } + if (opt.handler_bool) { + opt.handler_bool(params, is_positive); + continue; + } - // arg with single value - check_arg(i); - std::string val = argv[++i]; - if (opt.handler_int) { - opt.handler_int(params, std::stoi(val)); - continue; - } - if (opt.handler_string) { - opt.handler_string(params, val); - continue; - } + // arg with single value + check_arg(i); + std::string val = argv[++i]; + if (opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + continue; + } + if (opt.handler_string) { + opt.handler_string(params, val); + continue; + } - // arg with 2 values - check_arg(i); - std::string val2 = argv[++i]; - if (opt.handler_str_str) { - opt.handler_str_str(params, val, val2); - continue; + // arg with 2 values + check_arg(i); + std::string val2 = argv[++i]; + if (opt.handler_str_str) { + opt.handler_str_str(params, val, val2); + continue; + } + } catch (std::exception & e) { + throw std::invalid_argument(string_format( + "error while handling argument \"%s\": %s\n\n" + "usage:\n%s\n\nto show complete usage, run with -h", + arg.c_str(), e.what(), opt.to_string().c_str())); } - } catch (std::exception & e) { - throw std::invalid_argument(string_format( - "error while handling argument \"%s\": %s\n\n" - "usage:\n%s\n\nto show complete usage, run with -h", - arg.c_str(), e.what(), opt.to_string().c_str())); + } + }; + + // parse the first time to get -hf option (used for remote preset) + parse_cli_args(); + + // maybe handle remote preset + if (!params.model.hf_repo.empty()) { + bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex); + if (has_preset) { + // re-parse CLI args to override preset values + parse_cli_args(); } } diff --git a/common/download.cpp b/common/download.cpp index ef87472560..d0aa386081 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -769,10 +769,10 @@ std::pair> common_remote_get_content(const std::string #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB) -static bool common_download_file_single(const std::string & url, - const std::string & path, - const std::string & bearer_token, - bool offline) { +bool common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline) { if (!offline) { return common_download_file_single_online(url, path, bearer_token); } @@ -1096,6 +1096,13 @@ std::string common_docker_resolve_model(const std::string &) { throw std::runtime_error("download functionality is not enabled in this build"); } +bool common_download_file_single(const std::string &, + const std::string &, + const std::string &, + bool) { + throw std::runtime_error("download functionality is not enabled in this build"); +} + #endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB std::vector common_list_cached_models() { diff --git a/common/download.h b/common/download.h index d1321e6e90..5f42527af8 100644 --- a/common/download.h +++ b/common/download.h @@ -52,6 +52,12 @@ bool common_download_model( // returns list of cached models std::vector common_list_cached_models(); +// download single file from url to local path +bool common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline); + // resolve and download model from Docker registry // return local path to downloaded model file std::string common_docker_resolve_model(const std::string & docker); diff --git a/common/preset.cpp b/common/preset.cpp index e2fc18c5da..949fe00110 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -16,6 +16,46 @@ static std::string rm_leading_dashes(const std::string & str) { return str.substr(pos); } +// only allow a subset of args for remote presets for security reasons +// do not add more args unless absolutely necessary +// args that output to files are strictly prohibited +static std::set get_remote_preset_whitelist(std::map & key_to_opt) { + static const std::set allowed_options = { + "model-url", + "hf-repo", + "hf-repo-draft", + "hf-repo-v", // vocoder + "hf-file-v", // vocoder + "mmproj-url", + "pooling", + "jinja", + "batch-size", + "ubatch-size", + "cache-reuse", + // note: sampling params are automatically allowed by default + // negated args will be added automatically + }; + + std::set allowed_keys; + + for (const auto & it : key_to_opt) { + const std::string & key = it.first; + const common_arg & opt = it.second; + if (allowed_options.find(key) != allowed_options.end() || opt.is_sparam) { + allowed_keys.insert(key); + // also add variant keys (args without leading dashes and env vars) + for (const auto & arg : opt.get_args()) { + allowed_keys.insert(rm_leading_dashes(arg)); + } + for (const auto & env : opt.get_env()) { + allowed_keys.insert(env); + } + } + } + + return allowed_keys; +} + std::vector common_preset::to_args(const std::string & bin_path) const { std::vector args; @@ -121,6 +161,29 @@ void common_preset::merge(const common_preset & other) { } } +void common_preset::apply_to_params(common_params & params) const { + for (const auto & [opt, val] : options) { + // apply each option to params + if (opt.handler_string) { + opt.handler_string(params, val); + } else if (opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + } else if (opt.handler_bool) { + opt.handler_bool(params, common_arg_utils::is_truthy(val)); + } else if (opt.handler_str_str) { + // not supported yet + throw std::runtime_error(string_format( + "%s: option with two values is not supported yet", + __func__ + )); + } else if (opt.handler_void) { + opt.handler_void(params); + } else { + GGML_ABORT("unknown handler type"); + } + } +} + static std::map> parse_ini_from_file(const std::string & path) { std::map> parsed; @@ -230,10 +293,16 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke return value; } -common_preset_context::common_preset_context(llama_example ex) +common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed) : ctx_params(common_params_parser_init(default_params, ex)) { common_params_add_preset_options(ctx_params.options); key_to_opt = get_map_key_opt(ctx_params); + + // setup allowed keys if only_remote_allowed is true + if (only_remote_allowed) { + filter_allowed_keys = true; + allowed_keys = get_remote_preset_whitelist(key_to_opt); + } } common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const { @@ -250,6 +319,12 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co LOG_DBG("loading preset: %s\n", preset.name.c_str()); for (const auto & [key, value] : section.second) { LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str()); + if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) { + throw std::runtime_error(string_format( + "option '%s' is not allowed in remote presets", + key.c_str() + )); + } if (key_to_opt.find(key) != key_to_opt.end()) { const auto & opt = key_to_opt.at(key); if (is_bool_arg(opt)) { diff --git a/common/preset.h b/common/preset.h index 3a84d1be29..11ba6ef812 100644 --- a/common/preset.h +++ b/common/preset.h @@ -6,6 +6,7 @@ #include #include #include +#include // // INI preset parser and writer @@ -40,6 +41,9 @@ struct common_preset { // merge another preset into this one, overwriting existing options void merge(const common_preset & other); + + // apply preset options to common_params + void apply_to_params(common_params & params) const; }; // interface for multiple presets in one file @@ -50,7 +54,12 @@ struct common_preset_context { common_params default_params; // unused for now common_params_context ctx_params; std::map key_to_opt; - common_preset_context(llama_example ex); + + bool filter_allowed_keys = false; + std::set allowed_keys; + + // if only_remote_allowed is true, only accept whitelisted keys + common_preset_context(llama_example ex, bool only_remote_allowed = false); // load presets from INI file common_presets load_from_ini(const std::string & path, common_preset & global) const; diff --git a/docs/preset.md b/docs/preset.md new file mode 100644 index 0000000000..daea562cc5 --- /dev/null +++ b/docs/preset.md @@ -0,0 +1,50 @@ +# llama.cpp INI preset + +## Introduction + +INI preset is a feature that was added in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859). The goal is to allow writing reusable and sharable parameter presets in llama.cpp + +### Using preset on server + +When using multiple models on server (router mode), INI preset file can be used to configure model-specific parameters. Please refer to [server documentations](../tools/server/README.md) for more. + +### Using a remote preset + +> [!NOTE] +> +> This feature is currently only supported via the `-hf` option + +For GGUF models stored on Hugging Face, you can create a file named `preset.ini` in the root directory of the repository that contains specific configurations for the current model. + +Example: + +```ini +hf-repo-draft = username/my-draft-model-GGUF +temp = 0.5 +top-k = 20 +top-p = 0.95 +``` + +For security reason, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the list of allowed options. + +Example usage: + +Provided your repo is `username/my-model-with-preset` having a `preset.ini` with the content above. + +```sh +llama-cli -hf username/my-model-with-preset + +# equivalent to +llama-cli -hf username/my-model-with-preset \ + --hf-repo-draft username/my-draft-model-GGUF \ + --temp 0.5 \ + --top-k 20 \ + --top-p 0.95 +``` + +You can also optionally override preset args by specifying them in the arguments: + +```sh +# forcing temp = 0.1 +llama-cli -hf username/my-model-with-preset --temp 0.1 +``` From f9a97375cb539c7f195b473871a3a1eccf9da3d8 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 1 Jan 2026 12:50:45 +0100 Subject: [PATCH 2/2] proof reading --- docs/preset.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/preset.md b/docs/preset.md index daea562cc5..c11c5dc22f 100644 --- a/docs/preset.md +++ b/docs/preset.md @@ -1,20 +1,20 @@ -# llama.cpp INI preset +# llama.cpp INI Presets ## Introduction -INI preset is a feature that was added in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859). The goal is to allow writing reusable and sharable parameter presets in llama.cpp +The INI preset feature, introduced in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859), allows users to create reusable and shareable parameter configurations for llama.cpp. -### Using preset on server +### Using Presets with the Server -When using multiple models on server (router mode), INI preset file can be used to configure model-specific parameters. Please refer to [server documentations](../tools/server/README.md) for more. +When running multiple models on the server (router mode), INI preset files can be used to configure model-specific parameters. Please refer to the [server documentation](../tools/server/README.md) for more details. -### Using a remote preset +### Using a Remote Preset > [!NOTE] > -> This feature is currently only supported via the `-hf` option +> This feature is currently only supported via the `-hf` option. -For GGUF models stored on Hugging Face, you can create a file named `preset.ini` in the root directory of the repository that contains specific configurations for the current model. +For GGUF models hosted on Hugging Face, you can include a `preset.ini` file in the root directory of the repository to define specific configurations for that model. Example: @@ -25,16 +25,16 @@ top-k = 20 top-p = 0.95 ``` -For security reason, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the list of allowed options. +For security reasons, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the complete list of permitted options. Example usage: -Provided your repo is `username/my-model-with-preset` having a `preset.ini` with the content above. +Assuming your repository `username/my-model-with-preset` contains a `preset.ini` with the configuration above: ```sh llama-cli -hf username/my-model-with-preset -# equivalent to +# This is equivalent to: llama-cli -hf username/my-model-with-preset \ --hf-repo-draft username/my-draft-model-GGUF \ --temp 0.5 \ @@ -42,9 +42,9 @@ llama-cli -hf username/my-model-with-preset \ --top-p 0.95 ``` -You can also optionally override preset args by specifying them in the arguments: +You can also override preset arguments by specifying them on the command line: ```sh -# forcing temp = 0.1 +# Force temp = 0.1, overriding the preset value llama-cli -hf username/my-model-with-preset --temp 0.1 ```