From 8ece3836b400dd8d89021ad2cc6e57843ced8378 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Thu, 8 Jan 2026 22:35:40 +0100 Subject: [PATCH] common: support remote preset (#18520) * arg: support remote preset * proof reading * allow one HF repo to point to multiple HF repos * docs: mention about multiple GGUF use case * correct clean_file_name * download: also return HTTP status code * fix case with cache file used * fix --offline option --- common/arg.cpp | 167 ++++++++++++++++++++++++++++++-------------- common/download.cpp | 83 ++++++++++++++-------- common/download.h | 8 +++ common/preset.cpp | 77 +++++++++++++++++++- common/preset.h | 11 ++- docs/preset.md | 60 ++++++++++++++++ 6 files changed, 324 insertions(+), 82 deletions(-) create mode 100644 docs/preset.md diff --git a/common/arg.cpp b/common/arg.cpp index 9c0e6fbe78..72750a3cba 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -6,6 +6,7 @@ #include "log.h" #include "sampling.h" #include "download.h" +#include "preset.h" // fix problem with std::min and std::max #if defined(_WIN32) @@ -268,6 +269,46 @@ static void parse_tensor_buffer_overrides(const std::string & value, std::vector } } +static std::string clean_file_name(const std::string & fname) { + std::string clean_fname = fname; + string_replace_all(clean_fname, "\\", "_"); + string_replace_all(clean_fname, "/", "_"); + return clean_fname; +} + +static bool common_params_handle_remote_preset(common_params & params, llama_example ex) { + GGML_ASSERT(!params.model.hf_repo.empty()); + + const bool offline = params.offline; + std::string model_endpoint = get_model_endpoint(); + auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini"; + + // prepare local path for caching + auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini"); + auto preset_path = fs_get_cache_file(preset_fname); + const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline); + const bool 
has_preset = status >= 200 && status < 400; + + // remote preset is optional, so we don't error out if not found + if (has_preset) { + LOG_INF("applying remote preset from %s\n", preset_url.c_str()); + common_preset_context ctx(ex, /* only_remote_allowed */ true); + common_preset global; // unused for now + auto remote_presets = ctx.load_from_ini(preset_path, global); + if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) { + common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME); + LOG_INF("\n%s", preset.to_ini().c_str()); // to_ini already added trailing newline + preset.apply_to_params(params); + } else { + throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section"); + } + } else { + LOG_INF("%s", "no remote preset found, skipping\n"); + } + + return has_preset; +} + struct handle_model_result { bool found_mmproj = false; common_params_model mmproj; @@ -309,9 +350,7 @@ static handle_model_result common_params_handle_model( // make sure model path is present (for caching purposes) if (model.path.empty()) { // this is to avoid different repo having same file name, or same file name in different subdirs - std::string filename = model.hf_repo + "_" + model.hf_file; - // to make sure we don't have any slashes in the filename - string_replace_all(filename, "/", "_"); + std::string filename = clean_file_name(model.hf_repo + "_" + model.hf_file); model.path = fs_get_cache_file(filename); } @@ -425,61 +464,87 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context } }; - std::set seen_args; + auto parse_cli_args = [&]() { + std::set seen_args; - for (int i = 1; i < argc; i++) { - const std::string arg_prefix = "--"; + for (int i = 1; i < argc; i++) { + const std::string arg_prefix = "--"; - std::string arg = argv[i]; - if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { - std::replace(arg.begin(), arg.end(), '_', '-'); - } - if 
(arg_to_options.find(arg) == arg_to_options.end()) { - throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); - } - if (!seen_args.insert(arg).second) { - LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str()); - } - auto & tmp = arg_to_options[arg]; - auto opt = *tmp.first; - bool is_positive = tmp.second; - if (opt.has_value_from_env()) { - fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); - } - try { - if (opt.handler_void) { - opt.handler_void(params); - continue; + std::string arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); } - if (opt.handler_bool) { - opt.handler_bool(params, is_positive); - continue; + if (arg_to_options.find(arg) == arg_to_options.end()) { + throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); } + if (!seen_args.insert(arg).second) { + LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str()); + } + auto & tmp = arg_to_options[arg]; + auto opt = *tmp.first; + bool is_positive = tmp.second; + if (opt.has_value_from_env()) { + fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); + } + try { + if (opt.handler_void) { + opt.handler_void(params); + continue; + } + if (opt.handler_bool) { + opt.handler_bool(params, is_positive); + continue; + } - // arg with single value - check_arg(i); - std::string val = argv[++i]; - if (opt.handler_int) { - opt.handler_int(params, std::stoi(val)); - continue; - } - if (opt.handler_string) { - opt.handler_string(params, val); - continue; - } + // arg with single value + check_arg(i); + std::string val = argv[++i]; + if 
(opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + continue; + } + if (opt.handler_string) { + opt.handler_string(params, val); + continue; + } - // arg with 2 values - check_arg(i); - std::string val2 = argv[++i]; - if (opt.handler_str_str) { - opt.handler_str_str(params, val, val2); - continue; + // arg with 2 values + check_arg(i); + std::string val2 = argv[++i]; + if (opt.handler_str_str) { + opt.handler_str_str(params, val, val2); + continue; + } + } catch (std::exception & e) { + throw std::invalid_argument(string_format( + "error while handling argument \"%s\": %s\n\n" + "usage:\n%s\n\nto show complete usage, run with -h", + arg.c_str(), e.what(), opt.to_string().c_str())); } - } catch (std::exception & e) { - throw std::invalid_argument(string_format( - "error while handling argument \"%s\": %s\n\n" - "usage:\n%s\n\nto show complete usage, run with -h", - arg.c_str(), e.what(), opt.to_string().c_str())); + } + }; + + // parse the first time to get -hf option (used for remote preset) + parse_cli_args(); + + // maybe handle remote preset + if (!params.model.hf_repo.empty()) { + std::string cli_hf_repo = params.model.hf_repo; + bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex); + + // special case: if hf_repo explicitly set by preset, we need to preserve it (ignore CLI value) + // this is useful when we have one HF repo pointing to other HF repos (one model - multiple GGUFs) + std::string preset_hf_repo = params.model.hf_repo; + bool preset_has_hf_repo = preset_hf_repo != cli_hf_repo; + + if (has_preset) { + // re-parse CLI args to override preset values + parse_cli_args(); + } + + // preserve hf_repo from preset if needed + if (preset_has_hf_repo) { + params.model.hf_repo = preset_hf_repo; } } diff --git a/common/download.cpp b/common/download.cpp index 6f56b5518f..a1e0e518e9 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -157,6 +157,10 @@ static std::string read_etag(const std::string & path) { return none; 
} +static bool is_http_status_ok(int status) { + return status >= 200 && status < 400; +} + #ifdef LLAMA_USE_CURL // @@ -306,12 +310,14 @@ static bool common_download_head(CURL * curl, } // download one single file from remote URL to local path -static bool common_download_file_single_online(const std::string & url, +// returns status code or -1 on error +static int common_download_file_single_online(const std::string & url, const std::string & path, const std::string & bearer_token, const common_header_list & custom_headers) { static const int max_attempts = 3; static const int retry_delay_seconds = 2; + for (int i = 0; i < max_attempts; ++i) { std::string etag; @@ -371,7 +377,7 @@ static bool common_download_file_single_online(const std::string & url, LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); if (remove(path.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + return -1; } } @@ -380,14 +386,14 @@ static bool common_download_file_single_online(const std::string & url, if (std::filesystem::exists(path_temporary)) { if (remove(path_temporary.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); - return false; + return -1; } } if (std::filesystem::exists(path)) { if (remove(path.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + return -1; } } } @@ -414,23 +420,27 @@ static bool common_download_file_single_online(const std::string & url, long http_code = 0; curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { + + int status = static_cast(http_code); + if (!is_http_status_ok(http_code)) { LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; + return status; // TODO: maybe only return on certain codes } if (rename(path_temporary.c_str(), path.c_str()) != 0) { LOG_ERR("%s: unable to rename 
file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; + return -1; } + + return static_cast(http_code); } else { LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); - } - break; + return 304; // Not Modified - fake cached response + } } - return true; + return -1; // max attempts reached } std::pair> common_remote_get_content(const std::string & url, const common_remote_params & params) { @@ -625,7 +635,8 @@ static bool common_pull_file(httplib::Client & cli, } // download one single file from remote URL to local path -static bool common_download_file_single_online(const std::string & url, +// returns status code or -1 on error +static int common_download_file_single_online(const std::string & url, const std::string & path, const std::string & bearer_token, const common_header_list & custom_headers) { @@ -659,8 +670,10 @@ static bool common_download_file_single_online(const std::string & url, LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? 
head->status : -1); if (file_exists) { LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str()); - return true; + return 304; // 304 Not Modified - fake cached response } + return head ? head->status : -1; // cannot use cached file: raw status code, or -1 when HEAD got no response (head is null) + // TODO: maybe retry only on certain codes } std::string etag; @@ -692,12 +705,12 @@ static bool common_download_file_single_online(const std::string & url, if (file_exists) { if (!should_download_from_scratch) { LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); - return true; + return 304; // 304 Not Modified - fake cached response } LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); if (remove(path.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + return -1; } } @@ -709,7 +722,7 @@ static bool common_download_file_single_online(const std::string & url, existing_size = std::filesystem::file_size(path_temporary); } else if (remove(path_temporary.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); - return false; + return -1; } } @@ -730,15 +743,16 @@ static bool common_download_file_single_online(const std::string & url, if (std::rename(path_temporary.c_str(), path.c_str()) != 0) { LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; + return -1; } if (!etag.empty()) { write_etag(path, etag); } - break; + + return head->status; // TODO: use actual GET status? 
} - return true; + return -1; // max attempts reached } std::pair> common_remote_get_content(const std::string & url, @@ -777,22 +791,22 @@ std::pair> common_remote_get_content(const std::string #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB) -static bool common_download_file_single(const std::string & url, - const std::string & path, - const std::string & bearer_token, - bool offline, - const common_header_list & headers) { +int common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline, + const common_header_list & headers) { if (!offline) { return common_download_file_single_online(url, path, bearer_token, headers); } if (!std::filesystem::exists(path)) { LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); - return false; + return -1; } LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); - return true; + return 304; // Not Modified - fake cached response } // download multiple files from remote URLs to local paths @@ -810,7 +824,8 @@ static bool common_download_file_multiple(const std::vector & it) -> bool { - return common_download_file_single(it.first, it.second, bearer_token, offline, headers); + const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers); + return is_http_status_ok(http_status); }, item ) @@ -837,7 +852,8 @@ bool common_download_model(const common_params_model & model, return false; } - if (!common_download_file_single(model.url, model.path, bearer_token, offline, headers)) { + const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers); + if (!is_http_status_ok(http_status)) { return false; } @@ -975,7 +991,7 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, } else if (res_code == 401) { throw std::runtime_error("error: model is private or does not exist; if you are 
accessing a gated model, please provide a valid HF token"); } else { - throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); + throw std::runtime_error(string_format("error from HF API (%s), response code: %ld, data: %s", url.c_str(), res_code, res_str.c_str())); } // check response @@ -1094,7 +1110,8 @@ std::string common_docker_resolve_model(const std::string & docker) { std::string local_path = fs_get_cache_file(model_filename); const std::string blob_url = url_prefix + "/blobs/" + gguf_digest; - if (!common_download_file_single(blob_url, local_path, token, false, {})) { + const int http_status = common_download_file_single(blob_url, local_path, token, false, {}); + if (!is_http_status_ok(http_status)) { throw std::runtime_error("Failed to download Docker Model"); } @@ -1120,6 +1137,14 @@ std::string common_docker_resolve_model(const std::string &) { throw std::runtime_error("download functionality is not enabled in this build"); } +int common_download_file_single(const std::string &, + const std::string &, + const std::string &, + bool, + const common_header_list &) { + throw std::runtime_error("download functionality is not enabled in this build"); +} + #endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB std::vector common_list_cached_models() { diff --git a/common/download.h b/common/download.h index 9ea2093939..c79be2f90e 100644 --- a/common/download.h +++ b/common/download.h @@ -65,6 +65,14 @@ bool common_download_model( // returns list of cached models std::vector common_list_cached_models(); +// download single file from url to local path +// returns status code or -1 on error +int common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline, + const common_header_list & headers = {}); + // resolve and download model from Docker registry // return local path to downloaded model file std::string common_docker_resolve_model(const 
std::string & docker); diff --git a/common/preset.cpp b/common/preset.cpp index e2fc18c5da..aec14e0769 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -16,6 +16,46 @@ static std::string rm_leading_dashes(const std::string & str) { return str.substr(pos); } +// only allow a subset of args for remote presets for security reasons +// do not add more args unless absolutely necessary +// args that output to files are strictly prohibited +static std::set get_remote_preset_whitelist(const std::map & key_to_opt) { + static const std::set allowed_options = { + "model-url", + "hf-repo", + "hf-repo-draft", + "hf-repo-v", // vocoder + "hf-file-v", // vocoder + "mmproj-url", + "pooling", + "jinja", + "batch-size", + "ubatch-size", + "cache-reuse", + // note: sampling params are automatically allowed by default + // negated args will be added automatically + }; + + std::set allowed_keys; + + for (const auto & it : key_to_opt) { + const std::string & key = it.first; + const common_arg & opt = it.second; + if (allowed_options.find(key) != allowed_options.end() || opt.is_sparam) { + allowed_keys.insert(key); + // also add variant keys (args without leading dashes and env vars) + for (const auto & arg : opt.get_args()) { + allowed_keys.insert(rm_leading_dashes(arg)); + } + for (const auto & env : opt.get_env()) { + allowed_keys.insert(env); + } + } + } + + return allowed_keys; +} + std::vector common_preset::to_args(const std::string & bin_path) const { std::vector args; @@ -121,6 +161,29 @@ void common_preset::merge(const common_preset & other) { } } +void common_preset::apply_to_params(common_params & params) const { + for (const auto & [opt, val] : options) { + // apply each option to params + if (opt.handler_string) { + opt.handler_string(params, val); + } else if (opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + } else if (opt.handler_bool) { + opt.handler_bool(params, common_arg_utils::is_truthy(val)); + } else if (opt.handler_str_str) { + // not 
supported yet + throw std::runtime_error(string_format( + "%s: option with two values is not supported yet", + __func__ + )); + } else if (opt.handler_void) { + opt.handler_void(params); + } else { + GGML_ABORT("unknown handler type"); + } + } +} + static std::map> parse_ini_from_file(const std::string & path) { std::map> parsed; @@ -230,10 +293,16 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke return value; } -common_preset_context::common_preset_context(llama_example ex) +common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed) : ctx_params(common_params_parser_init(default_params, ex)) { common_params_add_preset_options(ctx_params.options); key_to_opt = get_map_key_opt(ctx_params); + + // setup allowed keys if only_remote_allowed is true + if (only_remote_allowed) { + filter_allowed_keys = true; + allowed_keys = get_remote_preset_whitelist(key_to_opt); + } } common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const { @@ -250,6 +319,12 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co LOG_DBG("loading preset: %s\n", preset.name.c_str()); for (const auto & [key, value] : section.second) { LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str()); + if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) { + throw std::runtime_error(string_format( + "option '%s' is not allowed in remote presets", + key.c_str() + )); + } if (key_to_opt.find(key) != key_to_opt.end()) { const auto & opt = key_to_opt.at(key); if (is_bool_arg(opt)) { diff --git a/common/preset.h b/common/preset.h index 3a84d1be29..11ba6ef812 100644 --- a/common/preset.h +++ b/common/preset.h @@ -6,6 +6,7 @@ #include #include #include +#include // // INI preset parser and writer @@ -40,6 +41,9 @@ struct common_preset { // merge another preset into this one, overwriting existing options void merge(const common_preset & other); + + // apply 
preset options to common_params + void apply_to_params(common_params & params) const; }; // interface for multiple presets in one file @@ -50,7 +54,12 @@ struct common_preset_context { common_params default_params; // unused for now common_params_context ctx_params; std::map key_to_opt; - common_preset_context(llama_example ex); + + bool filter_allowed_keys = false; + std::set allowed_keys; + + // if only_remote_allowed is true, only accept whitelisted keys + common_preset_context(llama_example ex, bool only_remote_allowed = false); // load presets from INI file common_presets load_from_ini(const std::string & path, common_preset & global) const; diff --git a/docs/preset.md b/docs/preset.md new file mode 100644 index 0000000000..be50bb9926 --- /dev/null +++ b/docs/preset.md @@ -0,0 +1,60 @@ +# llama.cpp INI Presets + +## Introduction + +The INI preset feature, introduced in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859), allows users to create reusable and shareable parameter configurations for llama.cpp. + +### Using Presets with the Server + +When running multiple models on the server (router mode), INI preset files can be used to configure model-specific parameters. Please refer to the [server documentation](../tools/server/README.md) for more details. + +### Using a Remote Preset + +> [!NOTE] +> +> This feature is currently only supported via the `-hf` option. + +For GGUF models hosted on Hugging Face, you can include a `preset.ini` file in the root directory of the repository to define specific configurations for that model. + +Example: + +```ini +hf-repo-draft = username/my-draft-model-GGUF +temp = 0.5 +top-k = 20 +top-p = 0.95 +``` + +For security reasons, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the complete list of permitted options. 
+ +Example usage: + +Assuming your repository `username/my-model-with-preset` contains a `preset.ini` with the configuration above: + +```sh +llama-cli -hf username/my-model-with-preset + +# This is equivalent to: +llama-cli -hf username/my-model-with-preset \ + --hf-repo-draft username/my-draft-model-GGUF \ + --temp 0.5 \ + --top-k 20 \ + --top-p 0.95 +``` + +You can also override preset arguments by specifying them on the command line: + +```sh +# Force temp = 0.1, overriding the preset value +llama-cli -hf username/my-model-with-preset --temp 0.1 +``` + +If you want to define multiple preset configurations for one or more GGUF models, you can create a blank HF repo for each preset. Each HF repo should contain a `preset.ini` file that references the actual model(s): + +```ini +hf-repo = user/my-model-main +hf-repo-draft = user/my-model-draft +temp = 0.8 +batch-size = 1024 +; (and other configurations) +```