From 0f70e3e0cd90326b768bb6b212e47dd5987e20ce Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Thu, 1 Jan 2026 12:49:19 +0100
Subject: [PATCH 1/2] arg: support remote preset

---
 common/arg.cpp      | 151 +++++++++++++++++++++++++++++---------------
 common/download.cpp |  15 +++--
 common/download.h   |   6 ++
 common/preset.cpp   |  77 +++++++++++++++++++++-
 common/preset.h     |  11 +++-
 docs/preset.md      |  50 +++++++++++++++
 6 files changed, 253 insertions(+), 57 deletions(-)
 create mode 100644 docs/preset.md

diff --git a/common/arg.cpp b/common/arg.cpp
index 62d31393c4..4d4f91e96f 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -6,6 +6,7 @@
 #include "log.h"
 #include "sampling.h"
 #include "download.h"
+#include "preset.h"
 
 // fix problem with std::min and std::max
 #if defined(_WIN32)
@@ -268,6 +269,42 @@ static void parse_tensor_buffer_overrides(const std::string & value, std::vector
     }
 }
 
+static std::string clean_file_name(const std::string & fname) { // flatten a repo/file name into a single path component
+    std::string clean_fname = fname;
+    string_replace_all(clean_fname, "\\", "_"); // separators become '_' (not removed) so 'a/bc' and 'ab/c' stay distinct
+    string_replace_all(clean_fname, "/", "_");  // same '/' -> '_' mapping the model cache path used before, so cached files are still found
+    return clean_fname;
+}
+
+static bool common_params_handle_remote_preset(common_params & params, llama_example ex) { // fetch the optional preset.ini of the model repo and apply its default section to params
+    GGML_ASSERT(!params.model.hf_repo.empty()); // precondition: the caller only calls this when hf_repo is set
+
+    const bool offline = params.offline;
+    std::string model_endpoint = get_model_endpoint();
+    auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini"; // NOTE(review): hf_repo is inserted verbatim - confirm it can never carry an extra suffix (e.g. a tag)
+
+    // prepare local path for caching (the file name is derived from the repo name)
+    auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini");
+    auto preset_path = fs_get_cache_file(preset_fname);
+    bool has_preset = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
+
+    // remote preset is optional, so we don't error out if not found; a preset that was found must be valid, though
+    if (has_preset) {
+        LOG_INF("applying remote preset from %s\n", preset_url.c_str());
+        common_preset_context ctx(ex, /* only_remote_allowed */ true); // restricts accepted keys to the remote-preset whitelist
+        common_preset global; // unused for now
+        auto remote_presets = ctx.load_from_ini(preset_path, global); // throws on keys that are not allowed in remote presets
+        if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) {
+            common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME);
+            preset.apply_to_params(params);
+        } else {
+            throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section");
+        }
+    }
+
+    return has_preset; // true only when a preset file was obtained and applied
+}
+
 struct handle_model_result {
     bool found_mmproj = false;
     common_params_model mmproj;
@@ -309,9 +346,7 @@ static handle_model_result common_params_handle_model(
             // make sure model path is present (for caching purposes)
             if (model.path.empty()) {
                 // this is to avoid different repo having same file name, or same file name in different subdirs
-                std::string filename = model.hf_repo + "_" + model.hf_file;
-                // to make sure we don't have any slashes in the filename
-                string_replace_all(filename, "/", "_");
+                std::string filename = clean_file_name(model.hf_repo + "_" + model.hf_file);
                 model.path = fs_get_cache_file(filename);
             }
 
@@ -425,61 +460,75 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         }
     };
 
-    std::set<std::string> seen_args;
+    auto parse_cli_args = [&]() {
+        std::set<std::string> seen_args;
 
-    for (int i = 1; i < argc; i++) {
-        const std::string arg_prefix = "--";
+        for (int i = 1; i < argc; i++) {
+            const std::string arg_prefix = "--";
 
-        std::string arg = argv[i];
-        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
-            std::replace(arg.begin(), arg.end(), '_', '-');
-        }
-        if (arg_to_options.find(arg) == arg_to_options.end()) {
-            throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
-        }
-        if (!seen_args.insert(arg).second) {
-            LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str());
-        }
-        auto & tmp = arg_to_options[arg];
-        auto opt = *tmp.first;
-        bool is_positive = tmp.second;
-        if (opt.has_value_from_env()) {
-            fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str());
-        }
-        try {
-            if (opt.handler_void) {
-                opt.handler_void(params);
-                continue;
+            std::string arg = argv[i];
+            if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+                std::replace(arg.begin(), arg.end(), '_', '-');
             }
-            if (opt.handler_bool) {
-                opt.handler_bool(params, is_positive);
-                continue;
+            if (arg_to_options.find(arg) == arg_to_options.end()) {
+                throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
             }
+            if (!seen_args.insert(arg).second) {
+                LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str());
+            }
+            auto & tmp = arg_to_options[arg];
+            auto opt = *tmp.first;
+            bool is_positive = tmp.second;
+            if (opt.has_value_from_env()) {
+                fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str());
+            }
+            try {
+                if (opt.handler_void) {
+                    opt.handler_void(params);
+                    continue;
+                }
+                if (opt.handler_bool) {
+                    opt.handler_bool(params, is_positive);
+                    continue;
+                }
 
-            // arg with single value
-            check_arg(i);
-            std::string val = argv[++i];
-            if (opt.handler_int) {
-                opt.handler_int(params, std::stoi(val));
-                continue;
-            }
-            if (opt.handler_string) {
-                opt.handler_string(params, val);
-                continue;
-            }
+                // arg with single value
+                check_arg(i);
+                std::string val = argv[++i];
+                if (opt.handler_int) {
+                    opt.handler_int(params, std::stoi(val));
+                    continue;
+                }
+                if (opt.handler_string) {
+                    opt.handler_string(params, val);
+                    continue;
+                }
 
-            // arg with 2 values
-            check_arg(i);
-            std::string val2 = argv[++i];
-            if (opt.handler_str_str) {
-                opt.handler_str_str(params, val, val2);
-                continue;
+                // arg with 2 values
+                check_arg(i);
+                std::string val2 = argv[++i];
+                if (opt.handler_str_str) {
+                    opt.handler_str_str(params, val, val2);
+                    continue;
+                }
+            } catch (std::exception & e) {
+                throw std::invalid_argument(string_format(
+                    "error while handling argument \"%s\": %s\n\n"
+                    "usage:\n%s\n\nto show complete usage, run with -h",
+                    arg.c_str(), e.what(), opt.to_string().c_str()));
             }
-        } catch (std::exception & e) {
-            throw std::invalid_argument(string_format(
-                "error while handling argument \"%s\": %s\n\n"
-                "usage:\n%s\n\nto show complete usage, run with -h",
-                arg.c_str(), e.what(), opt.to_string().c_str()));
+        }
+    };
+
+    // parse the first time to get -hf option (used for remote preset)
+    parse_cli_args();
+
+    // maybe handle remote preset
+    if (!params.model.hf_repo.empty()) {
+        bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex);
+        if (has_preset) {
+            // re-parse CLI args to override preset values
+            parse_cli_args();
         }
     }
 
diff --git a/common/download.cpp b/common/download.cpp
index ef87472560..d0aa386081 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -769,10 +769,10 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
 
 #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
 
-static bool common_download_file_single(const std::string & url,
-                                        const std::string & path,
-                                        const std::string & bearer_token,
-                                        bool                offline) {
+bool common_download_file_single(const std::string & url,
+                                 const std::string & path,
+                                 const std::string & bearer_token,
+                                 bool                offline) {
     if (!offline) {
         return common_download_file_single_online(url, path, bearer_token);
     }
@@ -1096,6 +1096,13 @@ std::string common_docker_resolve_model(const std::string &) {
     throw std::runtime_error("download functionality is not enabled in this build");
 }
 
+bool common_download_file_single(const std::string &, // fallback definition for builds without download support: ignores all arguments
+                                 const std::string &,
+                                 const std::string &,
+                                 bool) {
+    throw std::runtime_error("download functionality is not enabled in this build"); // never returns a value
+}
+
 #endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
 
 std::vector<common_cached_model_info> common_list_cached_models() {
diff --git a/common/download.h b/common/download.h
index d1321e6e90..5f42527af8 100644
--- a/common/download.h
+++ b/common/download.h
@@ -52,6 +52,12 @@ bool common_download_model(
 // returns list of cached models
 std::vector<common_cached_model_info> common_list_cached_models();
 
+// download single file from url to local path; throws std::runtime_error in builds without download support (presumably returns false when the file cannot be obtained - confirm)
+bool common_download_file_single(const std::string & url,
+                                 const std::string & path,
+                                 const std::string & bearer_token,
+                                 bool                offline);
+
 // resolve and download model from Docker registry
 // return local path to downloaded model file
 std::string common_docker_resolve_model(const std::string & docker);
diff --git a/common/preset.cpp b/common/preset.cpp
index e2fc18c5da..949fe00110 100644
--- a/common/preset.cpp
+++ b/common/preset.cpp
@@ -16,6 +16,46 @@ static std::string rm_leading_dashes(const std::string & str) {
     return str.substr(pos);
 }
 
+// build the set of keys accepted in a remote preset: the options listed below plus all sampling params
+// security: only a small subset of args is allowed; do not add more args unless absolutely necessary,
+// and args that output to files are strictly prohibited. key_to_opt maps each preset key to its option
+static std::set<std::string> get_remote_preset_whitelist(const std::map<std::string, common_arg> & key_to_opt) {
+    static const std::set<std::string> allowed_options = {
+        "model-url",
+        "hf-repo",
+        "hf-repo-draft",
+        "hf-repo-v", // vocoder
+        "hf-file-v", // vocoder
+        "mmproj-url",
+        "pooling",
+        "jinja",
+        "batch-size",
+        "ubatch-size",
+        "cache-reuse",
+        // note: sampling params are automatically allowed by default
+        // negated args will be added automatically
+    };
+
+    std::set<std::string> allowed_keys;
+
+    for (const auto & [key, opt] : key_to_opt) {
+        // an option is accepted when its key is explicitly listed above
+        // or when the option is flagged as a sampling parameter (is_sparam)
+        if (allowed_options.find(key) != allowed_options.end() || opt.is_sparam) {
+            allowed_keys.insert(key);
+            // also add variant keys (args without leading dashes and env vars)
+            for (const auto & arg : opt.get_args()) {
+                allowed_keys.insert(rm_leading_dashes(arg));
+            }
+            for (const auto & env : opt.get_env()) {
+                allowed_keys.insert(env);
+            }
+        }
+    }
+
+    return allowed_keys;
+}
+
 std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
     std::vector<std::string> args;
 
@@ -121,6 +161,29 @@ void common_preset::merge(const common_preset & other) {
     }
 }
 
+void common_preset::apply_to_params(common_params & params) const { // invoke the handler of every stored option on params, passing the stored value
+    for (const auto & [opt, val] : options) {
+        // apply each option to params: the first handler that is set on the option is used
+        if (opt.handler_string) {
+            opt.handler_string(params, val);
+        } else if (opt.handler_int) {
+            opt.handler_int(params, std::stoi(val)); // std::stoi throws when val is not a valid int
+        } else if (opt.handler_bool) {
+            opt.handler_bool(params, common_arg_utils::is_truthy(val));
+        } else if (opt.handler_str_str) {
+            // not supported yet
+            throw std::runtime_error(string_format(
+                "%s: option with two values is not supported yet",
+                __func__
+            ));
+        } else if (opt.handler_void) {
+            opt.handler_void(params); // val is ignored for value-less options
+        } else {
+            GGML_ABORT("unknown handler type");
+        }
+    }
+}
+
 static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
     std::map<std::string, std::map<std::string, std::string>> parsed;
 
@@ -230,10 +293,16 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
     return value;
 }
 
-common_preset_context::common_preset_context(llama_example ex)
+common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed)
         : ctx_params(common_params_parser_init(default_params, ex)) {
     common_params_add_preset_options(ctx_params.options);
     key_to_opt = get_map_key_opt(ctx_params);
+
+    // setup allowed keys if only_remote_allowed is true
+    if (only_remote_allowed) {
+        filter_allowed_keys = true;
+        allowed_keys = get_remote_preset_whitelist(key_to_opt);
+    }
 }
 
 common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
@@ -250,6 +319,12 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co
         LOG_DBG("loading preset: %s\n", preset.name.c_str());
         for (const auto & [key, value] : section.second) {
             LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
+            if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
+                throw std::runtime_error(string_format(
+                    "option '%s' is not allowed in remote presets",
+                    key.c_str()
+                ));
+            }
             if (key_to_opt.find(key) != key_to_opt.end()) {
                 const auto & opt = key_to_opt.at(key);
                 if (is_bool_arg(opt)) {
diff --git a/common/preset.h b/common/preset.h
index 3a84d1be29..11ba6ef812 100644
--- a/common/preset.h
+++ b/common/preset.h
@@ -6,6 +6,7 @@
 #include <string>
 #include <vector>
 #include <map>
+#include <set>
 
 //
 // INI preset parser and writer
@@ -40,6 +41,9 @@ struct common_preset {
 
     // merge another preset into this one, overwriting existing options
     void merge(const common_preset & other);
+
+    // apply preset options to common_params
+    void apply_to_params(common_params & params) const;
 };
 
 // interface for multiple presets in one file
@@ -50,7 +54,12 @@ struct common_preset_context {
     common_params default_params; // unused for now
     common_params_context ctx_params;
     std::map<std::string, common_arg> key_to_opt;
-    common_preset_context(llama_example ex);
+
+    bool filter_allowed_keys = false;
+    std::set<std::string> allowed_keys;
+
+    // if only_remote_allowed is true, only accept whitelisted keys
+    common_preset_context(llama_example ex, bool only_remote_allowed = false);
 
     // load presets from INI file
     common_presets load_from_ini(const std::string & path, common_preset & global) const;
diff --git a/docs/preset.md b/docs/preset.md
new file mode 100644
index 0000000000..daea562cc5
--- /dev/null
+++ b/docs/preset.md
@@ -0,0 +1,50 @@
+# llama.cpp INI preset
+
+## Introduction
+
+INI preset is a feature that was added in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859). The goal is to allow writing reusable and sharable parameter presets in llama.cpp
+
+### Using preset on server
+
+When using multiple models on server (router mode), INI preset file can be used to configure model-specific parameters. Please refer to [server documentations](../tools/server/README.md) for more.
+
+### Using a remote preset
+
+> [!NOTE]
+>
+> This feature is currently only supported via the `-hf` option
+
+For GGUF models stored on Hugging Face, you can create a file named `preset.ini` in the root directory of the repository that contains specific configurations for the current model.
+
+Example:
+
+```ini
+hf-repo-draft = username/my-draft-model-GGUF
+temp = 0.5
+top-k = 20
+top-p = 0.95
+```
+
+For security reason, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the list of allowed options.
+
+Example usage:
+
+Provided your repo is `username/my-model-with-preset` having a `preset.ini` with the content above.
+
+```sh
+llama-cli -hf username/my-model-with-preset
+
+# equivalent to
+llama-cli -hf username/my-model-with-preset \
+  --hf-repo-draft username/my-draft-model-GGUF \
+  --temp 0.5 \
+  --top-k 20 \
+  --top-p 0.95
+```
+
+You can also optionally override preset args by specifying them in the arguments:
+
+```sh
+# forcing temp = 0.1
+llama-cli -hf username/my-model-with-preset --temp 0.1
+```

From f9a97375cb539c7f195b473871a3a1eccf9da3d8 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen <son@huggingface.co>
Date: Thu, 1 Jan 2026 12:50:45 +0100
Subject: [PATCH 2/2] proof reading

---
 docs/preset.md | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/docs/preset.md b/docs/preset.md
index daea562cc5..c11c5dc22f 100644
--- a/docs/preset.md
+++ b/docs/preset.md
@@ -1,20 +1,20 @@
-# llama.cpp INI preset
+# llama.cpp INI Presets
 
 ## Introduction
 
-INI preset is a feature that was added in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859). The goal is to allow writing reusable and sharable parameter presets in llama.cpp
+The INI preset feature, introduced in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859), allows users to create reusable and shareable parameter configurations for llama.cpp.
 
-### Using preset on server
+### Using Presets with the Server
 
-When using multiple models on server (router mode), INI preset file can be used to configure model-specific parameters. Please refer to [server documentations](../tools/server/README.md) for more.
+When running multiple models on the server (router mode), INI preset files can be used to configure model-specific parameters. Please refer to the [server documentation](../tools/server/README.md) for more details.
 
-### Using a remote preset
+### Using a Remote Preset
 
 > [!NOTE]
 >
-> This feature is currently only supported via the `-hf` option
+> This feature is currently only supported via the `-hf` option.
 
-For GGUF models stored on Hugging Face, you can create a file named `preset.ini` in the root directory of the repository that contains specific configurations for the current model.
+For GGUF models hosted on Hugging Face, you can include a `preset.ini` file in the root directory of the repository to define specific configurations for that model.
 
 Example:
 
@@ -25,16 +25,16 @@ top-k = 20
 top-p = 0.95
 ```
 
-For security reason, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the list of allowed options.
+For security reasons, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the complete list of permitted options.
 
 Example usage:
 
-Provided your repo is `username/my-model-with-preset` having a `preset.ini` with the content above.
+Assuming your repository `username/my-model-with-preset` contains a `preset.ini` with the configuration above:
 
 ```sh
 llama-cli -hf username/my-model-with-preset
 
-# equivalent to
+# This is equivalent to:
 llama-cli -hf username/my-model-with-preset \
   --hf-repo-draft username/my-draft-model-GGUF \
   --temp 0.5 \
@@ -42,9 +42,9 @@ llama-cli -hf username/my-model-with-preset \
   --top-p 0.95
 ```
 
-You can also optionally override preset args by specifying them in the arguments:
+You can also override preset arguments by specifying them on the command line:
 
 ```sh
-# forcing temp = 0.1
+# Force temp = 0.1, overriding the preset value
 llama-cli -hf username/my-model-with-preset --temp 0.1
 ```