diff --git a/common/arg.cpp b/common/arg.cpp
index 869ec545e6..ec0a2f015e 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -281,12 +281,20 @@ static std::string clean_file_name(const std::string & fname) {
 
 static bool common_params_handle_remote_preset(common_params & params, llama_example ex) {
     GGML_ASSERT(!params.model.hf_repo.empty());
+    // the returned hf_repo is without the tag
+    auto [hf_repo, hf_tag] = common_download_split_repo_tag(params.model.hf_repo);
+
+    // the "latest" tag (the default if not specified) is translated to the "default" preset
+    if (hf_tag == "latest") {
+        hf_tag = "default";
+    }
+
     const bool offline = params.offline;
     std::string model_endpoint = get_model_endpoint();
-    auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini";
+    auto preset_url = model_endpoint + hf_repo + "/resolve/main/preset.ini";
 
     // prepare local path for caching
-    auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini");
+    auto preset_fname = clean_file_name(hf_repo + "_preset.ini");
     auto preset_path = fs_get_cache_file(preset_fname);
     const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
     const bool has_preset = status >= 200 && status < 400;
@@ -295,14 +303,15 @@ static bool common_params_handle_remote_preset(common_params & params, llama_example ex) {
     if (has_preset) {
         LOG_INF("applying remote preset from %s\n", preset_url.c_str());
         common_preset_context ctx(ex, /* only_remote_allowed */ true);
-        common_preset global; // unused for now
+        common_preset global;
         auto remote_presets = ctx.load_from_ini(preset_path, global);
-        if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) {
-            common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME);
+        remote_presets = ctx.cascade(global, remote_presets);
+        if (remote_presets.find(hf_tag) != remote_presets.end()) {
+            common_preset preset = remote_presets.at(hf_tag);
             LOG_INF("\n%s", preset.to_ini().c_str()); // to_ini already added trailing newline
             preset.apply_to_params(params);
         } else {
-            throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section");
+            throw std::runtime_error("Remote preset.ini does not contain [" + std::string(hf_tag) + "] section");
         }
     } else {
         LOG_INF("%s", "no remote preset found, skipping\n");
diff --git a/common/download.cpp b/common/download.cpp
index a1e0e518e9..dc7d5c8478 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -161,6 +161,16 @@ static bool is_http_status_ok(int status) {
     return status >= 200 && status < 400;
 }
 
+std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
+    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
+    std::string tag = parts.size() > 1 ? parts.back() : "latest";
+    std::string hf_repo = parts[0];
+    if (string_split<std::string>(hf_repo, '/').size() != 2) {
+        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
+    }
+    return {hf_repo, tag};
+}
+
 #ifdef LLAMA_USE_CURL
 
 //
@@ -922,12 +932,8 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
                                       const std::string & bearer_token,
                                       bool offline,
                                       const common_header_list & custom_headers) {
-    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
-    std::string tag = parts.size() > 1 ? parts.back() : "latest";
-    std::string hf_repo = parts[0];
-    if (string_split<std::string>(hf_repo, '/').size() != 2) {
-        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
-    }
+    // the returned hf_repo is without the tag
+    auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);
 
     std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
 
diff --git a/common/download.h b/common/download.h
index c79be2f90e..1c1d8e6db5 100644
--- a/common/download.h
+++ b/common/download.h
@@ -17,6 +17,12 @@ struct common_remote_params {
 // get remote file content, returns <http_code, raw response body>
 std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
 
+// split an HF repo with tag into <hf_repo, tag>
+// for example: "user/model:tag" -> <"user/model", "tag">
+// if the tag is not present, it defaults to "latest"
+// example: "user/model" -> <"user/model", "latest">
+std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
+
 struct common_cached_model_info {
     std::string manifest_path;
     std::string user;
diff --git a/common/preset.cpp b/common/preset.cpp
index aec14e0769..57ccd000b5 100644
--- a/common/preset.cpp
+++ b/common/preset.cpp
@@ -32,8 +32,10 @@ static std::set<std::string> get_remote_preset_whitelist(const std::map
     std::set<std::string> allowed_keys;
@@ -318,6 +320,11 @@ common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) {
         }
         LOG_DBG("loading preset: %s\n", preset.name.c_str());
         for (const auto & [key, value] : section.second) {
+            if (key == "version") {
+                // skip the version key (reserved for future use)
+                continue;
+            }
+
             LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
             if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
                 throw std::runtime_error(string_format(
@@ -334,7 +341,10 @@ common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) {
                 }
                 LOG_DBG("accepted option: %s = %s\n", key.c_str(), preset.options[opt].c_str());
             } else {
-                // TODO: maybe warn about unknown key?
+                throw std::runtime_error(string_format(
+                    "option '%s' not recognized in preset '%s'",
+                    key.c_str(), preset.name.c_str()
+                ));
             }
         }
 
diff --git a/docs/preset.md b/docs/preset.md
index be50bb9926..d49fb0a1ae 100644
--- a/docs/preset.md
+++ b/docs/preset.md
@@ -58,3 +58,40 @@ temp = 0.8
 ctx-size = 1024
 ; (and other configurations)
 ```
+
+### Named presets
+
+If you want to define multiple preset configurations for one or more GGUF models, you can create a blank HF repo containing a single `preset.ini` file that references the actual model(s):
+
+```ini
+[*]
+mmap = 1
+
+[gpt-oss-20b-hf]
+hf = ggml-org/gpt-oss-20b-GGUF
+batch-size = 2048
+ubatch-size = 2048
+top-p = 1.0
+top-k = 0
+min-p = 0.01
+temp = 1.0
+chat-template-kwargs = {"reasoning_effort": "high"}
+
+[gpt-oss-120b-hf]
+hf = ggml-org/gpt-oss-120b-GGUF
+batch-size = 2048
+ubatch-size = 2048
+top-p = 1.0
+top-k = 0
+min-p = 0.01
+temp = 1.0
+chat-template-kwargs = {"reasoning_effort": "high"}
+```
+
+You can then use it with `llama-cli` or `llama-server`, for example:
+
+```sh
+llama-server -hf user/repo:gpt-oss-120b-hf
+```
+
+Please make sure to provide the correct `hf` repo for each child preset. Otherwise, you may get the error: `The specified tag is not a valid quantization scheme.`