preset: allow named remote preset (#18728)
* preset: allow named remote preset * nits: fix docs * cont docs
This commit is contained in:
parent
2656c0d265
commit
23f82f2420
|
|
@ -281,12 +281,20 @@ static std::string clean_file_name(const std::string & fname) {
|
|||
static bool common_params_handle_remote_preset(common_params & params, llama_example ex) {
|
||||
GGML_ASSERT(!params.model.hf_repo.empty());
|
||||
|
||||
// the returned hf_repo is without tag
|
||||
auto [hf_repo, hf_tag] = common_download_split_repo_tag(params.model.hf_repo);
|
||||
|
||||
// "latest" tag (default if not specified) is translated to "default" preset
|
||||
if (hf_tag == "latest") {
|
||||
hf_tag = "default";
|
||||
}
|
||||
|
||||
const bool offline = params.offline;
|
||||
std::string model_endpoint = get_model_endpoint();
|
||||
auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini";
|
||||
auto preset_url = model_endpoint + hf_repo + "/resolve/main/preset.ini";
|
||||
|
||||
// prepare local path for caching
|
||||
auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini");
|
||||
auto preset_fname = clean_file_name(hf_repo + "_preset.ini");
|
||||
auto preset_path = fs_get_cache_file(preset_fname);
|
||||
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
|
||||
const bool has_preset = status >= 200 && status < 400;
|
||||
|
|
@ -295,14 +303,15 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa
|
|||
if (has_preset) {
|
||||
LOG_INF("applying remote preset from %s\n", preset_url.c_str());
|
||||
common_preset_context ctx(ex, /* only_remote_allowed */ true);
|
||||
common_preset global; // unused for now
|
||||
common_preset global;
|
||||
auto remote_presets = ctx.load_from_ini(preset_path, global);
|
||||
if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) {
|
||||
common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME);
|
||||
remote_presets = ctx.cascade(global, remote_presets);
|
||||
if (remote_presets.find(hf_tag) != remote_presets.end()) {
|
||||
common_preset preset = remote_presets.at(hf_tag);
|
||||
LOG_INF("\n%s", preset.to_ini().c_str()); // to_ini already added trailing newline
|
||||
preset.apply_to_params(params);
|
||||
} else {
|
||||
throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section");
|
||||
throw std::runtime_error("Remote preset.ini does not contain [" + std::string(hf_tag) + "] section");
|
||||
}
|
||||
} else {
|
||||
LOG_INF("%s", "no remote preset found, skipping\n");
|
||||
|
|
|
|||
|
|
@ -161,6 +161,16 @@ static bool is_http_status_ok(int status) {
|
|||
return status >= 200 && status < 400;
|
||||
}
|
||||
|
||||
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
|
||||
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
|
||||
std::string tag = parts.size() > 1 ? parts.back() : "latest";
|
||||
std::string hf_repo = parts[0];
|
||||
if (string_split<std::string>(hf_repo, '/').size() != 2) {
|
||||
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
||||
}
|
||||
return {hf_repo, tag};
|
||||
}
|
||||
|
||||
#ifdef LLAMA_USE_CURL
|
||||
|
||||
//
|
||||
|
|
@ -922,12 +932,8 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
|
|||
const std::string & bearer_token,
|
||||
bool offline,
|
||||
const common_header_list & custom_headers) {
|
||||
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
|
||||
std::string tag = parts.size() > 1 ? parts.back() : "latest";
|
||||
std::string hf_repo = parts[0];
|
||||
if (string_split<std::string>(hf_repo, '/').size() != 2) {
|
||||
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
||||
}
|
||||
// the returned hf_repo is without tag
|
||||
auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);
|
||||
|
||||
std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,12 @@ struct common_remote_params {
|
|||
// get remote file content, returns <http_code, raw_response_body>
|
||||
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
|
||||
|
||||
// split HF repo with tag into <repo, tag>
|
||||
// for example: "user/model:tag" -> <"user/model", "tag">
|
||||
// if tag is not present, default to "latest"
|
||||
// example: "user/model" -> <"user/model", "latest">
|
||||
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
|
||||
|
||||
struct common_cached_model_info {
|
||||
std::string manifest_path;
|
||||
std::string user;
|
||||
|
|
|
|||
|
|
@ -32,8 +32,10 @@ static std::set<std::string> get_remote_preset_whitelist(const std::map<std::str
|
|||
"batch-size",
|
||||
"ubatch-size",
|
||||
"cache-reuse",
|
||||
"chat-template-kwargs",
|
||||
"mmap",
|
||||
// note: sampling params are automatically allowed by default
|
||||
// negated args will be added automatically
|
||||
// negated args will be added automatically if the positive arg is specified above
|
||||
};
|
||||
|
||||
std::set<std::string> allowed_keys;
|
||||
|
|
@ -318,6 +320,11 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co
|
|||
}
|
||||
LOG_DBG("loading preset: %s\n", preset.name.c_str());
|
||||
for (const auto & [key, value] : section.second) {
|
||||
if (key == "version") {
|
||||
// skip version key (reserved for future use)
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
|
||||
if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) {
|
||||
throw std::runtime_error(string_format(
|
||||
|
|
@ -334,7 +341,10 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co
|
|||
}
|
||||
LOG_DBG("accepted option: %s = %s\n", key.c_str(), preset.options[opt].c_str());
|
||||
} else {
|
||||
// TODO: maybe warn about unknown key?
|
||||
throw std::runtime_error(string_format(
|
||||
"option '%s' not recognized in preset '%s'",
|
||||
key.c_str(), preset.name.c_str()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,3 +58,40 @@ temp = 0.8
|
|||
ctx-size = 1024
|
||||
; (and other configurations)
|
||||
```
|
||||
|
||||
### Named presets
|
||||
|
||||
If you want to define multiple preset configurations for one or more GGUF models, you can create a blank HF repo containing a single `preset.ini` file that references the actual model(s):
|
||||
|
||||
```ini
|
||||
[*]
|
||||
mmap = 1
|
||||
|
||||
[gpt-oss-20b-hf]
|
||||
hf = ggml-org/gpt-oss-20b-GGUF
|
||||
batch-size = 2048
|
||||
ubatch-size = 2048
|
||||
top-p = 1.0
|
||||
top-k = 0
|
||||
min-p = 0.01
|
||||
temp = 1.0
|
||||
chat-template-kwargs = {"reasoning_effort": "high"}
|
||||
|
||||
[gpt-oss-120b-hf]
|
||||
hf = ggml-org/gpt-oss-120b-GGUF
|
||||
batch-size = 2048
|
||||
ubatch-size = 2048
|
||||
top-p = 1.0
|
||||
top-k = 0
|
||||
min-p = 0.01
|
||||
temp = 1.0
|
||||
chat-template-kwargs = {"reasoning_effort": "high"}
|
||||
```
|
||||
|
||||
You can then use it via `llama-cli` or `llama-server`, example:
|
||||
|
||||
```sh
|
||||
llama-server -hf user/repo:gpt-oss-120b-hf
|
||||
```
|
||||
|
||||
Please make sure to provide the correct `hf-repo` for each child preset. Otherwise, you may get error: `The specified tag is not a valid quantization scheme.`
|
||||
|
|
|
|||
Loading…
Reference in New Issue