From e2731c3767ddb7fad4ae41b1dedce2d817ba2401 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Wed, 26 Nov 2025 15:57:20 +0100 Subject: [PATCH] set hf_repo/docker_repo as model alias when possible --- common/arg.cpp | 4 +++- common/common.h | 1 + common/download.h | 4 +++- tools/server/README.md | 1 - tools/server/server.cpp | 6 +++++- 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 062046c0d0..84db9ca77c 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -218,6 +218,7 @@ static handle_model_result common_params_handle_model( { if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths model.path = common_docker_resolve_model(model.docker_repo); + model.name = model.docker_repo; // set name for consistency } else if (!model.hf_repo.empty()) { // short-hand to avoid specifying --hf-file -> default it to --model if (model.hf_file.empty()) { @@ -226,7 +227,8 @@ static handle_model_result common_params_handle_model( if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) { exit(1); // built without CURL, error message already printed } - model.hf_repo = auto_detected.repo; + model.name = model.hf_repo; // repo name with tag + model.hf_repo = auto_detected.repo; // repo name without tag model.hf_file = auto_detected.ggufFile; if (!auto_detected.mmprojFile.empty()) { result.found_mmproj = true; diff --git a/common/common.h b/common/common.h index 4ac9700d7b..5569814738 100644 --- a/common/common.h +++ b/common/common.h @@ -203,6 +203,7 @@ struct common_params_model { std::string hf_repo = ""; // HF repo // NOLINT std::string hf_file = ""; // HF file // NOLINT std::string docker_repo = ""; // Docker repo // NOLINT + std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT }; struct common_params_speculative { diff --git a/common/download.h b/common/download.h index 45a6bd6bba..d1321e6e90 100644 --- a/common/download.h +++ b/common/download.h @@ -14,8 +14,10 @@
struct common_cached_model_info { std::string model; std::string tag; size_t size = 0; // GGUF size in bytes + // return string representation like "user/model:tag" + // if tag is "latest", it will be omitted std::string to_string() const { - return user + "/" + model + ":" + tag; + return user + "/" + model + (tag == "latest" ? "" : ":" + tag); } }; diff --git a/tools/server/README.md b/tools/server/README.md index 6b911b635a..44e27fd181 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1445,7 +1445,6 @@ Listing all models in cache. The model metadata will also include a field to ind ```json { "data": [{ - "name": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M", "id": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M", "in_cache": true, "path": "/Users/REDACTED/Library/Caches/llama.cpp/ggml-org_gemma-3-4b-it-GGUF_gemma-3-4b-it-Q4_K_M.gguf", diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 5ebcb08af8..ad766f1089 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -3353,7 +3353,6 @@ public: } models_json.push_back(json { {"id", meta.name}, - {"name", meta.name}, {"object", "model"}, // for OAI-compat {"owned_by", "llamacpp"}, // for OAI-compat {"created", t}, // for OAI-compat @@ -3822,6 +3821,11 @@ int main(int argc, char ** argv, char ** envp) { params.kv_unified = true; } + // for consistency between server router mode and single-model mode, we set the same model name as alias + if (params.model_alias.empty() && !params.model.name.empty()) { + params.model_alias = params.model.name; + } + common_init(); // struct that contains llama context and inference