From e2731c3767ddb7fad4ae41b1dedce2d817ba2401 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Wed, 26 Nov 2025 15:57:20 +0100 Subject: [PATCH] set hf_repo/docker_repo as model alias when possible --- common/arg.cpp | 4 +++- common/common.h | 1 + common/download.h | 4 +++- tools/server/README.md | 1 - tools/server/server.cpp | 6 +++++- 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 062046c0d0..84db9ca77c 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -218,6 +218,7 @@ static handle_model_result common_params_handle_model( { if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths model.path = common_docker_resolve_model(model.docker_repo); + model.name = model.docker_repo; // set name for consistency } else if (!model.hf_repo.empty()) { // short-hand to avoid specifying --hf-file -> default it to --model if (model.hf_file.empty()) { @@ -226,7 +227,8 @@ static handle_model_result common_params_handle_model( if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) { exit(1); // built without CURL, error message already printed } - model.hf_repo = auto_detected.repo; + model.name = model.hf_repo; // repo name with tag + model.hf_repo = auto_detected.repo; // repo name without tag model.hf_file = auto_detected.ggufFile; if (!auto_detected.mmprojFile.empty()) { result.found_mmproj = true; diff --git a/common/common.h b/common/common.h index 4ac9700d7b..5569814738 100644 --- a/common/common.h +++ b/common/common.h @@ -203,6 +203,7 @@ struct common_params_model { std::string hf_repo = ""; // HF repo // NOLINT std::string hf_file = ""; // HF file // NOLINT std::string docker_repo = ""; // Docker repo // NOLINT + std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT }; struct common_params_speculative { diff --git a/common/download.h b/common/download.h index 45a6bd6bba..d1321e6e90 100644 --- a/common/download.h +++ b/common/download.h @@ -14,8 +14,10 @@
struct common_cached_model_info { std::string model; std::string tag; size_t size = 0; // GGUF size in bytes + // return string representation like "user/model:tag" + // if tag is "latest", it will be omitted std::string to_string() const { - return user + "/" + model + ":" + tag; + return user + "/" + model + (tag == "latest" ? "" : ":" + tag); } }; diff --git a/tools/server/README.md b/tools/server/README.md index 6b911b635a..44e27fd181 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1445,7 +1445,6 @@ Listing all models in cache. The model metadata will also include a field to ind ```json { "data": [{ - "name": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M", "id": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M", "in_cache": true, "path": "/Users/REDACTED/Library/Caches/llama.cpp/ggml-org_gemma-3-4b-it-GGUF_gemma-3-4b-it-Q4_K_M.gguf", diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 5ebcb08af8..ad766f1089 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -3353,7 +3353,6 @@ public: } models_json.push_back(json { {"id", meta.name}, - {"name", meta.name}, {"object", "model"}, // for OAI-compat {"owned_by", "llamacpp"}, // for OAI-compat {"created", t}, // for OAI-compat @@ -3822,6 +3821,11 @@ int main(int argc, char ** argv, char ** envp) { params.kv_unified = true; } + // for consistency between server router mode and single-model mode, we set the same model name as alias + if (params.model_alias.empty() && !params.model.name.empty()) { + params.model_alias = params.model.name; + } + common_init(); // struct that contains llama context and inference