set hf_repo/docker_repo as model alias when possible
This commit is contained in:
parent
e40f35fb61
commit
e2731c3767
|
|
@ -218,6 +218,7 @@ static handle_model_result common_params_handle_model(
|
||||||
{
|
{
|
||||||
if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
|
if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
|
||||||
model.path = common_docker_resolve_model(model.docker_repo);
|
model.path = common_docker_resolve_model(model.docker_repo);
|
||||||
|
model.name = model.docker_repo; // set name for consistency
|
||||||
} else if (!model.hf_repo.empty()) {
|
} else if (!model.hf_repo.empty()) {
|
||||||
// short-hand to avoid specifying --hf-file -> default it to --model
|
// short-hand to avoid specifying --hf-file -> default it to --model
|
||||||
if (model.hf_file.empty()) {
|
if (model.hf_file.empty()) {
|
||||||
|
|
@ -226,7 +227,8 @@ static handle_model_result common_params_handle_model(
|
||||||
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
|
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
|
||||||
exit(1); // built without CURL, error message already printed
|
exit(1); // built without CURL, error message already printed
|
||||||
}
|
}
|
||||||
model.hf_repo = auto_detected.repo;
|
model.name = model.hf_repo; // repo name with tag
|
||||||
|
model.hf_repo = auto_detected.repo; // repo name without tag
|
||||||
model.hf_file = auto_detected.ggufFile;
|
model.hf_file = auto_detected.ggufFile;
|
||||||
if (!auto_detected.mmprojFile.empty()) {
|
if (!auto_detected.mmprojFile.empty()) {
|
||||||
result.found_mmproj = true;
|
result.found_mmproj = true;
|
||||||
|
|
|
||||||
|
|
@ -203,6 +203,7 @@ struct common_params_model {
|
||||||
std::string hf_repo = ""; // HF repo // NOLINT
|
std::string hf_repo = ""; // HF repo // NOLINT
|
||||||
std::string hf_file = ""; // HF file // NOLINT
|
std::string hf_file = ""; // HF file // NOLINT
|
||||||
std::string docker_repo = ""; // Docker repo // NOLINT
|
std::string docker_repo = ""; // Docker repo // NOLINT
|
||||||
|
std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
|
||||||
};
|
};
|
||||||
|
|
||||||
struct common_params_speculative {
|
struct common_params_speculative {
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,10 @@ struct common_cached_model_info {
|
||||||
std::string model;
|
std::string model;
|
||||||
std::string tag;
|
std::string tag;
|
||||||
size_t size = 0; // GGUF size in bytes
|
size_t size = 0; // GGUF size in bytes
|
||||||
|
// return string representation like "user/model:tag"
|
||||||
|
// if tag is "latest", it will be omitted
|
||||||
std::string to_string() const {
|
std::string to_string() const {
|
||||||
return user + "/" + model + ":" + tag;
|
return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1445,7 +1445,6 @@ Listing all models in cache. The model metadata will also include a field to ind
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"data": [{
|
"data": [{
|
||||||
"name": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
|
||||||
"id": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
"id": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
||||||
"in_cache": true,
|
"in_cache": true,
|
||||||
"path": "/Users/REDACTED/Library/Caches/llama.cpp/ggml-org_gemma-3-4b-it-GGUF_gemma-3-4b-it-Q4_K_M.gguf",
|
"path": "/Users/REDACTED/Library/Caches/llama.cpp/ggml-org_gemma-3-4b-it-GGUF_gemma-3-4b-it-Q4_K_M.gguf",
|
||||||
|
|
|
||||||
|
|
@ -3353,7 +3353,6 @@ public:
|
||||||
}
|
}
|
||||||
models_json.push_back(json {
|
models_json.push_back(json {
|
||||||
{"id", meta.name},
|
{"id", meta.name},
|
||||||
{"name", meta.name},
|
|
||||||
{"object", "model"}, // for OAI-compat
|
{"object", "model"}, // for OAI-compat
|
||||||
{"owned_by", "llamacpp"}, // for OAI-compat
|
{"owned_by", "llamacpp"}, // for OAI-compat
|
||||||
{"created", t}, // for OAI-compat
|
{"created", t}, // for OAI-compat
|
||||||
|
|
@ -3822,6 +3821,11 @@ int main(int argc, char ** argv, char ** envp) {
|
||||||
params.kv_unified = true;
|
params.kv_unified = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for consistency between server router mode and single-model mode, we set the same model name as alias
|
||||||
|
if (params.model_alias.empty() && !params.model.name.empty()) {
|
||||||
|
params.model_alias = params.model.name;
|
||||||
|
}
|
||||||
|
|
||||||
common_init();
|
common_init();
|
||||||
|
|
||||||
// struct that contains llama context and inference
|
// struct that contains llama context and inference
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue