From 3306dbaef7553da03971c617e48cd27d00328bb4 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sat, 21 Mar 2026 16:00:26 -0500 Subject: [PATCH] misc : prefer ggml-org models in docs and examples (#20827) * misc : prefer ggml-org models in docs and examples Prefer referring to known-good quantizations under ggml-org rather than 3rd-party uploaders. * remove accidentally committed file --- common/arg.cpp | 2 +- tools/cli/README.md | 2 +- tools/completion/README.md | 2 +- tools/llama-bench/llama-bench.cpp | 2 +- tools/server/README.md | 2 +- tools/server/webui/src/lib/constants/settings-config.ts | 2 +- tools/server/webui/src/lib/stores/models.svelte.ts | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index aad70ec546..c6a2dcbf2d 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2583,7 +2583,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"-hf", "-hfr", "--hf-repo"}, "/[:quant]", "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n" "mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n" - "example: unsloth/phi-4-GGUF:q4_k_m\n" + "example: ggml-org/GLM-4.7-Flash-GGUF:Q4_K_M\n" "(default: unused)", [](common_params & params, const std::string & value) { params.model.hf_repo = value; diff --git a/tools/cli/README.md b/tools/cli/README.md index 22d3fc87e9..c344cab2a8 100644 --- a/tools/cli/README.md +++ b/tools/cli/README.md @@ -83,7 +83,7 @@ | `-m, --model FNAME` | model path to load
(env: LLAMA_ARG_MODEL) | | `-mu, --model-url MODEL_URL` | model download url (default: unused)
(env: LLAMA_ARG_MODEL_URL) | | `-dr, --docker-repo [/][:quant]` | Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.
example: gemma3
(default: unused)
(env: LLAMA_ARG_DOCKER_REPO) | -| `-hf, -hfr, --hf-repo /[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: unsloth/phi-4-GGUF:q4_k_m
(default: unused)
(env: LLAMA_ARG_HF_REPO) | +| `-hf, -hfr, --hf-repo /[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: ggml-org/GLM-4.7-Flash-GGUF:Q4_K_M
(default: unused)
(env: LLAMA_ARG_HF_REPO) | | `-hfd, -hfrd, --hf-repo-draft /[:quant]` | Same as --hf-repo, but for the draft model (default: unused)
(env: LLAMA_ARG_HFD_REPO) | | `-hff, --hf-file FILE` | Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)
(env: LLAMA_ARG_HF_FILE) | | `-hfv, -hfrv, --hf-repo-v /[:quant]` | Hugging Face model repository for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_REPO_V) | diff --git a/tools/completion/README.md b/tools/completion/README.md index f868c2c7d7..b5eeba7334 100644 --- a/tools/completion/README.md +++ b/tools/completion/README.md @@ -166,7 +166,7 @@ llama-completion.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 | `-m, --model FNAME` | model path to load
(env: LLAMA_ARG_MODEL) | | `-mu, --model-url MODEL_URL` | model download url (default: unused)
(env: LLAMA_ARG_MODEL_URL) | | `-dr, --docker-repo [/][:quant]` | Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.
example: gemma3
(default: unused)
(env: LLAMA_ARG_DOCKER_REPO) | -| `-hf, -hfr, --hf-repo /[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: unsloth/phi-4-GGUF:q4_k_m
(default: unused)
(env: LLAMA_ARG_HF_REPO) | +| `-hf, -hfr, --hf-repo /[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: ggml-org/GLM-4.7-Flash-GGUF:Q4_K_M
(default: unused)
(env: LLAMA_ARG_HF_REPO) | | `-hfd, -hfrd, --hf-repo-draft /[:quant]` | Same as --hf-repo, but for the draft model (default: unused)
(env: LLAMA_ARG_HFD_REPO) | | `-hff, --hf-file FILE` | Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)
(env: LLAMA_ARG_HF_FILE) | | `-hfv, -hfrv, --hf-repo-v /[:quant]` | Hugging Face model repository for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_REPO_V) | diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index b0f1d6b936..21173576cc 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -418,7 +418,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -m, --model (default: %s)\n", join(cmd_params_defaults.model, ",").c_str()); printf(" -hf, -hfr, --hf-repo /[:quant] Hugging Face model repository; quant is optional, case-insensitive\n"); printf(" default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"); - printf(" example: unsloth/phi-4-GGUF:Q4_K_M\n"); + printf(" example: ggml-org/GLM-4.7-Flash-GGUF:Q4_K_M\n"); printf(" (default: unused)\n"); printf(" -hff, --hf-file Hugging Face model file. If specified, it will override the quant in --hf-repo\n"); printf(" (default: unused)\n"); diff --git a/tools/server/README.md b/tools/server/README.md index df59e2d9b7..554444d74b 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -100,7 +100,7 @@ For the full list of features, please refer to [server's changelog](https://gith | `-m, --model FNAME` | model path to load
(env: LLAMA_ARG_MODEL) | | `-mu, --model-url MODEL_URL` | model download url (default: unused)
(env: LLAMA_ARG_MODEL_URL) | | `-dr, --docker-repo [/][:quant]` | Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.
example: gemma3
(default: unused)
(env: LLAMA_ARG_DOCKER_REPO) | -| `-hf, -hfr, --hf-repo /[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: unsloth/phi-4-GGUF:q4_k_m
(default: unused)
(env: LLAMA_ARG_HF_REPO) | +| `-hf, -hfr, --hf-repo /[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: ggml-org/GLM-4.7-Flash-GGUF:Q4_K_M
(default: unused)
(env: LLAMA_ARG_HF_REPO) | | `-hfd, -hfrd, --hf-repo-draft /[:quant]` | Same as --hf-repo, but for the draft model (default: unused)
(env: LLAMA_ARG_HFD_REPO) | | `-hff, --hf-file FILE` | Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)
(env: LLAMA_ARG_HF_FILE) | | `-hfv, -hfrv, --hf-repo-v /[:quant]` | Hugging Face model repository for the vocoder model (default: unused)
(env: LLAMA_ARG_HF_REPO_V) | diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts index 39aaf561bb..ae9dd3ce8f 100644 --- a/tools/server/webui/src/lib/constants/settings-config.ts +++ b/tools/server/webui/src/lib/constants/settings-config.ts @@ -127,7 +127,7 @@ export const SETTING_CONFIG_INFO: Record = { fullHeightCodeBlocks: 'Always display code blocks at their full natural height, overriding any height limits.', showRawModelNames: - 'Display full raw model identifiers (e.g. "unsloth/Qwen3.5-27B-GGUF:BF16") instead of parsed names with badges.', + 'Display full raw model identifiers (e.g. "ggml-org/GLM-4.7-Flash-GGUF:Q8_0") instead of parsed names with badges.', mcpServers: 'Configure MCP servers as a JSON list. Use the form in the MCP Client settings section to edit.', mcpServerUsageStats: diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts index a6d7d6572f..50c32034a6 100644 --- a/tools/server/webui/src/lib/stores/models.svelte.ts +++ b/tools/server/webui/src/lib/stores/models.svelte.ts @@ -457,7 +457,7 @@ class ModelsStore { /** * Select a model by its model name (used for syncing with conversation model) - * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest") + * @param modelName - Model name to select (e.g., "ggml-org/GLM-4.7-Flash-GGUF") */ selectModelByName(modelName: string): void { const option = this.models.find((model) => model.model === modelName);