Restore common_cached_model_info and align mmproj filtering

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2026-03-21 22:57:46 +00:00
parent e404f6ab1c
commit 77fa9a9990
No known key found for this signature in database
6 changed files with 267 additions and 223 deletions

View File

@ -1060,7 +1060,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
auto models = common_list_cached_models();
printf("number of models in cache: %zu\n", models.size());
for (size_t i = 0; i < models.size(); i++) {
printf("%4zu. %s\n", i + 1, models[i].c_str());
printf("%4zu. %s\n", i + 1, models[i].to_string().c_str());
}
exit(0);
}

View File

@ -15,7 +15,7 @@
#include <map>
#include <mutex>
#include <regex>
#include <set>
#include <unordered_set>
#include <string>
#include <thread>
#include <vector>
@ -442,19 +442,50 @@ int common_download_file_single(const std::string & url,
}
// Components of a GGUF file name, as filled in by get_gguf_split_info
struct gguf_split_info {
std::string prefix;
int index = 0;
int count = 0;
// path without the ".gguf" suffix and without the "-NNNNN-of-NNNNN" split suffix
std::string prefix; // tag included
// trailing tag of prefix, upper-cased by get_gguf_split_info; empty if none was found
std::string tag;
// part number taken from the split suffix; 1 when there is no split suffix
int index;
// number of parts taken from the split suffix; 1 when there is no split suffix
int count;
};
// Parse a GGUF path into prefix, tag, split index and split count.
// The ".gguf" suffix is stripped first, then an optional split suffix is
// parsed, and finally the trailing tag is extracted from what remains.
static gguf_split_info get_gguf_split_info(const std::string & path) {
static const std::regex re(R"(^(.+)-([0-9]+)-of-([0-9]+)\.gguf$)", std::regex::icase);
// split suffix: "<prefix>-NNNNN-of-NNNNN", exactly five digits on each side
static const std::regex re_split("^(.+)-([0-9]{5})-of-([0-9]{5})$", std::regex::icase);
// tag: the final run of letters, digits and '_' that follows a '-' or a '.'
static const std::regex re_tag("[-.]([A-Z0-9_]+)$", std::regex::icase);
std::smatch m;
if (std::regex_match(path, m, re)) {
return {m[1].str(), std::stoi(m[2].str()), std::stoi(m[3].str())};
std::string prefix = path;
string_remove_suffix(prefix, ".gguf");
// a file without a split suffix is reported as part 1 of 1
int index = 1;
int count = 1;
if (std::regex_match(prefix, m, re_split)) {
prefix = m[1].str();
index = std::stoi(m[2].str());
count = std::stoi(m[3].str());
}
return {};
std::string tag;
if (std::regex_search(prefix, m, re_tag)) {
tag = m[1].str();
// the regex is case-insensitive, so normalize the captured tag to upper case
for (char & c : tag) {
c = std::toupper((unsigned char)c);
}
}
return {std::move(prefix), std::move(tag), index, count};
}
// Return the first number found in the tag of a GGUF file name:
// Q4_0 -> 4, F16 -> 16, NVFP4 -> 4, Q8_K_M -> 8, etc
// Returns 0 when the file name has no tag or the tag contains no digit.
static int extract_quant_bits(const std::string & filename) {
    const auto split = get_gguf_split_info(filename);

    const auto pos = split.tag.find_first_of("0123456789");
    if (pos == std::string::npos) {
        return 0;
    }

    // The tag is simply the last [A-Z0-9_]+ segment of the name, so it can be
    // a long digit run (a date or build stamp) rather than a quantization.
    // std::stoi throws std::out_of_range when the value does not fit in an
    // int; parsing at most 9 digits keeps the value below 2^31 and therefore
    // never throws, while leaving every real quantization tag unchanged.
    constexpr size_t max_digits = 9;

    return std::stoi(split.tag.substr(pos, max_digits));
}
static hf_cache::hf_files get_split_files(const hf_cache::hf_files & files,
@ -475,23 +506,75 @@ static hf_cache::hf_files get_split_files(const hf_cache::hf_files & files,
return result;
}
static hf_cache::hf_files filter_gguf_by_quant(const hf_cache::hf_files & files,
const std::string & quant_tag) {
hf_cache::hf_files result;
std::regex pattern(quant_tag + "[.-]", std::regex::icase);
// Pick the mmproj GGUF from `files` that best matches the model path `model`.
// Only mmproj files whose directory is the model's directory or one of its
// parents are considered; among those the deepest directory wins, and ties are
// broken by the smallest difference in quantization bits.
// Returns a default-constructed hf_file when no candidate is found.
static hf_cache::hf_file find_best_mmproj(const hf_cache::hf_files & files,
const std::string & model) {
hf_cache::hf_file best;
size_t best_depth = 0;
int best_diff = 0;
bool found = false;
auto model_bits = extract_quant_bits(model);
auto model_parts = string_split<std::string>(model, '/');
// end of the directory components (the last element is the file name)
auto model_dir = model_parts.end() - 1;
for (const auto & f : files) {
if (!string_ends_with(f.path, ".gguf")) {
if (!string_ends_with(f.path, ".gguf") ||
f.path.find("mmproj") == std::string::npos) {
continue;
}
if (f.path.find("mmproj") != std::string::npos) {
auto mmproj_parts = string_split<std::string>(f.path, '/');
auto mmproj_dir = mmproj_parts.end() - 1;
// compare directory components pairwise; `dir` stops at the first
// component of the mmproj path that differs from the model path
auto [_, dir] = std::mismatch(model_parts.begin(), model_dir,
mmproj_parts.begin(), mmproj_dir);
// not every mmproj directory component matched: it lives outside the model's directory chain
if (dir != mmproj_dir) {
continue;
}
if (std::regex_search(f.path, pattern)) {
result.push_back(f);
// number of shared directory components
size_t depth = dir - mmproj_parts.begin();
auto bits = extract_quant_bits(f.path);
auto diff = std::abs(bits - model_bits);
if (!found || depth > best_depth || (depth == best_depth && diff < best_diff)) {
best = f;
best_depth = depth;
best_diff = diff;
found = true;
}
}
return result;
return best;
}
// Choose the model GGUF to use from a repository file listing.
// A candidate is any ".gguf" file whose path does not contain "mmproj".
// With a non-empty tag, the first candidate matching that tag is returned;
// with an empty tag, Q4_K_M is tried first, then Q4_0. When nothing matches,
// the first candidate in listing order is returned, or a default-constructed
// hf_file if there is no candidate at all.
static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files,
                                         const std::string & tag) {
    const auto is_model_gguf = [](const hf_cache::hf_file & file) {
        return string_ends_with(file.path, ".gguf") &&
               file.path.find("mmproj") == std::string::npos;
    };

    const std::vector<std::string> wanted = tag.empty()
        ? std::vector<std::string>{"Q4_K_M", "Q4_0"}
        : std::vector<std::string>{tag};

    for (const auto & quant : wanted) {
        // the tag must be followed by '.' or '-' so that it ends on a name boundary
        const std::regex quant_re(quant + "[.-]", std::regex::icase);

        for (const auto & file : files) {
            if (is_model_gguf(file) && std::regex_search(file.path, quant_re)) {
                return file;
            }
        }
    }

    // no quantization matched: fall back to the first model file listed
    for (const auto & file : files) {
        if (is_model_gguf(file)) {
            return file;
        }
    }

    return {};
}
static void list_available_gguf_files(const hf_cache::hf_files & files) {
@ -504,11 +587,8 @@ static void list_available_gguf_files(const hf_cache::hf_files & files) {
}
// Files selected by get_hf_plan for a HF repository download
struct hf_plan {
hf_cache::hf_file primary;
// result of get_split_files for the selected model file
hf_cache::hf_files model_files;
// selected mmproj file; its path is empty when none was selected
hf_cache::hf_file mmproj;
bool has_primary = false;
bool has_mmproj = false;
hf_cache::hf_files files;
};
static hf_plan get_hf_plan(const common_params_model & model,
@ -523,94 +603,64 @@ static hf_plan get_hf_plan(const common_params_model & model,
return plan;
}
hf_cache::hf_files candidates;
hf_cache::hf_file primary;
if (!model.hf_file.empty()) {
const hf_cache::hf_file * found_file = nullptr;
for (const auto & f : all) {
if (f.path == model.hf_file) {
found_file = &f;
primary = f;
break;
}
}
if (!found_file) {
LOG_ERR("%s: --hf-file '%s' not found in repository\n", __func__, model.hf_file.c_str());
if (primary.path.empty()) {
LOG_ERR("%s: file '%s' not found in repository\n", __func__, model.hf_file.c_str());
list_available_gguf_files(all);
return plan;
}
plan.primary = *found_file;
plan.has_primary = true;
candidates = get_split_files(all, *found_file);
} else {
std::vector<std::string> search_priority = {!tag.empty() ? tag : "Q4_K_M", "Q4_0"};
for (const auto & q : search_priority) {
candidates = filter_gguf_by_quant(all, q);
if (!candidates.empty()) {
candidates = get_split_files(all, candidates[0]);
break;
}
}
if (candidates.empty()) {
for (const auto & f : all) {
if (string_ends_with(f.path, ".gguf") &&
f.path.find("mmproj") == std::string::npos) {
candidates = get_split_files(all, f);
break;
}
}
}
if (candidates.empty()) {
primary = find_best_model(all, tag);
if (primary.path.empty()) {
LOG_ERR("%s: no GGUF files found in repository %s\n", __func__, repo.c_str());
list_available_gguf_files(all);
return plan;
}
plan.primary = candidates[0];
plan.has_primary = true;
}
for (const auto & f : candidates) {
plan.files.push_back(f);
}
plan.model_files = get_split_files(all, primary);
if (opts.download_mmproj) {
for (const auto & f : all) {
if (string_ends_with(f.path, ".gguf") &&
f.path.find("mmproj") != std::string::npos) {
plan.mmproj = f;
plan.has_mmproj = true;
plan.files.push_back(f);
break;
}
}
plan.mmproj = find_best_mmproj(all, primary.path);
}
return plan;
}
static std::vector<std::pair<std::string, std::string>> get_url_tasks(const common_params_model & model) {
auto [prefix_url, idx, count] = get_gguf_split_info(model.url);
// One file to download: the source url and the local path it is written to
struct download_task {
std::string url;
std::string path;
};
if (count <= 1) {
// Build the download tasks for a plain URL model.
// A non-split URL yields a single task {model.url, model.path}. A split URL
// yields one task per part, with the local files placed next to model.path
// and named after the URL's file name.
static std::vector<download_task> get_url_tasks(const common_params_model & model) {
auto split = get_gguf_split_info(model.url);
if (split.count <= 1) {
return {{model.url, model.path}};
}
std::vector<std::pair<std::string, std::string>> files;
size_t pos = prefix_url.rfind('/');
std::string prefix_filename = (pos != std::string::npos) ? prefix_url.substr(pos + 1) : prefix_url;
std::string prefix_path = (std::filesystem::path(model.path).parent_path() / prefix_filename).string();
for (int i = 1; i <= count; i++) {
std::string suffix = string_format("-%05d-of-%05d.gguf", i, count);
files.emplace_back(prefix_url + suffix, prefix_path + suffix);
// keep only the last URL component as the local file name prefix
auto filename = split.prefix;
if (auto pos = split.prefix.rfind('/'); pos != std::string::npos) {
filename = split.prefix.substr(pos + 1);
}
return files;
auto parent_path = std::filesystem::path(model.path).parent_path();
auto prefix_path = (parent_path / filename).string();
std::vector<download_task> tasks;
// rebuild the name of every part, whichever part the URL pointed at
for (int i = 1; i <= split.count; i++) {
auto suffix = string_format("-%05d-of-%05d.gguf", i, split.count);
tasks.push_back({split.prefix + suffix, prefix_path + suffix});
}
return tasks;
}
common_download_model_result common_download_model(const common_params_model & model,
@ -618,32 +668,35 @@ common_download_model_result common_download_model(const common_params_model
const common_download_model_opts & opts,
const common_header_list & headers) {
common_download_model_result result;
std::vector<std::pair<std::string, std::string>> to_download;
std::vector<download_task> tasks;
hf_plan hf;
bool is_hf = !model.hf_repo.empty();
if (is_hf) {
hf = get_hf_plan(model, bearer_token, opts);
for (const auto & f : hf.files) {
to_download.emplace_back(f.url, f.local_path);
for (const auto & f : hf.model_files) {
tasks.push_back({f.url, f.local_path});
}
if (!hf.mmproj.path.empty()) {
tasks.push_back({hf.mmproj.url, hf.mmproj.local_path});
}
} else if (!model.url.empty()) {
to_download = get_url_tasks(model);
tasks = get_url_tasks(model);
} else {
result.model_path = model.path;
return result;
}
if (to_download.empty()) {
if (tasks.empty()) {
return result;
}
std::vector<std::future<bool>> futures;
for (const auto & item : to_download) {
for (const auto & task : tasks) {
futures.push_back(std::async(std::launch::async,
[u = item.first, p = item.second, &bearer_token, offline = opts.offline, &headers, is_hf]() {
int status = common_download_file_single(u, p, bearer_token, offline, headers, is_hf);
[&task, &bearer_token, offline = opts.offline, &headers, is_hf]() {
int status = common_download_file_single(task.url, task.path, bearer_token, offline, headers, is_hf);
return is_http_status_ok(status);
}
));
@ -656,13 +709,12 @@ common_download_model_result common_download_model(const common_params_model
}
if (is_hf) {
for (const auto & f : hf.files) {
for (const auto & f : hf.model_files) {
hf_cache::finalize_file(f);
}
if (hf.has_primary) {
result.model_path = hf_cache::finalize_file(hf.primary);
}
if (hf.has_mmproj) {
result.model_path = hf.model_files[0].local_path;
if (!hf.mmproj.path.empty()) {
result.mmproj_path = hf_cache::finalize_file(hf.mmproj);
}
} else {
@ -793,48 +845,22 @@ std::string common_docker_resolve_model(const std::string & docker) {
}
}
std::vector<std::string> common_list_cached_models() {
auto files = hf_cache::get_cached_files("");
std::set<std::string> models;
// List the models found in the HF cache as unique <repo, tag> pairs, in the
// order in which the cached files are returned by hf_cache::get_cached_files.
std::vector<common_cached_model_info> common_list_cached_models() {
// "repo:tag" keys already emitted, used to drop duplicates
std::unordered_set<std::string> seen;
std::vector<common_cached_model_info> result;
auto files = hf_cache::get_cached_files();
for (const auto & f : files) {
std::string tmp = f.path;
if (!string_remove_suffix(tmp, ".gguf")) {
auto split = get_gguf_split_info(f.path);
// keep only first parts (index 1) that carry a tag and are not mmproj files
if (split.index != 1 || split.tag.empty() ||
split.prefix.find("mmproj") != std::string::npos) {
continue;
}
if (tmp.find("mmproj") != std::string::npos) {
continue;
}
auto split_pos = tmp.find("-00001-of-");
if (split_pos == std::string::npos &&
tmp.find("-of-") != std::string::npos) {
continue;
}
if (split_pos != std::string::npos) {
tmp.erase(split_pos);
}
auto sep_pos = tmp.find_last_of("-.");
if (sep_pos == std::string::npos || sep_pos == tmp.size() - 1) {
continue;
}
tmp.erase(0, sep_pos + 1);
bool is_valid = true;
for (char & c : tmp) {
unsigned char uc = c;
if (!std::isalnum(uc) && uc != '_') {
is_valid = false;
break;
}
c = std::toupper(uc);
}
if (is_valid) {
models.insert(f.repo_id + ":" + tmp);
// insert().second is true only the first time this repo:tag pair is seen
if (seen.insert(f.repo_id + ":" + split.tag).second) {
result.push_back({f.repo_id, split.tag});
}
}
return {models.begin(), models.end()};
return result;
}

View File

@ -17,12 +17,20 @@ struct common_remote_params {
// get remote file content, returns <http_code, raw_response_body>
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
// split HF repo with tag into <repo, tag>
// for example: "user/model:tag" -> <"user/model", "tag">
// if tag is not present, default to "latest"
// example: "user/model" -> <"user/model", "latest">
// split HF repo with tag into <repo, tag>, for example:
// - "ggml-org/models:F16" -> <"ggml-org/models", "F16">
// tag is optional and can be empty
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
// One entry of the list returned by common_list_cached_models
struct common_cached_model_info {
    std::string repo; // repository id of the cached model
    std::string tag;  // tag of the cached model file

    // Format the entry as "<repo>:<tag>"
    std::string to_string() const {
        std::string text = repo;
        text += ":";
        text += tag;
        return text;
    }
};
// Options for common_download_model
struct common_download_model_opts {
bool download_mmproj = false;
@ -31,17 +39,34 @@ struct common_download_model_opts {
// Result of common_download_model
// Both paths are plain strings; see the common_download_model description for
// when they are left empty.
struct common_download_model_result {
std::string model_path; // path to downloaded model (empty on failure)
std::string mmproj_path; // path to downloaded mmproj (empty if not downloaded)
std::string model_path;
std::string mmproj_path;
};
/**
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
* - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
* Tag is optional, it checks for Q4_K_M first, then Q4_0, then if not found, return the first GGUF file in repo
*/
// Download model from HuggingFace repo or URL
//
// input (via model struct):
// - model.hf_repo: HF repo with optional tag, see common_download_split_repo_tag
// - model.hf_file: specific file in the repo (requires hf_repo)
// - model.url: simple download (used if hf_repo is empty)
// - model.path: local file path
//
// tag matching (for HF repos without model.hf_file):
// - if tag is specified, searches for GGUF matching that quantization
// - if no tag, searches for Q4_K_M, then Q4_0, then first available GGUF
//
// split GGUF: multi-part files like "model-00001-of-00003.gguf" are automatically
// detected and all parts are downloaded
//
// caching:
// - HF repos: uses HuggingFace cache
// - URLs: uses ETag-based caching
//
// when opts.offline=true, no network requests are made
// when download_mmproj=true, searches for an mmproj in the same directory as the model or in any
// parent directory, preferring the closest directory, then the closest quantization bits
//
// returns result with model_path and mmproj_path (empty on failure)
common_download_model_result common_download_model(
const common_params_model & model,
const std::string & bearer_token,
@ -50,7 +75,7 @@ common_download_model_result common_download_model(
);
// returns list of cached models
std::vector<std::string> common_list_cached_models();
std::vector<common_cached_model_info> common_list_cached_models();
// download single file from url to local path
// returns status code or -1 on error

View File

@ -9,7 +9,7 @@
#include <filesystem>
#include <fstream>
#include <mutex>
#include <atomic>
#include <regex> // migration only
#include <string>
#include <string_view>
@ -22,7 +22,10 @@ namespace nl = nlohmann;
#ifndef NOMINMAX
#define NOMINMAX
#endif
#define HOME_DIR "USERPROFILE"
#include <windows.h>
#else
#define HOME_DIR "HOME"
#endif
namespace hf_cache {
@ -30,60 +33,27 @@ namespace hf_cache {
namespace fs = std::filesystem;
// Return the HF hub cache directory, derived from the first environment
// variable that is set and non-empty among HF_HUB_CACHE,
// HUGGINGFACE_HUB_CACHE, HF_HOME, XDG_CACHE_HOME and the home directory
// variable. Throws std::runtime_error when none of them is usable.
static fs::path get_cache_directory() {
const char * hf_hub_cache = std::getenv("HF_HUB_CACHE");
if (hf_hub_cache && *hf_hub_cache) {
return fs::path(hf_hub_cache); // assume shell-expanded; add expand logic if you want full parity
}
const char * huggingface_hub_cache = std::getenv("HUGGINGFACE_HUB_CACHE");
if (huggingface_hub_cache && *huggingface_hub_cache) {
return fs::path(huggingface_hub_cache);
}
const char * hf_home = std::getenv("HF_HOME");
if (hf_home && *hf_home) {
return fs::path(hf_home) / "hub";
}
const char * xdg_cache_home = std::getenv("XDG_CACHE_HOME");
if (xdg_cache_home && *xdg_cache_home) {
return fs::path(xdg_cache_home) / "huggingface" / "hub";
}
#if defined(_WIN32)
const char * userprofile = std::getenv("USERPROFILE");
if (userprofile && *userprofile) {
return fs::path(userprofile) / ".cache" / "huggingface" / "hub";
}
#else
const char * home = std::getenv("HOME");
if (home && *home) {
return fs::path(home) / ".cache" / "huggingface" / "hub";
}
#endif
throw std::runtime_error("Failed to determine HF cache directory");
}
static bool symlinks_supported() {
#ifdef _WIN32
static bool supported = false;
static std::once_flag once;
std::call_once(once, []() {
fs::path link = get_cache_directory() / ("link_" + std::to_string(GetCurrentProcessId()));
std::error_code ec;
fs::create_directory_symlink("..", link, ec);
supported = !ec;
if (!ec) {
fs::remove(link, ec);
} else if (GetLastError() == ERROR_PRIVILEGE_NOT_HELD) {
LOG_WRN("symlink creation requires Developer Mode or admin privileges on Windows\n");
// computed once by the lambda on the first call, then reused
static const fs::path cache = []() {
// environment variables in priority order, each with the sub-path
// appended to its value (an empty sub-path means the value is used as is)
struct {
const char * var;
fs::path path;
} entries[] = {
{"HF_HUB_CACHE", fs::path()},
{"HUGGINGFACE_HUB_CACHE", fs::path()},
{"HF_HOME", fs::path("hub")},
{"XDG_CACHE_HOME", fs::path("huggingface") / "hub"},
{HOME_DIR, fs::path(".cache") / "huggingface" / "hub"}
};
for (const auto & entry : entries) {
// unset and empty variables are both skipped
if (auto * p = std::getenv(entry.var); p && *p) {
fs::path base(p);
return entry.path.empty() ? base : base / entry.path;
}
}
});
return supported;
#else
return true;
#endif
throw std::runtime_error("Failed to determine HF cache directory");
}();
return cache;
}
static std::string folder_name_to_repo(const std::string & folder) {
@ -255,13 +225,13 @@ hf_files get_repo_files(const std::string & repo_id,
fs::path path = file.path;
fs::path repo_path = get_repo_path(repo_id);
fs::path snapshots_path = repo_path / "snapshots" / rev / path;
fs::path blobs_path = repo_path / "blobs" / file.oid;
if (symlinks_supported()) {
file.local_path = blobs_path.string();
file.link_path = snapshots_path.string();
} else { // degraded mode
file.local_path = snapshots_path.string();
file.final_path = snapshots_path.string();
file.local_path = file.final_path;
if (!file.oid.empty() && !fs::exists(snapshots_path)) {
fs::path blob_path = repo_path / "blobs" / file.oid;
file.local_path = blob_path.string();
}
files.push_back(file);
@ -332,6 +302,7 @@ hf_files get_cached_files(const std::string & repo_id) {
file.repo_id = _repo_id;
file.path = path.generic_string();
file.local_path = entry.path().string();
file.final_path = file.local_path;
files.push_back(std::move(file));
}
}
@ -341,24 +312,46 @@ hf_files get_cached_files(const std::string & repo_id) {
}
// Make file.final_path available from file.local_path and return file.final_path.
// A relative symlink is tried first; if that fails, the file is moved and, if
// the move fails too, copied. Failures are logged and do not change the
// returned path.
std::string finalize_file(const hf_file & file) {
if (file.link_path.empty()) {
return file.local_path;
}
fs::path link_path(file.link_path);
fs::path local_path(file.local_path);
// set after the first symlink failure so that later calls go straight to move/copy
static std::atomic<bool> symlinks_disabled{false};
std::error_code ec;
fs::create_directories(link_path.parent_path(), ec);
fs::path target_path = fs::relative(local_path, link_path.parent_path(), ec);
fs::create_symlink(target_path, link_path, ec);
fs::path blob_path(file.local_path);
fs::path snapshot_path(file.final_path);
if (fs::exists(link_path)) {
return file.link_path;
// nothing to do: both paths are the same, or the snapshot entry already exists
if (blob_path == snapshot_path || fs::exists(snapshot_path, ec)) {
return file.final_path;
}
LOG_WRN("%s: failed to create symlink: %s\n", __func__, file.link_path.c_str());
return file.local_path;
// no source file to link or move
if (!fs::exists(blob_path, ec)) {
return file.final_path;
}
fs::create_directories(snapshot_path.parent_path(), ec);
if (!symlinks_disabled) {
// link with a path relative to the snapshot directory
fs::path target = fs::relative(blob_path, snapshot_path.parent_path(), ec);
if (!ec) {
fs::create_symlink(target, snapshot_path, ec);
}
if (!ec) {
return file.final_path;
}
}
// exchange() returns the previous value, so only the call that flips the flag logs
if (!symlinks_disabled.exchange(true)) {
LOG_WRN("%s: failed to create symlink: %s\n", __func__, ec.message().c_str());
LOG_WRN("%s: switching to degraded mode\n", __func__);
}
// degraded mode: move the file to the snapshot path, or copy it if the move fails
fs::rename(blob_path, snapshot_path, ec);
if (ec) {
LOG_WRN("%s: failed to move file to snapshots: %s\n", __func__, ec.message().c_str());
fs::copy(blob_path, snapshot_path, ec);
if (ec) {
LOG_ERR("%s: failed to copy file to snapshots: %s\n", __func__, ec.message().c_str());
}
}
return file.final_path;
}
// delete everything after this line, one day

View File

@ -11,7 +11,7 @@ struct hf_file {
std::string path;
std::string url;
std::string local_path;
std::string link_path;
std::string final_path;
std::string oid;
std::string repo_id;
};
@ -24,9 +24,9 @@ hf_files get_repo_files(
const std::string & bearer_token
);
hf_files get_cached_files(const std::string & repo_id);
hf_files get_cached_files(const std::string & repo_id = {});
// Create symlink if link_path is set and returns the snapshot path
// Create snapshot path (link or move/copy) and return it
std::string finalize_file(const hf_file & file);
// TODO: Remove later

View File

@ -365,8 +365,8 @@ common_presets common_preset_context::load_from_cache() const {
auto cached_models = common_list_cached_models();
for (const auto & model : cached_models) {
common_preset preset;
preset.name = model;
preset.set_option(*this, "LLAMA_ARG_HF_REPO", model);
preset.name = model.to_string();
preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string());
out[preset.name] = preset;
}