diff --git a/common/arg.cpp b/common/arg.cpp index 0fc5fae498..5b53cd6b61 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1060,7 +1060,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex auto models = common_list_cached_models(); printf("number of models in cache: %zu\n", models.size()); for (size_t i = 0; i < models.size(); i++) { - printf("%4zu. %s\n", i + 1, models[i].c_str()); + printf("%4zu. %s\n", i + 1, models[i].to_string().c_str()); } exit(0); } diff --git a/common/download.cpp b/common/download.cpp index e52794567b..cf28d624fc 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -442,19 +442,50 @@ int common_download_file_single(const std::string & url, } struct gguf_split_info { - std::string prefix; - int index = 0; - int count = 0; + std::string prefix; // tag included + std::string tag; + int index; + int count; }; static gguf_split_info get_gguf_split_info(const std::string & path) { - static const std::regex re(R"(^(.+)-([0-9]+)-of-([0-9]+)\.gguf$)", std::regex::icase); + static const std::regex re_split("^(.+)-([0-9]{5})-of-([0-9]{5})$", std::regex::icase); + static const std::regex re_tag("[-.]([A-Z0-9_]+)$", std::regex::icase); std::smatch m; - if (std::regex_match(path, m, re)) { - return {m[1].str(), std::stoi(m[2].str()), std::stoi(m[3].str())}; + std::string prefix = path; + string_remove_suffix(prefix, ".gguf"); + + int index = 1; + int count = 1; + + if (std::regex_match(prefix, m, re_split)) { + prefix = m[1].str(); + index = std::stoi(m[2].str()); + count = std::stoi(m[3].str()); } - return {}; + + std::string tag; + if (std::regex_search(prefix, m, re_tag)) { + tag = m[1].str(); + for (char & c : tag) { + c = std::toupper((unsigned char)c); + } + } + + return {std::move(prefix), std::move(tag), index, count}; +} + +// Q4_0 -> 4, F16 -> 16, NVFP4 -> 4, Q8_K_M -> 8, etc +static int extract_quant_bits(const std::string & filename) { + auto split = get_gguf_split_info(filename); + + auto pos = split.tag.find_first_of("0123456789"); + if (pos == std::string::npos) { + return 0; + } + + return std::stoi(split.tag.substr(pos)); } static hf_cache::hf_files get_split_files(const hf_cache::hf_files & files, @@ -475,23 +506,75 @@ static hf_cache::hf_files get_split_files(const hf_cache::hf_files & files, return result; } -static hf_cache::hf_files filter_gguf_by_quant(const hf_cache::hf_files & files, - const std::string & quant_tag) { - hf_cache::hf_files result; - std::regex pattern(quant_tag + "[.-]", std::regex::icase); +static hf_cache::hf_file find_best_mmproj(const hf_cache::hf_files & files, + const std::string & model) { + hf_cache::hf_file best; + size_t best_depth = 0; + int best_diff = 0; + bool found = false; + + auto model_bits = extract_quant_bits(model); + auto model_parts = string_split(model, '/'); + auto model_dir = model_parts.end() - 1; for (const auto & f : files) { - if (!string_ends_with(f.path, ".gguf")) { + if (!string_ends_with(f.path, ".gguf") || + f.path.find("mmproj") == std::string::npos) { continue; } - if (f.path.find("mmproj") != std::string::npos) { + + auto mmproj_parts = string_split(f.path, '/'); + auto mmproj_dir = mmproj_parts.end() - 1; + + auto [_, dir] = std::mismatch(model_parts.begin(), model_dir, + mmproj_parts.begin(), mmproj_dir); + if (dir != mmproj_dir) { continue; } - if (std::regex_search(f.path, pattern)) { - result.push_back(f); + + size_t depth = dir - mmproj_parts.begin(); + auto bits = extract_quant_bits(f.path); + auto diff = std::abs(bits - model_bits); + + if (!found || depth > best_depth || (depth == best_depth && diff < best_diff)) { + best = f; + best_depth = depth; + best_diff = diff; + found = true; } } - return result; + return best; +} + +static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files, + const std::string & tag) { + std::vector tags; + + if (!tag.empty()) { + tags.push_back(tag); + } else { + tags = {"Q4_K_M", "Q4_0"}; + } + + for (const auto & t : tags) { + std::regex pattern(t + "[.-]", std::regex::icase); + for (const auto & f : files) { + if (string_ends_with(f.path, ".gguf") && + f.path.find("mmproj") == std::string::npos && + std::regex_search(f.path, pattern)) { + return f; + } + } + } + + for (const auto & f : files) { + if (string_ends_with(f.path, ".gguf") && + f.path.find("mmproj") == std::string::npos) { + return f; + } + } + + return {}; } static void list_available_gguf_files(const hf_cache::hf_files & files) { @@ -504,11 +587,8 @@ static void list_available_gguf_files(const hf_cache::hf_files & files) { } struct hf_plan { - hf_cache::hf_file primary; + hf_cache::hf_files model_files; hf_cache::hf_file mmproj; - bool has_primary = false; - bool has_mmproj = false; - hf_cache::hf_files files; }; static hf_plan get_hf_plan(const common_params_model & model, @@ -523,94 +603,64 @@ static hf_plan get_hf_plan(const common_params_model & model, return plan; } - hf_cache::hf_files candidates; + hf_cache::hf_file primary; if (!model.hf_file.empty()) { - const hf_cache::hf_file * found_file = nullptr; for (const auto & f : all) { if (f.path == model.hf_file) { - found_file = &f; + primary = f; break; } } - - if (!found_file) { - LOG_ERR("%s: --hf-file '%s' not found in repository\n", __func__, model.hf_file.c_str()); + if (primary.path.empty()) { + LOG_ERR("%s: file '%s' not found in repository\n", __func__, model.hf_file.c_str()); list_available_gguf_files(all); return plan; } - - plan.primary = *found_file; - plan.has_primary = true; - candidates = get_split_files(all, *found_file); } else { - std::vector search_priority = {!tag.empty() ? tag : "Q4_K_M", "Q4_0"}; - - for (const auto & q : search_priority) { - candidates = filter_gguf_by_quant(all, q); - if (!candidates.empty()) { - candidates = get_split_files(all, candidates[0]); - break; - } - } - - if (candidates.empty()) { - for (const auto & f : all) { - if (string_ends_with(f.path, ".gguf") && - f.path.find("mmproj") == std::string::npos) { - candidates = get_split_files(all, f); - break; - } - } - } - - if (candidates.empty()) { + primary = find_best_model(all, tag); + if (primary.path.empty()) { LOG_ERR("%s: no GGUF files found in repository %s\n", __func__, repo.c_str()); list_available_gguf_files(all); return plan; } - - plan.primary = candidates[0]; - plan.has_primary = true; } - for (const auto & f : candidates) { - plan.files.push_back(f); - } + plan.model_files = get_split_files(all, primary); if (opts.download_mmproj) { - for (const auto & f : all) { - if (string_ends_with(f.path, ".gguf") && - f.path.find("mmproj") != std::string::npos) { - plan.mmproj = f; - plan.has_mmproj = true; - plan.files.push_back(f); - break; - } - } + plan.mmproj = find_best_mmproj(all, primary.path); } return plan; } -static std::vector> get_url_tasks(const common_params_model & model) { - auto [prefix_url, idx, count] = get_gguf_split_info(model.url); +struct download_task { + std::string url; + std::string path; +}; - if (count <= 1) { +static std::vector get_url_tasks(const common_params_model & model) { + auto split = get_gguf_split_info(model.url); + + if (split.count <= 1) { return {{model.url, model.path}}; } - std::vector> files; - - size_t pos = prefix_url.rfind('/'); - std::string prefix_filename = (pos != std::string::npos) ? prefix_url.substr(pos + 1) : prefix_url; - std::string prefix_path = (std::filesystem::path(model.path).parent_path() / prefix_filename).string(); - - for (int i = 1; i <= count; i++) { - std::string suffix = string_format("-%05d-of-%05d.gguf", i, count); - files.emplace_back(prefix_url + suffix, prefix_path + suffix); + auto filename = split.prefix; + if (auto pos = split.prefix.rfind('/'); pos != std::string::npos) { + filename = split.prefix.substr(pos + 1); } - return files; + + auto parent_path = std::filesystem::path(model.path).parent_path(); + auto prefix_path = (parent_path / filename).string(); + + std::vector tasks; + for (int i = 1; i <= split.count; i++) { + auto suffix = string_format("-%05d-of-%05d.gguf", i, split.count); + tasks.push_back({split.prefix + suffix, prefix_path + suffix}); + } + return tasks; } common_download_model_result common_download_model(const common_params_model & model, @@ -618,32 +668,35 @@ common_download_model_result common_download_model(const common_params_model const common_download_model_opts & opts, const common_header_list & headers) { common_download_model_result result; - std::vector> to_download; + std::vector tasks; hf_plan hf; bool is_hf = !model.hf_repo.empty(); if (is_hf) { hf = get_hf_plan(model, bearer_token, opts); - for (const auto & f : hf.files) { - to_download.emplace_back(f.url, f.local_path); + for (const auto & f : hf.model_files) { + tasks.push_back({f.url, f.local_path}); + } + if (!hf.mmproj.path.empty()) { + tasks.push_back({hf.mmproj.url, hf.mmproj.local_path}); } } else if (!model.url.empty()) { - to_download = get_url_tasks(model); + tasks = get_url_tasks(model); } else { result.model_path = model.path; return result; } - if (to_download.empty()) { + if (tasks.empty()) { return result; } std::vector> futures; - for (const auto & item : to_download) { + for (const auto & task : tasks) { futures.push_back(std::async(std::launch::async, - [u = item.first, p = item.second, &bearer_token, offline = opts.offline, &headers, is_hf]() { - int status = common_download_file_single(u, p, bearer_token, offline, headers, is_hf); + [&task, &bearer_token, offline = opts.offline, &headers, is_hf]() { + int status = common_download_file_single(task.url, task.path, bearer_token, offline, headers, is_hf); return is_http_status_ok(status); } )); @@ -656,13 +709,12 @@ common_download_model_result common_download_model(const common_params_model } if (is_hf) { - for (const auto & f : hf.files) { + for (const auto & f : hf.model_files) { hf_cache::finalize_file(f); } - if (hf.has_primary) { - result.model_path = hf_cache::finalize_file(hf.primary); - } - if (hf.has_mmproj) { + result.model_path = hf.model_files[0].local_path; + + if (!hf.mmproj.path.empty()) { result.mmproj_path = hf_cache::finalize_file(hf.mmproj); } } else { @@ -793,48 +845,22 @@ std::string common_docker_resolve_model(const std::string & docker) { } } -std::vector common_list_cached_models() { - auto files = hf_cache::get_cached_files(""); - std::set models; +std::vector common_list_cached_models() { + std::unordered_set seen; + std::vector result; + + auto files = hf_cache::get_cached_files(); for (const auto & f : files) { - std::string tmp = f.path; - - if (!string_remove_suffix(tmp, ".gguf")) { + auto split = get_gguf_split_info(f.path); + if (split.index != 1 || split.tag.empty() || + split.prefix.find("mmproj") != std::string::npos) { continue; } - if (tmp.find("mmproj") != std::string::npos) { - continue; - } - auto split_pos = tmp.find("-00001-of-"); - - if (split_pos == std::string::npos && - tmp.find("-of-") != std::string::npos) { - continue; - } - if (split_pos != std::string::npos) { - tmp.erase(split_pos); - } - auto sep_pos = tmp.find_last_of("-."); - - if (sep_pos == std::string::npos || sep_pos == tmp.size() - 1) { - continue; - } - tmp.erase(0, sep_pos + 1); - - bool is_valid = true; - for (char & c : tmp) { - unsigned char uc = c; - if (!std::isalnum(uc) && uc != '_') { - is_valid = false; - break; - } - c = std::toupper(uc); - } - if (is_valid) { - models.insert(f.repo_id + ":" + tmp); + if (seen.insert(f.repo_id + ":" + split.tag).second) { + result.push_back({f.repo_id, split.tag}); } } - return {models.begin(), models.end()}; + return result; } diff --git a/common/download.h b/common/download.h index 03116d2ced..0a933521fa 100644 --- a/common/download.h +++ b/common/download.h @@ -17,12 +17,20 @@ struct common_remote_params { // get remote file content, returns std::pair> common_remote_get_content(const std::string & url, const common_remote_params & params); -// split HF repo with tag into -// for example: "user/model:tag" -> <"user/model", "tag"> -// if tag is not present, default to "latest" -// example: "user/model" -> <"user/model", "latest"> +// split HF repo with tag into , for example: +// - "ggml-org/models:F16" -> <"ggml-org/models", "F16"> +// tag is optional and can be empty std::pair common_download_split_repo_tag(const std::string & hf_repo_with_tag); +// Result of common_list_cached_models +struct common_cached_model_info { + std::string repo; + std::string tag; + std::string to_string() const { + return repo + ":" + tag; + } +}; + // Options for common_download_model struct common_download_model_opts { bool download_mmproj = false; @@ -31,17 +39,34 @@ struct common_download_model_opts { // Result of common_download_model struct common_download_model_result { - std::string model_path; // path to downloaded model (empty on failure) - std::string mmproj_path; // path to downloaded mmproj (empty if not downloaded) + std::string model_path; + std::string mmproj_path; }; -/** - * Allow getting the HF file from the HF repo with tag (like ollama), for example: - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 - * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s - * Tag is optional, it checks for Q4_K_M first, then Q4_0, then if not found, return the first GGUF file in repo - */ +// Download model from HuggingFace repo or URL +// +// input (via model struct): +// - model.hf_repo: HF repo with optional tag, see common_download_split_repo_tag +// - model.hf_file: specific file in the repo (requires hf_repo) +// - model.url: simple download (used if hf_repo is empty) +// - model.path: local file path +// +// tag matching (for HF repos without model.hf_file): +// - if tag is specified, searches for GGUF matching that quantization +// - if no tag, searches for Q4_K_M, then Q4_0, then first available GGUF +// +// split GGUF: multi-part files like "model-00001-of-00003.gguf" are automatically +// detected and all parts are downloaded +// +// caching: +// - HF repos: uses HuggingFace cache +// - URLs: uses ETag-based caching +// +// when opts.offline=true, no network requests are made +// when download_mmproj=true, searches for mmproj in same directory as model or any parent directory +// then with the closest quantization bits +// +// returns result with model_path and mmproj_path (empty on failure) common_download_model_result common_download_model( const common_params_model & model, const std::string & bearer_token, @@ -50,7 +75,7 @@ common_download_model_result common_download_model( ); // returns list of cached models -std::vector common_list_cached_models(); +std::vector common_list_cached_models(); // download single file from url to local path // returns status code or -1 on error diff --git a/common/hf-cache.cpp b/common/hf-cache.cpp index 8aab2d117c..51425d0c4d 100644 --- a/common/hf-cache.cpp +++ b/common/hf-cache.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include // migration only #include #include @@ -22,7 +22,10 @@ namespace nl = nlohmann; #ifndef NOMINMAX #define NOMINMAX #endif +#define HOME_DIR "USERPROFILE" #include +#else +#define HOME_DIR "HOME" #endif namespace hf_cache { @@ -30,60 +33,27 @@ namespace hf_cache { namespace fs = std::filesystem; static fs::path get_cache_directory() { - const char * hf_hub_cache = std::getenv("HF_HUB_CACHE"); - if (hf_hub_cache && *hf_hub_cache) { - return fs::path(hf_hub_cache); // assume shell-expanded; add expand logic if you want full parity - } - - const char * huggingface_hub_cache = std::getenv("HUGGINGFACE_HUB_CACHE"); - if (huggingface_hub_cache && *huggingface_hub_cache) { - return fs::path(huggingface_hub_cache); - } - - const char * hf_home = std::getenv("HF_HOME"); - if (hf_home && *hf_home) { - return fs::path(hf_home) / "hub"; - } - - const char * xdg_cache_home = std::getenv("XDG_CACHE_HOME"); - if (xdg_cache_home && *xdg_cache_home) { - return fs::path(xdg_cache_home) / "huggingface" / "hub"; - } -#if defined(_WIN32) - const char * userprofile = std::getenv("USERPROFILE"); - if (userprofile && *userprofile) { - return fs::path(userprofile) / ".cache" / "huggingface" / "hub"; - } -#else - const char * home = std::getenv("HOME"); - if (home && *home) { - return fs::path(home) / ".cache" / "huggingface" / "hub"; - } -#endif - throw std::runtime_error("Failed to determine HF cache directory"); -} - -static bool symlinks_supported() { -#ifdef _WIN32 - static bool supported = false; - static std::once_flag once; - std::call_once(once, []() { - fs::path link = get_cache_directory() / ("link_" + std::to_string(GetCurrentProcessId())); - - std::error_code ec; - fs::create_directory_symlink("..", link, ec); - supported = !ec; - - if (!ec) { - fs::remove(link, ec); - } else if (GetLastError() == ERROR_PRIVILEGE_NOT_HELD) { - LOG_WRN("symlink creation requires Developer Mode or admin privileges on Windows\n"); + static const fs::path cache = []() { + struct { + const char * var; + fs::path path; + } entries[] = { + {"HF_HUB_CACHE", fs::path()}, + {"HUGGINGFACE_HUB_CACHE", fs::path()}, + {"HF_HOME", fs::path("hub")}, + {"XDG_CACHE_HOME", fs::path("huggingface") / "hub"}, + {HOME_DIR, fs::path(".cache") / "huggingface" / "hub"} + }; + for (const auto & entry : entries) { + if (auto * p = std::getenv(entry.var); p && *p) { + fs::path base(p); + return entry.path.empty() ? base : base / entry.path; + } } - }); - return supported; -#else - return true; -#endif + throw std::runtime_error("Failed to determine HF cache directory"); + }(); + + return cache; } static std::string folder_name_to_repo(const std::string & folder) { @@ -255,13 +225,13 @@ hf_files get_repo_files(const std::string & repo_id, fs::path path = file.path; fs::path repo_path = get_repo_path(repo_id); fs::path snapshots_path = repo_path / "snapshots" / rev / path; - fs::path blobs_path = repo_path / "blobs" / file.oid; - if (symlinks_supported()) { - file.local_path = blobs_path.string(); - file.link_path = snapshots_path.string(); - } else { // degraded mode - file.local_path = snapshots_path.string(); + file.final_path = snapshots_path.string(); + file.local_path = file.final_path; + + if (!file.oid.empty() && !fs::exists(snapshots_path)) { + fs::path blob_path = repo_path / "blobs" / file.oid; + file.local_path = blob_path.string(); } files.push_back(file); @@ -332,6 +302,7 @@ hf_files get_cached_files(const std::string & repo_id) { file.repo_id = _repo_id; file.path = path.generic_string(); file.local_path = entry.path().string(); + file.final_path = file.local_path; files.push_back(std::move(file)); } } @@ -341,24 +312,46 @@ hf_files get_cached_files(const std::string & repo_id) { } std::string finalize_file(const hf_file & file) { - if (file.link_path.empty()) { - return file.local_path; - } - - fs::path link_path(file.link_path); - fs::path local_path(file.local_path); + static std::atomic symlinks_disabled{false}; std::error_code ec; - fs::create_directories(link_path.parent_path(), ec); - fs::path target_path = fs::relative(local_path, link_path.parent_path(), ec); - fs::create_symlink(target_path, link_path, ec); + fs::path blob_path(file.local_path); + fs::path snapshot_path(file.final_path); - if (fs::exists(link_path)) { - return file.link_path; + if (blob_path == snapshot_path || fs::exists(snapshot_path, ec)) { + return file.final_path; } - LOG_WRN("%s: failed to create symlink: %s\n", __func__, file.link_path.c_str()); - return file.local_path; + if (!fs::exists(blob_path, ec)) { + return file.final_path; + } + + fs::create_directories(snapshot_path.parent_path(), ec); + + if (!symlinks_disabled) { + fs::path target = fs::relative(blob_path, snapshot_path.parent_path(), ec); + if (!ec) { + fs::create_symlink(target, snapshot_path, ec); + } + if (!ec) { + return file.final_path; + } + } + + if (!symlinks_disabled.exchange(true)) { + LOG_WRN("%s: failed to create symlink: %s\n", __func__, ec.message().c_str()); + LOG_WRN("%s: switching to degraded mode\n", __func__); + } + + fs::rename(blob_path, snapshot_path, ec); + if (ec) { + LOG_WRN("%s: failed to move file to snapshots: %s\n", __func__, ec.message().c_str()); + fs::copy(blob_path, snapshot_path, ec); + if (ec) { + LOG_ERR("%s: failed to copy file to snapshots: %s\n", __func__, ec.message().c_str()); + } + } + return file.final_path; } // delete everything after this line, one day diff --git a/common/hf-cache.h b/common/hf-cache.h index d5718cb752..7934ec7970 100644 --- a/common/hf-cache.h +++ b/common/hf-cache.h @@ -11,7 +11,7 @@ struct hf_file { std::string path; std::string url; std::string local_path; - std::string link_path; + std::string final_path; std::string oid; std::string repo_id; }; @@ -24,9 +24,9 @@ hf_files get_repo_files( const std::string & bearer_token ); -hf_files get_cached_files(const std::string & repo_id); +hf_files get_cached_files(const std::string & repo_id = {}); -// Create symlink if link_path is set and returns the snapshot path +// Create snapshot path (link or move/copy) and return it std::string finalize_file(const hf_file & file); // TODO: Remove later diff --git a/common/preset.cpp b/common/preset.cpp index 6bbd591c64..57ccd000b5 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -365,8 +365,8 @@ common_presets common_preset_context::load_from_cache() const { auto cached_models = common_list_cached_models(); for (const auto & model : cached_models) { common_preset preset; - preset.name = model; - preset.set_option(*this, "LLAMA_ARG_HF_REPO", model); + preset.name = model.to_string(); + preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string()); out[preset.name] = preset; }