diff --git a/common/arg.cpp b/common/arg.cpp
index 0fc5fae498..5b53cd6b61 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1060,7 +1060,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             auto models = common_list_cached_models();
             printf("number of models in cache: %zu\n", models.size());
             for (size_t i = 0; i < models.size(); i++) {
-                printf("%4zu. %s\n", i + 1, models[i].c_str());
+                printf("%4zu. %s\n", i + 1, models[i].to_string().c_str());
             }
             exit(0);
         }
diff --git a/common/download.cpp b/common/download.cpp
index e52794567b..cf28d624fc 100644
--- a/common/download.cpp
+++ b/common/download.cpp
@@ -15,7 +15,7 @@
 #include <map>
 #include <mutex>
 #include <regex>
-#include <set>
+#include <unordered_set>
 #include <string>
 #include <thread>
 #include <vector>
@@ -442,19 +442,50 @@ int common_download_file_single(const std::string & url,
 }
 
 struct gguf_split_info {
-    std::string prefix;
-    int index = 0;
-    int count = 0;
+    std::string prefix;    // path without ".gguf" and without the split suffix (tag included)
+    std::string tag;       // upper-cased last token of prefix after '-' or '.', empty if none
+    int         index = 0; // split index; get_gguf_split_info reports 1 for non-split paths
+    int         count = 0; // split count; get_gguf_split_info reports 1 for non-split paths
 };
 
 static gguf_split_info get_gguf_split_info(const std::string & path) {
-    static const std::regex re(R"(^(.+)-([0-9]+)-of-([0-9]+)\.gguf$)", std::regex::icase);
+    static const std::regex re_split("^(.+)-([0-9]{5})-of-([0-9]{5})$", std::regex::icase); // "<prefix>-NNNNN-of-NNNNN"
+    static const std::regex re_tag("[-.]([A-Z0-9_]+)$", std::regex::icase); // last token after '-' or '.'
     std::smatch m;
 
-    if (std::regex_match(path, m, re)) {
-        return {m[1].str(), std::stoi(m[2].str()), std::stoi(m[3].str())};
+    std::string prefix = path;
+    string_remove_suffix(prefix, ".gguf");
+    // a path without a split suffix is reported as part 1 of 1
+    int index = 1;
+    int count = 1;
+    // split part: strip the "-NNNNN-of-NNNNN" suffix from the prefix and keep its two numbers
+    if (std::regex_match(prefix, m, re_split)) {
+        prefix = m[1].str();
+        index = std::stoi(m[2].str());
+        count = std::stoi(m[3].str());
     }
-    return {};
+    // the tag is searched in the prefix after the split suffix is removed, then upper-cased
+    std::string tag;
+    if (std::regex_search(prefix, m, re_tag)) {
+        tag = m[1].str();
+        for (char & c : tag) {
+            c = std::toupper((unsigned char)c);
+        }
+    }
+    // aggregate order: prefix, tag, index, count
+    return {std::move(prefix), std::move(tag), index, count};
+}
+
+// Q4_0 -> 4, F16 -> 16, NVFP4 -> 4, Q8_K_M -> 8, etc (0 when the tag has no digit)
+static int extract_quant_bits(const std::string & filename) {
+    const std::string tag = get_gguf_split_info(filename).tag;
+
+    const auto pos = tag.find_first_of("0123456789");
+    if (pos == std::string::npos) {
+        return 0;
+    }
+    // parse at most 9 characters: always fits in int, so std::stoi cannot throw std::out_of_range
+    return std::stoi(tag.substr(pos, 9));
 }
 
 static hf_cache::hf_files get_split_files(const hf_cache::hf_files & files,
@@ -475,23 +506,75 @@ static hf_cache::hf_files get_split_files(const hf_cache::hf_files & files,
     return result;
 }
 
-static hf_cache::hf_files filter_gguf_by_quant(const hf_cache::hf_files & files,
-                                               const std::string & quant_tag) {
-    hf_cache::hf_files result;
-    std::regex pattern(quant_tag + "[.-]", std::regex::icase);
+static hf_cache::hf_file find_best_mmproj(const hf_cache::hf_files & files,
+                                          const std::string        & model) { // model: repo-relative path of the chosen model file
+    hf_cache::hf_file best; // returned default-constructed when no candidate matches
+    size_t best_depth = 0;
+    int best_diff = 0;
+    bool found = false;
+    // model path split on '/'; presumably the last element is the file name, so model_dir ends the directory parts
+    auto model_bits = extract_quant_bits(model);
+    auto model_parts = string_split<std::string>(model, '/');
+    auto model_dir = model_parts.end() - 1;
 
     for (const auto & f : files) {
-        if (!string_ends_with(f.path, ".gguf")) {
+        if (!string_ends_with(f.path, ".gguf") ||
+            f.path.find("mmproj") == std::string::npos) {
             continue;
         }
-        if (f.path.find("mmproj") != std::string::npos) {
+        // keep only candidates whose directory components are a prefix of the model's directory components
+        auto mmproj_parts = string_split<std::string>(f.path, '/');
+        auto mmproj_dir = mmproj_parts.end() - 1;
+
+        auto [_, dir] = std::mismatch(model_parts.begin(), model_dir,
+                                      mmproj_parts.begin(), mmproj_dir);
+        if (dir != mmproj_dir) {
             continue;
         }
-        if (std::regex_search(f.path, pattern)) {
-            result.push_back(f);
+        // rank: more shared directory components first, then the smallest quant-bit difference
+        size_t depth = dir - mmproj_parts.begin();
+        auto bits = extract_quant_bits(f.path);
+        auto diff = std::abs(bits - model_bits);
+
+        if (!found || depth > best_depth || (depth == best_depth && diff < best_diff)) {
+            best = f;
+            best_depth = depth;
+            best_diff = diff;
+            found = true;
         }
     }
-    return result;
+    return best;
+}
+
+static hf_cache::hf_file find_best_model(const hf_cache::hf_files & files,
+                                         const std::string        & tag) {
+    std::vector<std::string> tags;
+    // candidate quant tags in priority order: the requested tag, otherwise the defaults
+    if (!tag.empty()) {
+        tags.push_back(tag);
+    } else {
+        tags = {"Q4_K_M", "Q4_0"};
+    }
+    // first non-mmproj .gguf whose path matches "<tag>." or "<tag>-" (case-insensitive)
+    for (const auto & t : tags) {
+        std::regex pattern(t + "[.-]", std::regex::icase); // NOTE(review): t is interpreted as a regex, not as a literal
+        for (const auto & f : files) {
+            if (string_ends_with(f.path, ".gguf") &&
+                f.path.find("mmproj") == std::string::npos &&
+                std::regex_search(f.path, pattern)) {
+                return f;
+            }
+        }
+    }
+    // no tag matched: fall back to the first non-mmproj .gguf in the list
+    for (const auto & f : files) {
+        if (string_ends_with(f.path, ".gguf") &&
+            f.path.find("mmproj") == std::string::npos) {
+            return f;
+        }
+    }
+    // nothing usable: return a default-constructed hf_file (empty path)
+    return {};
 }
 
 static void list_available_gguf_files(const hf_cache::hf_files & files) {
@@ -504,11 +587,8 @@ static void list_available_gguf_files(const hf_cache::hf_files & files) {
 }
 
 struct hf_plan {
-    hf_cache::hf_file primary;
+    hf_cache::hf_files model_files; // result of get_split_files() for the chosen model; left empty when planning fails
     hf_cache::hf_file mmproj;
-    bool has_primary = false;
-    bool has_mmproj = false;
-    hf_cache::hf_files files;
 };
 
 static hf_plan get_hf_plan(const common_params_model        & model,
@@ -523,94 +603,64 @@ static hf_plan get_hf_plan(const common_params_model        & model,
         return plan;
     }
 
-    hf_cache::hf_files candidates;
+    hf_cache::hf_file primary;
 
     if (!model.hf_file.empty()) {
-        const hf_cache::hf_file * found_file = nullptr;
         for (const auto & f : all) {
             if (f.path == model.hf_file) {
-                found_file = &f;
+                primary = f;
                 break;
             }
         }
-
-        if (!found_file) {
-            LOG_ERR("%s: --hf-file '%s' not found in repository\n", __func__, model.hf_file.c_str());
+        if (primary.path.empty()) {
+            LOG_ERR("%s: file '%s' not found in repository\n", __func__, model.hf_file.c_str());
             list_available_gguf_files(all);
             return plan;
         }
-
-        plan.primary = *found_file;
-        plan.has_primary = true;
-        candidates = get_split_files(all, *found_file);
     } else {
-        std::vector<std::string> search_priority = {!tag.empty() ? tag : "Q4_K_M", "Q4_0"};
-
-        for (const auto & q : search_priority) {
-            candidates = filter_gguf_by_quant(all, q);
-            if (!candidates.empty()) {
-                candidates = get_split_files(all, candidates[0]);
-                break;
-            }
-        }
-
-        if (candidates.empty()) {
-            for (const auto & f : all) {
-                if (string_ends_with(f.path, ".gguf") &&
-                    f.path.find("mmproj") == std::string::npos) {
-                    candidates = get_split_files(all, f);
-                    break;
-                }
-            }
-        }
-
-        if (candidates.empty()) {
+        primary = find_best_model(all, tag);
+        if (primary.path.empty()) {
             LOG_ERR("%s: no GGUF files found in repository %s\n", __func__, repo.c_str());
             list_available_gguf_files(all);
             return plan;
         }
-
-        plan.primary = candidates[0];
-        plan.has_primary = true;
     }
 
-    for (const auto & f : candidates) {
-        plan.files.push_back(f);
-    }
+    plan.model_files = get_split_files(all, primary);
 
     if (opts.download_mmproj) {
-        for (const auto & f : all) {
-            if (string_ends_with(f.path, ".gguf") &&
-                f.path.find("mmproj") != std::string::npos) {
-                plan.mmproj = f;
-                plan.has_mmproj = true;
-                plan.files.push_back(f);
-                break;
-            }
-        }
+        plan.mmproj = find_best_mmproj(all, primary.path);
     }
 
     return plan;
 }
 
-static std::vector<std::pair<std::string, std::string>> get_url_tasks(const common_params_model & model) {
-    auto [prefix_url, idx, count] = get_gguf_split_info(model.url);
+struct download_task { // one file to fetch
+    std::string url;   // source URL
+    std::string path;  // local destination path
+};
 
-    if (count <= 1) {
+static std::vector<download_task> get_url_tasks(const common_params_model & model) {
+    auto split = get_gguf_split_info(model.url);
+    // not a split file (count <= 1): download the URL as given to model.path
+    if (split.count <= 1) {
         return {{model.url, model.path}};
     }
 
-    std::vector<std::pair<std::string, std::string>> files;
-
-    size_t pos = prefix_url.rfind('/');
-    std::string prefix_filename = (pos != std::string::npos) ? prefix_url.substr(pos + 1) : prefix_url;
-    std::string prefix_path = (std::filesystem::path(model.path).parent_path() / prefix_filename).string();
-
-    for (int i = 1; i <= count; i++) {
-        std::string suffix = string_format("-%05d-of-%05d.gguf", i, count);
-        files.emplace_back(prefix_url + suffix, prefix_path + suffix);
+    auto filename = split.prefix;
+    if (auto pos = split.prefix.rfind('/'); pos != std::string::npos) {
+        filename = split.prefix.substr(pos + 1);
     }
-    return files;
+    // split parts are stored in model.path's directory under the URL's base file name
+    auto parent_path = std::filesystem::path(model.path).parent_path();
+    auto prefix_path = (parent_path / filename).string();
+    // one task per part, numbered 1..count with the "-NNNNN-of-NNNNN.gguf" suffix
+    std::vector<download_task> tasks;
+    for (int i = 1; i <= split.count; i++) {
+        auto suffix = string_format("-%05d-of-%05d.gguf", i, split.count);
+        tasks.push_back({split.prefix + suffix, prefix_path + suffix});
+    }
+    return tasks;
 }
 
 common_download_model_result common_download_model(const common_params_model        & model,
@@ -618,32 +668,35 @@ common_download_model_result common_download_model(const common_params_model
                                                    const common_download_model_opts & opts,
                                                    const common_header_list         & headers) {
     common_download_model_result result;
-    std::vector<std::pair<std::string, std::string>> to_download;
+    std::vector<download_task> tasks;
     hf_plan hf;
 
     bool is_hf = !model.hf_repo.empty();
 
     if (is_hf) {
         hf = get_hf_plan(model, bearer_token, opts);
-        for (const auto & f : hf.files) {
-            to_download.emplace_back(f.url, f.local_path);
+        for (const auto & f : hf.model_files) {
+            tasks.push_back({f.url, f.local_path});
+        }
+        if (!hf.mmproj.path.empty()) {
+            tasks.push_back({hf.mmproj.url, hf.mmproj.local_path});
         }
     } else if (!model.url.empty()) {
-        to_download = get_url_tasks(model);
+        tasks = get_url_tasks(model);
     } else {
         result.model_path = model.path;
         return result;
     }
 
-    if (to_download.empty()) {
+    if (tasks.empty()) {
         return result;
     }
 
     std::vector<std::future<bool>> futures;
-    for (const auto & item : to_download) {
+    for (const auto & task : tasks) {
         futures.push_back(std::async(std::launch::async,
-            [u = item.first, p = item.second, &bearer_token, offline = opts.offline, &headers, is_hf]() {
-                int status = common_download_file_single(u, p, bearer_token, offline, headers, is_hf);
+            [&task, &bearer_token, offline = opts.offline, &headers, is_hf]() {
+                int status = common_download_file_single(task.url, task.path, bearer_token, offline, headers, is_hf);
                 return is_http_status_ok(status);
             }
         ));
@@ -656,13 +709,12 @@ common_download_model_result common_download_model(const common_params_model
     }
 
     if (is_hf) {
-        for (const auto & f : hf.files) {
+        for (const auto & f : hf.model_files) {
             hf_cache::finalize_file(f);
         }
-        if (hf.has_primary) {
-            result.model_path = hf_cache::finalize_file(hf.primary);
-        }
-        if (hf.has_mmproj) {
+        result.model_path = hf.model_files.empty() ? "" : hf.model_files[0].final_path;
+        // ^ final_path is what finalize_file() returns; local_path may be a blob that it moved into snapshots
+        if (!hf.mmproj.path.empty()) {
             result.mmproj_path = hf_cache::finalize_file(hf.mmproj);
         }
     } else {
@@ -793,48 +845,22 @@ std::string common_docker_resolve_model(const std::string & docker) {
     }
 }
 
-std::vector<std::string> common_list_cached_models() {
-    auto files = hf_cache::get_cached_files("");
-    std::set<std::string> models;
+std::vector<common_cached_model_info> common_list_cached_models() {
+    std::unordered_set<std::string> seen; // "repo:tag" keys already emitted
+    std::vector<common_cached_model_info> result;
+    // one entry per distinct repo:tag among the cached .gguf model files, in discovery order
+    auto files = hf_cache::get_cached_files();
 
     for (const auto & f : files) {
-        std::string tmp = f.path;
-
-        if (!string_remove_suffix(tmp, ".gguf")) {
+        auto split = get_gguf_split_info(f.path);
+        if (!string_ends_with(f.path, ".gguf") || split.index != 1 || split.tag.empty() ||
+            split.prefix.find("mmproj") != std::string::npos) { // skip non-GGUF, later split parts, untagged, mmproj
             continue;
         }
-        if (tmp.find("mmproj") != std::string::npos) {
-            continue;
-        }
-        auto split_pos = tmp.find("-00001-of-");
-
-        if (split_pos == std::string::npos &&
-            tmp.find("-of-") != std::string::npos) {
-            continue;
-        }
-        if (split_pos != std::string::npos) {
-            tmp.erase(split_pos);
-        }
-        auto sep_pos = tmp.find_last_of("-.");
-
-        if (sep_pos == std::string::npos || sep_pos == tmp.size() - 1) {
-            continue;
-        }
-        tmp.erase(0, sep_pos + 1);
-
-        bool is_valid = true;
-        for (char & c : tmp) {
-            unsigned char uc = c;
-            if (!std::isalnum(uc) && uc != '_') {
-                is_valid = false;
-                break;
-            }
-            c = std::toupper(uc);
-        }
-        if (is_valid) {
-            models.insert(f.repo_id + ":" + tmp);
+        if (seen.insert(f.repo_id + ":" + split.tag).second) {
+            result.push_back({f.repo_id, split.tag});
         }
     }
 
-    return {models.begin(), models.end()};
+    return result;
 }
diff --git a/common/download.h b/common/download.h
index 03116d2ced..0a933521fa 100644
--- a/common/download.h
+++ b/common/download.h
@@ -17,12 +17,20 @@ struct common_remote_params {
 // get remote file content, returns <http_code, raw_response_body>
 std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
 
-// split HF repo with tag into <repo, tag>
-// for example: "user/model:tag" -> <"user/model", "tag">
-// if tag is not present, default to "latest"
-// example: "user/model" -> <"user/model", "latest">
+// split HF repo with tag into <repo, tag>, for example:
+// - "ggml-org/models:F16" -> <"ggml-org/models", "F16">
+// tag is optional and can be empty
 std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
 
+// Result of common_list_cached_models: one cached model, identified by repo and tag
+struct common_cached_model_info {
+    std::string repo; // HF repo id, e.g. "ggml-org/models"
+    std::string tag;  // tag parsed from the cached file name, e.g. "F16"
+    std::string to_string() const { // formats as "<repo>:<tag>"
+        return repo + ":" + tag;
+    }
+};
+
 // Options for common_download_model
 struct common_download_model_opts {
     bool download_mmproj = false;
@@ -31,17 +39,34 @@ struct common_download_model_opts {
 
 // Result of common_download_model
 struct common_download_model_result {
-    std::string model_path;  // path to downloaded model (empty on failure)
-    std::string mmproj_path; // path to downloaded mmproj (empty if not downloaded)
+    std::string model_path;  // path to the model file (empty on failure)
+    std::string mmproj_path; // path to the mmproj file (empty if none was downloaded)
 };
 
-/**
- * Allow getting the HF file from the HF repo with tag (like ollama), for example:
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
- * Tag is optional, it checks for Q4_K_M first, then Q4_0, then if not found, return the first GGUF file in repo
- */
+// Download model from HuggingFace repo or URL
+//
+// input (via model struct):
+// - model.hf_repo: HF repo with optional tag, see common_download_split_repo_tag
+// - model.hf_file: specific file in the repo (requires hf_repo)
+// - model.url: simple download (used if hf_repo is empty)
+// - model.path: local file path
+//
+// tag matching (for HF repos without model.hf_file):
+// - if tag is specified, searches for GGUF matching that quantization
+// - if no tag, searches for Q4_K_M, then Q4_0, then first available GGUF
+//
+// split GGUF: multi-part files like "model-00001-of-00003.gguf" are automatically
+// detected and all parts are downloaded
+//
+// caching:
+// - HF repos: uses HuggingFace cache
+// - URLs: uses ETag-based caching
+//
+// when opts.offline=true, no network requests are made
+// when opts.download_mmproj=true, searches for an mmproj in the model's directory or any of its parent directories;
+// the deepest such directory wins, and ties are broken by the closest quantization bit count
+//
+// returns result with model_path and mmproj_path (empty on failure)
 common_download_model_result common_download_model(
     const common_params_model & model,
     const std::string & bearer_token,
@@ -50,7 +75,7 @@ common_download_model_result common_download_model(
 );
 
 // returns list of cached models
-std::vector<std::string> common_list_cached_models();
+std::vector<common_cached_model_info> common_list_cached_models();
 
 // download single file from url to local path
 // returns status code or -1 on error
diff --git a/common/hf-cache.cpp b/common/hf-cache.cpp
index 8aab2d117c..51425d0c4d 100644
--- a/common/hf-cache.cpp
+++ b/common/hf-cache.cpp
@@ -9,7 +9,7 @@
 
 #include <filesystem>
 #include <fstream>
-#include <mutex>
+#include <atomic>
 #include <regex> // migration only
 #include <string>
 #include <string_view>
@@ -22,7 +22,10 @@ namespace nl = nlohmann;
 #ifndef NOMINMAX
 #define NOMINMAX
 #endif
+#define HOME_DIR "USERPROFILE"
 #include <windows.h>
+#else
+#define HOME_DIR "HOME"
 #endif
 
 namespace hf_cache {
@@ -30,60 +33,27 @@ namespace hf_cache {
 namespace fs = std::filesystem;
 
 static fs::path get_cache_directory() {
-    const char * hf_hub_cache = std::getenv("HF_HUB_CACHE");
-    if (hf_hub_cache && *hf_hub_cache) {
-        return fs::path(hf_hub_cache);  // assume shell-expanded; add expand logic if you want full parity
-    }
-
-    const char * huggingface_hub_cache = std::getenv("HUGGINGFACE_HUB_CACHE");
-    if (huggingface_hub_cache && *huggingface_hub_cache) {
-        return fs::path(huggingface_hub_cache);
-    }
-
-    const char * hf_home = std::getenv("HF_HOME");
-    if (hf_home && *hf_home) {
-        return fs::path(hf_home) / "hub";
-    }
-
-    const char * xdg_cache_home = std::getenv("XDG_CACHE_HOME");
-    if (xdg_cache_home && *xdg_cache_home) {
-        return fs::path(xdg_cache_home) / "huggingface" / "hub";
-    }
-#if defined(_WIN32)
-    const char * userprofile = std::getenv("USERPROFILE");
-    if (userprofile && *userprofile) {
-        return fs::path(userprofile) / ".cache" / "huggingface" / "hub";
-    }
-#else
-    const char * home = std::getenv("HOME");
-    if (home && *home) {
-        return fs::path(home) / ".cache" / "huggingface" / "hub";
-    }
-#endif
-    throw std::runtime_error("Failed to determine HF cache directory");
-}
-
-static bool symlinks_supported() {
-#ifdef _WIN32
-    static bool supported = false;
-    static std::once_flag once;
-    std::call_once(once, []() {
-        fs::path link = get_cache_directory() / ("link_" + std::to_string(GetCurrentProcessId()));
-
-        std::error_code ec;
-        fs::create_directory_symlink("..", link, ec);
-        supported = !ec;
-
-        if (!ec) {
-            fs::remove(link, ec);
-        } else if (GetLastError() == ERROR_PRIVILEGE_NOT_HELD) {
-            LOG_WRN("symlink creation requires Developer Mode or admin privileges on Windows\n");
+    static const fs::path cache = []() { // function-local static: the lookup result is stored and reused
+        struct {
+            const char * var; // environment variable to probe
+            fs::path path;    // sub-path appended to its value (empty: use the value as-is)
+        } entries[] = {       // listed in priority order
+            {"HF_HUB_CACHE",          fs::path()},
+            {"HUGGINGFACE_HUB_CACHE", fs::path()},
+            {"HF_HOME",               fs::path("hub")},
+            {"XDG_CACHE_HOME",        fs::path("huggingface") / "hub"},
+            {HOME_DIR,                fs::path(".cache") / "huggingface" / "hub"}
+        };
+        for (const auto & entry : entries) {
+            if (auto * p = std::getenv(entry.var); p && *p) { // first variable that is set and non-empty wins
+                fs::path base(p);
+                return entry.path.empty() ? base : base / entry.path;
+            }
         }
-    });
-    return supported;
-#else
-    return true;
-#endif
+        throw std::runtime_error("Failed to determine HF cache directory");
+    }();
+
+    return cache;
 }
 
 static std::string folder_name_to_repo(const std::string & folder) {
@@ -255,13 +225,13 @@ hf_files get_repo_files(const std::string & repo_id,
             fs::path path = file.path;
             fs::path repo_path = get_repo_path(repo_id);
             fs::path snapshots_path = repo_path / "snapshots" / rev / path;
-            fs::path blobs_path = repo_path / "blobs" / file.oid;
 
-            if (symlinks_supported()) {
-                file.local_path = blobs_path.string();
-                file.link_path = snapshots_path.string();
-            } else { // degraded mode
-                file.local_path = snapshots_path.string();
+            file.final_path = snapshots_path.string();
+            file.local_path = file.final_path;
+
+            if (!file.oid.empty() && !fs::exists(snapshots_path)) {
+                fs::path blob_path = repo_path / "blobs" / file.oid;
+                file.local_path = blob_path.string();
             }
 
             files.push_back(file);
@@ -332,6 +302,7 @@ hf_files get_cached_files(const std::string & repo_id) {
                 file.repo_id = _repo_id;
                 file.path = path.generic_string();
                 file.local_path = entry.path().string();
+                file.final_path = file.local_path;
                 files.push_back(std::move(file));
             }
         }
@@ -341,24 +312,46 @@ hf_files get_cached_files(const std::string & repo_id) {
 }
 
 std::string finalize_file(const hf_file & file) {
-    if (file.link_path.empty()) {
-        return file.local_path;
-    }
-
-    fs::path link_path(file.link_path);
-    fs::path local_path(file.local_path);
+    static std::atomic<bool> symlinks_disabled{false}; // set on the first symlink failure; later calls skip the attempt
 
     std::error_code ec;
-    fs::create_directories(link_path.parent_path(), ec);
-    fs::path target_path = fs::relative(local_path, link_path.parent_path(), ec);
-    fs::create_symlink(target_path, link_path, ec);
+    fs::path blob_path(file.local_path);
+    fs::path snapshot_path(file.final_path);
 
-    if (fs::exists(link_path)) {
-        return file.link_path;
+    if (blob_path == snapshot_path || fs::exists(snapshot_path, ec)) { // no separate blob, or snapshot entry already present
+        return file.final_path;
     }
 
-    LOG_WRN("%s: failed to create symlink: %s\n", __func__, file.link_path.c_str());
-    return file.local_path;
+    if (!fs::exists(blob_path, ec)) { // no blob to link or move; final_path is still returned
+        return file.final_path;
+    }
+
+    fs::create_directories(snapshot_path.parent_path(), ec);
+    // first choice: symlink at the snapshot path, targeting the blob relative to the snapshot's directory
+    if (!symlinks_disabled) {
+        fs::path target = fs::relative(blob_path, snapshot_path.parent_path(), ec);
+        if (!ec) {
+            fs::create_symlink(target, snapshot_path, ec);
+        }
+        if (!ec) {
+            return file.final_path;
+        }
+    }
+    // exchange() returns the previous value, so the warnings are logged only by the first failure
+    if (!symlinks_disabled.exchange(true)) {
+        LOG_WRN("%s: failed to create symlink: %s\n", __func__, ec.message().c_str());
+        LOG_WRN("%s: switching to degraded mode\n", __func__);
+    }
+    // degraded mode: move the blob to the snapshot path, falling back to a copy if the move fails
+    fs::rename(blob_path, snapshot_path, ec);
+    if (ec) {
+        LOG_WRN("%s: failed to move file to snapshots: %s\n", __func__, ec.message().c_str());
+        fs::copy(blob_path, snapshot_path, ec);
+        if (ec) {
+            LOG_ERR("%s: failed to copy file to snapshots: %s\n", __func__, ec.message().c_str());
+        }
+    }
+    return file.final_path;
 }
 
 // delete everything after this line, one day
diff --git a/common/hf-cache.h b/common/hf-cache.h
index d5718cb752..7934ec7970 100644
--- a/common/hf-cache.h
+++ b/common/hf-cache.h
@@ -11,7 +11,7 @@ struct hf_file {
     std::string path;
     std::string url;
     std::string local_path;
-    std::string link_path;
+    std::string final_path;
     std::string oid;
     std::string repo_id;
 };
@@ -24,9 +24,9 @@ hf_files get_repo_files(
     const std::string & bearer_token
 );
 
-hf_files get_cached_files(const std::string & repo_id);
+hf_files get_cached_files(const std::string & repo_id = {});
 
-// Create symlink if link_path is set and returns the snapshot path
+// Create snapshot path (link or move/copy) and return it
 std::string finalize_file(const hf_file & file);
 
 // TODO: Remove later
diff --git a/common/preset.cpp b/common/preset.cpp
index 6bbd591c64..57ccd000b5 100644
--- a/common/preset.cpp
+++ b/common/preset.cpp
@@ -365,8 +365,8 @@ common_presets common_preset_context::load_from_cache() const {
     auto cached_models = common_list_cached_models();
     for (const auto & model : cached_models) {
         common_preset preset;
-        preset.name = model;
-        preset.set_option(*this, "LLAMA_ARG_HF_REPO", model);
+        preset.name = model.to_string();
+        preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string());
         out[preset.name] = preset;
     }