This commit is contained in the branch history.
Author: Lonnie, 2026-03-16 12:13:16 +11:00; committed by GitHub.
commit f5480c0e9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 38 additions and 14 deletions

View File

@ -291,13 +291,14 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa
}
const bool offline = params.offline;
const bool cache_only = params.cache_only;
std::string model_endpoint = get_model_endpoint();
auto preset_url = model_endpoint + hf_repo + "/resolve/main/preset.ini";
// prepare local path for caching
auto preset_fname = clean_file_name(hf_repo + "_preset.ini");
auto preset_path = fs_get_cache_file(preset_fname);
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline, cache_only);
const bool has_preset = status >= 200 && status < 400;
// remote preset is optional, so we don't error out if not found
@ -378,7 +379,7 @@ static handle_model_result common_params_handle_model(
// then, download it if needed
if (!model.url.empty()) {
bool ok = common_download_model(model, bearer_token, offline);
bool ok = common_download_model(model, bearer_token, offline, cache_only);
if (!ok) {
LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
exit(1);
@ -3248,6 +3249,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.offline = true;
}
).set_env("LLAMA_OFFLINE"));
add_opt(common_arg(
{"--cache-only"},
"Cache-only mode: download models if not cached, but never re-download or update cached models",
[](common_params & params) {
params.cache_only = true;
}
).set_env("LLAMA_CACHE_ONLY"));
add_opt(common_arg(
{"-lv", "--verbosity", "--log-verbosity"}, "N",
string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"

View File

@ -450,6 +450,7 @@ struct common_params {
int32_t control_vector_layer_start = -1; // layer range for control vector
int32_t control_vector_layer_end = -1; // layer range for control vector
bool offline = false;
bool cache_only = false; // use cache only, never re-download
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line

View File

@ -440,18 +440,29 @@ int common_download_file_single(const std::string & url,
const std::string & path,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers) {
if (!offline) {
return common_download_file_single_online(url, path, bearer_token, headers);
if (offline) {
// Original offline mode: no network access at all
if (!std::filesystem::exists(path)) {
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
return -1;
}
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
return 304; // Not Modified - fake cached response
}
if (!std::filesystem::exists(path)) {
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
return -1;
if (cache_only) {
// Cache-only mode: use cached file if exists, otherwise download
// but never re-download if etag changed
if (std::filesystem::exists(path)) {
LOG_INF("%s: using cached file (cache-only mode): %s\n", __func__, path.c_str());
return 304; // Not Modified - fake cached response
}
// File not cached, proceed with download
}
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
return 304; // Not Modified - fake cached response
return common_download_file_single_online(url, path, bearer_token, headers);
}
// download multiple files from remote URLs to local paths
@ -459,6 +470,7 @@ int common_download_file_single(const std::string & url,
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers) {
// Prepare download in parallel
std::vector<std::future<bool>> futures_download;
@ -468,8 +480,8 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
futures_download.push_back(
std::async(
std::launch::async,
[&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
[&bearer_token, offline, cache_only, &headers](const std::pair<std::string, std::string> & it) -> bool {
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, cache_only, headers);
return is_http_status_ok(http_status);
},
item
@ -490,6 +502,7 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
bool common_download_model(const common_params_model & model,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers) {
// Basic validation of the model.url
if (model.url.empty()) {
@ -497,7 +510,7 @@ bool common_download_model(const common_params_model & model,
return false;
}
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, cache_only, headers);
if (!is_http_status_ok(http_status)) {
return false;
}
@ -557,7 +570,7 @@ bool common_download_model(const common_params_model & model,
}
// Download in parallel
common_download_file_multiple(urls, bearer_token, offline, headers);
common_download_file_multiple(urls, bearer_token, offline, cache_only, headers);
}
return true;
@ -751,7 +764,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
std::string local_path = fs_get_cache_file(model_filename);
const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
const int http_status = common_download_file_single(blob_url, local_path, token, false, false, {});
if (!is_http_status_ok(http_status)) {
throw std::runtime_error("Failed to download Docker Model");
}

View File

@ -65,6 +65,7 @@ bool common_download_model(
const common_params_model & model,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers = {}
);
@ -77,6 +78,7 @@ int common_download_file_single(const std::string & url,
const std::string & path,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers = {});
// resolve and download model from Docker registry