This commit is contained in the branch history.
Author: Lonnie, 2026-03-16 12:13:16 +11:00; committed by GitHub.
commit f5480c0e9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 38 additions and 14 deletions

View File

@ -291,13 +291,14 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa
}
const bool offline = params.offline;
const bool cache_only = params.cache_only;
std::string model_endpoint = get_model_endpoint();
auto preset_url = model_endpoint + hf_repo + "/resolve/main/preset.ini";
// prepare local path for caching
auto preset_fname = clean_file_name(hf_repo + "_preset.ini");
auto preset_path = fs_get_cache_file(preset_fname);
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline, cache_only);
const bool has_preset = status >= 200 && status < 400;
// remote preset is optional, so we don't error out if not found
@ -378,7 +379,7 @@ static handle_model_result common_params_handle_model(
// then, download it if needed
if (!model.url.empty()) {
bool ok = common_download_model(model, bearer_token, offline);
bool ok = common_download_model(model, bearer_token, offline, cache_only);
if (!ok) {
LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
exit(1);
@ -3248,6 +3249,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.offline = true;
}
).set_env("LLAMA_OFFLINE"));
add_opt(common_arg(
{"--cache-only"},
"Cache-only mode: download models if not cached, but never re-download or update cached models",
[](common_params & params) {
params.cache_only = true;
}
).set_env("LLAMA_CACHE_ONLY"));
add_opt(common_arg(
{"-lv", "--verbosity", "--log-verbosity"}, "N",
string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"

View File

@ -450,6 +450,7 @@ struct common_params {
int32_t control_vector_layer_start = -1; // layer range for control vector
int32_t control_vector_layer_end = -1; // layer range for control vector
bool offline = false;
bool cache_only = false; // use cache only, never re-download
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line

View File

@ -440,18 +440,29 @@ int common_download_file_single(const std::string & url,
const std::string & path,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers) {
if (!offline) {
return common_download_file_single_online(url, path, bearer_token, headers);
if (offline) {
// Original offline mode: no network access at all
if (!std::filesystem::exists(path)) {
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
return -1;
}
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
return 304; // Not Modified - fake cached response
}
if (!std::filesystem::exists(path)) {
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
return -1;
if (cache_only) {
// Cache-only mode: use cached file if exists, otherwise download
// but never re-download if etag changed
if (std::filesystem::exists(path)) {
LOG_INF("%s: using cached file (cache-only mode): %s\n", __func__, path.c_str());
return 304; // Not Modified - fake cached response
}
// File not cached, proceed with download
}
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
return 304; // Not Modified - fake cached response
return common_download_file_single_online(url, path, bearer_token, headers);
}
// download multiple files from remote URLs to local paths
@ -459,6 +470,7 @@ int common_download_file_single(const std::string & url,
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers) {
// Prepare download in parallel
std::vector<std::future<bool>> futures_download;
@ -468,8 +480,8 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
futures_download.push_back(
std::async(
std::launch::async,
[&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
[&bearer_token, offline, cache_only, &headers](const std::pair<std::string, std::string> & it) -> bool {
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, cache_only, headers);
return is_http_status_ok(http_status);
},
item
@ -490,6 +502,7 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
bool common_download_model(const common_params_model & model,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers) {
// Basic validation of the model.url
if (model.url.empty()) {
@ -497,7 +510,7 @@ bool common_download_model(const common_params_model & model,
return false;
}
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, cache_only, headers);
if (!is_http_status_ok(http_status)) {
return false;
}
@ -557,7 +570,7 @@ bool common_download_model(const common_params_model & model,
}
// Download in parallel
common_download_file_multiple(urls, bearer_token, offline, headers);
common_download_file_multiple(urls, bearer_token, offline, cache_only, headers);
}
return true;
@ -751,7 +764,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
std::string local_path = fs_get_cache_file(model_filename);
const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
const int http_status = common_download_file_single(blob_url, local_path, token, false, false, {});
if (!is_http_status_ok(http_status)) {
throw std::runtime_error("Failed to download Docker Model");
}

View File

@ -65,6 +65,7 @@ bool common_download_model(
const common_params_model & model,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers = {}
);
@ -77,6 +78,7 @@ int common_download_file_single(const std::string & url,
const std::string & path,
const std::string & bearer_token,
bool offline,
bool cache_only,
const common_header_list & headers = {});
// resolve and download model from Docker registry