Merge f71b68ae4b into 9e2e2198b0
This commit is contained in:
commit
f5480c0e9d
|
|
@ -291,13 +291,14 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa
|
|||
}
|
||||
|
||||
const bool offline = params.offline;
|
||||
const bool cache_only = params.cache_only;
|
||||
std::string model_endpoint = get_model_endpoint();
|
||||
auto preset_url = model_endpoint + hf_repo + "/resolve/main/preset.ini";
|
||||
|
||||
// prepare local path for caching
|
||||
auto preset_fname = clean_file_name(hf_repo + "_preset.ini");
|
||||
auto preset_path = fs_get_cache_file(preset_fname);
|
||||
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline);
|
||||
const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline, cache_only);
|
||||
const bool has_preset = status >= 200 && status < 400;
|
||||
|
||||
// remote preset is optional, so we don't error out if not found
|
||||
|
|
@ -378,7 +379,7 @@ static handle_model_result common_params_handle_model(
|
|||
|
||||
// then, download it if needed
|
||||
if (!model.url.empty()) {
|
||||
bool ok = common_download_model(model, bearer_token, offline);
|
||||
bool ok = common_download_model(model, bearer_token, offline, cache_only);
|
||||
if (!ok) {
|
||||
LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
|
||||
exit(1);
|
||||
|
|
@ -3248,6 +3249,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.offline = true;
|
||||
}
|
||||
).set_env("LLAMA_OFFLINE"));
|
||||
// Register the `--cache-only` command-line option.
// The option takes no value: its handler only sets `params.cache_only = true`.
// As the help text states, this is not a "no network" mode: files missing from
// the cache are still downloaded; only re-downloading/updating of files that
// are already cached is suppressed.
// `set_env` attaches the name LLAMA_CACHE_ONLY to the option
// (presumably so it can also be enabled from the environment - confirm against common_arg).
add_opt(common_arg(
|
||||
{"--cache-only"},
|
||||
"Cache-only mode: download models if not cached, but never re-download or update cached models",
|
||||
[](common_params & params) {
|
||||
params.cache_only = true;
|
||||
}
|
||||
).set_env("LLAMA_CACHE_ONLY"));
|
||||
add_opt(common_arg(
|
||||
{"-lv", "--verbosity", "--log-verbosity"}, "N",
|
||||
string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"
|
||||
|
|
|
|||
|
|
@ -450,6 +450,7 @@ struct common_params {
|
|||
int32_t control_vector_layer_start = -1; // layer range for control vector
|
||||
int32_t control_vector_layer_end = -1; // layer range for control vector
|
||||
bool offline = false;
|
||||
bool cache_only = false; // use cache only, never re-download
|
||||
|
||||
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
|
||||
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
|
||||
|
|
|
|||
|
|
@ -440,18 +440,29 @@ int common_download_file_single(const std::string & url,
|
|||
const std::string & path,
|
||||
const std::string & bearer_token,
|
||||
bool offline,
|
||||
bool cache_only,
|
||||
const common_header_list & headers) {
|
||||
if (!offline) {
|
||||
return common_download_file_single_online(url, path, bearer_token, headers);
|
||||
if (offline) {
|
||||
// Offline mode: no network access at all; only a file already present in the cache can be used
|
||||
if (!std::filesystem::exists(path)) {
|
||||
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
||||
return -1;
|
||||
}
|
||||
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
||||
return 304; // Not Modified - fake cached response
|
||||
}
|
||||
|
||||
if (!std::filesystem::exists(path)) {
|
||||
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
||||
return -1;
|
||||
if (cache_only) {
|
||||
// Cache-only mode: use cached file if exists, otherwise download
|
||||
// but never re-download if etag changed
|
||||
if (std::filesystem::exists(path)) {
|
||||
LOG_INF("%s: using cached file (cache-only mode): %s\n", __func__, path.c_str());
|
||||
return 304; // Not Modified - fake cached response
|
||||
}
|
||||
// File not cached, proceed with download
|
||||
}
|
||||
|
||||
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
||||
return 304; // Not Modified - fake cached response
|
||||
return common_download_file_single_online(url, path, bearer_token, headers);
|
||||
}
|
||||
|
||||
// download multiple files from remote URLs to local paths
|
||||
|
|
@ -459,6 +470,7 @@ int common_download_file_single(const std::string & url,
|
|||
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls,
|
||||
const std::string & bearer_token,
|
||||
bool offline,
|
||||
bool cache_only,
|
||||
const common_header_list & headers) {
|
||||
// Prepare download in parallel
|
||||
std::vector<std::future<bool>> futures_download;
|
||||
|
|
@ -468,8 +480,8 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
|
|||
futures_download.push_back(
|
||||
std::async(
|
||||
std::launch::async,
|
||||
[&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
|
||||
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
|
||||
[&bearer_token, offline, cache_only, &headers](const std::pair<std::string, std::string> & it) -> bool {
|
||||
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, cache_only, headers);
|
||||
return is_http_status_ok(http_status);
|
||||
},
|
||||
item
|
||||
|
|
@ -490,6 +502,7 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
|
|||
bool common_download_model(const common_params_model & model,
|
||||
const std::string & bearer_token,
|
||||
bool offline,
|
||||
bool cache_only,
|
||||
const common_header_list & headers) {
|
||||
// Basic validation of the model.url
|
||||
if (model.url.empty()) {
|
||||
|
|
@ -497,7 +510,7 @@ bool common_download_model(const common_params_model & model,
|
|||
return false;
|
||||
}
|
||||
|
||||
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
|
||||
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, cache_only, headers);
|
||||
if (!is_http_status_ok(http_status)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -557,7 +570,7 @@ bool common_download_model(const common_params_model & model,
|
|||
}
|
||||
|
||||
// Download in parallel
|
||||
common_download_file_multiple(urls, bearer_token, offline, headers);
|
||||
common_download_file_multiple(urls, bearer_token, offline, cache_only, headers);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -751,7 +764,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
|
|||
std::string local_path = fs_get_cache_file(model_filename);
|
||||
|
||||
const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
|
||||
const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
|
||||
const int http_status = common_download_file_single(blob_url, local_path, token, false, false, {});
|
||||
if (!is_http_status_ok(http_status)) {
|
||||
throw std::runtime_error("Failed to download Docker Model");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ bool common_download_model(
|
|||
const common_params_model & model,
|
||||
const std::string & bearer_token,
|
||||
bool offline,
|
||||
bool cache_only,
|
||||
const common_header_list & headers = {}
|
||||
);
|
||||
|
||||
|
|
@ -77,6 +78,7 @@ int common_download_file_single(const std::string & url,
|
|||
const std::string & path,
|
||||
const std::string & bearer_token,
|
||||
bool offline,
|
||||
bool cache_only,
|
||||
const common_header_list & headers = {});
|
||||
|
||||
// resolve and download model from Docker registry
|
||||
|
|
|
|||
Loading…
Reference in New Issue