From e12513f55e1dde0d4da7669fb38789e39a1e7de9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Thu, 12 Feb 2026 17:41:43 +0100 Subject: [PATCH 1/2] common : remove legacy .json to .etag migration code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Adrien Gallouët --- common/download.cpp | 44 +++++++++----------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/common/download.cpp b/common/download.cpp index 8710438aa4..f2305eb974 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -114,44 +114,18 @@ static void write_etag(const std::string & path, const std::string & etag) { } static std::string read_etag(const std::string & path) { - std::string none; const std::string etag_path = path + ".etag"; - - if (std::filesystem::exists(etag_path)) { - std::ifstream etag_in(etag_path); - if (!etag_in) { - LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str()); - return none; - } - std::string etag; - std::getline(etag_in, etag); - return etag; + if (!std::filesystem::exists(etag_path)) { + return {}; } - - // no etag file, but maybe there is an old .json - // remove this code later - const std::string metadata_path = path + ".json"; - - if (std::filesystem::exists(metadata_path)) { - std::ifstream metadata_in(metadata_path); - try { - nlohmann::json metadata_json; - metadata_in >> metadata_json; - LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), - metadata_json.dump().c_str()); - if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) { - std::string etag = metadata_json.at("etag"); - write_etag(path, etag); - if (!std::filesystem::remove(metadata_path)) { - LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str()); - } - return etag; - } - } catch (const nlohmann::json::exception & e) { - LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); - } + std::ifstream etag_in(etag_path); + if (!etag_in) { + LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str()); + return {}; } - return none; + std::string etag; + std::getline(etag_in, etag); + return etag; } static bool is_http_status_ok(int status) { From e808daec68266aa6ff636c48e451e8966a919102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Thu, 12 Feb 2026 19:12:12 +0100 Subject: [PATCH 2/2] common : simplify common_download_file_single_online MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit also force a redownload if the file exists but has no .etag file. Signed-off-by: Adrien Gallouët --- common/download.cpp | 137 +++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 72 deletions(-) diff --git a/common/download.cpp b/common/download.cpp index f2305eb974..17f930f5ac 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -321,62 +321,64 @@ static int common_download_file_single_online(const std::string & url, LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); } - for (int i = 0; i < max_attempts; ++i) { - auto head = cli.Head(parts.path); - bool head_ok = head && head->status >= 200 && head->status < 300; - if (!head_ok) { - LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1); - if (file_exists) { - LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str()); - return 304; // 304 Not Modified - fake cached response - } - return head->status; // cannot use cached file, return raw status code - // TODO: maybe retry only on certain codes - } - - std::string etag; - if (head_ok && head->has_header("ETag")) { - etag = head->get_header_value("ETag"); - } - - size_t total_size = 0; - if (head_ok && head->has_header("Content-Length")) { - try { - total_size = std::stoull(head->get_header_value("Content-Length")); - } catch (const std::exception& e) { - LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what()); - } - } - - bool supports_ranges = false; - if (head_ok && head->has_header("Accept-Ranges")) { - supports_ranges = head->get_header_value("Accept-Ranges") != "none"; - } - - bool should_download_from_scratch = false; - if (!last_etag.empty() && !etag.empty() && last_etag != etag) { - LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, - last_etag.c_str(), etag.c_str()); - should_download_from_scratch = true; - } - + auto head = cli.Head(parts.path); + if (!head || head->status < 200 || head->status >= 300) { + LOG_WRN("%s: HEAD failed, status: %d\n", __func__, head ? head->status : -1); if (file_exists) { - if (!should_download_from_scratch) { - LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); - return 304; // 304 Not Modified - fake cached response - } - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return -1; - } + LOG_INF("%s: using cached file (HEAD failed): %s\n", __func__, path.c_str()); + return 304; // 304 Not Modified - fake cached response + } + return head ? head->status : -1; + } + + std::string etag; + if (head->has_header("ETag")) { + etag = head->get_header_value("ETag"); + } + + size_t total_size = 0; + if (head->has_header("Content-Length")) { + try { + total_size = std::stoull(head->get_header_value("Content-Length")); + } catch (const std::exception& e) { + LOG_WRN("%s: invalid Content-Length in HEAD response: %s\n", __func__, e.what()); + } + } + + bool supports_ranges = false; + if (head->has_header("Accept-Ranges")) { + supports_ranges = head->get_header_value("Accept-Ranges") != "none"; + } + + if (file_exists) { + if (etag.empty()) { + LOG_INF("%s: using cached file (no server etag): %s\n", __func__, path.c_str()); + return 304; // 304 Not Modified - fake cached response + } + if (!last_etag.empty() && last_etag == etag) { + LOG_INF("%s: using cached file (same etag): %s\n", __func__, path.c_str()); + return 304; // 304 Not Modified - fake cached response + } + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return -1; + } + } + + const std::string path_temporary = path + ".downloadInProgress"; + int delay = retry_delay_seconds; + + for (int i = 0; i < max_attempts; ++i) { + if (i) { + LOG_WRN("%s: retrying after %d seconds...\n", __func__, delay); + std::this_thread::sleep_for(std::chrono::seconds(delay)); + delay *= retry_delay_seconds; } - const std::string path_temporary = path + ".downloadInProgress"; size_t existing_size = 0; if (std::filesystem::exists(path_temporary)) { - if (supports_ranges && !should_download_from_scratch) { + if (supports_ranges) { existing_size = std::filesystem::file_size(path_temporary); } else if (remove(path_temporary.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); @@ -384,32 +386,23 @@ static int common_download_file_single_online(const std::string & url, } } - // start the download - LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n", - __func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str()); - const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size); - if (!was_pull_successful) { - if (i + 1 < max_attempts) { - const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000; - LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay); - std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); - } else { - LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts); + LOG_INF("%s: downloading from %s to %s (etag:%s)...\n", + __func__, common_http_show_masked_url(parts).c_str(), + path_temporary.c_str(), etag.c_str()); + + if (common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size)) { + if (std::rename(path_temporary.c_str(), path.c_str()) != 0) { + LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return -1; } - continue; + if (!etag.empty()) { + write_etag(path, etag); + } + return head->status; } - - if (std::rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return -1; - } - if (!etag.empty()) { - write_etag(path, etag); - } - - return head->status; // TODO: use actual GET status? } + LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts); return -1; // max attempts reached }