diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index aaba61af1f..d0617f7fbf 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -728,6 +729,7 @@ static std::unordered_map target_bpw_type( std::string name; std::snprintf(hex, sizeof(hex), "%016" PRIx64, (uint64_t)model_id); ml.get_key(LLM_KV_GENERAL_NAME, name, false); + name.erase(0, name.find_last_of('/') + 1); std::replace(name.begin(), name.end(), ' ', '_'); name.empty() ? checkpoint_file = ml.arch_name : checkpoint_file = name; checkpoint_file += "-" + std::string(hex) + (valid_wce ? "-wce" : "-mse") + ".bpw_state"; @@ -1500,7 +1502,7 @@ static std::unordered_map target_bpw_type( ch.candidates.push_back(fb); } - auto simplify_pareto = [](std::vector & candidates) { + auto simplify_pareto = [&](std::vector & candidates) { std::sort(candidates.begin(), candidates.end(), [](const auto& a, const auto& b) { return a.bytes < b.bytes || (a.bytes == b.bytes && a.error < b.error); });