diff --git a/tests/test-llama-archs.cpp b/tests/test-llama-archs.cpp index add340d22c..2bff2d787f 100644 --- a/tests/test-llama-archs.cpp +++ b/tests/test-llama-archs.cpp @@ -231,7 +231,8 @@ static gguf_context_ptr get_gguf_ctx(const llm_arch arch, const bool moe) { } static std::pair get_model_and_ctx( - struct gguf_context * gguf_ctx, const size_t seed, const std::vector & devs) { + struct gguf_context * gguf_ctx, FILE * file, const size_t seed, const std::vector & devs) { + GGML_ASSERT((gguf_ctx == nullptr) != (file == nullptr)); llama_model_params model_params = llama_model_default_params(); std::vector devs_copy = devs; devs_copy.push_back(nullptr); @@ -243,7 +244,9 @@ static std::pair get_model_and_ctx( ctx_params.n_threads_batch = 4; size_t tmp = seed; - llama_model_ptr model(llama_model_init_from_user(gguf_ctx, set_tensor_data, &tmp, model_params)); + llama_model_ptr model(gguf_ctx != nullptr ? + llama_model_init_from_user(gguf_ctx, set_tensor_data, &tmp, model_params) : + llama_model_load_from_file_ptr(file, model_params)); if (!model) { throw std::runtime_error("failed to create llama model"); } @@ -392,7 +395,7 @@ static int save_models(const llm_arch target_arch, const size_t seed, const ggml continue; } gguf_context_ptr gguf_ctx = get_gguf_ctx(arch, moe); - auto model_and_ctx = get_model_and_ctx(gguf_ctx.get(), seed, {}); + auto model_and_ctx = get_model_and_ctx(gguf_ctx.get(), nullptr, seed, {}); const std::string path = dir + "/" + llm_arch_name(arch) + (moe ? "-moe.gguf" : "-dense.gguf"); LOG_INF("%s: Saving %s model (%s) to %s...\n", __func__, llm_arch_name(arch), moe ? "MoE" : "dense", path.c_str()); llama_model_save_to_file(model_and_ctx.first.get(), path.c_str()); @@ -424,8 +427,8 @@ static int test_backends(const llm_arch target_arch, const size_t seed, const gg bool all_ok = true; common_log_flush(common_log_main()); - printf("|%15s|%30s|%16s|%8s|%6s|\n", "Model arch.", "Device", "Config", "NMSE", "Status"); - printf("|---------------|------------------------------|----------------|--------|------|\n"); + printf("|%15s|%30s|%16s|%15s|%9s|\n", "Model arch.", "Device", "Config", "NMSE vs. CPU", "Roundtrip"); + printf("|---------------|------------------------------|----------------|---------------|---------|\n"); for (const llm_arch & arch : llm_arch_all()) { if (target_arch != LLM_ARCH_UNKNOWN && arch != target_arch) { continue; @@ -466,65 +469,50 @@ static int test_backends(const llm_arch target_arch, const size_t seed, const gg continue; } gguf_context_ptr gguf_ctx = get_gguf_ctx(arch, moe); - auto model_and_ctx_cpu = get_model_and_ctx(gguf_ctx.get(), seed, {}); + auto model_and_ctx_cpu = get_model_and_ctx(gguf_ctx.get(), nullptr, seed, {}); const std::vector logits_cpu = get_logits(model_and_ctx_cpu.first.get(), model_and_ctx_cpu.second.get(), tokens, encode); for (size_t i = 0; i < ggml_backend_dev_count(); i++) { ggml_backend_dev_t dev = ggml_backend_dev_get(i); if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) { continue; } - auto model_and_ctx_dev = get_model_and_ctx(gguf_ctx.get(), seed, {dev}); + auto model_and_ctx_dev = get_model_and_ctx(gguf_ctx.get(), nullptr, seed, {dev}); std::string config_name = moe ? "MoE" : "Dense"; - { - const std::vector logits_dev = get_logits(model_and_ctx_dev.first.get(), model_and_ctx_dev.second.get(), tokens, encode); - const double nmse_val = nmse(logits_cpu, logits_dev); - const bool ok = nmse_val <= 1e-4; - all_ok = all_ok && ok; - char nmse_str[10]; - snprintf(nmse_str, sizeof(nmse_str), "%.2e", nmse_val); - printf("|%15s|%30s|%16s|%8s|%17s|\n", llm_arch_name(arch), ggml_backend_dev_description(dev), - config_name.c_str(), nmse_str, ok ? "\033[1;32mOK\033[0m" : "\033[1;31mFAIL\033[0m"); + const std::vector logits_dev = get_logits(model_and_ctx_dev.first.get(), model_and_ctx_dev.second.get(), tokens, encode); + const double nmse_val = nmse(logits_cpu, logits_dev); + char nmse_str[10]; + snprintf(nmse_str, sizeof(nmse_str), "%.2e", nmse_val); + std::string status_nmse = "\033[1;32mOK\033[0m"; + if (nmse_val > 1e-4) { + all_ok = false; + status_nmse = "\033[1;31mFAIL\033[0m"; } - if (llama_model_saver_supports_arch(arch)) { - FILE * file = tmpfile(); -#ifdef _WIN32 - if (!file) { - continue; - } -#else - GGML_ASSERT(file); -#endif // _WIN32 + + std::string status_roundtrip = "\033[1;33mSKIP\033[0m"; + FILE * file = tmpfile(); // Can be null on Windows without administrator privileges. + if (file != nullptr && llama_model_saver_supports_arch(arch)) { llama_model_saver ms = llama_model_saver(model_and_ctx_dev.first.get()); ms.add_kv_from_model(); ms.add_tensors_from_model(); ms.save(file); rewind(file); - llama_model_params model_params = llama_model_default_params(); - std::vector devs_copy = {dev}; - devs_copy.push_back(nullptr); - model_params.devices = devs_copy.data(); - llama_model_ptr model_roundtrip(llama_model_load_from_file_ptr(file, model_params)); - GGML_ASSERT(model_roundtrip); - config_name += ",roundtrip"; - llama_context_params ctx_params = llama_context_default_params(); - ctx_params.n_ctx = 0; - ctx_params.n_threads = 4; - ctx_params.n_threads_batch = 4; - llama_context_ptr lctx_roundtrip(llama_init_from_model(model_roundtrip.get(), ctx_params)); - if (!lctx_roundtrip) { - throw std::runtime_error("failed to create llama context"); + auto model_and_ctx_roundtrip = get_model_and_ctx(nullptr, file, seed, {dev}); + const std::vector logits_roundtrip = get_logits( + model_and_ctx_roundtrip.first.get(), model_and_ctx_roundtrip.second.get(), tokens, encode); + status_roundtrip = "\033[1;32mOK\033[0m"; + GGML_ASSERT(logits_roundtrip.size() == logits_dev.size()); + for (size_t i = 0; i < logits_roundtrip.size(); i++) { + if (logits_roundtrip[i] != logits_dev[i]) { + all_ok = false; + status_roundtrip = "\033[1;31mFAIL\033[0m"; + break; + } } - - const std::vector logits_dev = get_logits(model_roundtrip.get(), lctx_roundtrip.get(), tokens, encode); - const double nmse_val = nmse(logits_cpu, logits_dev); - const bool ok = nmse_val <= 1e-4; - all_ok = all_ok && ok; - char nmse_str[10]; - snprintf(nmse_str, sizeof(nmse_str), "%.2e", nmse_val); - printf("|%15s|%30s|%16s|%8s|%17s|\n", llm_arch_name(arch), ggml_backend_dev_description(dev), - config_name.c_str(), nmse_str, ok ? "\033[1;32mOK\033[0m" : "\033[1;31mFAIL\033[0m"); } + + printf("|%15s|%30s|%16s|%15s (%8s)|%20s|\n", llm_arch_name(arch), ggml_backend_dev_description(dev), + config_name.c_str(), status_nmse.c_str(), nmse_str, status_roundtrip.c_str()); } } }