roundtrip tests
This commit is contained in:
parent
e8e2f634e7
commit
c66fd8a227
|
|
@ -191,6 +191,7 @@ extern "C" {
|
|||
|
||||
// write the entire context to a binary file
|
||||
GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
||||
GGML_API bool gguf_write_to_file_ptr(const struct gguf_context * ctx, FILE * file, bool only_meta);
|
||||
|
||||
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
||||
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
||||
|
|
|
|||
|
|
@ -1520,16 +1520,25 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
|
|||
return false;
|
||||
}
|
||||
|
||||
const bool success = gguf_write_to_file_ptr(ctx, file, only_meta);
|
||||
if (!success) {
|
||||
GGML_LOG_ERROR("%s: failed to write GGUF data into '%s'\n", __func__, fname);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool gguf_write_to_file_ptr(const struct gguf_context * ctx, FILE * file, bool only_meta) {
|
||||
GGML_ASSERT(file);
|
||||
|
||||
try {
|
||||
gguf_writer_file gw(file);
|
||||
gguf_write_out(ctx, gw, only_meta);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
|
||||
fclose(file);
|
||||
GGML_LOG_ERROR("%s: failed to write GGUF data: %s\n", __func__, ex.what());
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#include "llama-model-saver.h"
|
||||
|
||||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include "llama-arch.h"
|
||||
|
|
@ -11,8 +12,33 @@
|
|||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
bool llama_model_saver_supports_arch(llm_arch arch) {
|
||||
switch (arch) {
|
||||
case LLM_ARCH_QWEN3NEXT:
|
||||
case LLM_ARCH_QWEN35:
|
||||
case LLM_ARCH_QWEN35MOE:
|
||||
case LLM_ARCH_PLAMO3:
|
||||
case LLM_ARCH_GEMMA3:
|
||||
case LLM_ARCH_GEMMA3N:
|
||||
case LLM_ARCH_COHERE2:
|
||||
case LLM_ARCH_OLMO2:
|
||||
case LLM_ARCH_BITNET:
|
||||
case LLM_ARCH_T5:
|
||||
case LLM_ARCH_EXAONE_MOE:
|
||||
case LLM_ARCH_AFMOE:
|
||||
case LLM_ARCH_APERTUS:
|
||||
case LLM_ARCH_MIMO2:
|
||||
case LLM_ARCH_STEP35:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
llama_model_saver::llama_model_saver(const struct llama_model * model) :
|
||||
gguf_ctx(gguf_init_empty()), gguf_ctx_owned(true), model(model), llm_kv(model->arch) {}
|
||||
gguf_ctx(gguf_init_empty()), gguf_ctx_owned(true), model(model), llm_kv(model->arch) {
|
||||
GGML_ASSERT(llama_model_saver_supports_arch(model->arch));
|
||||
}
|
||||
|
||||
llama_model_saver::llama_model_saver(enum llm_arch arch, struct gguf_context * gguf_ctx) :
|
||||
gguf_ctx(gguf_ctx == nullptr ? gguf_init_empty() : gguf_ctx), gguf_ctx_owned(gguf_ctx == nullptr), model(nullptr), llm_kv(arch) {}
|
||||
|
|
@ -383,3 +409,6 @@ void llama_model_saver::save(const std::string & path_model) {
|
|||
gguf_write_to_file(gguf_ctx, path_model.c_str(), false);
|
||||
}
|
||||
|
||||
void llama_model_saver::save(FILE * file) {
|
||||
gguf_write_to_file_ptr(gguf_ctx, file, false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@
|
|||
|
||||
#include <vector>
|
||||
|
||||
// FIXME temporary function for better error messages
|
||||
bool llama_model_saver_supports_arch(llm_arch arch);
|
||||
|
||||
struct llama_model_saver {
|
||||
struct gguf_context * gguf_ctx = nullptr;
|
||||
const bool gguf_ctx_owned;
|
||||
|
|
@ -37,4 +40,5 @@ struct llama_model_saver {
|
|||
void add_tensors_from_model();
|
||||
|
||||
void save(const std::string & path_model);
|
||||
void save(FILE * file);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1125,12 +1125,8 @@ static std::pair<int, int> test_roundtrip(ggml_backend_dev_t dev, const unsigned
|
|||
GGML_ASSERT(file);
|
||||
#endif // _WIN32
|
||||
|
||||
{
|
||||
std::vector<int8_t> buf;
|
||||
gguf_write_to_buf(gguf_ctx_0, buf, only_meta);
|
||||
GGML_ASSERT(fwrite(buf.data(), 1, buf.size(), file) == buf.size());
|
||||
rewind(file);
|
||||
}
|
||||
gguf_write_to_file_ptr(gguf_ctx_0, file, only_meta);
|
||||
rewind(file);
|
||||
|
||||
struct ggml_context * ctx_1 = nullptr;
|
||||
struct gguf_init_params gguf_params = {
|
||||
|
|
|
|||
|
|
@ -424,8 +424,8 @@ static int test_backends(const llm_arch target_arch, const size_t seed, const gg
|
|||
|
||||
bool all_ok = true;
|
||||
common_log_flush(common_log_main());
|
||||
printf("|%15s|%30s|%6s|%8s|%6s|\n", "Model arch.", "Device", "Config", "NMSE", "Status");
|
||||
printf("|---------------|------------------------------|------|--------|------|\n");
|
||||
printf("|%15s|%30s|%16s|%8s|%6s|\n", "Model arch.", "Device", "Config", "NMSE", "Status");
|
||||
printf("|---------------|------------------------------|----------------|--------|------|\n");
|
||||
for (const llm_arch & arch : llm_arch_all()) {
|
||||
if (target_arch != LLM_ARCH_UNKNOWN && arch != target_arch) {
|
||||
continue;
|
||||
|
|
@ -474,14 +474,57 @@ static int test_backends(const llm_arch target_arch, const size_t seed, const gg
|
|||
continue;
|
||||
}
|
||||
auto model_and_ctx_dev = get_model_and_ctx(gguf_ctx.get(), seed, {dev});
|
||||
const std::vector<float> logits_dev = get_logits(model_and_ctx_dev.first.get(), model_and_ctx_dev.second.get(), tokens, encode);
|
||||
const double nmse_val = nmse(logits_cpu, logits_dev);
|
||||
const bool ok = nmse_val <= 1e-4;
|
||||
all_ok = all_ok && ok;
|
||||
char nmse_str[10];
|
||||
snprintf(nmse_str, sizeof(nmse_str), "%.2e", nmse_val);
|
||||
printf("|%15s|%30s|%6s|%8s|%17s|\n", llm_arch_name(arch), ggml_backend_dev_description(dev),
|
||||
moe ? "MoE" : "Dense", nmse_str, ok ? "\033[1;32mOK\033[0m" : "\033[1;31mFAIL\033[0m");
|
||||
std::string config_name = moe ? "MoE" : "Dense";
|
||||
{
|
||||
const std::vector<float> logits_dev = get_logits(model_and_ctx_dev.first.get(), model_and_ctx_dev.second.get(), tokens, encode);
|
||||
const double nmse_val = nmse(logits_cpu, logits_dev);
|
||||
const bool ok = nmse_val <= 1e-4;
|
||||
all_ok = all_ok && ok;
|
||||
char nmse_str[10];
|
||||
snprintf(nmse_str, sizeof(nmse_str), "%.2e", nmse_val);
|
||||
printf("|%15s|%30s|%16s|%8s|%17s|\n", llm_arch_name(arch), ggml_backend_dev_description(dev),
|
||||
config_name.c_str(), nmse_str, ok ? "\033[1;32mOK\033[0m" : "\033[1;31mFAIL\033[0m");
|
||||
}
|
||||
if (llama_model_saver_supports_arch(arch)) {
|
||||
FILE * file = tmpfile();
|
||||
#ifdef _WIN32
|
||||
if (!file) {
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
GGML_ASSERT(file);
|
||||
#endif // _WIN32
|
||||
llama_model_saver ms = llama_model_saver(model_and_ctx_dev.first.get());
|
||||
ms.add_kv_from_model();
|
||||
ms.add_tensors_from_model();
|
||||
ms.save(file);
|
||||
rewind(file);
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
std::vector<ggml_backend_dev_t> devs_copy = {dev};
|
||||
devs_copy.push_back(nullptr);
|
||||
model_params.devices = devs_copy.data();
|
||||
llama_model_ptr model_roundtrip(llama_model_load_from_file_ptr(file, model_params));
|
||||
GGML_ASSERT(model_roundtrip);
|
||||
config_name += ",roundtrip";
|
||||
|
||||
llama_context_params ctx_params = llama_context_default_params();
|
||||
ctx_params.n_ctx = 0;
|
||||
ctx_params.n_threads = 4;
|
||||
ctx_params.n_threads_batch = 4;
|
||||
llama_context_ptr lctx_roundtrip(llama_init_from_model(model_roundtrip.get(), ctx_params));
|
||||
if (!lctx_roundtrip) {
|
||||
throw std::runtime_error("failed to create llama context");
|
||||
}
|
||||
|
||||
const std::vector<float> logits_dev = get_logits(model_roundtrip.get(), lctx_roundtrip.get(), tokens, encode);
|
||||
const double nmse_val = nmse(logits_cpu, logits_dev);
|
||||
const bool ok = nmse_val <= 1e-4;
|
||||
all_ok = all_ok && ok;
|
||||
char nmse_str[10];
|
||||
snprintf(nmse_str, sizeof(nmse_str), "%.2e", nmse_val);
|
||||
printf("|%15s|%30s|%16s|%8s|%17s|\n", llm_arch_name(arch), ggml_backend_dev_description(dev),
|
||||
config_name.c_str(), nmse_str, ok ? "\033[1;32mOK\033[0m" : "\033[1;31mFAIL\033[0m");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue