test-backend-ops: allow loading tests from file and parsing model operators into file (#19896)
* tests: allow loading test-backend-ops tests from json * add error threshold based on op * add error when file cannot be read * add graph operator json extraction tool * add nb parameter for non-contiguous input tensors * fix view check * only use view if non-contiguous/permuted, use C++ random instead of rand() * replace internal API calls with public llama_graph_reserve call * reduce test description length * fix nb[0] not getting set for view * add name to tests * fix inplace error * use text file instead of json * move llama_graph_reserve function to new llama-ext header, move export-graph-ops to tests/ * fix missing declaration * use pragma once * fix indent * fix Windows build
This commit is contained in:
parent
6de1bc631d
commit
128142fe7d
|
|
@ -2671,7 +2671,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
[](common_params & params, const std::string & value) {
|
||||
params.out_file = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE, LLAMA_EXAMPLE_RESULTS}));
|
||||
).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE,
|
||||
LLAMA_EXAMPLE_RESULTS, LLAMA_EXAMPLE_EXPORT_GRAPH_OPS}));
|
||||
add_opt(common_arg(
|
||||
{"-ofreq", "--output-frequency"}, "N",
|
||||
string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq),
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ enum llama_example {
|
|||
LLAMA_EXAMPLE_FINETUNE,
|
||||
LLAMA_EXAMPLE_FIT_PARAMS,
|
||||
LLAMA_EXAMPLE_RESULTS,
|
||||
LLAMA_EXAMPLE_EXPORT_GRAPH_OPS,
|
||||
|
||||
LLAMA_EXAMPLE_COUNT,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "llama-memory.h"
|
||||
#include "llama-mmap.h"
|
||||
#include "llama-model.h"
|
||||
#include "llama-ext.h"
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cmath>
|
||||
|
|
@ -3129,6 +3130,19 @@ uint32_t llama_get_sampled_probs_count_ith(llama_context * ctx, int32_t i) {
|
|||
return static_cast<uint32_t>(ctx->get_sampled_probs_count(i));
|
||||
}
|
||||
|
||||
struct ggml_cgraph * llama_graph_reserve(
|
||||
struct llama_context * ctx,
|
||||
uint32_t n_tokens,
|
||||
uint32_t n_seqs,
|
||||
uint32_t n_outputs) {
|
||||
auto * memory = ctx->get_memory();
|
||||
llama_memory_context_ptr mctx;
|
||||
if (memory) {
|
||||
mctx = memory->init_full();
|
||||
}
|
||||
return ctx->graph_reserve(n_tokens, n_seqs, n_outputs, mctx.get());
|
||||
}
|
||||
|
||||
// llama adapter API
|
||||
|
||||
int32_t llama_set_adapters_lora(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include "llama-context.h"
|
||||
#include "ggml.h"
|
||||
#include "stdint.h"
|
||||
|
||||
// Reserve a new compute graph. It is valid until the next call to llama_graph_reserve.
|
||||
LLAMA_API struct ggml_cgraph * llama_graph_reserve(
|
||||
struct llama_context * ctx,
|
||||
uint32_t n_tokens,
|
||||
uint32_t n_seqs,
|
||||
uint32_t n_outputs);
|
||||
|
|
@ -260,6 +260,7 @@ endif()
|
|||
set(LLAMA_TEST_NAME test-mtmd-c-api)
|
||||
llama_build_and_test(test-mtmd-c-api.c)
|
||||
target_link_libraries(${LLAMA_TEST_NAME} PRIVATE mtmd)
|
||||
unset(LLAMA_TEST_NAME)
|
||||
|
||||
# GGUF model data fetcher library for tests that need real model metadata
|
||||
# Only compile when cpp-httplib has SSL support (CPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
|
|
@ -284,4 +285,5 @@ target_link_libraries(${TEST_TARGET} PRIVATE llama)
|
|||
llama_build_and_test(test-alloc.cpp)
|
||||
target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
|
||||
|
||||
|
||||
llama_build(export-graph-ops.cpp)
|
||||
target_include_directories(export-graph-ops PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
#include "arg.h"
#include "common.h"
#include "log.h"
#include "llama.h"
#include "../src/llama-ext.h"
#include "ggml.h"

#include <algorithm>
#include <array>
#include <cstring>
#include <fstream>
#include <iostream>
#include <set>
#include <stdexcept>
#include <string>
#include <tuple>
#include <vector>
|
||||
|
||||
struct input_tensor {
|
||||
ggml_type type;
|
||||
std::array<int64_t, 4> ne;
|
||||
std::array<size_t, 4> nb;
|
||||
|
||||
input_tensor(ggml_type type, int64_t * ne, size_t * nb): type(type) {
|
||||
memcpy(this->ne.data(), ne, 4 * sizeof(int64_t));
|
||||
memcpy(this->nb.data(), nb, 4 * sizeof(size_t));
|
||||
}
|
||||
|
||||
bool operator<(const input_tensor &b) const {
|
||||
return std::tie(type, ne, nb) <
|
||||
std::tie(b.type, b.ne, b.nb);
|
||||
}
|
||||
|
||||
void serialize(std::ostream& out) const {
|
||||
out << type << ' ';
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
out << ne[i] << ' ';
|
||||
}
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
out << nb[i] << ' ';
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct test_object {
|
||||
ggml_op op;
|
||||
ggml_type type;
|
||||
std::array<int64_t, 4> ne;
|
||||
std::vector<int32_t> op_params;
|
||||
std::vector<input_tensor> sources;
|
||||
std::string name;
|
||||
|
||||
void serialize(std::ostream& out) const {
|
||||
out << op << ' ' << type << ' ';
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
out << ne[i] << ' ';
|
||||
}
|
||||
|
||||
out << op_params.size() << ' ';
|
||||
for (size_t i = 0; i < op_params.size(); i++) {
|
||||
out << op_params[i] << ' ';
|
||||
}
|
||||
|
||||
out << sources.size() << ' ';
|
||||
for (size_t s = 0; s < sources.size(); s++) {
|
||||
sources[s].serialize(out);
|
||||
}
|
||||
|
||||
if (!name.empty()) {
|
||||
out << name;
|
||||
} else {
|
||||
out << '-';
|
||||
}
|
||||
|
||||
out << '\n';
|
||||
}
|
||||
|
||||
bool operator<(const test_object &b) const {
|
||||
return std::tie(op, type, ne, op_params, sources) <
|
||||
std::tie(b.op, b.type, b.ne, b.op_params, b.sources);
|
||||
}
|
||||
};
|
||||
|
||||
// Walks all nodes of cgraph and inserts a test_object for each one into tests,
// except for NONE/VIEW/RESHAPE/PERMUTE/TRANSPOSE nodes, which are only counted
// as skipped. Because tests is a set, equivalent entries collapse. A summary
// line prefixed with label is logged at the end.
static void extract_graph_ops(ggml_cgraph * cgraph, const char * label, std::set<test_object> & tests) {
    const int n_nodes  = ggml_graph_n_nodes(cgraph);
    const int n_before = (int) tests.size();
    int n_skipped = 0;

    for (int idx = 0; idx < n_nodes; idx++) {
        ggml_tensor * node = ggml_graph_node(cgraph, idx);

        switch (node->op) {
            case GGML_OP_NONE:
            case GGML_OP_VIEW:
            case GGML_OP_RESHAPE:
            case GGML_OP_PERMUTE:
            case GGML_OP_TRANSPOSE:
                n_skipped++;
                continue; // next graph node
            default:
                break;
        }

        test_object test;

        test.op   = node->op;
        test.type = node->type;
        test.name = node->name;
        memcpy(&test.ne, node->ne, 4 * sizeof(int64_t));

        // keep the complete op_params block, including unused trailing words
        test.op_params.resize(GGML_MAX_OP_PARAMS / sizeof(int32_t));
        memcpy(test.op_params.data(), node->op_params, GGML_MAX_OP_PARAMS);

        // sources are recorded up to (not including) the first empty slot
        for (size_t s = 0; s < GGML_MAX_SRC && node->src[s] != nullptr; s++) {
            ggml_tensor * src = node->src[s];
            test.sources.emplace_back(src->type, src->ne, src->nb);
        }

        tests.insert(test);
    }

    const int n_new = (int) tests.size() - n_before;
    LOG_INF("%s: %d unique ops, %d total nodes, %d skipped (view ops)\n",
            label, n_new, n_nodes, n_skipped);
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
common_params params;
|
||||
params.out_file = "tests.txt";
|
||||
|
||||
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_EXPORT_GRAPH_OPS)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
common_init();
|
||||
|
||||
// Load CPU-only
|
||||
ggml_backend_dev_t cpu_device = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
||||
params.devices = { cpu_device, nullptr };
|
||||
params.fit_params = false;
|
||||
params.n_gpu_layers = 0;
|
||||
|
||||
params.warmup = false;
|
||||
|
||||
auto init_result = common_init_from_params(params);
|
||||
|
||||
llama_context * ctx = init_result->context();
|
||||
|
||||
const uint32_t n_seqs = llama_n_seq_max(ctx);
|
||||
const uint32_t n_tokens = std::min(llama_n_ctx(ctx), llama_n_ubatch(ctx));
|
||||
|
||||
std::set<test_object> tests;
|
||||
|
||||
auto * gf_pp = llama_graph_reserve(ctx, n_tokens, n_seqs, n_tokens);
|
||||
if (!gf_pp) {
|
||||
throw std::runtime_error("failed to reserve prompt processing graph");
|
||||
}
|
||||
extract_graph_ops(gf_pp, "pp", tests);
|
||||
|
||||
auto * gf_tg = llama_graph_reserve(ctx, n_seqs, n_seqs, n_seqs);
|
||||
if (!gf_tg) {
|
||||
throw std::runtime_error("failed to reserve token generation graph");
|
||||
}
|
||||
extract_graph_ops(gf_tg, "tg", tests);
|
||||
|
||||
LOG_INF("%d unique ops total\n", (int) tests.size());
|
||||
|
||||
std::ofstream f(params.out_file);
|
||||
|
||||
if (!f.is_open()) {
|
||||
throw std::runtime_error("Unable to open output file");
|
||||
}
|
||||
|
||||
for (const auto& test : tests) {
|
||||
test.serialize(f);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -31,10 +31,12 @@
|
|||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <future>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <regex>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <thread>
|
||||
|
|
@ -6648,6 +6650,236 @@ struct test_diag : public test_case {
|
|||
}
|
||||
};
|
||||
|
||||
// Deserializable generic test case
|
||||
struct input_tensor {
|
||||
ggml_type type;
|
||||
std::array<int64_t, 4> ne;
|
||||
std::array<size_t, 4> nb; // strides (0 = use default contiguous strides)
|
||||
};
|
||||
|
||||
static bool is_non_contiguous(const input_tensor & src) {
|
||||
if (src.nb[0] == 0) {
|
||||
return false;
|
||||
}
|
||||
const size_t default_nb0 = ggml_type_size(src.type);
|
||||
const size_t default_nb1 = default_nb0 * (src.ne[0] / ggml_blck_size(src.type));
|
||||
const size_t default_nb2 = default_nb1 * src.ne[1];
|
||||
const size_t default_nb3 = default_nb2 * src.ne[2];
|
||||
return src.nb[0] != default_nb0 ||
|
||||
src.nb[1] != default_nb1 ||
|
||||
src.nb[2] != default_nb2 ||
|
||||
src.nb[3] != default_nb3;
|
||||
}
|
||||
|
||||
static std::string var_to_str(const std::vector<input_tensor>& sources) {
|
||||
std::ostringstream oss;
|
||||
bool first = true;
|
||||
for (const auto& src : sources) {
|
||||
if (!first) oss << ",";
|
||||
oss << ggml_type_name(src.type) << "[" << src.ne[0] << "," << src.ne[1] << "," << src.ne[2] << "," << src.ne[3] << "]";
|
||||
if (is_non_contiguous(src)) {
|
||||
oss << "nb[" << src.nb[0] << "," << src.nb[1] << "," << src.nb[2] << "," << src.nb[3] << "]";
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
static std::string var_to_str(const std::array<int32_t, GGML_MAX_OP_PARAMS / sizeof(int32_t)>& params) {
|
||||
std::ostringstream oss;
|
||||
oss << "[";
|
||||
bool first = true;
|
||||
for (size_t i = 0; i < params.size(); ++i) {
|
||||
if (params[i] != 0) {
|
||||
if (!first) oss << ",";
|
||||
oss << i << ":" << params[i];
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
oss << "]";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
|
||||
struct test_generic_op : public test_case {
|
||||
const ggml_op op;
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
const std::array<int32_t, GGML_MAX_OP_PARAMS / sizeof(int32_t)> op_params;
|
||||
|
||||
const std::vector<input_tensor> sources;
|
||||
const std::string name;
|
||||
|
||||
std::string vars() override {
|
||||
if (name.empty()) {
|
||||
return VARS_TO_STR4(type, ne, op_params, sources);
|
||||
}
|
||||
|
||||
return VARS_TO_STR5(name, type, ne, op_params, sources);
|
||||
}
|
||||
|
||||
test_generic_op(ggml_op op, ggml_type type, std::array<int64_t, 4> ne,
|
||||
std::array<int32_t, GGML_MAX_OP_PARAMS / sizeof(int32_t)> op_params,
|
||||
std::vector<input_tensor> sources, std::string name = "")
|
||||
: op(op), type(type), ne(ne), op_params(op_params), sources(sources), name(std::move(name)) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
const size_t source_count = std::min(sources.size(), (size_t)GGML_MAX_SRC);
|
||||
|
||||
std::array<ggml_tensor *, GGML_MAX_SRC> source_tensors;
|
||||
for (size_t i = 0; i < source_count; ++i) {
|
||||
const input_tensor& src = sources[i];
|
||||
|
||||
if (is_non_contiguous(src)) {
|
||||
size_t total_size;
|
||||
const size_t blck_size = ggml_blck_size(src.type);
|
||||
if (blck_size == 1) {
|
||||
total_size = ggml_type_size(src.type);
|
||||
for (int d = 0; d < 4; d++) {
|
||||
total_size += (src.ne[d] - 1) * src.nb[d];
|
||||
}
|
||||
} else {
|
||||
total_size = src.ne[0] * src.nb[0] / blck_size;
|
||||
for (int d = 1; d < 4; d++) {
|
||||
total_size += (src.ne[d] - 1) * src.nb[d];
|
||||
}
|
||||
}
|
||||
|
||||
// Convert bytes to elements, padded to block size for quantized types
|
||||
const size_t type_size = ggml_type_size(src.type);
|
||||
size_t backing_elements = (total_size * blck_size + type_size - 1) / type_size;
|
||||
backing_elements = ((backing_elements + blck_size - 1) / blck_size) * blck_size;
|
||||
ggml_tensor * backing = ggml_new_tensor_1d(ctx, src.type, backing_elements);
|
||||
source_tensors[i] = ggml_view_4d(ctx, backing,
|
||||
src.ne[0], src.ne[1], src.ne[2], src.ne[3],
|
||||
src.nb[1], src.nb[2], src.nb[3], 0);
|
||||
// nb[0] does not get set by view_4d, so set it manually
|
||||
source_tensors[i]->nb[0] = src.nb[0];
|
||||
} else {
|
||||
source_tensors[i] = ggml_new_tensor_4d(ctx, src.type, src.ne[0], src.ne[1], src.ne[2], src.ne[3]);
|
||||
}
|
||||
}
|
||||
|
||||
// Ops with an inplace flag create a view of src[0] as their output.
|
||||
bool inplace = false;
|
||||
if (op == GGML_OP_SET || op == GGML_OP_ACC) {
|
||||
inplace = op_params[4] != 0;
|
||||
} else if (op == GGML_OP_ADD_REL_POS) {
|
||||
inplace = op_params[0] != 0;
|
||||
}
|
||||
|
||||
ggml_tensor * out;
|
||||
if (inplace && source_count > 0) {
|
||||
out = ggml_view_tensor(ctx, source_tensors[0]);
|
||||
} else {
|
||||
out = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
||||
}
|
||||
out->op = op;
|
||||
for (size_t i = 0; i < source_count; ++i) {
|
||||
out->src[i] = source_tensors[i];
|
||||
}
|
||||
|
||||
memcpy(out->op_params, op_params.data(), GGML_MAX_OP_PARAMS);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
double max_nmse_err() override {
|
||||
switch (op) {
|
||||
case GGML_OP_MUL_MAT:
|
||||
case GGML_OP_MUL_MAT_ID:
|
||||
case GGML_OP_OUT_PROD:
|
||||
case GGML_OP_CONV_TRANSPOSE_2D:
|
||||
case GGML_OP_IM2COL:
|
||||
case GGML_OP_CONV_2D:
|
||||
case GGML_OP_CONV_3D:
|
||||
case GGML_OP_SET_ROWS:
|
||||
case GGML_OP_CPY:
|
||||
return 5e-4;
|
||||
case GGML_OP_SOFT_MAX:
|
||||
return 1e-6;
|
||||
case GGML_OP_RWKV_WKV7:
|
||||
return 5e-3;
|
||||
case GGML_OP_FLASH_ATTN_EXT:
|
||||
{
|
||||
// Scale error with kv length to account for accumulating floating point error
|
||||
const int64_t kv = sources[1].ne[1];
|
||||
return 5e-4 * std::max(1.0, kv / 20000.0);
|
||||
}
|
||||
default:
|
||||
return 1e-7;
|
||||
}
|
||||
}
|
||||
|
||||
void initialize_tensors(ggml_context * ctx) override {
|
||||
ggml_tensor * out = ggml_get_tensor(ctx, "out");
|
||||
|
||||
std::random_device rd;
|
||||
std::default_random_engine rng(rd());
|
||||
|
||||
for (size_t i = 0; i < sources.size() && i < GGML_MAX_SRC; i++) {
|
||||
ggml_tensor * t = out->src[i];
|
||||
if (!t) {
|
||||
break;
|
||||
}
|
||||
|
||||
// FLASH_ATTN_EXT: src[3] is the KQ mask
|
||||
if (op == GGML_OP_FLASH_ATTN_EXT && i == 3) {
|
||||
init_tensor_kq_mask(t);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (t->type == GGML_TYPE_I32 || t->type == GGML_TYPE_I64) {
|
||||
if (op == GGML_OP_GET_ROWS || op == GGML_OP_GET_ROWS_BACK) {
|
||||
const int64_t num_rows = sources[0].ne[1];
|
||||
const int64_t nels = ggml_nelements(t);
|
||||
std::vector<int32_t> data(nels);
|
||||
std::uniform_int_distribution<int32_t> dist(0, num_rows - 1);
|
||||
for (int64_t i = 0; i < nels; i++) {
|
||||
data[i] = dist(rng);
|
||||
}
|
||||
ggml_backend_tensor_set(t, data.data(), 0, nels * sizeof(int32_t));
|
||||
} else if (op == GGML_OP_SET_ROWS) {
|
||||
init_set_rows_row_ids(t, ne[1]);
|
||||
} else if (op == GGML_OP_ROPE) {
|
||||
const int mode = op_params[2];
|
||||
const int64_t nels = (mode & GGML_ROPE_TYPE_MROPE) ? ne[2] * 4 : ne[2];
|
||||
std::vector<int32_t> data(nels);
|
||||
std::uniform_int_distribution<int32_t> dist(0, ne[2] - 1);
|
||||
for (int64_t i = 0; i < nels; i++) {
|
||||
data[i] = dist(rng);
|
||||
}
|
||||
ggml_backend_tensor_set(t, data.data(), 0, nels * sizeof(int32_t));
|
||||
} else if (op == GGML_OP_MUL_MAT_ID || op == GGML_OP_ADD_ID) {
|
||||
const int64_t n_expert = (op == GGML_OP_MUL_MAT_ID) ? sources[0].ne[2] : sources[1].ne[1];
|
||||
for (int64_t r = 0; r < ggml_nrows(t); r++) {
|
||||
std::vector<int32_t> data(t->ne[0]);
|
||||
for (int32_t i = 0; i < t->ne[0]; i++) {
|
||||
data[i] = i % n_expert;
|
||||
}
|
||||
std::shuffle(data.begin(), data.end(), rng);
|
||||
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(int32_t));
|
||||
}
|
||||
} else if (op == GGML_OP_SSM_SCAN) {
|
||||
for (int64_t r = 0; r < ggml_nrows(t); r++) {
|
||||
std::vector<int32_t> data(t->ne[0]);
|
||||
for (int32_t i = 0; i < t->ne[0]; i++) {
|
||||
data[i] = i;
|
||||
}
|
||||
std::shuffle(data.begin(), data.end(), rng);
|
||||
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(int32_t));
|
||||
}
|
||||
} else {
|
||||
init_tensor_uniform(t);
|
||||
}
|
||||
} else {
|
||||
init_tensor_uniform(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
enum llm_norm_type {
|
||||
LLM_NORM,
|
||||
|
|
@ -8751,8 +8983,72 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
return test_cases;
|
||||
}
|
||||
|
||||
static std::vector<std::unique_ptr<test_case>> make_test_cases_from_file(const char * path) {
|
||||
std::ifstream f(path);
|
||||
|
||||
if (!f.is_open()) {
|
||||
throw std::runtime_error("Unable to read test file");
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<test_case>> test_cases;
|
||||
|
||||
std::string line;
|
||||
|
||||
while (std::getline(f, line)) {
|
||||
std::istringstream iss(line);
|
||||
|
||||
ggml_op op;
|
||||
ggml_type type;
|
||||
std::array<int64_t, 4> ne;
|
||||
std::array<int32_t, GGML_MAX_OP_PARAMS / sizeof(int32_t)> op_params = {};
|
||||
std::string name;
|
||||
uint64_t tmp;
|
||||
|
||||
iss >> tmp;
|
||||
op = (ggml_op)tmp;
|
||||
iss >> tmp;
|
||||
type = (ggml_type)tmp;
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
iss >> ne[i];
|
||||
}
|
||||
|
||||
iss >> tmp;
|
||||
for (size_t i = 0; i < tmp && i < op_params.size(); i++) {
|
||||
iss >> op_params[i];
|
||||
}
|
||||
|
||||
iss >> tmp;
|
||||
|
||||
size_t num_src = std::min((uint64_t)GGML_MAX_SRC, tmp);
|
||||
std::vector<input_tensor> sources(num_src);
|
||||
for (size_t i = 0; i < num_src; i++) {
|
||||
input_tensor& src = sources[i];
|
||||
iss >> tmp;
|
||||
src.type = (ggml_type)tmp;
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
iss >> src.ne[i];
|
||||
}
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
iss >> src.nb[i];
|
||||
}
|
||||
}
|
||||
|
||||
iss >> name;
|
||||
|
||||
if (name.length() == 1 && name[0] == '-') {
|
||||
name = "";
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_generic_op(op, type, ne, op_params, sources, std::move(name)));
|
||||
}
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
||||
static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_names_filter, const char * params_filter,
|
||||
printer * output_printer) {
|
||||
printer * output_printer, const char * test_file_path) {
|
||||
auto filter_test_cases = [](std::vector<std::unique_ptr<test_case>> & test_cases, const char * params_filter) {
|
||||
if (params_filter == nullptr) {
|
||||
return;
|
||||
|
|
@ -8770,9 +9066,26 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|||
}
|
||||
};
|
||||
|
||||
std::vector<std::unique_ptr<test_case>> test_cases;
|
||||
|
||||
if (test_file_path == nullptr) {
|
||||
switch (mode) {
|
||||
case MODE_TEST:
|
||||
case MODE_GRAD:
|
||||
case MODE_SUPPORT:
|
||||
test_cases = make_test_cases_eval();
|
||||
break;
|
||||
case MODE_PERF:
|
||||
test_cases = make_test_cases_perf();
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
test_cases = make_test_cases_from_file(test_file_path);
|
||||
}
|
||||
|
||||
filter_test_cases(test_cases, params_filter);
|
||||
|
||||
if (mode == MODE_TEST) {
|
||||
auto test_cases = make_test_cases_eval();
|
||||
filter_test_cases(test_cases, params_filter);
|
||||
ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
|
||||
if (backend_cpu == NULL) {
|
||||
test_operation_info info("", "", "CPU");
|
||||
|
|
@ -8812,8 +9125,6 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|||
}
|
||||
|
||||
if (mode == MODE_GRAD) {
|
||||
auto test_cases = make_test_cases_eval();
|
||||
filter_test_cases(test_cases, params_filter);
|
||||
size_t n_ok = 0;
|
||||
for (auto & test : test_cases) {
|
||||
if (test->eval_grad(backend, op_names_filter, output_printer)) {
|
||||
|
|
@ -8826,8 +9137,6 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|||
}
|
||||
|
||||
if (mode == MODE_PERF) {
|
||||
auto test_cases = make_test_cases_perf();
|
||||
filter_test_cases(test_cases, params_filter);
|
||||
for (auto & test : test_cases) {
|
||||
test->eval_perf(backend, op_names_filter, output_printer);
|
||||
}
|
||||
|
|
@ -8835,9 +9144,6 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
|||
}
|
||||
|
||||
if (mode == MODE_SUPPORT) {
|
||||
auto test_cases = make_test_cases_eval();
|
||||
filter_test_cases(test_cases, params_filter);
|
||||
|
||||
// Filter out fusion cases
|
||||
test_cases.erase(
|
||||
std::remove_if(test_cases.begin(), test_cases.end(), [](const std::unique_ptr<test_case> & tc) {
|
||||
|
|
@ -8956,7 +9262,8 @@ static void show_test_coverage() {
|
|||
}
|
||||
|
||||
static void usage(char ** argv) {
|
||||
printf("Usage: %s [mode] [-o <op,..>] [-b <backend>] [-p <params regex>] [--output <console|sql|csv>] [--list-ops] [--show-coverage]\n", argv[0]);
|
||||
printf("Usage: %s [mode] [-o <op,..>] [-b <backend>] [-p <params regex>] [--output <console|sql|csv>] [--list-ops]", argv[0]);
|
||||
printf(" [--show-coverage] [--test-file <path>]\n");
|
||||
printf(" valid modes:\n");
|
||||
printf(" - test (default, compare with CPU backend for correctness)\n");
|
||||
printf(" - grad (compare gradients from backpropagation with method of finite differences)\n");
|
||||
|
|
@ -8967,6 +9274,7 @@ static void usage(char ** argv) {
|
|||
printf(" --output specifies output format (default: console, options: console, sql, csv)\n");
|
||||
printf(" --list-ops lists all available GGML operations\n");
|
||||
printf(" --show-coverage shows test coverage\n");
|
||||
printf(" --test-file reads test operators from a test file generated by llama-export-graph-ops\n");
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
|
|
@ -8975,6 +9283,7 @@ int main(int argc, char ** argv) {
|
|||
const char * op_names_filter = nullptr;
|
||||
const char * backend_filter = nullptr;
|
||||
const char * params_filter = nullptr;
|
||||
const char * test_file_path = nullptr;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "test") == 0) {
|
||||
|
|
@ -9022,6 +9331,13 @@ int main(int argc, char ** argv) {
|
|||
} else if (strcmp(argv[i], "--show-coverage") == 0) {
|
||||
show_test_coverage();
|
||||
return 0;
|
||||
} else if (strcmp(argv[i], "--test-file") == 0) {
|
||||
if (i + 1 < argc) {
|
||||
test_file_path = argv[++i];
|
||||
} else {
|
||||
usage(argv);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
usage(argv);
|
||||
return 1;
|
||||
|
|
@ -9074,7 +9390,7 @@ int main(int argc, char ** argv) {
|
|||
false, "", ggml_backend_dev_description(dev),
|
||||
total / 1024 / 1024, free / 1024 / 1024, true));
|
||||
|
||||
bool ok = test_backend(backend, mode, op_names_filter, params_filter, output_printer.get());
|
||||
bool ok = test_backend(backend, mode, op_names_filter, params_filter, output_printer.get(), test_file_path);
|
||||
|
||||
if (ok) {
|
||||
n_ok++;
|
||||
|
|
|
|||
Loading…
Reference in New Issue