tests: set tensor usage as weight for weight tensors
only for mul_mat and mul_mat_id ops Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
parent
1926e07e1a
commit
61ee32dec3
|
|
@ -1160,6 +1160,9 @@ struct test_case {
|
|||
|
||||
std::vector<ggml_tensor *> sentinels;
|
||||
|
||||
// Track weight tensors for separate buffer allocation with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
std::vector<ggml_tensor *> weight_tensors;
|
||||
|
||||
std::string current_op_name;
|
||||
|
||||
void add_sentinel(ggml_context * ctx) {
|
||||
|
|
@ -1238,6 +1241,8 @@ struct test_case {
|
|||
const char * op_names_filter,
|
||||
printer * output_printer) {
|
||||
mode = MODE_TEST;
|
||||
weight_tensors.clear();
|
||||
sentinels.clear();
|
||||
|
||||
ggml_init_params params = {
|
||||
/* .mem_size = */ ggml_tensor_overhead()*128 + ggml_graph_overhead(),
|
||||
|
|
@ -1288,10 +1293,35 @@ struct test_case {
|
|||
// post-graph sentinel
|
||||
add_sentinel(ctx);
|
||||
|
||||
// allocate
|
||||
// allocate weight tensors in a separate buffer with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
ggml_backend_buffer_t weights_buf = nullptr;
|
||||
if (!weight_tensors.empty()) {
|
||||
// Calculate total size needed for weight tensors
|
||||
size_t weight_size = 0;
|
||||
for (ggml_tensor * wt : weight_tensors) {
|
||||
weight_size += ggml_backend_buft_get_alloc_size(ggml_backend_get_default_buffer_type(backend1), wt);
|
||||
}
|
||||
weight_size = GGML_PAD(weight_size, ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend1)));
|
||||
|
||||
weights_buf = ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend1), weight_size);
|
||||
if (weights_buf == NULL) {
|
||||
printf("failed to allocate weight tensors [%s] ", ggml_backend_name(backend1));
|
||||
ggml_free(ctx);
|
||||
return test_status_t::FAIL;
|
||||
}
|
||||
ggml_backend_buffer_set_usage(weights_buf, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
|
||||
// Allocate each weight tensor in the weights buffer
|
||||
ggml_tallocr weights_talloc = ggml_tallocr_new(weights_buf);
|
||||
for (ggml_tensor * wt : weight_tensors) {
|
||||
ggml_tallocr_alloc(&weights_talloc, wt);
|
||||
}
|
||||
}
|
||||
|
||||
// allocate remaining tensors
|
||||
ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
|
||||
|
||||
if (buf == NULL) {
|
||||
if (buf == NULL && weights_buf == NULL) {
|
||||
printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));
|
||||
ggml_free(ctx);
|
||||
return test_status_t::FAIL;
|
||||
|
|
@ -1385,6 +1415,9 @@ struct test_case {
|
|||
|
||||
const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
|
||||
|
||||
if (weights_buf) {
|
||||
ggml_backend_buffer_free(weights_buf);
|
||||
}
|
||||
ggml_backend_buffer_free(buf);
|
||||
|
||||
ggml_free(ctx);
|
||||
|
|
@ -1404,6 +1437,7 @@ struct test_case {
|
|||
|
||||
bool eval_perf(ggml_backend_t backend, const char * op_names_filter, printer * output_printer) {
|
||||
mode = MODE_PERF;
|
||||
weight_tensors.clear();
|
||||
|
||||
static const size_t graph_nodes = 8192;
|
||||
|
||||
|
|
@ -1432,10 +1466,34 @@ struct test_case {
|
|||
return true;
|
||||
}
|
||||
|
||||
// allocate
|
||||
// allocate weight tensors in a separate buffer with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
ggml_backend_buffer_ptr weights_buf(nullptr); // smart ptr
|
||||
if (!weight_tensors.empty()) {
|
||||
// Calculate total size needed for weight tensors
|
||||
size_t weight_size = 0;
|
||||
for (ggml_tensor * wt : weight_tensors) {
|
||||
weight_size += ggml_backend_buft_get_alloc_size(ggml_backend_get_default_buffer_type(backend), wt);
|
||||
}
|
||||
weight_size = GGML_PAD(weight_size, ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend)));
|
||||
|
||||
weights_buf.reset(ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend), weight_size));
|
||||
if (weights_buf == NULL) {
|
||||
printf("failed to allocate weight tensors\n");
|
||||
return false;
|
||||
}
|
||||
ggml_backend_buffer_set_usage(weights_buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
|
||||
// Allocate each weight tensor in the weights buffer
|
||||
ggml_tallocr weights_talloc = ggml_tallocr_new(weights_buf.get());
|
||||
for (ggml_tensor * wt : weight_tensors) {
|
||||
ggml_tallocr_alloc(&weights_talloc, wt);
|
||||
}
|
||||
}
|
||||
|
||||
// allocate remaining tensors
|
||||
ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
|
||||
|
||||
if (buf == NULL) {
|
||||
if (buf == NULL && weights_buf == NULL) {
|
||||
printf("failed to allocate tensors\n");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1534,6 +1592,7 @@ struct test_case {
|
|||
|
||||
bool eval_support(ggml_backend_t backend, const char * op_names_filter, printer * output_printer) {
|
||||
mode = MODE_SUPPORT;
|
||||
weight_tensors.clear();
|
||||
|
||||
static const size_t graph_nodes = 8192;
|
||||
|
||||
|
|
@ -1569,6 +1628,7 @@ struct test_case {
|
|||
|
||||
bool eval_grad(ggml_backend_t backend, const char * op_names_filter, printer * output_printer) {
|
||||
mode = MODE_GRAD;
|
||||
weight_tensors.clear();
|
||||
const std::vector<float> expect = grad_expect();
|
||||
|
||||
ggml_init_params params = {
|
||||
|
|
@ -1679,9 +1739,35 @@ struct test_case {
|
|||
return true;
|
||||
}
|
||||
|
||||
// allocate
|
||||
// allocate weight tensors in a separate buffer with GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
ggml_backend_buffer_ptr weights_buf(nullptr); // smart ptr
|
||||
if (!weight_tensors.empty()) {
|
||||
// Calculate total size needed for weight tensors
|
||||
size_t weight_size = 0;
|
||||
for (ggml_tensor * wt : weight_tensors) {
|
||||
weight_size += ggml_backend_buft_get_alloc_size(ggml_backend_get_default_buffer_type(backend), wt);
|
||||
}
|
||||
weight_size = GGML_PAD(weight_size, ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend)));
|
||||
|
||||
weights_buf.reset(ggml_backend_buft_alloc_buffer(ggml_backend_get_default_buffer_type(backend), weight_size));
|
||||
if (weights_buf == NULL) {
|
||||
test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend));
|
||||
info.set_error("weight allocation", "");
|
||||
output_printer->print_operation(info);
|
||||
return false;
|
||||
}
|
||||
ggml_backend_buffer_set_usage(weights_buf.get(), GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
|
||||
// Allocate each weight tensor in the weights buffer
|
||||
ggml_tallocr weights_talloc = ggml_tallocr_new(weights_buf.get());
|
||||
for (ggml_tensor * wt : weight_tensors) {
|
||||
ggml_tallocr_alloc(&weights_talloc, wt);
|
||||
}
|
||||
}
|
||||
|
||||
// allocate remaining tensors
|
||||
ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
|
||||
if (buf == NULL) {
|
||||
if (buf == NULL && weights_buf == NULL) {
|
||||
test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend));
|
||||
info.set_error("allocation", "");
|
||||
output_printer->print_operation(info);
|
||||
|
|
@ -3606,6 +3692,7 @@ struct test_mul_mat : public test_case {
|
|||
|
||||
a = ggml_new_tensor_4d(ctx, type_a, ne_a[per[0]], ne_a[per[1]], ne_a[per[2]], ne_a[per[3]]);
|
||||
b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
|
||||
weight_tensors.push_back(a); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
if (!ggml_is_quantized(type_a)) {
|
||||
if (bs[1] == 1 && nr[1] == 1) {
|
||||
ggml_set_param(a);
|
||||
|
|
@ -3623,6 +3710,7 @@ struct test_mul_mat : public test_case {
|
|||
const int64_t k_physical = k_v == 0 ? k : k_v;
|
||||
a = ggml_new_tensor_4d(ctx, type_a, k_physical, m, bs[0], bs[1]);
|
||||
b = ggml_new_tensor_4d(ctx, type_b, k_physical, n, bs[0]*nr[0], bs[1]*nr[1]);
|
||||
weight_tensors.push_back(a); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
|
||||
if (!ggml_is_quantized(type_a)) {
|
||||
if (bs[1] == 1 && nr[1] == 1) {
|
||||
|
|
@ -3716,6 +3804,7 @@ struct test_mul_mat_id : public test_case {
|
|||
// C^T = A * B^T: (k, m) * (k, n) => (m, n)
|
||||
ggml_tensor * as = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
|
||||
ggml_set_name(as, "as");
|
||||
weight_tensors.push_back(as); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
|
||||
ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
|
||||
ggml_set_name(ids, "ids");
|
||||
|
|
@ -3776,6 +3865,7 @@ struct test_mul_mat_id_fusion : public test_case {
|
|||
// C^T = A * B^T: (k, m) * (k, n) => (m, n)
|
||||
ggml_tensor * as = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
|
||||
ggml_set_name(as, "as");
|
||||
weight_tensors.push_back(as); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
|
||||
ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
|
||||
ggml_set_name(ids, "ids");
|
||||
|
|
@ -3792,6 +3882,7 @@ struct test_mul_mat_id_fusion : public test_case {
|
|||
|
||||
for (uint32_t i = 1; i < o; ++i) {
|
||||
ggml_tensor * a2 = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
|
||||
weight_tensors.push_back(a2); // Track weight tensor for GGML_BACKEND_BUFFER_USAGE_WEIGHTS
|
||||
ggml_tensor * out2 = ggml_mul_mat_id(ctx, a2, b, ids);
|
||||
ggml_set_name(out2, "out2");
|
||||
out = ggml_add(ctx, out, out2);
|
||||
|
|
@ -7861,9 +7952,24 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 64, 64, 2, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 79, 79, 5, 3 }, { 417, 79, 5, 3 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 80, 80, 2, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 79, 80, 2, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 2, 8 }, { 81, 80, 2, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 80, 80, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 79, 80, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 80, 80, 8, 8 }, { 81, 80, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 84, 84, 4, 4 }, { 32, 84, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 95, 95, 8, 8 }, { 40, 95, 8, 8 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 31, 128, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 300, 64, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 4 }, { 32, 128, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 3, 4 }, { 32, 128, 3, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 32, 128, 4, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 200, 64, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 384, 64, 4, 4 }));
|
||||
|
||||
for (bool v : {false, true}) {
|
||||
for (bool circular : {false, true}) {
|
||||
|
|
@ -8064,12 +8170,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, 2*16416));
|
||||
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 2 }, { 6, 64, 4, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 1 }, { 8, 128, 4, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 32, 64, 4, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 2 }, { 32, 128, 4, 2 }));
|
||||
// qwen3next with CHUNK_SIZE 64
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 8, 32 }, { 64, 64, 8, 32 }));
|
||||
// qwen3next with CHUNK_SIZE 128
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 128, 128, 4, 32 }, { 128, 128, 4, 32 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 256, 256, 4, 2 }, { 128, 256, 4, 2 }));
|
||||
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER, GGML_TYPE_F32, { 256, 256, 4, 4 }));
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG, GGML_TYPE_F32, { 1024, 1024, 8, 4 }));
|
||||
|
|
|
|||
Loading…
Reference in New Issue