minor updates

bssrdf 2025-10-08 13:38:16 -04:00
parent 53a2ccbe12
commit c6255442bb
2 changed files with 69 additions and 13 deletions

View File

@@ -39,6 +39,7 @@
#include <string_view>
#include <thread>
#include <vector>
#include <map>
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
size_t nels = ggml_nelements(tensor);
@@ -6725,14 +6726,66 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
}
}
for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
for (auto act_case : cases) {
// Direct CONV_2D
test_cases.emplace_back(new test_conv_2d_implicit(
{ act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
{ act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
kernel_type, 1, 1, 0, 0, 1, 1, false));
}
// for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
// for (auto act_case : cases) {
// // Direct CONV_2D
// test_cases.emplace_back(new test_conv_2d_implicit(
// { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
// { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
// kernel_type, 1, 1, 0, 0, 1, 1, false));
// }
// }
// Stable-diffusion layers
std::map<std::string, uint32_t> idx_sd{
{ "iw", 0 },
{ "ih", 1 },
{ "kw", 2 },
{ "kh", 3 },
{ "Cout", 4 },
{ "Cin", 5 },
{ "B", 6 },
};
// Input image size
uint32_t w = 768;
uint32_t h = 1024;
// Number of filters (base)
uint32_t Cout_b = 128;
uint32_t Cin_b = 128;
std::vector<std::array<uint32_t, 7>> cases_sd = {
{ w / 8, h / 8, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x10 (called 10 times)
{ w / 4, h / 4, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x7
{ w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 2, 1 }, // x5
{ w, h, 3, 3, Cout_b, Cin_b, 1 }, // x5
{ w / 8, h / 8, 1, 1, Cout_b * 4, Cin_b * 4, 1 }, // x4
{ w / 8, h / 8, 1, 1, 4, 4, 1 },
{ w / 8, h / 8, 3, 3, Cout_b * 4, 4, 1 },
{ w / 2, h / 2, 3, 3, Cout_b * 4, Cin_b * 4, 1 },
{ w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 4, 1 },
{ w / 2, h / 2, 1, 1, Cout_b * 2, Cin_b * 4, 1 },
{ w, h, 3, 3, Cout_b, Cin_b * 2, 1 },
{ w, h, 1, 1, Cout_b, Cin_b * 2, 1 },
{ w, h, 3, 3, Cout_b * 2, Cin_b * 2, 1 },
{ w, h, 3, 3, 3, Cin_b, 1 },
};
for (auto act_case : cases_sd) {
GGML_ASSERT(act_case[idx_sd["kw"]] == 3 || act_case[idx_sd["kw"]] == 1);
GGML_ASSERT(act_case[idx_sd["kh"]] == 3 || act_case[idx_sd["kh"]] == 1);
uint32_t p0 = act_case[idx_sd["kw"]] == 3 ? 1 : 0;
uint32_t p1 = act_case[idx_sd["kh"]] == 3 ? 1 : 0;
test_cases.emplace_back(new test_conv_2d_implicit(
{ act_case[idx_sd["iw"]], act_case[idx_sd["ih"]], act_case[idx_sd["Cin"]], act_case[idx_sd["B"]] },
{ act_case[idx_sd["kw"]], act_case[idx_sd["kh"]], act_case[idx_sd["Cin"]], act_case[idx_sd["Cout"]] },
GGML_TYPE_F16, 1, 1, p0, p1, 1, 1, false));
}
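The padding choice in the loop above is the usual "same" padding for these stride-1 convolutions: 3x3 kernels get padding 1 and 1x1 kernels get padding 0, so the output keeps the input's spatial size. A minimal standalone sketch of that arithmetic (the helper name conv_out_size is illustrative, not part of this commit):

#include <cstdio>

// Standard convolution output-size formula with dilation d:
//   out = (in + 2*p - d*(k - 1) - 1) / s + 1
static unsigned conv_out_size(unsigned in, unsigned k, unsigned s, unsigned p, unsigned d) {
    return (in + 2 * p - d * (k - 1) - 1) / s + 1;
}

int main() {
    // First stable-diffusion case above: 96 x 128 activations (768/8 x 1024/8),
    // 3x3 kernel, stride 1, padding 1 -> the spatial size is preserved.
    printf("%u x %u\n", conv_out_size(768 / 8, 3, 1, 1, 1), conv_out_size(1024 / 8, 3, 1, 1, 1)); // 96 x 128
    // A 1x1 kernel with padding 0 preserves the spatial size as well.
    printf("%u x %u\n", conv_out_size(768 / 8, 1, 1, 0, 1), conv_out_size(1024 / 8, 1, 1, 0, 1)); // 96 x 128
    return 0;
}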

View File

@@ -63,8 +63,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
size_t buffer_size = 0;
{
// buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
// buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
buffer_size += IW * IH * IC * N * ggml_type_size(GGML_TYPE_F32); // tensor b
buffer_size += 1024; // overhead
}
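Since both tensors are now allocated as F32 (ggml_type_size(GGML_TYPE_F32) == 4 == sizeof(float)), the estimate above is just element counts times 4 bytes plus a fixed overhead. A small worked example with illustrative shapes (not taken from the test itself):

#include <cstddef>
#include <cstdio>

int main() {
    // Illustrative shapes: 3x3 kernel, 128 -> 128 channels, 96x128 input, batch 1.
    const size_t KW = 3, KH = 3, IC = 128, OC = 128, IW = 96, IH = 128, N = 1;
    size_t buffer_size = 0;
    buffer_size += KW * KH * IC * OC * sizeof(float); // tensor a (kernel):  589824 bytes
    buffer_size += IW * IH * IC * N * sizeof(float);  // tensor b (input):  6291456 bytes
    buffer_size += 1024;                              // overhead
    printf("%zu bytes\n", buffer_size);               // 6882304
    return 0;
}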
@@ -112,7 +112,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
model.ctx = ggml_init(params);
// create tensors
model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
// model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, KW, KH, IC, OC);
model.b = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, IW, IH, IC, N);
// create an allocator
@@ -123,9 +124,11 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
// load data to buffer
if(ggml_backend_is_cpu(model.backend)) {
memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
// memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
memcpy(model.a->data, adata.data(), ggml_nbytes(model.a));
} else {
ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
// ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
ggml_backend_tensor_set(model.a, adata.data(), 0, ggml_nbytes(model.a));
}
// alloc memory
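With model.a now created as GGML_TYPE_F32, the F32 kernel data (adata) is uploaded directly and the F16 staging copy (hadata) is no longer needed. For reference, a minimal sketch of how such an F16 copy is typically produced from F32 data when the weight tensor is GGML_TYPE_F16, assuming adata/hadata hold the kernel values as in the hunk above (the helper name is illustrative):

#include <vector>
#include "ggml.h"

// Sketch only: convert F32 kernel values into an F16 staging buffer, which is
// what the commented-out hadata path would require for a GGML_TYPE_F16 tensor.
static std::vector<ggml_fp16_t> make_f16_copy(const std::vector<float> & adata) {
    std::vector<ggml_fp16_t> hadata(adata.size());
    ggml_fp32_to_fp16_row(adata.data(), hadata.data(), (int64_t) adata.size());
    return hadata;
}

Either buffer is then uploaded with memcpy on the CPU backend or ggml_backend_tensor_set on any other backend, exactly as in the if/else above.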