diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 1b9e8a2464..3c4388f8a5 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -39,6 +39,7 @@
 #include <string>
 #include <thread>
 #include <vector>
+#include <map>
 
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
     size_t nels = ggml_nelements(tensor);
@@ -6725,11 +6726,63 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
         }
     }
-    for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
-        for (auto act_case : cases) {
-            // Direct CONV_2D
-            test_cases.emplace_back(new test_conv_2d_implicit(
-                { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
-                { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
-                kernel_type, 1, 1, 0, 0, 1, 1, false));
-        }
-    }
+    // for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
+    //     for (auto act_case : cases) {
+    //         // Direct CONV_2D
+    //         test_cases.emplace_back(new test_conv_2d_implicit(
+    //             { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
+    //             { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
+    //             kernel_type, 1, 1, 0, 0, 1, 1, false));
+    //     }
+    // }
+
+    // Stable-diffusion layers
+    std::map<std::string, uint32_t> idx_sd{
+        { "iw", 0 },
+        { "ih", 1 },
+        { "kw", 2 },
+        { "kh", 3 },
+        { "Cout", 4 },
+        { "Cin", 5 },
+        { "B", 6 },
+    };
+
+    // Input image size
+    uint32_t w = 768;
+    uint32_t h = 1024;
+
+    // Number of filters (base)
+    uint32_t Cout_b = 128;
+    uint32_t Cin_b = 128;
+
+    std::vector<std::vector<uint32_t>> cases_sd = {
+        { w / 8, h / 8, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x10 (called 10 times)
+        { w / 4, h / 4, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x7
+        { w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 2, 1 }, // x5
+        { w, h, 3, 3, Cout_b, Cin_b, 1 }, // x5
+        { w / 8, h / 8, 1, 1, Cout_b * 4, Cin_b * 4, 1 }, // x4
+        { w / 8, h / 8, 1, 1, 4, 4, 1 },
+        { w / 8, h / 8, 3, 3, Cout_b * 4, 4, 1 },
+
+        { w / 2, h / 2, 3, 3, Cout_b * 4, Cin_b * 4, 1 },
+        { w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 4, 1 },
+        { w / 2, h / 2, 1, 1, Cout_b * 2, Cin_b * 4, 1 },
+
+        { w, h, 3, 3, Cout_b, Cin_b * 2, 1 },
+        { w, h, 1, 1, Cout_b, Cin_b * 2, 1 },
+        { w, h, 3, 3, Cout_b * 2, Cin_b * 2, 1 },
+
+        { w, h, 3, 3, 3, Cin_b, 1 },
+    };
+
+    for (auto act_case : cases_sd) {
+        GGML_ASSERT(act_case[idx_sd["kw"]] == 3 || act_case[idx_sd["kw"]] == 1);
+        GGML_ASSERT(act_case[idx_sd["kh"]] == 3 || act_case[idx_sd["kh"]] == 1);
+
+        uint32_t p0 = act_case[idx_sd["kw"]] == 3 ? 1 : 0;
+        uint32_t p1 = act_case[idx_sd["kh"]] == 3 ? 1 : 0;
+
+        test_cases.emplace_back(new test_conv_2d_implicit(
+            { act_case[idx_sd["iw"]], act_case[idx_sd["ih"]], act_case[idx_sd["Cin"]], act_case[idx_sd["B"]] },
+            { act_case[idx_sd["kw"]], act_case[idx_sd["kh"]], act_case[idx_sd["Cin"]], act_case[idx_sd["Cout"]] },
+            GGML_TYPE_F16, 1, 1, p0, p1, 1, 1, false));
+    }
diff --git a/tests/test-conv2d-implicit.cpp b/tests/test-conv2d-implicit.cpp
index 6077299cb4..e963e2b361 100644
--- a/tests/test-conv2d-implicit.cpp
+++ b/tests/test-conv2d-implicit.cpp
@@ -63,8 +63,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
     size_t buffer_size = 0;
     {
-        // buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
-        buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
+        buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
+        // buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
         buffer_size += IW * IH * IC * N * ggml_type_size(GGML_TYPE_F32); // tensor b
         buffer_size += 1024; // overhead
     }
 
@@ -112,7 +112,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
     model.ctx = ggml_init(params);
 
     // create tensors
-    model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
+    // model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
+    model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, KW, KH, IC, OC);
     model.b = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, IW, IH, IC, N);
 
     // create a allocator
@@ -123,9 +124,11 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
 
     // load data to buffer
     if(ggml_backend_is_cpu(model.backend)) {
-        memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
+        // memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
+        memcpy(model.a->data, adata.data(), ggml_nbytes(model.a));
     } else {
-        ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
+        // ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
+        ggml_backend_tensor_set(model.a, adata.data(), 0, ggml_nbytes(model.a));
     }
 
     // alloc memory