minor updates
This commit is contained in:
parent
53a2ccbe12
commit
c6255442bb
|
|
@ -39,6 +39,7 @@
|
|||
#include <string_view>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
||||
size_t nels = ggml_nelements(tensor);
|
||||
|
|
@ -6725,14 +6726,66 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
}
|
||||
}
|
||||
|
||||
for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
||||
for (auto act_case : cases) {
|
||||
// Direct CONV_2D
|
||||
test_cases.emplace_back(new test_conv_2d_implicit(
|
||||
{ act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
|
||||
{ act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
|
||||
kernel_type, 1, 1, 0, 0, 1, 1, false));
|
||||
}
|
||||
// for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
||||
// for (auto act_case : cases) {
|
||||
// // Direct CONV_2D
|
||||
// test_cases.emplace_back(new test_conv_2d_implicit(
|
||||
// { act_case[iwh_idx], act_case[iwh_idx], act_case[Cin_idx], act_case[B_idx] },
|
||||
// { act_case[kwh_idx], act_case[kwh_idx], act_case[Cin_idx], act_case[Cout_idx] },
|
||||
// kernel_type, 1, 1, 0, 0, 1, 1, false));
|
||||
// }
|
||||
// }
|
||||
|
||||
// Stable-diffusion layers
|
||||
std::map<std::string, uint32_t> idx_sd{
|
||||
{ "iw", 0 },
|
||||
{ "ih", 1 },
|
||||
{ "kw", 2 },
|
||||
{ "kh", 3 },
|
||||
{ "Cout", 4 },
|
||||
{ "Cin", 5 },
|
||||
{ "B", 6 },
|
||||
};
|
||||
|
||||
// Input image size
|
||||
uint32_t w = 768;
|
||||
uint32_t h = 1024;
|
||||
|
||||
// Number of filters (base)
|
||||
uint32_t Cout_b = 128;
|
||||
uint32_t Cin_b = 128;
|
||||
|
||||
std::vector<std::array<uint32_t, 7>> cases_sd = {
|
||||
{ w / 8, h / 8, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x10 (called 10 times)
|
||||
{ w / 4, h / 4, 3, 3, Cout_b * 4, Cin_b * 4, 1 }, // x7
|
||||
{ w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 2, 1 }, // x5
|
||||
{ w, h, 3, 3, Cout_b, Cin_b, 1 }, // x5
|
||||
{ w / 8, h / 8, 1, 1, Cout_b * 4, Cin_b * 4, 1 }, // x4
|
||||
{ w / 8, h / 8, 1, 1, 4, 4, 1 },
|
||||
{ w / 8, h / 8, 3, 3, Cout_b * 4, 4, 1 },
|
||||
|
||||
{ w / 2, h / 2, 3, 3, Cout_b * 4, Cin_b * 4, 1 },
|
||||
{ w / 2, h / 2, 3, 3, Cout_b * 2, Cin_b * 4, 1 },
|
||||
{ w / 2, h / 2, 1, 1, Cout_b * 2, Cin_b * 4, 1 },
|
||||
|
||||
{ w, h, 3, 3, Cout_b, Cin_b * 2, 1 },
|
||||
{ w, h, 1, 1, Cout_b, Cin_b * 2, 1 },
|
||||
{ w, h, 3, 3, Cout_b * 2, Cin_b * 2, 1 },
|
||||
|
||||
{ w, h, 3, 3, 3, Cin_b, 1 },
|
||||
};
|
||||
|
||||
for (auto act_case : cases_sd) {
|
||||
GGML_ASSERT(act_case[idx_sd["kw"]] == 3 || act_case[idx_sd["kw"]] == 1);
|
||||
GGML_ASSERT(act_case[idx_sd["kh"]] == 3 || act_case[idx_sd["kh"]] == 1);
|
||||
|
||||
uint32_t p0 = act_case[idx_sd["kw"]] == 3 ? 1 : 0;
|
||||
uint32_t p1 = act_case[idx_sd["kh"]] == 3 ? 1 : 0;
|
||||
|
||||
test_cases.emplace_back(new test_conv_2d_implicit(
|
||||
{ act_case[idx_sd["iw"]], act_case[idx_sd["ih"]], act_case[idx_sd["Cin"]], act_case[idx_sd["B"]] },
|
||||
{ act_case[idx_sd["kw"]], act_case[idx_sd["kh"]], act_case[idx_sd["Cin"]], act_case[idx_sd["Cout"]] },
|
||||
GGML_TYPE_F16, 1, 1, p0, p1, 1, 1, false));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -63,8 +63,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
|
|||
|
||||
size_t buffer_size = 0;
|
||||
{
|
||||
// buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
|
||||
buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
|
||||
buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F32); // tensor a
|
||||
// buffer_size += KW * KH * IC * OC * ggml_type_size(GGML_TYPE_F16); // tensor a
|
||||
buffer_size += IW * IH * IC * N * ggml_type_size(GGML_TYPE_F32); // tensor b
|
||||
buffer_size += 1024; // overhead
|
||||
}
|
||||
|
|
@ -112,7 +112,8 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
|
|||
model.ctx = ggml_init(params);
|
||||
|
||||
// create tensors
|
||||
model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
|
||||
// model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F16, KW, KH, IC, OC);
|
||||
model.a = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, KW, KH, IC, OC);
|
||||
model.b = ggml_new_tensor_4d(model.ctx, GGML_TYPE_F32, IW, IH, IC, N);
|
||||
|
||||
// create an allocator
|
||||
|
|
@ -123,9 +124,11 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
|
|||
|
||||
// load data to buffer
|
||||
if(ggml_backend_is_cpu(model.backend)) {
|
||||
memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
|
||||
// memcpy(model.a->data, hadata.data(), ggml_nbytes(model.a));
|
||||
memcpy(model.a->data, adata.data(), ggml_nbytes(model.a));
|
||||
} else {
|
||||
ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
|
||||
// ggml_backend_tensor_set(model.a, hadata.data(), 0, ggml_nbytes(model.a));
|
||||
ggml_backend_tensor_set(model.a, adata.data(), 0, ggml_nbytes(model.a));
|
||||
}
|
||||
|
||||
// alloc memory
|
||||
|
|
|
|||
Loading…
Reference in New Issue