Now bank-conflict free, and performance gets a small boost too
This commit is contained in:
parent
949eca4cba
commit
8809af79a8
|
|
@ -677,14 +677,12 @@ static __global__ void conv2d_implicit_kernel(const half * __restrict__ input,
|
|||
uint32_t (&reg_)[2] = reinterpret_cast<uint32_t(&)[2]>(acc_register_[mma_m][mma_n]);
|
||||
uint idx = output_sts_addr +
|
||||
mma_m * MMA_M * BN / 2 + (mma_n - i * mma_tiles_per_warp_n/2) * MMA_N;
|
||||
uint idx8 = idx + 8 * BN / 2;
|
||||
idx = idx ^ ((idx & 0b110000000000) >> 9);
|
||||
idx = idx ^ ((idx & 0b1110000000) >> 4);
|
||||
uint32_t* dst_ptr = reinterpret_cast<uint32_t*>(&smemoutput[idx]);
|
||||
dst_ptr[0] = reg_[0];
|
||||
idx8 = idx8 ^ ((idx8 & 0b110000000000) >> 9);
|
||||
idx8 = idx8 ^ ((idx8 & 0b1110000000) >> 4);
|
||||
dst_ptr = reinterpret_cast<uint32_t*>(&smemoutput[idx8]);
|
||||
idx = (idx + 8 * BN / 2 ) ^ 0b010;
|
||||
dst_ptr = reinterpret_cast<uint32_t*>(&smemoutput[idx]);
|
||||
dst_ptr[0] = reg_[1];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -300,353 +300,355 @@ int main(void)
|
|||
|
||||
double time_iter0 = 0.0, time_iter1 = 0.0;
|
||||
std::vector<std::tuple<int, int, int, int, int, int>> configs = {
|
||||
// std::make_tuple(64,64,48,64,3,3),
|
||||
// std::make_tuple(320,320,104,152,3,3),
|
||||
// std::make_tuple(640,640,52,76,3,3),
|
||||
// std::make_tuple(640,640,104,152,3,3),
|
||||
// std::make_tuple(960,320,104,152,3,3),
|
||||
std::make_tuple(64,64,48,64,3,3),
|
||||
std::make_tuple(320,320,104,152,3,3),
|
||||
std::make_tuple(640,640,52,76,3,3),
|
||||
std::make_tuple(640,640,104,152,3,3),
|
||||
std::make_tuple(960,320,104,152,3,3),
|
||||
std::make_tuple(1280,1280,26,38,3,3),
|
||||
// std::make_tuple(4,320,96,128,3,3),
|
||||
// std::make_tuple(320,4,96,128,3,3),
|
||||
// std::make_tuple(4,320,64,96,3,3),
|
||||
// std::make_tuple(320,4,64,96,3,3),
|
||||
// std::make_tuple(640,640,96,128,3,3),
|
||||
// std::make_tuple(1280,1280,26,38,1,1),
|
||||
// std::make_tuple(256,128,768,1024,3,3),
|
||||
// std::make_tuple(128,3,768,1024,3,3),
|
||||
// std::make_tuple(256,128,768,1024,1,1),
|
||||
// std::make_tuple(512,256,384,512,1,1),
|
||||
// std::make_tuple(1280,640,52,76,3,3),
|
||||
// std::make_tuple(1920,1280,26,38,3,3),
|
||||
// std::make_tuple(2560,1280,26,38,3,3),
|
||||
// std::make_tuple(320,1280,26,38,3,3),
|
||||
// std::make_tuple(512,512,104,152,3,3),
|
||||
// std::make_tuple(512,512,208,304,3,3),
|
||||
// std::make_tuple(512,256,416,608,3,3),
|
||||
// std::make_tuple(256,128,832,1216,3,3),
|
||||
// std::make_tuple(256,256,832,1216,3,3),
|
||||
std::make_tuple(4,320,96,128,3,3),
|
||||
std::make_tuple(320,4,96,128,3,3),
|
||||
std::make_tuple(4,320,64,96,3,3),
|
||||
std::make_tuple(320,4,64,96,3,3),
|
||||
std::make_tuple(640,640,96,128,3,3),
|
||||
std::make_tuple(1280,1280,26,38,1,1),
|
||||
std::make_tuple(256,128,768,1024,3,3),
|
||||
std::make_tuple(128,3,768,1024,3,3),
|
||||
std::make_tuple(256,128,768,1024,1,1),
|
||||
std::make_tuple(512,256,384,512,1,1),
|
||||
std::make_tuple(1280,640,52,76,3,3),
|
||||
std::make_tuple(1920,1280,26,38,3,3),
|
||||
std::make_tuple(2560,1280,26,38,3,3),
|
||||
std::make_tuple(320,1280,26,38,3,3),
|
||||
std::make_tuple(512,512,104,152,3,3),
|
||||
std::make_tuple(512,512,208,304,3,3),
|
||||
std::make_tuple(512,256,416,608,3,3),
|
||||
std::make_tuple(256,128,832,1216,3,3),
|
||||
std::make_tuple(256,256,832,1216,3,3),
|
||||
// std::make_tuple(320,256,1024,1920)
|
||||
// std::make_tuple(32,64,58,58,3,3)
|
||||
|
||||
std::make_tuple(32,64,58,58,3,3)
|
||||
};
|
||||
std::vector<std::tuple<int, int, int, int, int, int>> configs_sdxl_512 = {
|
||||
//512x512
|
||||
// std::make_tuple(4,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(320,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(640,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(640,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(1920,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1920,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1920,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(1920,640,32,32,3,3),
|
||||
// std::make_tuple(1280,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(1280,640,32,32,3,3),
|
||||
// std::make_tuple(960,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(960,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(960,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(960,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(320,4,64,64,3,3),
|
||||
// std::make_tuple(4,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(320,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(320,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(640,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(640,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(2560,1280,16,16,3,3),
|
||||
// std::make_tuple(1920,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,16,16,3,3),
|
||||
// std::make_tuple(1920,1280,16,16,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1920,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(1920,640,32,32,3,3),
|
||||
// std::make_tuple(1280,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(1280,640,32,32,3,3),
|
||||
// std::make_tuple(960,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,32,32,3,3),
|
||||
// std::make_tuple(960,640,32,32,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(960,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(960,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(320,320,64,64,3,3),
|
||||
// std::make_tuple(640,320,64,64,3,3),
|
||||
// std::make_tuple(320,4,64,64,3,3),
|
||||
std::make_tuple(4,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(320,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(640,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(640,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(1920,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1920,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1920,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(1920,640,32,32,3,3),
|
||||
std::make_tuple(1280,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(1280,640,32,32,3,3),
|
||||
std::make_tuple(960,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(960,640,32,32,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(960,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(960,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(320,4,64,64,3,3),
|
||||
std::make_tuple(4,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(320,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(320,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(640,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(640,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(2560,1280,16,16,3,3),
|
||||
std::make_tuple(1920,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,16,16,3,3),
|
||||
std::make_tuple(1920,1280,16,16,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1920,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(1920,640,32,32,3,3),
|
||||
std::make_tuple(1280,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(1280,640,32,32,3,3),
|
||||
std::make_tuple(960,640,32,32,3,3),
|
||||
std::make_tuple(640,640,32,32,3,3),
|
||||
std::make_tuple(960,640,32,32,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(960,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(960,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(320,320,64,64,3,3),
|
||||
std::make_tuple(640,320,64,64,3,3),
|
||||
std::make_tuple(320,4,64,64,3,3)
|
||||
};
|
||||
|
||||
std::vector<std::tuple<int, int, int, int, int, int>> configs_sdxl_768 = {
|
||||
//768x768
|
||||
// std::make_tuple(4,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(320,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(640,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(640,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(1920,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1920,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,48,48,3,3),
|
||||
// std::make_tuple(1920,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(1920,640,48,48,3,3),
|
||||
// std::make_tuple(1280,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(1280,640,48,48,3,3),
|
||||
// std::make_tuple(960,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(960,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,96,96,3,3),
|
||||
// std::make_tuple(960,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(960,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(320,4,96,96,3,3),
|
||||
// std::make_tuple(4,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(320,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(320,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(640,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(640,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(2560,1280,24,24,3,3),
|
||||
// std::make_tuple(1920,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,24,24,3,3),
|
||||
// std::make_tuple(1920,1280,24,24,3,3),
|
||||
// std::make_tuple(1280,1280,48,48,3,3),
|
||||
// std::make_tuple(1920,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(1920,640,48,48,3,3),
|
||||
// std::make_tuple(1280,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(1280,640,48,48,3,3),
|
||||
// std::make_tuple(960,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,48,48,3,3),
|
||||
// std::make_tuple(960,640,48,48,3,3),
|
||||
// std::make_tuple(640,640,96,96,3,3),
|
||||
// std::make_tuple(960,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(960,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(320,320,96,96,3,3),
|
||||
// std::make_tuple(640,320,96,96,3,3),
|
||||
// std::make_tuple(320,4,96,96,3,3),
|
||||
|
||||
std::make_tuple(4,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(320,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(640,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(640,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(1920,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1920,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,48,48,3,3),
|
||||
std::make_tuple(1920,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(1920,640,48,48,3,3),
|
||||
std::make_tuple(1280,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(1280,640,48,48,3,3),
|
||||
std::make_tuple(960,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(960,640,48,48,3,3),
|
||||
std::make_tuple(640,640,96,96,3,3),
|
||||
std::make_tuple(960,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(960,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(320,4,96,96,3,3),
|
||||
std::make_tuple(4,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(320,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(320,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(640,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(640,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(2560,1280,24,24,3,3),
|
||||
std::make_tuple(1920,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,24,24,3,3),
|
||||
std::make_tuple(1920,1280,24,24,3,3),
|
||||
std::make_tuple(1280,1280,48,48,3,3),
|
||||
std::make_tuple(1920,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(1920,640,48,48,3,3),
|
||||
std::make_tuple(1280,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(1280,640,48,48,3,3),
|
||||
std::make_tuple(960,640,48,48,3,3),
|
||||
std::make_tuple(640,640,48,48,3,3),
|
||||
std::make_tuple(960,640,48,48,3,3),
|
||||
std::make_tuple(640,640,96,96,3,3),
|
||||
std::make_tuple(960,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(960,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(320,320,96,96,3,3),
|
||||
std::make_tuple(640,320,96,96,3,3),
|
||||
std::make_tuple(320,4,96,96,3,3),
|
||||
};
|
||||
|
||||
std::vector<std::tuple<int, int, int, int, int, int>> configs_sdxl_1024 = {
|
||||
//1024x1024
|
||||
// std::make_tuple(4,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(320,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(640,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(640,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(1920,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1920,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,64,64,3,3),
|
||||
// std::make_tuple(1920,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(1920,640,64,64,3,3),
|
||||
// std::make_tuple(1280,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(1280,640,64,64,3,3),
|
||||
// std::make_tuple(960,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(960,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,128,128,3,3),
|
||||
// std::make_tuple(960,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(960,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(320,4,128,128,3,3),
|
||||
// std::make_tuple(4,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(320,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(320,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(640,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(640,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(2560,1280,32,32,3,3),
|
||||
// std::make_tuple(1920,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,32,32,3,3),
|
||||
// std::make_tuple(1920,1280,32,32,3,3),
|
||||
// std::make_tuple(1280,1280,64,64,3,3),
|
||||
// std::make_tuple(1920,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(1920,640,64,64,3,3),
|
||||
// std::make_tuple(1280,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(1280,640,64,64,3,3),
|
||||
// std::make_tuple(960,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,64,64,3,3),
|
||||
// std::make_tuple(960,640,64,64,3,3),
|
||||
// std::make_tuple(640,640,128,128,3,3),
|
||||
// std::make_tuple(960,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(960,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(320,320,128,128,3,3),
|
||||
// std::make_tuple(640,320,128,128,3,3),
|
||||
// std::make_tuple(320,4,128,128,3,3),
|
||||
|
||||
|
||||
std::make_tuple(4,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(320,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(640,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(640,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(1920,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1920,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,64,64,3,3),
|
||||
std::make_tuple(1920,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(1920,640,64,64,3,3),
|
||||
std::make_tuple(1280,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(1280,640,64,64,3,3),
|
||||
std::make_tuple(960,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(960,640,64,64,3,3),
|
||||
std::make_tuple(640,640,128,128,3,3),
|
||||
std::make_tuple(960,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(960,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(320,4,128,128,3,3),
|
||||
std::make_tuple(4,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(320,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(320,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(640,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(640,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(2560,1280,32,32,3,3),
|
||||
std::make_tuple(1920,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,32,32,3,3),
|
||||
std::make_tuple(1920,1280,32,32,3,3),
|
||||
std::make_tuple(1280,1280,64,64,3,3),
|
||||
std::make_tuple(1920,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(1920,640,64,64,3,3),
|
||||
std::make_tuple(1280,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(1280,640,64,64,3,3),
|
||||
std::make_tuple(960,640,64,64,3,3),
|
||||
std::make_tuple(640,640,64,64,3,3),
|
||||
std::make_tuple(960,640,64,64,3,3),
|
||||
std::make_tuple(640,640,128,128,3,3),
|
||||
std::make_tuple(960,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(960,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(320,320,128,128,3,3),
|
||||
std::make_tuple(640,320,128,128,3,3),
|
||||
std::make_tuple(320,4,128,128,3,3)
|
||||
};
|
||||
|
||||
int k = 0;
|
||||
|
||||
for (auto c : configs){
|
||||
for (auto c : configs_sdxl_1024){
|
||||
test_model model;
|
||||
load_model(model, std::get<0>(c), std::get<1>(c), std::get<2>(c),
|
||||
std::get<3>(c), std::get<4>(c), std::get<5>(c), true);
|
||||
|
|
@ -663,7 +665,7 @@ int main(void)
|
|||
// fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0f/1024.0f);
|
||||
|
||||
|
||||
int iterations = 0;
|
||||
int iterations = 20;
|
||||
|
||||
double run_time0;
|
||||
std::vector<float> im2col_data = compute_graph(model, allocr, build_graph_0, iterations, &run_time0);
|
||||
|
|
|
|||
Loading…
Reference in New Issue