minor update

This commit is contained in:
bssrdf 2025-11-06 22:31:28 -05:00
parent ba70ad8e59
commit 4e9ebe92e0
1 changed files with 207 additions and 207 deletions

View File

@ -329,108 +329,108 @@ int main(void)
// std::make_tuple(32,64,58,58,3,3)
//512x512
std::make_tuple(4,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(320,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(640,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(640,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(1920,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1920,1280,16,16,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1920,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(1920,640,32,32,3,3),
std::make_tuple(1280,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(1280,640,32,32,3,3),
std::make_tuple(960,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(960,640,32,32,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(960,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(960,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(320,4,64,64,3,3),
std::make_tuple(4,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(320,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(320,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(640,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(640,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(2560,1280,16,16,3,3),
std::make_tuple(1920,1280,16,16,3,3),
std::make_tuple(1280,1280,16,16,3,3),
std::make_tuple(1920,1280,16,16,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1920,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(1920,640,32,32,3,3),
std::make_tuple(1280,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(1280,640,32,32,3,3),
std::make_tuple(960,640,32,32,3,3),
std::make_tuple(640,640,32,32,3,3),
std::make_tuple(960,640,32,32,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(960,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(960,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(320,320,64,64,3,3),
std::make_tuple(640,320,64,64,3,3),
std::make_tuple(320,4,64,64,3,3),
// std::make_tuple(4,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(320,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(640,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(640,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(1920,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1920,1280,16,16,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1920,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(1920,640,32,32,3,3),
// std::make_tuple(1280,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(1280,640,32,32,3,3),
// std::make_tuple(960,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(960,640,32,32,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(960,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(960,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(320,4,64,64,3,3),
// std::make_tuple(4,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(320,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(320,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(640,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(640,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(2560,1280,16,16,3,3),
// std::make_tuple(1920,1280,16,16,3,3),
// std::make_tuple(1280,1280,16,16,3,3),
// std::make_tuple(1920,1280,16,16,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1920,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(1920,640,32,32,3,3),
// std::make_tuple(1280,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(1280,640,32,32,3,3),
// std::make_tuple(960,640,32,32,3,3),
// std::make_tuple(640,640,32,32,3,3),
// std::make_tuple(960,640,32,32,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(960,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(960,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(320,320,64,64,3,3),
// std::make_tuple(640,320,64,64,3,3),
// std::make_tuple(320,4,64,64,3,3),
//768x768
// std::make_tuple(4,320,96,96,3,3),
@ -538,108 +538,108 @@ int main(void)
//1024x1024
// std::make_tuple(4,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(320,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(640,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(640,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(1920,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1920,1280,32,32,3,3),
// std::make_tuple(1280,1280,64,64,3,3),
// std::make_tuple(1920,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(1920,640,64,64,3,3),
// std::make_tuple(1280,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(1280,640,64,64,3,3),
// std::make_tuple(960,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(960,640,64,64,3,3),
// std::make_tuple(640,640,128,128,3,3),
// std::make_tuple(960,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(960,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(320,4,128,128,3,3),
// std::make_tuple(4,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(320,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(320,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(640,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(640,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(2560,1280,32,32,3,3),
// std::make_tuple(1920,1280,32,32,3,3),
// std::make_tuple(1280,1280,32,32,3,3),
// std::make_tuple(1920,1280,32,32,3,3),
// std::make_tuple(1280,1280,64,64,3,3),
// std::make_tuple(1920,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(1920,640,64,64,3,3),
// std::make_tuple(1280,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(1280,640,64,64,3,3),
// std::make_tuple(960,640,64,64,3,3),
// std::make_tuple(640,640,64,64,3,3),
// std::make_tuple(960,640,64,64,3,3),
// std::make_tuple(640,640,128,128,3,3),
// std::make_tuple(960,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(960,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(320,320,128,128,3,3),
// std::make_tuple(640,320,128,128,3,3),
// std::make_tuple(320,4,128,128,3,3),
std::make_tuple(4,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(320,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(640,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(640,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(1920,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1920,1280,32,32,3,3),
std::make_tuple(1280,1280,64,64,3,3),
std::make_tuple(1920,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(1920,640,64,64,3,3),
std::make_tuple(1280,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(1280,640,64,64,3,3),
std::make_tuple(960,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(960,640,64,64,3,3),
std::make_tuple(640,640,128,128,3,3),
std::make_tuple(960,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(960,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(320,4,128,128,3,3),
std::make_tuple(4,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(320,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(320,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(640,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(640,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(2560,1280,32,32,3,3),
std::make_tuple(1920,1280,32,32,3,3),
std::make_tuple(1280,1280,32,32,3,3),
std::make_tuple(1920,1280,32,32,3,3),
std::make_tuple(1280,1280,64,64,3,3),
std::make_tuple(1920,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(1920,640,64,64,3,3),
std::make_tuple(1280,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(1280,640,64,64,3,3),
std::make_tuple(960,640,64,64,3,3),
std::make_tuple(640,640,64,64,3,3),
std::make_tuple(960,640,64,64,3,3),
std::make_tuple(640,640,128,128,3,3),
std::make_tuple(960,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(960,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(320,320,128,128,3,3),
std::make_tuple(640,320,128,128,3,3),
std::make_tuple(320,4,128,128,3,3),
};
@ -690,15 +690,15 @@ int main(void)
if(k==0) {
k = 1;
fprintf(stderr, "| (IC, OC, IW, IH, KW, KH) | im2col+GEMM TIME | im2col+GEMM VRAM | implicit GEMM TIME | implicit GEMM VRAM \n");
fprintf(stderr, "| --- | --- | --- | --- | --- \n");
fprintf(stdout, "| (IC, OC, IW, IH, KW, KH) | im2col+GEMM TIME | im2col+GEMM VRAM | implicit GEMM TIME | implicit GEMM VRAM \n");
fprintf(stdout, "| --- | --- | --- | --- | --- \n");
}
time_iter0 += run_time0;
time_iter1 += run_time1;
fprintf(stderr, " | (%d, %d, %d, %d, %d, %d) | %.2f ms | %.2f MB | %.2f ms | %.2f MB\n",
fprintf(stdout, " | (%d, %d, %d, %d, %d, %d) | %.2f ms | %.2f MB | %.2f ms | %.2f MB\n",
std::get<0>(c), std::get<1>(c), std::get<2>(c), std::get<3>(c), std::get<4>(c), std::get<5>(c),
run_time0, mem_size0/1024.0f/1024.0f,
run_time1, mem_size1/1024.0f/1024.0f);