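Minor tweak: in ggml_cuda_cpy, check `nb00 <= nb01` in the if-condition instead of asserting it; remove stray spaces before `;`; label the sd.cpp cases in the perf test list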

This commit is contained in:
bssrdf 2025-12-04 08:44:08 -05:00
parent adb72c52f1
commit 3683721688
2 changed files with 5 additions and 4 deletions

View File

@ -394,18 +394,18 @@ void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, gg
ne01n = ne01*ne02; ne01n = ne01*ne02;
ne02n = 1; ne02n = 1;
} }
ne00 = ne00n ; ne00 = ne00n;
ne01 = ne01n; ne01 = ne01n;
ne02 = ne02n; ne02 = ne02n;
can_be_transposed = true; can_be_transposed = true;
} }
if ((nb02 == (int64_t)ggml_element_size(src0) && if ((nb02 == (int64_t)ggml_element_size(src0) && nb00 <= nb01 &&
nb01 == ne02 * ne00 * (int64_t)ggml_element_size(src0))) { nb01 == ne02 * ne00 * (int64_t)ggml_element_size(src0))) {
GGML_ASSERT(nb00 <= nb01); // GGML_ASSERT(nb00 <= nb01);
ne00n = ne00*ne01; ne00n = ne00*ne01;
ne01n = ne02; ne01n = ne02;
ne02n = 1; // not used ne02n = 1; // not used
ne00 = ne00n ; ne00 = ne00n;
ne01 = ne01n; ne01 = ne01n;
ne02 = ne02n; ne02 = ne02n;
can_be_transposed = true; can_be_transposed = true;

View File

@ -7928,6 +7928,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true)); test_cases.emplace_back(new test_cpy(GGML_TYPE_F16, GGML_TYPE_F16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true)); test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {768, 1024, 256, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
// sd.cpp cases
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0})); test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0})); test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {4352, 1, 9216, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {21504, 4352, 1, 1}, {2, 0, 1, 3}, {0, 0, 0, 0})); test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {21504, 4352, 1, 1}, {2, 0, 1, 3}, {0, 0, 0, 0}));