diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp index bb33717453..65cd2c1c40 100644 --- a/ggml/src/ggml-blas/ggml-blas.cpp +++ b/ggml/src/ggml-blas/ggml-blas.cpp @@ -513,13 +513,14 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const g // TODO: find the optimal value const int64_t min_batch = 32; - return ggml_is_contiguous(src0) && - ggml_is_contiguous(src1) && - src0->view_src == nullptr && - src1->view_src == nullptr && - src1->type == GGML_TYPE_F32 && - (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) && - (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); + return ggml_is_contiguous(src0) + && ggml_is_contiguous(src1) + && src1->type == GGML_TYPE_F32 + // NOTE: llama-bench creates views that somehow do not go through init_tensor + // this prevents the uninitialized views from being used in BLAS + && src0->view_src == nullptr && src1->view_src == nullptr + && (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) + && (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL); } default: