opt mulmat base on official doc (#25)
https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md
This commit is contained in:
parent
84328ffbda
commit
ff033e1e23
|
|
@ -385,36 +385,26 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap
|
||||||
* [5, 4],
|
* [5, 4],
|
||||||
* ])
|
* ])
|
||||||
* # Perform matrix multiplication
|
* # Perform matrix multiplication
|
||||||
* result = torch.matmul(A, B.T)
|
* C = torch.matmul(A, B.T)
|
||||||
* print(result.T)
|
* print(C.T)
|
||||||
* ```
|
* ```
|
||||||
* Here, the B.T is the transpose of B.
|
* Here, the B.T is the transpose of B.
|
||||||
|
* So C.T = A * B.T which is equivalent to C = B * A.T.
|
||||||
|
* See: https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md
|
||||||
*
|
*
|
||||||
* So here we need to create graph like:
|
* So here we need to create graph like:
|
||||||
* ```mermaid
|
* ```mermaid
|
||||||
* graph TD;
|
* graph TD;
|
||||||
* i1>ggml_tensor_in0] --src0--> mat_mul0;
|
* i1>ggml_tensor_in0] --src1--> mat_mul0;
|
||||||
* i2>ggml_tensor_in1] --src1--> mat_mul0;
|
* i2>ggml_tensor_in1] --src0.T--> mat_mul0;
|
||||||
* mat_mul0 --dst_trans--> transpose_out;
|
* mat_mul0 --dst0--> o1>ggml_tensor_out];
|
||||||
* transpose1 --dst0--> o1>ggml_tensor_out];
|
|
||||||
* ```
|
* ```
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// create src0_trans tensor
|
// create src0_trans tensor
|
||||||
auto src1 = tensor_inputs.back();
|
|
||||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS does not match the expected value");
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS does not match the expected value");
|
||||||
|
GGML_ASSERT(tensor_inputs.size() == 2);
|
||||||
qnn_dimension_array_t dimensions = get_transposed_dimensions(src1->get_dimensions(), rank);
|
GGML_ASSERT(tensor_outputs.size() == 1);
|
||||||
|
|
||||||
// create dst_trans tensor
|
|
||||||
auto dst = tensor_outputs.front();
|
|
||||||
dimensions = get_transposed_dimensions(dst->get_dimensions(), rank);
|
|
||||||
auto dst_trans = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::INTERMEDIATE, "dst_trans", dimensions,
|
|
||||||
dst->get_data_type(), rank, device, graph_handle, _qnn_instance);
|
|
||||||
|
|
||||||
// create transpose_out
|
|
||||||
auto transpose_out = std::make_shared<ggml_qnn_single_op_config>(_name + "_trans1", QNN_OP_PACKAGE_NAME_QTI_AISW,
|
|
||||||
QNN_OP_TRANSPOSE, _qnn_instance);
|
|
||||||
|
|
||||||
// create mat_mul
|
// create mat_mul
|
||||||
auto mat_mul =
|
auto mat_mul =
|
||||||
|
|
@ -425,24 +415,12 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap
|
||||||
scalar.bool8Value = 1;
|
scalar.bool8Value = 1;
|
||||||
mat_mul->add_scalar_param(QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar);
|
mat_mul->add_scalar_param(QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar);
|
||||||
|
|
||||||
// set transpose_out parameters
|
|
||||||
auto *params_data = reinterpret_cast<const uint8_t *>(kTransposeParamData[rank - 1].data());
|
|
||||||
const qnn_dimension_array_t param_dims = {(uint32_t)rank, 1, 1, 1};
|
|
||||||
transpose_out->add_tensor_param(QNN_OP_TRANSPOSE_PARAM_PERM, param_dims, 1, params_data, QNN_DATATYPE_UINT_32,
|
|
||||||
device, graph_handle);
|
|
||||||
|
|
||||||
// set tensor to mat_mul
|
// set tensor to mat_mul
|
||||||
|
std::swap(tensor_inputs[0], tensor_inputs[1]);
|
||||||
mat_mul->set_input_tensors(tensor_inputs);
|
mat_mul->set_input_tensors(tensor_inputs);
|
||||||
qnn_tensor_array_t tensors = {dst_trans};
|
mat_mul->set_output_tensors(tensor_outputs);
|
||||||
mat_mul->set_output_tensors(tensors);
|
|
||||||
|
|
||||||
// set tensor to transpose_out
|
|
||||||
tensors = {dst_trans};
|
|
||||||
transpose_out->set_input_tensors(tensors);
|
|
||||||
transpose_out->set_output_tensors(tensor_outputs);
|
|
||||||
|
|
||||||
_operations.push_back(mat_mul);
|
_operations.push_back(mat_mul);
|
||||||
_operations.push_back(transpose_out);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue