From 9a91ca6ef96cc14e3c6658b68fca0e7950034ab8 Mon Sep 17 00:00:00 2001
From: "Yu, Zijun" <zijun.yu@intel.com>
Date: Mon, 4 Aug 2025 17:20:06 +0800
Subject: [PATCH] Optimize tensor conversion, improve TTFT

---
 ggml/src/ggml-openvino/ggml-decoder.cpp | 75 ++++++-------------------
 1 file changed, 17 insertions(+), 58 deletions(-)

diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 2f7ae333e7..eb0cdcb28d 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -24,6 +24,7 @@
 #include <openvino/op/constant.hpp>
 #include <openvino/openvino.hpp>
 #include <openvino/runtime/properties.hpp>
+#include <openvino/runtime/tensor.hpp>
 #include <openvino/util/common_util.hpp>
 
 #include "ggml-backend-impl.h"
@@ -391,53 +392,12 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
 }
 
 std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor) {
-    std::shared_ptr<ov::Node> weight_node;
     auto node_type = get_ov_type(tensor);
     auto node_shape = get_shape(tensor);
     auto ne_total = ggml_nelements(tensor);
-    switch (tensor->type) {
-    case GGML_TYPE_I32: {
-        const auto* ptr = reinterpret_cast<const int32_t*>(tensor->data);
-        std::vector<int32_t> data(ptr, ptr + ne_total);
-        weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data);
-        break;
-    }
-    case GGML_TYPE_I64: {
-        const auto* ptr = reinterpret_cast<const int64_t*>(tensor->data);
-        std::vector<int64_t> data(ptr, ptr + ne_total);
-        weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data);
-        break;
-    }
-    case GGML_TYPE_F32: {
-        const auto* ptr = reinterpret_cast<const float*>(tensor->data);
-        std::vector<float> data(ptr, ptr + ne_total);
-        weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data);
-        break;
-    }
-    case GGML_TYPE_F16: {
-        const auto* ptr = reinterpret_cast<const uint16_t*>(tensor->data);
-        std::vector<ov::float16> data_f16;
-        data_f16.reserve(ne_total);
-        for (int i = 0; i < ne_total; ++i) {
-            data_f16.push_back(ov::float16::from_bits(ptr[i]));
-        }
-        weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_f16);
-        break;
-    }
-    case GGML_TYPE_BF16: {
-        const auto* ptr = reinterpret_cast<const uint16_t*>(tensor->data);
-        std::vector<ov::bfloat16> data_bf16;
-        data_bf16.reserve(ne_total);
-        for (int i = 0; i < ne_total; ++i) {
-            data_bf16.push_back(ov::bfloat16::from_bits(ptr[i]));
-        }
-        weight_node = std::make_shared<ov::op::v0::Constant>(node_type, node_shape, data_bf16);
-        break;
-    }
-    default:
-        throw std::invalid_argument("Unsupported tensor type");
-    }
-    return weight_node;
+    ov::Tensor weights(node_type, node_shape);
+    memcpy(weights.data(), tensor->data, ne_total * node_type.size());
+    return std::make_shared<ov::op::v0::Constant>(weights);
 }
 
 void GgmlOvDecoder::dump_cgraph(const struct ggml_cgraph* cgraph, std::string& filename) {
@@ -549,27 +509,26 @@ std::vector<size_t> GgmlOvDecoder::get_stride(const ggml_tensor* tensor) {
 }
 
 ov::element::Type GgmlOvDecoder::get_ov_type(const ggml_tensor* tensor) {
-    ov::element::Type type = ov::element::dynamic;
     switch (tensor->type) {
+    case GGML_TYPE_F64:
+        return ov::element::f64;
     case GGML_TYPE_F32:
-        type = ov::element::f32;
-        break;
+        return ov::element::f32;
     case GGML_TYPE_F16:
-        type = ov::element::f16;
-        break;
+        return ov::element::f16;
     case GGML_TYPE_BF16:
-        type = ov::element::bf16;
-        break;
-    case GGML_TYPE_I64:
-        type = ov::element::i64;
-        break;
+        return ov::element::bf16;
+    case GGML_TYPE_I8:
+        return ov::element::i8;
+    case GGML_TYPE_I16:
+        return ov::element::i16;
     case GGML_TYPE_I32:
-        type = ov::element::i32;
-        break;
+        return ov::element::i32;
+    case GGML_TYPE_I64:
+        return ov::element::i64;
     default:
-        break;
+        throw std::runtime_error("Unsupported tensor type");
     }
-    return type;
 }
 
 ov::PartialShape GgmlOvDecoder::get_input_shape(const std::string& name) const {