diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index d00b78e891..0dfc11e490 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -154,22 +154,22 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
 
     // Add model outputs, if called for the whole graph
     if (naive) {
-        m_model_output_names.push_back(node->name);
+        m_model_output_names.push_back(node_name);
     } else if (!m_node) {
+        // Model outputs are tensors with GGML_TENSOR_FLAG_OUTPUT flag and kv_caches
         static std::set<std::string> debug_output_names = {};
         // Workaround: the final tensor "result_output" does not have GGML_TENSOR_FLAG_OUTPUT flag set in cgraph
-        if (node->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY || node->flags & GGML_TENSOR_FLAG_OUTPUT ||
-            std::string(node->name).find("result") == 0 || debug_output_names.count(node->name)) {
-            auto name = node->view_src ? std::string(node->view_src->name) : std::string(node->name);
-            if (node->buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY) {
-                assert(name.find("cache_k") == 0 || name.find("cache_v") == 0);
+        if (node->op == GGML_OP_SET_ROWS || node->flags & GGML_TENSOR_FLAG_OUTPUT || node_name.find("result") == 0 ||
+            debug_output_names.count(node_name)) {
+            if (node->op == GGML_OP_SET_ROWS) {
+                assert(node_name.find("cache_k") == 0 || node_name.find("cache_v") == 0);
+                if (auto it = std::find(m_kv_names.begin(), m_kv_names.end(), node_name); it == m_kv_names.end()) {
+                    m_kv_names.push_back(node_name);
+                }
             }
-            if (auto it = std::find(m_model_output_names.begin(), m_model_output_names.end(), name);
+            if (auto it = std::find(m_model_output_names.begin(), m_model_output_names.end(), node_name);
                 it == m_model_output_names.end()) {
-                m_model_output_names.push_back(name);
-            }
-            if (auto it = std::find(m_kv_names.begin(), m_kv_names.end(), name); it == m_kv_names.end()) {
-                m_kv_names.push_back(name);
+                m_model_output_names.push_back(node_name);
             }
         }
     }
@@ -177,7 +177,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
     if (m_node) {
         switch (node->op) {
         case GGML_OP_RESHAPE: {
-            if (node->ne[0] * node->ne[1] == node->src[0]->ne[0]) {
+            if (node->src[0]->op == GGML_OP_RESHAPE && node->src[0]->src[0]->ne[0] == node->ne[0] &&
+                node->src[0]->src[0]->ne[1] == node->ne[1]) {
+                m_op_case = 4;
+            } else if (node->ne[0] * node->ne[1] == node->src[0]->ne[0]) {
                 m_op_case = 1;
             } else if (node->src[0]->ne[0] * node->src[0]->ne[1] == node->ne[0]) {
                 m_op_case = 2;
diff --git a/ggml/src/ggml-openvino/openvino/op/reshape.cpp b/ggml/src/ggml-openvino/openvino/op/reshape.cpp
index 4ef3833c90..1ed6f4b880 100644
--- a/ggml/src/ggml-openvino/openvino/op/reshape.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/reshape.cpp
@@ -23,7 +23,8 @@ OutputVector translate_reshape(const NodeContext& context) {
     }
 
     int op_case = context.get_op_case();
-    FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2 || op_case == 3, "Unsupported RESHAPE case");
+    FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2 || op_case == 3 || op_case == 4,
+                                "Unsupported RESHAPE case");
 
     auto output_shape = context.get_output_shape(0).to_shape();
     std::shared_ptr<ov::Node> new_shape_node;
@@ -37,9 +38,11 @@ OutputVector translate_reshape(const NodeContext& context) {
             ov::op::v0::Constant::create(ov::element::i64,
                                          {3},
                                          std::vector<int64_t>{(int64_t)output_shape[0], -1, (int64_t)output_shape[2]});
-    } else {
+    } else if (op_case == 3) {
         new_shape_node =
             ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{(int64_t) output_shape[0], -1, 1});
+    } else if (op_case == 4) {
+        return {context.get_input(0).get_node_shared_ptr()->input_value(0)};
     }
     auto res = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), new_shape_node, false);
     return rename_outputs_with_suffix({res}, context.get_name());
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index 3f728c242d..588404df19 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -6,6 +6,8 @@
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
+#include <iomanip>
+#include <iostream>
 #include <map>
 #include <memory>
 #include <openvino/openvino.hpp>
@@ -418,17 +420,50 @@ void print_output_tensor_info(const std::string& name, const ov::Tensor& tensor,
                              std::map<std::string, void*>& output_dst) {
     std::cout << "Output name: " << name << ", Output shape: " << tensor.get_shape()
               << ", Address: " << output_dst[name] << std::endl;
+
+    auto print_float_stats = [](const std::string& type_name, size_t size, auto get_value) {
+        if (size == 0) {
+            return;
+        }
+
+        float first = get_value(0);
+        float min = first;
+        float max = first;
+        double sum = first;
+
+        for (size_t i = 1; i < size; ++i) {
+            float v = get_value(i);
+            if (v < min) {
+                min = v;
+            }
+            if (v > max) {
+                max = v;
+            }
+            sum += v;
+        }
+        double mean = sum / size;
+
+        std::cout << std::right << std::setw(6) << type_name << std::right << std::setw(12) << "First" << std::setw(12)
+                  << "Min" << std::setw(12) << "Max" << std::setw(12) << "Mean" << std::endl;
+        std::cout << std::right << std::setw(6) << "" << std::right << std::setw(12) << first << std::setw(12) << min
+                  << std::setw(12) << max << std::setw(12) << mean << std::endl;
+    };
+
     switch (tensor.get_element_type()) {
-    case ov::element::f32:
-        std::cout << *(tensor.data<float>()) << std::endl;
-        std::cout << checksum(tensor.data<float>(), tensor.get_byte_size()) << std::endl;
-        break;
-    case ov::element::f16:
-        std::cout << *(tensor.data<ov::float16>()) << std::endl;
-        std::cout << checksum(tensor.data<ov::float16>(), tensor.get_byte_size()) << std::endl;
-        break;
-    default:
-        break;
+    case ov::element::f32: {
+        const float* data = tensor.data<float>();
+        size_t size = tensor.get_size();
+        print_float_stats("[f32]", size, [data](size_t i) { return data[i]; });
+        break;
+    }
+    case ov::element::f16: {
+        const ov::float16* data = tensor.data<ov::float16>();
+        size_t size = tensor.get_size();
+        print_float_stats("[f16]", size, [data](size_t i) { return static_cast<float>(data[i]); });
+        break;
+    }
+    default:
+        break;
     }
 }
 