Temporary changes for mark decompression

This commit is contained in:
Cavus Mustafa 2025-07-28 17:14:20 -07:00 committed by Mustafa Cavus
parent 4e7f04a307
commit 9cf56d6837
2 changed files with 13 additions and 1 deletion

View File

@ -28,7 +28,17 @@ OutputVector translate_mulmat(const NodeContext& context) {
ov::Output<Node> res;
ov::Output<ov::Node> B = context.get_input(0);
ov::Output<ov::Node> A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
ov::Output<ov::Node> A = context.get_input(1);
if (context.get_op_case() == 1) {
if (context.get_input_type(0) == ov::element::f16) {
B = std::make_shared<ov::op::v0::Convert>(context.get_input(0), ov::element::f32);
}
if (context.get_input_type(1) == ov::element::f16) {
A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), ov::element::f32);
}
} else {
A = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
}
auto B_shape = context.get_input_shape(0).to_shape();
auto A_shape = context.get_input_shape(1).to_shape();

View File

@ -22,6 +22,7 @@
#include <openvino/op/unsqueeze.hpp>
#include <openvino/pass/constant_folding.hpp>
#include <openvino/pass/make_stateful.hpp>
#include <transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp>
#include "ggml-openvino/openvino/node_context.hpp"
#include "ggml-openvino/openvino/utils.hpp"
@ -258,6 +259,7 @@ void TranslateSession::apply_transformations(const std::shared_ptr<Model>& model
ov::pass::Manager manager;
manager.set_per_pass_validation(true);
manager.register_pass<ov::pass::MarkCompressedFloatConstants>();
manager.register_pass<ov::pass::ConstantFolding>();
if (!ggml_model_decoder->is_static()) {