diff --git a/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp b/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp index ad5cd3f6ba..8be9e8deb0 100644 --- a/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp +++ b/ggml/src/ggml-openvino/openvino/op/glu_geglu.cpp @@ -26,7 +26,7 @@ OutputVector translate_glu_geglu(const NodeContext & context) { src1 = context.get_input(1); } else { auto combined = context.get_input(0); - auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {3}); + auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1}); auto split = std::make_shared(combined, split_axis, 2); src0 = split->output(0); src1 = split->output(1); diff --git a/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp b/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp index 2b7f13629f..6e0b85517e 100644 --- a/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +++ b/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp @@ -26,7 +26,7 @@ OutputVector translate_glu_swiglu(const NodeContext & context) { src1 = context.get_input(1); } else { auto combined = context.get_input(0); - auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {3}); + auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1}); auto split = std::make_shared(combined, split_axis, 2); src0 = split->output(0); src1 = split->output(1); diff --git a/ggml/src/ggml-openvino/openvino/op/rope.cpp b/ggml/src/ggml-openvino/openvino/op/rope.cpp index 01bc46131e..44e3368217 100644 --- a/ggml/src/ggml-openvino/openvino/op/rope.cpp +++ b/ggml/src/ggml-openvino/openvino/op/rope.cpp @@ -70,22 +70,16 @@ OutputVector translate_rope(const NodeContext & context) { constexpr int ROPE_TYPE_NORM = 0; if (mode == ROPE_TYPE_NORM) { + auto neg_one = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1}); auto two = ov::op::v0::Constant::create(ov::element::i64, {1}, {2}); auto end = ov::op::v0::Constant::create(ov::element::i64, {1}, {output_shape[3]}); Output even_slice; Output odd_slice; - int32_t unsqueeze_dim = 4; - if (context.is_stateful()) { - unsqueeze_dim = 3; - even_slice = std::make_shared(data_node, zero, end, two, two); - odd_slice = std::make_shared(data_node, one, end, two, two); - } else { - auto three = ov::op::v0::Constant::create(ov::element::i64, {1}, {3}); - even_slice = std::make_shared(data_node, zero, end, two, three); - odd_slice = std::make_shared(data_node, one, end, two, three); - } + int32_t unsqueeze_dim = context.is_stateful() ? 3 : 4; + even_slice = std::make_shared(data_node, zero, end, two, neg_one); + odd_slice = std::make_shared(data_node, one, end, two, neg_one); Output first_half = std::make_shared(std::make_shared(even_slice, cos_theta_node), @@ -105,7 +99,7 @@ OutputVector translate_rope(const NodeContext & context) { res = std::make_shared(stack, data_shape, false); } else if (mode == ROPE_TYPE_NEOX) { auto data_split = std::make_shared( - data_node, ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {3}), 2); + data_node, ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {-1}), 2); Output slice_data_node_0 = data_split->outputs()[0]; Output slice_data_node_1 = data_split->outputs()[1]; @@ -117,11 +111,7 @@ OutputVector translate_rope(const NodeContext & context) { std::make_shared(slice_data_node_0, sin_theta_node), std::make_shared(slice_data_node_1, cos_theta_node)); - int32_t concat_dim = 3; - if (context.is_stateful()) { - concat_dim = 2; - } - res = std::make_shared(ov::OutputVector{first_half_node, second_half_node}, concat_dim); + res = std::make_shared(ov::OutputVector{first_half_node, second_half_node}, -1); } return rename_outputs_with_suffix({res}, context.get_name()); diff --git a/ggml/src/ggml-openvino/openvino/utils.cpp b/ggml/src/ggml-openvino/openvino/utils.cpp index b7553f99c8..a0215b97b1 100644 --- a/ggml/src/ggml-openvino/openvino/utils.cpp +++ b/ggml/src/ggml-openvino/openvino/utils.cpp @@ -216,7 +216,7 @@ ov::Output process_view_input(const NodeContext & context, int input_i auto begin = ov::op::v0::Constant::create(ov::element::i64, {1}, {split_addr}); auto end = ov::op::v0::Constant::create(ov::element::i64, {1}, {slice_end}); auto stride = ov::op::v0::Constant::create(ov::element::i64, {1}, {1}); - auto axes = ov::op::v0::Constant::create(ov::element::i64, {1}, {3}); + auto axes = ov::op::v0::Constant::create(ov::element::i64, {1}, {context.is_stateful() ? 2 : 3}); auto sliced = std::make_shared(input, begin, end, stride, axes); return sliced; } diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 8c3717472b..edf42cd985 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -497,6 +497,7 @@ ov::Tensor get_ov_input_tensor(std::shared_ptr ggml_decoder, cons ov::Tensor get_ov_input_tensor_static_decode(std::shared_ptr ggml_decoder, const std::string & param_name) { + // NPU decoding stage const auto * ggml_tensor = ggml_decoder->get_input_ggml_tensor(param_name); const auto * op = ggml_decoder->get_tensor_used_op(ggml_tensor); @@ -540,6 +541,7 @@ ov::Tensor get_ov_input_tensor_static_decode(std::shared_ptr ggml ov::Tensor get_ov_input_tensor_static_prefill(std::shared_ptr ggml_decoder, const std::string & param_name, int chunk_index) { + // NPU prompt processing stage const auto * ggml_tensor = ggml_decoder->get_input_ggml_tensor(param_name); const auto * op = ggml_decoder->get_tensor_used_op(ggml_tensor);