diff --git a/ggml/src/ggml-openvino/.clang-format b/ggml/src/ggml-openvino/.clang-format index 8491f4e5c6..9382a117b8 100644 --- a/ggml/src/ggml-openvino/.clang-format +++ b/ggml/src/ggml-openvino/.clang-format @@ -1,4 +1,161 @@ --- +# Override root .clang-format AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false ReferenceAlignment: Left +PointerAlignment: Left + +Language: Cpp +AlignAfterOpenBracket: Align +AlignArrayOfStructures: Left +AlignConsecutiveBitFields: AcrossComments +AlignConsecutiveMacros: AcrossComments +# AlignConsecutiveShortCaseStatements: AcrossComments +AlignEscapedNewlines: Left # LeftWithLastLine +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 1 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: false +# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Inline +AllowShortLoopsOnASingleLine: false +AlwaysBreakBeforeMultilineStrings: true +BinPackArguments: true +BinPackParameters: true # OnePerLine +BitFieldColonSpacing: Both +BreakBeforeBraces: Custom # Attach +BraceWrapping: + AfterCaseLabel: true + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false +# BreakAdjacentStringLiterals: true +BreakAfterAttributes: Never +BreakBeforeBinaryOperators: None +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: false +# BreakBinaryOperations: Never +BreakConstructorInitializers: AfterColon +# BreakFunctionDefinitionParameters: false +BreakInheritanceList: AfterComma +BreakStringLiterals: true +# BreakTemplateDeclarations: Yes +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +EmptyLineBeforeAccessModifier: Leave +EmptyLineAfterAccessModifier: Never +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + - Regex: '.*' + Priority: 3 + SortPriority: 0 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: true +IndentCaseLabels: true +IndentExternBlock: NoIndent +IndentGotoLabels: false +IndentPPDirectives: AfterHash +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: true # NOTE: may lead to incorrect formatting +InsertNewlineAtEOF: true +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +LineEnding: LF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true +PPIndentWidth: -1 +PackConstructorInitializers: CurrentLine +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +QualifierAlignment: Left +#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict'] +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' +ReflowComments: false # IndentOnly +SeparateDefinitionBlocks: Always +SortIncludes: CaseInsensitive +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: c++17 +TabWidth: 4 +UseTab: Never +WhitespaceSensitiveMacros: ['STRINGIZE'] +... diff --git a/ggml/src/ggml-openvino/openvino/op_table.cpp b/ggml/src/ggml-openvino/openvino/op_table.cpp index 11d1c773c3..bf7d54d9a1 100644 --- a/ggml/src/ggml-openvino/openvino/op_table.cpp +++ b/ggml/src/ggml-openvino/openvino/op_table.cpp @@ -14,25 +14,27 @@ namespace frontend { namespace ggml { std::unordered_map get_supported_ops() { - using namespace ov::op; - return {{"GGML_OP_ADD", op::translate_1to1_match_2_inputs}, - {"GGML_OP_ADD1", op::translate_1to1_match_2_inputs}, - {"GGML_OP_CONT", op::translate_cont}, - {"GGML_OP_CPY", op::translate_cpy}, - {"GGML_OP_DIV", op::translate_1to1_match_2_inputs}, - {"GGML_OP_GET_ROWS", op::translate_get_rows}, - {"GGML_OP_MUL", op::translate_1to1_match_2_inputs}, - {"GGML_OP_MUL_MAT", op::translate_mulmat}, - {"GGML_OP_PERMUTE", op::translate_permute}, - {"GGML_OP_RESHAPE", op::translate_reshape}, - {"GGML_OP_RMS_NORM", op::translate_rms_norm}, - {"GGML_OP_ROPE", op::translate_rope}, - {"GGML_OP_SCALE", op::translate_scale}, - {"GGML_OP_SOFT_MAX", op::translate_soft_max}, - {"GGML_OP_SUB", op::translate_1to1_match_2_inputs}, - {"GGML_OP_TRANSPOSE", op::translate_transpose}, - {"GGML_UNARY_OP_SILU", op::translate_unary_silu}, - {"GGML_OP_VIEW", op::translate_view}}; + using namespace ov::op; + return { + { "GGML_OP_ADD", op::translate_1to1_match_2_inputs }, + { "GGML_OP_ADD1", op::translate_1to1_match_2_inputs }, + { "GGML_OP_CONT", op::translate_cont }, + { "GGML_OP_CPY", op::translate_cpy }, + { "GGML_OP_DIV", op::translate_1to1_match_2_inputs }, + { "GGML_OP_GET_ROWS", op::translate_get_rows }, + { "GGML_OP_MUL", op::translate_1to1_match_2_inputs }, + { "GGML_OP_MUL_MAT", op::translate_mulmat }, + { "GGML_OP_PERMUTE", op::translate_permute }, + { "GGML_OP_RESHAPE", op::translate_reshape }, + { "GGML_OP_RMS_NORM", op::translate_rms_norm }, + { "GGML_OP_ROPE", op::translate_rope }, + { "GGML_OP_SCALE", op::translate_scale }, + { "GGML_OP_SOFT_MAX", op::translate_soft_max }, + { "GGML_OP_SUB", op::translate_1to1_match_2_inputs }, + { "GGML_OP_TRANSPOSE", op::translate_transpose }, + { "GGML_UNARY_OP_SILU", op::translate_unary_silu }, + { "GGML_OP_VIEW", op::translate_view } + }; } } // namespace ggml diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index ebcf8fdd75..d20e671064 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -27,15 +27,13 @@ std::shared_ptr get_ggml_decoder(struct ggml_cgraph* cgraph, bool return std::make_shared(nullptr, cgraph, is_static, is_first_token); } -ov::Tensor convert_ggml_input_to_ov(std::shared_ptr ggml_decoder, - const std::string& name) { - auto *input_data = ggml_decoder->get_input_ggml_tensor(name)->data; - ov::Tensor input_tensor; - ov::Shape input_shape = ggml_decoder->get_input_shape(name).to_shape(); - std::vector input_stride = ggml_decoder->get_input_stride(name); - input_tensor = - ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data); - return input_tensor; +ov::Tensor convert_ggml_input_to_ov(std::shared_ptr ggml_decoder, const std::string& name) { + auto* input_data = ggml_decoder->get_input_ggml_tensor(name)->data; + ov::Tensor input_tensor; + ov::Shape input_shape = ggml_decoder->get_input_shape(name).to_shape(); + std::vector input_stride = ggml_decoder->get_input_stride(name); + input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data); + return input_tensor; } std::map get_ggml_graph_output_dst(std::shared_ptr ggml_decoder) { @@ -61,21 +59,20 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c static std::string device = getenv("GGML_OPENVINO_DEVICE") ? getenv("GGML_OPENVINO_DEVICE") : ""; if (device.empty()) { - const std::vector preferred_device = {"GPU", "CPU", "NPU"}; - const auto available_devices = core.get_available_devices(); - for (const auto& dev : preferred_device) { - if (std::find(available_devices.begin(), available_devices.end(), - dev) != available_devices.end()) { - device = dev; - break; + const std::vector preferred_device = { "GPU", "CPU", "NPU" }; + const auto available_devices = core.get_available_devices(); + for (const auto& dev : preferred_device) { + if (std::find(available_devices.begin(), available_devices.end(), dev) != available_devices.end()) { + device = dev; + break; + } } - } } bool is_static = device == "NPU" ? true : false; ov::AnyMap config; if (device == "NPU") { - config = get_npu_config(); + config = get_npu_config(); } auto start_time = ggml_time_us(); @@ -107,10 +104,10 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c if (is_static) { if (is_first_token) { - model = compiled_cache_prefill[cgraph].first; + model = compiled_cache_prefill[cgraph].first; compiled_model = compiled_cache_prefill[cgraph].second; } else { - model = compiled_cache_kvcache[cgraph].first; + model = compiled_cache_kvcache[cgraph].first; compiled_model = compiled_cache_kvcache[cgraph].second; } } else { @@ -141,7 +138,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c if (getenv("GGML_OPENVINO_DUMP_IR")) { char timestamped_filename[64]; - auto timestamp = (long long)ggml_time_us(); + auto timestamp = (long long) ggml_time_us(); snprintf(timestamped_filename, sizeof(timestamped_filename), "model_prefill_%lld.xml", timestamp); ov::serialize(model, timestamped_filename); snprintf(timestamped_filename, sizeof(timestamped_filename), "model_kvcache_%lld.xml", timestamp); @@ -161,7 +158,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c if (getenv("GGML_OPENVINO_DUMP_IR")) { char timestamped_filename[64]; - auto timestamp = (long long)ggml_time_us(); + auto timestamp = (long long) ggml_time_us(); snprintf(timestamped_filename, sizeof(timestamped_filename), "model_%lld.xml", timestamp); ov::serialize(model, timestamped_filename); } @@ -227,68 +224,59 @@ ov::AnyMap get_npu_config() { return config; } -ov::Tensor get_ov_input_tensor(std::shared_ptr ggml_decoder, - const std::string& param_name) { - bool is_static = ggml_decoder->is_static(); - bool is_first_token = ggml_decoder->is_first_token(); +ov::Tensor get_ov_input_tensor(std::shared_ptr ggml_decoder, const std::string& param_name) { + bool is_static = ggml_decoder->is_static(); + bool is_first_token = ggml_decoder->is_first_token(); - ov::Tensor input_tensor; - if (ggml_decoder->get_model_extra_inputs().find(param_name) != - ggml_decoder->get_model_extra_inputs().end()) { - input_tensor = *ggml_decoder->get_model_extra_input_values().at(param_name); + ov::Tensor input_tensor; + if (ggml_decoder->get_model_extra_inputs().find(param_name) != ggml_decoder->get_model_extra_inputs().end()) { + input_tensor = *ggml_decoder->get_model_extra_input_values().at(param_name); - } else if (!is_static) { - input_tensor = convert_ggml_input_to_ov(ggml_decoder, param_name); - - } else { - if (param_name == "inp_tokens" || param_name == "inp_pos") { - if (is_first_token) { - size_t max_token_len = ggml_decoder->get_max_token_len(); - const auto *input_tensor_ggml = - ggml_decoder->get_input_ggml_tensor(param_name); - std::vector padded_data = - pad_input(input_tensor_ggml, 1, max_token_len, 0); - input_tensor = - ov::Tensor(ov::element::i32, ov::Shape{1, 1, max_token_len}); - auto *data_ptr = input_tensor.data(); - std::copy(padded_data.begin(), padded_data.end(), data_ptr); - } else { + } else if (!is_static) { input_tensor = convert_ggml_input_to_ov(ggml_decoder, param_name); - } - - } else if (param_name == "KQ_mask") { - size_t max_token_len = ggml_decoder->get_max_token_len(); - const auto *input_tensor_ggml = - ggml_decoder->get_input_ggml_tensor(param_name); - if (is_first_token) { - std::vector padded_data = pad_input( - input_tensor_ggml, max_token_len, max_token_len, -INFINITY); - set_zero_diagonal(padded_data, max_token_len); - input_tensor = ov::Tensor(ov::element::f32, - ov::Shape{1, max_token_len, max_token_len}); - auto *data_ptr = input_tensor.data(); - std::copy(padded_data.begin(), padded_data.end(), data_ptr); - } else { - std::vector padded_data = - pad_input(input_tensor_ggml, 1, max_token_len, -INFINITY); - input_tensor = - ov::Tensor(ov::element::f32, ov::Shape{1, 1, max_token_len}); - auto *data_ptr = input_tensor.data(); - std::copy(padded_data.begin(), padded_data.end(), data_ptr); - } } else { - input_tensor = convert_ggml_input_to_ov(ggml_decoder, param_name); + if (param_name == "inp_tokens" || param_name == "inp_pos") { + if (is_first_token) { + size_t max_token_len = ggml_decoder->get_max_token_len(); + const auto* input_tensor_ggml = ggml_decoder->get_input_ggml_tensor(param_name); + std::vector padded_data = pad_input(input_tensor_ggml, 1, max_token_len, 0); + input_tensor = ov::Tensor(ov::element::i32, ov::Shape{ 1, 1, max_token_len }); + auto* data_ptr = input_tensor.data(); + std::copy(padded_data.begin(), padded_data.end(), data_ptr); + } else { + input_tensor = convert_ggml_input_to_ov(ggml_decoder, param_name); + } + + } else if (param_name == "KQ_mask") { + size_t max_token_len = ggml_decoder->get_max_token_len(); + const auto* input_tensor_ggml = ggml_decoder->get_input_ggml_tensor(param_name); + if (is_first_token) { + std::vector padded_data = + pad_input(input_tensor_ggml, max_token_len, max_token_len, -INFINITY); + set_zero_diagonal(padded_data, max_token_len); + input_tensor = ov::Tensor(ov::element::f32, ov::Shape{ 1, max_token_len, max_token_len }); + auto* data_ptr = input_tensor.data(); + std::copy(padded_data.begin(), padded_data.end(), data_ptr); + } else { + std::vector padded_data = pad_input(input_tensor_ggml, 1, max_token_len, -INFINITY); + input_tensor = ov::Tensor(ov::element::f32, ov::Shape{ 1, 1, max_token_len }); + auto* data_ptr = input_tensor.data(); + std::copy(padded_data.begin(), padded_data.end(), data_ptr); + } + + } else { + input_tensor = convert_ggml_input_to_ov(ggml_decoder, param_name); + } } - } - return input_tensor; + return input_tensor; } size_t checksum(const void* data, size_t size) { const uint8_t* bytes = static_cast(data); size_t sum = 0; for (size_t i = 0; i < size; ++i) { - sum += (uint8_t)i; + sum += (uint8_t) i; sum += bytes[i]; } return sum; @@ -302,41 +290,38 @@ void print_input_tensor_info(const std::string& name, const ov::Tensor& tensor) std::cout << "Input name: " << name << ", Input shape: " << tensor.get_shape() << ", Address: " << tensor.data() << std::endl; switch (tensor.get_element_type()) { - case ov::element::f32: - std::cout << *(tensor.data()) << std::endl; - break; - case ov::element::f16: - std::cout << ov::float16::from_bits(*(tensor.data())) - << std::endl; - break; - case ov::element::i32: - std::cout << *(tensor.data()) << std::endl; - break; - case ov::element::i64: - std::cout << *(tensor.data()) << std::endl; - break; - default: - break; + case ov::element::f32: + std::cout << *(tensor.data()) << std::endl; + break; + case ov::element::f16: + std::cout << ov::float16::from_bits(*(tensor.data())) << std::endl; + break; + case ov::element::i32: + std::cout << *(tensor.data()) << std::endl; + break; + case ov::element::i64: + std::cout << *(tensor.data()) << std::endl; + break; + default: + break; } } -void print_output_tensor_info(const std::string& name, - const ov::Tensor& tensor, +void print_output_tensor_info(const std::string& name, const ov::Tensor& tensor, std::map& output_dst) { std::cout << "Output name: " << name << ", Output shape: " << tensor.get_shape() << ", Address: " << output_dst[name] << std::endl; switch (tensor.get_element_type()) { - case ov::element::f32: - std::cout << *(tensor.data()) << std::endl; - std::cout << checksum(tensor.data(), tensor.get_byte_size()) << std::endl; - break; - case ov::element::f16: - std::cout << ov::float16::from_bits(*(tensor.data())) - << std::endl; - std::cout << checksum(tensor.data(), tensor.get_byte_size()) << std::endl; - break; - default: - break; + case ov::element::f32: + std::cout << *(tensor.data()) << std::endl; + std::cout << checksum(tensor.data(), tensor.get_byte_size()) << std::endl; + break; + case ov::element::f16: + std::cout << ov::float16::from_bits(*(tensor.data())) << std::endl; + std::cout << checksum(tensor.data(), tensor.get_byte_size()) << std::endl; + break; + default: + break; } } @@ -348,9 +333,9 @@ void set_zero_diagonal(std::vector& matrix, size_t dim) { } } -bool is_prefill(struct ggml_cgraph * cgraph) { +bool is_prefill(struct ggml_cgraph* cgraph) { for (int i = 0; i < cgraph->n_nodes; ++i) { - auto * op = cgraph->nodes[i]; + auto* op = cgraph->nodes[i]; for (int j = 0; j < GGML_MAX_SRC; ++j) { auto* src = op->src[j]; if (src == nullptr) {