diff --git a/ci/run.sh b/ci/run.sh index bfce48f337..564dd270bd 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -174,7 +174,7 @@ if [ ! -z ${GG_BUILD_OPENVINO} ]; then echo "source /opt/intel/openvino/setupvars.sh" exit 1 fi - CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON" + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF" fi ## helpers diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index cd897e5f68..cde99f3288 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -32,7 +32,7 @@ #include "ggml-backend-impl.h" #include "ggml-backend.h" -#include "ggml-quant.hpp" +#include "ggml-quants.hpp" GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgraph, bool is_static, bool is_first_token, int context_size, int num_heads, int num_heads_kv, int head_size) : diff --git a/ggml/src/ggml-openvino/ggml-quant.cpp b/ggml/src/ggml-openvino/ggml-quants.cpp similarity index 98% rename from ggml/src/ggml-openvino/ggml-quant.cpp rename to ggml/src/ggml-openvino/ggml-quants.cpp index 14ef58a3f7..8d4fb14189 100644 --- a/ggml/src/ggml-openvino/ggml-quant.cpp +++ b/ggml/src/ggml-openvino/ggml-quants.cpp @@ -1,4 +1,4 @@ -#include "ggml-quant.hpp" +#include "ggml-quants.hpp" #include #include @@ -75,11 +75,11 @@ void extract_q8_0_data(const ggml_tensor* tensor, auto weights = static_cast(weights_arr.data()); auto scales = scales_arr.data::value_type>(); auto biases = biases_arr.data::value_type>(); - for (int64_t i = 0; i < scales_arr.get_size(); i++) { + for (size_t i = 0; i < scales_arr.get_size(); i++) { uint8_t* block_data = data + i * bytes_per_block; scales[i] = ov::float16::from_bits(*(uint16_t*)block_data); biases[i] = ov::float16(-128.f * static_cast(scales[i])); - for (int64_t j = 0; j < weights_per_block; ++j) { + for (size_t j = 0; j < weights_per_block; ++j) { uint8_t x = block_data[j + 2]; // j+2 to skip the scale bytes. // Original data is in int8_t, so we add a bias of -128 and invert the // first bit. @@ -128,7 +128,7 @@ void extract_q4_k_data(const ggml_tensor* tensor, // Extract qs1 and qs2 uint8_t* qs1 = block_data + 4; - uint8_t* qs2 = block_data + 16; + // uint8_t* qs2 = block_data + 16; scales[i * 8] = ov::float16(scale_scales * static_cast((*(qs1) & 0b111111))); scales[i * 8 + 1] = ov::float16(scale_scales * static_cast((*(qs1 + 1) & 0b111111))); @@ -170,7 +170,7 @@ void extract_q6_k_data(const ggml_tensor* tensor, auto scales = scales_arr.data::value_type>(); auto biases = biases_arr.data::value_type>(); // std::string name(tensor.name, tensor.namelen); - for (int64_t i = 0; i < n_super_block; i++) { + for (size_t i = 0; i < n_super_block; i++) { uint8_t* block_data = data + i * bytes_per_block; float scale_factor = diff --git a/ggml/src/ggml-openvino/ggml-quant.hpp b/ggml/src/ggml-openvino/ggml-quants.hpp similarity index 100% rename from ggml/src/ggml-openvino/ggml-quant.hpp rename to ggml/src/ggml-openvino/ggml-quants.hpp