Fix CI; Disable test-backend-ops

This commit is contained in:
Yu, Zijun 2025-08-07 15:22:58 +08:00 committed by Mustafa Cavus
parent 6ab76ed10a
commit dd80b04235
4 changed files with 7 additions and 7 deletions

View File

@@ -174,7 +174,7 @@ if [ ! -z ${GG_BUILD_OPENVINO} ]; then
echo "source /opt/intel/openvino/setupvars.sh"
exit 1
fi
-CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON"
+CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF"
fi
## helpers

View File

@@ -32,7 +32,7 @@
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
-#include "ggml-quant.hpp"
+#include "ggml-quants.hpp"
GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgraph, bool is_static, bool is_first_token,
int context_size, int num_heads, int num_heads_kv, int head_size) :

View File

@@ -1,4 +1,4 @@
-#include "ggml-quant.hpp"
+#include "ggml-quants.hpp"
#include <cstdint>
#include <openvino/core/parallel.hpp>
@@ -75,11 +75,11 @@ void extract_q8_0_data(const ggml_tensor* tensor,
auto weights = static_cast<uint8_t*>(weights_arr.data());
auto scales = scales_arr.data<ov::element_type_traits<ov::element::f16>::value_type>();
auto biases = biases_arr.data<ov::element_type_traits<ov::element::f16>::value_type>();
-for (int64_t i = 0; i < scales_arr.get_size(); i++) {
+for (size_t i = 0; i < scales_arr.get_size(); i++) {
uint8_t* block_data = data + i * bytes_per_block;
scales[i] = ov::float16::from_bits(*(uint16_t*)block_data);
biases[i] = ov::float16(-128.f * static_cast<float>(scales[i]));
-for (int64_t j = 0; j < weights_per_block; ++j) {
+for (size_t j = 0; j < weights_per_block; ++j) {
uint8_t x = block_data[j + 2]; // j+2 to skip the scale bytes.
// Original data is in int8_t, so we add a bias of -128 and invert the
// first bit.
@@ -128,7 +128,7 @@ void extract_q4_k_data(const ggml_tensor* tensor,
// Extract qs1 and qs2
uint8_t* qs1 = block_data + 4;
-uint8_t* qs2 = block_data + 16;
+// uint8_t* qs2 = block_data + 16;
scales[i * 8] = ov::float16(scale_scales * static_cast<float>((*(qs1) & 0b111111)));
scales[i * 8 + 1] = ov::float16(scale_scales * static_cast<float>((*(qs1 + 1) & 0b111111)));
@@ -170,7 +170,7 @@ void extract_q6_k_data(const ggml_tensor* tensor,
auto scales = scales_arr.data<ov::element_type_traits<ov::element::f16>::value_type>();
auto biases = biases_arr.data<ov::element_type_traits<ov::element::f16>::value_type>();
// std::string name(tensor.name, tensor.namelen);
-for (int64_t i = 0; i < n_super_block; i++) {
+for (size_t i = 0; i < n_super_block; i++) {
uint8_t* block_data = data + i * bytes_per_block;
float scale_factor =