gguf-hash: add RVV tensor hashing using xxh3

Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>
This commit is contained in:
ixgbe 2026-01-04 10:04:04 +08:00
parent cef1d23c5a
commit 32a347340a
4 changed files with 248 additions and 7 deletions

View File

@ -20,3 +20,11 @@ target_link_libraries(${TARGET} PRIVATE sha256)
target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
if (GGML_RVV)
message(STATUS "gguf-hash: RISC-V Vector support enabled")
target_compile_options(xxhash PRIVATE -march=rv64gcv -mabi=lp64d)
target_compile_options(${TARGET} PRIVATE -march=rv64gcv -mabi=lp64d)
endif()
endif()

View File

@ -8,6 +8,7 @@ CLI to hash GGUF files to detect difference on a per model and per tensor level.
- `--help`: display help message
- `--xxh64`: use xhash 64bit hash mode (default)
- `--sha1`: use sha1
- `--xxh3`: use xxh3
- `--uuid`: use uuid
- `--sha256`: use sha256
- `--all`: use all hash

135
examples/gguf-hash/deps/xxhash/xxhash.h Normal file → Executable file
View File

@ -1092,6 +1092,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const
* - WebAssembly SIMD128
* - POWER8 VSX
* - s390x ZVector
* - RVV
* This can be controlled via the @ref XXH_VECTOR macro, but it automatically
* selects the best version according to predefined macros. For the x86 family, an
* automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
@ -3751,6 +3752,8 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
# include <immintrin.h>
# elif defined(__SSE2__)
# include <emmintrin.h>
# elif defined(__riscv_v)
# include <riscv_vector.h>
# endif
#endif
@ -3873,6 +3876,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
*/
XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
XXH_RVV = 7,
};
/*!
* @ingroup tuning
@ -3895,6 +3899,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
# define XXH_NEON 4
# define XXH_VSX 5
# define XXH_SVE 6
# define XXH_RVV 7
#endif
#ifndef XXH_VECTOR /* can be defined on command line */
@ -3919,6 +3924,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|| (defined(__s390x__) && defined(__VEC__)) \
&& defined(__GNUC__) /* TODO: IBM XL */
# define XXH_VECTOR XXH_VSX
# elif defined(__riscv_v)
# define XXH_VECTOR XXH_RVV
# else
# define XXH_VECTOR XXH_SCALAR
# endif
@ -3935,6 +3942,12 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
# define XXH_VECTOR XXH_SCALAR
#endif
#if (XXH_VECTOR == XXH_RVV) && !defined(__riscv_v)
# warning "__riscv_v isn't supported. Use SCALAR instead."
# undef XXH_VECTOR
# define XXH_VECTOR XXH_SCALAR
#endif
/*
* Controls the alignment of the accumulator,
* for compatibility with aligned vector loads, which are usually faster.
@ -3956,6 +3969,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
# define XXH_ACC_ALIGN 64
# elif XXH_VECTOR == XXH_SVE /* sve */
# define XXH_ACC_ALIGN 64
# elif XXH_VECTOR == XXH_RVV /* rvv */
# define XXH_ACC_ALIGN 64
# endif
#endif
@ -3964,6 +3979,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
#elif XXH_VECTOR == XXH_SVE
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
#elif XXH_VECTOR == XXH_RVV
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
#else
# define XXH_SEC_ALIGN 8
#endif
@ -5626,6 +5643,117 @@ XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
}
#endif
#if (XXH_VECTOR == XXH_RVV)
XXH_FORCE_INLINE void
XXH3_accumulate_512_rvv(void* XXH_RESTRICT acc,
const void* XXH_RESTRICT input,
const void* XXH_RESTRICT secret)
{
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_input[XXH_ACC_NB];
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_secret[XXH_ACC_NB];
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_acc[XXH_ACC_NB];
memcpy(aligned_input, input, XXH_STRIPE_LEN);
memcpy(aligned_secret, secret, XXH_STRIPE_LEN);
memcpy(aligned_acc, acc, XXH_STRIPE_LEN);
size_t vl = __riscv_vsetvl_e64m8(8);
vuint64m8_t data_vec = __riscv_vle64_v_u64m8(aligned_input, vl);
vuint64m8_t key_vec = __riscv_vle64_v_u64m8(aligned_secret, vl);
vuint64m8_t acc_vec = __riscv_vle64_v_u64m8(aligned_acc, vl);
vuint64m8_t data_key = __riscv_vxor_vv_u64m8(data_vec, key_vec, vl);
vuint64m8_t data_key_lo = __riscv_vand_vx_u64m8(data_key, 0xFFFFFFFFULL, vl);
vuint64m8_t data_key_hi = __riscv_vsrl_vx_u64m8(data_key, 32, vl);
vuint64m8_t idx = __riscv_vxor_vx_u64m8(__riscv_vid_v_u64m8(vl), 1, vl);
vuint64m8_t data_swap = __riscv_vrgather_vv_u64m8(data_vec, idx, vl);
acc_vec = __riscv_vadd_vv_u64m8(acc_vec, data_swap, vl);
acc_vec = __riscv_vmacc_vv_u64m8(acc_vec, data_key_lo, data_key_hi, vl);
__riscv_vse64_v_u64m8(aligned_acc, acc_vec, vl);
memcpy(acc, aligned_acc, XXH_STRIPE_LEN);
}
XXH_FORCE_INLINE void
XXH3_accumulate_rvv(xxh_u64* XXH_RESTRICT acc,
const xxh_u8* XXH_RESTRICT input,
const xxh_u8* XXH_RESTRICT secret,
size_t nbStripes)
{
if (nbStripes == 0) return;
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_acc[XXH_ACC_NB];
memcpy(aligned_acc, acc, XXH_STRIPE_LEN);
const uint64_t* xinput = (const uint64_t*) (const void*) input;
const uint64_t* xsecret = (const uint64_t*) (const void*) secret;
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_input[XXH_ACC_NB];
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_secret[XXH_ACC_NB];
size_t vl = __riscv_vsetvl_e64m8(8);
vuint64m8_t vacc = __riscv_vle64_v_u64m8(aligned_acc, vl);
vuint64m8_t idx = __riscv_vxor_vx_u64m8(__riscv_vid_v_u64m8(vl), 1, vl);
do {
XXH_PREFETCH((const xxh_u8*)xinput + XXH_PREFETCH_DIST);
memcpy(aligned_input, xinput, XXH_STRIPE_LEN);
memcpy(aligned_secret, xsecret, XXH_STRIPE_LEN);
vuint64m8_t data_vec = __riscv_vle64_v_u64m8(aligned_input, vl);
vuint64m8_t key_vec = __riscv_vle64_v_u64m8(aligned_secret, vl);
vuint64m8_t data_key = __riscv_vxor_vv_u64m8(data_vec, key_vec, vl);
vuint64m8_t data_key_lo = __riscv_vand_vx_u64m8(data_key, 0xFFFFFFFFULL, vl);
vuint64m8_t data_key_hi = __riscv_vsrl_vx_u64m8(data_key, 32, vl);
vuint64m8_t data_swap = __riscv_vrgather_vv_u64m8(data_vec, idx, vl);
vacc = __riscv_vadd_vv_u64m8(vacc, data_swap, vl);
vacc = __riscv_vmacc_vv_u64m8(vacc, data_key_lo, data_key_hi, vl);
xinput += 8;
xsecret += 1;
nbStripes--;
} while (nbStripes > 0);
__riscv_vse64_v_u64m8(aligned_acc, vacc, vl);
memcpy(acc, aligned_acc, XXH_STRIPE_LEN);
}
XXH_FORCE_INLINE void
XXH3_scrambleAcc_rvv(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
{
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_acc[XXH_ACC_NB];
XXH_ALIGN(XXH_ACC_ALIGN) uint64_t aligned_secret[XXH_ACC_NB];
memcpy(aligned_acc, acc, XXH_STRIPE_LEN);
memcpy(aligned_secret, secret, XXH_STRIPE_LEN);
size_t vl = __riscv_vsetvl_e64m8(8);
vuint64m8_t acc_vec = __riscv_vle64_v_u64m8(aligned_acc, vl);
vuint64m8_t key_vec = __riscv_vle64_v_u64m8(aligned_secret, vl);
vuint64m8_t shifted = __riscv_vsrl_vx_u64m8(acc_vec, 47, vl);
vuint64m8_t data_vec = __riscv_vxor_vv_u64m8(acc_vec, shifted, vl);
vuint64m8_t data_key = __riscv_vxor_vv_u64m8(data_vec, key_vec, vl);
acc_vec = __riscv_vmul_vx_u64m8(data_key, XXH_PRIME32_1, vl);
__riscv_vse64_v_u64m8(aligned_acc, acc_vec, vl);
memcpy(acc, aligned_acc, XXH_STRIPE_LEN);
}
#endif // XXH_VECTOR == XXH_RVV
/*!
* @internal
* @brief Scalar round for @ref XXH3_accumulate_512_scalar().
@ -5823,8 +5951,13 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#else /* scalar */
#elif (XXH_VECTOR == XXH_RVV)
#define XXH3_accumulate_512 XXH3_accumulate_512_rvv
#define XXH3_accumulate XXH3_accumulate_rvv
#define XXH3_scrambleAcc XXH3_scrambleAcc_rvv
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
#else /* scalar */
#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
#define XXH3_accumulate XXH3_accumulate_scalar
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar

111
examples/gguf-hash/gguf-hash.cpp Normal file → Executable file
View File

@ -33,6 +33,7 @@ extern "C" {
#define HASH_TYPE_SHA256_STR "sha256"
#define HASH_TYPE_SHA1_STR "sha1"
#define HASH_TYPE_XXH64_STR "xxh64"
#define HASH_TYPE_XXH3_STR "xxh3"
#define HASH_TYPE_UUID_STR "uuid"
@ -55,10 +56,11 @@ typedef enum {
struct hash_params {
std::string input;
bool xxh64 = false;
bool sha1 = false;
bool xxh64 = false;
bool xxh3 = false;
bool sha1 = false;
bool sha256 = false;
bool uuid = false;
bool uuid = false;
bool no_layer = false;
@ -68,6 +70,7 @@ struct hash_params {
struct manifest_check_params {
bool xxh64 = false;
bool xxh3 = false;
bool sha1 = false;
bool sha256 = false;
bool uuid = false;
@ -104,6 +107,7 @@ static void hash_print_usage(const char * executable) {
printf("options:\n");
printf(" -h, --help show this help message and exit\n");
printf(" --xxh64 use xxh64 hash\n");
printf(" --xxh3 use xxh3 hash\n");
printf(" --sha1 use sha1 hash\n");
printf(" --sha256 use sha256 hash\n");
printf(" --all use all hash\n");
@ -136,6 +140,11 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
params.xxh64 = true;
}
if (arg == "--xxh3") {
arg_found = true;
params.xxh3 = true;
}
if (arg == "--sha1") {
arg_found = true;
params.sha1 = true;
@ -156,6 +165,7 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
params.sha256 = true;
params.sha1 = true;
params.xxh64 = true;
params.xxh3 = true;
}
if (arg == "--no-layer") {
@ -224,6 +234,8 @@ static bool manifest_type(const std::string & manifest_file, manifest_check_para
manifest_check.sha1 = true;
} else if (file_hash_type == HASH_TYPE_XXH64_STR) {
manifest_check.xxh64 = true;
} else if (file_hash_type == HASH_TYPE_XXH3_STR) {
manifest_check.xxh3 = true;
} else if (file_hash_type == HASH_TYPE_UUID_STR) {
manifest_check.uuid = true;
}
@ -306,6 +318,19 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
}
}
// xxh3 init
XXH3_state_t* xxh3_model_hash_state = NULL;
if (hash_params.xxh3) {
xxh3_model_hash_state = XXH3_createState();
if (xxh3_model_hash_state == NULL) {
abort();
}
if (XXH3_64bits_reset(xxh3_model_hash_state) == XXH_ERROR) {
abort();
}
}
// sha1 init
SHA1_CTX sha1_model_hash_ctx;
if (hash_params.sha1) {
@ -376,6 +401,44 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
if (XXH64_update(xxh64_model_hash_state, raw_data, n_bytes) == XXH_ERROR) abort();
}
if (hash_params.xxh3) {
if (!hash_params.no_layer) {
// Per Layer Hash
XXH64_hash_t hash = XXH3_64bits(raw_data, n_bytes);
char hex_result[17];
for (int offset = 0; offset < 8; offset++) {
unsigned int shift_bits_by = (8 * (8 - offset - 1));
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH3_STR, hex_result, tensor_layer_name);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
tensor_layer_in_manifest = true;
tensor_layer_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
tensor_layer_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH3_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_XXH3_STR, hex_result, tensor_layer_name.c_str());
}
}
// Overall Model Hash
if (XXH3_64bits_update(xxh3_model_hash_state, raw_data, n_bytes) == XXH_ERROR) abort();
}
if (hash_params.sha1) {
if (!hash_params.no_layer) {
@ -485,6 +548,36 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
}
}
if (hash_params.xxh3) {
XXH64_hash_t const hash = XXH3_64bits_digest(xxh3_model_hash_state);
char hex_result[17];
for (int offset = 0; offset < 8; offset++) {
unsigned int shift_bits_by = (8 * (8 - offset - 1));
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
}
if (hash_params.manifest_is_usable) {
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH3_STR, hex_result, fname);
switch (verify_result) {
case HASH_MANIFEST_NOT_FOUND:
break;
case HASH_MANIFEST_MISMATCH:
model_in_manifest = true;
model_has_mismatch = true;
break;
case HASH_MANIFEST_OK:
model_in_manifest = true;
break;
}
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH3_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
} else {
printf("%-8s %-s %s\n", HASH_TYPE_XXH3_STR, hex_result, fname.c_str());
}
}
if (hash_params.sha1) {
unsigned char result[21];
SHA1Final(result, &sha1_model_hash_ctx);
@ -636,7 +729,7 @@ int main(int argc, const char ** argv) {
return HASH_EXIT_MANIFEST_FILE_ERROR;
}
if (!manifest_check.sha256 && !manifest_check.sha1 && !manifest_check.xxh64 && !manifest_check.uuid) {
if (!manifest_check.sha256 && !manifest_check.sha1 && !manifest_check.xxh64 && !manifest_check.xxh3 && !manifest_check.uuid) {
printf("ERROR manifest does not have any known hash format in %s", params.manifest_file.c_str());
return HASH_EXIT_MANIFEST_UNKNOWN_HASH;
}
@ -655,6 +748,10 @@ int main(int argc, const char ** argv) {
printf(" xxh64");
}
if (manifest_check.xxh3) {
printf(" xxh3");
}
if (manifest_check.uuid) {
printf(" uuid");
}
@ -663,7 +760,7 @@ int main(int argc, const char ** argv) {
// Autoselect the highest security hash if manifest is provided but
// the user has not specifically defined the hash they care about
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
if (!params.xxh64 && !params.xxh3 && !params.sha1 && !params.uuid && !params.sha256) {
// User has not selected a specific value, pick most secure hash
if (manifest_check.sha256) {
params.sha256 = true;
@ -671,6 +768,8 @@ int main(int argc, const char ** argv) {
params.sha1 = true;
} else if (manifest_check.xxh64) {
params.xxh64 = true;
} else if (manifest_check.xxh3) {
params.xxh3 = true;
} else if (manifest_check.uuid) {
params.uuid = true;
}
@ -680,7 +779,7 @@ int main(int argc, const char ** argv) {
}
// By default if no swich argument provided, assume xxh64
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
if (!params.xxh64 && !params.xxh3 && !params.sha1 && !params.uuid && !params.sha256) {
params.xxh64 = true;
}