npu perf fix
This commit is contained in:
parent
981ec6571d
commit
a40a5dfc60
|
|
@@ -165,7 +165,7 @@ clEnqueueMemcpyINTEL_fn ggml_openvino_get_clEnqueueMemcpyINTEL() {
|
|||
// Get requantization type for a tensor type (returns nullopt if no requant needed)
|
||||
std::optional<ExtraQuantType> ggml_openvino_get_requant_type(const ggml_tensor * tensor) {
|
||||
if (strncmp(tensor->name, "token_embd.weight", 17) == 0) {
|
||||
return ExtraQuantType::Q8_0_C;
|
||||
return (ggml_openvino_is_npu() ? ExtraQuantType::F16 : ExtraQuantType::Q8_0_C);
|
||||
}
|
||||
if (strncmp(tensor->name, "output.weight", 13) == 0) {
|
||||
return ExtraQuantType::Q8_0_C;
|
||||
|
|
|
|||
Loading…
Reference in New Issue