diff --git a/ggml/src/ggml-qnn/qnn/ggml-qnn.cpp b/ggml/src/ggml-qnn/qnn/ggml-qnn.cpp
index e559cfdb28..4a13f3ec05 100644
--- a/ggml/src/ggml-qnn/qnn/ggml-qnn.cpp
+++ b/ggml/src/ggml-qnn/qnn/ggml-qnn.cpp
@@ -283,8 +283,11 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev,
                  qnn::get_backend_desc(dev_ctx->device));
         dev_ctx->description = buffer;
     }
+
+#ifdef GGML_HEXAGON_ENABLE_QUANTIZED_TENSORS
     // TODO: remove npu from here if hardware quantization is supported
     dev_ctx->enable_cpu_dequantize = device == QNN_BACKEND_CPU;
+#endif
 
     ggml_backend_t qnn_backend = new ggml_backend{
         /* .guid      = */ ggml_backend_qnn_guid(),
diff --git a/ggml/src/ggml-qnn/qnn/qnn-lib.cpp b/ggml/src/ggml-qnn/qnn/qnn-lib.cpp
index e32bab5f92..7dbcaf968e 100644
--- a/ggml/src/ggml-qnn/qnn/qnn-lib.cpp
+++ b/ggml/src/ggml-qnn/qnn/qnn-lib.cpp
@@ -38,7 +38,7 @@ constexpr const qnn::device_caps kDeviceCaps[] = {
         // all quantized types can be offload to CPU, at current implementation, those types will be dequantized into float32 on cpu
         0xFFFFFE,
 #else
-        0,
+        (1L << GGML_TYPE_F32),
 #endif
         0,  // 0 for no limitation
     },
@@ -50,7 +50,7 @@ constexpr const qnn::device_caps kDeviceCaps[] = {
         // all quantized types can be offload to GPU, at current implementation, those types will be dequantized into float32 on cpu
         0xFFFFFE,
 #else
-        0,
+        (1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16),
 #endif
         (128256L * 4096 * sizeof(float)),  // tested on 8 gen 2, failed to allocate tensor with size 128256x4096 and float32
     },
@@ -62,7 +62,7 @@ constexpr const qnn::device_caps kDeviceCaps[] = {
         (1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16) | (1L << GGML_TYPE_I16),
         (1L << GGML_TYPE_Q2_K) | (1L << GGML_TYPE_Q3_K) | (1L << GGML_TYPE_Q4_K) | (1L << GGML_TYPE_Q8_K),
 #else
-        0,
+        (1L << GGML_TYPE_F32) | (1L << GGML_TYPE_F16),
 #endif
         (8192L * 2048 + 8192 * 512 + 2048 * 512) * sizeof(float),  // TODO: should have a better way to get this value
     },
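
For context on what the replaced values mean: each qnn::device_caps entry encodes the ggml types a backend accepts as a bitmask, with one bit per ggml_type enum value, so (1L << GGML_TYPE_F32) sets the bit for float32, and 0xFFFFFE sets bits 1 through 23 (the quantized types, per the comment in the diff). The actual field names of qnn::device_caps are not visible in this diff; the standalone sketch below uses assumed names (supported_types, max_tensor_size, supports) purely to illustrate how such a mask is typically checked.

// Minimal standalone sketch, NOT the real qnn::device_caps layout:
// bit i of the mask corresponds to the ggml type whose enum value is i.
#include <cstdint>
#include <cstdio>

// Reproduced subset of the real ggml_type values (F32 = 0, F16 = 1) so the
// sketch compiles without the ggml headers.
enum : int { GGML_TYPE_F32 = 0, GGML_TYPE_F16 = 1 };

struct device_caps_sketch {
    uint64_t supported_types;  // assumed name: bitmask of accepted ggml types
    uint64_t max_tensor_size;  // assumed name: 0 means no limitation
};

// A device accepts a type when the corresponding bit is set in its mask.
static bool supports(const device_caps_sketch & caps, int ggml_type) {
    return (caps.supported_types & (1ULL << ggml_type)) != 0;
}

int main() {
    // Mirrors the GPU entry after this patch: F32 and F16 allowed, with the
    // allocation ceiling observed on the 8 Gen 2 device.
    device_caps_sketch gpu = {
        (1ULL << GGML_TYPE_F32) | (1ULL << GGML_TYPE_F16),
        128256ULL * 4096 * sizeof(float),
    };
    std::printf("F32: %d, F16: %d\n", supports(gpu, GGML_TYPE_F32), supports(gpu, GGML_TYPE_F16));
    return 0;
}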