diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index af021984cc..ffa219f167 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -14,6 +14,32 @@
 #include
 #include
 
+static bool model_supports_compute_type(enum llm_arch arch, ggml_type compute_type) {
+    // F32 is always supported - it is the default, safe precision
+    if (compute_type == GGML_TYPE_F32) {
+        return true;
+    }
+
+    // FP16 and BF16 support is currently model-specific.
+    // Add models here once their required ops are implemented and validated for 'compute_type'.
+    // Example (uncomment when ready):
+    // if (compute_type == GGML_TYPE_F16 || compute_type == GGML_TYPE_BF16) {
+    //     switch (arch) {
+    //         case LLM_ARCH_QWEN2:
+    //         case LLM_ARCH_QWEN2MOE:
+    //         case LLM_ARCH_QWEN3:
+    //         // ... other validated models
+    //             return true;
+    //         default:
+    //             return false;
+    //     }
+    // }
+
+    // no models are enabled yet for non-F32 compute types
+    (void) arch;
+    return false;
+}
+
 //
 // llama_context
 //
@@ -166,6 +192,17 @@ llama_context::llama_context(
             break;
     }
 
+    // check if the model supports the requested compute type
+    if (cparams.compute_type != GGML_TYPE_F32) {
+        if (!model_supports_compute_type(model.arch, cparams.compute_type)) {
+            LLAMA_LOG_WARN("%s: model arch '%s' does not yet support compute_type %s, "
+                           "falling back to F32; the required ops must be implemented first to enable it\n",
+                           __func__, llm_arch_name(model.arch),
+                           ggml_type_name(cparams.compute_type));
+            cparams.compute_type = GGML_TYPE_F32;
+        }
+    }
+
     // with causal attention, the batch size is limited by the context size
     cparams.n_batch = cparams.causal_attn ? std::min(cparams.n_ctx, params.n_batch) : params.n_batch;
 
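For reference, here is a minimal caller-side sketch of how the fallback would surface. This is not part of the patch: it assumes the public `llama_context_params` struct is extended with a `compute_type` field that feeds the internal `cparams.compute_type` checked above, which this diff does not show.

```cpp
// Hypothetical caller-side sketch (not part of the patch). It assumes
// llama_context_params gains a `compute_type` field matching cparams.compute_type.
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        std::fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file(argv[1], mparams);
    if (model == nullptr) {
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    // assumed field added alongside this patch; the default would remain GGML_TYPE_F32
    cparams.compute_type = GGML_TYPE_F16;

    // for an arch not listed in model_supports_compute_type(), the constructor
    // logs the warning above and falls back to F32 instead of failing
    llama_context * ctx = llama_init_from_model(model, cparams);

    llama_free(ctx);
    llama_model_free(model);
    return 0;
}
```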