diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index ffd69ad74b..a3574e914f 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -12,7 +12,9 @@ #include "ggml-backend-impl.h" #include "ggml-alloc.h" #include "ggml-cpu.h" -#include "ggml-cuda.h" // TODO add IFDEFs for CUDA-specific parts +#ifdef GGML_CUDA +#include "ggml-cuda.h" +#endif // GGML_CUDA #include "ggml-impl.h" #include @@ -740,12 +742,15 @@ struct ggml_backend_sched { static void ggml_backend_synchronize_if_required(ggml_backend_t current_backend) { // TODO add env-flag check here to auto-disable this change + +#ifdef GGML_CUDA // CUDA backends have an implicit order between execution and memory operations via the CUDA stream. // Multiple parallel copies are also possible. // There is consequently no need to synchronize in between computation and subsequent memcpys if (ggml_backend_is_cuda(current_backend)) { return; } +#endif // GGML_CUDA // in all other cases, just sync. ggml_backend_synchronize(current_backend);