From d08e9b5279133dbcc55195bd2eac2f069dda3e6d Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg@google.com>
Date: Thu, 13 Dec 2018 15:59:22 +0100
Subject: [PATCH] Change __CUDACC_VER_*__ preprocessor macros to
 CUDA_VERSION because clang doesn't define the former.

---
 src/include/common_coll.h | 2 +-
 src/include/core.h        | 2 +-
 src/init.cu               | 6 +++---
 src/misc/enqueue.cu       | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/include/common_coll.h b/src/include/common_coll.h
index cd97bbd..3f3bffe 100644
--- a/src/include/common_coll.h
+++ b/src/include/common_coll.h
@@ -18,7 +18,7 @@ static ncclResult_t PointerCheck(const void* pointer, struct ncclComm* comm, con
     WARN("%s : %s is not a valid pointer", opname, ptrname);
     return ncclInvalidArgument;
   }
-#if __CUDACC_VER_MAJOR__ >= 10
+#if CUDA_VERSION >= 10000
   if (attr.type == cudaMemoryTypeDevice && attr.device != comm->cudaDev) {
 #else
   if (attr.memoryType == cudaMemoryTypeDevice && attr.device != comm->cudaDev) {
diff --git a/src/include/core.h b/src/include/core.h
index 25744e3..9fd5e0e 100644
--- a/src/include/core.h
+++ b/src/include/core.h
@@ -18,7 +18,7 @@
 #include <stdlib.h>
 #include <cuda_runtime.h>
 
-#if __CUDACC_VER_MAJOR__ < 9
+#if CUDA_VERSION < 9000
 struct cudaLaunchParams {
   void *func;
   dim3 gridDim;
diff --git a/src/init.cu b/src/init.cu
index db522b8..5f92de4 100644
--- a/src/init.cu
+++ b/src/init.cu
@@ -42,7 +42,7 @@ FILE *ncclDebugFile = stdout;
 std::chrono::high_resolution_clock::time_point ncclEpoch;
 #endif
 
-#if __CUDACC_VER_MAJOR__ >= 10 || (__CUDACC_VER_MAJOR__ >= 9 && __CUDACC_VER_MINOR__ >= 2)
+#if CUDA_VERSION >= 9200
 #define NCCL_GROUP_CUDA_STREAM 0 // CGMD: CUDA 9.2,10.X Don't need to use an internal CUDA stream
 #else
 #define NCCL_GROUP_CUDA_STREAM 1 // CGMD: CUDA 9.0,9.1 Need to use an internal CUDA stream
@@ -229,7 +229,7 @@ static ncclResult_t commAlloc(ncclComm_t* comret, int ndev, int rank) {
   comm->doneEvent = doneEvent;
   comm->llThreshold = ncclParamLlThreshold();
   comm->checkPointers = ncclParamCheckPointers() == 1 ? true : false;
-#if __CUDACC_VER_MAJOR__ >= 10 || (__CUDACC_VER_MAJOR__ >= 9 && __CUDACC_VER_MINOR__ >= 2)
+#if CUDA_VERSION >= 9200
   comm->groupCudaStream = ncclParamGroupCudaStream();
 #else
   // Don't allow the user to overload the default setting in older CUDA builds
@@ -473,7 +473,7 @@ ncclResult_t ncclCommSetIntra(struct ncclComm* comm, int rank, int ranks, struct
   }
   if (comm->launchMode == ncclComm::GROUP) {
     CUDACHECK(cudaStreamCreateWithFlags(&comm->groupStream, cudaStreamNonBlocking));
-#if __CUDACC_VER_MAJOR__ >= 9
+#if CUDA_VERSION >= 9000
     if (*comm->intraCC && (ncclCudaFullCompCap() == *comm->intraCC)) {
       // Check whether the GPU supports Cooperative Group Multi Device Launch
       (void) cudaDeviceGetAttribute(&cgMdLaunch, cudaDevAttrCooperativeMultiDeviceLaunch, comm->cudaDev);
diff --git a/src/misc/enqueue.cu b/src/misc/enqueue.cu
index 6c6e2ce..3335fa0 100644
--- a/src/misc/enqueue.cu
+++ b/src/misc/enqueue.cu
@@ -58,7 +58,7 @@ static void* const ncclKerns[ncclCollCount*ncclNumOps*ncclNumTypes*2] = {
 };
 
 ncclResult_t ncclLaunchCooperativeKernelMultiDevice(struct cudaLaunchParams *paramsList, int* cudaDevs, int numDevices, int cgMode) {
-#if __CUDACC_VER_MAJOR__ >= 9
+#if CUDA_VERSION >= 9000
   if (cgMode & 0x01) {
     CUDACHECK(cudaLaunchCooperativeKernelMultiDevice(paramsList, numDevices,
             // These flags are to reduce the latency of using this API