Replace the __CUDACC_VER_*__ macros used in preprocessor version checks with CUDA_VERSION, because clang does not define the former.
This commit is contained in:
parent
469b69a5d0
commit
d08e9b5279
@ -18,7 +18,7 @@ static ncclResult_t PointerCheck(const void* pointer, struct ncclComm* comm, con
|
|||||||
WARN("%s : %s is not a valid pointer", opname, ptrname);
|
WARN("%s : %s is not a valid pointer", opname, ptrname);
|
||||||
return ncclInvalidArgument;
|
return ncclInvalidArgument;
|
||||||
}
|
}
|
||||||
#if __CUDACC_VER_MAJOR__ >= 10
|
#if CUDA_VERSION >= 10000
|
||||||
if (attr.type == cudaMemoryTypeDevice && attr.device != comm->cudaDev) {
|
if (attr.type == cudaMemoryTypeDevice && attr.device != comm->cudaDev) {
|
||||||
#else
|
#else
|
||||||
if (attr.memoryType == cudaMemoryTypeDevice && attr.device != comm->cudaDev) {
|
if (attr.memoryType == cudaMemoryTypeDevice && attr.device != comm->cudaDev) {
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
#if __CUDACC_VER_MAJOR__ < 9
|
#if CUDA_VERSION < 9000
|
||||||
struct cudaLaunchParams {
|
struct cudaLaunchParams {
|
||||||
void *func;
|
void *func;
|
||||||
dim3 gridDim;
|
dim3 gridDim;
|
||||||
|
@ -42,7 +42,7 @@ FILE *ncclDebugFile = stdout;
|
|||||||
std::chrono::high_resolution_clock::time_point ncclEpoch;
|
std::chrono::high_resolution_clock::time_point ncclEpoch;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __CUDACC_VER_MAJOR__ >= 10 || (__CUDACC_VER_MAJOR__ >= 9 && __CUDACC_VER_MINOR__ >= 2)
|
#if CUDA_VERSION >= 9200
|
||||||
#define NCCL_GROUP_CUDA_STREAM 0 // CGMD: CUDA 9.2,10.X Don't need to use an internal CUDA stream
|
#define NCCL_GROUP_CUDA_STREAM 0 // CGMD: CUDA 9.2,10.X Don't need to use an internal CUDA stream
|
||||||
#else
|
#else
|
||||||
#define NCCL_GROUP_CUDA_STREAM 1 // CGMD: CUDA 9.0,9.1 Need to use an internal CUDA stream
|
#define NCCL_GROUP_CUDA_STREAM 1 // CGMD: CUDA 9.0,9.1 Need to use an internal CUDA stream
|
||||||
@ -229,7 +229,7 @@ static ncclResult_t commAlloc(ncclComm_t* comret, int ndev, int rank) {
|
|||||||
comm->doneEvent = doneEvent;
|
comm->doneEvent = doneEvent;
|
||||||
comm->llThreshold = ncclParamLlThreshold();
|
comm->llThreshold = ncclParamLlThreshold();
|
||||||
comm->checkPointers = ncclParamCheckPointers() == 1 ? true : false;
|
comm->checkPointers = ncclParamCheckPointers() == 1 ? true : false;
|
||||||
#if __CUDACC_VER_MAJOR__ >= 10 || (__CUDACC_VER_MAJOR__ >= 9 && __CUDACC_VER_MINOR__ >= 2)
|
#if CUDA_VERSION >= 9200
|
||||||
comm->groupCudaStream = ncclParamGroupCudaStream();
|
comm->groupCudaStream = ncclParamGroupCudaStream();
|
||||||
#else
|
#else
|
||||||
// Don't allow the user to overload the default setting in older CUDA builds
|
// Don't allow the user to overload the default setting in older CUDA builds
|
||||||
@ -473,7 +473,7 @@ ncclResult_t ncclCommSetIntra(struct ncclComm* comm, int rank, int ranks, struct
|
|||||||
}
|
}
|
||||||
if (comm->launchMode == ncclComm::GROUP) {
|
if (comm->launchMode == ncclComm::GROUP) {
|
||||||
CUDACHECK(cudaStreamCreateWithFlags(&comm->groupStream, cudaStreamNonBlocking));
|
CUDACHECK(cudaStreamCreateWithFlags(&comm->groupStream, cudaStreamNonBlocking));
|
||||||
#if __CUDACC_VER_MAJOR__ >= 9
|
#if CUDA_VERSION >= 9000
|
||||||
if (*comm->intraCC && (ncclCudaFullCompCap() == *comm->intraCC)) {
|
if (*comm->intraCC && (ncclCudaFullCompCap() == *comm->intraCC)) {
|
||||||
// Check whether the GPU supports Cooperative Group Multi Device Launch
|
// Check whether the GPU supports Cooperative Group Multi Device Launch
|
||||||
(void) cudaDeviceGetAttribute(&cgMdLaunch, cudaDevAttrCooperativeMultiDeviceLaunch, comm->cudaDev);
|
(void) cudaDeviceGetAttribute(&cgMdLaunch, cudaDevAttrCooperativeMultiDeviceLaunch, comm->cudaDev);
|
||||||
|
@ -58,7 +58,7 @@ static void* const ncclKerns[ncclCollCount*ncclNumOps*ncclNumTypes*2] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
ncclResult_t ncclLaunchCooperativeKernelMultiDevice(struct cudaLaunchParams *paramsList, int* cudaDevs, int numDevices, int cgMode) {
|
ncclResult_t ncclLaunchCooperativeKernelMultiDevice(struct cudaLaunchParams *paramsList, int* cudaDevs, int numDevices, int cgMode) {
|
||||||
#if __CUDACC_VER_MAJOR__ >= 9
|
#if CUDA_VERSION >= 9000
|
||||||
if (cgMode & 0x01) {
|
if (cgMode & 0x01) {
|
||||||
CUDACHECK(cudaLaunchCooperativeKernelMultiDevice(paramsList, numDevices,
|
CUDACHECK(cudaLaunchCooperativeKernelMultiDevice(paramsList, numDevices,
|
||||||
// These flags are to reduce the latency of using this API
|
// These flags are to reduce the latency of using this API
|
||||||
|
Loading…
x
Reference in New Issue
Block a user