Fix 1.3.2 compilation
This commit is contained in:
parent
648e9fbb58
commit
2a974f5ca2
@ -85,7 +85,6 @@ struct KernelArgs {
|
|||||||
T * __restrict__ ThisOutput;
|
T * __restrict__ ThisOutput;
|
||||||
|
|
||||||
DevRing<char>* ring;
|
DevRing<char>* ring;
|
||||||
int nRings;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@ -93,21 +92,19 @@ void ArgsSetup(KernelArgs<T> *args, const void* sendbuff, void* recvbuff,
|
|||||||
const int root, const int count, ncclComm *comm) {
|
const int root, const int count, ncclComm *comm) {
|
||||||
args->nRanks = comm->nRanks;
|
args->nRanks = comm->nRanks;
|
||||||
args->root = root;
|
args->root = root;
|
||||||
args->buffSize = comm->buffSizePerRing;
|
args->buffSize = comm->buffSize;
|
||||||
args->N = count;
|
args->N = count;
|
||||||
args->opIndex = comm->opSched;
|
args->opIndex = comm->opSched;
|
||||||
args->opCounter = comm->opCounter;
|
args->opCounter = comm->opCounter;
|
||||||
args->doneCount = &comm->devMem->doneCount;
|
|
||||||
args->ThisInput = (const T*)sendbuff;
|
args->ThisInput = (const T*)sendbuff;
|
||||||
args->ThisOutput = (T*)recvbuff;
|
args->ThisOutput = (T*)recvbuff;
|
||||||
args->ring = comm->devRing;
|
args->ring = comm->devRing;
|
||||||
args->pushrecv = comm->globalMemSpace;
|
args->pushrecv = comm->globalMemSpace;
|
||||||
args->nRings = comm->nRings;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define LAUNCH_KERNEL(K, THREADS, UNROLL, FUNC, T, \
|
#define LAUNCH_KERNEL(K, THREADS, UNROLL, FUNC, T, \
|
||||||
args, stream) do { \
|
args, stream) do { \
|
||||||
dim3 grid(args.nRings, 1, 1); \
|
dim3 grid(1, 1, 1); \
|
||||||
dim3 block(THREADS+1, 1, 1); \
|
dim3 block(THREADS+1, 1, 1); \
|
||||||
void* argptrs[] = {&args}; \
|
void* argptrs[] = {&args}; \
|
||||||
CUDACHECK(cudaLaunchKernel( \
|
CUDACHECK(cudaLaunchKernel( \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user