/************************************************************************* * Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. * * See LICENCE.txt for license information ************************************************************************/ #include #include #include #include "nccl.h" #include "mpi.h" #include "test_utilities.h" #define SIZE 128 #define NITERS 1 int main(int argc, char *argv[]) { ncclUniqueId commId; int size, rank; ncclResult_t ret; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (argc < size) { if (rank == 0) printf("Usage : %s \n", argv[0]); exit(1); } int gpu = atoi(argv[rank+1]); // We have to set our device before NCCL init CUDACHECK(cudaSetDevice(gpu)); MPI_Barrier(MPI_COMM_WORLD); // NCCL Communicator creation ncclComm_t comm; NCCLCHECK(ncclGetUniqueId(&commId)); MPI_Bcast(&commId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, MPI_COMM_WORLD); ret = ncclCommInitRank(&comm, size, commId, rank); if (ret != ncclSuccess) { printf("NCCL Init failed (%d) '%s'\n", ret, ncclGetErrorString(ret)); exit(1); } // CUDA stream creation cudaStream_t stream; CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); // Initialize input values int *dptr; CUDACHECK(cudaMalloc(&dptr, SIZE*2*sizeof(int))); int *val = (int*) malloc(SIZE*sizeof(int)); for (int v=0; v