Moved tests to separate dir and improved MPI test

test sources moved to test/ directory.
MPI test displays PASS/FAIL and returns code accordingly.

Change-Id: I058ebd1bd5202d8f38cc9787898b2480100c102b
Reviewed-on: http://git-master/r/936086
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
This commit is contained in:
Sylvain Jeaugey 2016-01-20 14:18:25 -05:00 committed by Przemek Tredak
parent 5966316771
commit c05312f151
8 changed files with 63 additions and 24 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@ -49,6 +49,7 @@ endif
LDFLAGS := -L$(CUDA_HOME)/lib64 -lcudart
MPIFLAGS := -I$(MPI_HOME)/include -L$(MPI_HOME)/lib -lmpi
TSTINC := -Ibuild/include -Itest/include
.PHONY : lib clean test mpitest install
.DEFAULT : lib
@ -65,8 +66,8 @@ MPITESTS := mpi_test
INCDIR := $(BUILDDIR)/include
LIBDIR := $(BUILDDIR)/lib
OBJDIR := $(BUILDDIR)/obj
TSTDIR := $(BUILDDIR)/test
MPITSTDIR := $(BUILDDIR)/mpitest
TSTDIR := $(BUILDDIR)/test/single
MPITSTDIR := $(BUILDDIR)/test/mpi
INCTARGETS := $(patsubst %, $(INCDIR)/%, $(INCEXPORTS))
LIBSONAME := $(patsubst %,%.$(VER_MAJOR),$(LIBNAME))
@ -108,11 +109,11 @@ clean :
test : lib $(TESTBINS)
$(TSTDIR)/% : src/%.cu lib
$(TSTDIR)/% : test/single/%.cu lib
@printf "Building %-25s > %-24s\n" $< $@
@mkdir -p $(TSTDIR)
@$(NVCC) -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt
@$(NVCC) -M -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt > $(@:%=%.d.tmp)
@$(NVCC) $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt
@$(NVCC) -M $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt > $(@:%=%.d.tmp)
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)
@ -120,11 +121,11 @@ $(TSTDIR)/% : src/%.cu lib
mpitest : lib $(MPITESTBINS)
$(MPITSTDIR)/% : src/%.cu lib
$(MPITSTDIR)/% : test/mpi/%.cu lib
@printf "Building %-25s > %-24s\n" $< $@
@mkdir -p $(MPITSTDIR)
@$(NVCC) $(MPIFLAGS) -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS)
@$(NVCC) $(MPIFLAGS) -M -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) > $(@:%=%.d.tmp)
@$(NVCC) $(MPIFLAGS) $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS)
@$(NVCC) $(MPIFLAGS) -M $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) > $(@:%=%.d.tmp)
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)

View File

@ -28,19 +28,23 @@
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include "nccl.h"
#include "mpi.h"
#define CUDACHECK(cmd) do { \
cudaError_t e = cmd; \
if( e != cudaSuccess ) { \
printf("Cuda failure %s:%d '%s'\n", \
__FILE__,__LINE__,cudaGetErrorString(e)); \
exit(EXIT_FAILURE); \
} \
cudaError_t e = cmd; \
if( e != cudaSuccess ) { \
printf("Cuda failure %s:%d '%s'\n", \
__FILE__,__LINE__,cudaGetErrorString(e)); \
exit(EXIT_FAILURE); \
} \
} while(false)
#define SIZE 128
#define NITERS 1
int main(int argc, char *argv[]) {
ncclUniqueId commId;
int size, rank;
@ -50,14 +54,18 @@ int main(int argc, char *argv[]) {
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (argc < size) {
printf("Usage : %s <GPU list per rank>\n", argv[0]);
}
int gpu = atoi(argv[rank+1]);
printf("MPI Rank %d running on GPU %d\n", rank, gpu);
// We have to set our device before NCCL init
CUDACHECK(cudaSetDevice(gpu));
MPI_Barrier(MPI_COMM_WORLD);
// NCCL Communicator creation
ncclComm_t comm;
// Let's use rank 0 PID as job ID
ncclGetUniqueId(&commId);
MPI_Bcast(&commId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, MPI_COMM_WORLD);
ret = ncclCommInitRank(&comm, size, commId, rank);
@ -66,18 +74,48 @@ int main(int argc, char *argv[]) {
exit(1);
}
// CUDA stream creation
cudaStream_t stream;
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
// Initialize input values
int *dptr;
CUDACHECK(cudaMalloc(&dptr, 1024*2*sizeof(int)));
int val = rank;
CUDACHECK(cudaMemcpy(dptr, &val, sizeof(int), cudaMemcpyHostToDevice));
CUDACHECK(cudaMalloc(&dptr, SIZE*2*sizeof(int)));
int *val = (int*) malloc(SIZE*sizeof(int));
for (int v=0; v<SIZE; v++) {
val[v] = rank + 1;
}
CUDACHECK(cudaMemcpy(dptr, val, SIZE*sizeof(int), cudaMemcpyHostToDevice));
ncclAllReduce((const void*)dptr, (void*)(dptr+1024), 1024, ncclInt, ncclSum, comm, cudaStreamDefault);
// Compute final value
int ref = size*(size+1)/2;
CUDACHECK(cudaMemcpy(&val, (dptr+1024), sizeof(int), cudaMemcpyDeviceToHost));
printf("Sum is %d\n", val);
// Run allreduce
int errors = 0;
for (int i=0; i<NITERS; i++) {
ncclAllReduce((const void*)dptr, (void*)(dptr+SIZE), SIZE, ncclInt, ncclSum, comm, stream);
}
// Check results
cudaStreamSynchronize(stream);
CUDACHECK(cudaMemcpy(val, (dptr+SIZE), SIZE*sizeof(int), cudaMemcpyDeviceToHost));
for (int v=0; v<SIZE; v++) {
if (val[v] != ref) {
errors++;
printf("[%d] Error at %d : got %d instead of %d\n", rank, v, val[v], ref);
}
}
CUDACHECK(cudaFree(dptr));
MPI_Allreduce(MPI_IN_PLACE, &errors, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
if (rank == 0) {
if (errors)
printf("%d errors. Test FAILED.\n", errors);
else
printf("Test PASSED.\n");
}
MPI_Finalize();
ncclCommDestroy(comm);
return 0;
return errors ? 1 : 0;
}