Moved tests to separate dir and improved MPI test
test sources moved to test/ directory. MPI test displays PASS/FAIL and returns code accordingly. Change-Id: I058ebd1bd5202d8f38cc9787898b2480100c102b Reviewed-on: http://git-master/r/936086 Reviewed-by: Przemek Tredak <ptredak@nvidia.com> Tested-by: Przemek Tredak <ptredak@nvidia.com>
This commit is contained in:
parent
5966316771
commit
c05312f151
19
Makefile
19
Makefile
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
@ -49,6 +49,7 @@ endif
|
||||
|
||||
LDFLAGS := -L$(CUDA_HOME)/lib64 -lcudart
|
||||
MPIFLAGS := -I$(MPI_HOME)/include -L$(MPI_HOME)/lib -lmpi
|
||||
TSTINC := -Ibuild/include -Itest/include
|
||||
|
||||
.PHONY : lib clean test mpitest install
|
||||
.DEFAULT : lib
|
||||
@ -65,8 +66,8 @@ MPITESTS := mpi_test
|
||||
INCDIR := $(BUILDDIR)/include
|
||||
LIBDIR := $(BUILDDIR)/lib
|
||||
OBJDIR := $(BUILDDIR)/obj
|
||||
TSTDIR := $(BUILDDIR)/test
|
||||
MPITSTDIR := $(BUILDDIR)/mpitest
|
||||
TSTDIR := $(BUILDDIR)/test/single
|
||||
MPITSTDIR := $(BUILDDIR)/test/mpi
|
||||
|
||||
INCTARGETS := $(patsubst %, $(INCDIR)/%, $(INCEXPORTS))
|
||||
LIBSONAME := $(patsubst %,%.$(VER_MAJOR),$(LIBNAME))
|
||||
@ -108,11 +109,11 @@ clean :
|
||||
|
||||
test : lib $(TESTBINS)
|
||||
|
||||
$(TSTDIR)/% : src/%.cu lib
|
||||
$(TSTDIR)/% : test/single/%.cu lib
|
||||
@printf "Building %-25s > %-24s\n" $< $@
|
||||
@mkdir -p $(TSTDIR)
|
||||
@$(NVCC) -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt
|
||||
@$(NVCC) -M -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt > $(@:%=%.d.tmp)
|
||||
@$(NVCC) $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt
|
||||
@$(NVCC) -M $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt > $(@:%=%.d.tmp)
|
||||
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
|
||||
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
|
||||
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)
|
||||
@ -120,11 +121,11 @@ $(TSTDIR)/% : src/%.cu lib
|
||||
|
||||
mpitest : lib $(MPITESTBINS)
|
||||
|
||||
$(MPITSTDIR)/% : src/%.cu lib
|
||||
$(MPITSTDIR)/% : test/mpi/%.cu lib
|
||||
@printf "Building %-25s > %-24s\n" $< $@
|
||||
@mkdir -p $(MPITSTDIR)
|
||||
@$(NVCC) $(MPIFLAGS) -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS)
|
||||
@$(NVCC) $(MPIFLAGS) -M -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) > $(@:%=%.d.tmp)
|
||||
@$(NVCC) $(MPIFLAGS) $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS)
|
||||
@$(NVCC) $(MPIFLAGS) -M $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) > $(@:%=%.d.tmp)
|
||||
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
|
||||
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
|
||||
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)
|
||||
|
@ -28,19 +28,23 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "nccl.h"
|
||||
#include "mpi.h"
|
||||
|
||||
#define CUDACHECK(cmd) do { \
|
||||
cudaError_t e = cmd; \
|
||||
if( e != cudaSuccess ) { \
|
||||
printf("Cuda failure %s:%d '%s'\n", \
|
||||
__FILE__,__LINE__,cudaGetErrorString(e)); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
cudaError_t e = cmd; \
|
||||
if( e != cudaSuccess ) { \
|
||||
printf("Cuda failure %s:%d '%s'\n", \
|
||||
__FILE__,__LINE__,cudaGetErrorString(e)); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while(false)
|
||||
|
||||
#define SIZE 128
|
||||
#define NITERS 1
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
ncclUniqueId commId;
|
||||
int size, rank;
|
||||
@ -50,14 +54,18 @@ int main(int argc, char *argv[]) {
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
|
||||
if (argc < size) {
|
||||
printf("Usage : %s <GPU list per rank>\n", argv[0]);
|
||||
}
|
||||
|
||||
int gpu = atoi(argv[rank+1]);
|
||||
printf("MPI Rank %d running on GPU %d\n", rank, gpu);
|
||||
|
||||
// We have to set our device before NCCL init
|
||||
CUDACHECK(cudaSetDevice(gpu));
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
// NCCL Communicator creation
|
||||
ncclComm_t comm;
|
||||
// Let's use rank 0 PID as job ID
|
||||
ncclGetUniqueId(&commId);
|
||||
MPI_Bcast(&commId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, MPI_COMM_WORLD);
|
||||
ret = ncclCommInitRank(&comm, size, commId, rank);
|
||||
@ -66,18 +74,48 @@ int main(int argc, char *argv[]) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// CUDA stream creation
|
||||
cudaStream_t stream;
|
||||
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
||||
|
||||
// Initialize input values
|
||||
int *dptr;
|
||||
CUDACHECK(cudaMalloc(&dptr, 1024*2*sizeof(int)));
|
||||
int val = rank;
|
||||
CUDACHECK(cudaMemcpy(dptr, &val, sizeof(int), cudaMemcpyHostToDevice));
|
||||
CUDACHECK(cudaMalloc(&dptr, SIZE*2*sizeof(int)));
|
||||
int *val = (int*) malloc(SIZE*sizeof(int));
|
||||
for (int v=0; v<SIZE; v++) {
|
||||
val[v] = rank + 1;
|
||||
}
|
||||
CUDACHECK(cudaMemcpy(dptr, val, SIZE*sizeof(int), cudaMemcpyHostToDevice));
|
||||
|
||||
ncclAllReduce((const void*)dptr, (void*)(dptr+1024), 1024, ncclInt, ncclSum, comm, cudaStreamDefault);
|
||||
// Compute final value
|
||||
int ref = size*(size+1)/2;
|
||||
|
||||
CUDACHECK(cudaMemcpy(&val, (dptr+1024), sizeof(int), cudaMemcpyDeviceToHost));
|
||||
printf("Sum is %d\n", val);
|
||||
// Run allreduce
|
||||
int errors = 0;
|
||||
for (int i=0; i<NITERS; i++) {
|
||||
ncclAllReduce((const void*)dptr, (void*)(dptr+SIZE), SIZE, ncclInt, ncclSum, comm, stream);
|
||||
}
|
||||
|
||||
// Check results
|
||||
cudaStreamSynchronize(stream);
|
||||
CUDACHECK(cudaMemcpy(val, (dptr+SIZE), SIZE*sizeof(int), cudaMemcpyDeviceToHost));
|
||||
for (int v=0; v<SIZE; v++) {
|
||||
if (val[v] != ref) {
|
||||
errors++;
|
||||
printf("[%d] Error at %d : got %d instead of %d\n", rank, v, val[v], ref);
|
||||
}
|
||||
}
|
||||
CUDACHECK(cudaFree(dptr));
|
||||
|
||||
MPI_Allreduce(MPI_IN_PLACE, &errors, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
|
||||
if (rank == 0) {
|
||||
if (errors)
|
||||
printf("%d errors. Test FAILED.\n", errors);
|
||||
else
|
||||
printf("Test PASSED.\n");
|
||||
}
|
||||
|
||||
MPI_Finalize();
|
||||
ncclCommDestroy(comm);
|
||||
return 0;
|
||||
return errors ? 1 : 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user