Patch note: introduces MAJOR.MINOR.PATCH versioning for libnccl.so, adds an
sm_50 gencode target, drops -lnvidia-ml from the test link line, and fixes the
test binary name in the README.

LIBTARGET keeps its $(LIBDIR)/ prefix (as the definition it replaces did), so
the "$(LIBTARGET) : $(LIBOBJ)" rule names the file the linker actually writes
and the library is not relinked on every invocation. The versioned symlink is
created from $(@F) so it stays relative inside $(LIBDIR).

NOTE(review): the index hashes below are carried over unchanged — regenerate
them if the patch is re-exported from a tree.

diff --git a/Makefile b/Makefile
index db697ba..1c9b536 100644
--- a/Makefile
+++ b/Makefile
@@ -31,6 +31,7 @@ PREFIX ?= /usr/local
 VERBOSE ?= 0
 
 CUDACODE := -gencode=arch=compute_35,code=sm_35 \
+            -gencode=arch=compute_50,code=sm_50 \
             -gencode=arch=compute_52,code=sm_52
 
 BUILDDIR := build
@@ -55,7 +56,9 @@ MPIFLAGS := -I$(MPI_HOME)/include -L$(MPI_HOME)/lib -lmpi
 INCEXPORTS := nccl.h
 LIBSRCFILES := libwrap.cu core.cu all_gather.cu all_reduce.cu broadcast.cu reduce.cu reduce_scatter.cu
 LIBNAME := libnccl.so
-APIVER := 0
+VER_MAJOR := 1
+VER_MINOR := 0
+VER_PATCH := 0
 TESTS := all_gather_test all_reduce_test broadcast_test reduce_test reduce_scatter_test
 MPITESTS := mpi_test
 
@@ -66,7 +69,9 @@ TSTDIR := $(BUILDDIR)/test
 MPITSTDIR := $(BUILDDIR)/mpitest
 
 INCTARGETS := $(patsubst %, $(INCDIR)/%, $(INCEXPORTS))
-LIBTARGET := $(patsubst %, $(LIBDIR)/%.$(APIVER), $(LIBNAME))
+LIBSONAME := $(patsubst %,%.$(VER_MAJOR),$(LIBNAME))
+# Full path of the real shared object; must match what the link recipe writes to $@.
+LIBTARGET := $(patsubst %,$(LIBDIR)/%.$(VER_MAJOR).$(VER_MINOR).$(VER_PATCH),$(LIBNAME))
 LIBLINK := $(patsubst lib%.so, -l%, $(LIBNAME))
 LIBOBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(filter %.cu, $(LIBSRCFILES)))
 TESTBINS := $(patsubst %, $(TSTDIR)/%, $(TESTS))
@@ -80,8 +85,9 @@ lib : $(INCTARGETS) $(LIBTARGET)
 $(LIBTARGET) : $(LIBOBJ)
 	@printf "Linking %-25s\n" $@
 	@mkdir -p $(LIBDIR)
-	@$(GPP) $(CPPFLAGS) $(CXXFLAGS) -shared -Wl,-soname,$(LIBNAME).$(APIVER) -o $@ $(LDFLAGS) $(LIBOBJ)
-	@ln -sf $(LIBNAME).$(APIVER) $(LIBDIR)/$(LIBNAME)
+	@$(GPP) $(CPPFLAGS) $(CXXFLAGS) -shared -Wl,-soname,$(LIBSONAME) -o $@ $(LDFLAGS) $(LIBOBJ)
+	@ln -sf $(LIBSONAME) $(LIBDIR)/$(LIBNAME)
+	@ln -sf $(@F) $(LIBDIR)/$(LIBSONAME)
 
 $(INCDIR)/%.h : src/%.h
 	@printf "Grabbing %-25s > %-25s\n" $< $@
@@ -106,8 +112,8 @@ test : lib $(TESTBINS)
 $(TSTDIR)/% : src/%.cu lib
 	@printf "Building %-25s > %-24s\n" $< $@
 	@mkdir -p $(TSTDIR)
-	@$(NVCC) -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt -lnvidia-ml
-	@$(NVCC) -M -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt -lnvidia-ml > $(@:%=%.d.tmp)
+	@$(NVCC) -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt
+	@$(NVCC) -M -Ibuild/include $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt > $(@:%=%.d.tmp)
 	@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
 	@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
 	  sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)
diff --git a/README.md b/README.md
index 7722ce9..289b46e 100644
--- a/README.md
+++ b/README.md
@@ -36,13 +36,13 @@ Test binaries are located in the subdirectories nccl/build/test and nccl/build/m
 
 ```shell
 $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./build/lib
-$ ./build/test/allreduce_test
+$ ./build/test/all_reduce_test
 Error: must specify at least data size in bytes!
 
 Tests nccl AllReduce with user supplied arguments.
 Usage: all_reduce_test [number of GPUs] [GPU 0] [GPU 1] ...
 
-$ ./build/test/allreduce_test 10000000
+$ ./build/test/all_reduce_test 10000000
 # Using devices
 # Device 0 -> 0 [0x0a] GeForce GTX TITAN X
 # Device 1 -> 1 [0x09] GeForce GTX TITAN X