# Makefile (original listing: 82 lines, 2.4 KiB)
# Fortran compiler driver. FCNAME is its basename (any directory part
# removed) and is what the compiler-specific conditionals compare against.
FC     := gfortran
FCNAME := $(notdir $(FC))

# Build tree layout. BUILDDIR keeps any value already provided by the caller
# or the environment; the three sub-directories are derived from it.
BUILDDIR ?= ../build
OBJDIR   := $(BUILDDIR)/obj
LIBDIR   := $(BUILDDIR)/lib
INCDIR   := $(BUILDDIR)/include

# Shared-library naming: unversioned name, soname (major version only) and
# fully versioned file name. NCCL_MAJOR, NCCL_MINOR and NCCL_PATCH are not
# assigned here and must be supplied from outside.
LIBNAME   := libncclfor.so
LIBSONAME := $(addsuffix .$(NCCL_MAJOR),$(LIBNAME))
LIBTARGET := $(addsuffix .$(NCCL_MAJOR).$(NCCL_MINOR).$(NCCL_PATCH),$(LIBNAME))
# Linker switch derived from the library name (libfoo.so -> -lfoo); appended
# to whatever LIBLINK already holds.
LIBLINK   += $(LIBNAME:lib%.so=-l%)

# File name of the CUDA Fortran helper library.
LIBCUDAFOR := libcudafor.so

# Compiler-specific flags, selected on the compiler basename (FCNAME).
ifeq ($(filter pgf%,$(FCNAME)),)
  # non-PGI compilers do not have CUDA support, compile our own CUDA lib
  CUDAFORDEP  := $(LIBDIR)/$(LIBCUDAFOR)
  CUDALINK    := -L$(CUDA_LIB) -lcudart
  CUDAFORLINK := -lcudafor

  # Module output directory and preprocessor switch differ per compiler.
  # Any other compiler name gets neither.
  ifeq ($(FCNAME),ifort)
    FCMODFLAGS := -module $(INCDIR)
    FCPREFLAGS += -fpp
  else ifeq ($(FCNAME),gfortran)
    FCMODFLAGS := -J$(INCDIR)
    FCPREFLAGS += -cpp
    FCFLAGS    += -ffree-line-length-none
  endif
else
  # PGI compiler (pgfortran, pgf90, pgf95)
  FCMODFLAGS  := -module $(INCDIR)
  FCPREFLAGS  := -Mpreprocess
  FCCUDAFLAGS := -Mcuda,cuda$(CUDA_MAJOR).$(CUDA_MINOR)
  FCFLAGS     := -fast -O3
endif

# VERBOSE=0 enables make's .SILENT mode, so recipe lines are not echoed.
ifeq "$(VERBOSE)" "0"
.SILENT:
endif

# `lib` is a command name, not a file this Makefile produces; declare it
# phony so a stray file or directory called `lib` cannot mask it.
.PHONY: lib

# Build the library listed in CUDAFORDEP first (empty unless the compiler
# branch sets it), then re-invoke make to build the versioned shared library.
lib: $(CUDAFORDEP)
	$(MAKE) $(LIBDIR)/$(LIBTARGET)

# Link the versioned shared library from the ncclfor object, then refresh the
# symlinks: LIBSONAME -> LIBTARGET and LIBNAME -> LIBSONAME.
$(LIBDIR)/$(LIBTARGET): $(OBJDIR)/ncclfor.o
	@printf "Linking %-35s > %s\n" $(LIBTARGET) $@
	mkdir -p $(@D)
	$(FC) -shared -Wl,--no-as-needed -Wl,-soname,$(LIBSONAME) $< -o $@
	ln -sf $(LIBTARGET) $(@D)/$(LIBSONAME)
	ln -sf $(LIBSONAME) $(@D)/$(LIBNAME)

# Link the CUDA Fortran helper library from the cudafor object. The soname is
# the plain library file name.
$(LIBDIR)/$(LIBCUDAFOR): $(OBJDIR)/cudafor.o
	@printf "Linking %-35s > %s\n" $(LIBCUDAFOR) $@
	mkdir -p $(@D)
	$(FC) -shared -Wl,--no-as-needed -Wl,-soname,$(LIBCUDAFOR) $< -o $@

# Compile one Fortran source from src/ into a position-independent object.
# Both output directories are created first: the object goes to OBJDIR, and
# FCMODFLAGS (when set) points the compiler's module output at INCDIR.
$(OBJDIR)/%.o: src/%.f90
	@printf "Building %-35s > %s\n" $< $@
	mkdir -p $(OBJDIR) $(INCDIR)
	$(FC) -c $(FCMODFLAGS) $(FCPREFLAGS) -fPIC $(FCCUDAFLAGS) $(FCFLAGS) $< -o $@

# Test programs built for every compiler.
TESTS := \
    reduce_ptr_out \
    allreduce_ptr_out \
    reducescatter_ptr_out \
    broadcast_ptr \
    allgather_ptr_out

# The *_arr tests are added only when the compiler basename matches pgf%.
ifneq ($(filter pgf%,$(FCNAME)),)
TESTS += \
    reduce_arr_out \
    allreduce_arr_out \
    reducescatter_arr_out \
    broadcast_arr \
    allgather_arr_out
endif

# Output directory for the test binaries, and the full path of each binary.
TESTDIR  := $(BUILDDIR)/test/fortran
TESTBINS := $(addprefix $(TESTDIR)/,$(TESTS))

# `test` is a command name, not a build product. The test sources live in a
# directory of the same name (test/*.f90), so declare the target phony to stop
# make from treating that directory as the target file.
.PHONY: test

# Build the library, then every test binary.
test: lib $(TESTBINS)

# Compile and link one test program from test/ against the libraries in LIBDIR,
# using the module files in INCDIR.
# NOTE(review): `lib` is a normal prerequisite and its recipe does not create a
# file named `lib`, so these binaries are presumably rebuilt on every run.
# Confirm that is intended.
$(TESTDIR)/%: test/%.f90 lib
	@printf "Building %-35s > %s\n" $< $@
	@mkdir -p $(@D)
	$(FC) $(FCCUDAFLAGS) $(FCFLAGS) $< $(CUDALINK) -I$(INCDIR) -L$(LIBDIR) $(CUDAFORLINK) $(LIBLINK) -o $@

# `clean` is a command name, not a file; declare it phony so a file called
# `clean` cannot make the target look up to date.
.PHONY: clean

# Remove the shared libraries and their symlinks, the *for.o objects, the
# generated module files and the test output directory.
clean:
	rm -f $(LIBDIR)/$(LIBTARGET) $(LIBDIR)/$(LIBSONAME) $(LIBDIR)/$(LIBNAME)
	rm -f $(LIBDIR)/$(LIBCUDAFOR) $(OBJDIR)/*for.o $(INCDIR)/*.mod
	rm -rf $(TESTDIR)/