diff --git a/src/collectives/device/Makefile b/src/collectives/device/Makefile index 04bce8e..a2498a0 100644 --- a/src/collectives/device/Makefile +++ b/src/collectives/device/Makefile @@ -29,7 +29,7 @@ all: $(STATICLIB) all_deps: $(DEPENDFILES) # Auto-generating the rules per op/reduction/datatype/algorithm -$(RULESFILE) : +$(RULESFILE) : gen_rules.sh @printf "Generating %-35s > %s\n" rules $@ @mkdir -p $(OBJDIR) @CUDA_MAJOR=${CUDA_MAJOR} CUDA_MINOR=${CUDA_MINOR} ./gen_rules.sh $(OBJDIR) > $@ diff --git a/src/collectives/device/gen_rules.sh b/src/collectives/device/gen_rules.sh index aaf3685..a43f005 100755 --- a/src/collectives/device/gen_rules.sh +++ b/src/collectives/device/gen_rules.sh @@ -13,6 +13,9 @@ then datatypes+=" bf16" fi +echo "CURDIR := \$(dir \$(realpath \$(word \$(words \$(math\$(MAKEFILE_LIST))-1), \$(MAKEFILE_LIST))))" ++echo "" + targets="GENOBJS := \\\\\n" for base in sendrecv all_reduce all_gather broadcast reduce reduce_scatter; do @@ -21,10 +24,17 @@ for base in sendrecv all_reduce all_gather broadcast reduce reduce_scatter; do dtn=0 # Order must match that of the ncclDataType_t enum for dt in ${datatypes}; do - echo "${dir}/${base}_${op}_${dt}.o : ${base}.cu ${dir}/${base}.dep" + # Generate a unique filename for each compilation unit, + # otherwise the __nv_module_id may conflict at link time + echo "${dir}/${base}_${opn}_${dtn}.cu :" + echo " echo \"#include \\\"\$(CURDIR)${base}.cu\\\"\" > \$@" + echo "" + # Compile the file + echo "${dir}/${base}_${op}_${dt}.o : ${dir}/${base}_${opn}_${dtn}.cu ${base}.cu ${dir}/${base}.dep" + echo " @printf \"Compiling %-35s > %s\\\\n\" ${base}.cu ${dir}/${base}_${op}_${dt}.o" echo " mkdir -p ${dir}" - echo " \${NVCC} -DNCCL_OP=${opn} -DNCCL_TYPE=${dtn} \${NVCUFLAGS} -dc ${base}.cu -o ${dir}/${base}_${op}_${dt}.o" + echo " \${NVCC} -DNCCL_OP=${opn} -DNCCL_TYPE=${dtn} \${NVCUFLAGS} -dc \$< -o \$@" echo "" targets="$targets\t${dir}/${base}_${op}_${dt}.o \\\\\n" dtn=$(($dtn + 1))