Merge remote-tracking branch 'origin/master'

This commit is contained in:
Sylvain Jeaugey 2022-09-27 02:24:41 -07:00
commit 99c28f2e75
3 changed files with 13 additions and 4 deletions

View File

@@ -29,7 +29,7 @@ all: $(STATICLIB)
all_deps: $(DEPENDFILES) all_deps: $(DEPENDFILES)
# Auto-generating the rules per op/reduction/datatype/algorithm # Auto-generating the rules per op/reduction/datatype/algorithm
$(RULESFILE) : $(RULESFILE) : gen_rules.sh
@printf "Generating %-35s > %s\n" rules $@ @printf "Generating %-35s > %s\n" rules $@
@mkdir -p $(OBJDIR) @mkdir -p $(OBJDIR)
@CUDA_MAJOR=${CUDA_MAJOR} CUDA_MINOR=${CUDA_MINOR} ./gen_rules.sh $(OBJDIR) > $@ @CUDA_MAJOR=${CUDA_MAJOR} CUDA_MINOR=${CUDA_MINOR} ./gen_rules.sh $(OBJDIR) > $@

View File

@@ -21,10 +21,18 @@ for base in sendrecv all_reduce all_gather broadcast reduce reduce_scatter; do
dtn=0 dtn=0
# Order must match that of the ncclDataType_t enum # Order must match that of the ncclDataType_t enum
for dt in ${datatypes}; do for dt in ${datatypes}; do
echo "${dir}/${base}_${op}_${dt}.o : ${base}.cu ${dir}/${base}.dep" # Generate a unique filename for each compilation unit,
# otherwise the __nv_module_id may conflict at link time
echo "${dir}/${base}_${op}_${dt}.cu : ${base}.cu"
echo " @printf \"Copying %-35s > %s\\\\n\" \$< \$@"
echo " cp \$< \$@"
echo ""
# Compile the file
echo "${dir}/${base}_${op}_${dt}.o : ${dir}/${base}_${op}_${dt}.cu ${base}.cu ${dir}/${base}.dep"
echo " @printf \"Compiling %-35s > %s\\\\n\" ${base}.cu ${dir}/${base}_${op}_${dt}.o" echo " @printf \"Compiling %-35s > %s\\\\n\" ${base}.cu ${dir}/${base}_${op}_${dt}.o"
echo " mkdir -p ${dir}" echo " mkdir -p ${dir}"
echo " \${NVCC} -DNCCL_OP=${opn} -DNCCL_TYPE=${dtn} \${NVCUFLAGS} -dc ${base}.cu -o ${dir}/${base}_${op}_${dt}.o" echo " \${NVCC} -DNCCL_OP=${opn} -DNCCL_TYPE=${dtn} \${NVCUFLAGS} -dc \$< -o \$@"
echo "" echo ""
targets="$targets\t${dir}/${base}_${op}_${dt}.o \\\\\n" targets="$targets\t${dir}/${base}_${op}_${dt}.o \\\\\n"
dtn=$(($dtn + 1)) dtn=$(($dtn + 1))

View File

@@ -1037,6 +1037,8 @@ collnet_cleanup:
} }
} }
NCCLCHECKGOTO(devCommSetup(comm), ret, affinity_restore);
/* Local intra-node barrier */ /* Local intra-node barrier */
NCCLCHECK(bootstrapBarrier(comm->bootstrap, comm->localRankToRank, comm->localRank, comm->localRanks, comm->localRankToRank[0])); NCCLCHECK(bootstrapBarrier(comm->bootstrap, comm->localRankToRank, comm->localRank, comm->localRanks, comm->localRankToRank[0]));
@@ -1087,7 +1089,6 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) {
} }
NCCLCHECKGOTO(commAlloc(newcomm, nranks, myrank), res, cleanup); NCCLCHECKGOTO(commAlloc(newcomm, nranks, myrank), res, cleanup);
NCCLCHECKGOTO(initTransportsRank(*newcomm, &commId), res, cleanup); NCCLCHECKGOTO(initTransportsRank(*newcomm, &commId), res, cleanup);
NCCLCHECKGOTO(devCommSetup(*newcomm), res, cleanup);
// update communicator state // update communicator state
comm->initState = ncclSuccess; comm->initState = ncclSuccess;