2.7.5-1
Minor fixes for A100 platforms. Add a WARN for an invalid ncclGroupEnd call (one made outside of a group).
This commit is contained in:
parent
5949d96f36
commit
01afd20a77
@ -1,6 +1,6 @@
|
||||
##### version
|
||||
NCCL_MAJOR := 2
|
||||
NCCL_MINOR := 7
|
||||
NCCL_PATCH := 3
|
||||
NCCL_PATCH := 5
|
||||
NCCL_SUFFIX :=
|
||||
PKG_REVISION := 1
|
||||
|
@ -774,8 +774,8 @@ done:
|
||||
int dupChannels = std::min(graph->nChannels*2, graph->maxChannels);
|
||||
memcpy(graph->intra+graph->nChannels*ngpus, graph->intra, (dupChannels-graph->nChannels)*ngpus*sizeof(int));
|
||||
memcpy(graph->inter+graph->nChannels*2,graph->inter, (dupChannels-graph->nChannels)*2*sizeof(int));
|
||||
graph->speedIntra /= 2;
|
||||
graph->speedInter /= 2;
|
||||
graph->speedIntra /= DIVUP(dupChannels, graph->nChannels);
|
||||
graph->speedInter /= DIVUP(dupChannels, graph->nChannels);
|
||||
graph->nChannels = dupChannels;
|
||||
}
|
||||
return ncclSuccess;
|
||||
|
@ -107,9 +107,9 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
|
||||
for (int p=0; p<NCCL_NUM_PROTOCOLS; p++) {
|
||||
float speed = comm->nNodes <= 2 || a == NCCL_ALGO_COLLNET ? graphs[a]->speedIntra : graphs[a]->speedInter;
|
||||
float busBw = graphs[a]->nChannels * speed;
|
||||
if (compCap80) busBw *= 0.92;
|
||||
|
||||
// Various model refinements
|
||||
if (compCap80) busBw = std::min(busBw, 235.0f);
|
||||
if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL) busBw *= (comm->nNodes > 1 || coll == ncclCollAllReduce || coll == ncclCollReduce) ? 1.0/4.0 : 1.0/3.0;
|
||||
if (a == NCCL_ALGO_RING && p == NCCL_PROTO_LL128) busBw = std::min(busBw * (ppn < 2 ? 0.7 : 0.92 /*120.0/128.0*/), ll128MaxBwPerCh[coll]*graphs[a]->nChannels);
|
||||
double maxTreeBw = comm->nNodes > 2 ?
|
||||
|
@ -145,7 +145,10 @@ void* ncclAsyncThreadPreconnect(void* args_) {
|
||||
|
||||
NCCL_API(ncclResult_t, ncclGroupEnd);
|
||||
ncclResult_t ncclGroupEnd() {
|
||||
if (ncclGroupMode == 0) return ncclInvalidUsage;
|
||||
if (ncclGroupMode == 0) {
|
||||
WARN("ncclGroupEnd: not in a group call.");
|
||||
return ncclInvalidUsage;
|
||||
}
|
||||
ncclGroupMode--;
|
||||
if (ncclGroupMode > 0) return ncclSuccess;
|
||||
int savedDev;
|
||||
|
Loading…
x
Reference in New Issue
Block a user