Tune chunkSteps for networks with large latency
This commit is contained in:
parent
c324f771db
commit
a6c8f5e0c2
@ -1281,6 +1281,12 @@ static ncclResult_t getStepInfo(struct ncclInfo* info) {
|
||||
info->sliceSteps = ncclParamRingSliceSteps();
|
||||
}
|
||||
}
|
||||
// Make the buffer deeper for network segments with longer latency
|
||||
if (info->comm->nNodes > 1 && info->comm->netLatency > 100 &&
|
||||
(info->coll == ncclFuncReduceScatter || info->coll == ncclFuncAllGather || info->coll == ncclFuncAllReduce)) {
|
||||
info->sliceSteps = 1;
|
||||
info->chunkSteps = 2;
|
||||
}
|
||||
if (info->chunkSteps > NCCL_STEPS/2 || info->sliceSteps > NCCL_STEPS/2) {
|
||||
WARN("Invalid chunkSteps=%d/sliceSteps=%d, must be at most NCCL_STEPS/2=%d\n", info->chunkSteps, info->sliceSteps, NCCL_STEPS/2);
|
||||
return ncclInvalidUsage;
|
||||
|
@ -218,6 +218,7 @@ struct ncclComm {
|
||||
float latencies[NCCL_NUM_FUNCTIONS][NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
|
||||
float bandwidths[NCCL_NUM_FUNCTIONS][NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
|
||||
int maxThreads[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
|
||||
float netLatency;
|
||||
|
||||
/* This attribute indicates the state of the communicator and the return code of
|
||||
* asynchronous NCCL operations. */
|
||||
|
@ -290,6 +290,10 @@ ncclResult_t ncclNetInit(struct ncclComm* comm) {
|
||||
WARN("Error: network %s not found.", netName ? netName : "");
|
||||
return ncclInvalidUsage;
|
||||
}
|
||||
|
||||
ncclNetProperties_t props;
|
||||
NCCLCHECK(ncclNetGetProperties(comm, 0, &props));
|
||||
comm->netLatency = props.latency;
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user