NCCL 2.4.7-1
Performance tweaks for PowerPC builds only: set the default NCCL_MIN_NRINGS to 4 and disable PCI-E NUMA distance detection.
This commit is contained in:
parent
60a586ded9
commit
0ceaec9cee
@ -1,6 +1,6 @@
|
||||
##### version
|
||||
NCCL_MAJOR := 2
|
||||
NCCL_MINOR := 4
|
||||
NCCL_PATCH := 6
|
||||
NCCL_PATCH := 7
|
||||
NCCL_SUFFIX :=
|
||||
PKG_REVISION := 1
|
||||
|
@ -170,7 +170,13 @@ static ncclResult_t fillCoords(int nranks, int* matrix, int* coords, int* rankTo
|
||||
}
|
||||
}
|
||||
|
||||
NCCL_PARAM(MinNrings, "MIN_NRINGS", 0);
|
||||
#ifdef __PPC__
|
||||
// Make the default NCCL_MIN_NRINGS=4 for IBM/Power nodes
|
||||
#define DEFAULT_MIN_NRINGS 4
|
||||
#else
|
||||
#define DEFAULT_MIN_NRINGS 0
|
||||
#endif
|
||||
NCCL_PARAM(MinNrings, "MIN_NRINGS", DEFAULT_MIN_NRINGS);
|
||||
NCCL_PARAM(MaxNrings, "MAX_NRINGS", 0);
|
||||
|
||||
/* Users can force the number of threads with an environment variable */
|
||||
|
@ -39,11 +39,17 @@ int pciDistance(char* path1, char* path2) {
|
||||
}
|
||||
}
|
||||
if (score <= 3) {
|
||||
#ifdef __PPC__
|
||||
// NUMA distance detection and PATH_SYS are not supported on IBM/Power
|
||||
// nodes currently
|
||||
return PATH_NODE;
|
||||
#else
|
||||
/* Split the former PATH_SOC distance into PATH_NODE and PATH_SYS based on numaId */
|
||||
int numaId1 = getNumaId(path1);
|
||||
int numaId2 = getNumaId(path2);
|
||||
TRACE(NCCL_INIT, "depth %d score %d path1 %s numaId %d path2 %s numaId %d", depth, score, path1, numaId1, path2, numaId2);
|
||||
return ((numaId1 == numaId2) ? PATH_NODE : PATH_SYS);
|
||||
#endif
|
||||
}
|
||||
if (score == 4) return PATH_PHB;
|
||||
if (score == depth-1) return PATH_PIX;
|
||||
|
Loading…
x
Reference in New Issue
Block a user