Only enable peer access for ring neighbors.

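This enables support for systems with more than 9 GPUs attached to a single PCIe root complex, where enabling peer access between every pair of devices would exceed CUDA's limit of eight peer connections per device.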
This commit is contained in:
Nathan Luehr 2017-03-01 15:59:48 -08:00 committed by Sylvain Jeaugey
parent c219a183d0
commit 8996811936

View File

@@ -388,7 +388,9 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran
pid_t iPid = ranks[i].pid;
int canpeer = 0;
if (cudaDeviceCanAccessPeer(&canpeer, myDev, iDev) != cudaSuccess) {
int iIsNeighbor = (i == (myNcclId+1)%ndev) || (i == (myNcclId+ndev-1)%ndev);
if (iIsNeighbor && cudaDeviceCanAccessPeer(&canpeer, myDev, iDev) != cudaSuccess) {
INFO("peer query failed between rank %d (dev %d) and rank %d (dev %d)",
rank, myDev, iRank, iDev);
canpeer = 0;
@@ -777,12 +779,12 @@ ncclResult_t ncclCommInitRank(ncclComm_t* newcomm, int ndev, ncclUniqueId commId
}
res = commBuildMaps(*newcomm, &commId, myrank, gath->ranks, &gath->globalMemSpaceBroke);
syncRingDirect(gath, &((*newcomm)->globalMemSpace));
if (res != ncclSuccess) {
WARN("rank %d failed to build comm maps", myrank);
goto cleanup;
}
syncRingDirect(gath, &((*newcomm)->globalMemSpace));
INFO("Global device memory space is %s", (*newcomm)->globalMemSpace ? "enabled" : "disabled");
res = closeGather(gath, ndev); // includes a barrier