From 89968119365a33c94989ed45c7cae27083f6a120 Mon Sep 17 00:00:00 2001 From: Nathan Luehr Date: Wed, 1 Mar 2017 15:59:48 -0800 Subject: [PATCH] Only enable peer access for ring neighbors. This enables support for systems with more than 9 GPUs attached to a single PCIe root complex. Each CUDA device supports at most eight peer connections, so enabling peer access to every other GPU cannot succeed beyond 9 GPUs; the ring only needs peer access to its two neighbors. --- src/core.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core.cu b/src/core.cu index c7bf89e..1420d21 100644 --- a/src/core.cu +++ b/src/core.cu @@ -388,7 +388,9 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran pid_t iPid = ranks[i].pid; int canpeer = 0; - if (cudaDeviceCanAccessPeer(&canpeer, myDev, iDev) != cudaSuccess) { + int iIsNeighbor = (i == (myNcclId+1)%ndev) || (i == (myNcclId+ndev-1)%ndev); + + if (iIsNeighbor && cudaDeviceCanAccessPeer(&canpeer, myDev, iDev) != cudaSuccess) { INFO("peer query failed between rank %d (dev %d) and rank %d (dev %d)", rank, myDev, iRank, iDev); canpeer = 0; @@ -777,12 +779,12 @@ ncclResult_t ncclCommInitRank(ncclComm_t* newcomm, int ndev, ncclUniqueId commId } res = commBuildMaps(*newcomm, &commId, myrank, gath->ranks, &gath->globalMemSpaceBroke); + syncRingDirect(gath, &((*newcomm)->globalMemSpace)); if (res != ncclSuccess) { WARN("rank %d failed to build comm maps", myrank); goto cleanup; } - syncRingDirect(gath, &((*newcomm)->globalMemSpace)); INFO("Global device memory space is %s", (*newcomm)->globalMemSpace ? "enabled" : "disabled"); res = closeGather(gath, ndev); // includes a barrier