Fixed redundant contexts in multi-process apps

Change-Id: If787014450fd281304f0c7baf01d25963e40905d
This commit is contained in:
Nathan Luehr 2016-07-22 17:29:13 -07:00
parent 7a1aa6b563
commit 55c42ad681
2 changed files with 36 additions and 30 deletions

View File

@@ -422,21 +422,26 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran
canpeer = 0; canpeer = 0;
} }
if (iPid == myPid) {
if (myDev == iDev) {
INFO("rank access %d -> %d via common device", rank, iRank);
comm->ptrs[i].local = ranks[myId].devptr;
comm->ptrs[i].remote = ranks[i].devptr;
comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
} else {
int peer_enabled = canpeer;
if (canpeer) { if (canpeer) {
cudaError_t err; cudaError_t p2pErr = cudaDeviceEnablePeerAccess(iDev, 0);
err = cudaDeviceEnablePeerAccess(iDev, 0); if (p2pErr == cudaErrorPeerAccessAlreadyEnabled) {
if (err == cudaErrorPeerAccessAlreadyEnabled) {
cudaGetLastError(); cudaGetLastError();
} else if (err != cudaSuccess) { } else if (p2pErr != cudaSuccess) {
INFO("peer access failed between rank %d (dev %d) and rank %d (dev %d)\n", INFO("peer access failed between rank %d (dev %d) and rank %d (dev %d)\n",
rank, myDev, iRank, iDev); rank, myDev, iRank, iDev);
peer_enabled = 0;
canpeer = 0;
} }
} }
if (iPid == myPid) { if (peer_enabled) {
if (canpeer || myDev == iDev) {
INFO("rank access %d -> %d via P2P device mem", rank, iRank); INFO("rank access %d -> %d via P2P device mem", rank, iRank);
comm->ptrs[i].local = ranks[myId].devptr; comm->ptrs[i].local = ranks[myId].devptr;
comm->ptrs[i].remote = ranks[i].devptr; comm->ptrs[i].remote = ranks[i].devptr;
@@ -457,6 +462,7 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran
} }
comm->ptrs[i].remoteCleanup = CLEANUP_NONE; comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
} }
}
} else { // multi-process! } else { // multi-process!
*ringDirectFailed = 1; *ringDirectFailed = 1;
if (canpeer || myDev == iDev) { if (canpeer || myDev == iDev) {

View File

@@ -70,7 +70,7 @@ int main(int argc, char *argv[]) {
// CUDA stream creation // CUDA stream creation
cudaStream_t stream; cudaStream_t stream;
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
// Initialize input values // Initialize input values
int *dptr; int *dptr;
@@ -91,7 +91,7 @@ int main(int argc, char *argv[]) {
} }
// Check results // Check results
cudaStreamSynchronize(stream); CUDACHECK(cudaStreamSynchronize(stream));
CUDACHECK(cudaMemcpy(val, (dptr+SIZE), SIZE*sizeof(int), cudaMemcpyDeviceToHost)); CUDACHECK(cudaMemcpy(val, (dptr+SIZE), SIZE*sizeof(int), cudaMemcpyDeviceToHost));
for (int v=0; v<SIZE; v++) { for (int v=0; v<SIZE; v++) {
if (val[v] != ref) { if (val[v] != ref) {