Fixed redundant contexts in multi-process apps
Change-Id: If787014450fd281304f0c7baf01d25963e40905d
This commit is contained in:
parent
7a1aa6b563
commit
55c42ad681
22
src/core.cu
22
src/core.cu
@ -422,21 +422,26 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran
|
|||||||
canpeer = 0;
|
canpeer = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (iPid == myPid) {
|
||||||
|
if (myDev == iDev) {
|
||||||
|
INFO("rank access %d -> %d via common device", rank, iRank);
|
||||||
|
comm->ptrs[i].local = ranks[myId].devptr;
|
||||||
|
comm->ptrs[i].remote = ranks[i].devptr;
|
||||||
|
comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
|
||||||
|
} else {
|
||||||
|
int peer_enabled = canpeer;
|
||||||
if (canpeer) {
|
if (canpeer) {
|
||||||
cudaError_t err;
|
cudaError_t p2pErr = cudaDeviceEnablePeerAccess(iDev, 0);
|
||||||
err = cudaDeviceEnablePeerAccess(iDev, 0);
|
if (p2pErr == cudaErrorPeerAccessAlreadyEnabled) {
|
||||||
if (err == cudaErrorPeerAccessAlreadyEnabled) {
|
|
||||||
cudaGetLastError();
|
cudaGetLastError();
|
||||||
} else if (err != cudaSuccess) {
|
} else if (p2pErr != cudaSuccess) {
|
||||||
INFO("peer access failed between rank %d (dev %d) and rank %d (dev %d)\n",
|
INFO("peer access failed between rank %d (dev %d) and rank %d (dev %d)\n",
|
||||||
rank, myDev, iRank, iDev);
|
rank, myDev, iRank, iDev);
|
||||||
|
peer_enabled = 0;
|
||||||
canpeer = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iPid == myPid) {
|
if (peer_enabled) {
|
||||||
if (canpeer || myDev == iDev) {
|
|
||||||
INFO("rank access %d -> %d via P2P device mem", rank, iRank);
|
INFO("rank access %d -> %d via P2P device mem", rank, iRank);
|
||||||
comm->ptrs[i].local = ranks[myId].devptr;
|
comm->ptrs[i].local = ranks[myId].devptr;
|
||||||
comm->ptrs[i].remote = ranks[i].devptr;
|
comm->ptrs[i].remote = ranks[i].devptr;
|
||||||
@ -457,6 +462,7 @@ static ncclResult_t commBuildMaps(ncclComm_t comm, ncclUniqueId* commId, int ran
|
|||||||
}
|
}
|
||||||
comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
|
comm->ptrs[i].remoteCleanup = CLEANUP_NONE;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else { // multi-process!
|
} else { // multi-process!
|
||||||
*ringDirectFailed = 1;
|
*ringDirectFailed = 1;
|
||||||
if (canpeer || myDev == iDev) {
|
if (canpeer || myDev == iDev) {
|
||||||
|
@ -70,7 +70,7 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// CUDA stream creation
|
// CUDA stream creation
|
||||||
cudaStream_t stream;
|
cudaStream_t stream;
|
||||||
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
|
CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||||
|
|
||||||
// Initialize input values
|
// Initialize input values
|
||||||
int *dptr;
|
int *dptr;
|
||||||
@ -91,7 +91,7 @@ int main(int argc, char *argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check results
|
// Check results
|
||||||
cudaStreamSynchronize(stream);
|
CUDACHECK(cudaStreamSynchronize(stream));
|
||||||
CUDACHECK(cudaMemcpy(val, (dptr+SIZE), SIZE*sizeof(int), cudaMemcpyDeviceToHost));
|
CUDACHECK(cudaMemcpy(val, (dptr+SIZE), SIZE*sizeof(int), cudaMemcpyDeviceToHost));
|
||||||
for (int v=0; v<SIZE; v++) {
|
for (int v=0; v<SIZE; v++) {
|
||||||
if (val[v] != ref) {
|
if (val[v] != ref) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user