Merge pull request #822 from KaimingOuyang/github/pytorch-hang-fix
Shutdown socket before close in ncclSocketClose()
This commit is contained in:
commit
9b7d5edbfc
@@ -818,7 +818,14 @@ ncclResult_t ncclSocketTryRecv(struct ncclSocket* sock, void* ptr, int size, int
|
||||
|
||||
ncclResult_t ncclSocketClose(struct ncclSocket* sock) {
|
||||
if (sock != NULL) {
|
||||
if (sock->fd >= 0) close(sock->fd);
|
||||
if (sock->fd >= 0) {
|
||||
/* shutdown() is needed to send FIN packet to proxy thread; shutdown() is not affected
|
||||
* by refcount of fd, but close() is. close() won't close a fd and send FIN packet if
|
||||
* the fd is duplicated (e.g. fork()). So shutdown() guarantees the correct and graceful
|
||||
* connection close here. */
|
||||
shutdown(sock->fd, SHUT_RDWR);
|
||||
close(sock->fd);
|
||||
}
|
||||
sock->state = ncclSocketStateClosed;
|
||||
sock->fd = -1;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user