From 367e9b61c3123870465d1eeda214443ec26f1a3f Mon Sep 17 00:00:00 2001 From: Kaiming Ouyang Date: Wed, 12 Apr 2023 08:51:09 -0700 Subject: [PATCH 1/2] Shutdown socket before close in ncclSocketClose() --- src/misc/socket.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/misc/socket.cc b/src/misc/socket.cc index 56c96c5..19bbdc0 100644 --- a/src/misc/socket.cc +++ b/src/misc/socket.cc @@ -818,7 +818,10 @@ ncclResult_t ncclSocketTryRecv(struct ncclSocket* sock, void* ptr, int size, int ncclResult_t ncclSocketClose(struct ncclSocket* sock) { if (sock != NULL) { - if (sock->fd >= 0) close(sock->fd); + if (sock->fd >= 0) { + shutdown(sock->fd, SHUT_RDWR); + close(sock->fd); + } sock->state = ncclSocketStateClosed; sock->fd = -1; } From 006b6bc7dc5e534e384133f57ceac1367f581865 Mon Sep 17 00:00:00 2001 From: Kaiming Ouyang Date: Thu, 13 Apr 2023 09:00:59 -0700 Subject: [PATCH 2/2] Add a comment to shutdown() in ncclSocketClose --- src/misc/socket.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/misc/socket.cc b/src/misc/socket.cc index 19bbdc0..037d1ff 100644 --- a/src/misc/socket.cc +++ b/src/misc/socket.cc @@ -819,6 +819,10 @@ ncclResult_t ncclSocketTryRecv(struct ncclSocket* sock, void* ptr, int size, int ncclResult_t ncclSocketClose(struct ncclSocket* sock) { if (sock != NULL) { if (sock->fd >= 0) { + /* shutdown() is needed to send a FIN packet to the proxy thread; shutdown() is not affected + * by the refcount of the fd, but close() is. close() won't close an fd and send a FIN packet if + * the fd is duplicated (e.g. by fork()). So shutdown() guarantees the correct and graceful + * connection close here. */ shutdown(sock->fd, SHUT_RDWR); close(sock->fd); }