Fix crash when NVswitch is not visible inside a VM.
This commit is contained in:
Sylvain Jeaugey 2020-06-23 14:08:49 -07:00
parent 01afd20a77
commit 1952325569
2 changed files with 9 additions and 4 deletions

View File

@ -1,6 +1,6 @@
##### version
NCCL_MAJOR := 2
NCCL_MINOR := 7
NCCL_PATCH := 5
NCCL_PATCH := 6
NCCL_SUFFIX :=
PKG_REVISION := 1

View File

@ -640,9 +640,14 @@ ncclResult_t ncclTopoGetXmlFromGpu(struct ncclXmlNode* pciNode, nvmlDevice_t nvm
if (index == -1) {
const char* busId;
NCCLCHECK(xmlGetAttr(sub, "target", &busId));
char* path;
NCCLCHECK(getPciPath(busId, &path));
NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass"));
if (strcmp(busId, "fffffff:ffff:ff") == 0) {
// Remote NVLink device is not visible inside this VM. Assume NVSwitch.
NCCLCHECK(xmlSetAttr(sub, "tclass", "0x068000"));
} else {
char* path;
NCCLCHECK(getPciPath(busId, &path));
NCCLCHECK(ncclTopoSetAttrFromSys(sub, path, "class", "tclass"));
}
}
}
*gpuNodeRet = gpuNode;