Fix crash when only a subset of GPUs are visible within a container.
Fixes #326.
This commit is contained in:
parent
23a9fbb788
commit
f36540f55a
@@ -520,6 +520,7 @@ ncclResult_t ncclTopoGetSystem(struct ncclComm* comm, struct ncclTopoSystem** sy
|
||||
NCCLCHECK(int64ToBusId(comm->peerInfo[r].busId, busId));
|
||||
struct ncclXmlNode* node;
|
||||
NCCLCHECK(ncclTopoFillGpu(xml, busId, &node));
|
||||
+ if (node == NULL) continue;
|
||||
NCCLCHECK(xmlSetAttrInt(node, "rank", r));
|
||||
NCCLCHECK(xmlInitAttrInt(node, "gdr", comm->peerInfo[r].gdrSupport));
|
||||
}
|
||||
|
@@ -569,7 +569,7 @@ ncclResult_t ncclTopoGetXmlFromGpu(struct ncclXmlNode* pciNode, nvmlDevice_t nvm
|
||||
NCCLCHECK(xmlSetAttrInt(gpuNode, "dev", dev));
|
||||
}
|
||||
NCCLCHECK(xmlGetAttrInt(gpuNode, "dev", &dev));
|
||||
- if (dev == -1) return ncclSuccess;
|
||||
+ if (dev == -1) { *gpuNodeRet = NULL; return ncclSuccess; }
|
||||
|
||||
NCCLCHECK(xmlGetAttrIndex(gpuNode, "sm", &index));
|
||||
if (index == -1) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user