Fix #224: prevent the number of IB devices from going out of bounds

This commit is contained in:
Ke Wen 2019-07-16 08:41:56 -07:00
parent c8c68fb5f7
commit 920ae57c14

View File

@ -112,13 +112,13 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError;
for (int d=0; d<nIbDevs; d++) {
for (int d=0; d<nIbDevs && ncclNIbDevs<MAX_IB_DEVS; d++) {
struct ibv_context * context;
if (ncclSuccess != wrap_ibv_open_device(&context, devices[d]) || context == NULL) {
WARN("NET/IB : Unable to open device %s", devices[d]->name);
continue;
}
int found = 0;
int nPorts = 0;
struct ibv_device_attr devAttr;
memset(&devAttr, 0, sizeof(devAttr));
if (ncclSuccess != wrap_ibv_query_device(context, &devAttr)) {
@ -148,10 +148,10 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
ncclIbDevs[ncclNIbDevs].context = context;
strncpy(ncclIbDevs[ncclNIbDevs].devName, devices[d]->name, MAXNAMESIZE);
ncclNIbDevs++;
found++;
nPorts++;
pthread_create(&ncclIbAsyncThread, NULL, ncclIbAsyncThreadMain, context);
}
if (found == 0 && ncclSuccess != wrap_ibv_close_device(context)) { return ncclInternalError; }
if (nPorts == 0 && ncclSuccess != wrap_ibv_close_device(context)) { return ncclInternalError; }
}
if (nIbDevs && (ncclSuccess != wrap_ibv_free_device_list(devices))) { return ncclInternalError; };
}