Fix #224: prevent number of IB devices from going out of bounds
This commit is contained in:
parent
c8c68fb5f7
commit
920ae57c14
@ -112,13 +112,13 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
|
|||||||
|
|
||||||
if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError;
|
if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError;
|
||||||
|
|
||||||
for (int d=0; d<nIbDevs; d++) {
|
for (int d=0; d<nIbDevs && ncclNIbDevs<MAX_IB_DEVS; d++) {
|
||||||
struct ibv_context * context;
|
struct ibv_context * context;
|
||||||
if (ncclSuccess != wrap_ibv_open_device(&context, devices[d]) || context == NULL) {
|
if (ncclSuccess != wrap_ibv_open_device(&context, devices[d]) || context == NULL) {
|
||||||
WARN("NET/IB : Unable to open device %s", devices[d]->name);
|
WARN("NET/IB : Unable to open device %s", devices[d]->name);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int found = 0;
|
int nPorts = 0;
|
||||||
struct ibv_device_attr devAttr;
|
struct ibv_device_attr devAttr;
|
||||||
memset(&devAttr, 0, sizeof(devAttr));
|
memset(&devAttr, 0, sizeof(devAttr));
|
||||||
if (ncclSuccess != wrap_ibv_query_device(context, &devAttr)) {
|
if (ncclSuccess != wrap_ibv_query_device(context, &devAttr)) {
|
||||||
@ -148,10 +148,10 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
|
|||||||
ncclIbDevs[ncclNIbDevs].context = context;
|
ncclIbDevs[ncclNIbDevs].context = context;
|
||||||
strncpy(ncclIbDevs[ncclNIbDevs].devName, devices[d]->name, MAXNAMESIZE);
|
strncpy(ncclIbDevs[ncclNIbDevs].devName, devices[d]->name, MAXNAMESIZE);
|
||||||
ncclNIbDevs++;
|
ncclNIbDevs++;
|
||||||
found++;
|
nPorts++;
|
||||||
pthread_create(&ncclIbAsyncThread, NULL, ncclIbAsyncThreadMain, context);
|
pthread_create(&ncclIbAsyncThread, NULL, ncclIbAsyncThreadMain, context);
|
||||||
}
|
}
|
||||||
if (found == 0 && ncclSuccess != wrap_ibv_close_device(context)) { return ncclInternalError; }
|
if (nPorts == 0 && ncclSuccess != wrap_ibv_close_device(context)) { return ncclInternalError; }
|
||||||
}
|
}
|
||||||
if (nIbDevs && (ncclSuccess != wrap_ibv_free_device_list(devices))) { return ncclInternalError; };
|
if (nIbDevs && (ncclSuccess != wrap_ibv_free_device_list(devices))) { return ncclInternalError; };
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user