Add support for the exact-match modifier "=" in the NCCL_IB_HCA variable (#236)
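When the value of the NCCL_IB_HCA variable starts with the prefix "=", perform exact name matching instead of prefix matching, so that specifying mlx5_X no longer also selects HCAs named mlx5_X[0-9]+ (for example, "=mlx5_1" matches mlx5_1 but not mlx5_10).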
This commit is contained in:
parent
8e04d80382
commit
0b192d2299
@ -66,6 +66,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre
|
|||||||
#endif
|
#endif
|
||||||
struct netIf userIfs[MAX_IFS];
|
struct netIf userIfs[MAX_IFS];
|
||||||
bool searchNot = prefixList && prefixList[0] == '^';
|
bool searchNot = prefixList && prefixList[0] == '^';
|
||||||
|
bool searchExact = prefixList && prefixList[0] == '=';
|
||||||
int nUserIfs = parseStringList(prefixList, userIfs, MAX_IFS);
|
int nUserIfs = parseStringList(prefixList, userIfs, MAX_IFS);
|
||||||
|
|
||||||
int found = 0;
|
int found = 0;
|
||||||
@ -92,7 +93,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre
|
|||||||
}
|
}
|
||||||
|
|
||||||
// check against user specified interfaces
|
// check against user specified interfaces
|
||||||
if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs) ^ searchNot)) {
|
if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs, searchExact) ^ searchNot)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,6 +20,6 @@ struct netIf {
|
|||||||
};
|
};
|
||||||
|
|
||||||
int parseStringList(const char* string, struct netIf* ifList, int maxList);
|
int parseStringList(const char* string, struct netIf* ifList, int maxList);
|
||||||
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize);
|
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -147,8 +147,8 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) {
|
|||||||
if (!string) return 0;
|
if (!string) return 0;
|
||||||
|
|
||||||
const char* ptr = string;
|
const char* ptr = string;
|
||||||
// Ignore "^" prefix, will be detected outside of this function
|
// Ignore "^" or "=" prefix, will be detected outside of this function
|
||||||
if (ptr[0] == '^') ptr++;
|
if (ptr[0] == '^' || ptr[0] == '=') ptr++;
|
||||||
|
|
||||||
int ifNum = 0;
|
int ifNum = 0;
|
||||||
int ifC = 0;
|
int ifC = 0;
|
||||||
@ -177,8 +177,10 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) {
|
|||||||
return ifNum;
|
return ifNum;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool matchPrefix(const char* string, const char* prefix) {
|
static bool matchIf(const char* string, const char* ref, bool matchExact) {
|
||||||
return (strncmp(string, prefix, strlen(prefix)) == 0);
|
// Make sure to include '\0' in the exact case
|
||||||
|
int matchLen = matchExact ? strlen(string) + 1 : strlen(ref);
|
||||||
|
return strncmp(string, ref, matchLen) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool matchPort(const int port1, const int port2) {
|
static bool matchPort(const int port1, const int port2) {
|
||||||
@ -189,12 +191,12 @@ static bool matchPort(const int port1, const int port2) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize) {
|
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact) {
|
||||||
// Make an exception for the case where no user list is defined
|
// Make an exception for the case where no user list is defined
|
||||||
if (listSize == 0) return true;
|
if (listSize == 0) return true;
|
||||||
|
|
||||||
for (int i=0; i<listSize; i++) {
|
for (int i=0; i<listSize; i++) {
|
||||||
if (matchPrefix(string, ifList[i].prefix)
|
if (matchIf(string, ifList[i].prefix, matchExact)
|
||||||
&& matchPort(port, ifList[i].port)) {
|
&& matchPort(port, ifList[i].port)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -107,6 +107,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
|
|||||||
char* userIbEnv = getenv("NCCL_IB_HCA");
|
char* userIbEnv = getenv("NCCL_IB_HCA");
|
||||||
struct netIf userIfs[MAX_IB_DEVS];
|
struct netIf userIfs[MAX_IB_DEVS];
|
||||||
bool searchNot = userIbEnv && userIbEnv[0] == '^';
|
bool searchNot = userIbEnv && userIbEnv[0] == '^';
|
||||||
|
bool searchExact = userIbEnv && userIbEnv[0] == '=';
|
||||||
int nUserIfs = parseStringList(userIbEnv, userIfs, MAX_IB_DEVS);
|
int nUserIfs = parseStringList(userIbEnv, userIfs, MAX_IB_DEVS);
|
||||||
|
|
||||||
if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError;
|
if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError;
|
||||||
@ -136,7 +137,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
|
|||||||
&& portAttr.link_layer != IBV_LINK_LAYER_ETHERNET) continue;
|
&& portAttr.link_layer != IBV_LINK_LAYER_ETHERNET) continue;
|
||||||
|
|
||||||
// check against user specified HCAs/ports
|
// check against user specified HCAs/ports
|
||||||
if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs) ^ searchNot)) {
|
if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs, searchExact) ^ searchNot)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
TRACE(NCCL_INIT|NCCL_NET,"NET/IB: [%d] %s:%d/%s ", d, devices[d]->name, port,
|
TRACE(NCCL_INIT|NCCL_NET,"NET/IB: [%d] %s:%d/%s ", d, devices[d]->name, port,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user