Add the exact matching modifier support "=" to the NCCL_IB_HCA variable (#236)
When the NCCL_IB_HCA value starts with the prefix "=", HCA names are matched exactly instead of by prefix, so that specifying mlx5_X no longer also selects HCAs named mlx5_X[0-9]+ (for example, "=mlx5_1" selects mlx5_1 but not mlx5_10).
This commit is contained in:
parent
8e04d80382
commit
0b192d2299
@ -66,6 +66,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre
|
||||
#endif
|
||||
struct netIf userIfs[MAX_IFS];
|
||||
bool searchNot = prefixList && prefixList[0] == '^';
|
||||
bool searchExact = prefixList && prefixList[0] == '=';
|
||||
int nUserIfs = parseStringList(prefixList, userIfs, MAX_IFS);
|
||||
|
||||
int found = 0;
|
||||
@ -92,7 +93,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre
|
||||
}
|
||||
|
||||
// check against user specified interfaces
|
||||
if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs) ^ searchNot)) {
|
||||
if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs, searchExact) ^ searchNot)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,6 @@ struct netIf {
|
||||
};
|
||||
|
||||
int parseStringList(const char* string, struct netIf* ifList, int maxList);
|
||||
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize);
|
||||
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact);
|
||||
|
||||
#endif
|
||||
|
@ -147,8 +147,8 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) {
|
||||
if (!string) return 0;
|
||||
|
||||
const char* ptr = string;
|
||||
// Ignore "^" prefix, will be detected outside of this function
|
||||
if (ptr[0] == '^') ptr++;
|
||||
// Ignore "^" or "=" prefix, will be detected outside of this function
|
||||
if (ptr[0] == '^' || ptr[0] == '=') ptr++;
|
||||
|
||||
int ifNum = 0;
|
||||
int ifC = 0;
|
||||
@ -177,8 +177,10 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) {
|
||||
return ifNum;
|
||||
}
|
||||
|
||||
static bool matchPrefix(const char* string, const char* prefix) {
|
||||
return (strncmp(string, prefix, strlen(prefix)) == 0);
|
||||
/*
 * Compare a name against one reference entry.
 *
 * string:     the name being tested.
 * ref:        the reference entry to compare against.
 * matchExact: when true, the two strings must be identical; when false, it
 *             is enough for `string` to start with `ref`.
 *
 * Returns true on a match, false otherwise. Both pointers must be valid
 * NUL-terminated strings.
 */
static bool matchIf(const char* string, const char* ref, bool matchExact) {
  if (matchExact) {
    // Whole-string equality, so "mlx5_1" does not also match "mlx5_10".
    return strcmp(string, ref) == 0;
  }
  // Prefix match: only the first strlen(ref) characters have to agree.
  return strncmp(string, ref, strlen(ref)) == 0;
}
|
||||
|
||||
static bool matchPort(const int port1, const int port2) {
|
||||
@ -189,12 +191,12 @@ static bool matchPort(const int port1, const int port2) {
|
||||
}
|
||||
|
||||
|
||||
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize) {
|
||||
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact) {
|
||||
// Make an exception for the case where no user list is defined
|
||||
if (listSize == 0) return true;
|
||||
|
||||
for (int i=0; i<listSize; i++) {
|
||||
if (matchPrefix(string, ifList[i].prefix)
|
||||
if (matchIf(string, ifList[i].prefix, matchExact)
|
||||
&& matchPort(port, ifList[i].port)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -107,6 +107,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
|
||||
char* userIbEnv = getenv("NCCL_IB_HCA");
|
||||
struct netIf userIfs[MAX_IB_DEVS];
|
||||
bool searchNot = userIbEnv && userIbEnv[0] == '^';
|
||||
bool searchExact = userIbEnv && userIbEnv[0] == '=';
|
||||
int nUserIfs = parseStringList(userIbEnv, userIfs, MAX_IB_DEVS);
|
||||
|
||||
if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError;
|
||||
@ -136,7 +137,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
|
||||
&& portAttr.link_layer != IBV_LINK_LAYER_ETHERNET) continue;
|
||||
|
||||
// check against user specified HCAs/ports
|
||||
if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs) ^ searchNot)) {
|
||||
if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs, searchExact) ^ searchNot)) {
|
||||
continue;
|
||||
}
|
||||
TRACE(NCCL_INIT|NCCL_NET,"NET/IB: [%d] %s:%d/%s ", d, devices[d]->name, port,
|
||||
|
Loading…
x
Reference in New Issue
Block a user