Add support for the "=" exact-match modifier to the NCCL_IB_HCA variable (#236)

Perform exact matching when the NCCL_IB_HCA value is prefixed with "=", so that specifying mlx5_X no longer also selects HCAs whose names match mlx5_X[0-9]+.
This commit is contained in:
Hirochika Asai 2019-07-10 06:45:41 +09:00 committed by Sylvain Jeaugey
parent 8e04d80382
commit 0b192d2299
4 changed files with 13 additions and 9 deletions

View File

@ -66,6 +66,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre
#endif
struct netIf userIfs[MAX_IFS];
bool searchNot = prefixList && prefixList[0] == '^';
bool searchExact = prefixList && prefixList[0] == '=';
int nUserIfs = parseStringList(prefixList, userIfs, MAX_IFS);
int found = 0;
@ -92,7 +93,7 @@ static int findInterfaces(const char* prefixList, char* names, union socketAddre
}
// check against user specified interfaces
if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs) ^ searchNot)) {
if (!(matchIfList(interface->ifa_name, -1, userIfs, nUserIfs, searchExact) ^ searchNot)) {
continue;
}

View File

@ -20,6 +20,6 @@ struct netIf {
};
int parseStringList(const char* string, struct netIf* ifList, int maxList);
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize);
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact);
#endif

View File

@ -147,8 +147,8 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) {
if (!string) return 0;
const char* ptr = string;
// Ignore "^" prefix, will be detected outside of this function
if (ptr[0] == '^') ptr++;
// Ignore "^" or "=" prefix, will be detected outside of this function
if (ptr[0] == '^' || ptr[0] == '=') ptr++;
int ifNum = 0;
int ifC = 0;
@ -177,8 +177,10 @@ int parseStringList(const char* string, struct netIf* ifList, int maxList) {
return ifNum;
}
static bool matchPrefix(const char* string, const char* prefix) {
return (strncmp(string, prefix, strlen(prefix)) == 0);
// Compare a candidate name against one user-supplied reference entry.
//   string:     name being tested
//   ref:        reference entry to compare against
//   matchExact: when true, the two must be identical; when false, it is
//               enough for `string` to begin with `ref`
// Returns true on a match.
static bool matchIf(const char* string, const char* ref, bool matchExact) {
  if (matchExact) {
    // Whole-string equality, terminating '\0' included
    return strcmp(string, ref) == 0;
  }
  // Prefix mode: only the first strlen(ref) characters have to agree
  int prefixLen = strlen(ref);
  return strncmp(string, ref, prefixLen) == 0;
}
static bool matchPort(const int port1, const int port2) {
@ -189,12 +191,12 @@ static bool matchPort(const int port1, const int port2) {
}
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize) {
bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact) {
// Make an exception for the case where no user list is defined
if (listSize == 0) return true;
for (int i=0; i<listSize; i++) {
if (matchPrefix(string, ifList[i].prefix)
if (matchIf(string, ifList[i].prefix, matchExact)
&& matchPort(port, ifList[i].port)) {
return true;
}

View File

@ -107,6 +107,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
char* userIbEnv = getenv("NCCL_IB_HCA");
struct netIf userIfs[MAX_IB_DEVS];
bool searchNot = userIbEnv && userIbEnv[0] == '^';
bool searchExact = userIbEnv && userIbEnv[0] == '=';
int nUserIfs = parseStringList(userIbEnv, userIfs, MAX_IB_DEVS);
if (ncclSuccess != wrap_ibv_get_device_list(&devices, &nIbDevs)) return ncclInternalError;
@ -136,7 +137,7 @@ ncclResult_t ncclIbInit(ncclDebugLogger_t logFunction) {
&& portAttr.link_layer != IBV_LINK_LAYER_ETHERNET) continue;
// check against user specified HCAs/ports
if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs) ^ searchNot)) {
if (! (matchIfList(devices[d]->name, port, userIfs, nUserIfs, searchExact) ^ searchNot)) {
continue;
}
TRACE(NCCL_INIT|NCCL_NET,"NET/IB: [%d] %s:%d/%s ", d, devices[d]->name, port,