Improve robustness of PCI detection
Fall back to default values when the PCI class or link speed is unknown.
This commit is contained in:
parent
a783484ab5
commit
23a9fbb788
@ -303,8 +303,8 @@ ncclResult_t ncclTopoAddGpu(struct ncclXmlNode* xmlGpu, struct ncclTopoSystem* s
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
// Lookup table from PCI class strings to the constants PCI, NVS, CPU, GPU and NIC
// (presumably topology node types - confirm against their declaration).
// The last entry has a NULL key and closes the table.
// NOTE(review): whether keys are matched exactly or as prefixes ("0x03", "0x02" are
// shorter than the others) depends on the lookup helper, not visible here - confirm.
struct kvDict kvDictPciClass[] = { { "0x060400", PCI }, { "0x068000", NVS }, { "0x068001", CPU }, { "0x03", GPU }, { "0x02", NIC }, { NULL, 0 } };
|
||||
// Lookup table from PCI link-speed strings to integer per-lane speed values
// (unit given by the trailing comment). The last entry has a NULL key and closes the table.
struct kvDict kvDictPciGen[] = { { "2.5 GT/s", 15 }, { "5 GT/s", 30 }, { "8 GT/s", 60 }, { "16 GT/s", 120 }, { NULL, 0 } }; // x100 Mbps per lane
|
||||
// Lookup table from PCI class strings to the constants PCI, NVS, CPU, GPU and NIC
// (presumably topology node types - confirm against their declaration).
// The last entry has a NULL key and closes the table; its value (PCI) is the default
// used for a class string that is not listed.
// NOTE(review): whether keys are matched exactly or as prefixes ("0x03", "0x02" are
// shorter than the others) depends on the lookup helper, not visible here - confirm.
struct kvDict kvDictPciClass[] = { { "0x060400", PCI }, { "0x068000", NVS }, { "0x068001", CPU }, { "0x03", GPU }, { "0x02", NIC }, { NULL, PCI /* Default fallback value */ } };
|
||||
// Lookup table from PCI link-speed strings to integer per-lane speed values
// (unit given by the trailing comment). The last entry has a NULL key and closes the
// table; its value, 60, equals the "8 GT/s" entry and is the default used when the
// speed string is not listed.
struct kvDict kvDictPciGen[] = { { "2.5 GT/s", 15 }, { "5 GT/s", 30 }, { "8 GT/s", 60 }, { "16 GT/s", 120 }, { NULL, 60 /* Default fallback */ } }; // x100 Mbps per lane
|
||||
ncclResult_t ncclTopoAddPci(struct ncclXmlNode* xmlPci, struct ncclTopoSystem* system, struct ncclTopoNode* parent) {
|
||||
const char* str;
|
||||
|
||||
@ -356,8 +356,6 @@ ncclResult_t ncclTopoAddPci(struct ncclXmlNode* xmlPci, struct ncclTopoSystem* s
|
||||
|
||||
// Manage cases where speed was not indicated in /sys
|
||||
if (width == 0) width = 16;
|
||||
if (strlen(str) == 0 || strcasecmp(str, "Unknown speed") == 0) str = "8 GT/s";
|
||||
|
||||
NCCLCHECK(kvConvertToInt(str, &speed, kvDictPciGen)); // Values in 100Mbps, per lane (we want GB/s in the end)
|
||||
|
||||
NCCLCHECK(ncclTopoConnectNodes(node, parent, LINK_PCI, width*speed/80.0));
|
||||
|
@ -218,8 +218,9 @@ static ncclResult_t kvConvertToInt(const char* str, int* value, struct kvDict* d
|
||||
}
|
||||
d++;
|
||||
}
|
||||
WARN("KV Convert to int : could not find value of '%s' in dictionary", str);
|
||||
return ncclInternalError;
|
||||
INFO(NCCL_GRAPH, "KV Convert to int : could not find value of '%s' in dictionary, falling back to %d", str, d->value);
|
||||
*value = d->value;
|
||||
return ncclSuccess;
|
||||
}
|
||||
static ncclResult_t kvConvertToStr(int value, const char** str, struct kvDict* dict) {
|
||||
struct kvDict* d = dict;
|
||||
|
Loading…
x
Reference in New Issue
Block a user