From 399656269027c1818fc999ccf8ec4dd838cec50d Mon Sep 17 00:00:00 2001 From: Jonas Zhou Date: Fri, 27 Nov 2020 16:20:55 +0800 Subject: [PATCH] x86: Add CPU detection for Zhaoxin processors Signed-off-by: Jonas Zhou --- src/graph/paths.cc | 3 +++ src/graph/topo.cc | 10 +++++++++- src/graph/topo.h | 2 ++ src/include/graph.h | 2 ++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/graph/paths.cc b/src/graph/paths.cc index c7525e6..eb556c4 100644 --- a/src/graph/paths.cc +++ b/src/graph/paths.cc @@ -283,6 +283,9 @@ ncclResult_t ncclTopoCheckP2p(struct ncclTopoSystem* system, int64_t id1, int64_ if (model == NCCL_TOPO_CPU_TYPE_BDW) p2pLevel = PATH_PXB; else p2pLevel = PATH_PHB; } + if (arch == NCCL_TOPO_CPU_ARCH_X86 && vendor == NCCL_TOPO_CPU_VENDOR_ZHAOXIN) { + p2pLevel = PATH_PXB; + } compare: // Compute the PCI distance and compare with the p2pLevel. diff --git a/src/graph/topo.cc b/src/graph/topo.cc index 3e395c5..d53b532 100644 --- a/src/graph/topo.cc +++ b/src/graph/topo.cc @@ -72,6 +72,9 @@ static ncclResult_t ncclTopoGetInterCpuWidth(struct ncclTopoNode* cpu, float* wi if (cpu->cpu.arch == NCCL_TOPO_CPU_ARCH_X86 && cpu->cpu.vendor == NCCL_TOPO_CPU_VENDOR_INTEL) { *width = cpu->cpu.model == NCCL_TOPO_CPU_TYPE_SKL ? SKL_QPI_WIDTH : QPI_WIDTH; } + if (cpu->cpu.arch == NCCL_TOPO_CPU_ARCH_X86 && cpu->cpu.vendor == NCCL_TOPO_CPU_VENDOR_ZHAOXIN) { + *width = cpu->cpu.model == NCCL_TOPO_CPU_TYPE_YONGFENG ? 
YONGFENG_ZPI_WIDTH : ZPI_WIDTH; + } return ncclSuccess; } @@ -364,7 +367,7 @@ ncclResult_t ncclTopoAddPci(struct ncclXmlNode* xmlPci, struct ncclTopoSystem* s } struct kvDict kvDictCpuArch[] = { { "x86_64", NCCL_TOPO_CPU_ARCH_X86 }, { "arm64", NCCL_TOPO_CPU_ARCH_ARM }, { "ppc64", NCCL_TOPO_CPU_ARCH_POWER }, { NULL, 0 } }; -struct kvDict kvDictCpuVendor[] = { { "GenuineIntel", NCCL_TOPO_CPU_VENDOR_INTEL }, { "AuthenticAMD", NCCL_TOPO_CPU_VENDOR_AMD }, { NULL, 0 } }; +struct kvDict kvDictCpuVendor[] = { { "GenuineIntel", NCCL_TOPO_CPU_VENDOR_INTEL }, { "AuthenticAMD", NCCL_TOPO_CPU_VENDOR_AMD }, { "CentaurHauls", NCCL_TOPO_CPU_VENDOR_ZHAOXIN }, { "  Shanghai  ", NCCL_TOPO_CPU_VENDOR_ZHAOXIN }, { NULL, 0 } }; ncclResult_t ncclTopoAddCpu(struct ncclXmlNode* xmlCpu, struct ncclTopoSystem* system) { int numaId; @@ -387,6 +390,11 @@ ncclResult_t ncclTopoAddCpu(struct ncclXmlNode* xmlCpu, struct ncclTopoSystem* s NCCLCHECK(xmlGetAttrInt(xmlCpu, "familyid", &familyId)); NCCLCHECK(xmlGetAttrInt(xmlCpu, "modelid", &modelId)); cpu->cpu.model = (familyId == 6 && modelId >= 0x55) ? 
NCCL_TOPO_CPU_TYPE_SKL : NCCL_TOPO_CPU_INTEL_BDW; + } else if (cpu->cpu.vendor == NCCL_TOPO_CPU_VENDOR_ZHAOXIN) { + int familyId, modelId; + NCCLCHECK(xmlGetAttrInt(xmlCpu, "familyid", &familyId)); + NCCLCHECK(xmlGetAttrInt(xmlCpu, "modelid", &modelId)); + if (familyId == 7 && modelId == 0x5B) cpu->cpu.model = NCCL_TOPO_CPU_TYPE_YONGFENG; } } for (int s=0; s<xmlCpu->nSubs; s++) { diff --git a/src/graph/topo.h b/src/graph/topo.h index a12bb2d..0cb6d33 100644 --- a/src/graph/topo.h +++ b/src/graph/topo.h @@ -19,6 +19,8 @@ #define PCI_WIDTH 12.0 // PCI Gen3 x16 #define QPI_WIDTH 6.0 #define SKL_QPI_WIDTH 9.0 +#define ZPI_WIDTH 6.0 +#define YONGFENG_ZPI_WIDTH 9.0 #define P9_WIDTH 32.0 #define ARM_WIDTH 6.0 #define NET_WIDTH 12.0 // 100Gbit diff --git a/src/include/graph.h b/src/include/graph.h index a4dba5c..0c912eb 100644 --- a/src/include/graph.h +++ b/src/include/graph.h @@ -40,8 +40,10 @@ ncclResult_t ncclTopoSetAffinity(struct ncclTopoSystem* system, int rank); #define NCCL_TOPO_CPU_ARCH_ARM 3 #define NCCL_TOPO_CPU_VENDOR_INTEL 1 #define NCCL_TOPO_CPU_VENDOR_AMD 2 +#define NCCL_TOPO_CPU_VENDOR_ZHAOXIN 3 #define NCCL_TOPO_CPU_TYPE_BDW 1 #define NCCL_TOPO_CPU_TYPE_SKL 2 +#define NCCL_TOPO_CPU_TYPE_YONGFENG 1 ncclResult_t ncclTopoCpuType(struct ncclTopoSystem* system, int* arch, int* vendor, int* model); ncclResult_t ncclTopoGetNetCount(struct ncclTopoSystem* system, int* count);