nccl/src/graph/topo.h
Ke Wen 7e51592129 2.10.3-1
Add support for bfloat16.
Add ncclAvg reduction operation.
Improve performance for aggregated operations.
Improve performance for tree.
Improve network error reporting.
Add NCCL_NET parameter to force a specific network.
Add NCCL_IB_QPS_PER_CONNECTION parameter to split IB traffic onto multiple queue pairs.
Fix topology detection error in WSL2.
Fix proxy memory elements affinity (improve alltoall performance).
Fix graph search on cubemesh topologies.
Fix hang in cubemesh during NVB connections.
2021-07-08 14:30:14 -07:00

176 lines
4.8 KiB
C

/*************************************************************************
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef NCCL_TOPO_H_
#define NCCL_TOPO_H_
#include "graph.h"
#include "core.h"
#define LOC_WIDTH 5000.0
#define SM60_NVLINK_WIDTH 18.0
#define SM70_NVLINK_WIDTH 22.0
#define SM80_NVLINK_WIDTH 22.0
#define SM86_NVLINK_WIDTH 12.0
#define PCI_WIDTH 12.0 // PCI Gen3 x16
#define QPI_WIDTH 6.0
#define SKL_QPI_WIDTH 9.0
#define ZPI_WIDTH 6.0
#define YONGFENG_ZPI_WIDTH 9.0
#define P9_WIDTH 32.0
#define ARM_WIDTH 6.0
#define NET_WIDTH 12.0 // 100Gbit
// Intel CPU convert GPU P2P traffic into 64B PCI TLPs, so GPU
// to GPU traffic consumes more PCI bandwidth.
#define INTEL_P2P_OVERHEAD(speed) (speed*6/5)
#define NCCL_TOPO_NODE_TYPES 7
#define GPU 0
#define PCI 1
#define NVS 2
#define CPU 3 // Actually NUMA domains
#define NIC 4
#define NET 5
extern const char* topoNodeTypeStr[];
// We want link types and path types to match as much as possible
#define LINK_LOC 0
#define LINK_NVL 1
// Skipping 2 for PATH_NVB
#define LINK_PCI 3
// Skipping 4 for PATH_PXB
// Skipping 5 for PATH_PHB
#define LINK_SYS 6
#define LINK_NET 7
extern const char* topoLinkTypeStr[];
#define PATH_LOC 0
#define PATH_NVL 1
#define PATH_NVB 2
#define PATH_PIX 3
#define PATH_PXB 4
#define PATH_PHB 5
#define PATH_SYS 6
extern const char* topoPathTypeStr[];
struct ncclTopoNode;
struct ncclTopoLink {
int type;
float width;
struct ncclTopoNode* remNode;
};
#define NCCL_TOPO_MAX_LINKS 32
#define NCCL_TOPO_MAX_HOPS (NCCL_TOPO_MAX_NODES*NCCL_TOPO_NODE_TYPES)
struct ncclTopoLinkList {
struct ncclTopoLink* list[NCCL_TOPO_MAX_HOPS];
int count;
float width;
int type;
};
#define NCCL_TOPO_CPU_INTEL_BDW 1
#define NCCL_TOPO_CPU_INTEL_SKL 2
#define NCCL_TOPO_UNDEF (-1)
struct ncclTopoNode {
int type;
int64_t id;
// Type specific data
union {
struct {
int dev; // NVML dev number
int rank;
int cudaCompCap;
int gdrSupport;
}gpu;
struct {
uint64_t asic;
int port;
float width;
int gdrSupport;
int collSupport;
int maxChannels;
}net;
struct {
int arch;
int vendor;
int model;
cpu_set_t affinity;
}cpu;
struct {
uint64_t device;
}pci;
};
int nlinks;
struct ncclTopoLink links[NCCL_TOPO_MAX_LINKS];
// Pre-computed paths to GPUs and NICs
struct ncclTopoLinkList* paths[NCCL_TOPO_NODE_TYPES];
// Used during search
uint64_t used;
};
struct ncclTopoNodeSet {
int count;
struct ncclTopoNode nodes[NCCL_TOPO_MAX_NODES];
};
struct ncclTopoSystem {
struct ncclTopoNodeSet nodes[NCCL_TOPO_NODE_TYPES];
float maxWidth;
float totalWidth;
};
ncclResult_t ncclTopoGetNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id);
ncclResult_t ncclTopoCreateNode(struct ncclTopoSystem* system, struct ncclTopoNode** node, int type, uint64_t id);
ncclResult_t ncclTopoRemoveNode(struct ncclTopoSystem* system, int type, int id);
ncclResult_t ncclTopoConnectNodes(struct ncclTopoNode* node, struct ncclTopoNode* remNode, int type, float width);
ncclResult_t ncclTopoPrintPaths(struct ncclTopoSystem* system);
ncclResult_t ncclTopoLoadSystem(const char* xmlTopoFile, struct ncclTopoSystem* system);
ncclResult_t ncclTopoGetLocalNet(struct ncclTopoSystem* system, int rank, int64_t* id, int rr);
ncclResult_t ncclTopoGetSystemFromXml(struct ncclXml* xml, struct ncclTopoSystem** topoSystem);
ncclResult_t ncclTopoGetGraphFromXml(struct ncclXmlNode *xmlGraphs, struct ncclTopoSystem* system, struct ncclTopoGraph* graph, int* nChannels);
ncclResult_t ncclTopoGetXmlFromGraphs(int ngraphs, struct ncclTopoGraph** graphs, struct ncclTopoSystem* system, struct ncclXml *xml);
ncclResult_t ncclTopoGetCompCap(struct ncclTopoSystem* system, int* ccMin, int* ccMax);
static ncclResult_t ncclTopoIdToIndex(struct ncclTopoSystem* system, int type, int64_t id, int* index) {
*index = -1;
for (int i=0; i<system->nodes[type].count; i++) {
if (system->nodes[type].nodes[i].id == id) {
*index = i;
return ncclSuccess;
}
}
return ncclInternalError;
}
static ncclResult_t ncclTopoRankToIndex(struct ncclTopoSystem* system, int rank, int* index) {
*index = -1;
for (int i=0; i<system->nodes[GPU].count; i++) {
if (system->nodes[GPU].nodes[i].gpu.rank == rank) {
*index = i;
return ncclSuccess;
}
}
return ncclInternalError;
}
// Returns NVLink speed in GB/s
static float ncclTopoNVLinkSpeed(int cudaCompCap) {
return
cudaCompCap == 86 ? SM86_NVLINK_WIDTH :
cudaCompCap >= 80 ? SM80_NVLINK_WIDTH :
cudaCompCap >= 70 ? SM70_NVLINK_WIDTH :
cudaCompCap >= 60 ? SM60_NVLINK_WIDTH :
SM80_NVLINK_WIDTH;
}
#endif