Add support for alternating rings, allow for cross-nic rings without cross-rail communication. Add support for user buffer registration for network send/recv. Optimize aggregated operations to better utilize all channels. Add flattening for BCM PCI gen5 switches. Add support for inter-node NVLink communication. Add support for port fusion in NET/IB. Add support for ReduceScatter and AllGather using Collnet. Update net API to v8. Fix hang during A2A connection.
36 lines
1003 B
C
36 lines
1003 B
C
/*
|
|
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
|
*/
|
|
|
|
#ifndef NCCL_NET_H_
|
|
#define NCCL_NET_H_
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "err.h"
|
|
|
|
// Upper bound on the size of a network handle.
// NOTE(review): presumably expressed in bytes — confirm against the versioned net_vN.h headers.
#define NCCL_NET_HANDLE_MAXSIZE 128
|
|
|
|
// Pointer-type flag occupying bit 0, so it can be OR'ed with the other NCCL_PTR_* flags.
// NOTE(review): presumably marks host memory — confirm against the net plugin API.
#define NCCL_PTR_HOST 0x1
|
|
// Pointer-type flag occupying bit 1, so it can be OR'ed with the other NCCL_PTR_* flags.
// NOTE(review): presumably marks CUDA device memory — confirm against the net plugin API.
#define NCCL_PTR_CUDA 0x2
|
|
// Pointer-type flag occupying bit 2, so it can be OR'ed with the other NCCL_PTR_* flags.
// NOTE(review): presumably marks DMA-BUF backed memory — confirm against the net plugin API.
#define NCCL_PTR_DMABUF 0x4
|
|
|
|
// Maximum number of requests per comm object
|
|
#define NCCL_NET_MAX_REQUESTS 32
|
|
|
|
// Log levels accepted by the debug logger callback, numbered 0 through 5.
typedef enum {
  NCCL_LOG_NONE    = 0,
  NCCL_LOG_VERSION = 1,
  NCCL_LOG_WARN    = 2,
  NCCL_LOG_INFO    = 3,
  NCCL_LOG_ABORT   = 4,
  NCCL_LOG_TRACE   = 5
} ncclDebugLogLevel;
|
|
// Debug subsystem identifiers. Every named subsystem occupies its own bit
// (bits 0 through 9); NCCL_ALL is the all-ones value.
typedef enum {
  NCCL_INIT   = 1 << 0,  // 1
  NCCL_COLL   = 1 << 1,  // 2
  NCCL_P2P    = 1 << 2,  // 4
  NCCL_SHM    = 1 << 3,  // 8
  NCCL_NET    = 1 << 4,  // 16
  NCCL_GRAPH  = 1 << 5,  // 32
  NCCL_TUNING = 1 << 6,  // 64
  NCCL_ENV    = 1 << 7,  // 128
  NCCL_ALLOC  = 1 << 8,  // 256
  NCCL_CALL   = 1 << 9,  // 512
  NCCL_ALL    = ~0
} ncclDebugLogSubSys;
|
|
|
|
// Pointer to a logging function. It takes a log level, a flags word, a file
// name and line number, and a format string followed by variadic arguments;
// it returns nothing.
// NOTE(review): flags presumably carries ncclDebugLogSubSys bits and fmt is
// presumably printf-style — confirm with the logger implementation.
typedef void (*ncclDebugLogger_t)(
    ncclDebugLogLevel level,
    unsigned long flags,
    const char *file,
    int line,
    const char *fmt,
    ...);
|
|
|
|
#include "net_v8.h"
|
|
#include "net_v7.h"
|
|
#include "net_v6.h"
|
|
#include "net_v5.h"
|
|
#include "net_v4.h"
|
|
#include "net_v3.h"
|
|
#include "net_v2.h"
|
|
|
|
#endif // end include guard
|