Add documentation for NCCL NET plugins
Also repurpose dummy plugin as example, including headers and compat layers from v6 to v2.
This commit is contained in:
parent
2f4cb874ba
commit
55b1d8ab98
352
ext-net/README.md
Normal file
352
ext-net/README.md
Normal file
@ -0,0 +1,352 @@
|
||||
# NCCL Net Plugin Documentation
|
||||
|
||||
This page describes the NCCL Net plugin API and how to implement a network plugin for NCCL.
|
||||
|
||||
# Overview
|
||||
|
||||
To allow NCCL to work on any network type, NCCL provides a way to use external plugins. Plugins
|
||||
implement the NCCL network API, and decouple NCCL binary builds which are built against a
|
||||
particular version of the GPU stack (i.e. CUDA) from the network code which is built against a
|
||||
particular version of the networking stack. That way, we can easily integrate any CUDA version
|
||||
with any network stack version.
|
||||
|
||||
NCCL network plugins come as a shared library called `libnccl-net.so`. That shared library
|
||||
contains one or more implementations of the NCCL NET API, in the form of versioned structs,
|
||||
filled with pointers to all required functions.
|
||||
|
||||
# Plugin architecture
|
||||
|
||||
## Plugin name and supporting multiple network plugins
|
||||
|
||||
When NCCL is initialized, it will look for a `libnccl-net.so` library and dynamically load it,
|
||||
then look for symbols inside the library.
|
||||
|
||||
The `NCCL_NET_PLUGIN` environment variable allows multiple plugins to coexist. If set, NCCL
|
||||
will look for a library with a name of `libnccl-net-${NCCL_NET_PLUGIN}.so`. It is therefore
|
||||
advised to name the library following that pattern, with a symlink pointing `libnccl-net.so`
|
||||
to `libnccl-net-${NCCL_NET_PLUGIN}.so`. That way, if there are multiple plugins in the path,
|
||||
setting `NCCL_NET_PLUGIN` will allow users to select the right plugin.
|
||||
|
||||
## Struct versioning
|
||||
|
||||
Once a library is found, NCCL will look for a symbol named `ncclNet_vX`, with `X` increasing
|
||||
over time. The versioning ensures that the plugin and the NCCL core are compatible.
|
||||
|
||||
Plugins are encouraged to provide multiple of those symbols, implementing multiple versions
|
||||
of the NCCL NET API, so that the same plugin can be compiled and support a wide range of NCCL
|
||||
versions.
|
||||
|
||||
Conversely, and to ease transition, NCCL can choose to support different plugin versions, looking
|
||||
for the latest ncclNet struct version, but also looking for older ones so that older plugins
|
||||
would still work.
|
||||
|
||||
## In-network collective operations, a.k.a. collNet
|
||||
|
||||
In addition to the ncclNet structure, network plugins can provide a collNet structure which
|
||||
implements in-network collective operations, if supported. That can be used by the NCCL collNet
|
||||
algorithm to accelerate inter-node reductions in allReduce.
|
||||
|
||||
The collNet struct is a different, optional struct provided by the network plugin, but its
|
||||
versioning is tied to the ncclNet struct and many functions are common between the two to
|
||||
ease the implementation.
|
||||
|
||||
## Headers management
|
||||
|
||||
To help users build plugins effortlessly, plugins should copy the `ncclNet_vX` definitions
|
||||
they support to their internal includes. An example is shown in `ext-net/example/` where we keep
|
||||
all headers in the `nccl/` directory and provide thin layers to implement old versions on top
|
||||
of newer ones.
|
||||
|
||||
The `nccl/` directory is populated with `net_vX.h` files extracting all relevant definitions
|
||||
from old API versions. It also provides error codes in `err.h`.
|
||||
|
||||
# API (v6)
|
||||
|
||||
Below is the main `ncclNet_v6` struct. Each function is explained in later sections.
|
||||
|
||||
```
|
||||
typedef struct {
|
||||
// Name of the network (mainly for logs)
|
||||
const char* name;
|
||||
// Initialize the network.
|
||||
ncclResult_t (*init)(ncclDebugLogger_t logFunction);
|
||||
// Return the number of adapters.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Get various device properties.
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
|
||||
// between ranks to create a connection.
|
||||
ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
|
||||
// Connect to a handle and return a sending comm object for that peer.
|
||||
// This call must not block for the connection to be established, and instead
|
||||
// should return successfully with sendComm == NULL with the expectation that
|
||||
// it will be called again until sendComm != NULL.
|
||||
ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
|
||||
// Finalize connection establishment after remote peer has called connect.
|
||||
// This call must not block for the connection to be established, and instead
|
||||
// should return successfully with recvComm == NULL with the expectation that
|
||||
// it will be called again until recvComm != NULL.
|
||||
ncclResult_t (*accept)(void* listenComm, void** recvComm);
|
||||
// Register/Deregister memory. Comm can be either a sendComm or a recvComm.
|
||||
// Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
|
||||
ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
|
||||
/* DMA-BUF support */
|
||||
ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
|
||||
ncclResult_t (*deregMr)(void* comm, void* mhandle);
|
||||
// Asynchronous send to a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
|
||||
// Asynchronous recv from a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
|
||||
// Test whether a request is complete. If sizes is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* sizes);
|
||||
// Close and free send/recv comm objects
|
||||
ncclResult_t (*closeSend)(void* sendComm);
|
||||
ncclResult_t (*closeRecv)(void* recvComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclNet_v6_t;
|
||||
```
|
||||
|
||||
## Error codes
|
||||
|
||||
All plugin functions use NCCL error codes as return values. `ncclSuccess` should be returned upon
|
||||
success.
|
||||
|
||||
Otherwise, plugins can return one of the following:
|
||||
- `ncclSystemError` is the most common error for network plugins, when a call to the linux kernel
|
||||
or a system library fails. This typically includes all network/hardware errors.
|
||||
- `ncclInternalError` is returned when the NCCL core code is using the network plugin in an
|
||||
incorrect way, for example allocating more requests than it should, or passing an invalid argument
|
||||
to calls.
|
||||
- `ncclInvalidUsage` should be returned when the error is most likely a user error. This can
|
||||
include misconfiguration, but also size mismatches.
|
||||
- `ncclInvalidArgument` should usually not be used by plugins since arguments should be checked by
|
||||
the NCCL core layer.
|
||||
- `ncclUnhandledCudaError` is returned when an error comes from CUDA. Since network plugins should
|
||||
not need to rely on CUDA, this should not be common.
|
||||
|
||||
## Operation overview
|
||||
|
||||
NCCL will call the `init` function first, then query the number of network devices with the
|
||||
`devices` function, getting each network device properties with `getProperties`.
|
||||
|
||||
To establish a connection between two network devices, NCCL will first call `listen` on the
|
||||
receiving side, pass the returned handle to the sender side of the connection, and call `connect`
|
||||
with that handle. Finally, `accept` will be called on the receiving side to finalize the connection
|
||||
establishment.
|
||||
|
||||
Once the connection is established, communication will be done using the functions `isend`,
|
||||
`irecv` and `test`. Prior to calling `isend` or `irecv`, NCCL will call the `regMr` function on
|
||||
all buffers to allow RDMA NICs to prepare buffers. `deregMr` will be used to unregister buffers.
|
||||
|
||||
In certain conditions, `iflush` will be called after a receive call completes to allow the network
|
||||
plugin to flush data and ensure the GPU will observe the newly written data.
|
||||
|
||||
To close the connections NCCL will call `closeListen` to close the object returned by `listen`,
|
||||
`closeSend` to close the object returned by `connect` and `closeRecv` to close the object returned
|
||||
by `accept`.
|
||||
|
||||
## API Functions
|
||||
|
||||
### Initialization
|
||||
`name`
|
||||
|
||||
The `name` field should point to a character string with the name of the network plugin. This will
|
||||
be used for all logging, especially when `NCCL_DEBUG=INFO` is set.
|
||||
|
||||
Note: setting `NCCL_NET=<plugin name>` will ensure a specific network implementation is used, with
|
||||
a matching `name`. This is not to be confused with `NCCL_NET_PLUGIN` which defines a suffix to the
|
||||
`libnccl-net.so` library name to load.
|
||||
|
||||
`init`
|
||||
|
||||
As soon as NCCL finds the plugin and the correct ncclNet symbol, it will call the `init` function.
|
||||
This will allow the plugin to discover network devices and make sure they are usable. If the
|
||||
`init` function does not return `ncclSuccess`, then NCCL will not use the plugin and fall back on
|
||||
internal ones.
|
||||
|
||||
To allow the plugin logs to integrate into the NCCL logs seamlessly, NCCL provides a logging
|
||||
function to `init`. This function is typically used to allow for `INFO` and `WARN` macros within
|
||||
the plugin code adding the following definitions:
|
||||
|
||||
```
|
||||
#define WARN(...) logFunction(NCCL_LOG_WARN, NCCL_ALL, __FILE__, __LINE__, __VA_ARGS__)
|
||||
#define INFO(FLAGS, ...) logFunction(NCCL_LOG_INFO, (FLAGS), __func__, __LINE__, __VA_ARGS__)
|
||||
```
|
||||
|
||||
`devices`
|
||||
|
||||
Once the plugin is initialized, NCCL will query the number of devices available. It should not
|
||||
be zero, otherwise NCCL initialization will fail. If no device is present or usable, the `init`
|
||||
function should not return `ncclSuccess`.
|
||||
|
||||
`getProperties`
|
||||
|
||||
Right after getting the number of devices, NCCL will query properties for each available network
|
||||
device. These properties are critical when multiple adapters are present to ensure NCCL uses each
|
||||
adapter in the most optimized way.
|
||||
|
||||
The `name` is only used for logging.
|
||||
|
||||
The `pciPath` is the base for all topology detection and should point to the PCI device directory
|
||||
in /sys. This is typically the directory pointed to by `/sys/class/net/eth0/device` or
|
||||
`/sys/class/infiniband/mlx5_0/device`. If the network interface is virtual, then `pciPath` should
|
||||
be `NULL`.
|
||||
|
||||
The `guid` field is used to determine when network adapters are connected to multiple PCI
|
||||
endpoints. For normal cases, it can be set to the device number. If multiple network devices have
|
||||
the same guid, then NCCL will consider they are sharing the same network port to the fabric, hence
|
||||
it will not use the port multiple times.
|
||||
|
||||
The `ptrSupport` field indicates whether or not CUDA pointers are supported. If so, it should be
|
||||
set to `NCCL_PTR_HOST|NCCL_PTR_CUDA`, otherwise it should be set to `NCCL_PTR_HOST`. If the plugin
|
||||
supports `dmabuf`, it should set `ptrSupport` to `NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF` and
|
||||
provide a `regMrDmaBuf` function.
|
||||
|
||||
The `speed` field indicates the speed of the network port in Mbps (10^6 bits per second). This is
|
||||
important to ensure proper optimization of flows within the node.
|
||||
|
||||
The `port` field indicates the port number. This is important again for topology detection and flow
|
||||
optimization within the node when a NIC with a single PCI connection is connected to the fabric
|
||||
with multiple ports.
|
||||
|
||||
The `latency` field indicates the network latency in microseconds. This can be useful to improve
|
||||
the NCCL tuning and make sure NCCL switches from tree to ring at the right size.
|
||||
|
||||
The `maxComms` field indicates the maximum number of connections we can create.
|
||||
|
||||
The `maxRecvs` field indicates the maximum number for grouped receive operations (see grouped
|
||||
receive).
|
||||
|
||||
### Connection establishment
|
||||
|
||||
Connections are used in a unidirectional manner. There is therefore a sender side and a receiver
|
||||
side.
|
||||
|
||||
`listen`
|
||||
|
||||
To create a connection, NCCL will start by calling `listen` on the receiver side. This function
|
||||
takes a device number as input argument, and should return a local `listenComm` object, and a
|
||||
`handle` to pass to the other side, so that the sender side can connect to the receiver.
|
||||
|
||||
The `handle` is a buffer of size `NCCL_NET_HANDLE_MAXSIZE` and is provided by NCCL.
|
||||
|
||||
This call should never block, but contrary to `connect` and `accept`, `listenComm` should never
|
||||
be `NULL` if the call succeeds.
|
||||
|
||||
`connect`
|
||||
|
||||
NCCL will use its bootstrap infrastructure to provide the `handle` to the sender side, then call
|
||||
`connect` on the sender side on a given device index `dev`, providing the `handle`. `connect`
|
||||
should not block either, and instead set `sendComm` to `NULL` and return `ncclSuccess`. In that
|
||||
case, NCCL will call `connect` again until it succeeds.
|
||||
|
||||
`accept`
|
||||
|
||||
To finalize the connection, the receiver side will call `accept` on the `listenComm` returned by
|
||||
the `listen` call previously. If the sender did not connect yet, `accept` should not block. It
|
||||
should return `ncclSuccess`, setting `recvComm` to `NULL`. NCCL will call `accept` again until it
|
||||
succeeds.
|
||||
|
||||
`closeListen`/`closeSend`/`closeRecv`
|
||||
|
||||
Once a `listenComm`/`sendComm`/`recvComm` is no longer needed, NCCL will call
|
||||
`closeListen`/`closeSend`/`closeRecv` to free the associated resources.
|
||||
|
||||
### Communication
|
||||
|
||||
Communication is done using asynchronous send and receive operations: `isend`, `irecv` and `test`.
|
||||
To support RDMA capabilities, buffer registration and flush functions are provided.
|
||||
|
||||
To keep track of asynchronous send, receive and flush operations, requests are returned to NCCL,
|
||||
then queried with `test`. Each `sendComm` or `recvComm` must be able to handle
|
||||
`NCCL_NET_MAX_REQUESTS` requests in parallel.
|
||||
|
||||
Note: That value should be multiplied by the multi-receive capability of the plugin for the sender
|
||||
side, so that we can effectively have `NCCL_NET_MAX_REQUESTS` multi-receive operations happening
|
||||
in parallel. So, if we have a `maxRecvs` value of 8 and `NCCL_NET_MAX_REQUESTS` is 8, then each
|
||||
`sendComm` must be able to handle up to 8x8=64 concurrent `isend` operations.
|
||||
|
||||
`regMr`
|
||||
|
||||
Prior to sending or receiving data, NCCL will call `regMr` with any buffers later used for
|
||||
communication. It will provide a `sendComm` or `recvComm` as `comm` argument, then the buffer
|
||||
pointer `data`, `size`, and `type` being either `NCCL_PTR_HOST`, or `NCCL_PTR_CUDA` if the network
|
||||
supports CUDA pointers.
|
||||
|
||||
The network plugin can use the output argument `mhandle` to keep any reference to that memory
|
||||
registration, as this `mhandle` will be passed back for all `isend`, `irecv`, `iflush` and
|
||||
`deregMr` calls.
|
||||
|
||||
`regMrDmaBuf`
|
||||
|
||||
If the plugin has set the `NCCL_PTR_DMABUF` property in `ptrSupport`, NCCL will use `regMrDmaBuf`
|
||||
instead of `regMr`. If the property was not set, `regMrDmaBuf` can be set to `NULL`.
|
||||
|
||||
|
||||
`deregMr`
|
||||
|
||||
When buffers will no longer be used for communication, NCCL will call `deregMr` to let the plugin
|
||||
free resources. This function is used to deregister handles returned by both `regMr` and
|
||||
`regMrDmaBuf`.
|
||||
|
||||
`isend`
|
||||
|
||||
Data will be sent through the connection using `isend`, passing the `sendComm` previously
|
||||
created by `connect`, and the buffer described by `data`, `size`, and `mhandle`. A `tag` must be
|
||||
used if the network supports multi-receive operations (see `irecv`) to distinguish between
|
||||
different sends matching the same multi-receive. Otherwise it can be set to 0.
|
||||
|
||||
The `isend` operation returns a handle in the `request` argument for further calls to `test`. If
|
||||
the `isend` operation cannot be initiated, `request` can be set to `NULL` and NCCL will call
|
||||
`isend` again later.
|
||||
|
||||
`irecv`
|
||||
|
||||
To receive data, NCCL will call `irecv` with the `recvComm` returned by `accept`. The argument
|
||||
`n` will allow NCCL to perform a multi-receive, to allow grouping of multiple sends through a
|
||||
single network connection. Each buffer will be described by the `data`, `sizes`, and `mhandles`
|
||||
arrays. `tags` will specify a tag for each receive so that each of the `n` independent `isend`
|
||||
operations is received into the right buffer.
|
||||
|
||||
If all receive operations can be initiated, `irecv` will return a handle in the `request` pointer,
|
||||
otherwise it will set it to `NULL`. In the case of multi-receive, all `n` receive operations are
|
||||
handled by a single request handle.
|
||||
|
||||
The sizes provided to `irecv` can (and will) be larger than the size of the `isend` operation.
|
||||
The contrary (receive size being lower than the send size) is an error, however.
|
||||
|
||||
Note: for a given connection, send/receive operations should always match in the order they were
|
||||
posted. Tags provided for receive operations are only used to assign a given send operation to one
|
||||
of the buffers of the first (multi-)receive in the queue, not to allow for out-of-order tag
|
||||
matching on any receive operation posted.
|
||||
|
||||
`test`
|
||||
|
||||
After an `isend` or `irecv` operation is initiated, NCCL will call `test` on the request handles
|
||||
until they complete. When that happens, `done` will be set to 1 and `sizes` will be set to the
|
||||
real size sent or received, the latter being potentially lower than the size passed to `irecv`.
|
||||
|
||||
In the case of a multi-receive, all receives will be considered as done as a single operation (the
|
||||
goal being to allow aggregation), hence they share a single request and a single `done` status.
|
||||
However, they can have different sizes, so when `done` is non-zero, the `sizes` array should
|
||||
contain the `n` sizes corresponding to the buffers passed to `irecv`.
|
||||
|
||||
Once `test` returns 1 in `done`, the request handle can be freed, meaning that NCCL will never
|
||||
call `test` again on that request (until it is reallocated by another call to `isend` or `irecv`).
|
||||
|
||||
`iflush`
|
||||
|
||||
After a receive operation completes, if the operation was targeting GPU memory and received a
|
||||
non-zero number of bytes, NCCL will call `iflush` to let the network flush any buffer and ensure
|
||||
the GPU can read it right after without seeing stale data. This flush operation is decoupled from
|
||||
the `test` code to improve latency of `LL*` protocols, as those are capable of determining when
|
||||
data is valid or not.
|
||||
|
||||
`iflush` returns a request which needs to be queried with `test` until it completes.
|
@ -1,80 +0,0 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
|
||||
#include <nccl.h>
|
||||
#include <nccl_net.h>
|
||||
|
||||
#define __hidden __attribute__ ((visibility("hidden")))
|
||||
|
||||
__hidden ncclResult_t pluginInit(ncclDebugLogger_t logFunction) { return ncclSuccess; }
|
||||
__hidden ncclResult_t pluginDevices(int* ndev) { *ndev = 0; return ncclSuccess; }
|
||||
__hidden ncclResult_t pluginPciPath(int dev, char** path) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginPtrSupport(int dev, int* supportedTypes) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginListen(int dev, void* handle, void** listenComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginConnect(int dev, void* handle, void** sendComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginAccept(void* listenComm, void** recvComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginRegMr(void* collComm, void* data, int size, int type, void** mhandle) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginDeregMr(void* collComm, void* mhandle) { return ncclInternalError;}
|
||||
__hidden ncclResult_t pluginIsend(void* sendComm, void* data, int size, void* mhandle, void** request) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginIrecv(void* recvComm, void* data, int size, void* mhandle, void** request) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginFlush(void* recvComm, void* data, int size, void* mhandle) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginTest(void* request, int* done, int* size) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCloseSend(void* sendComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCloseRecv(void* recvComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCloseListen(void* listenComm) { return ncclInternalError; }
|
||||
|
||||
ncclNet_t NCCL_PLUGIN_SYMBOL = {
|
||||
"Dummy",
|
||||
pluginInit,
|
||||
pluginDevices,
|
||||
pluginPciPath,
|
||||
pluginPtrSupport,
|
||||
pluginListen,
|
||||
pluginConnect,
|
||||
pluginAccept,
|
||||
pluginRegMr,
|
||||
pluginDeregMr,
|
||||
pluginIsend,
|
||||
pluginIrecv,
|
||||
pluginFlush,
|
||||
pluginTest,
|
||||
pluginCloseSend,
|
||||
pluginCloseRecv,
|
||||
pluginCloseListen
|
||||
};
|
||||
|
||||
__hidden ncclResult_t pluginCollNetInit(ncclDebugLogger_t logFunction) { return ncclSuccess; }
|
||||
__hidden ncclResult_t pluginCollNetDevices(int* ndev) { *ndev = 0; return ncclSuccess; }
|
||||
__hidden ncclResult_t pluginCollNetPciPath(int dev, char** path) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetPtrSupport(int dev, int* supportedTypes) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetListen(int dev, void* handle, void** listenComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetConnect(void* handles[], int nranks, int rank, void* listenComm, void** collComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetReduceSupport(ncclDataType_t dataType, ncclRedOp_t redOp, int* supported) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetRegMr(void* collComm, void* data, int size, int type, void** mhandle) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetDeregMr(void* collComm, void* mhandle) { return ncclInternalError;}
|
||||
__hidden ncclResult_t pluginCollNetIallreduce(void* collComm, void* sendData, void* recvData, int count, ncclDataType_t dataType, ncclRedOp_t redOp, void* sendMhandle, void* recvMhandle, void** request) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetFlush(void* collComm, void* data, int size, void* mhandle) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetTest(void* request, int* done, int* size) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetCloseColl(void* collComm) { return ncclInternalError; }
|
||||
__hidden ncclResult_t pluginCollNetCloseListen(void* listenComm) { return ncclInternalError; }
|
||||
|
||||
ncclCollNet_t NCCL_COLLNET_PLUGIN_SYMBOL = {
|
||||
"Dummy",
|
||||
pluginCollNetInit,
|
||||
pluginCollNetDevices,
|
||||
pluginCollNetPciPath,
|
||||
pluginCollNetPtrSupport,
|
||||
pluginCollNetListen,
|
||||
pluginCollNetConnect,
|
||||
pluginCollNetReduceSupport,
|
||||
pluginCollNetRegMr,
|
||||
pluginCollNetDeregMr,
|
||||
pluginCollNetIallreduce,
|
||||
pluginCollNetFlush,
|
||||
pluginCollNetTest,
|
||||
pluginCollNetCloseColl,
|
||||
pluginCollNetCloseListen
|
||||
};
|
16
ext-net/example/nccl/err.h
Normal file
16
ext-net/example/nccl/err.h
Normal file
@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef NCCL_ERR_H_
|
||||
#define NCCL_ERR_H_
|
||||
|
||||
/*
 * Error type for plugins.
 *
 * Every plugin function returns one of these codes; ncclSuccess means the
 * call succeeded. The numeric values are explicit because they are exchanged
 * with the NCCL core and must not be renumbered.
 *
 * ncclInvalidUsage (5) is included so that plugins can report user errors
 * such as misconfiguration or size mismatches.
 */
typedef enum { ncclSuccess = 0,
               ncclUnhandledCudaError = 1,
               ncclSystemError = 2,
               ncclInternalError = 3,
               ncclInvalidArgument = 4,
               ncclInvalidUsage = 5,
               ncclRemoteError = 6 } ncclResult_t;
|
||||
|
||||
#endif
|
33
ext-net/example/nccl/net.h
Normal file
33
ext-net/example/nccl/net.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef NCCL_NET_H_
|
||||
#define NCCL_NET_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "err.h"
|
||||
|
||||
#define NCCL_NET_HANDLE_MAXSIZE 128
|
||||
|
||||
#define NCCL_PTR_HOST 0x1
|
||||
#define NCCL_PTR_CUDA 0x2
|
||||
#define NCCL_PTR_DMABUF 0x4
|
||||
|
||||
// Maximum number of requests per comm object
|
||||
#define NCCL_NET_MAX_REQUESTS 8
|
||||
|
||||
// Log levels passed as the first argument of the logging callback.
typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
|
||||
// Subsystem identifiers passed as the `flags` argument of the logging callback.
// Each subsystem is a distinct bit; NCCL_ALL (~0) has every bit set.
typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALLOC=256, NCCL_CALL=512, NCCL_ALL=~0} ncclDebugLogSubSys;
|
||||
|
||||
// Logging callback type received by the plugin `init` function: takes a level,
// subsystem flags, a file (or function) name, a line number, and a format
// string followed by variadic arguments.
typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);
|
||||
|
||||
#include "net_v6.h"
|
||||
#include "net_v5.h"
|
||||
#include "net_v4.h"
|
||||
#include "net_v3.h"
|
||||
#include "net_v2.h"
|
||||
|
||||
#endif // end include guard
|
50
ext-net/example/nccl/net_v2.h
Normal file
50
ext-net/example/nccl/net_v2.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef NCCL_NET_V2_H_
|
||||
#define NCCL_NET_V2_H_
|
||||
|
||||
// Version 2 of the NCCL net plugin API: a table of function pointers (plus a
// name) that the plugin fills in. Member order is part of the binary
// interface and must not change.
typedef struct {
|
||||
// Name of the network (mainly for logs)
|
||||
const char* name;
|
||||
// Initialize the network. logFunction is the logging callback handed to the plugin.
|
||||
ncclResult_t (*init)(ncclDebugLogger_t logFunction);
|
||||
// Return the number of adapters in *ndev.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Return the device path in /sys. NCCL will call free on this path.
|
||||
ncclResult_t (*pciPath)(int dev, char** path);
|
||||
// Return whether this device supports host pointers and/or CUDA pointers
|
||||
// as data from the current GPU. Supported types should be composed with
|
||||
// NCCL_PTR_HOST and NCCL_PTR_CUDA.
|
||||
ncclResult_t (*ptrSupport)(int dev, int* supportedTypes);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
|
||||
// between ranks to create a connection.
|
||||
ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
|
||||
// Connect to a handle and return a sending comm object for that peer.
|
||||
ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
|
||||
// Finalize connection establishment after remote peer has called connect.
|
||||
ncclResult_t (*accept)(void* listenComm, void** recvComm);
|
||||
// Register/Deregister memory. Comm can be either a sendComm or a recvComm.
// Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
|
||||
ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
|
||||
ncclResult_t (*deregMr)(void* comm, void* mhandle);
|
||||
// Asynchronous send to a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*isend)(void* sendComm, void* data, int size, void* mhandle, void** request);
|
||||
// Asynchronous recv from a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*flush)(void* recvComm, void* data, int size, void* mhandle);
|
||||
// Test whether a request is complete. If size is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* size);
|
||||
// Close and free send/recv/listen comm objects
|
||||
ncclResult_t (*closeSend)(void* sendComm);
|
||||
ncclResult_t (*closeRecv)(void* recvComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclNet_v2_t;
|
||||
|
||||
#endif // end include guard
|
51
ext-net/example/nccl/net_v3.h
Normal file
51
ext-net/example/nccl/net_v3.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef NCCL_NET_V3_H_
|
||||
#define NCCL_NET_V3_H_
|
||||
|
||||
#define NCCL_NET_HANDLE_MAXSIZE_V3 64
|
||||
#define NCCL_NET_MAX_REQUESTS_V3 16
|
||||
|
||||
typedef ncclNetProperties_v4_t ncclNetProperties_v3_t;
|
||||
// Version 3 of the NCCL net plugin API: a table of function pointers (plus a
// name) that the plugin fills in. Member order is part of the binary
// interface and must not change.
typedef struct {
|
||||
// Name of the network (mainly for logs)
|
||||
const char* name;
|
||||
// Initialize the network. logFunction is the logging callback handed to the plugin.
|
||||
ncclResult_t (*init)(ncclDebugLogger_t logFunction);
|
||||
// Return the number of adapters in *ndev.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Get various device properties for device `dev`, written to *props.
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v3_t* props);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
// NOTE(review): for v3 the applicable limit is presumably
// NCCL_NET_HANDLE_MAXSIZE_V3 (defined in this header) - confirm.
|
||||
// between ranks to create a connection.
|
||||
ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
|
||||
// Connect to a handle and return a sending comm object for that peer.
|
||||
ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
|
||||
// Finalize connection establishment after remote peer has called connect.
|
||||
ncclResult_t (*accept)(void* listenComm, void** recvComm);
|
||||
// Register/Deregister memory. Comm can be either a sendComm or a recvComm.
|
||||
// Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
|
||||
ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
|
||||
ncclResult_t (*deregMr)(void* comm, void* mhandle);
|
||||
// Asynchronous send to a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*isend)(void* sendComm, void* data, int size, void* mhandle, void** request);
|
||||
// Asynchronous recv from a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*flush)(void* recvComm, void* data, int size, void* mhandle);
|
||||
// Test whether a request is complete. If size is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* size);
|
||||
// Close and free send/recv/listen comm objects
|
||||
ncclResult_t (*closeSend)(void* sendComm);
|
||||
ncclResult_t (*closeRecv)(void* recvComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclNet_v3_t;
|
||||
|
||||
#endif // end include guard
|
59
ext-net/example/nccl/net_v4.h
Normal file
59
ext-net/example/nccl/net_v4.h
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef NCCL_NET_V4_H_
|
||||
#define NCCL_NET_V4_H_
|
||||
|
||||
// Device properties as reported through the v4 getProperties() call.
typedef struct {
  // Device name; used mostly for logging.
  char* name;
  // Path to the PCI device in /sys.
  char* pciPath;
  // Unique identifier for the NIC chip. Important for cards with multiple
  // PCI functions (physical or virtual).
  uint64_t guid;
  // NCCL_PTR_HOST or NCCL_PTR_HOST|NCCL_PTR_CUDA.
  int ptrSupport;
  // Port speed in Mbps.
  int speed;
  // Port number.
  int port;
  // Maximum number of comms we can create.
  int maxComms;
} ncclNetProperties_v4_t;
|
||||
|
||||
// v4 struct for backwards compatibility
|
||||
typedef struct {
|
||||
// Name of the network (mainly for logs)
|
||||
const char* name;
|
||||
// Initialize the network.
|
||||
ncclResult_t (*init)(ncclDebugLogger_t logFunction);
|
||||
// Return the number of adapters.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Get various device properties.
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v4_t* props);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
|
||||
// between ranks to create a connection.
|
||||
ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
|
||||
// Connect to a handle and return a sending comm object for that peer.
|
||||
ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
|
||||
// Finalize connection establishment after remote peer has called connectHandle
|
||||
ncclResult_t (*accept)(void* listenComm, void** recvComm);
|
||||
// Register/Deregister memory. Comm can be either a sendComm or a recvComm.
|
||||
// Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
|
||||
ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
|
||||
ncclResult_t (*deregMr)(void* comm, void* mhandle);
|
||||
// Asynchronous send to a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*isend)(void* sendComm, void* data, int size, void* mhandle, void** request);
|
||||
// Asynchronous recv from a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*irecv)(void* recvComm, void* data, int size, void* mhandle, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*iflush)(void* recvComm, void* data, int size, void* mhandle, void** request);
|
||||
// Test whether a request is complete. If size is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* size);
|
||||
// Close and free send/recv comm objects
|
||||
ncclResult_t (*closeSend)(void* sendComm);
|
||||
ncclResult_t (*closeRecv)(void* recvComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclNet_v4_t;
|
||||
|
||||
#endif // end include guard
|
54
ext-net/example/nccl/net_v5.h
Normal file
54
ext-net/example/nccl/net_v5.h
Normal file
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef NCCL_NET_V5_H_
|
||||
#define NCCL_NET_V5_H_
|
||||
|
||||
typedef ncclNetProperties_v6_t ncclNetProperties_v5_t;
|
||||
typedef struct {
|
||||
// Name of the network (mainly for logs)
|
||||
const char* name;
|
||||
// Initialize the network.
|
||||
ncclResult_t (*init)(ncclDebugLogger_t logFunction);
|
||||
// Return the number of adapters.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Get various device properties.
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v5_t* props);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
|
||||
// between ranks to create a connection.
|
||||
ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
|
||||
// Connect to a handle and return a sending comm object for that peer.
|
||||
// This call must not block for the connection to be established, and instead
|
||||
// should return successfully with sendComm == NULL with the expectation that
|
||||
// it will be called again until sendComm != NULL.
|
||||
ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
|
||||
// Finalize connection establishment after remote peer has called connect.
|
||||
// This call must not block for the connection to be established, and instead
|
||||
// should return successfully with recvComm == NULL with the expectation that
|
||||
// it will be called again until recvComm != NULL.
|
||||
ncclResult_t (*accept)(void* listenComm, void** recvComm);
|
||||
// Register/Deregister memory. Comm can be either a sendComm or a recvComm.
|
||||
// Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
|
||||
ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
|
||||
ncclResult_t (*deregMr)(void* comm, void* mhandle);
|
||||
// Asynchronous send to a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
|
||||
// Asynchronous recv from a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
|
||||
// Test whether a request is complete. If size is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* sizes);
|
||||
// Close and free send/recv comm objects
|
||||
ncclResult_t (*closeSend)(void* sendComm);
|
||||
ncclResult_t (*closeRecv)(void* recvComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclNet_v5_t;
|
||||
|
||||
#endif // end include guard
|
70
ext-net/example/nccl/net_v6.h
Normal file
70
ext-net/example/nccl/net_v6.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef NCCL_NET_V6_H_
|
||||
#define NCCL_NET_V6_H_
|
||||
|
||||
// Device properties as reported through the v6 getProperties() call.
typedef struct {
  // Device name; used mostly for logging.
  char* name;
  // Path to the PCI device in /sys.
  char* pciPath;
  // Unique identifier for the NIC chip. Important for cards with multiple
  // PCI functions (physical or virtual).
  uint64_t guid;
  // Bitmask built from [NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF].
  int ptrSupport;
  // Port speed in Mbps.
  int speed;
  // Port number.
  int port;
  // Network latency.
  float latency;
  // Maximum number of comms we can create.
  int maxComms;
  // Maximum number of grouped receives.
  int maxRecvs;
} ncclNetProperties_v6_t;
|
||||
|
||||
typedef ncclNetProperties_v6_t ncclNetProperties_t;
|
||||
|
||||
typedef struct {
|
||||
// Name of the network (mainly for logs)
|
||||
const char* name;
|
||||
// Initialize the network.
|
||||
ncclResult_t (*init)(ncclDebugLogger_t logFunction);
|
||||
// Return the number of adapters.
|
||||
ncclResult_t (*devices)(int* ndev);
|
||||
// Get various device properties.
|
||||
ncclResult_t (*getProperties)(int dev, ncclNetProperties_v6_t* props);
|
||||
// Create a receiving object and provide a handle to connect to it. The
|
||||
// handle can be up to NCCL_NET_HANDLE_MAXSIZE bytes and will be exchanged
|
||||
// between ranks to create a connection.
|
||||
ncclResult_t (*listen)(int dev, void* handle, void** listenComm);
|
||||
// Connect to a handle and return a sending comm object for that peer.
|
||||
// This call must not block for the connection to be established, and instead
|
||||
// should return successfully with sendComm == NULL with the expectation that
|
||||
// it will be called again until sendComm != NULL.
|
||||
ncclResult_t (*connect)(int dev, void* handle, void** sendComm);
|
||||
// Finalize connection establishment after remote peer has called connect.
|
||||
// This call must not block for the connection to be established, and instead
|
||||
// should return successfully with recvComm == NULL with the expectation that
|
||||
// it will be called again until recvComm != NULL.
|
||||
ncclResult_t (*accept)(void* listenComm, void** recvComm);
|
||||
// Register/Deregister memory. Comm can be either a sendComm or a recvComm.
|
||||
// Type is either NCCL_PTR_HOST or NCCL_PTR_CUDA.
|
||||
ncclResult_t (*regMr)(void* comm, void* data, int size, int type, void** mhandle);
|
||||
/* DMA-BUF support */
|
||||
ncclResult_t (*regMrDmaBuf)(void* comm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle);
|
||||
ncclResult_t (*deregMr)(void* comm, void* mhandle);
|
||||
// Asynchronous send to a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*isend)(void* sendComm, void* data, int size, int tag, void* mhandle, void** request);
|
||||
// Asynchronous recv from a peer.
|
||||
// May return request == NULL if the call cannot be performed (or would block)
|
||||
ncclResult_t (*irecv)(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request);
|
||||
// Perform a flush/fence to make sure all data received with NCCL_PTR_CUDA is
|
||||
// visible to the GPU
|
||||
ncclResult_t (*iflush)(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request);
|
||||
// Test whether a request is complete. If size is not NULL, it returns the
|
||||
// number of bytes sent/received.
|
||||
ncclResult_t (*test)(void* request, int* done, int* sizes);
|
||||
// Close and free send/recv comm objects
|
||||
ncclResult_t (*closeSend)(void* sendComm);
|
||||
ncclResult_t (*closeRecv)(void* recvComm);
|
||||
ncclResult_t (*closeListen)(void* listenComm);
|
||||
} ncclNet_v6_t;
|
||||
|
||||
#endif // end include guard
|
21
ext-net/example/nccl/types.h
Normal file
21
ext-net/example/nccl/types.h
Normal file
@ -0,0 +1,21 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*/
|
||||
|
||||
// Include guard named after this file (types.h). The previous guard,
// NCCL_ERR_H_, did not match the file and could collide with another header
// using that name, which would silently drop the definitions below.
#ifndef NCCL_TYPES_H_
#define NCCL_TYPES_H_

/* Data types.
 * Several enumerators are aliases that share a value (e.g. ncclInt8/ncclChar). */
typedef enum {
  ncclInt8     = 0, ncclChar   = 0,
  ncclUint8    = 1,
  ncclInt32    = 2, ncclInt    = 2,
  ncclUint32   = 3,
  ncclInt64    = 4,
  ncclUint64   = 5,
  ncclFloat16  = 6, ncclHalf   = 6,
  ncclFloat32  = 7, ncclFloat  = 7,
  ncclFloat64  = 8, ncclDouble = 8,
  ncclBfloat16 = 9,
} ncclDataType_t;

#endif // NCCL_TYPES_H_
|
200
ext-net/example/plugin.c
Normal file
200
ext-net/example/plugin.c
Normal file
@ -0,0 +1,200 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
|
||||
#include <nccl/net.h>
|
||||
|
||||
#define __hidden __attribute__ ((visibility("hidden")))
|
||||
|
||||
// Request limit for this plugin. Starts at NCCL_NET_MAX_REQUESTS and is
// overwritten with NCCL_NET_MAX_REQUESTS_V3 when pluginInit_v3() runs.
// Nothing else in this file reads it.
int max_requests = NCCL_NET_MAX_REQUESTS;
|
||||
|
||||
// Initialization hook. The example has nothing to set up: logFunction is
// ignored and the call always succeeds.
__hidden ncclResult_t pluginInit(ncclDebugLogger_t logFunction) {
  (void)logFunction;
  return ncclSuccess;
}
|
||||
// Reports zero devices through ndev and succeeds.
__hidden ncclResult_t pluginDevices(int* ndev) {
  *ndev = 0;
  return ncclSuccess;
}
|
||||
|
||||
// Stub (used by the v2 table): always fails with ncclInternalError and does
// not write to path.
__hidden ncclResult_t pluginPciPath(int dev, char** path) {
  return ncclInternalError;
}
|
||||
// Stub (used by the v2 table): always fails with ncclInternalError and does
// not write to supportedTypes.
__hidden ncclResult_t pluginPtrSupport(int dev, int* supportedTypes) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError and leaves *props untouched.
// A real plugin would fill in *props here, for instance props->pciPath and
// props->ptrSupport (see pluginPciPath() / pluginPtrSupport()).
__hidden ncclResult_t pluginGetProperties(int dev, ncclNetProperties_v6_t* props) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; handle and listenComm are not written.
__hidden ncclResult_t pluginListen(int dev, void* handle, void** listenComm) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; sendComm is not written.
__hidden ncclResult_t pluginConnect(int dev, void* handle, void** sendComm) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; recvComm is not written.
__hidden ncclResult_t pluginAccept(void* listenComm, void** recvComm) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; mhandle is not written.
__hidden ncclResult_t pluginRegMr(void* collComm, void* data, int size, int type, void** mhandle) {
  return ncclInternalError;
}
|
||||
// Stub for the DMA-BUF registration entry point: always fails with
// ncclInternalError; mhandle is not written.
__hidden ncclResult_t pluginRegMrDmaBuf(void* collComm, void* data, size_t size, int type, uint64_t offset, int fd, void** mhandle) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError.
__hidden ncclResult_t pluginDeregMr(void* collComm, void* mhandle) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; request is not written.
__hidden ncclResult_t pluginIsend(void* sendComm, void* data, int size, int tag, void* mhandle, void** request) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; request is not written.
__hidden ncclResult_t pluginIrecv(void* recvComm, int n, void** data, int* sizes, int* tags, void** mhandles, void** request) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; request is not written.
__hidden ncclResult_t pluginIflush(void* recvComm, int n, void** data, int* sizes, void** mhandles, void** request) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError; done and size are not written.
__hidden ncclResult_t pluginTest(void* request, int* done, int* size) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError.
__hidden ncclResult_t pluginCloseSend(void* sendComm) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError.
__hidden ncclResult_t pluginCloseRecv(void* recvComm) {
  return ncclInternalError;
}
|
||||
// Stub: always fails with ncclInternalError.
__hidden ncclResult_t pluginCloseListen(void* listenComm) {
  return ncclInternalError;
}
|
||||
|
||||
#define PLUGIN_NAME "Plugin"
|
||||
|
||||
const ncclNet_v6_t ncclNetPlugin_v6 = {
|
||||
.name = PLUGIN_NAME,
|
||||
.init = pluginInit,
|
||||
.devices = pluginDevices,
|
||||
.getProperties = pluginGetProperties,
|
||||
.listen = pluginListen,
|
||||
.connect = pluginConnect,
|
||||
.accept = pluginAccept,
|
||||
.regMr = pluginRegMr,
|
||||
.regMrDmaBuf = pluginRegMrDmaBuf,
|
||||
.deregMr = pluginDeregMr,
|
||||
.isend = pluginIsend,
|
||||
.irecv = pluginIrecv,
|
||||
.iflush = pluginIflush,
|
||||
.test = pluginTest,
|
||||
.closeSend = pluginCloseSend,
|
||||
.closeRecv = pluginCloseRecv,
|
||||
.closeListen = pluginCloseListen,
|
||||
};
|
||||
|
||||
/* v5 Compat */
|
||||
const ncclNet_v5_t ncclNetPlugin_v5 = {
|
||||
.name = PLUGIN_NAME,
|
||||
.init = pluginInit,
|
||||
.devices = pluginDevices,
|
||||
.getProperties = pluginGetProperties,
|
||||
.listen = pluginListen,
|
||||
.connect = pluginConnect,
|
||||
.accept = pluginAccept,
|
||||
.regMr = pluginRegMr,
|
||||
.deregMr = pluginDeregMr,
|
||||
.isend = pluginIsend,
|
||||
.irecv = pluginIrecv,
|
||||
.iflush = pluginIflush,
|
||||
.test = pluginTest,
|
||||
.closeSend = pluginCloseSend,
|
||||
.closeRecv = pluginCloseRecv,
|
||||
.closeListen = pluginCloseListen,
|
||||
};
|
||||
|
||||
/* v4 Compat */
|
||||
// v4 adapter for getProperties: queries the native (v6) properties and copies
// over the fields that exist in the v4 layout. latency and maxRecvs have no v4
// counterpart and are dropped. On failure *props is left untouched and the
// error from pluginGetProperties() is returned.
static ncclResult_t pluginGetProperties_v4(int dev, ncclNetProperties_v4_t* props) {
  ncclNetProperties_v6_t native;
  const ncclResult_t status = pluginGetProperties(dev, &native);
  if (status == ncclSuccess) {
    props->name       = native.name;
    props->pciPath    = native.pciPath;
    props->guid       = native.guid;
    props->ptrSupport = native.ptrSupport;
    props->speed      = native.speed;
    props->port       = native.port;
    props->maxComms   = native.maxComms;
  }
  return status;
}
|
||||
// v4 adapter for isend: the v4 signature has no tag, so the native call is
// made with tag 0.
static ncclResult_t pluginIsend_v4(void *sendComm, void* data, int size, void *mhandle, void** request) {
  const int tag = 0;
  return pluginIsend(sendComm, data, size, tag, mhandle, request);
}
|
||||
// v4 adapter for irecv: wraps the single buffer into one-element arrays and
// issues a native receive with n == 1 and tag 0.
static ncclResult_t pluginIrecv_v4(void* recvComm, void* data, int size, void* mhandle, void** request) {
  void* buffers[1] = { data };
  int sizes[1] = { size };
  int tags[1] = { 0 };
  void* mhandles[1] = { mhandle };
  return pluginIrecv(recvComm, 1, buffers, sizes, tags, mhandles, request);
}
|
||||
// v4 adapter for iflush: wraps the single buffer into one-element arrays and
// issues a native flush with n == 1.
static ncclResult_t pluginIflush_v4(void* recvComm, void* data, int size, void* mhandle, void** request) {
  void* buffers[1] = { data };
  int sizes[1] = { size };
  void* mhandles[1] = { mhandle };
  return pluginIflush(recvComm, 1, buffers, sizes, mhandles, request);
}
|
||||
static ncclResult_t pluginConnect_v4(int dev, void* handle, void** sendComm) {
|
||||
ncclResult_t ret;
|
||||
do {
|
||||
ret = pluginConnect(dev, handle, sendComm);
|
||||
} while (ret == ncclSuccess && *sendComm == NULL);
|
||||
return ret;
|
||||
}
|
||||
static ncclResult_t pluginAccept_v4(void* listenComm, void** recvComm) {
|
||||
ncclResult_t ret;
|
||||
do {
|
||||
ret = pluginAccept(listenComm, recvComm);
|
||||
} while (ret == ncclSuccess && *recvComm == NULL);
|
||||
return ret;
|
||||
}
|
||||
const ncclNet_v4_t ncclNetPlugin_v4 = {
|
||||
.name = PLUGIN_NAME,
|
||||
.init = pluginInit,
|
||||
.devices = pluginDevices,
|
||||
.getProperties = pluginGetProperties_v4,
|
||||
.listen = pluginListen,
|
||||
.connect = pluginConnect_v4,
|
||||
.accept = pluginAccept_v4,
|
||||
.regMr = pluginRegMr,
|
||||
.deregMr = pluginDeregMr,
|
||||
.isend = pluginIsend_v4,
|
||||
.irecv = pluginIrecv_v4,
|
||||
.iflush = pluginIflush_v4,
|
||||
.test = pluginTest,
|
||||
.closeSend = pluginCloseSend,
|
||||
.closeRecv = pluginCloseRecv,
|
||||
.closeListen = pluginCloseListen,
|
||||
};
|
||||
|
||||
/* v3 Compat */
|
||||
static ncclResult_t pluginFlush(void* recvComm, void* data, int size, void* mhandle) {
|
||||
void* req;
|
||||
ncclResult_t ret = pluginIflush_v4(recvComm, data, size, mhandle, &req);
|
||||
int done = 0;
|
||||
while (ret == ncclSuccess && done == 0) {
|
||||
ret = pluginTest(req, &done, NULL);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
// v3/v2 adapter for init: lowers max_requests to the v3 limit before running
// the regular plugin initialization, whose result is returned unchanged.
static ncclResult_t pluginInit_v3(ncclDebugLogger_t logFunction) {
  ncclResult_t status;

  max_requests = NCCL_NET_MAX_REQUESTS_V3;
  status = pluginInit(logFunction);
  return status;
}
|
||||
#include <string.h>
|
||||
static ncclResult_t pluginListen_v3(int dev, void* handle, void** listenComm) {
|
||||
char pluginHandle[NCCL_NET_HANDLE_MAXSIZE];
|
||||
ncclResult_t ret = pluginListen(dev, &pluginHandle, listenComm);
|
||||
memcpy(handle, &pluginHandle, NCCL_NET_HANDLE_MAXSIZE_V3);
|
||||
return ret;
|
||||
}
|
||||
static ncclResult_t pluginConnect_v3(int dev, void* handle, void** sendComm) {
|
||||
char pluginHandle[NCCL_NET_HANDLE_MAXSIZE];
|
||||
memcpy(&pluginHandle, handle, NCCL_NET_HANDLE_MAXSIZE_V3);
|
||||
return pluginConnect_v4(dev, &pluginHandle, sendComm);
|
||||
}
|
||||
const ncclNet_v3_t ncclNetPlugin_v3 = {
|
||||
.name = PLUGIN_NAME,
|
||||
.init = pluginInit_v3,
|
||||
.devices = pluginDevices,
|
||||
.getProperties = pluginGetProperties_v4,
|
||||
.listen = pluginListen_v3,
|
||||
.connect = pluginConnect_v3,
|
||||
.accept = pluginAccept_v4,
|
||||
.regMr = pluginRegMr,
|
||||
.deregMr = pluginDeregMr,
|
||||
.isend = pluginIsend_v4,
|
||||
.irecv = pluginIrecv_v4,
|
||||
.flush = pluginFlush,
|
||||
.test = pluginTest,
|
||||
.closeSend = pluginCloseSend,
|
||||
.closeRecv = pluginCloseRecv,
|
||||
.closeListen = pluginCloseListen,
|
||||
};
|
||||
|
||||
/* v2 Compat */
|
||||
const ncclNet_v2_t ncclNetPlugin_v2 = {
|
||||
.name = PLUGIN_NAME,
|
||||
.init = pluginInit_v3,
|
||||
.devices = pluginDevices,
|
||||
.pciPath = pluginPciPath,
|
||||
.ptrSupport = pluginPtrSupport,
|
||||
.listen = pluginListen,
|
||||
.connect = pluginConnect_v4,
|
||||
.accept = pluginAccept_v4,
|
||||
.regMr = pluginRegMr,
|
||||
.deregMr = pluginDeregMr,
|
||||
.isend = pluginIsend_v4,
|
||||
.irecv = pluginIrecv_v4,
|
||||
.flush = pluginFlush,
|
||||
.test = pluginTest,
|
||||
.closeSend = pluginCloseSend,
|
||||
.closeRecv = pluginCloseRecv,
|
||||
.closeListen = pluginCloseListen,
|
||||
};
|
Loading…
x
Reference in New Issue
Block a user