Improvements for GB200 systems
* Optimize the network performance by alternating the direction of the rings and the NIC to GPU assignment across communicators to limit unnecessary sharing.
* Fix the detection of C2C links in case GPU Direct RDMA is disabled between a GPU and a NIC.
* Fix PXN support on MNNVL systems, where NCCL would try (and fail) to share regular host memory across multiple nodes.
* Fix P2C (PXN over C2C), which is now preferred over regular PXN. This support is currently preliminary and is disabled by default; use NCCL_PXN_C2C=1 to enable.
Further reduce the overheads of CUDA graph capturing, which increased in NCCL 2.26.2 for large graphs.
Optimize the network performance on DGX B200 systems by adjusting the bandwidths provided to the graph search algorithm.
Enable fp8 reductions in symmetric kernels on Blackwell with CUDA 12.8.
Restore the plugin name handling logic to make it possible to specify a path to the plugin (Issue #1732).
Restore the ability to change NCCL_COLLNET_ENABLE during execution (Issue #1741).
Add an example tuner plugin with CSV-based overrides.
Remove an x86 dependency from the example profiler.
454 lines
18 KiB
C
454 lines
18 KiB
C
/*************************************************************************
 * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/
|
|
|
|
#include "tuner.h"
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
// Gives the annotated symbol hidden visibility.
#define __hidden __attribute__ ((visibility("hidden")))

// Size of the buffer used to read one line of the config file.
#define MAX_LINE_LENGTH 256

// Position of each CSV column within a configuration line.
// Line layout:
//   colltype,minbytes,maxbytes,algorithm,protocol,channels,nNodes,nRanks,numPipeOps,regBuff
#define CONFIG_FIELD_COLLTYPE   0
#define CONFIG_FIELD_MINBYTES   1
#define CONFIG_FIELD_MAXBYTES   2
#define CONFIG_FIELD_ALGORITHM  3
#define CONFIG_FIELD_PROTOCOL   4
#define CONFIG_FIELD_CHANNELS   5
#define CONFIG_FIELD_NNODES     6
#define CONFIG_FIELD_NRANKS     7
#define CONFIG_FIELD_PIPEOPS    8   // optional column
#define CONFIG_FIELD_REGBUFF    9   // optional column

// Column counts used when validating a parsed line.
#define CONFIG_FIELDS_REQUIRED      8   // mandatory columns (colltype .. nRanks)
#define CONFIG_FIELDS_WITH_PIPEOPS  9   // mandatory columns + numPipeOps
#define CONFIG_FIELDS_WITH_REGBUFF  10  // mandatory columns + numPipeOps + regBuff
#define CONFIG_FIELDS_MAX           10  // upper bound on columns that are parsed
|
|
|
|
typedef struct {
|
|
ncclFunc_t collType;
|
|
size_t minBytes;
|
|
size_t maxBytes;
|
|
int algorithm;
|
|
int protocol;
|
|
int nChannels;
|
|
int nNodes;
|
|
int nRanks;
|
|
int numPipeOps;
|
|
int regBuff;
|
|
} TuningConfig;
|
|
|
|
typedef struct {
|
|
TuningConfig* configs; // Changed from static array to dynamic pointer
|
|
int numConfigs;
|
|
int maxConfigs; // Added to track allocated size
|
|
size_t nRanks;
|
|
size_t nNodes;
|
|
ncclDebugLogger_t logFunction;
|
|
} TunerContext;
|
|
|
|
// Parse collective type from string
|
|
static ncclFunc_t parseCollType(const char* str) {
|
|
if (strcmp(str, "broadcast") == 0) return ncclFuncBroadcast;
|
|
if (strcmp(str, "reduce") == 0) return ncclFuncReduce;
|
|
if (strcmp(str, "allgather") == 0) return ncclFuncAllGather;
|
|
if (strcmp(str, "reducescatter") == 0) return ncclFuncReduceScatter;
|
|
if (strcmp(str, "allreduce") == 0) return ncclFuncAllReduce;
|
|
return ncclFuncAllReduce; // default
|
|
}
|
|
|
|
// Convert collective type to string
|
|
static const char* collTypeToString(ncclFunc_t collType) {
|
|
switch (collType) {
|
|
case ncclFuncBroadcast: return "broadcast";
|
|
case ncclFuncReduce: return "reduce";
|
|
case ncclFuncAllGather: return "allgather";
|
|
case ncclFuncReduceScatter: return "reducescatter";
|
|
case ncclFuncAllReduce: return "allreduce";
|
|
default: return "unknown";
|
|
}
|
|
}
|
|
|
|
// Parse algorithm from string
|
|
static int parseAlgorithm(const char* str) {
|
|
if (strcmp(str, "tree") == 0) return NCCL_ALGO_TREE;
|
|
if (strcmp(str, "ring") == 0) return NCCL_ALGO_RING;
|
|
if (strcmp(str, "collnet_direct") == 0) return NCCL_ALGO_COLLNET_DIRECT;
|
|
if (strcmp(str, "collnet_chain") == 0) return NCCL_ALGO_COLLNET_CHAIN;
|
|
if (strcmp(str, "nvls") == 0) return NCCL_ALGO_NVLS;
|
|
if (strcmp(str, "nvls_tree") == 0) return NCCL_ALGO_NVLS_TREE;
|
|
if (strcmp(str, "pat") == 0) return NCCL_ALGO_PAT;
|
|
return NCCL_ALGO_RING; // default
|
|
}
|
|
|
|
// Convert algorithm to string
|
|
static const char* algorithmToString(int algorithm) {
|
|
switch (algorithm) {
|
|
case NCCL_ALGO_TREE: return "tree";
|
|
case NCCL_ALGO_RING: return "ring";
|
|
case NCCL_ALGO_COLLNET_DIRECT: return "collnet_direct";
|
|
case NCCL_ALGO_COLLNET_CHAIN: return "collnet_chain";
|
|
case NCCL_ALGO_NVLS: return "nvls";
|
|
case NCCL_ALGO_NVLS_TREE: return "nvls_tree";
|
|
case NCCL_ALGO_PAT: return "pat";
|
|
default: return "unknown";
|
|
}
|
|
}
|
|
|
|
// Parse protocol from string
|
|
static int parseProtocol(const char* str) {
|
|
if (strcmp(str, "ll") == 0) return NCCL_PROTO_LL;
|
|
if (strcmp(str, "ll128") == 0) return NCCL_PROTO_LL128;
|
|
if (strcmp(str, "simple") == 0) return NCCL_PROTO_SIMPLE;
|
|
return NCCL_PROTO_SIMPLE; // default
|
|
}
|
|
|
|
// Convert protocol to string
|
|
static const char* protocolToString(int protocol) {
|
|
switch (protocol) {
|
|
case NCCL_PROTO_LL: return "ll";
|
|
case NCCL_PROTO_LL128: return "ll128";
|
|
case NCCL_PROTO_SIMPLE: return "simple";
|
|
default: return "unknown";
|
|
}
|
|
}
|
|
|
|
// Helper function to count valid configuration lines in file
|
|
static int countConfigLines(const char* filename) {
|
|
FILE* file = fopen(filename, "r");
|
|
if (!file) {
|
|
return 0;
|
|
}
|
|
|
|
char line[MAX_LINE_LENGTH];
|
|
int count = 0;
|
|
|
|
while (fgets(line, sizeof(line), file)) {
|
|
// Skip comments and empty lines
|
|
if (line[0] == '#' || line[0] == '\n') continue;
|
|
|
|
// Remove trailing newline
|
|
line[strcspn(line, "\n")] = 0;
|
|
|
|
// Check if line has content
|
|
if (strlen(line) > 0) {
|
|
count++;
|
|
}
|
|
}
|
|
|
|
fclose(file);
|
|
return count;
|
|
}
|
|
|
|
// Load configuration from file
|
|
static ncclResult_t loadConfig(TunerContext* ctx, const char* filename) {
|
|
FILE* file = fopen(filename, "r");
|
|
if (!file) {
|
|
if (ctx->logFunction) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Config file %s not found, using defaults", filename);
|
|
}
|
|
return ncclSuccess; // Not finding config file is not an error
|
|
}
|
|
|
|
// First pass: count valid configuration lines
|
|
int configCount = countConfigLines(filename);
|
|
if (configCount == 0) {
|
|
if (ctx->logFunction) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: No valid configurations found in %s", filename);
|
|
}
|
|
fclose(file);
|
|
return ncclSuccess;
|
|
}
|
|
|
|
// Allocate memory for configurations based on actual count
|
|
ctx->configs = (TuningConfig*)malloc(configCount * sizeof(TuningConfig));
|
|
if (!ctx->configs) {
|
|
if (ctx->logFunction) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Failed to allocate memory for %d configurations", configCount);
|
|
}
|
|
fclose(file);
|
|
return ncclSystemError;
|
|
}
|
|
|
|
ctx->maxConfigs = configCount;
|
|
ctx->numConfigs = 0;
|
|
|
|
if (ctx->logFunction) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Allocated memory for %d configurations", configCount);
|
|
}
|
|
|
|
// Reset file pointer to beginning
|
|
fseek(file, 0, SEEK_SET);
|
|
|
|
char line[MAX_LINE_LENGTH];
|
|
|
|
while (fgets(line, sizeof(line), file) && ctx->numConfigs < ctx->maxConfigs) {
|
|
// Skip comments and empty lines
|
|
if (line[0] == '#' || line[0] == '\n') continue;
|
|
|
|
// Remove trailing newline
|
|
line[strcspn(line, "\n")] = 0;
|
|
|
|
// Parse CSV format: colltype,minbytes,maxbytes,algorithm,protocol,channels,nNodes,nRanks,numPipeOps,regBuff
|
|
char* token;
|
|
char* tokens[CONFIG_FIELDS_MAX];
|
|
int tokenCount = 0;
|
|
|
|
// Make a copy of the line for tokenizing
|
|
char lineCopy[MAX_LINE_LENGTH];
|
|
strncpy(lineCopy, line, sizeof(lineCopy));
|
|
lineCopy[sizeof(lineCopy) - 1] = '\0';
|
|
|
|
// Tokenize by comma
|
|
token = strtok(lineCopy, ",");
|
|
while (token != NULL && tokenCount < CONFIG_FIELDS_MAX) {
|
|
// Trim whitespace
|
|
while (*token == ' ' || *token == '\t') token++;
|
|
char* end = token + strlen(token) - 1;
|
|
while (end > token && (*end == ' ' || *end == '\t')) {
|
|
*end = '\0';
|
|
end--;
|
|
}
|
|
tokens[tokenCount++] = token;
|
|
token = strtok(NULL, ",");
|
|
}
|
|
|
|
// Validate field count: support required fields (8), with pipeOps (9), or with regBuff (10)
|
|
if (tokenCount >= CONFIG_FIELDS_REQUIRED && tokenCount <= CONFIG_FIELDS_MAX) {
|
|
TuningConfig* config = &ctx->configs[ctx->numConfigs];
|
|
config->collType = parseCollType(tokens[CONFIG_FIELD_COLLTYPE]);
|
|
config->minBytes = (size_t)strtoull(tokens[CONFIG_FIELD_MINBYTES], NULL, 10);
|
|
config->maxBytes = (size_t)strtoull(tokens[CONFIG_FIELD_MAXBYTES], NULL, 10);
|
|
config->algorithm = parseAlgorithm(tokens[CONFIG_FIELD_ALGORITHM]);
|
|
config->protocol = parseProtocol(tokens[CONFIG_FIELD_PROTOCOL]);
|
|
config->nChannels = atoi(tokens[CONFIG_FIELD_CHANNELS]);
|
|
config->nNodes = atoi(tokens[CONFIG_FIELD_NNODES]);
|
|
config->nRanks = atoi(tokens[CONFIG_FIELD_NRANKS]);
|
|
|
|
// numPipeOps is optional (9th field, index 8)
|
|
if (tokenCount >= CONFIG_FIELDS_WITH_PIPEOPS) {
|
|
config->numPipeOps = atoi(tokens[CONFIG_FIELD_PIPEOPS]);
|
|
} else {
|
|
config->numPipeOps = -1; // -1 means match any numPipeOps
|
|
}
|
|
|
|
// regBuff is optional (10th field, index 9)
|
|
if (tokenCount >= CONFIG_FIELDS_WITH_REGBUFF) {
|
|
config->regBuff = atoi(tokens[CONFIG_FIELD_REGBUFF]);
|
|
} else {
|
|
config->regBuff = -1; // -1 means match any regBuff value
|
|
}
|
|
|
|
ctx->numConfigs++;
|
|
|
|
if (ctx->logFunction) {
|
|
if (config->numPipeOps == -1 && config->regBuff == -1) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Loaded config: %s [%zu-%zu] %s/%s channels=%d nodes=%d ranks=%d pipeOps=any regBuff=any",
|
|
tokens[CONFIG_FIELD_COLLTYPE], config->minBytes, config->maxBytes,
|
|
tokens[CONFIG_FIELD_ALGORITHM], tokens[CONFIG_FIELD_PROTOCOL],
|
|
config->nChannels, config->nNodes, config->nRanks);
|
|
} else if (config->regBuff == -1) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Loaded config: %s [%zu-%zu] %s/%s channels=%d nodes=%d ranks=%d pipeOps=%d regBuff=any",
|
|
tokens[CONFIG_FIELD_COLLTYPE], config->minBytes, config->maxBytes,
|
|
tokens[CONFIG_FIELD_ALGORITHM], tokens[CONFIG_FIELD_PROTOCOL],
|
|
config->nChannels, config->nNodes, config->nRanks, config->numPipeOps);
|
|
} else if (config->numPipeOps == -1) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Loaded config: %s [%zu-%zu] %s/%s channels=%d nodes=%d ranks=%d pipeOps=any regBuff=%d",
|
|
tokens[CONFIG_FIELD_COLLTYPE], config->minBytes, config->maxBytes,
|
|
tokens[CONFIG_FIELD_ALGORITHM], tokens[CONFIG_FIELD_PROTOCOL],
|
|
config->nChannels, config->nNodes, config->nRanks, config->regBuff);
|
|
} else {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Loaded config: %s [%zu-%zu] %s/%s channels=%d nodes=%d ranks=%d pipeOps=%d regBuff=%d",
|
|
tokens[CONFIG_FIELD_COLLTYPE], config->minBytes, config->maxBytes,
|
|
tokens[CONFIG_FIELD_ALGORITHM], tokens[CONFIG_FIELD_PROTOCOL],
|
|
config->nChannels, config->nNodes, config->nRanks, config->numPipeOps, config->regBuff);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fclose(file);
|
|
if (ctx->logFunction) {
|
|
ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
|
|
"TUNER/ExamplePlugin: Loaded %d tuning configurations from %s", ctx->numConfigs, filename);
|
|
}
|
|
return ncclSuccess;
|
|
}
|
|
|
|
// Create a tuner instance.
//
// Allocates a TunerContext, records nRanks/nNodes/logFunction in it, and
// loads tuning rules from the file named by the NCCL_TUNER_CONFIG_FILE
// environment variable, or from "nccl_tuner.conf" when the variable is unset.
//
// On success stores the new context in *context and returns ncclSuccess.
// Returns ncclSystemError if the context cannot be allocated; if loadConfig
// fails, its result is returned and everything allocated here is freed
// (*context is not written in either failure case).
__hidden ncclResult_t pluginInit(size_t nRanks, size_t nNodes, ncclDebugLogger_t logFunction, void **context) {
  TunerContext* tuner = (TunerContext*)malloc(sizeof(TunerContext));
  if (tuner == NULL) return ncclSystemError;

  // Start with an empty rule set.
  tuner->configs = NULL;
  tuner->numConfigs = 0;
  tuner->maxConfigs = 0;
  tuner->nRanks = nRanks;
  tuner->nNodes = nNodes;
  tuner->logFunction = logFunction;

  if (logFunction != NULL) {
    logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
                "TUNER/ExamplePlugin: Initializing tuner for %zu nodes, %zu ranks", nNodes, nRanks);
  }

  // The environment variable takes precedence over the default file name.
  const char* configPath = getenv("NCCL_TUNER_CONFIG_FILE");
  if (configPath == NULL) {
    configPath = "nccl_tuner.conf";
  }

  ncclResult_t status = loadConfig(tuner, configPath);
  if (status == ncclSuccess) {
    *context = tuner;
    return ncclSuccess;
  }

  // Loading failed: release whatever was allocated (free(NULL) is a no-op).
  free(tuner->configs);
  free(tuner);
  return status;
}
|
|
|
|
// Apply the first loaded tuning rule that matches this collective call.
//
// A rule matches when its collective type equals collType, nBytes lies in
// [minBytes, maxBytes], and each of nNodes / nRanks / numPipeOps / regBuff is
// either -1 (wildcard) or equal to the current value. For the first matching
// rule whose algorithm/protocol pair is inside the table bounds and is not
// marked NCCL_ALGO_PROTO_IGNORE, the pair's cost is set to 0.0 so it is
// preferred, *nChannels is set to the rule's channel count (unless that is
// -1), and the function returns. Matching rules that cannot be applied are
// logged and the search continues.
//
// collCostTable is accessed as a contiguous row-major numAlgo x numProto
// array of floats. NCCL passes a 2-D float array cast to float** (see the
// NCCL tuner plugin README), so it must NOT be dereferenced as an array of
// row pointers; doing so interprets float bits as an address.
// NOTE(review): confirm the table layout against the NCCL version this
// plugin is built for.
//
// Returns ncclInternalError when context is NULL, ncclSuccess otherwise
// (including when no rule matches).
__hidden ncclResult_t pluginGetCollInfo(void* context, ncclFunc_t collType, size_t nBytes,
                              int numPipeOps, float** collCostTable, int numAlgo, int numProto,
                              int regBuff, int* nChannels) {
  TunerContext* ctx = (TunerContext*)context;
  if (!ctx) return ncclInternalError;

  // Flat view of the cost table; entry (a, p) lives at a * numProto + p.
  float* costTable = (float*)collCostTable;

  // Default channels.
  // NOTE(review): this value is also what is reported when no rule matches;
  // confirm that forcing 1 channel in that case is intended.
  *nChannels = 1;

  if (ctx->logFunction) {
    ctx->logFunction(NCCL_LOG_TRACE, NCCL_TUNING, __FILE__, __LINE__,
                     "TUNER/ExamplePlugin: pluginGetCollInfo called - collType=%s, nBytes=%zu, numPipeOps=%d, regBuff=%d, numConfigs=%d",
                     collTypeToString(collType), nBytes, numPipeOps, regBuff, ctx->numConfigs);
  }

  // Look for matching configuration
  for (int i = 0; i < ctx->numConfigs; i++) {
    TuningConfig* config = &ctx->configs[i];

    if (ctx->logFunction) {
      ctx->logFunction(NCCL_LOG_TRACE, NCCL_TUNING, __FILE__, __LINE__,
                       "TUNER/ExamplePlugin: Checking config %d - collType=%s, minBytes=%zu, maxBytes=%zu, algo=%s, proto=%s, nNodes=%d, nRanks=%d, numPipeOps=%d, regBuff=%d",
                       i, collTypeToString(config->collType), config->minBytes, config->maxBytes, algorithmToString(config->algorithm), protocolToString(config->protocol),
                       config->nNodes, config->nRanks, config->numPipeOps, config->regBuff);
    }

    // Evaluate each match criterion once; the values are reused for logging.
    const int collMatch = (config->collType == collType);
    const int sizeMatch = (nBytes >= config->minBytes && nBytes <= config->maxBytes);
    const int nodesMatch = (config->nNodes == -1 || config->nNodes == (int)ctx->nNodes);
    const int ranksMatch = (config->nRanks == -1 || config->nRanks == (int)ctx->nRanks);
    const int pipeOpsMatch = (config->numPipeOps == -1 || config->numPipeOps == numPipeOps);
    const int regBuffMatch = (config->regBuff == -1 || config->regBuff == regBuff);

    if (!(collMatch && sizeMatch && nodesMatch && ranksMatch && pipeOpsMatch && regBuffMatch)) {
      if (ctx->logFunction) {
        ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
                         "TUNER/ExamplePlugin: Config does not match - collType match=%d, size match=%d, nodes match=%d, ranks match=%d, pipeOps match=%d, regBuff match=%d",
                         collMatch, sizeMatch, nodesMatch, ranksMatch, pipeOpsMatch, regBuffMatch);
      }
      continue;
    }

    if (ctx->logFunction) {
      ctx->logFunction(NCCL_LOG_TRACE, NCCL_TUNING, __FILE__, __LINE__,
                       "TUNER/ExamplePlugin: Config matches. Applying algo=%s, proto=%s, channels=%d",
                       algorithmToString(config->algorithm), protocolToString(config->protocol), config->nChannels);
    }

    // Check bounds before touching the table.
    if (!(config->algorithm < numAlgo && config->protocol < numProto)) {
      if (ctx->logFunction) {
        ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
                         "TUNER/ExamplePlugin: Algorithm/protocol out of bounds - algo=%s (max %d), proto=%s (max %d)",
                         algorithmToString(config->algorithm), numAlgo, protocolToString(config->protocol), numProto);
      }
      continue;
    }

    float* cost = &costTable[(size_t)config->algorithm * (size_t)numProto + (size_t)config->protocol];

    if (*cost == NCCL_ALGO_PROTO_IGNORE) {
      if (ctx->logFunction) {
        ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
                         "TUNER/ExamplePlugin: Algorithm/protocol combination [%s][%s] is marked as IGNORE",
                         algorithmToString(config->algorithm), protocolToString(config->protocol));
      }
      continue;
    }

    if (ctx->logFunction) {
      ctx->logFunction(NCCL_LOG_TRACE, NCCL_TUNING, __FILE__, __LINE__,
                       "TUNER/ExamplePlugin: Setting cost table[%s][%s] (%p) = 0.0 (was %.1f)",
                       algorithmToString(config->algorithm), protocolToString(config->protocol),
                       (void*)cost, *cost);
    }
    *cost = 0.0f; // Set low cost to prefer this configuration

    // Only override channels if not set to -1 (keep default)
    if (config->nChannels != -1) {
      *nChannels = config->nChannels;
    }

    if (ctx->logFunction) {
      if (config->nChannels == -1) {
        ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
                         "TUNER/ExamplePlugin: Applied config for collType=%s, bytes=%zu, pipeOps=%d, regBuff=%d: algo=%s, proto=%s, channels=default (nodes=%d, ranks=%d)",
                         collTypeToString(config->collType), nBytes, numPipeOps, regBuff, algorithmToString(config->algorithm), protocolToString(config->protocol),
                         config->nNodes, config->nRanks);
      } else {
        ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
                         "TUNER/ExamplePlugin: Applied config for collType=%s, bytes=%zu, pipeOps=%d, regBuff=%d: algo=%s, proto=%s, channels=%d (nodes=%d, ranks=%d)",
                         collTypeToString(config->collType), nBytes, numPipeOps, regBuff, algorithmToString(config->algorithm), protocolToString(config->protocol),
                         config->nChannels, config->nNodes, config->nRanks);
      }
    }
    return ncclSuccess;
  }

  // If no specific config found, apply default behavior
  if (ctx->logFunction) {
    ctx->logFunction(NCCL_LOG_INFO, NCCL_TUNING, __FILE__, __LINE__,
                     "TUNER/ExamplePlugin: No matching config found");
  }

  return ncclSuccess;
}
|
|
|
|
// Release a tuner instance created by pluginInit: frees the rule array and
// then the context itself. A NULL context is accepted and ignored.
// Always returns ncclSuccess.
__hidden ncclResult_t pluginDestroy(void* context) {
  TunerContext* tuner = (TunerContext*)context;
  if (tuner != NULL) {
    free(tuner->configs);  // free(NULL) is a no-op, so no guard is needed
    free(tuner);
  }
  return ncclSuccess;
}
|
|
|
|
#define PLUGIN_NAME "Example"
|
|
|
|
const ncclTuner_v4_t ncclTunerPlugin_v4 = {
|
|
.name = PLUGIN_NAME,
|
|
.init = pluginInit,
|
|
.getCollInfo = pluginGetCollInfo,
|
|
.destroy = pluginDestroy
|
|
};
|