nccl/src/init_nvtx.cc
Sylvain Jeaugey 28189e2df8 2.16.2-1
Add support for CUDA 12.0, drop Kepler (sm_35).
Support for H100 features.
Make socket code more robust and protected. Solves #555.
Improve performance on large CUDA graphs, reducing dependencies.
Reduce inter-socket bandwidth on AMD CPUs to favor better paths.
Various fixes to ncclCommAbort.
Make service thread polling resistant to EINTR.
Compile with profiling API by default.
Extend NVTX instrumentation with call arguments.
2022-11-30 02:31:59 -08:00

27 lines
832 B
C++

#include "nccl.h"
#include "nvtx.h"
// NVTX payload enum entries for ncclRedOp_t: each entry pairs a display
// string with the corresponding reduction-operation enumerator. The table is
// handed to NVTX by initNvtxRegisteredEnums().
static constexpr nvtxPayloadEnum_t NvtxEnumRedSchema[] = {
  {"Sum",     ncclSum},
  {"Product", ncclProd},
  {"Max",     ncclMax},
  {"Min",     ncclMin},
  {"Avg",     ncclAvg}
};
// Must be called before the first call to any reduction operation.
void initNvtxRegisteredEnums() {
// Register schemas and strings
constexpr const nvtxPayloadEnumAttr_t eAttr {
.fieldMask = NVTX_PAYLOAD_ENUM_ATTR_ENTRIES | NVTX_PAYLOAD_ENUM_ATTR_NUM_ENTRIES |
NVTX_PAYLOAD_ENUM_ATTR_SIZE | NVTX_PAYLOAD_ENUM_ATTR_SCHEMA_ID,
.name = NULL,
.entries = NvtxEnumRedSchema,
.numEntries = std::extent<decltype(NvtxEnumRedSchema)>::value,
.sizeOfEnum = sizeof(ncclRedOp_t),
.schemaId = NVTX_PAYLOAD_ENTRY_NCCL_REDOP
};
nvtxPayloadEnumRegister(nvtx3::domain::get<nccl_domain>(), &eAttr);
}