/************************************************************************* * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. * * See LICENSE.txt for license information ************************************************************************/ #include "enqueue.h" #include "collectives.h" #include "nccl.h" NCCL_API(ncclResult_t, ncclReduce, const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream); ncclResult_t ncclReduce(const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op, int root, ncclComm_t comm, cudaStream_t stream) { struct NvtxParamsReduce { size_t bytes; int root; ncclRedOp_t op; }; constexpr nvtxPayloadSchemaEntry_t ReduceSchema[] = { {0, NVTX_PAYLOAD_ENTRY_TYPE_SIZE, "Message size [bytes]"}, {0, NVTX_PAYLOAD_ENTRY_TYPE_INT, "Root", nullptr, 0, offsetof(NvtxParamsReduce, root)}, {0, NVTX_PAYLOAD_ENTRY_NCCL_REDOP, "Reduction operation", nullptr, 0, offsetof(NvtxParamsReduce, op)} }; NvtxParamsReduce payload{count * ncclTypeSize(datatype), root, op}; NVTX3_FUNC_WITH_PARAMS(Reduce, ReduceSchema, payload) struct ncclInfo info = { ncclFuncReduce, "Reduce", sendbuff, recvbuff, count, datatype, op, root, comm, stream, /* Args */ REDUCE_CHUNKSTEPS, REDUCE_SLICESTEPS }; return ncclEnqueueCheck(&info); }