* Improvements for GB200 systems:
  * Optimize the network performance by alternating the direction of the rings and the NIC-to-GPU assignment across communicators to limit unnecessary sharing.
  * Fix the detection of C2C links in case GPU Direct RDMA is disabled between a GPU and a NIC.
  * Fix PXN support on MNNVL systems, where NCCL would try (and fail) to share regular host memory across multiple nodes.
  * Fix P2C (PXN over C2C), which is now preferred over regular PXN. This support is currently preliminary and is disabled by default; use NCCL_PXN_C2C=1 to enable it (a sketch of enabling it programmatically follows this list).
* Further reduce the overheads of CUDA graph capturing, which increased in NCCL 2.26.2 for large graphs.
* Optimize the network performance on DGX B200 systems by adjusting the bandwidths provided to the graph search algorithm.
* Enable fp8 reductions in symmetric kernels on Blackwell with CUDA 12.8.
* Restore the plugin name handling logic to make it possible to specify a path to the plugin (Issue #1732).
* Restore the ability to change NCCL_COLLNET_ENABLE during execution (Issue #1741).
* Add an example tuner plugin with CSV-based overrides (the example configuration file is reproduced below).
* Remove an x86 dependency from the example profiler.
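Since the P2C opt-in is an ordinary environment variable, it can be set on the command line (`NCCL_PXN_C2C=1 ./app`) or programmatically before NCCL initializes. A minimal C sketch, assuming only that NCCL reads its `NCCL_*` variables when the first communicator is created; `NCCL_PXN_C2C` comes from the release notes above, everything else is plain POSIX:

```c
/* Minimal sketch: opting in to the preliminary P2C (PXN over C2C) path.
 * NCCL_PXN_C2C is the variable named in the release notes; the rest of
 * this file is plain POSIX, not NCCL-specific API. */
#include <stdlib.h>

int main(int argc, char **argv) {
    /* Must happen before the first ncclCommInitRank()/ncclCommInitAll(),
     * since NCCL reads its environment variables during initialization. */
    setenv("NCCL_PXN_C2C", "1", 1);  /* final 1 = overwrite any existing value */

    /* ... initialize communicators and run collectives as usual ... */
    return 0;
}
```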
# NCCL Tuner Configuration File (CSV Format)
# Format: collective_type,min_bytes,max_bytes,algorithm,protocol,channels,nNodes,nRanks,numPipeOps,regBuff
#
# Collective types: broadcast, reduce, allgather, reducescatter, allreduce
# Algorithms: tree, ring, collnet_direct, collnet_chain, nvls, nvls_tree, pat
# Protocols: ll, ll128, simple
# Channels: number of channels to use, or -1 to keep default
# nNodes: number of nodes to match, or -1 for any number of nodes
# nRanks: number of ranks to match, or -1 for any number of ranks
# numPipeOps: number of pipeline operations to match, or -1 for any number (optional)
# regBuff: whether user buffer can be registered (0=no, 1=yes, -1=any) (optional)
#
# Note: numPipeOps and regBuff parameters are optional - configurations without them will match any value
#
# Examples:

# For single-node configurations with registered buffers
# Small allreduce operations on single node - use tree algorithm, registered buffers
allreduce,0,65536,tree,simple,2,1,-1,-1,1

# For multi-node configurations with 4 nodes, 32 total ranks, single pipeline op, non-registered buffers
# Medium allreduce operations - use ring algorithm
allreduce,65537,1048576,ring,simple,4,4,32,1,0

# For any topology - large allreduce operations with LL128 protocol, multiple pipeline ops, any buffer type
allreduce,1048577,4294967295,ring,ll128,-1,-1,-1,4,-1

# Broadcast operations - different configs for different topologies, pipeline complexity, and buffer types
# Single node broadcast - prefer tree, any pipeOps, registered buffers only
broadcast,0,32768,tree,simple,-1,1,-1,-1,1

# Multi-node broadcast with single pipeline operation, non-registered buffers - use ring
broadcast,32769,4294967295,ring,simple,2,-1,-1,1,0

# AllGather operations - optimized for 2-node configurations, any pipeOps, any buffer type
allgather,0,4294967295,ring,simple,4,2,-1

# ReduceScatter operations
# Small messages on single node, single pipeline op, registered buffers
reducescatter,0,131072,tree,simple,2,1,-1,1,1

# Large messages on any topology, multiple pipeline ops, non-registered buffers
reducescatter,131073,4294967295,ring,simple,-1,-1,-1,2,0

# Reduce operations - any topology, keep default channels, any pipeOps, any buffer type
reduce,0,4294967295,tree,simple,-1,-1,-1
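To make the matching rules concrete, here is a minimal C sketch of how one line of this format could be parsed. The struct and function names are invented for this example and this is not the plugin's actual parser; the only behavior taken from the file above is the field order and the rule that the two trailing fields are optional and default to "match any":

```c
/* Illustrative sketch only: parse one line of the CSV format documented
 * above. Field names mirror the header comment; TunerOverride and
 * parseOverride are hypothetical names, not the example plugin's API. */
#include <stdio.h>
#include <string.h>

struct TunerOverride {
    char coll[32];                 /* broadcast, reduce, allgather, ... */
    unsigned long minBytes, maxBytes;
    char algo[32];                 /* tree, ring, collnet_direct, ... */
    char proto[16];                /* ll, ll128, simple */
    int channels;                  /* -1 keeps NCCL's default */
    int nNodes, nRanks;            /* -1 matches any value */
    int numPipeOps;                /* optional; -1 matches any value */
    int regBuff;                   /* optional; 0/1, -1 matches any value */
};

/* Returns 1 on success, 0 for comment, blank, or malformed lines. */
static int parseOverride(const char *line, struct TunerOverride *o) {
    if (line[0] == '#' || line[0] == '\n' || line[0] == '\0') return 0;

    /* numPipeOps and regBuff are optional: pre-set them to -1 ("match any")
     * so 8-field lines like the allgather and reduce examples still work. */
    o->numPipeOps = -1;
    o->regBuff = -1;

    int n = sscanf(line, "%31[^,],%lu,%lu,%31[^,],%15[^,],%d,%d,%d,%d,%d",
                   o->coll, &o->minBytes, &o->maxBytes, o->algo, o->proto,
                   &o->channels, &o->nNodes, &o->nRanks,
                   &o->numPipeOps, &o->regBuff);
    return n >= 8;  /* the first 8 fields are mandatory */
}

int main(void) {
    struct TunerOverride o;
    if (parseOverride("allgather,0,4294967295,ring,simple,4,2,-1", &o))
        printf("%s: algo=%s proto=%s pipeOps=%d regBuff=%d\n",
               o.coll, o.algo, o.proto, o.numPipeOps, o.regBuff);
    return 0;
}
```

Pre-setting the two optional fields before calling sscanf is what lets an 8-field line such as `allgather,0,4294967295,ring,simple,4,2,-1` match any pipeline depth and any buffer registration state, as the "Note" comment in the file describes.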