# NCCL Tuner Configuration File (CSV Format)
# Format: collective_type,min_bytes,max_bytes,algorithm,protocol,channels,nNodes,nRanks,numPipeOps,regBuff
#
# Collective types: broadcast, reduce, allgather, reducescatter, allreduce
# Algorithms: tree, ring, collnet_direct, collnet_chain, nvls, nvls_tree, pat
# Protocols: ll, ll128, simple
# Channels: number of channels to use, or -1 to keep default
# nNodes: number of nodes to match, or -1 for any number of nodes
# nRanks: number of ranks to match, or -1 for any number of ranks
# numPipeOps: number of pipeline operations to match, or -1 for any number (optional)
# regBuff: whether user buffer can be registered (0=no, 1=yes, -1=any) (optional)
#
# Note: numPipeOps and regBuff parameters are optional - configurations without them will match any value
#
# Examples:

# For single-node configurations with registered buffers
# Small allreduce operations on single node - use tree algorithm, registered buffers
allreduce,0,65536,tree,simple,2,1,-1,-1,1

# For multi-node configurations with 4 nodes, 32 total ranks, single pipeline op, non-registered buffers
# Medium allreduce operations - use ring algorithm
allreduce,65537,1048576,ring,simple,4,4,32,1,0

# For any topology - large allreduce operations with LL128 protocol, exactly 4 pipeline ops, any buffer type
allreduce,1048577,4294967295,ring,ll128,-1,-1,-1,4,-1

# Broadcast operations - different configs for different topologies, pipeline complexity, and buffer types
# Single node broadcast - prefer tree, any pipeOps, registered buffers only
broadcast,0,32768,tree,simple,-1,1,-1,-1,1

# Larger broadcast on any number of nodes, single pipeline operation, non-registered buffers - use ring
broadcast,32769,4294967295,ring,simple,2,-1,-1,1,0

# AllGather operations - optimized for 2-node configurations, any pipeOps, any buffer type
allgather,0,4294967295,ring,simple,4,2,-1

# ReduceScatter operations
# Small messages on single node, single pipeline op, registered buffers
reducescatter,0,131072,tree,simple,2,1,-1,1,1
# Large messages on any topology, exactly 2 pipeline ops, non-registered buffers
reducescatter,131073,4294967295,ring,simple,-1,-1,-1,2,0

# Reduce operations - any topology, keep default channels, any pipeOps, any buffer type
reduce,0,4294967295,tree,simple,-1,-1,-1