/************************************************************************* * Unit tests for NCCL Tuner Plugin ************************************************************************/ #define _GNU_SOURCE // Enable setenv/unsetenv and other GNU extensions #include #include #include #include #include #include #include // Include NCCL tuner header (which includes common.h and err.h) #include "tuner.h" // Include plugin source for testing #include "../plugin.c" // Test framework macros #define TEST_ASSERT(condition, message) \ do { \ if (!(condition)) { \ printf("FAIL: %s - %s\n", __func__, message); \ return 0; \ } \ } while(0) #define TEST_PASS() \ do { \ printf("PASS: %s\n", __func__); \ return 1; \ } while(0) // Global test state static int test_log_count = 0; // Mock logger function void mock_logger(ncclDebugLogLevel level, unsigned long flags, const char* file, int line, const char* fmt, ...) { (void)flags; // Suppress unused parameter warning test_log_count++; // Check if we should print based on NCCL_DEBUG level const char* debug_level = getenv("NCCL_DEBUG"); int should_print = 0; if (debug_level) { if (strcmp(debug_level, "TRACE") == 0) { should_print = 1; // Print everything } else if (strcmp(debug_level, "INFO") == 0 && level <= NCCL_LOG_INFO) { should_print = 1; // Print INFO and below } else if (strcmp(debug_level, "WARN") == 0 && level <= NCCL_LOG_WARN) { should_print = 1; // Print WARN and below } } if (!should_print) return; // Convert log level to string const char* level_str; switch(level) { case NCCL_LOG_NONE: level_str = "NONE"; break; case NCCL_LOG_VERSION: level_str = "VERSION"; break; case NCCL_LOG_WARN: level_str = "WARN"; break; case NCCL_LOG_INFO: level_str = "INFO"; break; case NCCL_LOG_ABORT: level_str = "ABORT"; break; case NCCL_LOG_TRACE: level_str = "TRACE"; break; default: level_str = "UNKNOWN"; break; } // Print log header printf("[TUNER:%s:%s:%d] ", level_str, file, line); // Print formatted message va_list args; va_start(args, fmt); vprintf(fmt, args); va_end(args); printf("\n"); } // Helper function to create test config file void create_test_config(const char* filename, const char* content) { FILE* f = fopen(filename, "w"); if (f) { fprintf(f, "%s", content); fclose(f); } } // Test 1: Plugin initialization int test_plugin_init() { void* context = NULL; // Test successful initialization ncclResult_t result = pluginInit(8, 2, mock_logger, &context); TEST_ASSERT(result == ncclSuccess, "Plugin init should succeed"); TEST_ASSERT(context != NULL, "Context should be allocated"); // Clean up pluginDestroy(context); TEST_PASS(); } // Test 2: Configuration file parsing - valid CSV int test_config_parsing_valid() { const char* test_config = "# Test configuration\n" "allreduce,0,65536,tree,simple,2,1,-1,-1,-1\n" "broadcast,0,32768,ring,ll128,4,2,16,-1,-1\n" "# Comment line\n" "\n" // Empty line "reduce,1024,2048,tree,simple,-1,-1,-1,-1,-1\n"; create_test_config("test_valid.conf", test_config); // Set environment variable to use our test config setenv("NCCL_TUNER_CONFIG_FILE", "test_valid.conf", 1); void* context = NULL; ncclResult_t result = pluginInit(16, 2, mock_logger, &context); TEST_ASSERT(result == ncclSuccess, "Plugin init with valid config should succeed"); // Clean up pluginDestroy(context); unlink("test_valid.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 3: Configuration file parsing - invalid CSV int test_config_parsing_invalid() { const char* test_config = "allreduce,0,65536,tree,simple,2,1 # Missing nRanks and other fields\n" "invalid_collective,0,1024,ring,simple,1,1,1,-1,-1\n" "broadcast,abc,def,ring,simple,1,1,1,-1,-1\n"; // Invalid numbers create_test_config("test_invalid.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_invalid.conf", 1); void* context = NULL; ncclResult_t result = pluginInit(8, 1, mock_logger, &context); // Should still succeed but with no valid configs loaded TEST_ASSERT(result == ncclSuccess, "Plugin init should succeed even with invalid config"); // Clean up pluginDestroy(context); unlink("test_invalid.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 4: Collective type matching int test_collective_matching() { const char* test_config = "allreduce,0,65536,tree,simple,8,1,-1,-1,-1\n" "broadcast,0,32768,ring,ll128,4,-1,-1,-1,-1\n"; create_test_config("test_match.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_match.conf", 1); void* context = NULL; pluginInit(8, 1, mock_logger, &context); // Create mock cost table float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; // Default high cost } } int nChannels; // Test allreduce matching (should match first config) ncclResult_t result = pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(result == ncclSuccess, "GetCollInfo should succeed"); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "DEBUG: Checking cost_table[TREE][SIMPLE] (%p) = %.1f (expecting 0.0)", &cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE]); TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Tree/Simple should have low cost"); TEST_ASSERT(nChannels == 8, "Should set 8 channels"); // Test broadcast matching (should match second config) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; // Reset costs } } result = pluginGetCollInfo(context, ncclFuncBroadcast, 16384, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(result == ncclSuccess, "GetCollInfo should succeed"); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "DEBUG: Checking cost_table[RING][LL128] (%p) = %.1f (expecting 0.0)", &cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128], cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128]); TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Ring/LL128 should have low cost"); TEST_ASSERT(nChannels == 4, "Should set 4 channels"); // Clean up pluginDestroy(context); unlink("test_match.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 5: Size range matching int test_size_matching() { const char* test_config = "allreduce,0,1024,tree,simple,2,-1,-1,-1,-1\n" "allreduce,1025,65536,ring,simple,4,-1,-1,-1,-1\n" "allreduce,65537,4294967295,ring,ll128,8,-1,-1,-1,-1\n"; create_test_config("test_size.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_size.conf", 1); void* context = NULL; pluginInit(8, 1, mock_logger, &context); float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } int nChannels = 1; pluginGetCollInfo(context, ncclFuncAllReduce, 512, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "DEBUG: Small message - checking cost_table[TREE][SIMPLE] (%p) = %.1f (expecting 0.0)", &cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE]); TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Small: Tree/Simple should have low cost"); TEST_ASSERT(nChannels == 2, "Small: Should set 2 channels"); // Test medium message (should match second config) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "DEBUG: Medium message - checking cost_table[RING][SIMPLE] (%p) = %.1f (expecting 0.0)", &cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE]); TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "Medium: Ring/Simple should have low cost"); TEST_ASSERT(nChannels == 4, "Medium: Should set 4 channels"); // Test large message (should match third config) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context, ncclFuncAllReduce, 1048576, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "DEBUG: Large message - checking cost_table[RING][LL128] (%p) = %.1f (expecting 0.0)", &cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128], cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128]); TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Large: Ring/LL128 should have low cost"); TEST_ASSERT(nChannels == 8, "Large: Should set 8 channels"); // Clean up pluginDestroy(context); unlink("test_size.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 6: Topology matching int test_topology_matching() { const char* test_config = "allreduce,0,65536,tree,simple,2,1,-1,-1,-1\n" // Single node only "allreduce,0,65536,ring,simple,4,4,32,-1,-1\n" // 4 nodes, 32 ranks exactly "allreduce,0,65536,ring,ll128,8,-1,-1,-1,-1\n"; // Any topology create_test_config("test_topo.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_topo.conf", 1); // Test with single node setup void* context1 = NULL; pluginInit(8, 1, mock_logger, &context1); // 8 ranks, 1 node float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } int nChannels; pluginGetCollInfo(context1, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Single node: Should match tree config"); TEST_ASSERT(nChannels == 2, "Single node: Should set 2 channels"); pluginDestroy(context1); // Test with 4 nodes, 32 ranks setup void* context2 = NULL; pluginInit(32, 4, mock_logger, &context2); // 32 ranks, 4 nodes for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context2, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "4-node: Should match ring/simple config"); TEST_ASSERT(nChannels == 4, "4-node: Should set 4 channels"); // Clean up unlink("test_topo.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 7: Default channels behavior (-1) int test_default_channels() { const char* test_config = "allreduce,0,65536,tree,simple,-1,-1,-1,-1,-1\n"; // Use default channels create_test_config("test_default.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_default.conf", 1); void* context = NULL; pluginInit(8, 1, mock_logger, &context); float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } int nChannels = 99; // Set to known value pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Should apply algorithm/protocol"); TEST_ASSERT(nChannels == 1, "Should keep default channels (1) when config has -1"); // Clean up pluginDestroy(context); unlink("test_default.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 8: regBuff matching int test_regbuff_matching() { const char* test_config = "allreduce,0,65536,tree,simple,2,-1,-1,-1,1\n" // Registered buffers only "allreduce,0,65536,ring,simple,4,-1,-1,-1,0\n" // Non-registered buffers only "allreduce,0,65536,ring,ll128,8,-1,-1,-1,-1\n"; // Any buffer type (backward compatible) create_test_config("test_regbuff.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_regbuff.conf", 1); void* context = NULL; pluginInit(8, 1, mock_logger, &context); float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; } int nChannels; // Test registered buffer (should match first config) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 1, &nChannels); // regBuff = 1 (registered) TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Registered buffer: Tree/Simple should have low cost"); TEST_ASSERT(nChannels == 2, "Registered buffer: Should set 2 channels"); // Test non-registered buffer (should match second config) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); // regBuff = 0 (non-registered) TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "Non-registered buffer: Ring/Simple should have low cost"); TEST_ASSERT(nChannels == 4, "Non-registered buffer: Should set 4 channels"); // Test backward compatibility - config without regBuff should match any regBuff value for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } // First try with regBuff=2 (unusual value, should match third config) pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 2, &nChannels); // regBuff = 2 (only third config should match) TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Any regBuff: Ring/LL128 should have low cost"); TEST_ASSERT(nChannels == 8, "Any regBuff: Should set 8 channels"); // Clean up pluginDestroy(context); unlink("test_regbuff.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 9: numPipeOps matching int test_pipeops_matching() { const char* test_config = "allreduce,0,65536,tree,simple,2,-1,-1,1,-1\n" // Single pipeline op "allreduce,0,65536,ring,simple,4,-1,-1,4,-1\n" // Multiple pipeline ops "allreduce,0,65536,ring,ll128,8,-1,-1,-1,-1\n"; // Any pipeline ops (backward compatible) create_test_config("test_pipeops.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_pipeops.conf", 1); void* context = NULL; pluginInit(8, 1, mock_logger, &context); float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; } int nChannels; // Test single pipeline op (should match first config) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Single pipeOp: Tree/Simple should have low cost"); TEST_ASSERT(nChannels == 2, "Single pipeOp: Should set 2 channels"); // Test multiple pipeline ops (should match second config) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 4, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "Multiple pipeOps: Ring/Simple should have low cost"); TEST_ASSERT(nChannels == 4, "Multiple pipeOps: Should set 4 channels"); // Test different number of pipeline ops (should match third config - backward compatible) for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 2, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Any pipeOps: Ring/LL128 should have low cost"); TEST_ASSERT(nChannels == 8, "Any pipeOps: Should set 8 channels"); // Clean up pluginDestroy(context); unlink("test_pipeops.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 10: No matching configuration (fallback behavior) int test_no_match_fallback() { const char* test_config = "broadcast,0,1024,tree,simple,2,-1,-1,-1,-1\n"; // Only broadcast config create_test_config("test_fallback.conf", test_config); setenv("NCCL_TUNER_CONFIG_FILE", "test_fallback.conf", 1); void* context = NULL; pluginInit(8, 1, mock_logger, &context); float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } int nChannels; // Try allreduce (should not match, use fallback) pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "DEBUG: Fallback test - checking cost_table[RING][SIMPLE] (%p) = %.1f (expecting 0.0)", &cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE]); TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 1.0, "Should use pass through unmodified"); TEST_ASSERT(nChannels == 1, "Should use default channels"); // Clean up pluginDestroy(context); unlink("test_fallback.conf"); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 11: Large configuration files (testing dynamic allocation) int test_large_config() { const char* large_config_file = "test_large.conf"; // Create a large configuration file with many entries // This tests the dynamic allocation functionality FILE* f = fopen(large_config_file, "w"); TEST_ASSERT(f != NULL, "Should be able to create large config file"); // Write header comment fprintf(f, "# Large configuration file for testing dynamic allocation\n"); fprintf(f, "# This file contains many configurations to test memory allocation\n"); // Generate a large number of configurations (much more than the old MAX_CONFIGS=100) const int num_configs = 500; // 5x the old static limit const char* collectives[] = {"allreduce", "broadcast", "reduce", "allgather", "reducescatter"}; const char* algorithms[] = {"tree", "ring", "collnet_direct", "nvls"}; const char* protocols[] = {"simple", "ll", "ll128"}; for (int i = 0; i < num_configs; i++) { // Vary the configurations to create realistic test data const char* coll = collectives[i % 5]; const char* algo = algorithms[i % 4]; const char* proto = protocols[i % 3]; size_t min_bytes = (i * 1024) % 1048576; // Vary from 0 to 1MB size_t max_bytes = min_bytes + 65536; // 64KB range int channels = (i % 8) + 1; // 1-8 channels int nodes = (i % 4) == 0 ? -1 : (i % 4); // Mix of -1 and 1-3 nodes int ranks = (i % 8) == 0 ? -1 : (i % 32) + 1; // Mix of -1 and 1-32 ranks int pipeOps = (i % 3) == 0 ? -1 : (i % 4) + 1; // Mix of -1 and 1-4 pipeOps int regBuff = (i % 3) == 0 ? -1 : (i % 2); // Mix of -1, 0, 1 fprintf(f, "%s,%zu,%zu,%s,%s,%d,%d,%d,%d,%d\n", coll, min_bytes, max_bytes, algo, proto, channels, nodes, ranks, pipeOps, regBuff); } fclose(f); // Set environment to use our large config file setenv("NCCL_TUNER_CONFIG_FILE", large_config_file, 1); // Initialize plugin with large config void* context = NULL; ncclResult_t result = pluginInit(16, 4, mock_logger, &context); TEST_ASSERT(result == ncclSuccess, "Plugin init with large config should succeed"); TEST_ASSERT(context != NULL, "Context should be allocated"); // Verify that configurations were loaded TunerContext* ctx = (TunerContext*)context; TEST_ASSERT(ctx->numConfigs == num_configs, "Should load all configurations from large file"); TEST_ASSERT(ctx->maxConfigs == num_configs, "maxConfigs should match allocated size"); TEST_ASSERT(ctx->configs != NULL, "Configs array should be dynamically allocated"); // Test that we can access configurations throughout the array // (This would have failed with the old static MAX_CONFIGS=100 limit) for (int i = 0; i < ctx->numConfigs; i++) { TuningConfig* config = &ctx->configs[i]; // Basic sanity checks on the loaded configurations TEST_ASSERT(config->collType >= ncclFuncBroadcast && config->collType <= ncclFuncAllReduce, "Collective type should be valid"); TEST_ASSERT(config->maxBytes >= config->minBytes, "maxBytes should be >= minBytes"); TEST_ASSERT(config->nChannels > 0, "nChannels should be positive"); } // Test specific configuration access at various indices // Index 0 (first config) TuningConfig* first_config = &ctx->configs[0]; TEST_ASSERT(first_config != NULL, "First config should be accessible"); // Index in middle TuningConfig* mid_config = &ctx->configs[num_configs / 2]; TEST_ASSERT(mid_config != NULL, "Middle config should be accessible"); // Index near end (this would have crashed with static array of 100) TuningConfig* late_config = &ctx->configs[num_configs - 1]; TEST_ASSERT(late_config != NULL, "Last config should be accessible"); // Test memory allocation size - verify we didn't over-allocate mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "Successfully loaded %d configurations (dynamic allocation)", ctx->numConfigs); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "Memory allocated for %d configurations (%zu bytes total)", ctx->maxConfigs, ctx->maxConfigs * sizeof(TuningConfig)); // Test that the plugin can still find matching configurations from the large set float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; // Default high cost } } int nChannels; // Try to find a matching configuration - should work with large config set result = pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(result == ncclSuccess, "GetCollInfo should work with large config set"); // Clean up pluginDestroy(context); unlink(large_config_file); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 12: Very large configuration stress test int test_very_large_config_stress() { const char* stress_config_file = "test_stress.conf"; // Create an even larger configuration file to stress test the implementation FILE* f = fopen(stress_config_file, "w"); TEST_ASSERT(f != NULL, "Should be able to create stress test config file"); fprintf(f, "# Stress test configuration with very large number of entries\n"); // Generate an extremely large number of configurations const int stress_configs = 2000; // 20x the old static limit for (int i = 0; i < stress_configs; i++) { // Create varied but valid configurations fprintf(f, "allreduce,%d,%d,ring,simple,4,-1,-1,-1,-1\n", i * 512, (i * 512) + 1024); } fclose(f); setenv("NCCL_TUNER_CONFIG_FILE", stress_config_file, 1); // Test initialization with stress config void* context = NULL; ncclResult_t result = pluginInit(8, 2, mock_logger, &context); TEST_ASSERT(result == ncclSuccess, "Plugin should handle very large config files"); TunerContext* ctx = (TunerContext*)context; TEST_ASSERT(ctx->numConfigs == stress_configs, "Should load all stress test configurations"); TEST_ASSERT(ctx->configs != NULL, "Stress test configs should be allocated"); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "Stress test - loaded %d configurations successfully", stress_configs); mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__, "Memory usage: %zu bytes for configuration array", stress_configs * sizeof(TuningConfig)); // Verify we can access configurations throughout the entire range for (int i = 0; i < stress_configs; i += 100) { // Sample every 100th config TuningConfig* config = &ctx->configs[i]; TEST_ASSERT(config->collType == ncclFuncAllReduce, "Config should have correct collective type"); TEST_ASSERT(config->minBytes == (size_t)(i * 512), "Config should have correct minBytes"); } // Clean up pluginDestroy(context); unlink(stress_config_file); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test 13: Edge case - empty config file int test_empty_config() { const char* empty_config_file = "test_empty.conf"; // Create empty config file (only comments) create_test_config(empty_config_file, "# Empty configuration file\n" "# No actual configurations\n" "\n" "\n"); setenv("NCCL_TUNER_CONFIG_FILE", empty_config_file, 1); void* context = NULL; ncclResult_t result = pluginInit(8, 2, mock_logger, &context); TEST_ASSERT(result == ncclSuccess, "Plugin should handle empty config files"); TunerContext* ctx = (TunerContext*)context; TEST_ASSERT(ctx->numConfigs == 0, "Should have zero configurations"); TEST_ASSERT(ctx->maxConfigs == 0, "Should have zero max configurations"); TEST_ASSERT(ctx->configs == NULL, "Should not allocate memory for empty config"); // Test that plugin still works with no configurations (fallback behavior) float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]; float* cost_table_ptr[NCCL_NUM_ALGORITHMS]; for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) { cost_table_ptr[i] = cost_table[i]; for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) { cost_table[i][j] = 1.0; } } int nChannels; result = pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1, cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS, 0, &nChannels); TEST_ASSERT(result == ncclSuccess, "GetCollInfo should work with empty config"); // Clean up pluginDestroy(context); unlink(empty_config_file); unsetenv("NCCL_TUNER_CONFIG_FILE"); TEST_PASS(); } // Test runner function pointer type typedef int (*TestFunction)(void); // Test registry typedef struct { const char* name; TestFunction func; const char* description; } TestCase; // All available tests TestCase test_cases[] = { {"init", test_plugin_init, "Plugin initialization"}, {"config-valid", test_config_parsing_valid, "Valid configuration parsing"}, {"config-invalid", test_config_parsing_invalid, "Invalid configuration parsing"}, {"collective", test_collective_matching, "Collective type matching"}, {"size", test_size_matching, "Size range matching"}, {"topology", test_topology_matching, "Topology matching"}, {"channels", test_default_channels, "Default channels behavior"}, {"regbuff", test_regbuff_matching, "Registered buffer matching"}, {"pipeops", test_pipeops_matching, "Pipeline operations matching"}, {"fallback", test_no_match_fallback, "Fallback behavior"}, {"large-config", test_large_config, "Large configuration files (dynamic allocation)"}, {"stress-config", test_very_large_config_stress, "Very large configuration stress test"}, {"empty-config", test_empty_config, "Empty configuration file handling"}, {NULL, NULL, NULL} // End marker }; // Show help/usage information void show_help(const char* program_name) { printf("Usage: %s [test_name ...]\n\n", program_name); printf("Available tests:\n"); for (int i = 0; test_cases[i].name != NULL; i++) { printf(" %-15s - %s\n", test_cases[i].name, test_cases[i].description); } printf("\nExamples:\n"); printf(" %s # Run all tests\n", program_name); printf(" %s init # Run only initialization test\n", program_name); printf(" %s init collective # Run initialization and collective tests\n", program_name); printf(" %s --help # Show this help\n", program_name); } // Find test by name TestFunction find_test(const char* name) { for (int i = 0; test_cases[i].name != NULL; i++) { if (strcmp(test_cases[i].name, name) == 0) { return test_cases[i].func; } } return NULL; } // Main test runner int main(int argc, char* argv[]) { int passed = 0, total = 0; // Check for help if (argc > 1 && (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)) { show_help(argv[0]); return 0; } printf("Running NCCL Tuner Plugin Unit Tests\n"); printf("=====================================\n"); if (argc == 1) { // No arguments - run all tests for (int i = 0; test_cases[i].name != NULL; i++) { total++; passed += test_cases[i].func(); } } else { // Run specific tests for (int arg = 1; arg < argc; arg++) { TestFunction test_func = find_test(argv[arg]); if (test_func) { total++; passed += test_func(); } else { printf("ERROR: Unknown test '%s'\n", argv[arg]); printf("Use --help to see available tests\n"); return 1; } } } printf("\n=====================================\n"); printf("Test Results: %d/%d tests passed\n", passed, total); if (passed == total) { printf("All tests PASSED!\n"); return 0; } else { printf("Some tests FAILED!\n"); return 1; } }