From 6c77476cc1aeb38a7666550813baaedde3dd32d1 Mon Sep 17 00:00:00 2001 From: Sylvain Jeaugey Date: Tue, 20 Sep 2016 11:48:34 -0700 Subject: [PATCH] Make tests check for deltas and report bandwidth --- test/include/test_utilities.h | 152 +++++++++++++++++++++++++++-- test/single/all_gather_test.cu | 19 +++- test/single/all_reduce_test.cu | 22 ++++- test/single/broadcast_test.cu | 17 +++- test/single/reduce_scatter_test.cu | 20 +++- test/single/reduce_test.cu | 20 +++- 6 files changed, 236 insertions(+), 14 deletions(-) diff --git a/test/include/test_utilities.h b/test/include/test_utilities.h index 52d7fd0..c194205 100644 --- a/test/include/test_utilities.h +++ b/test/include/test_utilities.h @@ -9,6 +9,7 @@ #define SRC_TEST_UTILITIES_H_ #include +#include #include #define CUDACHECK(cmd) do { \ @@ -135,6 +136,27 @@ void Randomize(half* const dest, const int N, const int randomSeed) { } #endif +void makeRandom(void* ptr, int n, ncclDataType_t type, int seed) { + if (type == ncclChar) + Randomize((char*)ptr, n, seed); + else if (type == ncclInt) + Randomize((int*)ptr, n, seed); +#ifdef CUDA_HAS_HALF + else if (type == ncclHalf) + Randomize((half*)ptr, n, seed); +#endif + else if (type == ncclFloat) + Randomize((float*)ptr, n, seed); + else if (type == ncclDouble) + Randomize((double*)ptr, n, seed); + else if (type == ncclInt64) + Randomize((long long*)ptr, n, seed); + else if (type == ncclUint64) + Randomize((unsigned long long*)ptr, n, seed); + + return; +} + template __global__ static void accumKern(T* acum, const T* contrib, int N) { int tid = threadIdx.x + blockIdx.x*blockDim.x; @@ -200,22 +222,44 @@ void accumKern(half* acum, const half* contrib, int N) { } #endif +template +void accVecType(void* out, void* in, int n, ncclRedOp_t op) { + switch(op) { + case ncclSum: accumKern <<<256,256>>>((T*)out, (T*)in, n); break; + case ncclProd: accumKern<<<256,256>>>((T*)out, (T*)in, n); break; + case ncclMax: accumKern <<<256,256>>>((T*)out, (T*)in, n); break; + case ncclMin: accumKern <<<256,256>>>((T*)out, (T*)in, n); break; + default: + printf("Unknown reduction operation.\n"); + exit(EXIT_FAILURE); + } +} + template void Accumulate(T* dest, const T* contrib, int N, ncclRedOp_t op) { T* devdest; CUDACHECK(cudaHostRegister(dest, N*sizeof(T), 0)); CUDACHECK(cudaHostGetDevicePointer(&devdest, dest, 0)); - switch(op) { - case ncclSum: accumKern <<<256,256>>>(devdest, contrib, N); break; - case ncclProd: accumKern<<<256,256>>>(devdest, contrib, N); break; - case ncclMax: accumKern <<<256,256>>>(devdest, contrib, N); break; - case ncclMin: accumKern <<<256,256>>>(devdest, contrib, N); break; + accVecType((void*)devdest, (void*)contrib, N, op); + CUDACHECK(cudaHostUnregister(dest)); +} + +void accVec(void* out, void* in, int n, ncclDataType_t type, ncclRedOp_t op) { + switch (type) { + case ncclChar: accVecType (out, in, n, op); break; + case ncclInt: accVecType (out, in, n, op); break; +#ifdef CUDA_HAS_HALF + case ncclHalf: accVecType (out, in, n, op); break; +#endif + case ncclFloat: accVecType (out, in, n, op); break; + case ncclDouble: accVecType (out, in, n, op); break; + case ncclInt64: accVecType (out, in, n, op); break; + case ncclUint64: accVecType (out, in, n, op); break; default: - printf("Unknown reduction operation.\n"); + printf("Unknown reduction type.\n"); exit(EXIT_FAILURE); } - CUDACHECK(cudaHostUnregister(dest)); } template __device__ @@ -270,6 +314,22 @@ double CheckDelta(const T* results, const T* expected, int N) { return maxerr; } +void maxDiff(double* max, void* first, void* second, int n, ncclDataType_t type, cudaStream_t s) { + switch (type) { + case ncclChar: deltaKern <<<1,512,0,s>>>((char*)first, (char*)second, n, max); break; + case ncclInt: deltaKern <<<1,512,0,s>>>((int*)first, (int*)second, n, max); break; +#ifdef CUDA_HAS_HALF + case ncclHalf: deltaKern <<<1,512,0,s>>>((half*)first, (half*)second, n, max); break; +#endif + case ncclFloat: deltaKern <<<1,512,0,s>>>((float*)first, (float*)second, n, max); break; + case ncclDouble: deltaKern <<<1,512,0,s>>>((double*)first, (double*)second, n, max); break; + case ncclInt64: deltaKern <<<1,512,0,s>>>((long long*)first, (long long*)second, n, max); break; + case ncclUint64: deltaKern<<<1,512,0,s>>>((unsigned long long*)first, (unsigned long long*)second, n, max); break; + default: + printf("Unknown reduction type.\n"); + exit(EXIT_FAILURE); + } +} std::string TypeName(const ncclDataType_t type) { switch (type) { @@ -296,5 +356,83 @@ std::string OperationName(const ncclRedOp_t op) { } } +ncclDataType_t strToType(const char* s) { + if (strcmp(s, "char") == 0) + return ncclChar; + if (strcmp(s, "int") == 0) + return ncclInt; +#ifdef CUDA_HAS_HALF + if (strcmp(s, "half") == 0) + return ncclHalf; +#endif + if (strcmp(s, "float") == 0) + return ncclFloat; + if (strcmp(s, "double") == 0) + return ncclDouble; + if (strcmp(s, "int64") == 0) + return ncclInt64; + if (strcmp(s, "uint64") == 0) + return ncclUint64; + + return nccl_NUM_TYPES; +} + +size_t wordSize(ncclDataType_t type) { + switch(type) { + case ncclChar: return sizeof(char); + case ncclInt: return sizeof(int); +#ifdef CUDA_HAS_HALF + case ncclHalf: return sizeof(short); +#endif + case ncclFloat: return sizeof(float); + case ncclDouble: return sizeof(double); + case ncclInt64: return sizeof(long long); + case ncclUint64: return sizeof(unsigned long long); + } + + return 0; +} + +double deltaMaxValue(ncclDataType_t type, bool is_reduction) { + if (is_reduction) { + switch(type) { +#ifdef CUDA_HAS_HALF + case ncclHalf: return 5e-2; +#endif + case ncclFloat: return 1e-5; + case ncclDouble: return 1e-12; + } + } + return 1e-200; +} + +ncclRedOp_t strToOp(const char* s) { + if (strcmp(s, "sum") == 0) + return ncclSum; + if (strcmp(s, "prod") == 0) + return ncclProd; + if (strcmp(s, "max") == 0) + return ncclMax; + if (strcmp(s, "min") == 0) + return ncclMin; + + return nccl_NUM_OPS; +} + +int strToPosInt(const char* s) { + errno = 0; + long temp = strtol(s, NULL, 10); + if (errno != 0 || temp > INT_MAX || temp < 0) + return 0; + return (int)temp; +} + +int strToNonNeg(const char* s) { + errno = 0; + long temp = strtol(s, NULL, 10); + if (errno != 0 || temp > INT_MAX || temp < 0) + return -1; + return (int)temp; +} #endif // SRC_TEST_UTILITIES_H_ diff --git a/test/single/all_gather_test.cu b/test/single/all_gather_test.cu index 11496e1..ba3841f 100644 --- a/test/single/all_gather_test.cu +++ b/test/single/all_gather_test.cu @@ -13,6 +13,9 @@ #include "nccl.h" #include "test_utilities.h" +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = false; template void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -84,6 +87,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf(" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } for (int i = 0; i < nDev; ++i) { @@ -197,7 +203,7 @@ int main(int argc, char* argv[]) { RunTests(N / sizeof(char), ncclChar, comms, dList); RunTests(N / sizeof(int), ncclInt, comms, dList); -#if CUDART_VERSION >= 7050 +#ifdef CUDA_HAS_HALF RunTests(N / sizeof(half), ncclHalf, comms, dList); #endif RunTests(N / sizeof(float), ncclFloat, comms, dList); @@ -211,6 +217,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/all_reduce_test.cu b/test/single/all_reduce_test.cu index cebc198..642be80 100644 --- a/test/single/all_reduce_test.cu +++ b/test/single/all_reduce_test.cu @@ -15,6 +15,9 @@ #include int csv = false; +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = true; template void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -95,6 +98,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -138,6 +144,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -260,7 +269,7 @@ int main(int argc, char* argv[]) { RunTests(N / sizeof(char), ncclChar, comms, dList); RunTests(N / sizeof(int), ncclInt, comms, dList); -#if CUDART_VERSION >= 7050 +#ifdef CUDA_HAS_HALF RunTests(N / sizeof(half), ncclHalf, comms, dList); #endif RunTests(N / sizeof(float), ncclFloat, comms, dList); @@ -274,6 +283,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/broadcast_test.cu b/test/single/broadcast_test.cu index 4955f07..30afebd 100644 --- a/test/single/broadcast_test.cu +++ b/test/single/broadcast_test.cu @@ -13,6 +13,9 @@ #include "nccl.h" #include "test_utilities.h" +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = false; template void RunTest(T** buff, const int N, const ncclDataType_t type, const int root, @@ -86,6 +89,9 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root, printf(" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } for(int i=0; i < nDev; ++i) { @@ -211,6 +217,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/reduce_scatter_test.cu b/test/single/reduce_scatter_test.cu index e6a56fe..81f3004 100644 --- a/test/single/reduce_scatter_test.cu +++ b/test/single/reduce_scatter_test.cu @@ -13,6 +13,9 @@ #include "nccl.h" #include "test_utilities.h" +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = true; template void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -90,6 +93,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf(" %7.3f %5.2f %5.2f %7.0le", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } { @@ -126,6 +132,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf(" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; } for (int i = 0; i < nDev; ++i) { @@ -258,6 +267,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); } diff --git a/test/single/reduce_test.cu b/test/single/reduce_test.cu index dbe99c0..aa0d20f 100644 --- a/test/single/reduce_test.cu +++ b/test/single/reduce_test.cu @@ -15,6 +15,9 @@ #include int csv = false; +int errors = 0; +double min_bw = 10000.0; +bool is_reduction = true; template void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, @@ -94,6 +97,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -133,6 +139,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, printf((csv)?"%f,%f,%f,%le,":" %7.3f %5.2f %5.2f %7.0le\n", elapsedSec * 1.0E3, algbw, busbw, maxDelta); + if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; + if (busbw < min_bw) min_bw = busbw; + nvtxRangePop(); } @@ -272,6 +281,15 @@ int main(int argc, char* argv[]) { ncclCommDestroy(comms[i]); free(comms); - exit(EXIT_SUCCESS); + char* str = getenv("NCCL_TESTS_MIN_BW"); + double check_min_bw = str ? atof(str) : -1; + + printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); + printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf("\n"); + if (errors || min_bw < check_min_bw) + exit(EXIT_FAILURE); + else + exit(EXIT_SUCCESS); }