Fixed deadlock in back-to-back reduce_scatters.
Change-Id: I92d32b15e516a39710b676aee692ae9b70638937
Reviewed-on: http://git-master/r/935458
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
This commit is contained in:
parent
90af7c73ef
commit
130ee246e2
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
/build
|
||||
|
@ -113,3 +113,9 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
```
|
||||
|
||||
## Copyright and License
|
||||
|
||||
NCCL is provided under the [BSD license](LICENSE.txt). All source code and
|
||||
accompanying documentation are copyright (c) 2015-2016, NVIDIA CORPORATION. All
|
||||
rights reserved.
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -164,6 +164,9 @@ struct ReduceScatterKernelArgs {
|
||||
int BufferSliceStride;
|
||||
int BufferMisalignedN;
|
||||
|
||||
T ** ThisPtrToNextOutput;
|
||||
T ** PrevPtrToThisOutput;
|
||||
|
||||
// local and remote input, output, and buffer
|
||||
const T * __restrict__ ThisInput;
|
||||
volatile T * __restrict__ ThisOutput;
|
||||
@ -187,6 +190,20 @@ __global__ void ReduceScatterKernel(const ReduceScatterKernelArgs<T> args) {
|
||||
if (args.N == 0) return;
|
||||
int tid = threadIdx.x;
|
||||
|
||||
// First wait for args.PrevPtrToThisOutput to become nullptr to ensure that
|
||||
// the previous GPU is done with a previous collective operation.
|
||||
if (tid == 0) {
|
||||
Wait([=] {
|
||||
return *((T * volatile *)args.PrevPtrToThisOutput) == nullptr; // Wait for previous processor to be done
|
||||
});
|
||||
|
||||
*((T * volatile *)args.PrevPtrToThisOutput) = (T*)args.ThisOutput; // Tell Previous I'm starting
|
||||
Wait([=] {
|
||||
return *((T * volatile *)args.ThisPtrToNextOutput) != nullptr; // Wait till I've been told next started
|
||||
});
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
for (int chunk = 0; chunk < args.NumChunks; ++chunk) {
|
||||
// calculate slice size. for all chunks except (possibly) the last one,
|
||||
// this will just be args.SliceSize. For the last one, it may be smaller
|
||||
@ -311,6 +328,7 @@ __global__ void ReduceScatterKernel(const ReduceScatterKernelArgs<T> args) {
|
||||
if (tid == 0) {
|
||||
args.ThisNewDataAvailableFlag[tid] = 0;
|
||||
args.ThisChunkDoneFlag[tid] = 0;
|
||||
*args.ThisPtrToNextOutput = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -410,7 +428,8 @@ ncclResult_t ncclReduceScatterWithTypeAndFunc(const void* sendbuff,
|
||||
args.NumChunks = (args.N + args.ChunkSize - 1) / args.ChunkSize;
|
||||
}
|
||||
|
||||
// printf("sliceSize = %i, chunkSize = %i, numChunks = %i, sliceStride = %i, misalignedN = %i\n", args.SliceSize, args.ChunkSize, args.NumChunks, args.BufferSliceStride, args.BufferMisalignedN);
|
||||
args.ThisPtrToNextOutput = (T**)&(comm->local[nextId]->recvPtrs[0]);
|
||||
args.PrevPtrToThisOutput = (T**)&(comm->remote[prevId]->recvPtrs[0]);
|
||||
|
||||
args.ThisInput = (const T*)sendbuff;
|
||||
args.ThisOutput = (volatile T*)recvbuff;
|
||||
@ -426,7 +445,7 @@ ncclResult_t ncclReduceScatterWithTypeAndFunc(const void* sendbuff,
|
||||
args.PrevChunkDoneFlag = comm->remote[prevId]->flags + 1;
|
||||
|
||||
ReduceScatterKernel<NUM_THREADS, UNROLL_COUNT, FUNC, T>
|
||||
<<<1, NUM_THREADS + NUM_SUBCHUNKS * WARP_SIZE, 0, stream>>>(args);
|
||||
<<<1, NUM_THREADS + 1, 0, stream>>>(args);
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
Loading…
x
Reference in New Issue
Block a user