Fixed a race condition in reduce and broadcast.

Nathan Luehr 2015-11-19 11:11:52 -08:00
parent 0673d5f44f
commit 27d32ac5d9
3 changed files with 17 additions and 25 deletions

@@ -59,7 +59,7 @@
 // subchunks, we interleave the independent subchunks so that more data can be
 // transferred while the sync is in progress. This is the number of subchunks
 // that are active at the same time
-#define NUM_SUBCHUNKS 1
+#define NUM_SUBCHUNKS 2
 // If this is called with STEP, it means that we just finished processing the
 // data for step STEP on this GPU, which is the data required on the next GPU

@@ -180,21 +180,17 @@ __global__ void BroadcastKernel(const BroadcastKernelArgs<T> args) {
   // First wait for args.PrevPtrToThisOutput to become nullptr to ensure that
   // the previous GPU is done with a previous collective operation.
   if (tid == 0) {
-    if (ROLE != ROOT) {
-      Wait([=] {
-        return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
-      });
+    Wait([=] {
+      return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
+    });

-      *((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
-    }
-    if (ROLE != END) {
-      Wait([=] {
-        return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
-      });
+    *((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
+    Wait([=] {
+      return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
+    });

-      if (PUSHRECV)
-        nextData = *((volatile void * volatile *)args.ThisPtrToNextData); // Grab next's pointer if needed.
-    }
+    if (PUSHRECV)
+      nextData = *((volatile void * volatile *)args.ThisPtrToNextData); // Grab next's pointer if needed.
   }

   __syncthreads();
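
The handshake in this hunk is easier to see outside the diff. A rough sketch, assuming a Wait() helper of this general shape (SpinWait and RingHandshake are illustrative names, and float stands in for the kernel's template parameter T):

// Spin-wait: thread 0 re-evaluates a predicate whose volatile loads observe
// writes arriving from the peer GPU through mapped memory.
template <typename Fn>
__device__ void SpinWait(Fn done) {
  while (!done()) {
    // busy-wait; each pass re-reads the flag via a volatile pointer
  }
}

// The handshake every GPU now performs, in the order shown in the diff.
__device__ void RingHandshake(float* volatile* prevPtrToThisData,
                              float* volatile* thisPtrToNextData,
                              float* thisData) {
  SpinWait([=] { return *prevPtrToThisData == nullptr; }); // previous GPU done with the last op
  *prevPtrToThisData = thisData;                           // tell the previous GPU we have started
  SpinWait([=] { return *thisPtrToNextData != nullptr; }); // next GPU has started (PUSHRECV grabs its pointer here)
}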

@@ -182,18 +182,14 @@ __global__ void ReduceKernel(const ReduceKernelArgs<T> args) {
   // First wait for args.PrevPtrToThisOutput to become nullptr to ensure that
   // the previous GPU is done with a previous collective operation.
   if (tid == 0) {
-    if (ROLE != BEGIN) {
-      Wait([=] {
-        return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
-      });
+    Wait([=] {
+      return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
+    });

-      *((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
-    }
-    if (ROLE != END) {
-      Wait([=] {
-        return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
-      });
-    }
+    *((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
+    Wait([=] {
+      return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
+    });
   }

   __syncthreads();
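
Both hunks make the same change: the ROLE != ROOT/BEGIN and ROLE != END guards are gone, so every GPU in the ring, endpoints included, performs the full handshake before touching buffers. A host-side analogue of that protocol (illustration only; the end-of-operation reset to nullptr is assumed here and is not shown in this diff):

// Ring handshake analogue: slot[i] is owned by rank i and written by rank i+1.
// Waiting for slot[prev] == nullptr is how a rank learns its predecessor has
// finished the previous operation, mirroring PrevPtrToThisData in the kernels.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const int ranks = 3, rounds = 2;
  std::vector<std::atomic<int*>> slot(ranks);
  for (auto& s : slot) s.store(nullptr);
  std::vector<int> data(ranks);

  auto worker = [&](int r) {
    int prev = (r + ranks - 1) % ranks;
    for (int round = 0; round < rounds; ++round) {
      // 1) Wait until the predecessor has cleared the slot from the last round.
      while (slot[prev].load() != nullptr) std::this_thread::yield();
      // 2) Publish our buffer: "I'm starting".
      slot[prev].store(&data[r]);
      // 3) Wait until the successor has published in our slot.
      while (slot[r].load() == nullptr) std::this_thread::yield();
      // ... collective work for this round would run here ...
      // 4) Done with the successor's buffer: clear our slot for the next round.
      slot[r].store(nullptr);
    }
  };

  std::vector<std::thread> t;
  for (int r = 0; r < ranks; ++r) t.emplace_back(worker, r);
  for (auto& th : t) th.join();
  std::printf("completed %d rounds on %d ranks\n", rounds, ranks);
  return 0;
}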