Fixed a race condition in reduce and braodcast.

This commit is contained in:
Nathan Luehr 2015-11-19 11:11:52 -08:00
parent 0673d5f44f
commit 27d32ac5d9
3 changed files with 17 additions and 25 deletions

View File

@ -59,7 +59,7 @@
// subchunks, we interleave the independent subchunks so that more data can be
// transferred while the sync is in progress. This is the number of subchunks
// that are active at the same time
#define NUM_SUBCHUNKS 1
#define NUM_SUBCHUNKS 2
// If this is called with STEP, it means that we just finished processing the
// data for step STEP on this GPU, which is the data required on the next GPU

View File

@ -180,21 +180,17 @@ __global__ void BroadcastKernel(const BroadcastKernelArgs<T> args) {
// First wait for args.PrevPtrToThisOutput to become nullptr to ensure that
// the previous GPU is done with a previous collective operation.
if (tid == 0) {
if (ROLE != ROOT) {
Wait([=] {
return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
});
Wait([=] {
return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
});
*((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
}
if (ROLE != END) {
Wait([=] {
return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
});
*((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
Wait([=] {
return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
});
if (PUSHRECV)
nextData = *((volatile void * volatile *)args.ThisPtrToNextData); // Grab next's pointer if needed.
}
if (PUSHRECV)
nextData = *((volatile void * volatile *)args.ThisPtrToNextData); // Grab next's pointer if needed.
}
__syncthreads();

View File

@ -182,18 +182,14 @@ __global__ void ReduceKernel(const ReduceKernelArgs<T> args) {
// First wait for args.PrevPtrToThisOutput to become nullptr to ensure that
// the previous GPU is done with a previous collective operation.
if (tid == 0) {
if (ROLE != BEGIN) {
Wait([=] {
return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
});
Wait([=] {
return *((T * volatile *)args.PrevPtrToThisData) == nullptr; // Wait for previous processor to be done
});
*((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
}
if (ROLE != END) {
Wait([=] {
return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
});
}
*((T * volatile *)args.PrevPtrToThisData) = (T*)args.ThisData; // Tell Previous I'm starting
Wait([=] {
return *((T * volatile *)args.ThisPtrToNextData) != nullptr; // Wait till I've been told next started
});
}
__syncthreads();