Fixed deadlock in back-to-back reduce_scatters.
Change-Id: I92d32b15e516a39710b676aee692ae9b70638937
Reviewed-on: http://git-master/r/935458
Reviewed-by: Przemek Tredak <ptredak@nvidia.com>
Tested-by: Przemek Tredak <ptredak@nvidia.com>
This commit is contained in:
parent
90af7c73ef
commit
130ee246e2
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
|
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
/build
|
/build
|
||||||
|
@ -113,3 +113,9 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Copyright and License
|
||||||
|
|
||||||
|
NCCL is provided under the [BSD license](LICENSE.txt). All source code and
|
||||||
|
accompanying documentation is copyright (c) 2015-2016, NVIDIA CORPORATION. All
|
||||||
|
rights reserved.
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
@ -164,6 +164,9 @@ struct ReduceScatterKernelArgs {
|
|||||||
int BufferSliceStride;
|
int BufferSliceStride;
|
||||||
int BufferMisalignedN;
|
int BufferMisalignedN;
|
||||||
|
|
||||||
|
T ** ThisPtrToNextOutput;
|
||||||
|
T ** PrevPtrToThisOutput;
|
||||||
|
|
||||||
// local and remote input, output, and buffer
|
// local and remote input, output, and buffer
|
||||||
const T * __restrict__ ThisInput;
|
const T * __restrict__ ThisInput;
|
||||||
volatile T * __restrict__ ThisOutput;
|
volatile T * __restrict__ ThisOutput;
|
||||||
@ -187,6 +190,20 @@ __global__ void ReduceScatterKernel(const ReduceScatterKernelArgs<T> args) {
|
|||||||
if (args.N == 0) return;
|
if (args.N == 0) return;
|
||||||
int tid = threadIdx.x;
|
int tid = threadIdx.x;
|
||||||
|
|
||||||
|
// First wait for args.PrevPtrToThisOutput to become nullptr to ensure that
|
||||||
|
// the previous GPU is done with a previous collective operation.
|
||||||
|
if (tid == 0) {
|
||||||
|
Wait([=] {
|
||||||
|
return *((T * volatile *)args.PrevPtrToThisOutput) == nullptr; // Wait for previous processor to be done
|
||||||
|
});
|
||||||
|
|
||||||
|
*((T * volatile *)args.PrevPtrToThisOutput) = (T*)args.ThisOutput; // Tell Previous I'm starting
|
||||||
|
Wait([=] {
|
||||||
|
return *((T * volatile *)args.ThisPtrToNextOutput) != nullptr; // Wait till I've been told next started
|
||||||
|
});
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
for (int chunk = 0; chunk < args.NumChunks; ++chunk) {
|
for (int chunk = 0; chunk < args.NumChunks; ++chunk) {
|
||||||
// calculate slice size. for all chunks except (possibly) the last one,
|
// calculate slice size. for all chunks except (possibly) the last one,
|
||||||
// this will just be args.SliceSize. For the last one, it may be smaller
|
// this will just be args.SliceSize. For the last one, it may be smaller
|
||||||
@ -311,6 +328,7 @@ __global__ void ReduceScatterKernel(const ReduceScatterKernelArgs<T> args) {
|
|||||||
if (tid == 0) {
|
if (tid == 0) {
|
||||||
args.ThisNewDataAvailableFlag[tid] = 0;
|
args.ThisNewDataAvailableFlag[tid] = 0;
|
||||||
args.ThisChunkDoneFlag[tid] = 0;
|
args.ThisChunkDoneFlag[tid] = 0;
|
||||||
|
*args.ThisPtrToNextOutput = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -410,7 +428,8 @@ ncclResult_t ncclReduceScatterWithTypeAndFunc(const void* sendbuff,
|
|||||||
args.NumChunks = (args.N + args.ChunkSize - 1) / args.ChunkSize;
|
args.NumChunks = (args.N + args.ChunkSize - 1) / args.ChunkSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
// printf("sliceSize = %i, chunkSize = %i, numChunks = %i, sliceStride = %i, misalignedN = %i\n", args.SliceSize, args.ChunkSize, args.NumChunks, args.BufferSliceStride, args.BufferMisalignedN);
|
args.ThisPtrToNextOutput = (T**)&(comm->local[nextId]->recvPtrs[0]);
|
||||||
|
args.PrevPtrToThisOutput = (T**)&(comm->remote[prevId]->recvPtrs[0]);
|
||||||
|
|
||||||
args.ThisInput = (const T*)sendbuff;
|
args.ThisInput = (const T*)sendbuff;
|
||||||
args.ThisOutput = (volatile T*)recvbuff;
|
args.ThisOutput = (volatile T*)recvbuff;
|
||||||
@ -426,7 +445,7 @@ ncclResult_t ncclReduceScatterWithTypeAndFunc(const void* sendbuff,
|
|||||||
args.PrevChunkDoneFlag = comm->remote[prevId]->flags + 1;
|
args.PrevChunkDoneFlag = comm->remote[prevId]->flags + 1;
|
||||||
|
|
||||||
ReduceScatterKernel<NUM_THREADS, UNROLL_COUNT, FUNC, T>
|
ReduceScatterKernel<NUM_THREADS, UNROLL_COUNT, FUNC, T>
|
||||||
<<<1, NUM_THREADS + NUM_SUBCHUNKS * WARP_SIZE, 0, stream>>>(args);
|
<<<1, NUM_THREADS + 1, 0, stream>>>(args);
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions
|
* modification, are permitted provided that the following conditions
|
||||||
|
Loading…
x
Reference in New Issue
Block a user