Fix compilation error when compiling with 'clang -x cuda'.
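Clang applies strict two-phase name lookup inside templates, so the function templates vFetch and vStore are not found by argument-dependent lookup (ADL) when ReduceCopy is instantiated. Their definitions are therefore moved ahead of their first use in ReduceCopy.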
This commit is contained in:
parent
7fef264bfa
commit
8241cd7b6e
@ -30,6 +30,32 @@
|
|||||||
#define BAR(type, barid, nthreads) \
|
#define BAR(type, barid, nthreads) \
|
||||||
BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE))
|
BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE))
|
||||||
|
|
||||||
|
template<typename T> inline __device__
|
||||||
|
T vFetch(const volatile T* ptr) {
|
||||||
|
return *ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CUDA_HAS_HALF
|
||||||
|
template<> inline __device__
|
||||||
|
half vFetch<half>(const volatile half* ptr) {
|
||||||
|
half r;
|
||||||
|
r.x = ptr->x;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template<typename T> inline __device__
|
||||||
|
void vStore(volatile T* ptr, const T val) {
|
||||||
|
*ptr = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CUDA_HAS_HALF
|
||||||
|
template<> inline __device__
|
||||||
|
void vStore<half>(volatile half* ptr, const half val) {
|
||||||
|
ptr->x = val.x;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
__device__ unsigned int spinct;
|
__device__ unsigned int spinct;
|
||||||
|
|
||||||
// Spin wait until func evaluates to true
|
// Spin wait until func evaluates to true
|
||||||
@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) {
|
|||||||
return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align));
|
return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T> inline __device__
|
|
||||||
T vFetch(const volatile T* ptr) {
|
|
||||||
return *ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CUDA_HAS_HALF
|
|
||||||
template<> inline __device__
|
|
||||||
half vFetch<half>(const volatile half* ptr) {
|
|
||||||
half r;
|
|
||||||
r.x = ptr->x;
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template<typename T> inline __device__
|
|
||||||
void vStore(volatile T* ptr, const T val) {
|
|
||||||
*ptr = val;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CUDA_HAS_HALF
|
|
||||||
template<> inline __device__
|
|
||||||
void vStore<half>(volatile half* ptr, const half val) {
|
|
||||||
ptr->x = val.x;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Assumptions:
|
// Assumptions:
|
||||||
// - there is exactly 1 block
|
// - there is exactly 1 block
|
||||||
// - THREADS is the number of producer threads
|
// - THREADS is the number of producer threads
|
||||||
|
Loading…
x
Reference in New Issue
Block a user