Fix compilation error when compiling with 'clang -x cuda'.

Functions vFetch and vStore are not found by ADL with clang,
so they need to be declared before usage in ReduceCopy.
This commit is contained in:
Ilya Biryukov 2017-03-16 12:01:11 +01:00
parent 7fef264bfa
commit 8241cd7b6e

View File

@ -30,6 +30,32 @@
// BAR(type, barid, nthreads): forwards to BAR_EXPAND with the thread count
// passed through ROUNDUP(nthreads, WARP_SIZE).
// NOTE(review): ROUNDUP presumably rounds up to a multiple of WARP_SIZE;
// confirm against its definition, which is not visible here.
#define BAR(type, barid, nthreads) \
    BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE))
// Reads one value of type T through a pointer to const volatile T and returns
// it by value. Device-only helper (__device__).
// Per the commit description, this is declared ahead of ReduceCopy because
// clang does not find it through ADL when it is declared after its use.
template<typename T> inline __device__
T vFetch(const volatile T* ptr) {
// Dereference of a volatile-qualified pointee: the load is a volatile access.
return *ptr;
}
#ifdef CUDA_HAS_HALF
// Specialization of vFetch for half (only compiled when CUDA_HAS_HALF is
// defined). Builds the result by copying the member `x` instead of
// dereferencing the whole object.
// NOTE(review): presumably needed because half cannot be copied from a
// volatile-qualified source; confirm against the half definition in use.
template<> inline __device__
half vFetch<half>(const volatile half* ptr) {
half r;
// Volatile read of the single data member.
r.x = ptr->x;
return r;
}
#endif
// Writes `val` to the location `ptr` points to, through a pointer to
// volatile T. Device-only helper (__device__); returns nothing.
// Per the commit description, this is declared ahead of ReduceCopy because
// clang does not find it through ADL when it is declared after its use.
template<typename T> inline __device__
void vStore(volatile T* ptr, const T val) {
// Assignment through a volatile-qualified pointee: the store is a volatile access.
*ptr = val;
}
#ifdef CUDA_HAS_HALF
// Specialization of vStore for half (only compiled when CUDA_HAS_HALF is
// defined). Stores the member `x` directly instead of assigning the whole
// object.
// NOTE(review): presumably needed because half has no assignment usable on a
// volatile-qualified destination; confirm against the half definition in use.
template<> inline __device__
void vStore<half>(volatile half* ptr, const half val) {
// Volatile write of the single data member.
ptr->x = val.x;
}
#endif
// Device-scope unsigned counter.
// NOTE(review): presumably counts spin-wait iterations for the helper that
// follows; confirm at its use sites, which are not visible here.
__device__ unsigned int spinct;
// Spin wait until func evaluates to true
@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) {
return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align)); return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align));
} }
// Reads one value of type T through a pointer to const volatile T and returns
// it by value. Device-only helper (__device__).
template<typename T> inline __device__
T vFetch(const volatile T* ptr) {
// Dereference of a volatile-qualified pointee: the load is a volatile access.
return *ptr;
}
#ifdef CUDA_HAS_HALF
// Specialization of vFetch for half (only compiled when CUDA_HAS_HALF is
// defined). Builds the result by copying the member `x` instead of
// dereferencing the whole object.
// NOTE(review): presumably needed because half cannot be copied from a
// volatile-qualified source; confirm against the half definition in use.
template<> inline __device__
half vFetch<half>(const volatile half* ptr) {
half r;
// Volatile read of the single data member.
r.x = ptr->x;
return r;
}
#endif
// Writes `val` to the location `ptr` points to, through a pointer to
// volatile T. Device-only helper (__device__); returns nothing.
template<typename T> inline __device__
void vStore(volatile T* ptr, const T val) {
// Assignment through a volatile-qualified pointee: the store is a volatile access.
*ptr = val;
}
#ifdef CUDA_HAS_HALF
// Specialization of vStore for half (only compiled when CUDA_HAS_HALF is
// defined). Stores the member `x` directly instead of assigning the whole
// object.
// NOTE(review): presumably needed because half has no assignment usable on a
// volatile-qualified destination; confirm against the half definition in use.
template<> inline __device__
void vStore<half>(volatile half* ptr, const half val) {
// Volatile write of the single data member.
ptr->x = val.x;
}
#endif
// Assumptions:
// - there is exactly 1 block
// - THREADS is the number of producer threads