forked from OSchip/llvm-project
[CUDA] Provide address space conversion builtins.
CUDA-11 headers rely on these NVCC builtins. Despite having the `__nv` prefix, those are *not* provided by libdevice. Differential Revision: https://reviews.llvm.org/D111665
This commit is contained in:
parent
19b4e3cfc6
commit
f526ee5b85
|
@ -483,4 +483,36 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
|
||||||
|
|
||||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
|
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
|
||||||
|
|
||||||
|
#if CUDA_VERSION >= 11000
// Address space conversion builtins that the CUDA-11 headers rely on.
// Despite the `__nv` prefix they are not provided by libdevice, so they are
// defined here with C linkage.
extern "C" {

// Generic -> specific address space.
// Each function casts the generic pointer `__ptr` to a pointer in the
// numbered address space named in the attribute and returns that pointer's
// value as an integer. The C-style cast also drops the `const` qualifier of
// the argument; the pointee is never accessed here.

// Converts a generic pointer to its address_space(1) ("global") value.
__device__ inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(1))) *)__ptr;
}

// Converts a generic pointer to its address_space(3) ("shared") value.
__device__ inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(3))) *)__ptr;
}

// Converts a generic pointer to its address_space(4) ("constant") value.
__device__ inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(4))) *)__ptr;
}

// Converts a generic pointer to its address_space(5) ("local") value.
__device__ inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(5))) *)__ptr;
}

// Specific address space -> generic.
// Each function reinterprets the integer `__ptr` as a pointer in the numbered
// address space and then casts that pointer to a plain (generic) `void *`.

// Converts an address_space(1) ("global") value to a generic pointer.
__device__ inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(1))) *)__ptr;
}

// Converts an address_space(3) ("shared") value to a generic pointer.
__device__ inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(3))) *)__ptr;
}

// Converts an address_space(4) ("constant") value to a generic pointer.
__device__ inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(4))) *)__ptr;
}

// Converts an address_space(5) ("local") value to a generic pointer.
__device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(5))) *)__ptr;
}

// Returns the address_space(3) ("shared") value of a generic pointer as a
// 32-bit integer. The size_t result of the conversion is narrowed to
// uint32_t; the cast is spelled out so the truncation is visibly intentional.
__device__ inline uint32_t __nvvm_get_smem_pointer(void *__ptr) {
  return (uint32_t)__nv_cvta_generic_to_shared_impl(__ptr);
}

} // extern "C"
#endif // CUDA_VERSION >= 11000
|
||||||
|
|
||||||
#endif // defined(__CLANG_CUDA_INTRINSICS_H__)
|
#endif // defined(__CLANG_CUDA_INTRINSICS_H__)
|
||||||
|
|
Loading…
Reference in New Issue