forked from OSchip/llvm-project
[openmp] Annotate tmp variables with omp_thread_mem_alloc
Fixes a miscompile of calls into ocml (Bug 51445). A stack variable such as `double __tmp` is moved to dynamically allocated shared memory by CGOpenMPRuntimeGPU. This is usually fine, but when the variable's address is passed to a function parameter that is explicitly annotated address_space(5), allocating the variable off-stack leads to a miscompile in the back end, which cannot decide to move the variable back from shared memory to the stack. This could be fixed either by removing the AS(5) annotation from the math library or by explicitly marking the variables as thread_mem_alloc; this patch does the latter. The cast to AS(5) is still a no-op once IR is reached.

Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D107971
This commit is contained in:
parent
dd3eea6566
commit
b6113548c9
|
@ -19,6 +19,9 @@
|
|||
#endif
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#endif // !defined(__HIPCC_RTC__)
|
||||
|
||||
#pragma push_macro("__DEVICE__")
|
||||
|
@ -258,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
|
|||
__DEVICE__
|
||||
float frexpf(float __x, int *__nptr) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
float __r =
|
||||
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__nptr = __tmp;
|
||||
|
@ -343,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); }
|
|||
__DEVICE__
|
||||
float modff(float __x, float *__iptr) {
|
||||
float __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
float __r =
|
||||
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__iptr = __tmp;
|
||||
|
@ -423,6 +432,9 @@ float remainderf(float __x, float __y) {
|
|||
__DEVICE__
|
||||
float remquof(float __x, float __y, int *__quo) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
float __r = __ocml_remquo_f32(
|
||||
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__quo = __tmp;
|
||||
|
@ -479,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
|
|||
__DEVICE__
|
||||
void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
||||
float __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr =
|
||||
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
@ -487,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
|
|||
__DEVICE__
|
||||
void sincospif(float __x, float *__sinptr, float *__cosptr) {
|
||||
float __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr = __ocml_sincospi_f32(
|
||||
__x, (__attribute__((address_space(5))) float *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
@ -799,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
|
|||
__DEVICE__
|
||||
double frexp(double __x, int *__nptr) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
double __r =
|
||||
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__nptr = __tmp;
|
||||
|
@ -883,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); }
|
|||
__DEVICE__
|
||||
double modf(double __x, double *__iptr) {
|
||||
double __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
double __r =
|
||||
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||
*__iptr = __tmp;
|
||||
|
@ -971,6 +995,9 @@ double remainder(double __x, double __y) {
|
|||
__DEVICE__
|
||||
double remquo(double __x, double __y, int *__quo) {
|
||||
int __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
double __r = __ocml_remquo_f64(
|
||||
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
|
||||
*__quo = __tmp;
|
||||
|
@ -1029,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); }
|
|||
__DEVICE__
|
||||
void sincos(double __x, double *__sinptr, double *__cosptr) {
|
||||
double __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr = __ocml_sincos_f64(
|
||||
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
@ -1037,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
|
|||
__DEVICE__
|
||||
void sincospi(double __x, double *__sinptr, double *__cosptr) {
|
||||
double __tmp;
|
||||
#ifdef __OPENMP_AMDGCN__
|
||||
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
|
||||
#endif
|
||||
*__sinptr = __ocml_sincospi_f64(
|
||||
__x, (__attribute__((address_space(5))) double *)&__tmp);
|
||||
*__cosptr = __tmp;
|
||||
|
|
Loading…
Reference in New Issue