/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_CUDA_CMATH_H__
#define __CLANG_CUDA_CMATH_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

#include <limits>
// CUDA lets us use various std math functions on the device side. This file
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
//
// Specifically, the forward-declares header declares __device__ overloads for
// these functions in the global namespace, then pulls them into namespace std
// with 'using' statements. Then this file implements those functions, after
// their implementations have been pulled in.
//
// It's important that we declare the functions in the global namespace and pull
// them into namespace std with using statements, as opposed to simply declaring
// these functions in namespace std, because our device functions need to
// overload the standard library functions, which may be declared in the global
// namespace or in std, depending on the degree of conformance of the stdlib
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.
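//
// For illustration only (a sketch, not the actual contents of either header),
// the pattern looks roughly like this:
//
//   // In __clang_cuda_math_forward_declares.h:
//   __device__ float sin(float);     // declared in the global namespace...
//   namespace std { using ::sin; }   // ...then pulled into std.
//
//   // In this file, implemented in terms of the CUDA "foobarf" functions:
//   __DEVICE__ float sin(float __x) { return ::sinf(__x); }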
#ifdef _OPENMP
#define __DEVICE__ static __attribute__((always_inline))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

// For C++17 we need to add the noexcept attribute to be compatible
// with the header-defined version. This may be removed once the
// OpenMP 'variant' construct is supported.
#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L
#define __NOEXCEPT noexcept
#else
#define __NOEXCEPT
#endif
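// For example (a sketch, assuming the host's C++17 <cmath> declares these
// functions noexcept): with __NOEXCEPT defined to 'noexcept', the fabs
// overload below expands to
//   __DEVICE__ float fabs(float __x) noexcept { return ::fabsf(__x); }
// matching the host declaration's exception specification.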
#if !(defined(_OPENMP) && defined(__cplusplus))
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
#endif
// TODO: remove once variant is supported.
#if defined(_OPENMP) && defined(__cplusplus)
__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); }
__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); }
#endif
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// TODO: remove when variant is supported
#ifndef _OPENMP
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
#endif
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}
// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#ifndef _MSC_VER
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for MacOS. __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
#endif
__DEVICE__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
// Notably missing above is nexttoward. We omit it because
// libdevice doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.

// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
//
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
//    only sin(float) and sin(double), which means that e.g. sin(0) is
//    ambiguous.
//
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
//    std. These are defined in the CUDA headers in the global namespace,
//    independent of everything else we've done here.

// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
// we go ahead and unconditionally define functions that are only available when
// compiling for C++11 to match the behavior of the CUDA headers.
template<bool __B, class __T = void>
struct __clang_cuda_enable_if {};

template <class __T> struct __clang_cuda_enable_if<true, __T> {
  typedef __T type;
};
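// For illustration: __clang_cuda_enable_if<true, double>::type is double,
// while __clang_cuda_enable_if<false, double> has no ::type member, so naming
// it in a signature triggers a substitution failure (SFINAE) that silently
// removes the overload from consideration. A sketch:
//   template <typename __T>
//   typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
//                                   double>::type
//   __only_for_integers(__T __x);  // hypothetical; viable only when __T is
//                                  // an integer type.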
// Defines an overload of __fn that accepts one integral argument, calls
// __fn((double)x), and returns __retty.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                     \
  template <typename __T>                                                     \
  __DEVICE__                                                                  \
      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,   \
                                      __retty>::type                          \
      __fn(__T __x) {                                                         \
    return ::__fn((double)__x);                                               \
  }
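// For example, __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) expands to
// (roughly):
//   template <typename __T>
//   __DEVICE__ typename __clang_cuda_enable_if<
//       std::numeric_limits<__T>::is_integer, double>::type
//   sqrt(__T __x) { return ::sqrt((double)__x); }
// so a call like sqrt(4) resolves here, calls ::sqrt(4.0), and returns double.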
// Defines an overload of __fn that accepts two arithmetic arguments, calls
// __fn((double)x, (double)y), and returns a double.
//
// Note this is different from OVERLOAD_1, which generates an overload that
// accepts only *integral* arguments.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                     \
  template <typename __T1, typename __T2>                                     \
  __DEVICE__ typename __clang_cuda_enable_if<                                 \
      std::numeric_limits<__T1>::is_specialized &&                            \
          std::numeric_limits<__T2>::is_specialized,                          \
      __retty>::type                                                          \
  __fn(__T1 __x, __T2 __y) {                                                  \
    return __fn((double)__x, (double)__y);                                    \
  }
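// For example, once __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow) is
// instantiated below, a mixed call like pow(2, 3.0f) resolves to this
// template, computes pow(2.0, 3.0), and returns a double rather than being
// ambiguous between the float and double overloads.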
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);

#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
// Overloads for functions that don't match the patterns expected by
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized &&
        std::numeric_limits<__T3>::is_specialized,
    double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma((double)__x, (double)__y, (double)__z);
}
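// For example, fma(2, 3.0f, 1) instantiates this template, computes
// std::fma(2.0, 3.0, 1.0), and returns 7.0 as a double.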
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
frexp(__T __x, int *__exp) {
  return std::frexp((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp((double)__x, __exp);
}

template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized,
    double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo((double)__x, (double)__y, __quo);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn((double)__x, __exp);
}
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
// <complex>). Thus the ugliness below.
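//
// For example (an assumption about libc++'s internals, subject to change):
// _LIBCPP_BEGIN_NAMESPACE_STD typically expands to something like
//   namespace std { inline namespace __1 {
// so the declarations below land in std::__1, the inline namespace where
// libc++'s own <complex> expects to find them.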
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif
// Pull the new overloads we defined above into namespace std.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;
// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// than its std::isinf and std::isnan.
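// (For instance, with glibc and libstdc++ it has historically been the case
// that ::isinf(double) is the C function returning int while
// std::isinf(double) returns bool, so a 'using ::isinf;' here would clash
// with the existing std::isinf.)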
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif
// Finally, pull the "foobarf" functions that CUDA defines in its headers into
// namespace std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
// TODO: remove once variant is supported
#ifndef _OPENMP
using ::scalblnf;
#endif
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;
#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif

#undef __NOEXCEPT
#undef __DEVICE__

#endif