forked from OSchip/llvm-project
[X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using generic IR
D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ truncating conversions with generic IR instead. It turns out that the behaviour of these intrinsics is different enough from generic IR that this will cause problems, INF/NAN/out of range values are guaranteed to result in a 0x80000000 value - which plays havoc with constant folding which converts them to either zero or UNDEF. This is also an issue with the scalar implementations (which were already generic IR and what I was trying to match). This patch changes both scalar and packed versions back to using x86-specific builtins. It also deals with the other scalar conversion cases that are runtime rounding mode dependent and can have similar issues with constant folding. Differential Revision: https://reviews.llvm.org/D22105 llvm-svn: 276102
This commit is contained in:
parent
f345d40ae2
commit
e3b9ee0645
|
@ -303,7 +303,9 @@ TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "", "ssse3")
|
||||||
TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse")
|
TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse")
|
||||||
TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse")
|
TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse")
|
TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "", "sse")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse")
|
TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "", "sse")
|
||||||
TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse")
|
TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse")
|
||||||
TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse")
|
TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse")
|
||||||
TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse")
|
TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse")
|
||||||
|
@ -328,8 +330,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "", "sse2")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2")
|
||||||
TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
|
TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2")
|
||||||
|
@ -455,7 +461,9 @@ TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "", "avx")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
|
TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
|
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
|
TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx")
|
||||||
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
|
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx")
|
||||||
|
TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx")
|
||||||
TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx")
|
TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx")
|
||||||
TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx")
|
TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx")
|
||||||
TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx")
|
TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx")
|
||||||
|
|
|
@ -2117,7 +2117,7 @@ _mm256_cvtps_pd(__m128 __a)
|
||||||
static __inline __m128i __DEFAULT_FN_ATTRS
|
static __inline __m128i __DEFAULT_FN_ATTRS
|
||||||
_mm256_cvttpd_epi32(__m256d __a)
|
_mm256_cvttpd_epi32(__m256d __a)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_convertvector((__v4df) __a, __v4si);
|
return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline __m128i __DEFAULT_FN_ATTRS
|
static __inline __m128i __DEFAULT_FN_ATTRS
|
||||||
|
@ -2129,7 +2129,7 @@ _mm256_cvtpd_epi32(__m256d __a)
|
||||||
static __inline __m256i __DEFAULT_FN_ATTRS
|
static __inline __m256i __DEFAULT_FN_ATTRS
|
||||||
_mm256_cvttps_epi32(__m256 __a)
|
_mm256_cvttps_epi32(__m256 __a)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si);
|
return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline double __DEFAULT_FN_ATTRS
|
static __inline double __DEFAULT_FN_ATTRS
|
||||||
|
|
|
@ -417,8 +417,7 @@ _mm_cvtsd_si32(__m128d __a)
|
||||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||||
_mm_cvtsd_ss(__m128 __a, __m128d __b)
|
_mm_cvtsd_ss(__m128 __a, __m128d __b)
|
||||||
{
|
{
|
||||||
__a[0] = __b[0];
|
return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
|
||||||
return __a;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
static __inline__ __m128d __DEFAULT_FN_ATTRS
|
||||||
|
@ -444,7 +443,7 @@ _mm_cvttpd_epi32(__m128d __a)
|
||||||
static __inline__ int __DEFAULT_FN_ATTRS
|
static __inline__ int __DEFAULT_FN_ATTRS
|
||||||
_mm_cvttsd_si32(__m128d __a)
|
_mm_cvttsd_si32(__m128d __a)
|
||||||
{
|
{
|
||||||
return __a[0];
|
return __builtin_ia32_cvttsd2si((__v2df)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||||
|
@ -1707,7 +1706,7 @@ _mm_cvtsd_si64(__m128d __a)
|
||||||
static __inline__ long long __DEFAULT_FN_ATTRS
|
static __inline__ long long __DEFAULT_FN_ATTRS
|
||||||
_mm_cvttsd_si64(__m128d __a)
|
_mm_cvttsd_si64(__m128d __a)
|
||||||
{
|
{
|
||||||
return __a[0];
|
return __builtin_ia32_cvttsd2si64((__v2df)__a);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1755,7 +1754,7 @@ _mm_cvtps_epi32(__m128 __a)
|
||||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||||
_mm_cvttps_epi32(__m128 __a)
|
_mm_cvttps_epi32(__m128 __a)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si);
|
return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Returns a vector of [4 x i32] where the lowest element is the input
|
/// \brief Returns a vector of [4 x i32] where the lowest element is the input
|
||||||
|
|
|
@ -1350,7 +1350,7 @@ _mm_cvt_ps2pi(__m128 __a)
|
||||||
static __inline__ int __DEFAULT_FN_ATTRS
|
static __inline__ int __DEFAULT_FN_ATTRS
|
||||||
_mm_cvttss_si32(__m128 __a)
|
_mm_cvttss_si32(__m128 __a)
|
||||||
{
|
{
|
||||||
return __a[0];
|
return __builtin_ia32_cvttss2si((__v4sf)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Converts a float value contained in the lower 32 bits of a vector of
|
/// \brief Converts a float value contained in the lower 32 bits of a vector of
|
||||||
|
@ -1386,7 +1386,7 @@ _mm_cvtt_ss2si(__m128 __a)
|
||||||
static __inline__ long long __DEFAULT_FN_ATTRS
|
static __inline__ long long __DEFAULT_FN_ATTRS
|
||||||
_mm_cvttss_si64(__m128 __a)
|
_mm_cvttss_si64(__m128 __a)
|
||||||
{
|
{
|
||||||
return __a[0];
|
return __builtin_ia32_cvttss2si64((__v4sf)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Converts two low-order float values in a 128-bit vector of
|
/// \brief Converts two low-order float values in a 128-bit vector of
|
||||||
|
|
|
@ -286,13 +286,13 @@ __m256d test_mm256_cvtps_pd(__m128 A) {
|
||||||
|
|
||||||
__m128i test_mm256_cvttpd_epi32(__m256d A) {
|
__m128i test_mm256_cvttpd_epi32(__m256d A) {
|
||||||
// CHECK-LABEL: test_mm256_cvttpd_epi32
|
// CHECK-LABEL: test_mm256_cvttpd_epi32
|
||||||
// CHECK: fptosi <4 x double> %{{.*}} to <4 x i32>
|
// CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %{{.*}})
|
||||||
return _mm256_cvttpd_epi32(A);
|
return _mm256_cvttpd_epi32(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
__m256i test_mm256_cvttps_epi32(__m256 A) {
|
__m256i test_mm256_cvttps_epi32(__m256 A) {
|
||||||
// CHECK-LABEL: test_mm256_cvttps_epi32
|
// CHECK-LABEL: test_mm256_cvttps_epi32
|
||||||
// CHECK: fptosi <8 x float> %{{.*}} to <8 x i32>
|
// CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %{{.*}})
|
||||||
return _mm256_cvttps_epi32(A);
|
return _mm256_cvttps_epi32(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -287,12 +287,14 @@ void f0() {
|
||||||
tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
|
tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
|
||||||
tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
|
tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
|
||||||
tmp_i = __builtin_ia32_cvtss2si(tmp_V4f);
|
tmp_i = __builtin_ia32_cvtss2si(tmp_V4f);
|
||||||
|
tmp_i = __builtin_ia32_cvttss2si(tmp_V4f);
|
||||||
|
|
||||||
tmp_i = __builtin_ia32_rdtsc();
|
tmp_i = __builtin_ia32_rdtsc();
|
||||||
tmp_i = __builtin_ia32_rdtscp(&tmp_Ui);
|
tmp_i = __builtin_ia32_rdtscp(&tmp_Ui);
|
||||||
tmp_LLi = __builtin_ia32_rdpmc(tmp_i);
|
tmp_LLi = __builtin_ia32_rdpmc(tmp_i);
|
||||||
#ifdef USE_64
|
#ifdef USE_64
|
||||||
tmp_LLi = __builtin_ia32_cvtss2si64(tmp_V4f);
|
tmp_LLi = __builtin_ia32_cvtss2si64(tmp_V4f);
|
||||||
|
tmp_LLi = __builtin_ia32_cvttss2si64(tmp_V4f);
|
||||||
#endif
|
#endif
|
||||||
tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
|
tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
|
||||||
(void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
|
(void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
|
||||||
|
@ -328,10 +330,14 @@ void f0() {
|
||||||
tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d);
|
tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d);
|
||||||
tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i);
|
tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i);
|
||||||
tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d);
|
tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d);
|
||||||
|
tmp_i = __builtin_ia32_cvttsd2si(tmp_V2d);
|
||||||
|
tmp_V4f = __builtin_ia32_cvtsd2ss(tmp_V4f, tmp_V2d);
|
||||||
#ifdef USE_64
|
#ifdef USE_64
|
||||||
tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
|
tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
|
||||||
|
tmp_LLi = __builtin_ia32_cvttsd2si64(tmp_V2d);
|
||||||
#endif
|
#endif
|
||||||
tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
|
tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
|
||||||
|
tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
|
||||||
(void) __builtin_ia32_clflush(tmp_vCp);
|
(void) __builtin_ia32_clflush(tmp_vCp);
|
||||||
(void) __builtin_ia32_lfence();
|
(void) __builtin_ia32_lfence();
|
||||||
(void) __builtin_ia32_mfence();
|
(void) __builtin_ia32_mfence();
|
||||||
|
@ -410,7 +416,9 @@ void f0() {
|
||||||
tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
|
tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
|
||||||
tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
|
tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
|
||||||
tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
|
tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
|
||||||
|
tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
|
||||||
tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
|
tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
|
||||||
|
tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
|
||||||
tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
|
tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
|
||||||
tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
|
tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
|
||||||
tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
|
tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
|
||||||
|
|
|
@ -295,22 +295,19 @@ long long test_mm_cvtss_si64(__m128 A) {
|
||||||
|
|
||||||
int test_mm_cvtt_ss2si(__m128 A) {
|
int test_mm_cvtt_ss2si(__m128 A) {
|
||||||
// CHECK-LABEL: test_mm_cvtt_ss2si
|
// CHECK-LABEL: test_mm_cvtt_ss2si
|
||||||
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
|
// CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
|
||||||
// CHECK: fptosi float %{{.*}} to i32
|
|
||||||
return _mm_cvtt_ss2si(A);
|
return _mm_cvtt_ss2si(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
int test_mm_cvttss_si32(__m128 A) {
|
int test_mm_cvttss_si32(__m128 A) {
|
||||||
// CHECK-LABEL: test_mm_cvttss_si32
|
// CHECK-LABEL: test_mm_cvttss_si32
|
||||||
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
|
// CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
|
||||||
// CHECK: fptosi float %{{.*}} to i32
|
|
||||||
return _mm_cvttss_si32(A);
|
return _mm_cvttss_si32(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
long long test_mm_cvttss_si64(__m128 A) {
|
long long test_mm_cvttss_si64(__m128 A) {
|
||||||
// CHECK-LABEL: test_mm_cvttss_si64
|
// CHECK-LABEL: test_mm_cvttss_si64
|
||||||
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
|
// CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
|
||||||
// CHECK: fptosi float %{{.*}} to i64
|
|
||||||
return _mm_cvttss_si64(A);
|
return _mm_cvttss_si64(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -507,7 +507,7 @@ long long test_mm_cvtsd_si64(__m128d A) {
|
||||||
|
|
||||||
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
|
__m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
|
||||||
// CHECK-LABEL: test_mm_cvtsd_ss
|
// CHECK-LABEL: test_mm_cvtsd_ss
|
||||||
// CHECK: fptrunc double %{{.*}} to float
|
// CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
|
||||||
return _mm_cvtsd_ss(A, B);
|
return _mm_cvtsd_ss(A, B);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -569,21 +569,19 @@ __m128i test_mm_cvttpd_epi32(__m128d A) {
|
||||||
|
|
||||||
__m128i test_mm_cvttps_epi32(__m128 A) {
|
__m128i test_mm_cvttps_epi32(__m128 A) {
|
||||||
// CHECK-LABEL: test_mm_cvttps_epi32
|
// CHECK-LABEL: test_mm_cvttps_epi32
|
||||||
// CHECK: fptosi <4 x float> %{{.*}} to <4 x i32>
|
// CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
|
||||||
return _mm_cvttps_epi32(A);
|
return _mm_cvttps_epi32(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
int test_mm_cvttsd_si32(__m128d A) {
|
int test_mm_cvttsd_si32(__m128d A) {
|
||||||
// CHECK-LABEL: test_mm_cvttsd_si32
|
// CHECK-LABEL: test_mm_cvttsd_si32
|
||||||
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
|
// CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
|
||||||
// CHECK: fptosi double %{{.*}} to i32
|
|
||||||
return _mm_cvttsd_si32(A);
|
return _mm_cvttsd_si32(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
long long test_mm_cvttsd_si64(__m128d A) {
|
long long test_mm_cvttsd_si64(__m128d A) {
|
||||||
// CHECK-LABEL: test_mm_cvttsd_si64
|
// CHECK-LABEL: test_mm_cvttsd_si64
|
||||||
// CHECK: extractelement <2 x double> %{{.*}}, i32 0
|
// CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
|
||||||
// CHECK: fptosi double %{{.*}} to i64
|
|
||||||
return _mm_cvttsd_si64(A);
|
return _mm_cvttsd_si64(A);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue