From 0abaf64580921e31983e355972b91c83fd7521f2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 21 Jan 2022 12:35:36 +0000 Subject: [PATCH] Revert rG4727d29d908f9dd608dd97a58c0af1ad579fd3ca "[X86] Remove __builtin_ia32_pabs intrinsics and use generic __builtin_elementwise_abs" Some build bots are referencing the `__builtin_ia32_pabs` intrinsics via alternative headers --- clang/include/clang/Basic/BuiltinsX86.def | 12 ++++++++++++ clang/lib/CodeGen/CGBuiltin.cpp | 15 +++++++++++++++ clang/lib/Headers/avx2intrin.h | 6 +++--- clang/lib/Headers/avx512bwintrin.h | 4 ++-- clang/lib/Headers/avx512fintrin.h | 8 ++------ clang/lib/Headers/avx512vlintrin.h | 4 ++-- clang/lib/Headers/tmmintrin.h | 6 +++--- clang/test/CodeGen/builtins-x86.c | 3 +++ 8 files changed, 42 insertions(+), 16 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 9b7c763b0c6c..bc6208be4560 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -296,6 +296,9 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse") TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse") @@ -555,6 +558,9 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx") // AVX2 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2") @@ -921,6 +927,8 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") @@ -1037,6 +1045,8 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") @@ -1188,6 +1198,8 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 49f054ec1a98..a49c03500278 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14285,6 +14285,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops[0]); } } + case X86::BI__builtin_ia32_pabsb128: + case X86::BI__builtin_ia32_pabsw128: + case X86::BI__builtin_ia32_pabsd128: + case X86::BI__builtin_ia32_pabsb256: + case X86::BI__builtin_ia32_pabsw256: + case X86::BI__builtin_ia32_pabsd256: + case X86::BI__builtin_ia32_pabsq128: + case X86::BI__builtin_ia32_pabsq256: + case X86::BI__builtin_ia32_pabsb512: + case X86::BI__builtin_ia32_pabsw512: + case X86::BI__builtin_ia32_pabsd512: + case X86::BI__builtin_ia32_pabsq512: { + Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c9ad74ce3fa4..5064c87c2bb1 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -26,19 +26,19 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { - return (__m256i)__builtin_elementwise_abs((__v32qs)__a); + return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) { - return (__m256i)__builtin_elementwise_abs((__v16hi)__a); + return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { - return (__m256i)__builtin_elementwise_abs((__v8si)__a); + return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 53319eb23011..6aee8aed8487 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi8 (__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v64qs)__A); + return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi16 (__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v32hi)__A); + return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9b02a7cffc64..df298640523b 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -26,10 +26,6 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64))); typedef unsigned long long __v8du __attribute__((__vector_size__(64))); typedef unsigned int __v16su __attribute__((__vector_size__(64))); -/* We need an explicitly signed variant for char. Note that this shouldn't - * appear in the interface though. */ -typedef signed char __v64qs __attribute__((__vector_size__(64))); - typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); @@ -1850,7 +1846,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v8di)__A); + return (__m512i)__builtin_ia32_pabsq512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1872,7 +1868,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v16si) __A); + return (__m512i)__builtin_ia32_pabsd512((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index eddb99902e3d..0519dba59081 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64 (__m128i __A) { - return (__m128i)__builtin_elementwise_abs((__v2di)__A); + return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64 (__m256i __A) { - return (__m256i)__builtin_elementwise_abs((__v4di)__A); + return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index cb9be2349de5..bcffa8187801 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -53,7 +53,7 @@ _mm_abs_pi8(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a) { - return (__m128i)__builtin_elementwise_abs((__v16qs)__a); + return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); } /// Computes the absolute value of each of the packed 16-bit signed @@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a) { - return (__m128i)__builtin_elementwise_abs((__v8hi)__a); + return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); } /// Computes the absolute value of each of the packed 32-bit signed @@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a) { - return (__m128i)__builtin_elementwise_abs((__v4si)__a); + return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); } /// Concatenates the two 128-bit integer vector operands, and diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index bfcd30072fc1..61b9d53c74f9 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -259,8 +259,11 @@ void f0() { tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s); tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i); tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i); + tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c); tmp_V8c = __builtin_ia32_pabsb(tmp_V8c); + tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s); tmp_V4s = __builtin_ia32_pabsw(tmp_V4s); + tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i); tmp_V2i = __builtin_ia32_pabsd(tmp_V2i); tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi); tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi);