X86: add __builtin_ia32_pshufd/pshuflw/pshufhw builtins (128/256/512-bit),
lower them in CodeGen, and make the shuffle header macros call them.
(Free-form preamble; patch tools skip everything before the first "diff --git".)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index c63289e25d5e..3466e9b0358d 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -321,6 +321,9 @@ TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2")
 TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "n", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2")
@@ -598,6 +601,9 @@ TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "nc", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "nc", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "nc", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pshufd256, "V8iV8iIi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pshuflw256, "V16sV16sIi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pshufhw256, "V16sV16sIi", "nc", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "nc", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "nc", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "nc", "avx2")
@@ -1363,6 +1369,8 @@ TARGET_BUILTIN(__builtin_ia32_prorvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl"
 TARGET_BUILTIN(__builtin_ia32_prorvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "nc", "avx512bw")
@@ -1737,6 +1745,7 @@ TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "nc", "avx51
 TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "nc", "avx512vbmi2")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 025b34e809c3..741f36b095dc 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9325,6 +9325,62 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                        makeArrayRef(Indices, NumElts),
                                        "blend");
   }
+  // pshuflw: within every group of 8 elements, pick elements 0-3 using the
+  // four 2-bit fields of the immediate and pass elements 4-7 through.
+  case X86::BI__builtin_ia32_pshuflw:
+  case X86::BI__builtin_ia32_pshuflw256:
+  case X86::BI__builtin_ia32_pshuflw512: {
+    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+    llvm::Type *Ty = Ops[0]->getType();
+    unsigned NumElts = Ty->getVectorNumElements();
+
+    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+    Imm = (Imm & 0xff) * 0x01010101;
+
+    uint32_t Indices[32];
+    for (unsigned l = 0; l != NumElts; l += 8) {
+      for (unsigned i = 0; i != 4; ++i) {
+        Indices[l + i] = l + (Imm & 3);
+        Imm >>= 2;
+      }
+      for (unsigned i = 4; i != 8; ++i)
+        Indices[l + i] = l + i;
+    }
+
+    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+                                       makeArrayRef(Indices, NumElts),
+                                       "pshuflw");
+  }
+  // pshufhw: within every group of 8 elements, pass elements 0-3 through and
+  // pick elements 4-7 using the four 2-bit fields of the immediate.
+  case X86::BI__builtin_ia32_pshufhw:
+  case X86::BI__builtin_ia32_pshufhw256:
+  case X86::BI__builtin_ia32_pshufhw512: {
+    uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+    llvm::Type *Ty = Ops[0]->getType();
+    unsigned NumElts = Ty->getVectorNumElements();
+
+    // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+    Imm = (Imm & 0xff) * 0x01010101;
+
+    uint32_t Indices[32];
+    for (unsigned l = 0; l != NumElts; l += 8) {
+      for (unsigned i = 0; i != 4; ++i)
+        Indices[l + i] = l + i;
+      for (unsigned i = 4; i != 8; ++i) {
+        Indices[l + i] = l + 4 + (Imm & 3);
+        Imm >>= 2;
+      }
+    }
+
+    return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+                                       makeArrayRef(Indices, NumElts),
+                                       "pshufhw");
+  }
+  // pshufd has no body of its own: it falls into the vpermilp* cases below.
+  case X86::BI__builtin_ia32_pshufd:
+  case X86::BI__builtin_ia32_pshufd256:
+  case X86::BI__builtin_ia32_pshufd512:
   case X86::BI__builtin_ia32_vpermilpd:
   case X86::BI__builtin_ia32_vpermilps:
   case X86::BI__builtin_ia32_vpermilpd256:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3867af08ccc6..e7fdd2b1e595 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -488,44 +488,13 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
 }
 
 #define _mm256_shuffle_epi32(a, imm) \
-  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \
-                                   (__v8si)_mm256_undefined_si256(), \
-                                   0 + (((imm) >> 0) & 0x3), \
-                                   0 + (((imm) >> 2) & 0x3), \
-                                   0 + (((imm) >> 4) & 0x3), \
-                                   0 + (((imm) >> 6) & 0x3), \
-                                   4 + (((imm) >> 0) & 0x3), \
-                                   4 + (((imm) >> 2) & 0x3), \
-                                   4 + (((imm) >> 4) & 0x3), \
-                                   4 + (((imm) >> 6) & 0x3))
+  (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))
 
 #define _mm256_shufflehi_epi16(a, imm) \
-  (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
-                                   (__v16hi)_mm256_undefined_si256(), \
-                                   0, 1, 2, 3, \
-                                   4 + (((imm) >> 0) & 0x3), \
-                                   4 + (((imm) >> 2) & 0x3), \
-                                   4 + (((imm) >> 4) & 0x3), \
-                                   4 + (((imm) >> 6) & 0x3), \
-                                   8, 9, 10, 11, \
-                                   12 + (((imm) >> 0) & 0x3), \
-                                   12 + (((imm) >> 2) & 0x3), \
-                                   12 + (((imm) >> 4) & 0x3), \
-                                   12 + (((imm) >> 6) & 0x3))
+  (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))
 
 #define _mm256_shufflelo_epi16(a, imm) \
-  (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
-                                   (__v16hi)_mm256_undefined_si256(), \
-                                   0 + (((imm) >> 0) & 0x3), \
-                                   0 + (((imm) >> 2) & 0x3), \
-                                   0 + (((imm) >> 4) & 0x3), \
-                                   0 + (((imm) >> 6) & 0x3), \
-                                   4, 5, 6, 7, \
-                                   8 + (((imm) >> 0) & 0x3), \
-                                   8 + (((imm) >> 2) & 0x3), \
-                                   8 + (((imm) >> 4) & 0x3), \
-                                   8 + (((imm) >> 6) & 0x3), \
-                                   12, 13, 14, 15)
+  (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sign_epi8(__m256i __a, __m256i __b)
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index aabaabd648d2..0e21a57f3118 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1287,28 +1287,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
 
 
 #define _mm512_shufflehi_epi16(A, imm) \
-  (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
-                                   (__v32hi)_mm512_undefined_epi32(), \
-                                   0, 1, 2, 3, \
-                                   4 + (((imm) >> 0) & 0x3), \
-                                   4 + (((imm) >> 2) & 0x3), \
-                                   4 + (((imm) >> 4) & 0x3), \
-                                   4 + (((imm) >> 6) & 0x3), \
-                                   8, 9, 10, 11, \
-                                   12 + (((imm) >> 0) & 0x3), \
-                                   12 + (((imm) >> 2) & 0x3), \
-                                   12 + (((imm) >> 4) & 0x3), \
-                                   12 + (((imm) >> 6) & 0x3), \
-                                   16, 17, 18, 19, \
-                                   20 + (((imm) >> 0) & 0x3), \
-                                   20 + (((imm) >> 2) & 0x3), \
-                                   20 + (((imm) >> 4) & 0x3), \
-                                   20 + (((imm) >> 6) & 0x3), \
-                                   24, 25, 26, 27, \
-                                   28 + (((imm) >> 0) & 0x3), \
-                                   28 + (((imm) >> 2) & 0x3), \
-                                   28 + (((imm) >> 4) & 0x3), \
-                                   28 + (((imm) >> 6) & 0x3))
+  (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))
 
 #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
   (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
@@ -1323,28 +1302,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
                                       (__v32hi)_mm512_setzero_si512())
 
 #define _mm512_shufflelo_epi16(A, imm) \
-  (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
-                                   (__v32hi)_mm512_undefined_epi32(), \
-                                   0 + (((imm) >> 0) & 0x3), \
-                                   0 + (((imm) >> 2) & 0x3), \
-                                   0 + (((imm) >> 4) & 0x3), \
-                                   0 + (((imm) >> 6) & 0x3), \
-                                   4, 5, 6, 7, \
-                                   8 + (((imm) >> 0) & 0x3), \
-                                   8 + (((imm) >> 2) & 0x3), \
-                                   8 + (((imm) >> 4) & 0x3), \
-                                   8 + (((imm) >> 6) & 0x3), \
-                                   12, 13, 14, 15, \
-                                   16 + (((imm) >> 0) & 0x3), \
-                                   16 + (((imm) >> 2) & 0x3), \
-                                   16 + (((imm) >> 4) & 0x3), \
-                                   16 + (((imm) >> 6) & 0x3), \
-                                   20, 21, 22, 23, \
-                                   24 + (((imm) >> 0) & 0x3), \
-                                   24 + (((imm) >> 2) & 0x3), \
-                                   24 + (((imm) >> 4) & 0x3), \
-                                   24 + (((imm) >> 6) & 0x3), \
-                                   28, 29, 30, 31)
+  (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))
 
 
 #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 4ae235e63300..fe730ac27f20 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8849,24 +8849,7 @@ _mm_maskz_load_sd (__mmask8 __U, const double* __A)
 }
 
 #define _mm512_shuffle_epi32(A, I) \
-  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
-                                   (__v16si)_mm512_undefined_epi32(), \
-                                   0 + (((I) >> 0) & 0x3), \
-                                   0 + (((I) >> 2) & 0x3), \
-                                   0 + (((I) >> 4) & 0x3), \
-                                   0 + (((I) >> 6) & 0x3), \
-                                   4 + (((I) >> 0) & 0x3), \
-                                   4 + (((I) >> 2) & 0x3), \
-                                   4 + (((I) >> 4) & 0x3), \
-                                   4 + (((I) >> 6) & 0x3), \
-                                   8 + (((I) >> 0) & 0x3), \
-                                   8 + (((I) >> 2) & 0x3), \
-                                   8 + (((I) >> 4) & 0x3), \
-                                   8 + (((I) >> 6) & 0x3), \
-                                   12 + (((I) >> 0) & 0x3), \
-                                   12 + (((I) >> 2) & 0x3), \
-                                   12 + (((I) >> 4) & 0x3), \
-                                   12 + (((I) >> 6) & 0x3))
+  (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))
 
 #define _mm512_mask_shuffle_epi32(W, U, A, I) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index ad836b1e6372..8942790d5c5e 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -4343,10 +4343,7 @@ _mm_movemask_epi8(__m128i __a)
 ///    11: assign values from bits [127:96] of \a a.
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shuffle_epi32(a, imm) \
-  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
-                                   (__v4si)_mm_undefined_si128(), \
-                                   ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
-                                   ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3)
+  (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))
 
 /// Constructs a 128-bit integer vector by shuffling four lower 16-bit
 ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4376,11 +4373,7 @@ _mm_movemask_epi8(__m128i __a)
 ///    11: assign values from bits [63:48] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
 #define _mm_shufflelo_epi16(a, imm) \
-  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
-                                   (__v8hi)_mm_undefined_si128(), \
-                                   ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
-                                   ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \
-                                   4, 5, 6, 7)
+  (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))
 
 /// Constructs a 128-bit integer vector by shuffling four upper 16-bit
 ///    elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4410,13 +4403,7 @@ _mm_movemask_epi8(__m128i __a)
 ///    11: assign values from bits [127:112] of \a a. \n
 /// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm) \ - (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ - (__v8hi)_mm_undefined_si128(), \ - 0, 1, 2, 3, \ - 4 + (((imm) >> 0) & 0x3), \ - 4 + (((imm) >> 2) & 0x3), \ - 4 + (((imm) >> 4) & 0x3), \ - 4 + (((imm) >> 6) & 0x3)) + (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)) /// Unpacks the high-order (index 8-15) values from two 128-bit vectors /// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8]. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4f5405935e20..020d82edf72c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2759,6 +2759,15 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vpermilps256: case X86::BI__builtin_ia32_vpermilpd512: case X86::BI__builtin_ia32_vpermilps512: + case X86::BI__builtin_ia32_pshufd: + case X86::BI__builtin_ia32_pshufd256: + case X86::BI__builtin_ia32_pshufd512: + case X86::BI__builtin_ia32_pshufhw: + case X86::BI__builtin_ia32_pshufhw256: + case X86::BI__builtin_ia32_pshufhw512: + case X86::BI__builtin_ia32_pshuflw: + case X86::BI__builtin_ia32_pshuflw256: + case X86::BI__builtin_ia32_pshuflw512: case X86::BI__builtin_ia32_vcvtps2ph: case X86::BI__builtin_ia32_vcvtps2ph_mask: case X86::BI__builtin_ia32_vcvtps2ph256: diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c index 72b4349f6e88..99c304109c6a 100644 --- a/clang/test/CodeGen/avx2-builtins.c +++ b/clang/test/CodeGen/avx2-builtins.c @@ -955,19 +955,19 @@ __m256i test_mm256_shuffle_epi8(__m256i a, __m256i b) { __m256i test_mm256_shuffle_epi32(__m256i a) { // CHECK-LABEL: test_mm256_shuffle_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> return _mm256_shuffle_epi32(a, 15); } __m256i test_mm256_shufflehi_epi16(__m256i a) { // CHECK-LABEL: 
test_mm256_shufflehi_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> return _mm256_shufflehi_epi16(a, 107); } __m256i test_mm256_shufflelo_epi16(__m256i a) { // CHECK-LABEL: test_mm256_shufflelo_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> return _mm256_shufflelo_epi16(a, 83); } diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c index 07569f53c7ef..6803652a7941 100644 --- a/clang/test/CodeGen/avx512bw-builtins.c +++ b/clang/test/CodeGen/avx512bw-builtins.c @@ -1250,40 +1250,40 @@ __m512i test_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) { __m512i test_mm512_shufflehi_epi16(__m512i __A) { // CHECK-LABEL: @test_mm512_shufflehi_epi16 - // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> + // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> return _mm512_shufflehi_epi16(__A, 5); } __m512i test_mm512_mask_shufflehi_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_shufflehi_epi16 - // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> + // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shufflehi_epi16(__W, __U, __A, 5); } __m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_shufflehi_epi16 - // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> + // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shufflehi_epi16(__U, __A, 5); } __m512i test_mm512_shufflelo_epi16(__m512i __A) { // CHECK-LABEL: 
@test_mm512_shufflelo_epi16 - // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> + // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> return _mm512_shufflelo_epi16(__A, 5); } __m512i test_mm512_mask_shufflelo_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_shufflelo_epi16 - // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> + // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shufflelo_epi16(__W, __U, __A, 5); } __m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_shufflelo_epi16 - // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i32> + // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> undef, <32 x i32> // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shufflelo_epi16(__U, __A, 5); } diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 52554ba183ea..dd55c43940f8 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -7234,20 +7234,20 @@ __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { __m512i test_mm512_shuffle_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_shuffle_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> return _mm512_shuffle_epi32(__A, 1); } __m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_shuffle_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return 
_mm512_mask_shuffle_epi32(__W, __U, __A, 1); } __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_shuffle_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shuffle_epi32(__U, __A, 1); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index 1edc25d84942..c234e3b94d87 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -7598,28 +7598,28 @@ __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_shuffle_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shuffle_epi32(__W, __U, __A, 1); } __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_shuffle_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shuffle_epi32(__U, __A, 2); } __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_shuffle_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shuffle_epi32(__W, __U, __A, 2); } __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: 
@test_mm256_maskz_shuffle_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shuffle_epi32(__U, __A, 2); } diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c index da16abd2a81a..9cc58b650578 100644 --- a/clang/test/CodeGen/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/avx512vlbw-builtins.c @@ -3013,56 +3013,56 @@ __mmask16 test_mm256_movepi16_mask(__m256i __A) { __m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask32 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_shufflehi_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shufflehi_epi16(__W, __U, __A, 5); } __m128i test_mm_maskz_shufflehi_epi16(__mmask32 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_shufflehi_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shufflehi_epi16(__U, __A, 5); } __m128i test_mm_mask_shufflelo_epi16(__m128i __W, __mmask32 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_shufflelo_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shufflelo_epi16(__W, __U, __A, 5); } __m128i test_mm_maskz_shufflelo_epi16(__mmask32 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_shufflelo_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 
x i16> %{{.*}}, <8 x i16> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shufflelo_epi16(__U, __A, 5); } __m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_shufflehi_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shufflehi_epi16(__W, __U, __A, 5); } __m256i test_mm256_maskz_shufflehi_epi16(__mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_shufflehi_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shufflehi_epi16(__U, __A, 5); } __m256i test_mm256_mask_shufflelo_epi16(__m256i __W, __mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_shufflelo_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shufflelo_epi16(__W, __U, __A, 5); } __m256i test_mm256_maskz_shufflelo_epi16(__mmask32 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_shufflelo_epi16 - // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shufflelo_epi16(__U, __A, 5); } diff --git a/clang/test/CodeGen/sse2-builtins.c b/clang/test/CodeGen/sse2-builtins.c index 0031215f7c5a..0d79aabbafb8 100644 --- a/clang/test/CodeGen/sse2-builtins.c +++ 
b/clang/test/CodeGen/sse2-builtins.c @@ -1116,7 +1116,7 @@ __m128i test_mm_setzero_si128() { __m128i test_mm_shuffle_epi32(__m128i A) { // CHECK-LABEL: test_mm_shuffle_epi32 - // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> zeroinitializer return _mm_shuffle_epi32(A, 0); } @@ -1128,13 +1128,13 @@ __m128d test_mm_shuffle_pd(__m128d A, __m128d B) { __m128i test_mm_shufflehi_epi16(__m128i A) { // CHECK-LABEL: test_mm_shufflehi_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> return _mm_shufflehi_epi16(A, 0); } __m128i test_mm_shufflelo_epi16(__m128i A) { // CHECK-LABEL: test_mm_shufflelo_epi16 - // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> return _mm_shufflelo_epi16(A, 0); }