From 2a383c9273faf83f94878074f8690ad75e8c01f3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 4 Jul 2016 22:18:01 +0000 Subject: [PATCH] [X86] Use undefined instead of setzero in shufflevector based intrinsics when the second source is unused. Rewrite immediate extractions in shuffle intrinsics to be in ((c >> x) & y) form instead of ((c & z) >> x). This way only x varies between each use instead of having to vary x and z. llvm-svn: 274525 --- clang/lib/Headers/avx2intrin.h | 70 +++++++------ clang/lib/Headers/avx512bwintrin.h | 68 ++++++------- clang/lib/Headers/avx512fintrin.h | 134 ++++++++++++------------- clang/lib/Headers/avx512vlintrin.h | 8 +- clang/lib/Headers/avxintrin.h | 73 +++++++------- clang/lib/Headers/emmintrin.h | 25 ++--- clang/lib/Headers/xmmintrin.h | 7 +- clang/test/CodeGen/avx-builtins.c | 22 ++-- clang/test/CodeGen/avx2-builtins.c | 10 +- clang/test/CodeGen/avx512f-builtins.c | 12 +-- clang/test/CodeGen/avx512vl-builtins.c | 16 +-- 11 files changed, 228 insertions(+), 217 deletions(-) diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index f66c0f310056..13bcbef4dbbe 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -497,40 +497,42 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) #define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \ - (__v8si)_mm256_setzero_si256(), \ - (imm) & 0x3, ((imm) & 0xc) >> 2, \ - ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6)); }) + (__v8si)_mm256_undefined_si256(), \ + 0 + (((imm) >> 0) & 0x3), \ + 0 + (((imm) >> 2) & 0x3), \ + 0 + (((imm) >> 4) & 0x3), \ + 0 + (((imm) >> 6) & 0x3), \ + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3)); }) #define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \ - (__v16hi)_mm256_setzero_si256(), \ + (__v16hi)_mm256_undefined_si256(), \ 0, 1, 2, 3, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6), \ + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3), \ 8, 9, 10, 11, \ - 12 + (((imm) & 0x03) >> 0), \ - 12 + (((imm) & 0x0c) >> 2), \ - 12 + (((imm) & 0x30) >> 4), \ - 12 + (((imm) & 0xc0) >> 6)); }) + 12 + (((imm) >> 0) & 0x3), \ + 12 + (((imm) >> 2) & 0x3), \ + 12 + (((imm) >> 4) & 0x3), \ + 12 + (((imm) >> 6) & 0x3)); }) #define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \ - (__v16hi)_mm256_setzero_si256(), \ - 0 + (((imm) & 0x03) >> 0), \ - 0 + (((imm) & 0x0c) >> 2), \ - 0 + (((imm) & 0x30) >> 4), \ - 0 + (((imm) & 0xc0) >> 6), \ + (__v16hi)_mm256_undefined_si256(), \ + 0 + (((imm) >> 0) & 0x3), \ + 0 + (((imm) >> 2) & 0x3), \ + 0 + (((imm) >> 4) & 0x3), \ + 0 + (((imm) >> 6) & 0x3), \ 4, 5, 6, 7, \ - 8 + (((imm) & 0x03) >> 0), \ - 8 + (((imm) & 0x0c) >> 2), \ - 8 + (((imm) & 0x30) >> 4), \ - 8 + (((imm) & 0xc0) >> 6), \ + 8 + (((imm) >> 0) & 0x3), \ + 8 + (((imm) >> 2) & 0x3), \ + 8 + (((imm) >> 4) & 0x3), \ + 8 + (((imm) >> 6) & 0x3), \ 12, 13, 14, 15); }) static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -940,9 +942,11 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) #define _mm256_permute4x64_pd(V, M) __extension__ ({ \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \ - (__v4df)_mm256_setzero_pd(), \ - (M) & 0x3, ((M) & 0xc) >> 2, \ - ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + (__v4df)_mm256_undefined_pd(), \ + ((M) >> 0) & 0x3, \ + ((M) >> 2) & 0x3, \ + ((M) >> 4) & 0x3, \ + ((M) >> 6) & 0x3); }) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) @@ -952,16 +956,18 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) #define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \ - (__v4di)_mm256_setzero_si256(), \ - (M) & 0x3, ((M) & 0xc) >> 2, \ - ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + (__v4di)_mm256_undefined_si256(), \ + ((M) >> 0) & 0x3, \ + ((M) >> 2) & 0x3, \ + ((M) >> 4) & 0x3, \ + ((M) >> 6) & 0x3); }) #define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); }) #define _mm256_extracti128_si256(V, M) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \ - (__v4di)_mm256_setzero_si256(), \ + (__v4di)_mm256_undefined_si256(), \ (((M) & 1) ? 2 : 0), \ (((M) & 1) ? 3 : 1) ); }) diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index f29305aedfa4..1d4e7f174102 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1613,27 +1613,27 @@ _mm512_maskz_cvtepu8_epi16 (__mmask32 __U, __m256i __A) #define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ - (__v32hi)_mm512_setzero_hi(), \ + (__v32hi)_mm512_undefined_epi32(), \ 0, 1, 2, 3, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6), \ + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3), \ 8, 9, 10, 11, \ - 12 + (((imm) & 0x03) >> 0), \ - 12 + (((imm) & 0x0c) >> 2), \ - 12 + (((imm) & 0x30) >> 4), \ - 12 + (((imm) & 0xc0) >> 6), \ + 12 + (((imm) >> 0) & 0x3), \ + 12 + (((imm) >> 2) & 0x3), \ + 12 + (((imm) >> 4) & 0x3), \ + 12 + (((imm) >> 6) & 0x3), \ 16, 17, 18, 19, \ - 20 + (((imm) & 0x03) >> 0), \ - 20 + (((imm) & 0x0c) >> 2), \ - 20 + (((imm) & 0x30) >> 4), \ - 20 + (((imm) & 0xc0) >> 6), \ + 20 + (((imm) >> 0) & 0x3), \ + 20 + (((imm) >> 2) & 0x3), \ + 20 + (((imm) >> 4) & 0x3), \ + 20 + (((imm) >> 6) & 0x3), \ 24, 25, 26, 27, \ - 28 + (((imm) & 0x03) >> 0), \ - 28 + (((imm) & 0x0c) >> 2), \ - 28 + (((imm) & 0x30) >> 4), \ - 28 + (((imm) & 0xc0) >> 6)); }) + 28 + (((imm) >> 0) & 0x3), \ + 28 + (((imm) >> 2) & 0x3), \ + 28 + (((imm) >> 4) & 0x3), \ + 28 + (((imm) >> 6) & 0x3)); }) #define _mm512_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ @@ -1649,26 +1649,26 @@ _mm512_maskz_cvtepu8_epi16 (__mmask32 __U, __m256i __A) #define _mm512_shufflelo_epi16(A, imm) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ - (__v32hi)_mm512_setzero_hi(), \ - 0 + (((imm) & 0x03) >> 0), \ - 0 + (((imm) & 0x0c) >> 2), \ - 0 + (((imm) & 0x30) >> 4), \ - 0 + (((imm) & 0xc0) >> 6), \ + (__v32hi)_mm512_undefined_epi32(), \ + 0 + (((imm) >> 0) & 0x3), \ + 0 + (((imm) >> 2) & 0x3), \ + 0 + (((imm) >> 4) & 0x3), \ + 0 + (((imm) >> 6) & 0x3), \ 4, 5, 6, 7, \ - 8 + (((imm) & 0x03) >> 0), \ - 8 + (((imm) & 0x0c) >> 2), \ - 8 + (((imm) & 0x30) >> 4), \ - 8 + (((imm) & 0xc0) >> 6), \ + 8 + (((imm) >> 0) & 0x3), \ + 8 + (((imm) >> 2) & 0x3), \ + 8 + (((imm) >> 4) & 0x3), \ + 8 + (((imm) >> 6) & 0x3), \ 12, 13, 14, 15, \ - 16 + (((imm) & 0x03) >> 0), \ - 16 + (((imm) & 0x0c) >> 2), \ - 16 + (((imm) & 0x30) >> 4), \ - 16 + (((imm) & 0xc0) >> 6), \ + 16 + (((imm) >> 0) & 0x3), \ + 16 + (((imm) >> 2) & 0x3), \ + 16 + (((imm) >> 4) & 0x3), \ + 16 + (((imm) >> 6) & 0x3), \ 20, 21, 22, 23, \ - 24 + (((imm) & 0x03) >> 0), \ - 24 + (((imm) & 0x0c) >> 2), \ - 24 + (((imm) & 0x30) >> 4), \ - 24 + (((imm) & 0xc0) >> 6), \ + 24 + (((imm) >> 0) & 0x3), \ + 24 + (((imm) >> 2) & 0x3), \ + 24 + (((imm) >> 4) & 0x3), \ + 24 + (((imm) >> 6) & 0x3), \ 28, 29, 30, 31); }) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 4bd5a8d7dbb5..3378b071b10a 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -6542,15 +6542,15 @@ _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, #define _mm512_permute_pd(X, C) __extension__ ({ \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ - (__v8df)_mm512_setzero_pd(), \ - 0 + (((C) & 0x01) >> 0), \ - 0 + (((C) & 0x02) >> 1), \ - 2 + (((C) & 0x04) >> 2), \ - 2 + (((C) & 0x08) >> 3), \ - 4 + (((C) & 0x10) >> 4), \ - 4 + (((C) & 0x20) >> 5), \ - 6 + (((C) & 0x40) >> 6), \ - 6 + (((C) & 0x80) >> 7)); }) + (__v8df)_mm512_undefined_pd(), \ + 0 + (((C) >> 0) & 0x1), \ + 0 + (((C) >> 1) & 0x1), \ + 2 + (((C) >> 2) & 0x1), \ + 2 + (((C) >> 3) & 0x1), \ + 4 + (((C) >> 4) & 0x1), \ + 4 + (((C) >> 5) & 0x1), \ + 6 + (((C) >> 6) & 0x1), \ + 6 + (((C) >> 7) & 0x1)); }) #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ @@ -6564,23 +6564,23 @@ _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, #define _mm512_permute_ps(X, C) __extension__ ({ \ (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ - (__v16sf)_mm512_setzero_ps(), \ - 0 + (((C) & 0x03) >> 0), \ - 0 + (((C) & 0x0c) >> 2), \ - 0 + (((C) & 0x30) >> 4), \ - 0 + (((C) & 0xc0) >> 6), \ - 4 + (((C) & 0x03) >> 0), \ - 4 + (((C) & 0x0c) >> 2), \ - 4 + (((C) & 0x30) >> 4), \ - 4 + (((C) & 0xc0) >> 6), \ - 8 + (((C) & 0x03) >> 0), \ - 8 + (((C) & 0x0c) >> 2), \ - 8 + (((C) & 0x30) >> 4), \ - 8 + (((C) & 0xc0) >> 6), \ - 12 + (((C) & 0x03) >> 0), \ - 12 + (((C) & 0x0c) >> 2), \ - 12 + (((C) & 0x30) >> 4), \ - 12 + (((C) & 0xc0) >> 6)); }) + (__v16sf)_mm512_undefined_ps(), \ + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 + (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3), \ + 8 + (((C) >> 0) & 0x3), \ + 8 + (((C) >> 2) & 0x3), \ + 8 + (((C) >> 4) & 0x3), \ + 8 + (((C) >> 6) & 0x3), \ + 12 + (((C) >> 0) & 0x3), \ + 12 + (((C) >> 2) & 0x3), \ + 12 + (((C) >> 4) & 0x3), \ + 12 + (((C) >> 6) & 0x3)); }) #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ @@ -7170,14 +7170,14 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) #define _mm512_shuffle_pd(A, B, M) __extension__ ({ \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), \ - (((M) & 0x01) >> 0) + 0, \ - (((M) & 0x02) >> 1) + 8, \ - (((M) & 0x04) >> 2) + 2, \ - (((M) & 0x08) >> 3) + 10, \ - (((M) & 0x10) >> 4) + 4, \ - (((M) & 0x20) >> 5) + 12, \ - (((M) & 0x40) >> 6) + 6, \ - (((M) & 0x80) >> 7) + 14); }) + 0 + (((M) >> 0) & 0x1), \ + 8 + (((M) >> 1) & 0x1), \ + 2 + (((M) >> 2) & 0x1), \ + 10 + (((M) >> 3) & 0x1), \ + 4 + (((M) >> 4) & 0x1), \ + 12 + (((M) >> 5) & 0x1), \ + 6 + (((M) >> 6) & 0x1), \ + 14 + (((M) >> 7) & 0x1)); }) #define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ @@ -8686,14 +8686,14 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) #define _mm512_permutex_pd(X, C) __extension__ ({ \ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ (__v8df)_mm512_undefined_pd(), \ - 0 + (((C) & 0x03) >> 0), \ - 0 + (((C) & 0x0c) >> 2), \ - 0 + (((C) & 0x30) >> 4), \ - 0 + (((C) & 0xc0) >> 6), \ - 4 + (((C) & 0x03) >> 0), \ - 4 + (((C) & 0x0c) >> 2), \ - 4 + (((C) & 0x30) >> 4), \ - 4 + (((C) & 0xc0) >> 6)); }) + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 + (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3)); }) #define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ @@ -8708,14 +8708,14 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) #define _mm512_permutex_epi64(X, C) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ (__v8di)_mm512_undefined_epi32(), \ - 0 + (((C) & 0x03) >> 0), \ - 0 + (((C) & 0x0c) >> 2), \ - 0 + (((C) & 0x30) >> 4), \ - 0 + (((C) & 0xc0) >> 6), \ - 4 + (((C) & 0x03) >> 0), \ - 4 + (((C) & 0x0c) >> 2), \ - 4 + (((C) & 0x30) >> 4), \ - 4 + (((C) & 0xc0) >> 6)); }) + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 + (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3)); }) #define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ @@ -9069,23 +9069,23 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_setzero_si512(), \ - 0 + (((I) & 0x03) >> 0), \ - 0 + (((I) & 0x0c) >> 2), \ - 0 + (((I) & 0x30) >> 4), \ - 0 + (((I) & 0xc0) >> 6), \ - 4 + (((I) & 0x03) >> 0), \ - 4 + (((I) & 0x0c) >> 2), \ - 4 + (((I) & 0x30) >> 4), \ - 4 + (((I) & 0xc0) >> 6), \ - 8 + (((I) & 0x03) >> 0), \ - 8 + (((I) & 0x0c) >> 2), \ - 8 + (((I) & 0x30) >> 4), \ - 8 + (((I) & 0xc0) >> 6), \ - 12 + (((I) & 0x03) >> 0), \ - 12 + (((I) & 0x0c) >> 2), \ - 12 + (((I) & 0x30) >> 4), \ - 12 + (((I) & 0xc0) >> 6)); }) + (__v16si)_mm512_undefined_epi32(), \ + 0 + (((I) >> 0) & 0x3), \ + 0 + (((I) >> 2) & 0x3), \ + 0 + (((I) >> 4) & 0x3), \ + 0 + (((I) >> 6) & 0x3), \ + 4 + (((I) >> 0) & 0x3), \ + 4 + (((I) >> 2) & 0x3), \ + 4 + (((I) >> 4) & 0x3), \ + 4 + (((I) >> 6) & 0x3), \ + 8 + (((I) >> 0) & 0x3), \ + 8 + (((I) >> 2) & 0x3), \ + 8 + (((I) >> 4) & 0x3), \ + 8 + (((I) >> 6) & 0x3), \ + 12 + (((I) >> 0) & 0x3), \ + 12 + (((I) >> 2) & 0x3), \ + 12 + (((I) >> 4) & 0x3), \ + 12 + (((I) >> 6) & 0x3)); }) #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 2e6f9fc83ba4..a84418b4447e 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -8803,8 +8803,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) #define _mm256_permutex_pd(X, C) __extension__ ({ \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ (__v4df)_mm256_undefined_pd(), \ - (C) & 0x3, ((C) & 0xc) >> 2, \ - ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) + ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) #define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ @@ -8819,8 +8819,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) #define _mm256_permutex_epi64(X, C) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ (__v4di)_mm256_undefined_si256(), \ - (C) & 0x3, ((C) & 0xc) >> 2, \ - ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) + ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) #define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 45c052363e34..86bfdfb80c79 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -999,8 +999,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// \returns A 128-bit vector of [2 x double] containing the copied values. #define _mm_permute_pd(A, C) __extension__ ({ \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \ - (__v2df)_mm_setzero_pd(), \ - (C) & 0x1, ((C) & 0x2) >> 1); }) + (__v2df)_mm_undefined_pd(), \ + ((C) >> 0) & 0x1, ((C) >> 1) & 0x1); }) /// \brief Copies the values in a 256-bit vector of [4 x double] as /// specified by the immediate integer operand. @@ -1040,10 +1040,11 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_permute_pd(A, C) __extension__ ({ \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)_mm256_setzero_pd(), \ - (C) & 0x1, ((C) & 0x2) >> 1, \ - 2 + (((C) & 0x4) >> 2), \ - 2 + (((C) & 0x8) >> 3)); }) + (__v4df)_mm256_undefined_pd(), \ + 0 + (((C) >> 0) & 0x1), \ + 0 + (((C) >> 1) & 0x1), \ + 2 + (((C) >> 2) & 0x1), \ + 2 + (((C) >> 3) & 0x1)); }) /// \brief Copies the values in a 128-bit vector of [4 x float] as /// specified by the immediate integer operand. @@ -1099,9 +1100,9 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_permute_ps(A, C) __extension__ ({ \ (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (C) & 0x3, ((C) & 0xc) >> 2, \ - ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) + (__v4sf)_mm_undefined_ps(), \ + ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) /// \brief Copies the values in a 256-bit vector of [8 x float] as /// specified by the immediate integer operand. @@ -1193,13 +1194,15 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_permute_ps(A, C) __extension__ ({ \ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (C) & 0x3, ((C) & 0xc) >> 2, \ - ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \ - 4 + (((C) & 0x03) >> 0), \ - 4 + (((C) & 0x0c) >> 2), \ - 4 + (((C) & 0x30) >> 4), \ - 4 + (((C) & 0xc0) >> 6)); }) + (__v8sf)_mm256_undefined_ps(), \ + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 + (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3)); }) /// \brief Permutes 128-bit data values stored in two 256-bit vectors of /// [4 x double], as specified by the immediate integer operand. @@ -1538,16 +1541,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// 11: Bits [127:96] and [255:224] are copied from the selected operand. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. #define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), \ - (mask) & 0x3, \ - ((mask) & 0xc) >> 2, \ - (((mask) & 0x30) >> 4) + 8, \ - (((mask) & 0xc0) >> 6) + 8, \ - ((mask) & 0x3) + 4, \ - (((mask) & 0xc) >> 2) + 4, \ - (((mask) & 0x30) >> 4) + 12, \ - (((mask) & 0xc0) >> 6) + 12); }) + (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), \ + 0 + (((mask) >> 0) & 0x3), \ + 0 + (((mask) >> 2) & 0x3), \ + 8 + (((mask) >> 4) & 0x3), \ + 8 + (((mask) >> 6) & 0x3), \ + 4 + (((mask) >> 0) & 0x3), \ + 4 + (((mask) >> 2) & 0x3), \ + 12 + (((mask) >> 4) & 0x3), \ + 12 + (((mask) >> 6) & 0x3)); }) /// \brief Selects four double-precision values from the 256-bit operands of /// [4 x double], as specified by the immediate value operand. The selected @@ -1591,12 +1594,12 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// destination. /// \returns A 256-bit vector of [4 x double] containing the shuffled values. #define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), \ - (mask) & 0x1, \ - (((mask) & 0x2) >> 1) + 4, \ - (((mask) & 0x4) >> 2) + 2, \ - (((mask) & 0x8) >> 3) + 6); }) + (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ + (__v4df)(__m256d)(b), \ + 0 + (((mask) >> 0) & 0x1), \ + 4 + (((mask) >> 1) & 0x1), \ + 2 + (((mask) >> 2) & 0x1), \ + 6 + (((mask) >> 3) & 0x1)); }) /* Compare */ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ @@ -2814,7 +2817,7 @@ _mm256_castsi128_si256(__m128i __a) #define _mm256_extractf128_ps(V, M) __extension__ ({ \ (__m128)__builtin_shufflevector( \ (__v8sf)(__m256)(V), \ - (__v8sf)(_mm256_setzero_ps()), \ + (__v8sf)(_mm256_undefined_ps()), \ (((M) & 1) ? 4 : 0), \ (((M) & 1) ? 5 : 1), \ (((M) & 1) ? 6 : 2), \ @@ -2823,14 +2826,14 @@ _mm256_castsi128_si256(__m128i __a) #define _mm256_extractf128_pd(V, M) __extension__ ({ \ (__m128d)__builtin_shufflevector( \ (__v4df)(__m256d)(V), \ - (__v4df)(_mm256_setzero_pd()), \ + (__v4df)(_mm256_undefined_pd()), \ (((M) & 1) ? 2 : 0), \ (((M) & 1) ? 3 : 1) );}) #define _mm256_extractf128_si256(V, M) __extension__ ({ \ (__m128i)__builtin_shufflevector( \ (__v4di)(__m256i)(V), \ - (__v4di)(_mm256_setzero_si256()), \ + (__v4di)(_mm256_undefined_si256()), \ (((M) & 1) ? 2 : 0), \ (((M) & 1) ? 3 : 1) );}) diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 22eb02c69119..c78d059f442b 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2300,25 +2300,25 @@ _mm_movemask_epi8(__m128i __a) #define _mm_shuffle_epi32(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \ - (__v4si)_mm_setzero_si128(), \ - (imm) & 0x3, ((imm) & 0xc) >> 2, \ - ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) + (__v4si)_mm_undefined_si128(), \ + ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \ + ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); }) #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ - (__v8hi)_mm_setzero_si128(), \ - (imm) & 0x3, ((imm) & 0xc) >> 2, \ - ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + (__v8hi)_mm_undefined_si128(), \ + ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \ + ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \ 4, 5, 6, 7); }) #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ - (__v8hi)_mm_setzero_si128(), \ + (__v8hi)_mm_undefined_si128(), \ 0, 1, 2, 3, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6)); }) + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3)); }) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b) @@ -2406,7 +2406,8 @@ _mm_movemask_pd(__m128d __a) #define _mm_shuffle_pd(a, b, i) __extension__ ({ \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ - (i) & 1, (((i) & 2) >> 1) + 2); }) + 0 + (((i) >> 0) & 0x1), \ + 2 + (((i) >> 1) & 0x1)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 8d94181a1814..27967e0d856c 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2496,9 +2496,10 @@ _mm_setcsr(unsigned int __i) /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ - (mask) & 0x3, ((mask) & 0xc) >> 2, \ - (((mask) & 0x30) >> 4) + 4, \ - (((mask) & 0xc0) >> 6) + 4); }) + 0 + (((mask) >> 0) & 0x3), \ + 0 + (((mask) >> 2) & 0x3), \ + 4 + (((mask) >> 4) & 0x3), \ + 4 + (((mask) >> 6) & 0x3)); }) /// \brief Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c index f1f211e54d33..bf3e8cc5db60 100644 --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -346,19 +346,19 @@ long long test_mm256_extract_epi64(__m256i A) { __m128d test_mm256_extractf128_pd(__m256d A) { // CHECK-LABEL: test_mm256_extractf128_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> return _mm256_extractf128_pd(A, 1); } __m128 test_mm256_extractf128_ps(__m256 A) { // CHECK-LABEL: test_mm256_extractf128_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> return _mm256_extractf128_ps(A, 1); } __m128i test_mm256_extractf128_si256(__m256i A) { // CHECK-LABEL: test_mm256_extractf128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extractf128_si256(A, 1); } @@ -647,32 +647,32 @@ __m256 test_mm256_or_ps(__m256 A, __m256 B) { __m128d test_mm_permute_pd(__m128d A) { // CHECK-LABEL: test_mm_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> return _mm_permute_pd(A, 1); } __m256d test_mm256_permute_pd(__m256d A) { // CHECK-LABEL: test_mm256_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> return _mm256_permute_pd(A, 5); } __m128 test_mm_permute_ps(__m128 A) { // CHECK-LABEL: test_mm_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> return _mm_permute_ps(A, 0x1b); } // Test case for PR12401 __m128 test2_mm_permute_ps(__m128 a) { // CHECK-LABEL: test2_mm_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> return _mm_permute_ps(a, 0xe6); } __m256 test_mm256_permute_ps(__m256 A) { // CHECK-LABEL: test_mm256_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> return _mm256_permute_ps(A, 0x1b); } @@ -1177,7 +1177,7 @@ void test_mm256_storeu2_m128(float* A, float* B, __m256 C) { // CHECK-LABEL: test_mm256_storeu2_m128 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128(A, B, C); } @@ -1186,7 +1186,7 @@ void test_mm256_storeu2_m128d(double* A, double* B, __m256d C) { // CHECK-LABEL: test_mm256_storeu2_m128d // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128d(A, B, C); } @@ -1195,7 +1195,7 @@ void test_mm256_storeu2_m128i(__m128i* A, __m128i* B, __m256i C) { // CHECK-LABEL: test_mm256_storeu2_m128i // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128i(A, B, C); } diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c index db3c40a3218d..b0deb47e6ec7 100644 --- a/clang/test/CodeGen/avx2-builtins.c +++ b/clang/test/CodeGen/avx2-builtins.c @@ -370,20 +370,20 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) { __m128i test0_mm256_extracti128_si256_0(__m256i a) { // CHECK-LABEL: test0_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extracti128_si256(a, 0); } __m128i test1_mm256_extracti128_si256_1(__m256i a) { // CHECK-LABEL: test1_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extracti128_si256(a, 1); } // Immediate should be truncated to one bit. __m128i test2_mm256_extracti128_si256(__m256i a) { // CHECK-LABEL: test2_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extracti128_si256(a, 2); } @@ -891,13 +891,13 @@ __m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) { __m256i test_mm256_permute4x64_epi64(__m256i a) { // CHECK-LABEL: test_mm256_permute4x64_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> return _mm256_permute4x64_epi64(a, 35); } __m256d test_mm256_permute4x64_pd(__m256d a) { // CHECK-LABEL: test_mm256_permute4x64_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> return _mm256_permute4x64_pd(a, 25); } diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index a475e0eaaad8..830b702f930c 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -3409,40 +3409,40 @@ __m512i test_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 _ __m512d test_mm512_permute_pd(__m512d __X) { // CHECK-LABEL: @test_mm512_permute_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> return _mm512_permute_pd(__X, 2); } __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { // CHECK-LABEL: @test_mm512_mask_permute_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_permute_pd(__W, __U, __X, 2); } __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { // CHECK-LABEL: @test_mm512_maskz_permute_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_permute_pd(__U, __X, 2); } __m512 test_mm512_permute_ps(__m512 __X) { // CHECK-LABEL: @test_mm512_permute_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> return _mm512_permute_ps(__X, 2); } __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { // CHECK-LABEL: @test_mm512_mask_permute_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_permute_ps(__W, __U, __X, 2); } __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { // CHECK-LABEL: @test_mm512_maskz_permute_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_permute_ps(__U, __X, 2); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index b4024ba8c64f..f27849a15c01 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -4615,56 +4615,56 @@ __m256 test_mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A) { __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) { // CHECK-LABEL: @test_mm_mask_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_permute_pd(__W, __U, __X, 1); } __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) { // CHECK-LABEL: @test_mm_maskz_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_permute_pd(__U, __X, 1); } __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) { // CHECK-LABEL: @test_mm256_mask_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_permute_pd(__W, __U, __X, 5); } __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) { // CHECK-LABEL: @test_mm256_maskz_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_permute_pd(__U, __X, 5); } __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) { // CHECK-LABEL: @test_mm_mask_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_permute_ps(__W, __U, __X, 0x1b); } __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) { // CHECK-LABEL: @test_mm_maskz_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_permute_ps(__U, __X, 0x1b); } __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) { // CHECK-LABEL: @test_mm256_mask_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_permute_ps(__W, __U, __X, 0x1b); } __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) { // CHECK-LABEL: @test_mm256_maskz_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_permute_ps(__U, __X, 0x1b); }