forked from OSchip/llvm-project
[AVX-512] Remove masked vector extract builtins and replace with native shufflevectors and selects.
Unfortunately, the backend currently doesn't fold masks into the instructions correctly when they come from these shufflevectors. I'll work on that in a future commit. llvm-svn: 285540
This commit is contained in:
parent
66b2fd1209
commit
93ffabd28d
|
@ -1004,8 +1004,6 @@ TARGET_BUILTIN(__builtin_ia32_alignd128_mask, "V4iV4iV4iIiV4iUc","","avx512vl")
|
|||
TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "", "avx512f")
|
||||
|
||||
TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUci","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUci","","avx512vl")
|
||||
|
@ -1727,16 +1725,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc","","avx512vl")
|
|||
TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc","","avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc","","avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc","","avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc","","avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc","","avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc","","avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc","","avx512dq,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc","","avx512dq,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_insertf32x8_mask, "V16fV16fV8fIiV16fUs","","avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_insertf64x2_512_mask, "V8dV8dV2dIiV8dUc","","avx512dq")
|
||||
TARGET_BUILTIN(__builtin_ia32_inserti32x8_mask, "V16iV16iV8iIiV16iUs","","avx512dq")
|
||||
|
|
|
@ -1116,70 +1116,80 @@ _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
|
|||
}
|
||||
|
||||
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
|
||||
(__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
|
||||
(__v8sf)_mm256_setzero_ps(), \
|
||||
(__mmask8)-1); })
|
||||
(__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_undefined_ps(), \
|
||||
((imm) & 1) ? 8 : 0, \
|
||||
((imm) & 1) ? 9 : 1, \
|
||||
((imm) & 1) ? 10 : 2, \
|
||||
((imm) & 1) ? 11 : 3, \
|
||||
((imm) & 1) ? 12 : 4, \
|
||||
((imm) & 1) ? 13 : 5, \
|
||||
((imm) & 1) ? 14 : 6, \
|
||||
((imm) & 1) ? 15 : 7); })
|
||||
|
||||
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
|
||||
(__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
|
||||
(__v8sf)(__m256)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
|
||||
(__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
|
||||
(__v8sf)(W)); })
|
||||
|
||||
#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
|
||||
(__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
|
||||
(__v8sf)_mm256_setzero_ps(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
|
||||
(__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
|
||||
(__v8sf)_mm256_setzero_ps()); })
|
||||
|
||||
#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
|
||||
(__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
|
||||
(int)(imm), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)-1); })
|
||||
(__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_undefined_pd(), \
|
||||
0 + ((imm) & 0x3) * 2, \
|
||||
1 + ((imm) & 0x3) * 2); })
|
||||
|
||||
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
|
||||
(__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
|
||||
(int)(imm), \
|
||||
(__v2df)(__m128d)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
|
||||
(__v2df)_mm512_extractf64x2_pd((A), (imm)), \
|
||||
(__v2df)(W)); })
|
||||
|
||||
#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
|
||||
(__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
|
||||
(int)(imm), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
|
||||
(__v2df)_mm512_extractf64x2_pd((A), (imm)), \
|
||||
(__v2df)_mm_setzero_pd()); })
|
||||
|
||||
#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
|
||||
(__v8si)_mm256_setzero_si256(), \
|
||||
(__mmask8)-1); })
|
||||
(__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \
|
||||
(__v16si)_mm512_undefined_epi32(), \
|
||||
((imm) & 1) ? 8 : 0, \
|
||||
((imm) & 1) ? 9 : 1, \
|
||||
((imm) & 1) ? 10 : 2, \
|
||||
((imm) & 1) ? 11 : 3, \
|
||||
((imm) & 1) ? 12 : 4, \
|
||||
((imm) & 1) ? 13 : 5, \
|
||||
((imm) & 1) ? 14 : 6, \
|
||||
((imm) & 1) ? 15 : 7); })
|
||||
|
||||
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
|
||||
(__v8si)(__m256i)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
|
||||
(__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
|
||||
(__v8si)(W)); })
|
||||
|
||||
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
|
||||
(__v8si)_mm256_setzero_si256(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
|
||||
(__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
|
||||
(__v8si)_mm256_setzero_si256()); })
|
||||
|
||||
#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
|
||||
(int)(imm), \
|
||||
(__v2di)_mm_setzero_di(), \
|
||||
(__mmask8)-1); })
|
||||
(__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \
|
||||
(__v8di)_mm512_undefined_epi32(), \
|
||||
0 + ((imm) & 0x3) * 2, \
|
||||
1 + ((imm) & 0x3) * 2); })
|
||||
|
||||
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
|
||||
(int)(imm), \
|
||||
(__v2di)(__m128i)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
|
||||
(__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
|
||||
(__v2di)(W)); })
|
||||
|
||||
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
|
||||
(int)(imm), \
|
||||
(__v2di)_mm_setzero_di(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
|
||||
(__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
|
||||
(__v2di)_mm_setzero_di()); })
|
||||
|
||||
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
|
||||
(__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
|
||||
|
|
|
@ -3440,35 +3440,42 @@ _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
|
|||
(__mmask16)(U)); })
|
||||
/* Vector Extract */
|
||||
|
||||
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
|
||||
(__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
|
||||
(__v4df)_mm256_setzero_si256(), \
|
||||
(__mmask8)-1); })
|
||||
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
|
||||
(__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_undefined_pd(), \
|
||||
((I) & 1) ? 4 : 0, \
|
||||
((I) & 1) ? 5 : 1, \
|
||||
((I) & 1) ? 6 : 2, \
|
||||
((I) & 1) ? 7 : 3); })
|
||||
|
||||
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
|
||||
(__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
|
||||
(__v4df)(__m256d)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
|
||||
(__v4df)_mm512_extractf64x4_pd((A), (imm)), \
|
||||
(__v4df)(W)); })
|
||||
|
||||
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
|
||||
(__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
|
||||
(__v4df)_mm256_setzero_pd(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
|
||||
(__v4df)_mm512_extractf64x4_pd((A), (imm)), \
|
||||
(__v4df)_mm256_setzero_pd()); })
|
||||
|
||||
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
|
||||
(__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)-1); })
|
||||
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
|
||||
(__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_undefined_ps(), \
|
||||
0 + ((I) & 0x3) * 4, \
|
||||
1 + ((I) & 0x3) * 4, \
|
||||
2 + ((I) & 0x3) * 4, \
|
||||
3 + ((I) & 0x3) * 4); })
|
||||
|
||||
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
|
||||
(__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
|
||||
(__v4sf)(__m128)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
|
||||
(__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
|
||||
(__v4sf)(W)); })
|
||||
|
||||
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
|
||||
(__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
|
||||
(__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
|
||||
(__v4sf)_mm_setzero_ps()); })
|
||||
|
||||
/* Vector Blend */
|
||||
|
||||
static __inline __m512d __DEFAULT_FN_ATTRS
|
||||
|
@ -7895,35 +7902,41 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
|
|||
__builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
|
||||
}
|
||||
|
||||
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
|
||||
(__v4si)_mm_undefined_si128(), \
|
||||
(__mmask8)-1); })
|
||||
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \
|
||||
(__v16si)_mm512_undefined_epi32(), \
|
||||
0 + ((imm) & 0x3) * 4, \
|
||||
1 + ((imm) & 0x3) * 4, \
|
||||
2 + ((imm) & 0x3) * 4, \
|
||||
3 + ((imm) & 0x3) * 4); })
|
||||
|
||||
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
|
||||
(__v4si)(__m128i)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \
|
||||
(__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
|
||||
(__v4si)__W); })
|
||||
|
||||
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
|
||||
(__v4si)_mm_setzero_si128(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \
|
||||
(__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
|
||||
(__v4si)_mm_setzero_si128()); })
|
||||
|
||||
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
|
||||
(__v4di)_mm256_undefined_si256(), \
|
||||
(__mmask8)-1); })
|
||||
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
|
||||
(__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \
|
||||
(__v8di)_mm512_undefined_epi32(), \
|
||||
((imm) & 1) ? 4 : 0, \
|
||||
((imm) & 1) ? 5 : 1, \
|
||||
((imm) & 1) ? 6 : 2, \
|
||||
((imm) & 1) ? 7 : 3); })
|
||||
|
||||
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
|
||||
(__v4di)(__m256i)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
|
||||
(__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
|
||||
(__v4di)__W); })
|
||||
|
||||
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
|
||||
(__v4di)_mm256_setzero_si256(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
|
||||
(__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
|
||||
(__v4di)_mm256_setzero_si256()); })
|
||||
|
||||
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
|
||||
(__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
|
||||
|
|
|
@ -1096,40 +1096,36 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
|
|||
}
|
||||
|
||||
#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
|
||||
(__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
|
||||
(int)(imm), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)-1); })
|
||||
(__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \
|
||||
(__v4df)_mm256_undefined_pd(), \
|
||||
((imm) & 1) ? 2 : 0, \
|
||||
((imm) & 1) ? 3 : 1); })
|
||||
|
||||
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
|
||||
(__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
|
||||
(int)(imm), \
|
||||
(__v2df)(__m128d)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
|
||||
(__v2df)_mm256_extractf64x2_pd((A), (imm)), \
|
||||
(__v2df)(W)); })
|
||||
|
||||
#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
|
||||
(__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
|
||||
(int)(imm), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
|
||||
(__v2df)_mm256_extractf64x2_pd((A), (imm)), \
|
||||
(__v2df)_mm_setzero_pd()); })
|
||||
|
||||
#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
|
||||
(int)(imm), \
|
||||
(__v2di)_mm_setzero_di(), \
|
||||
(__mmask8)-1); })
|
||||
(__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \
|
||||
(__v4di)_mm256_undefined_si256(), \
|
||||
((imm) & 1) ? 2 : 0, \
|
||||
((imm) & 1) ? 3 : 1); })
|
||||
|
||||
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
|
||||
(int)(imm), \
|
||||
(__v2di)(__m128i)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
|
||||
(__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
|
||||
(__v2di)(W)); })
|
||||
|
||||
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
|
||||
(int)(imm), \
|
||||
(__v2di)_mm_setzero_di(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
|
||||
(__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
|
||||
(__v2di)_mm_setzero_di()); })
|
||||
|
||||
#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
|
||||
(__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
|
||||
|
|
|
@ -8273,40 +8273,40 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
|
|||
}
|
||||
|
||||
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
|
||||
(__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
|
||||
(int)(imm), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)-1); })
|
||||
(__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \
|
||||
(__v8sf)_mm256_undefined_ps(), \
|
||||
((imm) & 1) ? 4 : 0, \
|
||||
((imm) & 1) ? 5 : 1, \
|
||||
((imm) & 1) ? 6 : 2, \
|
||||
((imm) & 1) ? 7 : 3); })
|
||||
|
||||
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
|
||||
(__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
|
||||
(int)(imm), \
|
||||
(__v4sf)(__m128)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
|
||||
(__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
|
||||
(__v4sf)(W)); })
|
||||
|
||||
#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
|
||||
(__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
|
||||
(int)(imm), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
|
||||
(__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
|
||||
(__v4sf)_mm_setzero_ps()); })
|
||||
|
||||
#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
|
||||
(int)(imm), \
|
||||
(__v4si)_mm_setzero_si128(), \
|
||||
(__mmask8)-1); })
|
||||
(__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \
|
||||
(__v8si)_mm256_undefined_si256(), \
|
||||
((imm) & 1) ? 4 : 0, \
|
||||
((imm) & 1) ? 5 : 1, \
|
||||
((imm) & 1) ? 6 : 2, \
|
||||
((imm) & 1) ? 7 : 3); })
|
||||
|
||||
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
|
||||
(int)(imm), \
|
||||
(__v4si)(__m128i)(W), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128i)__builtin_ia32_selectps_128((__mmask8)(U), \
|
||||
(__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
|
||||
(__v4si)(W)); })
|
||||
|
||||
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
|
||||
(int)(imm), \
|
||||
(__v4si)_mm_setzero_si128(), \
|
||||
(__mmask8)(U)); })
|
||||
(__m128i)__builtin_ia32_selectps_128((__mmask8)(U), \
|
||||
(__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
|
||||
(__v4si)_mm_setzero_si128()); })
|
||||
|
||||
#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
|
||||
(__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
|
||||
|
|
|
@ -1971,21 +1971,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
|
|||
switch (BuiltinID) {
|
||||
default:
|
||||
return false;
|
||||
case X86::BI__builtin_ia32_extractf64x4_mask:
|
||||
case X86::BI__builtin_ia32_extracti64x4_mask:
|
||||
case X86::BI__builtin_ia32_extractf32x8_mask:
|
||||
case X86::BI__builtin_ia32_extracti32x8_mask:
|
||||
case X86::BI__builtin_ia32_extractf64x2_256_mask:
|
||||
case X86::BI__builtin_ia32_extracti64x2_256_mask:
|
||||
case X86::BI__builtin_ia32_extractf32x4_256_mask:
|
||||
case X86::BI__builtin_ia32_extracti32x4_256_mask:
|
||||
i = 1; l = 0; u = 1;
|
||||
break;
|
||||
case X86::BI_mm_prefetch:
|
||||
case X86::BI__builtin_ia32_extractf32x4_mask:
|
||||
case X86::BI__builtin_ia32_extracti32x4_mask:
|
||||
case X86::BI__builtin_ia32_extractf64x2_512_mask:
|
||||
case X86::BI__builtin_ia32_extracti64x2_512_mask:
|
||||
i = 1; l = 0; u = 3;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_insertf32x8_mask:
|
||||
|
|
|
@ -1054,73 +1054,81 @@ __m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
|
|||
}
|
||||
__m256 test_mm512_extractf32x8_ps(__m512 __A) {
|
||||
// CHECK-LABEL: @test_mm512_extractf32x8_ps
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf32x8
|
||||
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
return _mm512_extractf32x8_ps(__A, 1);
|
||||
}
|
||||
|
||||
__m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
|
||||
// CHECK-LABEL: @test_mm512_mask_extractf32x8_ps
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf32x8
|
||||
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
|
||||
return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1);
|
||||
}
|
||||
|
||||
__m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf32x8
|
||||
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
|
||||
return _mm512_maskz_extractf32x8_ps(__U, __A, 1);
|
||||
}
|
||||
|
||||
__m128d test_mm512_extractf64x2_pd(__m512d __A) {
|
||||
// CHECK-LABEL: @test_mm512_extractf64x2_pd
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf64x2
|
||||
// CHECK: shufflevector <8 x double> %0, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
return _mm512_extractf64x2_pd(__A, 3);
|
||||
}
|
||||
|
||||
__m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) {
|
||||
// CHECK-LABEL: @test_mm512_mask_extractf64x2_pd
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf64x2
|
||||
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
|
||||
return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3);
|
||||
}
|
||||
|
||||
__m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf64x2
|
||||
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
|
||||
return _mm512_maskz_extractf64x2_pd(__U, __A, 3);
|
||||
}
|
||||
|
||||
__m256i test_mm512_extracti32x8_epi32(__m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_extracti32x8_epi32
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti32x8
|
||||
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
return _mm512_extracti32x8_epi32(__A, 1);
|
||||
}
|
||||
|
||||
__m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti32x8
|
||||
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
|
||||
return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1);
|
||||
}
|
||||
|
||||
__m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti32x8
|
||||
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
|
||||
return _mm512_maskz_extracti32x8_epi32(__U, __A, 1);
|
||||
}
|
||||
|
||||
__m128i test_mm512_extracti64x2_epi64(__m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_extracti64x2_epi64
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti64x2
|
||||
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
|
||||
return _mm512_extracti64x2_epi64(__A, 3);
|
||||
}
|
||||
|
||||
__m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti64x2
|
||||
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
|
||||
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
|
||||
return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3);
|
||||
}
|
||||
|
||||
__m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti64x2
|
||||
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
|
||||
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
|
||||
return _mm512_maskz_extracti64x2_epi64(__U, __A, 3);
|
||||
}
|
||||
|
||||
|
|
|
@ -1236,38 +1236,42 @@ __mmask8 test_mm512_mask_cmpunord_ps_mask(__mmask8 k, __m512 a, __m512 b) {
|
|||
__m256d test_mm512_extractf64x4_pd(__m512d a)
|
||||
{
|
||||
// CHECK-LABEL: @test_mm512_extractf64x4_pd
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf64x4.512
|
||||
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
return _mm512_extractf64x4_pd(a, 1);
|
||||
}
|
||||
|
||||
__m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){
|
||||
//CHECK-LABEL:@test_mm512_mask_extractf64x4_pd
|
||||
//CHECL:@llvm.x86.avx512.mask.vextractf64x4.512
|
||||
return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
|
||||
// CHECK-LABEL:@test_mm512_mask_extractf64x4_pd
|
||||
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
|
||||
return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
|
||||
}
|
||||
|
||||
__m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){
|
||||
//CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd
|
||||
//CHECL:@llvm.x86.avx512.mask.vextractf64x4.512
|
||||
return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
|
||||
// CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd
|
||||
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
|
||||
return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
|
||||
}
|
||||
|
||||
__m128 test_mm512_extractf32x4_ps(__m512 a)
|
||||
{
|
||||
// CHECK-LABEL: @test_mm512_extractf32x4_ps
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf32x4.512
|
||||
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
return _mm512_extractf32x4_ps(a, 1);
|
||||
}
|
||||
|
||||
__m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512d __A){
|
||||
//CHECK-LABEL:@test_mm512_mask_extractf32x4_ps
|
||||
//CHECL: @llvm.x86.avx512.mask.vextractf32x4.512
|
||||
return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
|
||||
// CHECK-LABEL:@test_mm512_mask_extractf32x4_ps
|
||||
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
|
||||
return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
|
||||
}
|
||||
|
||||
__m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512d __A){
|
||||
//CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps
|
||||
//CHECL: @llvm.x86.avx512.mask.vextractf32x4.512
|
||||
// CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps
|
||||
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
|
||||
return _mm512_maskz_extractf32x4_ps( __U, __A, 1);
|
||||
}
|
||||
|
||||
|
@ -5097,37 +5101,41 @@ void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
|
|||
|
||||
__m128i test_mm512_extracti32x4_epi32(__m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_extracti32x4_epi32
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti32x4
|
||||
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
|
||||
return _mm512_extracti32x4_epi32(__A, 3);
|
||||
}
|
||||
|
||||
__m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti32x4
|
||||
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
|
||||
return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3);
|
||||
}
|
||||
|
||||
__m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti32x4
|
||||
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
|
||||
return _mm512_maskz_extracti32x4_epi32(__U, __A, 3);
|
||||
}
|
||||
|
||||
__m256i test_mm512_extracti64x4_epi64(__m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_extracti64x4_epi64
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti64x4
|
||||
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
return _mm512_extracti64x4_epi64(__A, 1);
|
||||
}
|
||||
|
||||
__m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti64x4
|
||||
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
|
||||
return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1);
|
||||
}
|
||||
|
||||
__m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64
|
||||
// CHECK: @llvm.x86.avx512.mask.vextracti64x4
|
||||
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
|
||||
return _mm512_maskz_extracti64x4_epi64(__U, __A, 1);
|
||||
}
|
||||
|
||||
|
|
|
@ -6555,37 +6555,41 @@ void test_mm256_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A
|
|||
|
||||
__m128 test_mm256_extractf32x4_ps(__m256 __A) {
|
||||
// CHECK-LABEL: @test_mm256_extractf32x4_ps
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf32x4
|
||||
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
return _mm256_extractf32x4_ps(__A, 1);
|
||||
}
|
||||
|
||||
__m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) {
|
||||
// CHECK-LABEL: @test_mm256_mask_extractf32x4_ps
|
||||
// CHECK: @llvm.x86.avx512.mask.vextractf32x4
|
||||
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
|
||||
return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1);
|
||||
}
|
||||
|
||||
// Zero-masked 32x4 float extract: shufflevector then select against zero;
// stale intrinsic CHECK removed.
__m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) {
  // CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps
  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
  return _mm256_maskz_extractf32x4_ps(__U, __A, 1);
}
|
||||
|
||||
// Unmasked 32x4 integer extract: lowered to a shufflevector of lanes 4..7;
// stale intrinsic CHECK removed.
__m128i test_mm256_extracti32x4_epi32(__m256i __A) {
  // CHECK-LABEL: @test_mm256_extracti32x4_epi32
  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  return _mm256_extracti32x4_epi32(__A, 1);
}
|
||||
|
||||
// Merge-masked 32x4 integer extract. Two fixes:
//  - stale @llvm.x86.avx512.mask.vextracti32x4 CHECK removed (intrinsic no
//    longer emitted);
//  - the select CHECK used <4 x float>, but this is an epi32 test — the IR
//    selects <4 x i32>, so the float pattern could never match.
__m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) {
  // CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32
  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1);
}
|
||||
|
||||
// Zero-masked 32x4 integer extract. Same two fixes as the merge-masked
// variant: stale intrinsic CHECK removed, and the select element type
// corrected from <4 x float> to <4 x i32> (this is an integer test).
__m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) {
  // CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32
  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm256_maskz_extracti32x4_epi32(__U, __A, 1);
}
|
||||
|
||||
|
|
|
@ -992,37 +992,41 @@ __m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
|
|||
|
||||
// Unmasked 64x2 double extract (avx512dq+vl): now a shufflevector of the
// upper two lanes; stale intrinsic CHECK removed.
__m128d test_mm256_extractf64x2_pd(__m256d __A) {
  // CHECK-LABEL: @test_mm256_extractf64x2_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  return _mm256_extractf64x2_pd(__A, 1);
}
|
||||
|
||||
// Merge-masked 64x2 double extract: shufflevector then per-lane select
// against __W; stale @llvm.x86.avx512.mask.vextractf64x2 CHECK removed.
__m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) {
  // CHECK-LABEL: @test_mm256_mask_extractf64x2_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
  return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1);
}
|
||||
|
||||
// Zero-masked 64x2 double extract: shufflevector then select against zero;
// stale intrinsic CHECK removed.
__m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
  // CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
  return _mm256_maskz_extractf64x2_pd(__U, __A, 1);
}
|
||||
|
||||
// Unmasked 64x2 integer extract: now a shufflevector of the upper two
// lanes; stale intrinsic CHECK removed.
__m128i test_mm256_extracti64x2_epi64(__m256i __A) {
  // CHECK-LABEL: @test_mm256_extracti64x2_epi64
  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  return _mm256_extracti64x2_epi64(__A, 1);
}
|
||||
|
||||
// Merge-masked 64x2 integer extract: shufflevector then per-lane select
// against __W; stale @llvm.x86.avx512.mask.vextracti64x2 CHECK removed.
__m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) {
  // CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64
  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1);
}
|
||||
|
||||
// Zero-masked 64x2 integer extract: shufflevector then select against zero;
// stale intrinsic CHECK removed.
__m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
  // CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64
  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm256_maskz_extracti64x2_epi64(__U, __A, 1);
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue