[X86] Fix some places where macro arguments to intrinsics weren't cast to __m512(i|d)/__m256(i|d)/__m128(i|d) first.

The majority of the cases were correct. This fixes the few that weren't.

I also removed some superfluous parentheses in non-macro code that confused my attempts at grepping for missing casts.

llvm-svn: 333615
Craig Topper 2018-05-31 01:24:40 +00:00
parent 31b373963f
commit cbf3929bc9
7 changed files with 104 additions and 104 deletions
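
For readers unfamiliar with the convention this commit enforces: each macro argument is first cast to the public vector type (__m512i, __m256d, etc.) and only then to the internal element-typed vector (__v8di, __v4df, etc.). Below is a minimal sketch of why the intermediate cast matters; the macro demo_lo64 is hypothetical and not part of the headers.

#include <immintrin.h>

/* Hypothetical macro mirroring the header convention: the argument X is
   normalized to the public type __m128i before being viewed as the
   internal element-typed vector __v2di.  Without the (__m128i) cast,
   (__v2di)(X) would directly reinterpret whatever X happens to be, which
   can be rejected or miscompile for argument types the intrinsic should
   accept. */
#define demo_lo64(X) ((long long)((__v2di)(__m128i)(X))[0])

int main(void) {
  __m128i v = _mm_set_epi64x(42, 7);
  return demo_lo64(v) == 7 ? 0 : 1;  /* low 64-bit element is 7 */
}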

lib/Headers/__wmmintrin_pclmul.h

@@ -55,8 +55,8 @@
 /// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
 /// \returns The 128-bit integer vector containing the result of the carry-less
 ///    multiplication of the selected 64-bit values.
-#define _mm_clmulepi64_si128(__X, __Y, __I) \
-  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
-                                        (__v2di)(__m128i)(__Y), (char)(__I)))
+#define _mm_clmulepi64_si128(X, Y, I) \
+  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
+                                        (__v2di)(__m128i)(Y), (char)(I)))
 #endif /* __WMMINTRIN_PCLMUL_H */
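
As a quick usage note for the macro fixed above, a hedged sketch (assumes a PCLMUL-capable target, e.g. compiled with clang -mpclmul):

#include <wmmintrin.h>

/* Carry-less multiply of the low 64-bit halves of a and b.
   Immediate 0x00 selects bits[63:0] of both operands. */
__m128i clmul_low(__m128i a, __m128i b) {
  return _mm_clmulepi64_si128(a, b, 0x00);
}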

lib/Headers/avx512dqintrin.h

@@ -1119,7 +1119,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
-                                      (__v8sf)(W))
+                                      (__v8sf)(__m256)(W))
 #define _mm512_maskz_extractf32x8_ps(U, A, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
@@ -1135,7 +1135,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
-                                       (__v2df)(W))
+                                       (__v2df)(__m128d)(W))
 #define _mm512_maskz_extractf64x2_pd(U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
@@ -1157,7 +1157,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
-                                      (__v8si)(W))
+                                      (__v8si)(__m256i)(W))
 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
@@ -1173,7 +1173,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
-                                      (__v2di)(W))
+                                      (__v2di)(__m128i)(W))
 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
   (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -1203,7 +1203,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_insertf32x8(W, U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
-                                      (__v16sf)(W))
+                                      (__v16sf)(__m512)(W))
 #define _mm512_maskz_insertf32x8(U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
@@ -1225,7 +1225,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_insertf64x2(W, U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
-                                       (__v8df)(W))
+                                       (__v8df)(__m512d)(W))
 #define _mm512_maskz_insertf64x2(U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -1255,7 +1255,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_inserti32x8(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
-                                      (__v16si)(W))
+                                      (__v16si)(__m512i)(W))
 #define _mm512_maskz_inserti32x8(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
@@ -1277,7 +1277,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_inserti64x2(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
-                                      (__v8di)(W))
+                                      (__v8di)(__m512i)(W))
 #define _mm512_maskz_inserti64x2(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \

lib/Headers/avx512fintrin.h

@@ -679,13 +679,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_si512 (__m512i __A, __m512i __B)
 {
-  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
+  return (__m512i)(~(__v8du)__A & (__v8du)__B);
 }
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
 {
-  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
+  return (__m512i)(~(__v16su)__A & (__v16su)__B);
 }
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -706,7 +706,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
+  return (__m512i)(~(__v8du)__A & (__v8du)__B);
 }
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -3622,7 +3622,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
-                                       (__v4df)(W))
+                                       (__v4df)(__m256d)(W))
 #define _mm512_maskz_extractf64x4_pd(U, A, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
@@ -3640,7 +3640,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
-                                      (__v4sf)(W))
+                                      (__v4sf)(__m128)(W))
 #define _mm512_maskz_extractf32x4_ps(U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
@@ -4337,7 +4337,7 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
-                                            (__v8si)(W), \
+                                            (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R))
 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
@@ -7736,7 +7736,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
-                                      (__v4si)(W))
+                                      (__v4si)(__m128i)(W))
 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
@@ -7754,7 +7754,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
-                                      (__v4di)(W))
+                                      (__v4di)(__m256i)(W))
 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
@@ -7776,7 +7776,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_insertf64x4(W, U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
-                                       (__v8df)(W))
+                                       (__v8df)(__m512d)(W))
 #define _mm512_maskz_insertf64x4(U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -7798,7 +7798,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_inserti64x4(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
-                                      (__v8di)(W))
+                                      (__v8di)(__m512i)(W))
 #define _mm512_maskz_inserti64x4(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
@@ -7828,7 +7828,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_insertf32x4(W, U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
-                                      (__v16sf)(W))
+                                      (__v16sf)(__m512)(W))
 #define _mm512_maskz_insertf32x4(U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
@@ -7858,7 +7858,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_inserti32x4(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
-                                      (__v16si)(W))
+                                      (__v16si)(__m512i)(W))
 #define _mm512_maskz_inserti32x4(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
@@ -9386,19 +9386,19 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
 {
-  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
-                                             (__v2df)(__B),
-                                             (__v4sf)(__W),
-                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
+                                             (__v2df)__B,
+                                             (__v4sf)__W,
+                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 {
-  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
-                                             (__v2df)(__B),
+  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
+                                             (__v2df)__B,
                                              (__v4sf)_mm_setzero_ps(),
-                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 #define _mm_cvtss_i32 _mm_cvtss_si32
@@ -9459,19 +9459,19 @@ _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
 {
-  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
-                                            (__v4sf)(__B),
-                                            (__v2df)(__W),
-                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
+                                            (__v4sf)__B,
+                                            (__v2df)__W,
+                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
 {
-  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
-                                            (__v4sf)(__B),
-                                            (__v2df)_mm_setzero_pd(),
-                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
+                                            (__v4sf)__B,
+                                            (__v2df)_mm_setzero_pd(),
+                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 static __inline__ __m128d __DEFAULT_FN_ATTRS

lib/Headers/avx512vbmi2intrin.h

@@ -143,10 +143,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
 }
 #define _mm512_mask_shldi_epi64(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \
-                                          (__v8di)(B), \
+  (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \
+                                          (__v8di)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v8di)(S), \
+                                          (__v8di)(__m512i)(S), \
                                           (__mmask8)(U))
 #define _mm512_maskz_shldi_epi64(U, A, B, I) \
@@ -156,10 +156,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
 #define _mm512_mask_shldi_epi32(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \
-                                          (__v16si)(B), \
+  (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \
+                                          (__v16si)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v16si)(S), \
+                                          (__v16si)(__m512i)(S), \
                                           (__mmask16)(U))
 #define _mm512_maskz_shldi_epi32(U, A, B, I) \
@@ -169,10 +169,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
 #define _mm512_mask_shldi_epi16(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \
-                                          (__v32hi)(B), \
+  (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \
+                                          (__v32hi)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v32hi)(S), \
+                                          (__v32hi)(__m512i)(S), \
                                           (__mmask32)(U))
 #define _mm512_maskz_shldi_epi16(U, A, B, I) \
@@ -182,10 +182,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
 #define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \
-                                          (__v8di)(B), \
+  (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \
+                                          (__v8di)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v8di)(S), \
+                                          (__v8di)(__m512i)(S), \
                                           (__mmask8)(U))
 #define _mm512_maskz_shrdi_epi64(U, A, B, I) \
@@ -195,10 +195,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
 #define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \
-                                          (__v16si)(B), \
+  (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \
+                                          (__v16si)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v16si)(S), \
+                                          (__v16si)(__m512i)(S), \
                                           (__mmask16)(U))
 #define _mm512_maskz_shrdi_epi32(U, A, B, I) \
@@ -208,10 +208,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
 #define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \
-                                          (__v32hi)(B), \
+  (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \
+                                          (__v32hi)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v32hi)(S), \
+                                          (__v32hi)(__m512i)(S), \
                                           (__mmask32)(U))
 #define _mm512_maskz_shrdi_epi16(U, A, B, I) \

lib/Headers/avx512vldqintrin.h

@@ -1094,7 +1094,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
-                                       (__v2df)(W))
+                                       (__v2df)(__m128d)(W))
 #define _mm256_maskz_extractf64x2_pd(U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
@@ -1110,7 +1110,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
   (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
-                                      (__v2di)(W))
+                                      (__v2di)(__m128i)(W))
 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
   (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -1118,7 +1118,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
                                       (__v2di)_mm_setzero_si128())
 #define _mm256_insertf64x2(A, B, imm) \
-  (__m256d)__builtin_shufflevector((__v4df)(A), \
+  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                    (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \
                                    ((imm) & 0x1) ? 0 : 4, \
                                    ((imm) & 0x1) ? 1 : 5, \
@@ -1128,7 +1128,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_insertf64x2(W, U, A, B, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
-                                       (__v4df)(W))
+                                       (__v4df)(__m256d)(W))
 #define _mm256_maskz_insertf64x2(U, A, B, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
@@ -1136,7 +1136,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
                                        (__v4df)_mm256_setzero_pd())
 #define _mm256_inserti64x2(A, B, imm) \
-  (__m256i)__builtin_shufflevector((__v4di)(A), \
+  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
                                    (__v4di)_mm256_castsi128_si256((__m128i)(B)), \
                                    ((imm) & 0x1) ? 0 : 4, \
                                    ((imm) & 0x1) ? 1 : 5, \
@@ -1146,7 +1146,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_inserti64x2(W, U, A, B, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
-                                      (__v4di)(W))
+                                      (__v4di)(__m256i)(W))
 #define _mm256_maskz_inserti64x2(U, A, B, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \

lib/Headers/avx512vlintrin.h

@@ -7729,7 +7729,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
-                                      (__v4sf)(W))
+                                      (__v4sf)(__m128)(W))
 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
@@ -7747,7 +7747,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
-                                      (__v4si)(W))
+                                      (__v4si)(__m128i)(W))
 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
@@ -7755,7 +7755,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                       (__v4si)_mm_setzero_si128())
 #define _mm256_insertf32x4(A, B, imm) \
-  (__m256)__builtin_shufflevector((__v8sf)(A), \
+  (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                   (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
                                   ((imm) & 0x1) ? 0 : 8, \
                                   ((imm) & 0x1) ? 1 : 9, \
@@ -7769,7 +7769,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
-                                      (__v8sf)(W))
+                                      (__v8sf)(__m256)(W))
 #define _mm256_maskz_insertf32x4(U, A, B, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
@@ -7777,7 +7777,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                       (__v8sf)_mm256_setzero_ps())
 #define _mm256_inserti32x4(A, B, imm) \
-  (__m256i)__builtin_shufflevector((__v8si)(A), \
+  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \
                                    (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
                                    ((imm) & 0x1) ? 0 : 8, \
                                    ((imm) & 0x1) ? 1 : 9, \
@@ -7791,7 +7791,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
-                                      (__v8si)(W))
+                                      (__v8si)(__m256i)(W))
 #define _mm256_maskz_inserti32x4(U, A, B, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \

lib/Headers/avx512vlvbmi2intrin.h

@@ -252,10 +252,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
 }
 #define _mm256_mask_shldi_epi64(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \
-                                          (__v4di)(B), \
+  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \
+                                          (__v4di)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v4di)(S), \
+                                          (__v4di)(__m256i)(S), \
                                           (__mmask8)(U))
 #define _mm256_maskz_shldi_epi64(U, A, B, I) \
@@ -265,10 +265,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 #define _mm_mask_shldi_epi64(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \
-                                          (__v2di)(B), \
+  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \
+                                          (__v2di)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v2di)(S), \
+                                          (__v2di)(__m128i)(S), \
                                           (__mmask8)(U))
 #define _mm_maskz_shldi_epi64(U, A, B, I) \
@@ -278,10 +278,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 #define _mm256_mask_shldi_epi32(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \
-                                          (__v8si)(B), \
+  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \
+                                          (__v8si)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v8si)(S), \
+                                          (__v8si)(__m256i)(S), \
                                           (__mmask8)(U))
 #define _mm256_maskz_shldi_epi32(U, A, B, I) \
@@ -291,10 +291,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 #define _mm_mask_shldi_epi32(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \
-                                          (__v4si)(B), \
+  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \
+                                          (__v4si)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v4si)(S), \
+                                          (__v4si)(__m128i)(S), \
                                           (__mmask8)(U))
 #define _mm_maskz_shldi_epi32(U, A, B, I) \
@@ -304,10 +304,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 #define _mm256_mask_shldi_epi16(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
-                                          (__v16hi)(B), \
+  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \
+                                          (__v16hi)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v16hi)(S), \
+                                          (__v16hi)(__m256i)(S), \
                                           (__mmask16)(U))
 #define _mm256_maskz_shldi_epi16(U, A, B, I) \
@@ -317,10 +317,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 #define _mm_mask_shldi_epi16(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \
-                                          (__v8hi)(B), \
+  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \
+                                          (__v8hi)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v8hi)(S), \
+                                          (__v8hi)(__m128i)(S), \
                                           (__mmask8)(U))
 #define _mm_maskz_shldi_epi16(U, A, B, I) \
@@ -330,10 +330,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 #define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \
-                                          (__v4di)(B), \
+  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \
+                                          (__v4di)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v4di)(S), \
+                                          (__v4di)(__m256i)(S), \
                                           (__mmask8)(U))
 #define _mm256_maskz_shrdi_epi64(U, A, B, I) \
@@ -343,10 +343,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 #define _mm_mask_shrdi_epi64(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \
-                                          (__v2di)(B), \
+  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \
+                                          (__v2di)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v2di)(S), \
+                                          (__v2di)(__m128i)(S), \
                                           (__mmask8)(U))
 #define _mm_maskz_shrdi_epi64(U, A, B, I) \
@@ -356,10 +356,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 #define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \
-                                          (__v8si)(B), \
+  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \
+                                          (__v8si)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v8si)(S), \
+                                          (__v8si)(__m256i)(S), \
                                           (__mmask8)(U))
 #define _mm256_maskz_shrdi_epi32(U, A, B, I) \
@@ -369,10 +369,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 #define _mm_mask_shrdi_epi32(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \
-                                          (__v4si)(B), \
+  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \
+                                          (__v4si)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v4si)(S), \
+                                          (__v4si)(__m128i)(S), \
                                           (__mmask8)(U))
 #define _mm_maskz_shrdi_epi32(U, A, B, I) \
@@ -382,10 +382,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 #define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
-                                          (__v16hi)(B), \
+  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \
+                                          (__v16hi)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v16hi)(S), \
+                                          (__v16hi)(__m256i)(S), \
                                           (__mmask16)(U))
 #define _mm256_maskz_shrdi_epi16(U, A, B, I) \
@@ -395,10 +395,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 #define _mm_mask_shrdi_epi16(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \
-                                          (__v8hi)(B), \
+  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \
+                                          (__v8hi)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v8hi)(S), \
+                                          (__v8hi)(__m128i)(S), \
                                           (__mmask8)(U))
 #define _mm_maskz_shrdi_epi16(U, A, B, I) \
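
For completeness, a hedged usage sketch of one of the funnel-shift macros touched above (assumes an AVX512VBMI2+AVX512VL target, e.g. clang -mavx512vbmi2 -mavx512vl):

#include <immintrin.h>

/* For each 64-bit lane, concatenate a (high half) with b (low half) into
   a 128-bit value, shift left by 13, and keep the upper 64 bits. */
__m128i funnel_shl13(__m128i a, __m128i b) {
  return _mm_shldi_epi64(a, b, 13);
}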