diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index e53992bbf50e..62d4c33a9e13 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1590,27 +1590,15 @@ TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmb512, "V8LLiUc","","avx512cd") TARGET_BUILTIN(__builtin_ia32_broadcastmw512, "V16iUs","","avx512cd") -TARGET_BUILTIN(__builtin_ia32_broadcastf32x4_512, "V16fV4fV16fUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_broadcastf64x4_512, "V8dV4dV8dUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_512, "V16iV4iV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_broadcasti64x4_512, "V8LLiV4LLiV8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_broadcastmb128, "V2LLiUc","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcastf32x8_512_mask, "V16fV8fV16fUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcastf64x2_512_mask, "V8dV2dV8dUc","","avx512dq") TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x8_512_mask, "V16iV8iV16iUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_512_mask, "V8LLiV2LLiV8LLiUc","","avx512dq") TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastf64x2_256_mask, "V4dV2dV4dUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_256_mask, "V4LLiV2LLiV4LLiUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastf32x4_256_mask, "V8fV4fV8fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_256_mask, "V8iV4iV8iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl") diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index ae44b98a9495..4fd1add7735b 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -995,51 +995,50 @@ _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_broadcast_f32x8 (__m256 __A) +_mm512_broadcast_f32x8(__m256 __A) { - return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, - _mm512_undefined_ps(), - (__mmask16) -1); + return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A, + 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A) +_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { - return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, - (__v16sf)__O, - __M); + return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, + (__v16sf)_mm512_broadcast_f32x8(__A), + (__v16sf)__O); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A) +_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { - return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, - (__v16sf)_mm512_setzero_ps (), - __M); + return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, + (__v16sf)_mm512_broadcast_f32x8(__A), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_broadcast_f64x2 (__m128d __A) +_mm512_broadcast_f64x2(__m128d __A) { - return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, - (__v8df)_mm512_undefined_pd(), - (__mmask8) -1); + return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A) +_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { - return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, - (__v8df) - __O, __M); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, + (__v8df)_mm512_broadcast_f64x2(__A), + (__v8df)__O); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) +_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { - return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, - (__v8df)_mm512_setzero_ps (), - __M); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, + (__v8df)_mm512_broadcast_f64x2(__A), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1067,52 +1066,50 @@ _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcast_i32x8 (__m256i __A) +_mm512_broadcast_i32x8(__m256i __A) { - return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, - (__v16si)_mm512_setzero_si512(), - (__mmask16) -1); + return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, + 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A) +_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { - return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, - (__v16si)__O, - __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, + (__v16si)_mm512_broadcast_i32x8(__A), + (__v16si)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A) +_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { - return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, - (__v16si) - _mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, + (__v16si)_mm512_broadcast_i32x8(__A), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcast_i64x2 (__m128i __A) +_mm512_broadcast_i64x2(__m128i __A) { - return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, - (__v8di)_mm512_setzero_si512(), - (__mmask8) -1); + return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A) +_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, - (__v8di) - __O, __M); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, + (__v8di)_mm512_broadcast_i64x2(__A), + (__v8di)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) +_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, - (__v8di)_mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, + (__v8di)_mm512_broadcast_i64x2(__A), + (__v8di)_mm512_setzero_si512()); } #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index e6a7217c8967..7d9a52fdf23f 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -7278,107 +7278,97 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) (__mmask8)(U), (int)(R)); }) static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_broadcast_f32x4 (__m128 __A) +_mm512_broadcast_f32x4(__m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1); + return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, + 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) +_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, - (__v16sf) __O, - __M); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, + (__v16sf)_mm512_broadcast_f32x4(__A), + (__v16sf)__O); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) +_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, - (__v16sf) - _mm512_setzero_ps (), - __M); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, + (__v16sf)_mm512_broadcast_f32x4(__A), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_broadcast_f64x4 (__m256d __A) +_mm512_broadcast_f64x4(__m256d __A) { - return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1); + return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, + 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) +_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) { - return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, - (__v8df) __O, - __M); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, + (__v8df)_mm512_broadcast_f64x4(__A), + (__v8df)__O); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) +_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) { - return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, - (__v8df) - _mm512_setzero_pd (), - __M); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, + (__v8df)_mm512_broadcast_f64x4(__A), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcast_i32x4 (__m128i __A) +_mm512_broadcast_i32x4(__m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, - (__v16si) - _mm512_undefined_epi32 (), - (__mmask16) -1); + return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, + 0, 1, 2, 3, 0, 1, 2, 3, + 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) +_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, - (__v16si) __O, - __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_broadcast_i32x4(__A), + (__v16si)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) +_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, - (__v16si) - _mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_broadcast_i32x4(__A), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcast_i64x4 (__m256i __A) +_mm512_broadcast_i64x4(__m256i __A) { - return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, - (__v8di) - _mm512_undefined_epi32 (), - (__mmask8) -1); + return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, + 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) +_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) { - return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, - (__v8di) __O, - __M); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, + (__v8di)_mm512_broadcast_i64x4(__A), + (__v8di)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) +_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) { - return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, - (__v8di) - _mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, + (__v8di)_mm512_broadcast_i64x4(__A), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512d __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index cd9da4370564..aecd7df34d05 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -1000,27 +1000,26 @@ _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_broadcast_f64x2 (__m128d __A) +_mm256_broadcast_f64x2(__m128d __A) { - return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, - (__v4df)_mm256_undefined_pd(), - (__mmask8) -1); + return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, + 0, 1, 0, 1); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A) +_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { - return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, - (__v4df) __O, - __M); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, + (__v4df)_mm256_broadcast_f64x2(__A), + (__v4df)__O); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) { - return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, - (__v4df) _mm256_setzero_ps (), - __M); + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, + (__v4df)_mm256_broadcast_f64x2(__A), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -1072,27 +1071,26 @@ _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_broadcast_i64x2 (__m128i __A) +_mm256_broadcast_i64x2(__m128i __A) { - return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, - (__v4di)_mm256_undefined_si256(), - (__mmask8) -1); + return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, + 0, 1, 0, 1); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) +_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, - (__v4di) __O, - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_broadcast_i64x2(__A), + (__v4di)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, - (__v4di) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, + (__v4di)_mm256_broadcast_i64x2(__A), + (__v4di)_mm256_setzero_si256()); } #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index f3744da6ab8a..99bb050de4d7 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -7189,52 +7189,49 @@ _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_broadcast_f32x4 (__m128 __A) +_mm256_broadcast_f32x4(__m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, - (__v8sf)_mm256_undefined_pd (), - (__mmask8) -1); + return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, + 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A) +_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, - (__v8sf) __O, - __M); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, + (__v8sf)_mm256_broadcast_f32x4(__A), + (__v8sf)__O); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, - (__v8sf) _mm256_setzero_ps (), - __M); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, + (__v8sf)_mm256_broadcast_f32x4(__A), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_broadcast_i32x4 (__m128i __A) +_mm256_broadcast_i32x4(__m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A, - (__v8si)_mm256_undefined_si256 (), - (__mmask8) -1); + return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, + 0, 1, 2, 3, 0, 1, 2, 3); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A) +_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A, - (__v8si) - __O, __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_broadcast_i32x4(__A), + (__v8si)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A) +_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) - __A, - (__v8si) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_broadcast_i32x4(__A), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m256d __DEFAULT_FN_ATTRS diff --git a/clang/test/CodeGen/avx512dq-builtins.c b/clang/test/CodeGen/avx512dq-builtins.c index f57433a3616b..c0e1d648098b 100644 --- a/clang/test/CodeGen/avx512dq-builtins.c +++ b/clang/test/CodeGen/avx512dq-builtins.c @@ -963,40 +963,44 @@ __m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { return _mm512_maskz_broadcast_f32x2(__M, __A); } -__m512 test_mm512_broadcast_f32x8(__m256 __A) { +__m512 test_mm512_broadcast_f32x8(float const* __A) { // CHECK-LABEL: @test_mm512_broadcast_f32x8 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x8 - return _mm512_broadcast_f32x8(__A); + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> + return _mm512_broadcast_f32x8(_mm256_loadu_ps(__A)); } -__m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { +__m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, float const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_f32x8 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x8 - return _mm512_mask_broadcast_f32x8(__O, __M, __A); + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask_broadcast_f32x8(__O, __M, _mm256_loadu_ps(__A)); } -__m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { +__m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x8 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x8 - return _mm512_maskz_broadcast_f32x8(__M, __A); + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_maskz_broadcast_f32x8(__M, _mm256_loadu_ps(__A)); } -__m512d test_mm512_broadcast_f64x2(__m128d __A) { +__m512d test_mm512_broadcast_f64x2(double const* __A) { // CHECK-LABEL: @test_mm512_broadcast_f64x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 - return _mm512_broadcast_f64x2(__A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + return _mm512_broadcast_f64x2(_mm_loadu_pd(__A)); } -__m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { +__m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, double const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_f64x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 - return _mm512_mask_broadcast_f64x2(__O, __M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); } -__m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { +__m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 - return _mm512_maskz_broadcast_f64x2(__M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); } __m512i test_mm512_broadcast_i32x2(__m128i __A) { @@ -1017,41 +1021,46 @@ __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { return _mm512_maskz_broadcast_i32x2(__M, __A); } -__m512i test_mm512_broadcast_i32x8(__m256i __A) { +__m512i test_mm512_broadcast_i32x8(__m256i const* __A) { // CHECK-LABEL: @test_mm512_broadcast_i32x8 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x8 - return _mm512_broadcast_i32x8(__A); + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> + return _mm512_broadcast_i32x8(_mm256_loadu_si256(__A)); } -__m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { +__m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_i32x8 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x8 - return _mm512_mask_broadcast_i32x8(__O, __M, __A); + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_mask_broadcast_i32x8(__O, __M, _mm256_loadu_si256(__A)); } -__m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { +__m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x8 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x8 - return _mm512_maskz_broadcast_i32x8(__M, __A); + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_maskz_broadcast_i32x8(__M, _mm256_loadu_si256(__A)); } -__m512i test_mm512_broadcast_i64x2(__m128i __A) { +__m512i test_mm512_broadcast_i64x2(__m128i const* __A) { // CHECK-LABEL: @test_mm512_broadcast_i64x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 - return _mm512_broadcast_i64x2(__A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> + return _mm512_broadcast_i64x2(_mm_loadu_si128(__A)); } -__m512i test_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { +__m512i test_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_i64x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 - return _mm512_mask_broadcast_i64x2(__O, __M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_mask_broadcast_i64x2(__O, __M, _mm_loadu_si128(__A)); } -__m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { +__m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 - return _mm512_maskz_broadcast_i64x2(__M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_maskz_broadcast_i64x2(__M, _mm_loadu_si128(__A)); } + __m256 test_mm512_extractf32x8_ps(__m512 __A) { // CHECK-LABEL: @test_mm512_extractf32x8_ps // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 760783af1ce0..5b78dd2ffe7c 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -4647,76 +4647,84 @@ __m128 test_mm_maskz_sqrt_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ return _mm_maskz_sqrt_round_ss(__U,__A,__B,_MM_FROUND_CUR_DIRECTION); } -__m512 test_mm512_broadcast_f32x4(__m128 __A) { +__m512 test_mm512_broadcast_f32x4(float const* __A) { // CHECK-LABEL: @test_mm512_broadcast_f32x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 - return _mm512_broadcast_f32x4(__A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + return _mm512_broadcast_f32x4(_mm_loadu_ps(__A)); } -__m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { +__m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, float const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_f32x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 - return _mm512_mask_broadcast_f32x4(__O, __M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask_broadcast_f32x4(__O, __M, _mm_loadu_ps(__A)); } -__m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { +__m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 - return _mm512_maskz_broadcast_f32x4(__M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_maskz_broadcast_f32x4(__M, _mm_loadu_ps(__A)); } -__m512d test_mm512_broadcast_f64x4(__m256d __A) { +__m512d test_mm512_broadcast_f64x4(float const* __A) { // CHECK-LABEL: @test_mm512_broadcast_f64x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x4 - return _mm512_broadcast_f64x4(__A); + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> + return _mm512_broadcast_f64x4(_mm256_loadu_ps(__A)); } -__m512d test_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) { +__m512d test_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, float const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_f64x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x4 - return _mm512_mask_broadcast_f64x4(__O, __M, __A); + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_broadcast_f64x4(__O, __M, _mm256_loadu_ps(__A)); } -__m512d test_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) { +__m512d test_mm512_maskz_broadcast_f64x4(__mmask8 __M, float const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x4 - return _mm512_maskz_broadcast_f64x4(__M, __A); + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_maskz_broadcast_f64x4(__M, _mm256_loadu_ps(__A)); } -__m512i test_mm512_broadcast_i32x4(__m128i __A) { +__m512i test_mm512_broadcast_i32x4(__m128i const* __A) { // CHECK-LABEL: @test_mm512_broadcast_i32x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 - return _mm512_broadcast_i32x4(__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> + return _mm512_broadcast_i32x4(_mm_loadu_si128(__A)); } -__m512i test_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { +__m512i test_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_i32x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 - return _mm512_mask_broadcast_i32x4(__O, __M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_mask_broadcast_i32x4(__O, __M, _mm_loadu_si128(__A)); } -__m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { +__m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 - return _mm512_maskz_broadcast_i32x4(__M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); } -__m512i test_mm512_broadcast_i64x4(__m256i __A) { +__m512i test_mm512_broadcast_i64x4(__m256i const* __A) { // CHECK-LABEL: @test_mm512_broadcast_i64x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x4 - return _mm512_broadcast_i64x4(__A); + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + return _mm512_broadcast_i64x4(_mm256_loadu_si256(__A)); } -__m512i test_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) { +__m512i test_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i const* __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_i64x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x4 - return _mm512_mask_broadcast_i64x4(__O, __M, __A); + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_mask_broadcast_i64x4(__O, __M, _mm256_loadu_si256(__A)); } -__m512i test_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) { +__m512i test_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i const* __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x4 - return _mm512_maskz_broadcast_i64x4(__M, __A); + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_maskz_broadcast_i64x4(__M, _mm256_loadu_si256(__A)); } __m512d test_mm512_broadcastsd_pd(__m128d __A) { diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index fe4ebe12ddd4..49c7ba0d82d6 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -5734,38 +5734,42 @@ __m256 test_mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A) { __m256 test_mm256_broadcast_f32x4(__m128 __A) { // CHECK-LABEL: @test_mm256_broadcast_f32x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> return _mm256_broadcast_f32x4(__A); } __m256 test_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_f32x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_broadcast_f32x4(__O, __M, __A); } __m256 test_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x4 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_broadcast_f32x4(__M, __A); } -__m256i test_mm256_broadcast_i32x4(__m128i __A) { +__m256i test_mm256_broadcast_i32x4(__m128i const* __A) { // CHECK-LABEL: @test_mm256_broadcast_i32x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 - return _mm256_broadcast_i32x4(__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + return _mm256_broadcast_i32x4(_mm_loadu_si128(__A)); } -__m256i test_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { +__m256i test_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_i32x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 - return _mm256_mask_broadcast_i32x4(__O, __M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + return _mm256_mask_broadcast_i32x4(__O, __M, _mm_loadu_si128(__A)); } -__m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { +__m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x4 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 - return _mm256_maskz_broadcast_i32x4(__M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + return _mm256_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); } __m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { diff --git a/clang/test/CodeGen/avx512vldq-builtins.c b/clang/test/CodeGen/avx512vldq-builtins.c index 6834b6f46b28..fac61ae47155 100644 --- a/clang/test/CodeGen/avx512vldq-builtins.c +++ b/clang/test/CodeGen/avx512vldq-builtins.c @@ -918,22 +918,24 @@ __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { return _mm256_maskz_broadcast_f32x2(__M, __A); } -__m256d test_mm256_broadcast_f64x2(__m128d __A) { +__m256d test_mm256_broadcast_f64x2(double const* __A) { // CHECK-LABEL: @test_mm256_broadcast_f64x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 - return _mm256_broadcast_f64x2(__A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> + return _mm256_broadcast_f64x2(_mm_loadu_pd(__A)); } -__m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { +__m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, double const* __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_f64x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 - return _mm256_mask_broadcast_f64x2(__O, __M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); } -__m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { +__m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_f64x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 - return _mm256_maskz_broadcast_f64x2(__M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); } __m128i test_mm_broadcast_i32x2(__m128i __A) { @@ -972,22 +974,24 @@ __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return _mm256_maskz_broadcast_i32x2(__M, __A); } -__m256i test_mm256_broadcast_i64x2(__m128i __A) { +__m256i test_mm256_broadcast_i64x2(__m128i const* __A) { // CHECK-LABEL: @test_mm256_broadcast_i64x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 - return _mm256_broadcast_i64x2(__A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> + return _mm256_broadcast_i64x2(_mm_loadu_si128(__A)); } -__m256i test_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { +__m256i test_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_i64x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 - return _mm256_mask_broadcast_i64x2(__O, __M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_mask_broadcast_i64x2(__O, __M, _mm_loadu_si128(__A)); } -__m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { +__m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_i64x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 - return _mm256_maskz_broadcast_i64x2(__M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_maskz_broadcast_i64x2(__M, _mm_loadu_si128(__A)); } __m128d test_mm256_extractf64x2_pd(__m256d __A) {