diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 6a3016bcd3bb..06f56c7d0505 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1238,15 +1238,15 @@ TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "nc", "avx512vl,a TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "nc", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpshldd128_mask, "V4iV4iV4iIiV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldd256_mask, "V8iV8iV8iIiV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldd512_mask, "V16iV16iV16iIiV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw128_mask, "V8sV8sV8sIiV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw256_mask, "V16sV16sV16sIiV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw512_mask, "V32sV32sV32sIiV32sUi", "nc", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "nc", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "nc", "avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") @@ -1286,15 +1286,15 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512 TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "nc", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "nc", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "nc", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "nc", "avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index e0ada4c16f3e..01a75a70bb6d 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -142,131 +142,89 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) __U); } +#define _mm512_shldi_epi64(A, B, I) \ + (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) + #define _mm512_mask_shldi_epi64(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)(__m512i)(S), \ - (__mmask8)(U)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S)) #define _mm512_maskz_shldi_epi64(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)) - -#define _mm512_shldi_epi64(A, B, I) \ - (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_undefined_epi32(), \ - (__mmask8)-1) - -#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)(__m512i)(S), \ - (__mmask16)(U)) - -#define _mm512_maskz_shldi_epi32(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)) + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512()) #define _mm512_shldi_epi32(A, B, I) \ - (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1) + (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) -#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)(__m512i)(S), \ - (__mmask32)(U)) +#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S)) -#define _mm512_maskz_shldi_epi16(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_setzero_si512(), \ - (__mmask32)(U)) +#define _mm512_maskz_shldi_epi32(U, A, B, I) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512()) #define _mm512_shldi_epi16(A, B, I) \ - (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_undefined_epi32(), \ - (__mmask32)-1) + (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I)) -#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)(__m512i)(S), \ - (__mmask8)(U)) +#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S)) -#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)) +#define _mm512_maskz_shldi_epi16(U, A, B, I) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512()) #define _mm512_shrdi_epi64(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (int)(I), \ - (__v8di)_mm512_undefined_epi32(), \ - (__mmask8)-1) + (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) -#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)(__m512i)(S), \ - (__mmask16)(U)) +#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)(__m512i)(S)) -#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)) +#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ + (__v8di)_mm512_setzero_si512()) #define _mm512_shrdi_epi32(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (int)(I), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1) + (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) -#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)(__m512i)(S), \ - (__mmask32)(U)) +#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)(__m512i)(S)) -#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ - (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_setzero_si512(), \ - (__mmask32)(U)) +#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ + (__v16si)_mm512_setzero_si512()) #define _mm512_shrdi_epi16(A, B, I) \ - (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \ - (__v32hi)(__m512i)(B), \ - (int)(I), \ - (__v32hi)_mm512_undefined_epi32(), \ - (__mmask32)-1) + (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ + (__v32hi)(__m512i)(B), (int)(I)) + +#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)(__m512i)(S)) + +#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ + (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ + (__v32hi)_mm512_setzero_si512()) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index 6dde1e9643eb..38ef9b0f8497 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -251,257 +251,173 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) __U); } +#define _mm256_shldi_epi64(A, B, I) \ + (__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I)) + #define _mm256_mask_shldi_epi64(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)(__m256i)(S), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S)) #define _mm256_maskz_shldi_epi64(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)) - -#define _mm256_shldi_epi64(A, B, I) \ - (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1) - -#define _mm_mask_shldi_epi64(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)(__m128i)(S), \ - (__mmask8)(U)) - -#define _mm_maskz_shldi_epi64(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)) + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256()) #define _mm_shldi_epi64(A, B, I) \ - (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(I)) -#define _mm256_mask_shldi_epi32(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)(__m256i)(S), \ - (__mmask8)(U)) +#define _mm_mask_shldi_epi64(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S)) -#define _mm256_maskz_shldi_epi32(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)) +#define _mm_maskz_shldi_epi64(U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shldi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128()) #define _mm256_shldi_epi32(A, B, I) \ - (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1) + (__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I)) -#define _mm_mask_shldi_epi32(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)(__m128i)(S), \ - (__mmask8)(U)) +#define _mm256_mask_shldi_epi32(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S)) -#define _mm_maskz_shldi_epi32(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)) +#define _mm256_maskz_shldi_epi32(U, A, B, I) \ + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256()) #define _mm_shldi_epi32(A, B, I) \ - (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I)) -#define _mm256_mask_shldi_epi16(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)(__m256i)(S), \ - (__mmask16)(U)) +#define _mm_mask_shldi_epi32(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S)) -#define _mm256_maskz_shldi_epi16(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(U)) +#define _mm_maskz_shldi_epi32(U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shldi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128()) #define _mm256_shldi_epi16(A, B, I) \ - (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_undefined_si256(), \ - (__mmask16)-1) + (__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I)) -#define _mm_mask_shldi_epi16(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)(__m128i)(S), \ - (__mmask8)(U)) +#define _mm256_mask_shldi_epi16(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S)) -#define _mm_maskz_shldi_epi16(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)) +#define _mm256_maskz_shldi_epi16(U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256()) #define _mm_shldi_epi16(A, B, I) \ - (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (int)(I)) -#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)(__m256i)(S), \ - (__mmask8)(U)) +#define _mm_mask_shldi_epi16(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S)) -#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)) +#define _mm_maskz_shldi_epi16(U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128()) #define _mm256_shrdi_epi64(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (int)(I), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1) + (__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(I)) -#define _mm_mask_shrdi_epi64(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)(__m128i)(S), \ - (__mmask8)(U)) +#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)(__m256i)(S)) -#define _mm_maskz_shrdi_epi64(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)) +#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ + (__v4di)_mm256_setzero_si256()) #define _mm_shrdi_epi64(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (int)(I), \ - (__v2di)_mm_undefined_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(I)) -#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)(__m256i)(S), \ - (__mmask8)(U)) +#define _mm_mask_shrdi_epi64(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)(__m128i)(S)) -#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)) +#define _mm_maskz_shrdi_epi64(U, A, B, I) \ + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ + (__v2di)_mm_setzero_si128()) #define _mm256_shrdi_epi32(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (int)(I), \ - (__v8si)_mm256_undefined_si256(), \ - (__mmask8)-1) + (__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(I)) -#define _mm_mask_shrdi_epi32(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)(__m128i)(S), \ - (__mmask8)(U)) +#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)(__m256i)(S)) -#define _mm_maskz_shrdi_epi32(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)) +#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ + (__v8si)_mm256_setzero_si256()) #define _mm_shrdi_epi32(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (int)(I), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(I)) -#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)(__m256i)(S), \ - (__mmask16)(U)) +#define _mm_mask_shrdi_epi32(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)(__m128i)(S)) -#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ - (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(U)) +#define _mm_maskz_shrdi_epi32(U, A, B, I) \ + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ + (__v4si)_mm_setzero_si128()) #define _mm256_shrdi_epi16(A, B, I) \ - (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \ - (__v16hi)(__m256i)(B), \ - (int)(I), \ - (__v16hi)_mm256_undefined_si256(), \ - (__mmask16)-1) + (__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ + (__v16hi)(__m256i)(B), (int)(I)) -#define _mm_mask_shrdi_epi16(S, U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)(__m128i)(S), \ - (__mmask8)(U)) +#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)(__m256i)(S)) -#define _mm_maskz_shrdi_epi16(U, A, B, I) \ - (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(U)) +#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ + (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ + (__v16hi)_mm256_setzero_si256()) #define _mm_shrdi_epi16(A, B, I) \ - (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \ - (__v8hi)(__m128i)(B), \ - (int)(I), \ - (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1) + (__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ + (__v8hi)(__m128i)(B), (int)(I)) + +#define _mm_mask_shrdi_epi16(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)(__m128i)(S)) + +#define _mm_maskz_shrdi_epi16(U, A, B, I) \ + (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ + (__v8hi)_mm_setzero_si128()) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d5945ef6f625..6a1cbdf62100 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2841,24 +2841,24 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_dbpsadbw128: case X86::BI__builtin_ia32_dbpsadbw256: case X86::BI__builtin_ia32_dbpsadbw512: - case X86::BI__builtin_ia32_vpshldd128_mask: - case X86::BI__builtin_ia32_vpshldd256_mask: - case X86::BI__builtin_ia32_vpshldd512_mask: - case X86::BI__builtin_ia32_vpshldq128_mask: - case X86::BI__builtin_ia32_vpshldq256_mask: - case X86::BI__builtin_ia32_vpshldq512_mask: - case X86::BI__builtin_ia32_vpshldw128_mask: - case X86::BI__builtin_ia32_vpshldw256_mask: - case X86::BI__builtin_ia32_vpshldw512_mask: - case X86::BI__builtin_ia32_vpshrdd128_mask: - case X86::BI__builtin_ia32_vpshrdd256_mask: - case X86::BI__builtin_ia32_vpshrdd512_mask: - case X86::BI__builtin_ia32_vpshrdq128_mask: - case X86::BI__builtin_ia32_vpshrdq256_mask: - case X86::BI__builtin_ia32_vpshrdq512_mask: - case X86::BI__builtin_ia32_vpshrdw128_mask: - case X86::BI__builtin_ia32_vpshrdw256_mask: - case X86::BI__builtin_ia32_vpshrdw512_mask: + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: i = 2; l = 0; u = 255; break; case X86::BI__builtin_ia32_fixupimmpd512_mask: diff --git a/clang/test/CodeGen/avx512vbmi2-builtins.c b/clang/test/CodeGen/avx512vbmi2-builtins.c index 6d20f9d2ee93..db4abdba457f 100644 --- a/clang/test/CodeGen/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/avx512vbmi2-builtins.c @@ -88,109 +88,121 @@ __m512i test_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const* __P) { __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.512 + // CHECK: @llvm.x86.avx512.vpshld.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 127); } __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.512 + // CHECK: @llvm.x86.avx512.vpshld.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); } __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.512 + // CHECK: @llvm.x86.avx512.vpshld.q.512 return _mm512_shldi_epi64(__A, __B, 31); } __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.512 + // CHECK: @llvm.x86.avx512.vpshld.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 127); } __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.512 + // CHECK: @llvm.x86.avx512.vpshld.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldi_epi32(__U, __A, __B, 63); } __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.512 + // CHECK: @llvm.x86.avx512.vpshld.d.512 return _mm512_shldi_epi32(__A, __B, 31); } __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.512 + // CHECK: @llvm.x86.avx512.vpshld.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 127); } __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.512 + // CHECK: @llvm.x86.avx512.vpshld.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldi_epi16(__U, __A, __B, 63); } __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.512 + // CHECK: @llvm.x86.avx512.vpshld.w.512 return _mm512_shldi_epi16(__A, __B, 31); } __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512 + // CHECK: @llvm.x86.avx512.vpshrd.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 127); } __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512 + // CHECK: @llvm.x86.avx512.vpshrd.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); } __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512 + // CHECK: @llvm.x86.avx512.vpshrd.q.512 return _mm512_shrdi_epi64(__A, __B, 31); } __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512 + // CHECK: @llvm.x86.avx512.vpshrd.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 127); } __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512 + // CHECK: @llvm.x86.avx512.vpshrd.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdi_epi32(__U, __A, __B, 63); } __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512 + // CHECK: @llvm.x86.avx512.vpshrd.d.512 return _mm512_shrdi_epi32(__A, __B, 31); } __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512 + // CHECK: @llvm.x86.avx512.vpshrd.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 127); } __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512 + // CHECK: @llvm.x86.avx512.vpshrd.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdi_epi16(__U, __A, __B, 63); } __m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512 + // CHECK: @llvm.x86.avx512.vpshrd.w.512 return _mm512_shrdi_epi16(__A, __B, 31); } diff --git a/clang/test/CodeGen/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/avx512vlvbmi2-builtins.c index 8ae0ab7da75a..aceb97616d2b 100644 --- a/clang/test/CodeGen/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/avx512vlvbmi2-builtins.c @@ -172,217 +172,241 @@ __m256i test_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const* __P) { __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.256 + // CHECK: @llvm.x86.avx512.vpshld.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 127); } __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.256 + // CHECK: @llvm.x86.avx512.vpshld.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldi_epi64(__U, __A, __B, 63); } __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.256 + // CHECK: @llvm.x86.avx512.vpshld.q.256 return _mm256_shldi_epi64(__A, __B, 31); } __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.128 + // CHECK: @llvm.x86.avx512.vpshld.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldi_epi64(__S, __U, __A, __B, 127); } __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.128 + // CHECK: @llvm.x86.avx512.vpshld.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldi_epi64(__U, __A, __B, 63); } __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshld.q.128 + // CHECK: @llvm.x86.avx512.vpshld.q.128 return _mm_shldi_epi64(__A, __B, 31); } __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.256 + // CHECK: @llvm.x86.avx512.vpshld.d.256 + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 127); } __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.256 + // CHECK: @llvm.x86.avx512.vpshld.d.256 + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldi_epi32(__U, __A, __B, 63); } __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.256 + // CHECK: @llvm.x86.avx512.vpshld.d.256 return _mm256_shldi_epi32(__A, __B, 31); } __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.128 + // CHECK: @llvm.x86.avx512.vpshld.d.128 + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shldi_epi32(__S, __U, __A, __B, 127); } __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.128 + // CHECK: @llvm.x86.avx512.vpshld.d.128 + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldi_epi32(__U, __A, __B, 63); } __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshld.d.128 + // CHECK: @llvm.x86.avx512.vpshld.d.128 return _mm_shldi_epi32(__A, __B, 31); } __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.256 + // CHECK: @llvm.x86.avx512.vpshld.w.256 + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 127); } __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.256 + // CHECK: @llvm.x86.avx512.vpshld.w.256 + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldi_epi16(__U, __A, __B, 63); } __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.256 + // CHECK: @llvm.x86.avx512.vpshld.w.256 return _mm256_shldi_epi16(__A, __B, 31); } __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.128 + // CHECK: @llvm.x86.avx512.vpshld.w.128 + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldi_epi16(__S, __U, __A, __B, 127); } __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.128 + // CHECK: @llvm.x86.avx512.vpshld.w.128 + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldi_epi16(__U, __A, __B, 63); } __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shldi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshld.w.128 + // CHECK: @llvm.x86.avx512.vpshld.w.128 return _mm_shldi_epi16(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256 + // CHECK: @llvm.x86.avx512.vpshrd.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 127); } __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256 + // CHECK: @llvm.x86.avx512.vpshrd.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63); } __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256 + // CHECK: @llvm.x86.avx512.vpshrd.q.256 return _mm256_shrdi_epi64(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128 + // CHECK: @llvm.x86.avx512.vpshrd.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 127); } __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128 + // CHECK: @llvm.x86.avx512.vpshrd.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdi_epi64(__U, __A, __B, 63); } __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi64 - // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128 + // CHECK: @llvm.x86.avx512.vpshrd.q.128 return _mm_shrdi_epi64(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256 + // CHECK: @llvm.x86.avx512.vpshrd.d.256 + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 127); } __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256 + // CHECK: @llvm.x86.avx512.vpshrd.d.256 + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdi_epi32(__U, __A, __B, 63); } __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256 + // CHECK: @llvm.x86.avx512.vpshrd.d.256 return _mm256_shrdi_epi32(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128 + // CHECK: @llvm.x86.avx512.vpshrd.d.128 + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 127); } __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128 + // CHECK: @llvm.x86.avx512.vpshrd.d.128 + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdi_epi32(__U, __A, __B, 63); } __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi32 - // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128 + // CHECK: @llvm.x86.avx512.vpshrd.d.128 return _mm_shrdi_epi32(__A, __B, 31); } __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256 + // CHECK: @llvm.x86.avx512.vpshrd.w.256 + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 127); } __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256 + // CHECK: @llvm.x86.avx512.vpshrd.w.256 + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdi_epi16(__U, __A, __B, 63); } __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256 + // CHECK: @llvm.x86.avx512.vpshrd.w.256 return _mm256_shrdi_epi16(__A, __B, 31); } __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128 + // CHECK: @llvm.x86.avx512.vpshrd.w.128 + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 127); } __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128 + // CHECK: @llvm.x86.avx512.vpshrd.w.128 + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdi_epi16(__U, __A, __B, 63); } __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_shrdi_epi16 - // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128 + // CHECK: @llvm.x86.avx512.vpshrd.w.128 return _mm_shrdi_epi16(__A, __B, 31); } diff --git a/clang/test/Sema/builtins-x86.c b/clang/test/Sema/builtins-x86.c index ad9e00779e82..e96caf2d7971 100644 --- a/clang/test/Sema/builtins-x86.c +++ b/clang/test/Sema/builtins-x86.c @@ -93,74 +93,74 @@ __m512 _mm512_mask_prefetch_i32gather_ps_2(__m512i index, __mmask16 mask, int co return __builtin_ia32_gatherpfdps(mask, index, addr, 1, 1); // expected-error {{argument should be a value from 2 to 3}} } -__m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { - return __builtin_ia32_vpshldq512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { + return __builtin_ia32_vpshldq512(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return __builtin_ia32_vpshldd512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { + return __builtin_ia32_vpshldd512(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { - return __builtin_ia32_vpshldw512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { + return __builtin_ia32_vpshldw512(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { - return __builtin_ia32_vpshrdq512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { + return __builtin_ia32_vpshrdq512(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return __builtin_ia32_vpshrdd512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { + return __builtin_ia32_vpshrdd512(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { - return __builtin_ia32_vpshrdw512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { + return __builtin_ia32_vpshrdw512(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return __builtin_ia32_vpshldq256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { + return __builtin_ia32_vpshldq256(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m128i test_mm128_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return __builtin_ia32_vpshldq128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m128i test_mm128_shldi_epi64( __m128i __A, __m128i __B) { + return __builtin_ia32_vpshldq128(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return __builtin_ia32_vpshldd256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { + return __builtin_ia32_vpshldd256(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m128i test_mm128_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return __builtin_ia32_vpshldd128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m128i test_mm128_shldi_epi32(__m128i __A, __m128i __B) { + return __builtin_ia32_vpshldd128(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { - return __builtin_ia32_vpshldw256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m256i test_mm256_shldi_epi16( __m256i __A, __m256i __B) { + return __builtin_ia32_vpshldw256(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m128i test_mm128_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return __builtin_ia32_vpshldw128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m128i test_mm128_shldi_epi16(__m128i __A, __m128i __B) { + return __builtin_ia32_vpshldw128(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return __builtin_ia32_vpshrdq256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { + return __builtin_ia32_vpshrdq256(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m128i test_mm128_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return __builtin_ia32_vpshrdq128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m128i test_mm128_shrdi_epi64(__m128i __A, __m128i __B) { + return __builtin_ia32_vpshrdq128(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return __builtin_ia32_vpshrdd256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { + return __builtin_ia32_vpshrdd256(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m128i test_mm128_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return __builtin_ia32_vpshrdd128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m128i test_mm128_shrdi_epi32(__m128i __A, __m128i __B) { + return __builtin_ia32_vpshrdd128(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { - return __builtin_ia32_vpshrdw256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { + return __builtin_ia32_vpshrdw256(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} } -__m128i test_mm128_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return __builtin_ia32_vpshrdw128_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +__m128i test_mm128_shrdi_epi16(__m128i __A, __m128i __B) { + return __builtin_ia32_vpshrdw128(__A, __B, 1024); // expected-error {{argument should be a value from 0 to 255}} }