[AVX-512] Replace masked 512-bit pmuldq and pmuludq builtins with the newly added unmasked versions and selects.

llvm-svn: 290580
This commit is contained in:
Craig Topper 2016-12-27 04:04:57 +00:00
parent 32866ab800
commit 70536f4e47
3 changed files with 40 additions and 36 deletions

View File

@ -974,8 +974,8 @@ TARGET_BUILTIN(__builtin_ia32_pminsd512_mask, "V16iV16iV16iV16iUs", "", "avx512f
TARGET_BUILTIN(__builtin_ia32_pminsq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pminud512_mask, "V16iV16iV16iV16iUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pminuq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmuldq512_mask, "V8LLiV16iV16iV8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmuludq512_mask, "V8LLiV16iV16iV8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f")

View File

@ -1416,57 +1416,45 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) -1);
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di) __W, __M);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_mul_epi32(__X, __Y),
(__v8di)__W);
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
__M);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_mul_epi32(__X, __Y),
(__v8di)_mm512_setzero_si512 ());
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) -1);
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di) __W, __M);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_mul_epu32(__X, __Y),
(__v8di)__W);
}
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
__M);
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_mul_epu32(__X, __Y),
(__v8di)_mm512_setzero_si512 ());
}
static __inline __m512i __DEFAULT_FN_ATTRS

View File

@ -1867,29 +1867,45 @@ __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) {
return _mm512_add_epi64(__A,__B);
}
__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
//CHECK-LABEL: @test_mm512_mul_epi32
//CHECK: @llvm.x86.avx512.pmul.dq.512
return _mm512_mul_epi32(__A,__B);
}
__m512i test_mm512_maskz_mul_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: @test_mm512_maskz_mul_epi32
//CHECK: @llvm.x86.avx512.mask.pmul.dq.512
//CHECK: @llvm.x86.avx512.pmul.dq.512
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_mul_epi32(__k,__A,__B);
}
__m512i test_mm512_mask_mul_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
__m512i __src) {
//CHECK-LABEL: @test_mm512_mask_mul_epi32
//CHECK: @llvm.x86.avx512.mask.pmul.dq.512
//CHECK: @llvm.x86.avx512.pmul.dq.512
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_mul_epi32(__src,__k,__A,__B);
}
__m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
//CHECK-LABEL: @test_mm512_mul_epu32
//CHECK: @llvm.x86.avx512.pmulu.dq.512
return _mm512_mul_epu32(__A,__B);
}
__m512i test_mm512_maskz_mul_epu32 (__mmask16 __k,__m512i __A, __m512i __B) {
//CHECK-LABEL: @test_mm512_maskz_mul_epu32
//CHECK: @llvm.x86.avx512.mask.pmulu.dq.512
//CHECK: @llvm.x86.avx512.pmulu.dq.512
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_mul_epu32(__k,__A,__B);
}
__m512i test_mm512_mask_mul_epu32 (__mmask16 __k,__m512i __A, __m512i __B,
__m512i __src) {
//CHECK-LABEL: @test_mm512_mask_mul_epu32
//CHECK: @llvm.x86.avx512.mask.pmulu.dq.512
//CHECK: @llvm.x86.avx512.pmulu.dq.512
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_mul_epu32(__src,__k,__A,__B);
}