forked from OSchip/llvm-project
[AVX-512] Replace masked 512-bit pmuldq and pmuludq builtins with the newly added unmasked versions and selects.
llvm-svn: 290580
This commit is contained in:
parent
32866ab800
commit
70536f4e47
|
@@ -974,8 +974,8 @@ TARGET_BUILTIN(__builtin_ia32_pminsd512_mask, "V16iV16iV16iV16iUs", "", "avx512f
|
|||
TARGET_BUILTIN(__builtin_ia32_pminsq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pminud512_mask, "V16iV16iV16iV16iUs", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pminuq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmuldq512_mask, "V8LLiV16iV16iV8LLiUc", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmuludq512_mask, "V8LLiV16iV16iV8LLiUc", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f")
|
||||
|
|
|
@@ -1416,57 +1416,45 @@ _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
|
|||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mul_epi32(__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di) __W, __M);
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
|
||||
(__v8di)_mm512_mul_epi32(__X, __Y),
|
||||
(__v8di)__W);
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
|
||||
(__v8di)_mm512_mul_epi32(__X, __Y),
|
||||
(__v8di)_mm512_setzero_si512 ());
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mul_epu32(__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di) __W, __M);
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
|
||||
(__v8di)_mm512_mul_epu32(__X, __Y),
|
||||
(__v8di)__W);
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
||||
(__v16si) __Y,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
__M);
|
||||
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
|
||||
(__v8di)_mm512_mul_epu32(__X, __Y),
|
||||
(__v8di)_mm512_setzero_si512 ());
|
||||
}
|
||||
|
||||
static __inline __m512i __DEFAULT_FN_ATTRS
|
||||
|
|
|
@@ -1867,29 +1867,45 @@ __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) {
|
|||
return _mm512_add_epi64(__A,__B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
|
||||
//CHECK-LABEL: @test_mm512_mul_epi32
|
||||
//CHECK: @llvm.x86.avx512.pmul.dq.512
|
||||
return _mm512_mul_epi32(__A,__B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_maskz_mul_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
|
||||
//CHECK-LABEL: @test_mm512_maskz_mul_epi32
|
||||
//CHECK: @llvm.x86.avx512.mask.pmul.dq.512
|
||||
//CHECK: @llvm.x86.avx512.pmul.dq.512
|
||||
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
|
||||
return _mm512_maskz_mul_epi32(__k,__A,__B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_mask_mul_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
|
||||
__m512i __src) {
|
||||
//CHECK-LABEL: @test_mm512_mask_mul_epi32
|
||||
//CHECK: @llvm.x86.avx512.mask.pmul.dq.512
|
||||
//CHECK: @llvm.x86.avx512.pmul.dq.512
|
||||
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
|
||||
return _mm512_mask_mul_epi32(__src,__k,__A,__B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
|
||||
//CHECK-LABEL: @test_mm512_mul_epu32
|
||||
//CHECK: @llvm.x86.avx512.pmulu.dq.512
|
||||
return _mm512_mul_epu32(__A,__B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_maskz_mul_epu32 (__mmask16 __k,__m512i __A, __m512i __B) {
|
||||
//CHECK-LABEL: @test_mm512_maskz_mul_epu32
|
||||
//CHECK: @llvm.x86.avx512.mask.pmulu.dq.512
|
||||
//CHECK: @llvm.x86.avx512.pmulu.dq.512
|
||||
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
|
||||
return _mm512_maskz_mul_epu32(__k,__A,__B);
|
||||
}
|
||||
|
||||
__m512i test_mm512_mask_mul_epu32 (__mmask16 __k,__m512i __A, __m512i __B,
|
||||
__m512i __src) {
|
||||
//CHECK-LABEL: @test_mm512_mask_mul_epu32
|
||||
//CHECK: @llvm.x86.avx512.mask.pmulu.dq.512
|
||||
//CHECK: @llvm.x86.avx512.pmulu.dq.512
|
||||
//CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
|
||||
return _mm512_mask_mul_epu32(__src,__k,__A,__B);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue