forked from OSchip/llvm-project
[X86] Remove mask parameter from avx512 pmultishiftqb intrinsics. Use select in IR instead.
Fixes PR40259 llvm-svn: 351036
This commit is contained in:
parent
e7b4ea4726
commit
49488407aa
|
@ -1821,9 +1821,9 @@ TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128:
|
|||
TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512vbmi")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "ncV:128:", "avx512vbmi,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "ncV:256:", "avx512vbmi,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
|
||||
|
||||
// generic select intrinsics
|
||||
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
|
||||
|
|
|
@ -11152,6 +11152,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
|
||||
}
|
||||
|
||||
case X86::BI__builtin_ia32_vpmultishiftqb128:
|
||||
case X86::BI__builtin_ia32_vpmultishiftqb256:
|
||||
case X86::BI__builtin_ia32_vpmultishiftqb512: {
|
||||
Intrinsic::ID ID;
|
||||
switch (BuiltinID) {
|
||||
default: llvm_unreachable("Unsupported intrinsic!");
|
||||
case X86::BI__builtin_ia32_vpmultishiftqb128:
|
||||
ID = Intrinsic::x86_avx512_pmultishift_qb_128;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_vpmultishiftqb256:
|
||||
ID = Intrinsic::x86_avx512_pmultishift_qb_256;
|
||||
break;
|
||||
case X86::BI__builtin_ia32_vpmultishiftqb512:
|
||||
ID = Intrinsic::x86_avx512_pmultishift_qb_512;
|
||||
break;
|
||||
}
|
||||
|
||||
return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
|
||||
}
|
||||
|
||||
case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
|
||||
case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
|
||||
case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
|
||||
|
@ -11173,8 +11193,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|||
break;
|
||||
}
|
||||
|
||||
Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
|
||||
return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
|
||||
Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
|
||||
return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
|
||||
}
|
||||
|
||||
// packed comparison intrinsics
|
||||
|
|
|
@ -91,30 +91,26 @@ _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
|
|||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512((__v64qi)__X, (__v64qi) __Y);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
|
||||
_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) _mm512_setzero_si512 (),
|
||||
(__mmask64) __M);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
|
||||
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
|
||||
(__v64qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m512i __DEFAULT_FN_ATTRS
|
||||
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
|
||||
_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) _mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
|
||||
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
|
||||
(__v64qi)_mm512_setzero_si512());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -150,61 +150,49 @@ _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
|
|||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
|
||||
_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
|
||||
__m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
|
||||
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
|
||||
(__v16qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS128
|
||||
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
|
||||
_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
|
||||
(__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
|
||||
(__v16qi)_mm_setzero_si128());
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
|
||||
_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
|
||||
(__v32qi)__W);
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS256
|
||||
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
|
||||
_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
|
||||
(__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
|
||||
(__v32qi)_mm256_setzero_si256());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -52,18 +52,20 @@ __m512i test_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A
|
|||
|
||||
__m512i test_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y) {
|
||||
// CHECK-LABEL: @test_mm512_mask_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.512
|
||||
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
|
||||
return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
|
||||
}
|
||||
|
||||
__m512i test_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y) {
|
||||
// CHECK-LABEL: @test_mm512_maskz_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.512
|
||||
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
|
||||
return _mm512_maskz_multishift_epi64_epi8(__M, __X, __Y);
|
||||
}
|
||||
|
||||
__m512i test_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
|
||||
// CHECK-LABEL: @test_mm512_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.512
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.512
|
||||
return _mm512_multishift_epi64_epi8(__X, __Y);
|
||||
}
|
||||
|
|
|
@ -99,37 +99,41 @@ __m256i test_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i _
|
|||
|
||||
__m128i test_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) {
|
||||
// CHECK-LABEL: @test_mm_mask_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.128
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
|
||||
return _mm_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
|
||||
}
|
||||
|
||||
__m128i test_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) {
|
||||
// CHECK-LABEL: @test_mm_maskz_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.128
|
||||
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
|
||||
return _mm_maskz_multishift_epi64_epi8(__M, __X, __Y);
|
||||
}
|
||||
|
||||
__m128i test_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
|
||||
// CHECK-LABEL: @test_mm_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.128
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.128
|
||||
return _mm_multishift_epi64_epi8(__X, __Y);
|
||||
}
|
||||
|
||||
__m256i test_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) {
|
||||
// CHECK-LABEL: @test_mm256_mask_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.256
|
||||
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
|
||||
return _mm256_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
|
||||
}
|
||||
|
||||
__m256i test_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) {
|
||||
// CHECK-LABEL: @test_mm256_maskz_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.256
|
||||
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
|
||||
return _mm256_maskz_multishift_epi64_epi8(__M, __X, __Y);
|
||||
}
|
||||
|
||||
__m256i test_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
|
||||
// CHECK-LABEL: @test_mm256_multishift_epi64_epi8
|
||||
// CHECK: @llvm.x86.avx512.mask.pmultishift.qb.256
|
||||
// CHECK: @llvm.x86.avx512.pmultishift.qb.256
|
||||
return _mm256_multishift_epi64_epi8(__X, __Y);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue