[X86] Remove masking from vpternlog intrinsics. Use a select in IR instead.

This removes 6 intrinsics since we no longer need separate mask and maskz intrinsics.

Differential Revision: https://reviews.llvm.org/D47124

llvm-svn: 332890
This commit is contained in:
Craig Topper 2018-05-21 20:58:09 +00:00
parent 274c4f7ab4
commit aad3aefaeb
12 changed files with 1030 additions and 356 deletions

View File

@ -6066,77 +6066,41 @@ let TargetPrefix = "x86" in {
// Bitwise ternary logic
let TargetPrefix = "x86" in {
def int_x86_avx512_mask_pternlog_d_128 :
GCCBuiltin<"__builtin_ia32_pternlogd128_mask">,
def int_x86_avx512_pternlog_d_128 :
GCCBuiltin<"__builtin_ia32_pternlogd128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_d_128 :
GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_d_256 :
GCCBuiltin<"__builtin_ia32_pternlogd256_mask">,
def int_x86_avx512_pternlog_d_256 :
GCCBuiltin<"__builtin_ia32_pternlogd256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_d_256 :
GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_d_512 :
GCCBuiltin<"__builtin_ia32_pternlogd512_mask">,
def int_x86_avx512_pternlog_d_512 :
GCCBuiltin<"__builtin_ia32_pternlogd512">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
llvm_i16_ty], [IntrNoMem]>;
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_d_512 :
GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_q_128 :
GCCBuiltin<"__builtin_ia32_pternlogq128_mask">,
def int_x86_avx512_pternlog_q_128 :
GCCBuiltin<"__builtin_ia32_pternlogq128">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_q_128 :
GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_q_256 :
GCCBuiltin<"__builtin_ia32_pternlogq256_mask">,
def int_x86_avx512_pternlog_q_256 :
GCCBuiltin<"__builtin_ia32_pternlogq256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_q_256 :
GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pternlog_q_512 :
GCCBuiltin<"__builtin_ia32_pternlogq512_mask">,
def int_x86_avx512_pternlog_q_512 :
GCCBuiltin<"__builtin_ia32_pternlogq512">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_maskz_pternlog_q_512 :
GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
llvm_i8_ty], [IntrNoMem]>;
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// Misc.

View File

@ -257,6 +257,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@ -2530,6 +2532,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Ops[] = { Arg0, Arg1, Arg2 };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Ops);
} else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
Name.startswith("avx512.maskz.pternlog."))) {
bool ZeroMask = Name[11] == 'z';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_128;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_512;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_128;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_256;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(3) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.

View File

@ -20631,23 +20631,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Src2, Src3, Src1),
Mask, PassThru, Subtarget, DAG);
}
case TERLOG_OP_MASK:
case TERLOG_OP_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
SDValue Mask = Op.getOperand(5);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = Src1;
// Set PassThru element.
if (IntrData->Type == TERLOG_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Src3, Src4),
Mask, PassThru, Subtarget, DAG);
}
case CVTPD2PS:
// ISD::FP_ROUND has a second argument that indicates if the truncation
// does not change the value. Set it to 0 since it can change.

View File

@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t {
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
ROUNDP, ROUNDS
};
@ -949,18 +949,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_d_512, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_q_128, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_q_256, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
@ -1281,18 +1269,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VFIXUPIMMS, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
X86ISD::VFIXUPIMMS, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_512, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_128, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_256, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ,
X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, ISD::FMA,
@ -1456,6 +1432,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_q_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_q_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_q_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),

View File

@ -2354,5 +2354,129 @@ entry:
declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
define <8 x i64> @test_mm512_ternarylogic_epi32(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = bitcast <8 x i64> %__C to <16 x i32>
%3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
%4 = bitcast <16 x i32> %3 to <8 x i64>
ret <8 x i64> %4
}
declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32) #1
define <8 x i64> @test_mm512_mask_ternarylogic_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_mask_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = bitcast <8 x i64> %__C to <16 x i32>
%3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
%4 = bitcast i16 %__U to <16 x i1>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
%6 = bitcast <16 x i32> %5 to <8 x i64>
ret <8 x i64> %6
}
define <8 x i64> @test_mm512_maskz_ternarylogic_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_maskz_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = bitcast <8 x i64> %__C to <16 x i32>
%3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
%4 = bitcast i16 %__U to <16 x i1>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
%6 = bitcast <16 x i32> %5 to <8 x i64>
ret <8 x i64> %6
}
define <8 x i64> @test_mm512_ternarylogic_epi64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0
; X64-NEXT: retq
entry:
%0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
ret <8 x i64> %0
}
declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32) #1
define <8 x i64> @test_mm512_mask_ternarylogic_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_mask_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A
ret <8 x i64> %2
}
define <8 x i64> @test_mm512_maskz_ternarylogic_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_maskz_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
ret <8 x i64> %2
}
!0 = !{i32 1}

View File

@ -4231,3 +4231,71 @@ define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}
declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}

View File

@ -3206,71 +3206,75 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x
ret <4 x float> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
%res2 = add <16 x i32> %res, %res1
%1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%2 = bitcast i16 %x4 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
%4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%res2 = add <16 x i32> %3, %4
ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
%res2 = add <16 x i32> %res, %res1
%1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%2 = bitcast i16 %x4 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
%4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
%res2 = add <16 x i32> %3, %4
ret <16 x i32> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)
define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
%res2 = add <8 x i64> %res, %res1
%1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
%4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
%res2 = add <8 x i64> %3, %4
ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
%res2 = add <8 x i64> %res, %res1
%1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
%4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
%res2 = add <8 x i64> %3, %4
ret <8 x i64> %res2
}

File diff suppressed because it is too large Load Diff

View File

@ -3535,6 +3535,264 @@ entry:
ret <2 x i64> %2
}
define <2 x i64> @test_mm_ternarylogic_epi32(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
; X32-LABEL: test_mm_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
%2 = bitcast <2 x i64> %__C to <4 x i32>
%3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
%4 = bitcast <4 x i32> %3 to <2 x i64>
ret <2 x i64> %4
}
declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32) #2
define <2 x i64> @test_mm_mask_ternarylogic_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
; X32-LABEL: test_mm_mask_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
%2 = bitcast <2 x i64> %__C to <4 x i32>
%3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
%4 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> %0
%6 = bitcast <4 x i32> %5 to <2 x i64>
ret <2 x i64> %6
}
define <2 x i64> @test_mm_maskz_ternarylogic_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
; X32-LABEL: test_mm_maskz_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__A to <4 x i32>
%1 = bitcast <2 x i64> %__B to <4 x i32>
%2 = bitcast <2 x i64> %__C to <4 x i32>
%3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
%4 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> zeroinitializer
%6 = bitcast <4 x i32> %5 to <2 x i64>
ret <2 x i64> %6
}
define <4 x i64> @test_mm256_ternarylogic_epi32(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
; X32-LABEL: test_mm256_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
%2 = bitcast <4 x i64> %__C to <8 x i32>
%3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
%4 = bitcast <8 x i32> %3 to <4 x i64>
ret <4 x i64> %4
}
declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32) #2
define <4 x i64> @test_mm256_mask_ternarylogic_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
; X32-LABEL: test_mm256_mask_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
%2 = bitcast <4 x i64> %__C to <8 x i32>
%3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0
%6 = bitcast <8 x i32> %5 to <4 x i64>
ret <4 x i64> %6
}
define <4 x i64> @test_mm256_maskz_ternarylogic_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
; X32-LABEL: test_mm256_maskz_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__A to <8 x i32>
%1 = bitcast <4 x i64> %__B to <8 x i32>
%2 = bitcast <4 x i64> %__C to <8 x i32>
%3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
%4 = bitcast i8 %__U to <8 x i1>
%5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer
%6 = bitcast <8 x i32> %5 to <4 x i64>
ret <4 x i64> %6
}
define <2 x i64> @test_mm_ternarylogic_epi64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
; X32-LABEL: test_mm_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0
; X64-NEXT: retq
entry:
%0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
ret <2 x i64> %0
}
declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32) #2
define <2 x i64> @test_mm_mask_ternarylogic_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
; X32-LABEL: test_mm_mask_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__A
ret <2 x i64> %2
}
define <2 x i64> @test_mm_maskz_ternarylogic_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
; X32-LABEL: test_mm_maskz_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
ret <2 x i64> %2
}
define <4 x i64> @test_mm256_ternarylogic_epi64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
; X32-LABEL: test_mm256_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0
; X64-NEXT: retq
entry:
%0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
ret <4 x i64> %0
}
declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32) #2
define <4 x i64> @test_mm256_mask_ternarylogic_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
; X32-LABEL: test_mm256_mask_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__A
ret <4 x i64> %2
}
define <4 x i64> @test_mm256_maskz_ternarylogic_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
; X32-LABEL: test_mm256_maskz_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
ret <4 x i64> %2
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>)
declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)

View File

@ -6817,3 +6817,139 @@ define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
ret <2 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}

View File

@ -2222,20 +2222,23 @@ define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x
ret <8 x float> %res2
}
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32)
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
%1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
%4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
%res2 = add <4 x i32> %3, %4
ret <4 x i32> %res2
}
@ -2244,32 +2247,37 @@ declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4
define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
%res2 = add <4 x i32> %res, %res1
%1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
%4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
%res2 = add <4 x i32> %3, %4
ret <4 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32)
define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
%1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
%4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
%res2 = add <8 x i32> %3, %4
ret <8 x i32> %res2
}
@ -2278,83 +2286,93 @@ declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8
define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
%res2 = add <8 x i32> %res, %res1
%1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
%4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
%res2 = add <8 x i32> %3, %4
ret <8 x i32> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32)
define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
%1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
%3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
%4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
%res2 = add <2 x i64> %3, %4
ret <2 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
%res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
%res2 = add <2 x i64> %res, %res1
%1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
%3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
%4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
%res2 = add <2 x i64> %3, %4
ret <2 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32)
define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
%1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
%4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
%res2 = add <4 x i64> %3, %4
ret <4 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
%res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
%res2 = add <4 x i64> %res, %res1
%1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
%2 = bitcast i8 %x4 to <8 x i1>
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
%4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
%res2 = add <4 x i64> %3, %4
ret <4 x i64> %res2
}

View File

@ -1798,20 +1798,20 @@ define <16 x i32> @stack_fold_ternlogd(<16 x i32> %x0, <16 x i32> %x1, <16 x i32
;CHECK-LABEL: stack_fold_ternlogd
;CHECK: vpternlogd $33, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
ret <16 x i32> %res
%2 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
ret <16 x i32> %2
}
declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
define <8 x i64> @stack_fold_ternlogq(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
;CHECK-LABEL: stack_fold_ternlogq
;CHECK: vpternlogq $33, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
ret <8 x i64> %res
%2 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
ret <8 x i64> %2
}
declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)
define <64 x i8> @stack_fold_punpckhbw_zmm(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_punpckhbw_zmm