[X86] Remove masking from vpternlog intrinsics. Use a select in IR instead.
This removes 6 intrinsics since we no longer need separate mask and maskz intrinsics.

Differential Revision: https://reviews.llvm.org/D47124

llvm-svn: 332890

parent 274c4f7ab4
commit aad3aefaeb
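To illustrate the change at the IR level, here is a minimal before/after sketch (the function names @old_form and @new_form are hypothetical; the intrinsic signatures are the ones defined and tested in the diff below). The mask moves out of the intrinsic and becomes an ordinary select between the result and the pass-through operand:

; Before this commit: the mask is a trailing argument of the intrinsic.
define <16 x i32> @old_form(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, i16 %m) {
  %r = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, i32 33, i16 %m)
  ret <16 x i32> %r
}

; After this commit: the intrinsic is unmasked; masking is an explicit select.
define <16 x i32> @new_form(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, i16 %m) {
  %r = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, i32 33)
  %mv = bitcast i16 %m to <16 x i1>
  %sel = select <16 x i1> %mv, <16 x i32> %r, <16 x i32> %a
  ret <16 x i32> %sel
}

The backend folds the select back into the masked form of VPTERNLOGD, so the generated code is unchanged, as the tests below verify.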
@@ -6066,77 +6066,41 @@ let TargetPrefix = "x86" in {
 // Bitwise ternary logic
 let TargetPrefix = "x86" in {
-  def int_x86_avx512_mask_pternlog_d_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogd128_mask">,
+  def int_x86_avx512_pternlog_d_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogd128">,
           Intrinsic<[llvm_v4i32_ty],
-                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_pternlog_d_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">,
-          Intrinsic<[llvm_v4i32_ty],
-                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;

-  def int_x86_avx512_mask_pternlog_d_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogd256_mask">,
+  def int_x86_avx512_pternlog_d_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogd256">,
           Intrinsic<[llvm_v8i32_ty],
-                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_pternlog_d_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">,
-          Intrinsic<[llvm_v8i32_ty],
-                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;

-  def int_x86_avx512_mask_pternlog_d_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogd512_mask">,
+  def int_x86_avx512_pternlog_d_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogd512">,
           Intrinsic<[llvm_v16i32_ty],
-                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
-                     llvm_i16_ty], [IntrNoMem]>;
+                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty,
+                     llvm_i32_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_pternlog_d_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">,
-          Intrinsic<[llvm_v16i32_ty],
-                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
-                     llvm_i16_ty], [IntrNoMem]>;

-  def int_x86_avx512_mask_pternlog_q_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogq128_mask">,
+  def int_x86_avx512_pternlog_q_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogq128">,
           Intrinsic<[llvm_v2i64_ty],
-                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_pternlog_q_128 :
-          GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">,
-          Intrinsic<[llvm_v2i64_ty],
-                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;

-  def int_x86_avx512_mask_pternlog_q_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogq256_mask">,
+  def int_x86_avx512_pternlog_q_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogq256">,
           Intrinsic<[llvm_v4i64_ty],
-                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_pternlog_q_256 :
-          GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">,
-          Intrinsic<[llvm_v4i64_ty],
-                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;

-  def int_x86_avx512_mask_pternlog_q_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogq512_mask">,
+  def int_x86_avx512_pternlog_q_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogq512">,
           Intrinsic<[llvm_v8i64_ty],
-                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_maskz_pternlog_q_512 :
-          GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">,
-          Intrinsic<[llvm_v8i64_ty],
-                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
-                     llvm_i8_ty], [IntrNoMem]>;
+                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
+                    [IntrNoMem]>;
 }

 // Misc.
@@ -257,6 +257,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
+      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
+      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -2530,6 +2532,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     Value *Ops[] = { Arg0, Arg1, Arg2 };
     Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                              Ops);
+  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
+                       Name.startswith("avx512.maskz.pternlog."))) {
+    bool ZeroMask = Name[11] == 'z';
+    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
+    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
+    Intrinsic::ID IID;
+    if (VecWidth == 128 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_pternlog_d_128;
+    else if (VecWidth == 256 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_pternlog_d_256;
+    else if (VecWidth == 512 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_pternlog_d_512;
+    else if (VecWidth == 128 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_pternlog_q_128;
+    else if (VecWidth == 256 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_pternlog_q_256;
+    else if (VecWidth == 512 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_pternlog_q_512;
+    else
+      llvm_unreachable("Unexpected intrinsic");
+
+    Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
+                      CI->getArgOperand(2), CI->getArgOperand(3) };
+    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
+                             Args);
+    Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
+                               : CI->getArgOperand(0);
+    Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
   } else if (IsX86 && Name.startswith("avx512.mask.") &&
              upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
     // Rep will be updated by the call in the condition.
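For the maskz variants, the upgrade path above picks a zeroinitializer pass-through. A minimal sketch of the IR it emits for an old @llvm.x86.avx512.maskz.pternlog.q.512 call (the function name @upgraded_maskz is hypothetical; the select pattern matches the upgraded tests below):

define <8 x i64> @upgraded_maskz(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, i8 %m) {
  ; Unmasked intrinsic call, then zero-masking via an ordinary select.
  %r = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, i32 33)
  %mv = bitcast i8 %m to <8 x i1>
  %sel = select <8 x i1> %mv, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64> %sel
}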
@@ -20631,23 +20631,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               Src2, Src3, Src1),
                                   Mask, PassThru, Subtarget, DAG);
     }
-    case TERLOG_OP_MASK:
-    case TERLOG_OP_MASKZ: {
-      SDValue Src1 = Op.getOperand(1);
-      SDValue Src2 = Op.getOperand(2);
-      SDValue Src3 = Op.getOperand(3);
-      SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
-      SDValue Mask = Op.getOperand(5);
-      MVT VT = Op.getSimpleValueType();
-      SDValue PassThru = Src1;
-      // Set PassThru element.
-      if (IntrData->Type == TERLOG_OP_MASKZ)
-        PassThru = getZeroVector(VT, Subtarget, DAG, dl);
-
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
-                                              Src1, Src2, Src3, Src4),
-                                  Mask, PassThru, Subtarget, DAG);
-    }
     case CVTPD2PS:
       // ISD::FP_ROUND has a second argument that indicates if the truncation
       // does not change the value. Set it to 0 since it can change.
@@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t {
   COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
   EXPAND_FROM_MEM,
-  TERLOG_OP_MASK, TERLOG_OP_MASKZ, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
+  FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
   FIXUPIMMS_MASKZ, GATHER_AVX2,
   ROUNDP, ROUNDS
 };
@@ -949,18 +949,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pternlog_d_512, TERLOG_OP_MASK,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pternlog_q_128, TERLOG_OP_MASK,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pternlog_q_256, TERLOG_OP_MASK,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
-                     X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
   X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
   X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
@@ -1281,18 +1269,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::VFIXUPIMMS, 0),
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
                      X86ISD::VFIXUPIMMS, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_512, TERLOG_OP_MASKZ,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_128, TERLOG_OP_MASKZ,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_256, TERLOG_OP_MASKZ,
-                     X86ISD::VPTERNLOG, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ,
-                     X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, ISD::FMA,
@@ -1456,6 +1432,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
+  X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_pternlog_q_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_pternlog_q_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_pternlog_q_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
@@ -2354,5 +2354,129 @@ entry:
 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)

+define <8 x i64> @test_mm512_ternarylogic_epi32(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast <8 x i64> %__B to <16 x i32>
+  %2 = bitcast <8 x i64> %__C to <16 x i32>
+  %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
+  %4 = bitcast <16 x i32> %3 to <8 x i64>
+  ret <8 x i64> %4
+}
+
+declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32) #1
+
+define <8 x i64> @test_mm512_mask_ternarylogic_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_mask_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast <8 x i64> %__B to <16 x i32>
+  %2 = bitcast <8 x i64> %__C to <16 x i32>
+  %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
+  %4 = bitcast i16 %__U to <16 x i1>
+  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
+  %6 = bitcast <16 x i32> %5 to <8 x i64>
+  ret <8 x i64> %6
+}
+
+define <8 x i64> @test_mm512_maskz_ternarylogic_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_maskz_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast <8 x i64> %__B to <16 x i32>
+  %2 = bitcast <8 x i64> %__C to <16 x i32>
+  %3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
+  %4 = bitcast i16 %__U to <16 x i1>
+  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
+  %6 = bitcast <16 x i32> %5 to <8 x i64>
+  ret <8 x i64> %6
+}
+
+define <8 x i64> @test_mm512_ternarylogic_epi64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
+  ret <8 x i64> %0
+}
+
+declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32) #1
+
+define <8 x i64> @test_mm512_mask_ternarylogic_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_mask_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A
+  ret <8 x i64> %2
+}
+
+define <8 x i64> @test_mm512_maskz_ternarylogic_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
+; X32-LABEL: test_mm512_maskz_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
+  ret <8 x i64> %2
+}
+
 !0 = !{i32 1}
@@ -4231,3 +4231,71 @@ define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x
   %res4 = add <16 x i32> %res3, %res2
   ret <16 x i32> %res4
 }
+
+declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+
+define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
+  %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+
+define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
+  %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
@@ -3206,71 +3206,75 @@ define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x
   ret <4 x float> %res2
 }

-declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)

 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
 ; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
-  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
-  %res2 = add <16 x i32> %res, %res1
+  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+  %2 = bitcast i16 %x4 to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+  %4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+  %res2 = add <16 x i32> %3, %4
   ret <16 x i32> %res2
 }

-declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
-
 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
 ; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
-  %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
-  %res2 = add <16 x i32> %res, %res1
+  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+  %2 = bitcast i16 %x4 to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+  %4 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+  %res2 = add <16 x i32> %3, %4
   ret <16 x i32> %res2
 }

-declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)

 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
 ; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
-  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
-  %res2 = add <8 x i64> %res, %res1
+  %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+  %2 = bitcast i8 %x4 to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
+  %4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+  %res2 = add <8 x i64> %3, %4
   ret <8 x i64> %res2
 }

-declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
-
 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
 ; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
-  %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
-  %res2 = add <8 x i64> %res, %res1
+  %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+  %2 = bitcast i8 %x4 to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+  %4 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+  %res2 = add <8 x i64> %3, %4
   ret <8 x i64> %res2
 }
File diff suppressed because it is too large
@@ -3535,6 +3535,264 @@ entry:
   ret <2 x i64> %2
 }

+define <2 x i64> @test_mm_ternarylogic_epi32(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <2 x i64> %__A to <4 x i32>
+  %1 = bitcast <2 x i64> %__B to <4 x i32>
+  %2 = bitcast <2 x i64> %__C to <4 x i32>
+  %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
+  %4 = bitcast <4 x i32> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32) #2
+
+define <2 x i64> @test_mm_mask_ternarylogic_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_mask_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <2 x i64> %__A to <4 x i32>
+  %1 = bitcast <2 x i64> %__B to <4 x i32>
+  %2 = bitcast <2 x i64> %__C to <4 x i32>
+  %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
+  %4 = bitcast i8 %__U to <8 x i1>
+  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> %0
+  %6 = bitcast <4 x i32> %5 to <2 x i64>
+  ret <2 x i64> %6
+}
+
+define <2 x i64> @test_mm_maskz_ternarylogic_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_maskz_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm_maskz_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <2 x i64> %__A to <4 x i32>
+  %1 = bitcast <2 x i64> %__B to <4 x i32>
+  %2 = bitcast <2 x i64> %__C to <4 x i32>
+  %3 = tail call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, i32 4)
+  %4 = bitcast i8 %__U to <8 x i1>
+  %extract = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %5 = select <4 x i1> %extract, <4 x i32> %3, <4 x i32> zeroinitializer
+  %6 = bitcast <4 x i32> %5 to <2 x i64>
+  ret <2 x i64> %6
+}
+
+define <4 x i64> @test_mm256_ternarylogic_epi32(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <4 x i64> %__A to <8 x i32>
+  %1 = bitcast <4 x i64> %__B to <8 x i32>
+  %2 = bitcast <4 x i64> %__C to <8 x i32>
+  %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
+  %4 = bitcast <8 x i32> %3 to <4 x i64>
+  ret <4 x i64> %4
+}
+
+declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32) #2
+
+define <4 x i64> @test_mm256_mask_ternarylogic_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_mask_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <4 x i64> %__A to <8 x i32>
+  %1 = bitcast <4 x i64> %__B to <8 x i32>
+  %2 = bitcast <4 x i64> %__C to <8 x i32>
+  %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
+  %4 = bitcast i8 %__U to <8 x i1>
+  %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> %0
+  %6 = bitcast <8 x i32> %5 to <4 x i64>
+  ret <4 x i64> %6
+}
+
+define <4 x i64> @test_mm256_maskz_ternarylogic_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_maskz_ternarylogic_epi32:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_maskz_ternarylogic_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <4 x i64> %__A to <8 x i32>
+  %1 = bitcast <4 x i64> %__B to <8 x i32>
+  %2 = bitcast <4 x i64> %__C to <8 x i32>
+  %3 = tail call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, i32 4)
+  %4 = bitcast i8 %__U to <8 x i1>
+  %5 = select <8 x i1> %4, <8 x i32> %3, <8 x i32> zeroinitializer
+  %6 = bitcast <8 x i32> %5 to <4 x i64>
+  ret <4 x i64> %6
+}
+
+define <2 x i64> @test_mm_ternarylogic_epi64(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
+  ret <2 x i64> %0
+}
+
+declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32) #2
+
+define <2 x i64> @test_mm_mask_ternarylogic_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_mask_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> %__A
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm_maskz_ternarylogic_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
+; X32-LABEL: test_mm_maskz_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm_maskz_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C, i32 4)
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %2 = select <2 x i1> %extract, <2 x i64> %0, <2 x i64> zeroinitializer
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @test_mm256_ternarylogic_epi64(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
+  ret <4 x i64> %0
+}
+
+declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32) #2
+
+define <4 x i64> @test_mm256_mask_ternarylogic_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_mask_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> %__A
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @test_mm256_maskz_ternarylogic_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
+; X32-LABEL: test_mm256_maskz_ternarylogic_epi64:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm256_maskz_ternarylogic_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C, i32 4)
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = select <4 x i1> %extract, <4 x i64> %0, <4 x i64> zeroinitializer
+  ret <4 x i64> %2
+}
+
 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>)
 declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
|
|||
%res4 = add <4 x i64> %res3, %res2
|
||||
ret <4 x i64> %res4
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
|
||||
%res2 = add <2 x i64> %res, %res1
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
|
||||
|
||||
define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
|
||||
%res2 = add <2 x i64> %res, %res1
|
||||
ret <2 x i64> %res2
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
|
||||
%res2 = add <4 x i64> %res, %res1
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
|
||||
|
||||
define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
|
||||
%res2 = add <4 x i64> %res, %res1
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
|
|
@ -2222,20 +2222,23 @@ define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x
|
|||
ret <8 x float> %res2
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
|
||||
declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
%1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
|
||||
%2 = bitcast i8 %x4 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0
|
||||
%4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
|
||||
%res2 = add <4 x i32> %3, %4
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
|
@ -2244,32 +2247,37 @@ declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4
|
|||
define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
|
||||
%res2 = add <4 x i32> %res, %res1
|
||||
%1 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
|
||||
%2 = bitcast i8 %x4 to <8 x i1>
|
||||
%extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> zeroinitializer
|
||||
%4 = call <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33)
|
||||
%res2 = add <4 x i32> %3, %4
|
||||
ret <4 x i32> %res2
|
||||
}
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
|
||||
declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32)
|
||||
|
||||
define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
|
||||
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
|
||||
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
|
||||
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
|
||||
%res2 = add <8 x i32> %res, %res1
|
||||
%1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
|
||||
%2 = bitcast i8 %x4 to <8 x i1>
|
||||
%3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0
|
||||
%4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
|
||||
%res2 = add <8 x i32> %3, %4
|
||||
ret <8 x i32> %res2
|
||||
}
|
||||
|
||||
|
@ -2278,83 +2286,93 @@ declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8
define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  %1 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
  %4 = call <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33)
  %res2 = add <8 x i32> %3, %4
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32)

define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0
  %4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
  %res2 = add <2 x i64> %3, %4
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  %1 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> zeroinitializer
  %4 = call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33)
  %res2 = add <2 x i64> %3, %4
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32)

define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0
  %4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
  %res2 = add <4 x i64> %3, %4
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; CHECK-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  %1 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> zeroinitializer
  %4 = call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33)
  %res2 = add <4 x i64> %3, %4
  ret <4 x i64> %res2
}

@ -1798,20 +1798,20 @@ define <16 x i32> @stack_fold_ternlogd(<16 x i32> %x0, <16 x i32> %x1, <16 x i32
;CHECK-LABEL: stack_fold_ternlogd
;CHECK: vpternlogd $33, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
  %2 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
  ret <16 x i32> %2
}
declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)

define <8 x i64> @stack_fold_ternlogq(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
;CHECK-LABEL: stack_fold_ternlogq
;CHECK: vpternlogq $33, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
  ret <8 x i64> %res
  %2 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)

define <64 x i8> @stack_fold_punpckhbw_zmm(<64 x i8> %a0, <64 x i8> %a1) {
;CHECK-LABEL: stack_fold_punpckhbw_zmm