[X86] Converge X86ISD::VPERMV3 and X86ISD::VPERMIV3 to a single opcode.
These do the same thing with the first and second sources swapped. They previously came from separate intrinsics that specified different masking behavior, but we can cover that with isel patterns and a single node. This is a step towards reducing the number of intrinsics needed.

A bunch of tests change because we are now biased towards choosing VPERMT over VPERMI when there is nothing to signal that commuting is beneficial.

llvm-svn: 333383
parent 6b545182fb
commit dcfcfdb0d1
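At the instruction level the two converged nodes correspond to vpermi2* and vpermt2*: both perform the same two-source lane gather and differ only in which input register the result overwrites (the index register for vpermi2, the first data register for vpermt2), which is why one opcode plus commuting suffices. A minimal scalar sketch of the shared semantic (an illustrative model, not LLVM code; all names are invented):

    #include <array>
    #include <cassert>
    #include <cstdint>

    using V16 = std::array<uint32_t, 16>;

    // Scalar model of the semantic shared by vpermi2d and vpermt2d: each index
    // element selects one of the 32 lanes of concat(Src1, Src2); bit 4 of the
    // index picks the source vector.
    static V16 permutex2var(const V16 &Src1, const V16 &Idx, const V16 &Src2) {
      V16 Res;
      for (int I = 0; I < 16; ++I) {
        uint32_t E = Idx[I] & 31;
        Res[I] = E < 16 ? Src1[E] : Src2[E - 16];
      }
      return Res;
    }

    int main() {
      V16 A{}, B{}, Idx{};
      for (uint32_t I = 0; I < 16; ++I) {
        A[I] = I;        // 0..15
        B[I] = 100 + I;  // 100..115
        Idx[I] = 31 - I; // walk concat(A, B) backwards
      }
      // vpermi2d clobbers the index register and vpermt2d the first data
      // register, but the value computed is identical either way.
      V16 R = permutex2var(A, Idx, B);
      assert(R[0] == 115 && R[15] == 100);
      return 0;
    }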
@@ -4372,7 +4372,6 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::VPPERM:
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
-  case X86ISD::VPERMIV3:
   case X86ISD::VZEXT_MOVL:
     return true;
   }
@@ -4388,7 +4387,6 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
   case X86ISD::VPPERM:
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
-  case X86ISD::VPERMIV3:
     return true;
   // 'Faux' Target Shuffles.
   case ISD::AND:
@@ -5977,21 +5975,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
     }
     return false;
   }
-  case X86ISD::VPERMIV3: {
-    assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
-    assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
-    IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
-    // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
-    Ops.push_back(N->getOperand(1));
-    Ops.push_back(N->getOperand(2));
-    SDValue MaskNode = N->getOperand(0);
-    unsigned MaskEltSize = VT.getScalarSizeInBits();
-    if (auto *C = getTargetConstantFromNode(MaskNode)) {
-      DecodeVPERMV3Mask(C, MaskEltSize, Mask);
-      break;
-    }
-    return false;
-  }
  default: llvm_unreachable("unknown target shuffle node");
  }
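The removed VPERMIV3 case fed the same DecodeVPERMV3Mask helper that the surviving VPERMV3 case uses; only the operand order differed. A hedged standalone sketch of that decode logic (a model, not the actual X86ShuffleDecode.cpp code): every index selects from the 2*NumElts lanes formed by concatenating the two sources.

    #include <cstdint>
    #include <vector>

    // Hypothetical model of DecodeVPERMV3Mask: only the low log2(2*NumElts)
    // bits of each raw index are significant. Values 0..NumElts-1 refer to
    // the first source, NumElts..2*NumElts-1 to the second.
    std::vector<int> decodeVPERMV3Mask(const std::vector<uint64_t> &RawIndices) {
      unsigned NumElts = RawIndices.size();
      std::vector<int> Mask;
      for (uint64_t Idx : RawIndices)
        Mask.push_back(static_cast<int>(Idx & (2 * NumElts - 1)));
      return Mask;
    }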
@@ -20540,9 +20523,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       SDValue Src3 = Op.getOperand(3);
       SDValue Mask = Op.getOperand(4);
       MVT VT = Op.getSimpleValueType();
-      SDValue PassThru = SDValue();
 
       // set PassThru element
+      SDValue PassThru;
       if (IntrData->Type == VPERM_3OP_MASKZ)
         PassThru = getZeroVector(VT, Subtarget, DAG, dl);
       else
@@ -20554,6 +20537,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               Src2, Src1, Src3),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case VPERMI_3OP_MASK:{
+      // Src2 is the PassThru
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue Src3 = Op.getOperand(3);
+      SDValue Mask = Op.getOperand(4);
+      MVT VT = Op.getSimpleValueType();
+
+      // set PassThru element
+      SDValue PassThru = DAG.getBitcast(VT, Src2);
+
+      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
+                                              dl, Op.getValueType(),
+                                              Src1, Src2, Src3),
+                                  Mask, PassThru, Subtarget, DAG);
+    }
     case FMA_OP_MASK3:
     case FMA_OP_MASKZ:
     case FMA_OP_MASK: {
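At the DAG level both intrinsic flavors now build the same VPERMV3 permute and wrap it in a select via getVectorMaskingNode; the only difference is which value survives in the masked-off lanes. A hedged scalar model of that select for a 16 x i32 permute (illustrative only; the function name is invented):

    #include <array>
    #include <cstdint>

    // Lanes with a set mask bit take the permute result; cleared lanes take
    // the pass-through: zero for *_MASKZ, the index operand for vpermi2var
    // intrinsics, and the first data operand for vpermt2var intrinsics.
    std::array<uint32_t, 16> applyMask(uint16_t K,
                                       const std::array<uint32_t, 16> &PermResult,
                                       const std::array<uint32_t, 16> &PassThru) {
      std::array<uint32_t, 16> Res;
      for (int I = 0; I < 16; ++I)
        Res[I] = (K >> I) & 1 ? PermResult[I] : PassThru[I];
      return Res;
    }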
@@ -25873,7 +25872,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VPERM2X128:         return "X86ISD::VPERM2X128";
   case X86ISD::VPERMV:             return "X86ISD::VPERMV";
   case X86ISD::VPERMV3:            return "X86ISD::VPERMV3";
-  case X86ISD::VPERMIV3:           return "X86ISD::VPERMIV3";
   case X86ISD::VPERMI:             return "X86ISD::VPERMI";
   case X86ISD::VPTERNLOG:          return "X86ISD::VPTERNLOG";
   case X86ISD::VFIXUPIMM:          return "X86ISD::VFIXUPIMM";
@@ -38861,7 +38859,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERMI:
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
-  case X86ISD::VPERMIV3:
   case X86ISD::VPERMIL2:
   case X86ISD::VPERMILPI:
   case X86ISD::VPERMILPV:
@@ -428,10 +428,6 @@ namespace llvm {
       // Res = VPERMV3 V0, MaskV, V1
       VPERMV3,
 
-      // 3-op Variable Permute overwriting the index (VPERMI2).
-      // Res = VPERMIV3 V0, MaskV, V1
-      VPERMIV3,
-
       // Bitwise ternary logic.
       VPTERNLOG,
       // Fix Up Special Packed Float32/64 values.
@@ -334,6 +334,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
 // operand differs from the output VT. This requires a bitconvert on
 // the preserved vector going into the vselect.
+// NOTE: The unmasked pattern is disabled.
 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                      X86VectorVTInfo InVT,
                                      dag Outs, dag NonTiedIns, string OpcodeStr,
@@ -343,7 +344,7 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
-                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                          (bitconvert InVT.RC:$src1)),
                          vselect, "", IsCommutable>;
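The bitconvert on the preserved operand matters because for vpermi2* the tied pass-through operand is the integer index vector even when the result is floating point. This is visible at the C intrinsics level in _mm512_mask2_permutex2var_pd, whose masked-off lanes return the raw index bits reinterpreted as doubles. A small illustration (assumes an AVX-512 target, e.g. compiled with -mavx512f):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m512d A = _mm512_set1_pd(1.0);
      __m512d B = _mm512_set1_pd(2.0);
      __m512i Idx = _mm512_set_epi64(8, 9, 10, 11, 0, 1, 2, 3);
      // Mask 0x0F: the low four lanes are permuted; the high four lanes keep
      // Idx's bits, reinterpreted as double. This reinterpretation is exactly
      // what the bitconvert in the TableGen pattern models.
      __m512d R = _mm512_mask2_permutex2var_pd(A, Idx, 0x0F, B);
      double Out[8];
      _mm512_storeu_pd(Out, R);
      for (double D : Out)
        printf("%a\n", D); // high lanes print the reinterpreted integer bits
      return 0;
    }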
@@ -1719,17 +1720,19 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
-let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+    hasSideEffects = 0 in {
   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
-          (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>,
+          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;
 
+  let mayLoad = 1 in
   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
-          (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
+          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
 }
@@ -1738,13 +1741,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched,
                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
-  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
+  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
+      hasSideEffects = 0, mayLoad = 1 in
   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
             (ins _.RC:$src2, _.ScalarMemOp:$src3),
             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
             !strconcat("$src2, ${src3}", _.BroadcastStr ),
-            (_.VT (X86VPermi2X IdxVT.RC:$src1,
-                   _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
+            (_.VT (X86VPermt2 _.RC:$src2,
+                   IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
             AVX5128IBase, EVEX_4V, EVEX_B,
             Sched<[sched.Folded, ReadAfterLd]>;
 }
@@ -1806,21 +1810,22 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                   X86VectorVTInfo IdxVT,
                                   X86VectorVTInfo CastVT> {
   def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                   (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
-                                (_.VT _.RC:$src2), _.RC:$src3),
+                   (X86VPermt2 (_.VT _.RC:$src2),
+                               (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                    (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, _.RC:$src3)>;
   def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                   (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
-                                _.RC:$src2, (_.LdFrag addr:$src3)),
+                   (X86VPermt2 _.RC:$src2,
+                               (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+                               (_.LdFrag addr:$src3)),
                    (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, addr:$src3)>;
   def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                   (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
-                                _.RC:$src2,
-                                (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+                   (X86VPermt2 _.RC:$src2,
+                               (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
+                               (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                    (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, addr:$src3)>;
@@ -417,13 +417,6 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
                                              SDTCisSameSizeAs<0,2>,
                                              SDTCisSameAs<0,3>]>, []>;
 
-def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
-                         SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
-                                              SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
-                                              SDTCisSameSizeAs<0,1>,
-                                              SDTCisSameAs<0,2>,
-                                              SDTCisSameAs<0,3>]>, []>;
-
 def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
 
 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
@@ -31,7 +31,7 @@ enum IntrinsicType : uint16_t {
   FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
   FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
   IFMA_OP,
-  VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+  VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
   COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -1061,42 +1061,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
 
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
-  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK,
-                     X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK,
+                     X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
                      X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
@@ -1963,8 +1963,8 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
 ; CHECK-NEXT:    kmovw %esi, %k1
 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; CHECK-NEXT:    vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
-; CHECK-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1
-; CHECK-NEXT:    vpaddd %zmm1, %zmm3, %zmm0
+; CHECK-NEXT:    vpermt2d %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm3, %zmm0
 ; CHECK-NEXT:    retq
   %x2 = load <16 x i32>, <16 x i32>* %x2p
   %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
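The CHECK churn in this and the following test hunks is purely about which of the two equivalent encodings the compiler now prefers for the unmasked permute. For instance, the unmasked intrinsic below may legitimately compile to either vpermt2d or vpermi2d; after this change the default bias is vpermt2d unless commuting looks profitable. A hypothetical example (assumes -mavx512f):

    #include <immintrin.h>

    // Either "vpermt2d %zmm2, %zmm1, %zmm0" or "vpermi2d %zmm2, %zmm0, %zmm1"
    // is a correct lowering: both compute the same value and differ only in
    // which input register the result clobbers.
    __m512i permute_two_tables(__m512i A, __m512i Idx, __m512i B) {
      return _mm512_permutex2var_epi32(A, Idx, B);
    }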
@@ -1979,8 +1979,8 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0,
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vmovapd %zmm1, %zmm3
-; CHECK-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm3
+; CHECK-NEXT:    vmovapd %zmm0, %zmm3
+; CHECK-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm3
 ; CHECK-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vaddpd %zmm3, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
@@ -1996,8 +1996,8 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0,
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vmovaps %zmm1, %zmm3
-; CHECK-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm3
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm3
 ; CHECK-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vaddps %zmm3, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
@@ -2013,8 +2013,8 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm3
-; CHECK-NEXT:    vpermi2q %zmm2, %zmm0, %zmm3
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpermt2q %zmm2, %zmm1, %zmm3
 ; CHECK-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vpaddq %zmm3, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
@@ -1055,8 +1055,8 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
 ; AVX512BW-NEXT:    kmovd %edi, %k1
 ; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; AVX512BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
-; AVX512BW-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
+; AVX512BW-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpaddw %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
@@ -1064,8 +1064,8 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
 ; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT:    vmovdqa64 %zmm1, %zmm3
 ; AVX512F-32-NEXT:    vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
-; AVX512F-32-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1
-; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm3, %zmm0
+; AVX512F-32-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm3, %zmm0
 ; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -1872,8 +1872,8 @@ define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT:    vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
-; CHECK-NEXT:    vpermi2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xda]
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda]
 ; CHECK-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
 ; CHECK-NEXT:    vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
@@ -1889,8 +1889,8 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT:    vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
-; CHECK-NEXT:    vpermi2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xda]
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda]
 ; CHECK-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
 ; CHECK-NEXT:    vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
@@ -51,13 +51,13 @@ define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_512:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovq %rdi, %k1
-; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm3
-; CHECK-NEXT:    vpermi2b %zmm2, %zmm0, %zmm3 {%k1}
-; CHECK-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpermt2b %zmm2, %zmm1, %zmm3
+; CHECK-NEXT:    vpermi2b %zmm2, %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vpxor %xmm4, %xmm4, %xmm4
 ; CHECK-NEXT:    vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z}
-; CHECK-NEXT:    vpaddb %zmm1, %zmm4, %zmm0
-; CHECK-NEXT:    vpaddb %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    vpaddb %zmm3, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
   %res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
@@ -95,8 +95,8 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT:    vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
-; CHECK-NEXT:    vpermi2b %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x75,0xda]
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
+; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
 ; CHECK-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
 ; CHECK-NEXT:    vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
 ; CHECK-NEXT:    vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
@@ -117,13 +117,13 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT:    vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
-; CHECK-NEXT:    vpermi2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xda]
-; CHECK-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x75,0xca]
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
+; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
+; CHECK-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
 ; CHECK-NEXT:    vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
 ; CHECK-NEXT:    vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
-; CHECK-NEXT:    vpaddb %ymm1, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc1]
-; CHECK-NEXT:    vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
+; CHECK-NEXT:    vpaddb %ymm3, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
+; CHECK-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
@@ -602,8 +602,8 @@ define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0,
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT:    vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9]
-; CHECK-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x77,0xda]
+; CHECK-NEXT:    vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
+; CHECK-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
 ; CHECK-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
 ; CHECK-NEXT:    vaddpd %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
@@ -619,8 +619,8 @@ define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0,
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT:    vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9]
-; CHECK-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xda]
+; CHECK-NEXT:    vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
+; CHECK-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
 ; CHECK-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
 ; CHECK-NEXT:    vaddpd %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
@@ -636,8 +636,8 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT:    vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9]
-; CHECK-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda]
+; CHECK-NEXT:    vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
+; CHECK-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
 ; CHECK-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
 ; CHECK-NEXT:    vaddps %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
@@ -665,8 +665,8 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <
 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT:    vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9]
-; CHECK-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xda]
+; CHECK-NEXT:    vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
+; CHECK-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
 ; CHECK-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
 ; CHECK-NEXT:    vaddps %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]