forked from OSchip/llvm-project
[X86] Simplify b2b KSHIFTL+KSHIFTR using demanded elts.
llvm-svn: 372155
This commit is contained in:
parent
f1ba94ade0
commit
f9a89b6788
|
@ -34627,29 +34627,82 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
||||||
// TODO convert SrcUndef to KnownUndef.
|
// TODO convert SrcUndef to KnownUndef.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case X86ISD::KSHIFTL:
|
case X86ISD::KSHIFTL: {
|
||||||
|
SDValue Src = Op.getOperand(0);
|
||||||
|
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
|
||||||
|
assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
|
||||||
|
unsigned ShiftAmt = Amt->getZExtValue();
|
||||||
|
|
||||||
|
if (ShiftAmt == 0)
|
||||||
|
return TLO.CombineTo(Op, Src);
|
||||||
|
|
||||||
|
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
|
||||||
|
// single shift. We can do this if the bottom bits (which are shifted
|
||||||
|
// out) are never demanded.
|
||||||
|
if (Src.getOpcode() == X86ISD::KSHIFTR) {
|
||||||
|
if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) {
|
||||||
|
unsigned C1 = Src.getConstantOperandVal(1);
|
||||||
|
unsigned Opc = X86ISD::KSHIFTL;
|
||||||
|
int Diff = ShiftAmt - C1;
|
||||||
|
if (Diff < 0) {
|
||||||
|
Diff = -Diff;
|
||||||
|
Opc = X86ISD::KSHIFTR;
|
||||||
|
}
|
||||||
|
|
||||||
|
SDLoc dl(Op);
|
||||||
|
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
|
||||||
|
return TLO.CombineTo(
|
||||||
|
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
APInt DemandedSrc = DemandedElts.lshr(ShiftAmt);
|
||||||
|
if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
|
||||||
|
Depth + 1))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
KnownUndef = KnownUndef.shl(ShiftAmt);
|
||||||
|
KnownZero = KnownZero.shl(ShiftAmt);
|
||||||
|
KnownZero.setLowBits(ShiftAmt);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case X86ISD::KSHIFTR: {
|
case X86ISD::KSHIFTR: {
|
||||||
SDValue Src = Op.getOperand(0);
|
SDValue Src = Op.getOperand(0);
|
||||||
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
|
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
|
||||||
assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
|
assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
|
||||||
unsigned ShiftAmt = Amt->getZExtValue();
|
unsigned ShiftAmt = Amt->getZExtValue();
|
||||||
bool ShiftLeft = (X86ISD::KSHIFTL == Opc);
|
|
||||||
|
|
||||||
APInt DemandedSrc =
|
if (ShiftAmt == 0)
|
||||||
ShiftLeft ? DemandedElts.lshr(ShiftAmt) : DemandedElts.shl(ShiftAmt);
|
return TLO.CombineTo(Op, Src);
|
||||||
|
|
||||||
|
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
|
||||||
|
// single shift. We can do this if the top bits (which are shifted
|
||||||
|
// out) are never demanded.
|
||||||
|
if (Src.getOpcode() == X86ISD::KSHIFTL) {
|
||||||
|
if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) {
|
||||||
|
unsigned C1 = Src.getConstantOperandVal(1);
|
||||||
|
unsigned Opc = X86ISD::KSHIFTR;
|
||||||
|
int Diff = ShiftAmt - C1;
|
||||||
|
if (Diff < 0) {
|
||||||
|
Diff = -Diff;
|
||||||
|
Opc = X86ISD::KSHIFTL;
|
||||||
|
}
|
||||||
|
|
||||||
|
SDLoc dl(Op);
|
||||||
|
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
|
||||||
|
return TLO.CombineTo(
|
||||||
|
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
APInt DemandedSrc = DemandedElts.shl(ShiftAmt);
|
||||||
if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
|
if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
|
||||||
Depth + 1))
|
Depth + 1))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (ShiftLeft) {
|
KnownUndef = KnownUndef.lshr(ShiftAmt);
|
||||||
KnownUndef = KnownUndef.shl(ShiftAmt);
|
KnownZero = KnownZero.lshr(ShiftAmt);
|
||||||
KnownZero = KnownZero.shl(ShiftAmt);
|
KnownZero.setHighBits(ShiftAmt);
|
||||||
KnownZero.setLowBits(ShiftAmt);
|
|
||||||
} else {
|
|
||||||
KnownUndef = KnownUndef.lshr(ShiftAmt);
|
|
||||||
KnownZero = KnownZero.lshr(ShiftAmt);
|
|
||||||
KnownZero.setHighBits(ShiftAmt);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case X86ISD::CVTSI2P:
|
case X86ISD::CVTSI2P:
|
||||||
|
|
|
@ -1974,8 +1974,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $14, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k1, %k1
|
; KNL-NEXT: kxorw %k2, %k1, %k1
|
||||||
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
||||||
|
@ -2074,8 +2073,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $14, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $1, %k2, %k2
|
; KNL-NEXT: kshiftlw $1, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
||||||
|
@ -2174,8 +2172,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k5
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
; KNL-NEXT: kxorw %k5, %k4, %k4
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
; KNL-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; KNL-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $1, %k3, %k3
|
; KNL-NEXT: kshiftlw $1, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
||||||
|
@ -2274,8 +2271,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k5
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
; KNL-NEXT: kxorw %k5, %k4, %k4
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
; KNL-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; KNL-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; KNL-NEXT: kxorw %k4, %k0, %k0
|
; KNL-NEXT: kxorw %k4, %k0, %k0
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
||||||
|
@ -2396,8 +2392,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
|
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
|
||||||
|
@ -2496,8 +2491,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
|
; AVX512DQNOBW-NEXT: kshiftlw $14, %k3, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k3
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
|
||||||
|
@ -2596,8 +2590,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
; AVX512DQNOBW-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k3, %k3
|
; AVX512DQNOBW-NEXT: kshiftlw $1, %k3, %k3
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k3
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k3
|
||||||
|
@ -2696,8 +2689,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
||||||
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
|
; AVX512DQNOBW-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
|
||||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
|
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
|
||||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
|
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
|
||||||
|
|
|
@ -1283,8 +1283,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
||||||
; KNL-NEXT: kshiftrw $9, %k1, %k1
|
; KNL-NEXT: kshiftrw $9, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $6, %k0, %k3
|
; KNL-NEXT: kshiftrw $6, %k0, %k3
|
||||||
; KNL-NEXT: kxorw %k1, %k3, %k1
|
; KNL-NEXT: kxorw %k1, %k3, %k1
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k1
|
; KNL-NEXT: kshiftlw $6, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $9, %k1, %k1
|
|
||||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
; KNL-NEXT: kxorw %k1, %k0, %k0
|
||||||
; KNL-NEXT: kshiftlw $9, %k0, %k0
|
; KNL-NEXT: kshiftlw $9, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $9, %k0, %k0
|
; KNL-NEXT: kshiftrw $9, %k0, %k0
|
||||||
|
@ -1304,8 +1303,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
||||||
; SKX-NEXT: kshiftrw $9, %k1, %k1
|
; SKX-NEXT: kshiftrw $9, %k1, %k1
|
||||||
; SKX-NEXT: kshiftrb $6, %k0, %k3
|
; SKX-NEXT: kshiftrb $6, %k0, %k3
|
||||||
; SKX-NEXT: kxorb %k1, %k3, %k1
|
; SKX-NEXT: kxorb %k1, %k3, %k1
|
||||||
; SKX-NEXT: kshiftlb $7, %k1, %k1
|
; SKX-NEXT: kshiftlb $6, %k1, %k1
|
||||||
; SKX-NEXT: kshiftrb $1, %k1, %k1
|
|
||||||
; SKX-NEXT: kxorb %k1, %k0, %k0
|
; SKX-NEXT: kxorb %k1, %k0, %k0
|
||||||
; SKX-NEXT: kshiftlb $1, %k0, %k0
|
; SKX-NEXT: kshiftlb $1, %k0, %k0
|
||||||
; SKX-NEXT: kshiftrb $1, %k0, %k0
|
; SKX-NEXT: kshiftrb $1, %k0, %k0
|
||||||
|
@ -1322,8 +1320,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
||||||
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
|
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
|
||||||
; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
|
; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
|
||||||
; AVX512BW-NEXT: kxorw %k1, %k3, %k1
|
; AVX512BW-NEXT: kxorw %k1, %k3, %k1
|
||||||
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
|
; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
|
||||||
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
|
|
||||||
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
|
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
|
||||||
; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
|
; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
|
||||||
; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
|
; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
|
||||||
|
@ -1342,8 +1339,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
||||||
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
|
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
|
||||||
; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
|
; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
|
||||||
; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
|
; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
|
||||||
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
|
; AVX512DQ-NEXT: kshiftlb $6, %k1, %k1
|
||||||
; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
|
|
||||||
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
|
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
|
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
|
; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
|
||||||
|
@ -1363,8 +1359,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
|
||||||
; X86-NEXT: kshiftrw $9, %k1, %k1
|
; X86-NEXT: kshiftrw $9, %k1, %k1
|
||||||
; X86-NEXT: kshiftrb $6, %k0, %k3
|
; X86-NEXT: kshiftrb $6, %k0, %k3
|
||||||
; X86-NEXT: kxorb %k1, %k3, %k1
|
; X86-NEXT: kxorb %k1, %k3, %k1
|
||||||
; X86-NEXT: kshiftlb $7, %k1, %k1
|
; X86-NEXT: kshiftlb $6, %k1, %k1
|
||||||
; X86-NEXT: kshiftrb $1, %k1, %k1
|
|
||||||
; X86-NEXT: kxorb %k1, %k0, %k0
|
; X86-NEXT: kxorb %k1, %k0, %k0
|
||||||
; X86-NEXT: kshiftlb $1, %k0, %k0
|
; X86-NEXT: kshiftlb $1, %k0, %k0
|
||||||
; X86-NEXT: kshiftrb $1, %k0, %k0
|
; X86-NEXT: kshiftrb $1, %k0, %k0
|
||||||
|
@ -2842,8 +2837,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k3
|
; KNL-NEXT: kmovw %eax, %k3
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k2
|
; KNL-NEXT: kshiftlw $14, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
|
||||||
; KNL-NEXT: kxorw %k2, %k0, %k0
|
; KNL-NEXT: kxorw %k2, %k0, %k0
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
||||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
||||||
|
@ -2942,8 +2936,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k4
|
; KNL-NEXT: kmovw %eax, %k4
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $15, %k3, %k3
|
; KNL-NEXT: kshiftlw $14, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
|
||||||
; KNL-NEXT: kxorw %k3, %k2, %k2
|
; KNL-NEXT: kxorw %k3, %k2, %k2
|
||||||
; KNL-NEXT: kshiftlw $1, %k2, %k2
|
; KNL-NEXT: kshiftlw $1, %k2, %k2
|
||||||
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
; KNL-NEXT: kshiftrw $1, %k2, %k2
|
||||||
|
@ -3042,8 +3035,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k5
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
; KNL-NEXT: kxorw %k5, %k4, %k4
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
; KNL-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; KNL-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; KNL-NEXT: kxorw %k4, %k3, %k3
|
; KNL-NEXT: kxorw %k4, %k3, %k3
|
||||||
; KNL-NEXT: kshiftlw $1, %k3, %k3
|
; KNL-NEXT: kshiftlw $1, %k3, %k3
|
||||||
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
; KNL-NEXT: kshiftrw $1, %k3, %k3
|
||||||
|
@ -3142,8 +3134,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; KNL-NEXT: kmovw %eax, %k5
|
; KNL-NEXT: kmovw %eax, %k5
|
||||||
; KNL-NEXT: kxorw %k5, %k4, %k4
|
; KNL-NEXT: kxorw %k5, %k4, %k4
|
||||||
; KNL-NEXT: kshiftlw $15, %k4, %k4
|
; KNL-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; KNL-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; KNL-NEXT: kxorw %k4, %k1, %k1
|
; KNL-NEXT: kxorw %k4, %k1, %k1
|
||||||
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||||
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
||||||
|
@ -3264,8 +3255,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k3
|
; AVX512DQ-NEXT: kmovw %eax, %k3
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
|
|
||||||
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
|
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
|
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
|
||||||
|
@ -3364,8 +3354,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k4
|
; AVX512DQ-NEXT: kmovw %eax, %k4
|
||||||
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
|
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
|
; AVX512DQ-NEXT: kshiftlw $14, %k3, %k3
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
|
|
||||||
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
|
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
|
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
|
||||||
|
@ -3464,8 +3453,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k5
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
|
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
|
||||||
; AVX512DQ-NEXT: kshiftlw $1, %k3, %k3
|
; AVX512DQ-NEXT: kshiftlw $1, %k3, %k3
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
|
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
|
||||||
|
@ -3564,8 +3552,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
|
||||||
; AVX512DQ-NEXT: kmovw %eax, %k5
|
; AVX512DQ-NEXT: kmovw %eax, %k5
|
||||||
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
|
||||||
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
|
; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
|
|
||||||
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
|
||||||
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
|
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
|
||||||
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
|
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
|
||||||
|
|
|
@ -23526,8 +23526,7 @@ define i8 @mask_zero_lower(<4 x i32> %a) {
|
||||||
; NoVLX: # %bb.0:
|
; NoVLX: # %bb.0:
|
||||||
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||||
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||||
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
|
; NoVLX-NEXT: kshiftlw $4, %k0, %k0
|
||||||
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
|
|
||||||
; NoVLX-NEXT: kmovw %k0, %eax
|
; NoVLX-NEXT: kmovw %k0, %eax
|
||||||
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
|
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; NoVLX-NEXT: vzeroupper
|
; NoVLX-NEXT: vzeroupper
|
||||||
|
|
|
@ -2252,8 +2252,7 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
||||||
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
||||||
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
; AVX512-NEXT: kshiftlw $2, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
|
||||||
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||||
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
|
|
|
@ -1988,8 +1988,7 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
|
||||||
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
; AVX512-NEXT: kxorw %k0, %k2, %k2
|
||||||
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
; AVX512-NEXT: kshiftrw $2, %k2, %k3
|
||||||
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
; AVX512-NEXT: kxorw %k1, %k3, %k1
|
||||||
; AVX512-NEXT: kshiftlw $15, %k1, %k1
|
; AVX512-NEXT: kshiftlw $2, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
|
||||||
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
; AVX512-NEXT: kxorw %k1, %k2, %k1
|
||||||
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
; AVX512-NEXT: kshiftlw $13, %k1, %k1
|
||||||
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
; AVX512-NEXT: kshiftrw $13, %k1, %k1
|
||||||
|
|
Loading…
Reference in New Issue