[X86] Simplify b2b KSHIFTL+KSHIFTR using demanded elts.

llvm-svn: 372155
This commit is contained in:
Craig Topper 2019-09-17 18:02:56 +00:00
parent f1ba94ade0
commit f9a89b6788
6 changed files with 90 additions and 61 deletions

View File

@ -34627,29 +34627,82 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// TODO convert SrcUndef to KnownUndef.
break;
}
case X86ISD::KSHIFTL:
case X86ISD::KSHIFTL: {
SDValue Src = Op.getOperand(0);
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
unsigned ShiftAmt = Amt->getZExtValue();
if (ShiftAmt == 0)
return TLO.CombineTo(Op, Src);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
if (Src.getOpcode() == X86ISD::KSHIFTR) {
if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) {
unsigned C1 = Src.getConstantOperandVal(1);
unsigned Opc = X86ISD::KSHIFTL;
int Diff = ShiftAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = X86ISD::KSHIFTR;
}
SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
}
}
APInt DemandedSrc = DemandedElts.lshr(ShiftAmt);
if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
Depth + 1))
return true;
KnownUndef = KnownUndef.shl(ShiftAmt);
KnownZero = KnownZero.shl(ShiftAmt);
KnownZero.setLowBits(ShiftAmt);
break;
}
case X86ISD::KSHIFTR: {
SDValue Src = Op.getOperand(0);
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
unsigned ShiftAmt = Amt->getZExtValue();
bool ShiftLeft = (X86ISD::KSHIFTL == Opc);
APInt DemandedSrc =
ShiftLeft ? DemandedElts.lshr(ShiftAmt) : DemandedElts.shl(ShiftAmt);
if (ShiftAmt == 0)
return TLO.CombineTo(Op, Src);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted
// out) are never demanded.
if (Src.getOpcode() == X86ISD::KSHIFTL) {
if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) {
unsigned C1 = Src.getConstantOperandVal(1);
unsigned Opc = X86ISD::KSHIFTR;
int Diff = ShiftAmt - C1;
if (Diff < 0) {
Diff = -Diff;
Opc = X86ISD::KSHIFTL;
}
SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
}
}
APInt DemandedSrc = DemandedElts.shl(ShiftAmt);
if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
Depth + 1))
return true;
if (ShiftLeft) {
KnownUndef = KnownUndef.shl(ShiftAmt);
KnownZero = KnownZero.shl(ShiftAmt);
KnownZero.setLowBits(ShiftAmt);
} else {
KnownUndef = KnownUndef.lshr(ShiftAmt);
KnownZero = KnownZero.lshr(ShiftAmt);
KnownZero.setHighBits(ShiftAmt);
}
KnownUndef = KnownUndef.lshr(ShiftAmt);
KnownZero = KnownZero.lshr(ShiftAmt);
KnownZero.setHighBits(ShiftAmt);
break;
}
case X86ISD::CVTSI2P:

View File

@ -1974,8 +1974,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
@ -2074,8 +2073,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
; KNL-NEXT: kshiftlw $14, %k3, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
@ -2174,8 +2172,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k4
; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $1, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
@ -2274,8 +2271,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k4
; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
@ -2396,8 +2392,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
; AVX512DQNOBW-NEXT: kxorw %k2, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
@ -2496,8 +2491,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftlw $14, %k3, %k3
; AVX512DQNOBW-NEXT: kxorw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftlw $1, %k2, %k2
; AVX512DQNOBW-NEXT: kshiftrw $1, %k2, %k2
@ -2596,8 +2590,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftlw $1, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $1, %k3, %k3
@ -2696,8 +2689,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kxorw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $1, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQNOBW-NEXT: kxorw %k4, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1

View File

@ -1283,8 +1283,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k0, %k3
; KNL-NEXT: kxorw %k1, %k3, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kshiftlw $6, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $9, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k0
@ -1304,8 +1303,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; SKX-NEXT: kshiftrw $9, %k1, %k1
; SKX-NEXT: kshiftrb $6, %k0, %k3
; SKX-NEXT: kxorb %k1, %k3, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $1, %k1, %k1
; SKX-NEXT: kshiftlb $6, %k1, %k1
; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kshiftlb $1, %k0, %k0
; SKX-NEXT: kshiftrb $1, %k0, %k0
@ -1322,8 +1320,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
; AVX512BW-NEXT: kxorw %k1, %k3, %k1
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kshiftlw $9, %k0, %k0
; AVX512BW-NEXT: kshiftrw $9, %k0, %k0
@ -1342,8 +1339,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
; AVX512DQ-NEXT: kshiftlb $6, %k1, %k1
; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
@ -1363,8 +1359,7 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
; X86-NEXT: kshiftrw $9, %k1, %k1
; X86-NEXT: kshiftrb $6, %k0, %k3
; X86-NEXT: kxorb %k1, %k3, %k1
; X86-NEXT: kshiftlb $7, %k1, %k1
; X86-NEXT: kshiftrb $1, %k1, %k1
; X86-NEXT: kshiftlb $6, %k1, %k1
; X86-NEXT: kxorb %k1, %k0, %k0
; X86-NEXT: kshiftlb $1, %k0, %k0
; X86-NEXT: kshiftrb $1, %k0, %k0
@ -2842,8 +2837,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k2
; KNL-NEXT: kxorw %k2, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
@ -2942,8 +2936,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
; KNL-NEXT: kshiftlw $14, %k3, %k3
; KNL-NEXT: kxorw %k3, %k2, %k2
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: kshiftrw $1, %k2, %k2
@ -3042,8 +3035,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k4
; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k3, %k3
; KNL-NEXT: kshiftlw $1, %k3, %k3
; KNL-NEXT: kshiftrw $1, %k3, %k3
@ -3142,8 +3134,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kxorw %k5, %k4, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $1, %k4, %k4
; KNL-NEXT: kshiftlw $14, %k4, %k4
; KNL-NEXT: kxorw %k4, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
@ -3264,8 +3255,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2
; AVX512DQ-NEXT: kxorw %k2, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
@ -3364,8 +3354,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
; AVX512DQ-NEXT: kshiftlw $14, %k3, %k3
; AVX512DQ-NEXT: kxorw %k3, %k2, %k2
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2
; AVX512DQ-NEXT: kshiftrw $1, %k2, %k2
@ -3464,8 +3453,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k3, %k3
; AVX512DQ-NEXT: kshiftlw $1, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $1, %k3, %k3
@ -3564,8 +3552,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kxorw %k5, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $1, %k4, %k4
; AVX512DQ-NEXT: kshiftlw $14, %k4, %k4
; AVX512DQ-NEXT: kxorw %k4, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1

View File

@ -23526,8 +23526,7 @@ define i8 @mask_zero_lower(<4 x i32> %a) {
; NoVLX: # %bb.0:
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kshiftlw $4, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper

View File

@ -2252,8 +2252,7 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX512-NEXT: kxorw %k0, %k2, %k2
; AVX512-NEXT: kshiftrw $2, %k2, %k3
; AVX512-NEXT: kxorw %k1, %k3, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: kshiftlw $2, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $13, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1

View File

@ -1988,8 +1988,7 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; AVX512-NEXT: kxorw %k0, %k2, %k2
; AVX512-NEXT: kshiftrw $2, %k2, %k3
; AVX512-NEXT: kxorw %k1, %k3, %k1
; AVX512-NEXT: kshiftlw $15, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1
; AVX512-NEXT: kshiftlw $2, %k1, %k1
; AVX512-NEXT: kxorw %k1, %k2, %k1
; AVX512-NEXT: kshiftlw $13, %k1, %k1
; AVX512-NEXT: kshiftrw $13, %k1, %k1