forked from OSchip/llvm-project
[X86] Add VPSLLI/VPSRLI ((X >>u C1) << C2) SimplifyDemandedBits combine
Repeat of the generic SimplifyDemandedBits shift combine.
llvm-svn: 350399
This commit is contained in:
parent
414ff52d09
commit
9f4dea8c06
|
@ -32403,15 +32403,38 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
|
|||
break;
|
||||
}
|
||||
case X86ISD::VSHLI: {
|
||||
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
SDValue Op1 = Op.getOperand(1);
|
||||
|
||||
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op1)) {
|
||||
if (ShiftImm->getAPIntValue().uge(BitWidth))
|
||||
break;
|
||||
|
||||
unsigned ShAmt = ShiftImm->getZExtValue();
|
||||
APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
|
||||
|
||||
if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask,
|
||||
OriginalDemandedElts, Known, TLO, Depth + 1))
|
||||
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
|
||||
// single shift. We can do this if the bottom bits (which are shifted
|
||||
// out) are never demanded.
|
||||
if (Op0.getOpcode() == X86ISD::VSRLI &&
|
||||
OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
|
||||
if (auto *Shift2Imm = dyn_cast<ConstantSDNode>(Op0.getOperand(1))) {
|
||||
if (Shift2Imm->getAPIntValue().ult(BitWidth)) {
|
||||
int Diff = ShAmt - Shift2Imm->getZExtValue();
|
||||
if (Diff == 0)
|
||||
return TLO.CombineTo(Op, Op0.getOperand(0));
|
||||
|
||||
unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
|
||||
SDValue NewShift = TLO.DAG.getNode(
|
||||
NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
|
||||
TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
|
||||
return TLO.CombineTo(Op, NewShift);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
|
||||
TLO, Depth + 1))
|
||||
return true;
|
||||
|
||||
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
|
||||
|
|
|
@ -468,11 +468,7 @@ entry:
|
|||
define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) {
|
||||
; SSE2-LABEL: trunc8i32_8i16_lshr:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: psrld $16, %xmm0
|
||||
; SSE2-NEXT: psrld $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -767,18 +763,10 @@ entry:
|
|||
define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
|
||||
; SSE2-LABEL: trunc16i32_16i16_lshr:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: psrld $16, %xmm2
|
||||
; SSE2-NEXT: psrld $16, %xmm3
|
||||
; SSE2-NEXT: psrld $16, %xmm0
|
||||
; SSE2-NEXT: psrld $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE2-NEXT: pslld $16, %xmm3
|
||||
; SSE2-NEXT: psrad $16, %xmm3
|
||||
; SSE2-NEXT: pslld $16, %xmm2
|
||||
; SSE2-NEXT: psrad $16, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: movdqu %xmm2, (%rax)
|
||||
|
@ -787,18 +775,10 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
|
|||
;
|
||||
; SSSE3-LABEL: trunc16i32_16i16_lshr:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: psrld $16, %xmm2
|
||||
; SSSE3-NEXT: psrld $16, %xmm3
|
||||
; SSSE3-NEXT: psrld $16, %xmm0
|
||||
; SSSE3-NEXT: psrld $16, %xmm1
|
||||
; SSSE3-NEXT: pslld $16, %xmm1
|
||||
; SSSE3-NEXT: psrad $16, %xmm1
|
||||
; SSSE3-NEXT: pslld $16, %xmm0
|
||||
; SSSE3-NEXT: psrad $16, %xmm0
|
||||
; SSSE3-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSSE3-NEXT: pslld $16, %xmm3
|
||||
; SSSE3-NEXT: psrad $16, %xmm3
|
||||
; SSSE3-NEXT: pslld $16, %xmm2
|
||||
; SSSE3-NEXT: psrad $16, %xmm2
|
||||
; SSSE3-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSSE3-NEXT: movdqu %xmm2, (%rax)
|
||||
|
|
|
@ -478,11 +478,7 @@ entry:
|
|||
define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) {
|
||||
; SSE2-LABEL: trunc8i32_8i16_lshr:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: psrld $16, %xmm0
|
||||
; SSE2-NEXT: psrld $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
|
@ -777,18 +773,10 @@ entry:
|
|||
define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
|
||||
; SSE2-LABEL: trunc16i32_16i16_lshr:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: psrld $16, %xmm2
|
||||
; SSE2-NEXT: psrld $16, %xmm3
|
||||
; SSE2-NEXT: psrld $16, %xmm0
|
||||
; SSE2-NEXT: psrld $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm1
|
||||
; SSE2-NEXT: psrad $16, %xmm1
|
||||
; SSE2-NEXT: pslld $16, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSE2-NEXT: pslld $16, %xmm3
|
||||
; SSE2-NEXT: psrad $16, %xmm3
|
||||
; SSE2-NEXT: pslld $16, %xmm2
|
||||
; SSE2-NEXT: psrad $16, %xmm2
|
||||
; SSE2-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSE2-NEXT: movdqu %xmm2, (%rax)
|
||||
|
@ -797,18 +785,10 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
|
|||
;
|
||||
; SSSE3-LABEL: trunc16i32_16i16_lshr:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: psrld $16, %xmm2
|
||||
; SSSE3-NEXT: psrld $16, %xmm3
|
||||
; SSSE3-NEXT: psrld $16, %xmm0
|
||||
; SSSE3-NEXT: psrld $16, %xmm1
|
||||
; SSSE3-NEXT: pslld $16, %xmm1
|
||||
; SSSE3-NEXT: psrad $16, %xmm1
|
||||
; SSSE3-NEXT: pslld $16, %xmm0
|
||||
; SSSE3-NEXT: psrad $16, %xmm0
|
||||
; SSSE3-NEXT: packssdw %xmm1, %xmm0
|
||||
; SSSE3-NEXT: pslld $16, %xmm3
|
||||
; SSSE3-NEXT: psrad $16, %xmm3
|
||||
; SSSE3-NEXT: pslld $16, %xmm2
|
||||
; SSSE3-NEXT: psrad $16, %xmm2
|
||||
; SSSE3-NEXT: packssdw %xmm3, %xmm2
|
||||
; SSSE3-NEXT: movdqu %xmm2, (%rax)
|
||||
|
|
Loading…
Reference in New Issue