forked from OSchip/llvm-project
[X86] Improve shift combining
This folds (ashr (shl a, [56,48,32,24,16]), SarConst) into (shl (sext a), [56,48,32,24,16] - SarConst) or into (ashr (sext a), SarConst - [56,48,32,24,16]), depending on the sign of (SarConst - [56,48,32,24,16]). sexts in X86 are MOVs. The MOVs have the same code size as the above SHIFTs (only a SHIFT by 1 has a lower code size). However, the MOVs have 2 advantages over SHIFTs on x86: 1. MOVs can write to a register that differs from the source. 2. MOVs accept memory operands. This fixes PR24373. Patch by: evgeny.v.stupachenko@intel.com Differential Revision: http://reviews.llvm.org/D13161 llvm-svn: 255761
This commit is contained in:
parent
94f181a45f
commit
e75e6e2a23
|
@ -24950,6 +24950,59 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Try to rewrite an arithmetic right shift of a left shift as an in-register
/// sign extension followed by at most one residual shift.
///
/// fold (ashr (shl a, [56,48,32,24,16]), SarConst)
/// into (shl (sext a), [56,48,32,24,16] - SarConst) or
/// into (ashr (sext a), SarConst - [56,48,32,24,16])
/// depending on the sign of (SarConst - [56,48,32,24,16]).
///
/// sexts in X86 are MOVs. The MOVs have the same code size
/// as the above SHIFTs (only a SHIFT by 1 has a lower code size).
/// However the MOVs have 2 advantages over a SHIFT:
/// 1. MOVs can write to a register that differs from the source
/// 2. MOVs accept memory operands
///
/// \param N   the shift node being combined; operand 0 is the shifted value
///            and operand 1 is the shift amount.
/// \param DAG the DAG used to create the replacement nodes.
/// \returns the replacement value, or an empty SDValue when the pattern does
///          not match.
static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned Size = VT.getSizeInBits();

  // Only a scalar integer shift whose input is a single-use SHL, with both
  // shift amounts constant, is a candidate.
  if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant ||
      N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
      N0.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  const APInt &ShlConst = cast<ConstantSDNode>(N01)->getAPIntValue();
  const APInt &SarConst = cast<ConstantSDNode>(N1)->getAPIntValue();
  EVT CVT = N1.getValueType();

  // A shift amount with its sign bit set is not handled.
  if (SarConst.isNegative())
    return SDValue();

  // Only i8/i16/i32 sources are considered, and only when they are strictly
  // narrower than VT: this restricts ShlConst to one of [56,48,32,24,16] and
  // rules out a no-op "extension" from VT to itself.
  for (MVT SVT : {MVT::i8, MVT::i16, MVT::i32}) {
    unsigned ShiftSize = SVT.getSizeInBits();
    if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
      continue;

    SDLoc DL(N);
    SDValue NN =
        DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));

    // Shift still owed after the sign extension; its sign selects the
    // direction of the remaining shift. Kept separate from SarConst so the
    // original constant is never modified.
    APInt Residual = SarConst - (Size - ShiftSize);
    if (Residual == 0)
      return NN;
    if (Residual.isNegative())
      return DAG.getNode(ISD::SHL, DL, VT, NN,
                         DAG.getConstant(-Residual, DL, CVT));
    return DAG.getNode(ISD::SRA, DL, VT, NN,
                       DAG.getConstant(Residual, DL, CVT));
  }
  return SDValue();
}
|
||||
|
||||
/// \brief Returns a vector of 0s if the node in input is a vector logical
|
||||
/// shift by a constant amount which is known to be bigger than or equal
|
||||
/// to the vector element size in bits.
|
||||
|
@ -24989,6 +25042,10 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
|
|||
if (SDValue V = PerformSHLCombine(N, DAG))
|
||||
return V;
|
||||
|
||||
if (N->getOpcode() == ISD::SRA)
|
||||
if (SDValue V = PerformSRACombine(N, DAG))
|
||||
return V;
|
||||
|
||||
// Try to fold this logical shift into a zero vector.
|
||||
if (N->getOpcode() != ISD::SRA)
|
||||
if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
|
||||
|
|
|
@ -4,15 +4,23 @@
|
|||
; a shr (X, -8) that gets subsequently "optimized away" as undef
|
||||
; PR4254
|
||||
|
||||
; after fixing PR24373
|
||||
; shlq $56, %rdi
|
||||
; sarq $48, %rdi
|
||||
; folds into
|
||||
; movsbq %dil, %rax
|
||||
; shlq $8, %rax
|
||||
; which is better for x86
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i64 @foo(i64 %b) nounwind readnone {
|
||||
entry:
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: shlq $56, %rdi
|
||||
; CHECK: sarq $48, %rdi
|
||||
; CHECK: leaq 1(%rdi), %rax
|
||||
; CHECK: movsbq %dil, %rax
|
||||
; CHECK: shlq $8, %rax
|
||||
; CHECK: orq $1, %rax
|
||||
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
|
||||
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
|
||||
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s
|
||||
|
||||
; i32 shl by 16 followed by exact ashr by 15: the line right after the
; "# BB#0:" label must be a movswl from an %esp-relative memory operand.
define i32 @shl16sar15(i32 %a) #0 {
|
||||
; CHECK-LABEL: shl16sar15:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movswl {{[0-9]+}}(%esp), %eax
|
||||
%1 = shl i32 %a, 16
|
||||
%2 = ashr exact i32 %1, 15
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
; i32 shl by 16 followed by exact ashr by 17: the line right after the
; "# BB#0:" label must be a movswl from an %esp-relative memory operand.
define i32 @shl16sar17(i32 %a) #0 {
|
||||
; CHECK-LABEL: shl16sar17:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movswl {{[0-9]+}}(%esp), %eax
|
||||
%1 = shl i32 %a, 16
|
||||
%2 = ashr exact i32 %1, 17
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
; i32 shl by 24 followed by exact ashr by 23: the line right after the
; "# BB#0:" label must be a movsbl from an %esp-relative memory operand.
define i32 @shl24sar23(i32 %a) #0 {
|
||||
; CHECK-LABEL: shl24sar23:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movsbl {{[0-9]+}}(%esp), %eax
|
||||
%1 = shl i32 %a, 24
|
||||
%2 = ashr exact i32 %1, 23
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
; i32 shl by 24 followed by exact ashr by 25: the line right after the
; "# BB#0:" label must be a movsbl from an %esp-relative memory operand.
define i32 @shl24sar25(i32 %a) #0 {
|
||||
; CHECK-LABEL: shl24sar25:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movsbl {{[0-9]+}}(%esp), %eax
|
||||
%1 = shl i32 %a, 24
|
||||
%2 = ashr exact i32 %1, 25
|
||||
ret i32 %2
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
|
||||
|
||||
; i64 shl by 48 followed by exact ashr by 47, truncated to i32: the line right
; after the "# BB#0:" label must be "movswq %di, %rax".
define i32 @shl48sar47(i64 %a) #0 {
|
||||
; CHECK-LABEL: shl48sar47:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movswq %di, %rax
|
||||
%1 = shl i64 %a, 48
|
||||
%2 = ashr exact i64 %1, 47
|
||||
%3 = trunc i64 %2 to i32
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; i64 shl by 48 followed by exact ashr by 49, truncated to i32: the line right
; after the "# BB#0:" label must be "movswq %di, %rax".
define i32 @shl48sar49(i64 %a) #0 {
|
||||
; CHECK-LABEL: shl48sar49:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movswq %di, %rax
|
||||
%1 = shl i64 %a, 48
|
||||
%2 = ashr exact i64 %1, 49
|
||||
%3 = trunc i64 %2 to i32
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; i64 shl by 56 followed by exact ashr by 55, truncated to i32: the line right
; after the "# BB#0:" label must be "movsbq %dil, %rax".
define i32 @shl56sar55(i64 %a) #0 {
|
||||
; CHECK-LABEL: shl56sar55:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movsbq %dil, %rax
|
||||
%1 = shl i64 %a, 56
|
||||
%2 = ashr exact i64 %1, 55
|
||||
%3 = trunc i64 %2 to i32
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
; i64 shl by 56 followed by exact ashr by 57, truncated to i32: the line right
; after the "# BB#0:" label must be "movsbq %dil, %rax".
define i32 @shl56sar57(i64 %a) #0 {
|
||||
; CHECK-LABEL: shl56sar57:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: movsbq %dil, %rax
|
||||
%1 = shl i64 %a, 56
|
||||
%2 = ashr exact i64 %1, 57
|
||||
%3 = trunc i64 %2 to i32
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue