[InstCombine] Shift amount reassociation in bittest: trunc-of-shl (PR42399)
Summary:
This is a continuation of D63829 / https://bugs.llvm.org/show_bug.cgi?id=42399.
I thought the naive pattern would solve my issue, but no: it involved truncation,
so more folds are needed. This isn't really the fold I'm interested in; I need
trunc-of-lshr, but I've decided to start with `shl` because it's simpler.

In this case, no extra legality checks are needed: https://rise4fun.com/Alive/CAb

We should be careful about not increasing instruction count, since we need to
produce a `zext` because the `and` is done in the wider type.

Reviewers: spatel, nikic, xbolva00

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66057

llvm-svn: 369117
commit 16244fccfe
parent 429aa7c1e6
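Not part of the patch: to make the transform concrete, here is a small standalone C++ check of the equivalence the fold relies on (the same shape as the Alive proof linked in the summary, with an i32 value x, an i64 value y, and generic shift amounts q and k). The names `before` and `after` are mine, not from LLVM.

// Standalone sanity check (my own sketch, not from the patch): verify that
//   icmp ne (and (lshr X, Q), (trunc (shl Y, K))), 0
// is equivalent to
//   icmp ne (and (lshr (zext X), Q+K), Y), 0
// whenever (Q+K) u< bitwidth of the wide type, matching the fold added here.
#include <cassert>
#include <cstdint>

static bool before(uint32_t x, uint64_t y, unsigned q, unsigned k) {
  // Original pattern: narrow lshr, wide shl, trunc, narrow 'and'.
  return ((x >> q) & static_cast<uint32_t>(y << k)) != 0;
}

static bool after(uint32_t x, uint64_t y, unsigned q, unsigned k) {
  // Reassociated pattern: zext x, one wide lshr by (q + k), wide 'and'.
  return ((static_cast<uint64_t>(x) >> (q + k)) & y) != 0;
}

int main() {
  uint64_t seed = 0x9e3779b97f4a7c15ULL;
  for (unsigned q = 0; q < 32; ++q)     // lshr i32 needs q u< 32
    for (unsigned k = 0; k < 64; ++k) { // shl i64 needs k u< 64
      if (q + k >= 64)
        continue;                       // the fold requires (q + k) u< 64
      for (int i = 0; i < 1000; ++i) {
        seed = seed * 6364136223846793005ULL + 1442695040888963407ULL;
        uint32_t x = static_cast<uint32_t>(seed >> 11);
        seed = seed * 6364136223846793005ULL + 1442695040888963407ULL;
        uint64_t y = seed;
        assert(before(x, y, q, k) == after(x, y, q, k));
      }
    }
  return 0;
}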
@@ -1258,6 +1258,12 @@ inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::Trunc>(Op);
 }
 
+template <typename OpTy>
+inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
+m_TruncOrSelf(const OpTy &Op) {
+  return m_CombineOr(m_Trunc(Op), Op);
+}
+
 /// Matches SExt.
 template <typename OpTy>
 inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
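The new m_TruncOrSelf() matcher added above (this hunk is from PatternMatch.h) is what lets the icmp fold below treat a truncated and a non-truncated shift uniformly. A hedged usage sketch, mine rather than from the patch; it assumes LLVM headers with this change applied, and the helper name is made up:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: returns the underlying 'shl' instruction whether the
// operand is `shl a, b` itself or `trunc (shl a, b)`; nullptr otherwise.
static Instruction *matchMaybeTruncatedShl(Value *V) {
  Instruction *Shl = nullptr;
  // m_TruncOrSelf(P) matches either `trunc P` or P directly, so one pattern
  // covers both forms.
  if (match(V, m_TruncOrSelf(m_CombineAnd(m_Shl(m_Value(), m_Value()),
                                          m_Instruction(Shl)))))
    return Shl;
  return nullptr;
}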
@@ -3299,6 +3299,7 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
 // we should move shifts to the same hand of 'and', i.e. rewrite as
 //   icmp eq/ne (and (x shift (Q+K)), y), 0  iff (Q+K) u< bitwidth(x)
 // We are only interested in opposite logical shifts here.
+// One of the shifts can be truncated. For now, it can only be 'shl'.
 // If we can, we want to end up creating 'lshr' shift.
 static Value *
 foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
@@ -3308,18 +3309,37 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
     return nullptr;
 
   auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
-  auto m_AnyLShr = m_LShr(m_Value(), m_Value());
 
-  // Look for an 'and' of two (opposite) logical shifts.
-  // Pick the single-use shift as XShift.
-  Instruction *XShift, *YShift;
-  if (!match(I.getOperand(0),
-             m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
-                     m_CombineAnd(m_AnyLogicalShift, m_Instruction(YShift)))))
+  // Look for an 'and' of two logical shifts, one of which may be truncated.
+  // We use m_TruncOrSelf() on the RHS to correctly handle commutative case.
+  Instruction *XShift, *MaybeTruncation, *YShift;
+  if (!match(
+          I.getOperand(0),
+          m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
+                  m_CombineAnd(m_TruncOrSelf(m_CombineAnd(
+                                   m_AnyLogicalShift, m_Instruction(YShift))),
+                               m_Instruction(MaybeTruncation)))))
     return nullptr;
 
+  Instruction *UntruncatedShift = XShift;
+
+  // We potentially looked past 'trunc', but only when matching YShift,
+  // therefore YShift must have the widest type.
+  Type *WidestTy = YShift->getType();
+  assert(XShift->getType() == I.getOperand(0)->getType() &&
+         "We did not look past any shifts while matching XShift though.");
+  bool HadTrunc = WidestTy != I.getOperand(0)->getType();
+
+  if (HadTrunc) {
+    // We did indeed have a truncation. For now, let's only proceed if the 'shl'
+    // was truncated, since that does not require any extra legality checks.
+    // FIXME: trunc-of-lshr.
+    if (!match(YShift, m_Shl(m_Value(), m_Value())))
+      return nullptr;
+  }
+
   // If YShift is a 'lshr', swap the shifts around.
-  if (match(YShift, m_AnyLShr))
+  if (match(YShift, m_LShr(m_Value(), m_Value())))
     std::swap(XShift, YShift);
 
   // The shifts must be in opposite directions.
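Why does the trunc-of-shl case need no extra legality checks while trunc-of-lshr (the FIXME above) does? My own illustration, not from the patch: with a truncated lshr, the trunc can discard high bits of y that a naively widened form would still see, so the same reassociation is not sound without further conditions. A minimal standalone C++ counterexample under that assumption:

#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical trunc-of-lshr pattern: (x << q) & trunc(y >> k), with q = 1, k = 0.
  uint32_t x = 0x80000000u; // only bit 31 set
  uint64_t y = 1ULL << 32;  // only bit 32 set
  unsigned q = 1, k = 0;

  // Original: the shl drops x's bit 31 and the trunc drops y's bit 32.
  bool original = ((x << q) & static_cast<uint32_t>(y >> k)) != 0; // false

  // Naive widening (analogue of the shl fold): x's bit 31 survives the zext
  // and lines up with y's bit 32, so the result flips to true.
  bool naive = ((static_cast<uint64_t>(x) << (q + k)) & y) != 0;   // true

  std::cout << original << " vs " << naive << '\n'; // prints "0 vs 1"
  return 0;
}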
@@ -3328,37 +3348,54 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
     return nullptr; // Do not care about same-direction shifts here.
 
   Value *X, *XShAmt, *Y, *YShAmt;
-  match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
-  match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+  match(XShift, m_BinOp(m_Value(X), m_ZExtOrSelf(m_Value(XShAmt))));
+  match(YShift, m_BinOp(m_Value(Y), m_ZExtOrSelf(m_Value(YShAmt))));
 
   // If one of the values being shifted is a constant, then we will end with
-  // and+icmp, and shift instr will be constant-folded. If they are not,
+  // and+icmp, and [zext+]shift instrs will be constant-folded. If they are not,
   // however, we will need to ensure that we won't increase instruction count.
   if (!isa<Constant>(X) && !isa<Constant>(Y)) {
     // At least one of the hands of the 'and' should be one-use shift.
     if (!match(I.getOperand(0),
                m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
       return nullptr;
+    if (HadTrunc) {
+      // Due to the 'trunc', we will need to widen X. For that either the old
+      // 'trunc' or the shift amt in the non-truncated shift should be one-use.
+      if (!MaybeTruncation->hasOneUse() &&
+          !UntruncatedShift->getOperand(1)->hasOneUse())
+        return nullptr;
+    }
   }
 
+  // We have two shift amounts from two different shifts. The types of those
+  // shift amounts may not match. If that's the case let's bailout now.
+  if (XShAmt->getType() != YShAmt->getType())
+    return nullptr;
+
   // Can we fold (XShAmt+YShAmt) ?
-  Value *NewShAmt = SimplifyAddInst(XShAmt, YShAmt, /*IsNSW=*/false,
-                                    /*IsNUW=*/false, SQ.getWithInstruction(&I));
+  auto *NewShAmt = dyn_cast_or_null<Constant>(
+      SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
+                      /*isNUW=*/false, SQ.getWithInstruction(&I)));
   if (!NewShAmt)
     return nullptr;
   // Is the new shift amount smaller than the bit width?
   // FIXME: could also rely on ConstantRange.
-  unsigned BitWidth = X->getType()->getScalarSizeInBits();
-  if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
-                                          APInt(BitWidth, BitWidth))))
+  if (!match(NewShAmt, m_SpecificInt_ICMP(
+                           ICmpInst::Predicate::ICMP_ULT,
+                           APInt(NewShAmt->getType()->getScalarSizeInBits(),
+                                 WidestTy->getScalarSizeInBits()))))
     return nullptr;
-  // All good, we can do this fold. The shift is the same that was for X.
+  // All good, we can do this fold.
+  NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, WidestTy);
+  X = Builder.CreateZExt(X, WidestTy);
+  // The shift is the same that was for X.
   Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
                   ? Builder.CreateLShr(X, NewShAmt)
                   : Builder.CreateShl(X, NewShAmt);
   Value *T1 = Builder.CreateAnd(T0, Y);
   return Builder.CreateICmp(I.getPredicate(), T1,
-                            Constant::getNullValue(X->getType()));
+                            Constant::getNullValue(WidestTy));
 }
 
 /// Try to fold icmp (binop), X or icmp X, (binop).
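A note on how I read the one-use checks in this hunk: the rewritten form has to introduce a zext of X because the 'and' now happens in the wider type, so when neither shifted value is a constant the fold only breaks even if something else dies with it, either the old 'trunc' (hence the MaybeTruncation->hasOneUse() test) or the shift-amount computation feeding the non-truncated shift (hence the check on UntruncatedShift->getOperand(1)). The one-use tests below appear to exercise these conditions.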
@@ -6,6 +6,8 @@
 ; we should move shifts to the same hand of 'and', i.e. e.g. rewrite as
 ;   icmp eq/ne (and (((x shift Q) shift K), y)), 0
 ; We are only interested in opposite logical shifts here.
+; We still can handle the case where there is a truncation between a shift
+; and an 'and', but for now only if it's 'shl' - simpler legality check.
 
 ;-------------------------------------------------------------------------------
 ; Basic scalar tests
@@ -13,15 +15,11 @@
 
 define i1 @t0_const_after_fold_lshr_shl_ne(i32 %x, i64 %y, i32 %len) {
 ; CHECK-LABEL: @t0_const_after_fold_lshr_shl_ne(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
-; CHECK-NEXT:    [[T3:%.*]] = shl i64 [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %t0 = sub i32 32, %len
   %t1 = lshr i32 %x, %t0
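Reading the updated CHECK lines above: the two shift amounts, (32 - len) from the lshr and (len - 1) from the shl, reassociate to (32 - len) + (len - 1) = 31, which is why the folded output shifts by the constant 31 and no longer depends on %len.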
@@ -40,15 +38,11 @@ define i1 @t0_const_after_fold_lshr_shl_ne(i32 %x, i64 %y, i32 %len) {
 
 define <2 x i1> @t1_vec_splat(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 32, i32 32>, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = shl <2 x i64> [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP4]]
 ;
   %t0 = sub <2 x i32> <i32 32, i32 32>, %len
   %t1 = lshr <2 x i32> %x, %t0
@@ -63,15 +57,11 @@ define <2 x i1> @t1_vec_splat(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) {
 
 define <2 x i1> @t2_vec_nonsplat(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 30, i32 32>, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 1, i32 -2>
-; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext <2 x i32> [[T2]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = shl <2 x i64> [[Y:%.*]], [[T2_WIDE]]
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i32> [[T4]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 31, i64 30>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP4]]
 ;
   %t0 = sub <2 x i32> <i32 30, i32 32>, %len
   %t1 = lshr <2 x i32> %x, %t0
@@ -214,17 +204,17 @@ define i1 @t6_oneuse3(i32 %x, i64 %y, i32 %len) {
 ; CHECK-LABEL: @t6_oneuse3(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
 ; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
 ; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
 ; CHECK-NEXT:    call void @use64(i64 [[T2_WIDE]])
 ; CHECK-NEXT:    [[T3:%.*]] = shl i64 [[Y:%.*]], [[T2_WIDE]]
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
-; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %t0 = sub i32 32, %len
   call void @use32(i32 %t0)
@@ -244,9 +234,7 @@ define i1 @t6_oneuse3(i32 %x, i64 %y, i32 %len) {
 ; Ok, shift amount of non-truncated shift has no extra uses;
 define i1 @t7_oneuse4(i32 %x, i64 %y, i32 %len) {
 ; CHECK-LABEL: @t7_oneuse4(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], -1
+; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN:%.*]], -1
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
 ; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
 ; CHECK-NEXT:    call void @use64(i64 [[T2_WIDE]])
@@ -254,9 +242,11 @@ define i1 @t7_oneuse4(i32 %x, i64 %y, i32 %len) {
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
 ; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
 ; CHECK-NEXT:    call void @use32(i32 [[T3_TRUNC]])
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %t0 = sub i32 32, %len ; no extra uses
   %t1 = lshr i32 %x, %t0 ; no extra uses
@@ -288,9 +278,9 @@ define i1 @t8_oneuse5(i32 %x, i64 %y, i32 %len) {
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
 ; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
 ; CHECK-NEXT:    call void @use32(i32 [[T3_TRUNC]])
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[Y]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t0 = sub i32 32, %len
   call void @use32(i32 %t0)
@@ -324,9 +314,7 @@ define i1 @t9_oneuse5(i32 %x, i64 %y, i32 %len) {
 ; CHECK-NEXT:    call void @use64(i64 [[T3]])
 ; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
 ; CHECK-NEXT:    call void @use32(i32 [[T3_TRUNC]])
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[T3_TRUNC]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp ne i32 [[T4]], 0
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t0 = sub i32 32, %len
   call void @use32(i32 %t0)
@@ -413,7 +401,7 @@ define i1 @n13_overshift(i32 %x, i64 %y, i32 %len) {
 ; CHECK-LABEL: @n13_overshift(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i32 32, [[LEN:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = lshr i32 [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], 1
+; CHECK-NEXT:    [[T2:%.*]] = add i32 [[LEN]], 32
 ; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext i32 [[T2]] to i64
 ; CHECK-NEXT:    [[T3:%.*]] = shl i64 [[Y:%.*]], [[T2_WIDE]]
 ; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc i64 [[T3]] to i32
@@ -423,7 +411,7 @@ define i1 @n13_overshift(i32 %x, i64 %y, i32 %len) {
 ;
   %t0 = sub i32 32, %len
   %t1 = lshr i32 %x, %t0
-  %t2 = add i32 %len, 1 ; too much
+  %t2 = add i32 %len, 32 ; too much
   %t2_wide = zext i32 %t2 to i64
   %t3 = shl i64 %y, %t2_wide
   %t3_trunc = trunc i64 %t3 to i32
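The input of @n13_overshift changes along with the fold, presumably because the legality check is now against the widest type (64 bits): the old shift-amount sum of (32 - len) + (len + 1) = 33 would be in range and the test would stop being negative, so the constant is bumped to 32, making the sum (32 - len) + (len + 32) = 64, which is not u< 64 and therefore still "too much".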