From 58ebc79a64a5ca8dacf504f55d904e030effa164 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 9 Nov 2021 21:31:07 +0100 Subject: [PATCH] [InstCombine] Strip offset when folding and/or of icmps When folding and/or of icmps, look through add of a constant and adjust the icmp range instead. Effectively, this decomposes X + C1 < C2 style range checks back into a normal range. This allows us to fold comparisons involving two range checks or one range check and some other condition. We had a fold for a really specific case of this (or of range check and eq, and only on one side!) while this handles it in full generality. Differential Revision: https://reviews.llvm.org/D113510 --- .../InstCombine/InstCombineAndOrXor.cpp | 26 ++++++--- .../Transforms/InstCombine/and-or-icmps.ll | 55 ++++++------------- llvm/test/Transforms/InstCombine/or.ll | 16 ++---- .../InstCombine/signed-truncation-check.ll | 47 +++++----------- 4 files changed, 53 insertions(+), 91 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 52c681933258..26983cf15801 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1206,11 +1206,28 @@ static Value *foldAndOrOfICmpsUsingRanges( ICmpInst::Predicate Pred1, Value *V1, const APInt &C1, ICmpInst::Predicate Pred2, Value *V2, const APInt &C2, IRBuilderBase &Builder, bool IsAnd) { + // Look through add of a constant offset on V1, V2, or both operands. This + // allows us to interpret the V + C' < C'' range idiom into a proper range. 
+ const APInt *Offset1 = nullptr, *Offset2 = nullptr; + if (V1 != V2) { + Value *X; + if (match(V1, m_Add(m_Value(X), m_APInt(Offset1)))) + V1 = X; + if (match(V2, m_Add(m_Value(X), m_APInt(Offset2)))) + V2 = X; + } + if (V1 != V2) return nullptr; ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, C1); + if (Offset1) + CR1 = CR1.subtract(*Offset1); + ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred2, C2); + if (Offset2) + CR2 = CR2.subtract(*Offset2); + Optional<ConstantRange> CR = IsAnd ? CR1.exactIntersectWith(CR2) : CR1.exactUnionWith(CR2); if (!CR) @@ -2464,15 +2481,6 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (!LHSC || !RHSC) return nullptr; - // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) - // iff C2 + CA == C1. - if (PredL == ICmpInst::ICMP_ULT && PredR == ICmpInst::ICMP_EQ) { - ConstantInt *AddC; - if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC)))) - if (RHSC->getValue() + AddC->getValue() == LHSC->getValue()) - return Builder.CreateICmpULE(LHS0, LHSC); - } - return foldAndOrOfICmpsUsingRanges(PredL, LHS0, LHSC->getValue(), PredR, RHS0, RHSC->getValue(), Builder, /* IsAnd */ false); diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index d88b4b7702b6..ccc9a660a7cc 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -1047,11 +1047,8 @@ define i1 @substitute_constant_or_ne_ule_use2_logical(i8 %x, i8 %y) { define i1 @or_ranges_overlap(i8 %x) { ; CHECK-LABEL: @or_ranges_overlap( ; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -5 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6 -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[X]], -10 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 11 -; CHECK-NEXT: [[C7:%.*]] = or i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret i1 [[C7]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 16 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = 
icmp uge i8 %x, 5 %c2 = icmp ule i8 %x, 10 @@ -1066,11 +1063,8 @@ define i1 @or_ranges_overlap(i8 %x) { define i1 @or_ranges_adjacent(i8 %x) { ; CHECK-LABEL: @or_ranges_adjacent( ; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -5 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6 -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[X]], -11 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 10 -; CHECK-NEXT: [[C7:%.*]] = or i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret i1 [[C7]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 16 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp uge i8 %x, 5 %c2 = icmp ule i8 %x, 10 @@ -1117,11 +1111,9 @@ define i1 @or_ranges_single_elem_right(i8 %x) { define i1 @or_ranges_single_elem_left(i8 %x) { ; CHECK-LABEL: @or_ranges_single_elem_left( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -5 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6 -; CHECK-NEXT: [[C4:%.*]] = icmp eq i8 [[X]], 4 -; CHECK-NEXT: [[C6:%.*]] = or i1 [[TMP2]], [[C4]] -; CHECK-NEXT: ret i1 [[C6]] +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 7 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp uge i8 %x, 5 %c2 = icmp ule i8 %x, 10 @@ -1133,12 +1125,9 @@ define i1 @or_ranges_single_elem_left(i8 %x) { define i1 @and_ranges_overlap(i8 %x) { ; CHECK-LABEL: @and_ranges_overlap( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -5 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6 -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[X]], -7 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 14 -; CHECK-NEXT: [[C7:%.*]] = and i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret i1 [[C7]] +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -7 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 4 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp uge i8 %x, 5 %c2 = icmp ule i8 %x, 10 @@ -1152,12 +1141,8 @@ define i1 @and_ranges_overlap(i8 %x) { define i1 @and_ranges_overlap_single(i8 %x) { ; CHECK-LABEL: @and_ranges_overlap_single( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 
[[X:%.*]], -5 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6 -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[X]], -10 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 11 -; CHECK-NEXT: [[C7:%.*]] = and i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret i1 [[C7]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 10 +; CHECK-NEXT: ret i1 [[TMP1]] ; %c1 = icmp uge i8 %x, 5 %c2 = icmp ule i8 %x, 10 @@ -1171,12 +1156,7 @@ define i1 @and_ranges_overlap_single(i8 %x) { define i1 @and_ranges_no_overlap(i8 %x) { ; CHECK-LABEL: @and_ranges_no_overlap( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -5 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6 -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[X]], -11 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 10 -; CHECK-NEXT: [[C7:%.*]] = and i1 [[TMP2]], [[TMP4]] -; CHECK-NEXT: ret i1 [[C7]] +; CHECK-NEXT: ret i1 false ; %c1 = icmp uge i8 %x, 5 %c2 = icmp ule i8 %x, 10 @@ -1190,12 +1170,9 @@ define i1 @and_ranges_no_overlap(i8 %x) { define i1 @and_ranges_signed_pred(i64 %x) { ; CHECK-LABEL: @and_ranges_signed_pred( -; CHECK-NEXT: [[T1:%.*]] = add i64 [[X:%.*]], 127 -; CHECK-NEXT: [[T2:%.*]] = icmp slt i64 [[T1]], 1024 -; CHECK-NEXT: [[T3:%.*]] = add i64 [[X]], 128 -; CHECK-NEXT: [[T4:%.*]] = icmp slt i64 [[T3]], 256 -; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] -; CHECK-NEXT: ret i1 [[T5]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], -9223372036854775681 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -9223372036854775553 +; CHECK-NEXT: ret i1 [[TMP2]] ; %t1 = add i64 %x, 127 %t2 = icmp slt i64 %t1, 1024 diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 9ed333325819..ce64662d191b 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -537,11 +537,9 @@ define <2 x i1> @test37_undef(<2 x i32> %x) { define i1 @test38(i32 %x) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X:%.*]], 7 -; CHECK-NEXT: [[CMP1:%.*]] = icmp 
eq i32 [[X]], 23 -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[ADD1]], 30 -; CHECK-NEXT: [[RET1:%.*]] = or i1 [[CMP1]], [[CMP2]] -; CHECK-NEXT: ret i1 [[RET1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 31 +; CHECK-NEXT: ret i1 [[TMP2]] ; %add1 = add i32 %x, 7 %cmp1 = icmp eq i32 %x, 23 @@ -552,11 +550,9 @@ define i1 @test38(i32 %x) { define i1 @test38_logical(i32 %x) { ; CHECK-LABEL: @test38_logical( -; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X:%.*]], 7 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 23 -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[ADD1]], 30 -; CHECK-NEXT: [[RET1:%.*]] = or i1 [[CMP1]], [[CMP2]] -; CHECK-NEXT: ret i1 [[RET1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 31 +; CHECK-NEXT: ret i1 [[TMP2]] ; %add1 = add i32 %x, 7 %cmp1 = icmp eq i32 %x, 23 diff --git a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll index 6681d5021472..b4a94b2bedb5 100644 --- a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll @@ -909,11 +909,8 @@ define i1 @negative_not_less_than_logical(i32 %arg) { define i1 @negative_not_power_of_two(i32 %arg) { ; CHECK-LABEL: @negative_not_power_of_two( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 -; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 255 -; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 -; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]] -; CHECK-NEXT: ret i1 [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 0 +; CHECK-NEXT: ret i1 [[TMP1]] ; %t1 = icmp sgt i32 %arg, -1 %t2 = add i32 %arg, 255 ; should be power of two @@ -924,11 +921,8 @@ define i1 @negative_not_power_of_two(i32 %arg) { define i1 @negative_not_power_of_two_logical(i32 %arg) { ; CHECK-LABEL: @negative_not_power_of_two_logical( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 
[[ARG:%.*]], -1 -; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 255 -; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 -; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]] -; CHECK-NEXT: ret i1 [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 0 +; CHECK-NEXT: ret i1 [[TMP1]] ; %t1 = icmp sgt i32 %arg, -1 %t2 = add i32 %arg, 255 ; should be power of two @@ -939,11 +933,8 @@ define i1 @negative_not_power_of_two_logical(i32 %arg) { define i1 @negative_not_next_power_of_two(i32 %arg) { ; CHECK-LABEL: @negative_not_next_power_of_two( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 -; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 64 -; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 -; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]] -; CHECK-NEXT: ret i1 [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ARG:%.*]], 192 +; CHECK-NEXT: ret i1 [[TMP1]] ; %t1 = icmp sgt i32 %arg, -1 %t2 = add i32 %arg, 64 ; should be 256 >> 1 @@ -954,11 +945,8 @@ define i1 @negative_not_next_power_of_two(i32 %arg) { define i1 @negative_not_next_power_of_two_logical(i32 %arg) { ; CHECK-LABEL: @negative_not_next_power_of_two_logical( -; CHECK-NEXT: [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1 -; CHECK-NEXT: [[T2:%.*]] = add i32 [[ARG]], 64 -; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 256 -; CHECK-NEXT: [[T4:%.*]] = and i1 [[T1]], [[T3]] -; CHECK-NEXT: ret i1 [[T4]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ARG:%.*]], 192 +; CHECK-NEXT: ret i1 [[TMP1]] ; %t1 = icmp sgt i32 %arg, -1 %t2 = add i32 %arg, 64 ; should be 256 >> 1 @@ -967,15 +955,11 @@ define i1 @negative_not_next_power_of_two_logical(i32 %arg) { ret i1 %t4 } -; I don't think this can be folded, at least not into single instruction. 
define i1 @two_signed_truncation_checks(i32 %arg) { ; CHECK-LABEL: @two_signed_truncation_checks( -; CHECK-NEXT: [[T1:%.*]] = add i32 [[ARG:%.*]], 512 -; CHECK-NEXT: [[T2:%.*]] = icmp ult i32 [[T1]], 1024 -; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128 -; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 -; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] -; CHECK-NEXT: ret i1 [[T5]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[ARG:%.*]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 256 +; CHECK-NEXT: ret i1 [[TMP2]] ; %t1 = add i32 %arg, 512 %t2 = icmp ult i32 %t1, 1024 @@ -987,12 +971,9 @@ define i1 @two_signed_truncation_checks(i32 %arg) { define i1 @two_signed_truncation_checks_logical(i32 %arg) { ; CHECK-LABEL: @two_signed_truncation_checks_logical( -; CHECK-NEXT: [[T1:%.*]] = add i32 [[ARG:%.*]], 512 -; CHECK-NEXT: [[T2:%.*]] = icmp ult i32 [[T1]], 1024 -; CHECK-NEXT: [[T3:%.*]] = add i32 [[ARG]], 128 -; CHECK-NEXT: [[T4:%.*]] = icmp ult i32 [[T3]], 256 -; CHECK-NEXT: [[T5:%.*]] = and i1 [[T2]], [[T4]] -; CHECK-NEXT: ret i1 [[T5]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[ARG:%.*]], 128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 256 +; CHECK-NEXT: ret i1 [[TMP2]] ; %t1 = add i32 %arg, 512 %t2 = icmp ult i32 %t1, 1024