From 58ebc79a64a5ca8dacf504f55d904e030effa164 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 9 Nov 2021 21:31:07 +0100
Subject: [PATCH] [InstCombine] Strip offset when folding and/or of icmps

When folding and/or of icmps, look through add of a constant and
adjust the icmp range instead. Effectively, this decomposes
X + C1 < C2 style range checks back into a normal range. This allows
us to fold comparisons involving two range checks or one range check
and some other condition. We had a fold for a really specific case
of this (or of range check and eq, and only on one side!) while
this handles it in full generality.

Differential Revision: https://reviews.llvm.org/D113510
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 26 ++++++---
 .../Transforms/InstCombine/and-or-icmps.ll    | 55 ++++++-------------
 llvm/test/Transforms/InstCombine/or.ll        | 16 ++----
 .../InstCombine/signed-truncation-check.ll    | 47 +++++-----------
 4 files changed, 53 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 52c681933258..26983cf15801 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1206,11 +1206,28 @@ static Value *foldAndOrOfICmpsUsingRanges(
     ICmpInst::Predicate Pred1, Value *V1, const APInt &C1,
     ICmpInst::Predicate Pred2, Value *V2, const APInt &C2,
     IRBuilderBase &Builder, bool IsAnd) {
+  // Look through add of a constant offset on V1, V2, or both operands. This
+  // allows us to interpret the V + C' < C'' range idiom into a proper range.
+  const APInt *Offset1 = nullptr, *Offset2 = nullptr;
+  if (V1 != V2) {
+    Value *X;
+    if (match(V1, m_Add(m_Value(X), m_APInt(Offset1))))
+      V1 = X;
+    if (match(V2, m_Add(m_Value(X), m_APInt(Offset2))))
+      V2 = X;
+  }
+
   if (V1 != V2)
     return nullptr;
 
   ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, C1);
+  if (Offset1)
+    CR1 = CR1.subtract(*Offset1);
+
   ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred2, C2);
+  if (Offset2)
+    CR2 = CR2.subtract(*Offset2);
+
   Optional<ConstantRange> CR =
       IsAnd ? CR1.exactIntersectWith(CR2) : CR1.exactUnionWith(CR2);
   if (!CR)
@@ -2464,15 +2481,6 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   if (!LHSC || !RHSC)
     return nullptr;
 
-  // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
-  //   iff C2 + CA == C1.
-  if (PredL == ICmpInst::ICMP_ULT && PredR == ICmpInst::ICMP_EQ) {
-    ConstantInt *AddC;
-    if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC))))
-      if (RHSC->getValue() + AddC->getValue() == LHSC->getValue())
-        return Builder.CreateICmpULE(LHS0, LHSC);
-  }
-
   return foldAndOrOfICmpsUsingRanges(PredL, LHS0, LHSC->getValue(),
                                      PredR, RHS0, RHSC->getValue(),
                                      Builder, /* IsAnd */ false);
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index d88b4b7702b6..ccc9a660a7cc 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -1047,11 +1047,8 @@ define i1 @substitute_constant_or_ne_ule_use2_logical(i8 %x, i8 %y) {
 define i1 @or_ranges_overlap(i8 %x) {
 ; CHECK-LABEL: @or_ranges_overlap(
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
-; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[X]], -10
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 11
-; CHECK-NEXT:    [[C7:%.*]] = or i1 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    ret i1 [[C7]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 16
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %c1 = icmp uge i8 %x, 5
   %c2 = icmp ule i8 %x, 10
@@ -1066,11 +1063,8 @@ define i1 @or_ranges_overlap(i8 %x) {
 define i1 @or_ranges_adjacent(i8 %x) {
 ; CHECK-LABEL: @or_ranges_adjacent(
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
-; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[X]], -11
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 10
-; CHECK-NEXT:    [[C7:%.*]] = or i1 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    ret i1 [[C7]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 16
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %c1 = icmp uge i8 %x, 5
   %c2 = icmp ule i8 %x, 10
@@ -1117,11 +1111,9 @@ define i1 @or_ranges_single_elem_right(i8 %x) {
 
 define i1 @or_ranges_single_elem_left(i8 %x) {
 ; CHECK-LABEL: @or_ranges_single_elem_left(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
-; CHECK-NEXT:    [[C4:%.*]] = icmp eq i8 [[X]], 4
-; CHECK-NEXT:    [[C6:%.*]] = or i1 [[TMP2]], [[C4]]
-; CHECK-NEXT:    ret i1 [[C6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 7
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %c1 = icmp uge i8 %x, 5
   %c2 = icmp ule i8 %x, 10
@@ -1133,12 +1125,9 @@ define i1 @or_ranges_single_elem_left(i8 %x) {
 
 define i1 @and_ranges_overlap(i8 %x) {
 ; CHECK-LABEL: @and_ranges_overlap(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
-; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[X]], -7
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 14
-; CHECK-NEXT:    [[C7:%.*]] = and i1 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    ret i1 [[C7]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -7
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 4
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %c1 = icmp uge i8 %x, 5
   %c2 = icmp ule i8 %x, 10
@@ -1152,12 +1141,8 @@ define i1 @and_ranges_overlap(i8 %x) {
 
 define i1 @and_ranges_overlap_single(i8 %x) {
 ; CHECK-LABEL: @and_ranges_overlap_single(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
-; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[X]], -10
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 11
-; CHECK-NEXT:    [[C7:%.*]] = and i1 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    ret i1 [[C7]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X:%.*]], 10
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %c1 = icmp uge i8 %x, 5
   %c2 = icmp ule i8 %x, 10
@@ -1171,12 +1156,7 @@ define i1 @and_ranges_overlap_single(i8 %x) {
 
 define i1 @and_ranges_no_overlap(i8 %x) {
 ; CHECK-LABEL: @and_ranges_no_overlap(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], -5
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
-; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[X]], -11
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i8 [[TMP3]], 10
-; CHECK-NEXT:    [[C7:%.*]] = and i1 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    ret i1 [[C7]]
+; CHECK-NEXT:    ret i1 false
 ;
   %c1 = icmp uge i8 %x, 5
   %c2 = icmp ule i8 %x, 10
@@ -1190,12 +1170,9 @@ define i1 @and_ranges_no_overlap(i8 %x) {
 
 define i1 @and_ranges_signed_pred(i64 %x) {
 ; CHECK-LABEL: @and_ranges_signed_pred(
-; CHECK-NEXT:    [[T1:%.*]] = add i64 [[X:%.*]], 127
-; CHECK-NEXT:    [[T2:%.*]] = icmp slt i64 [[T1]], 1024
-; CHECK-NEXT:    [[T3:%.*]] = add i64 [[X]], 128
-; CHECK-NEXT:    [[T4:%.*]] = icmp slt i64 [[T3]], 256
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[X:%.*]], -9223372036854775681
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -9223372036854775553
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t1 = add i64 %x, 127
   %t2 = icmp slt i64 %t1, 1024
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 9ed333325819..ce64662d191b 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -537,11 +537,9 @@ define <2 x i1> @test37_undef(<2 x i32> %x) {
 
 define i1 @test38(i32 %x) {
 ; CHECK-LABEL: @test38(
-; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X:%.*]], 7
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[X]], 23
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[ADD1]], 30
-; CHECK-NEXT:    [[RET1:%.*]] = or i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    ret i1 [[RET1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 31
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %add1 = add i32 %x, 7
   %cmp1 = icmp eq i32 %x, 23
@@ -552,11 +550,9 @@ define i1 @test38(i32 %x) {
 
 define i1 @test38_logical(i32 %x) {
 ; CHECK-LABEL: @test38_logical(
-; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X:%.*]], 7
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[X]], 23
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[ADD1]], 30
-; CHECK-NEXT:    [[RET1:%.*]] = or i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    ret i1 [[RET1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 31
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %add1 = add i32 %x, 7
   %cmp1 = icmp eq i32 %x, 23
diff --git a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
index 6681d5021472..b4a94b2bedb5 100644
--- a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
@@ -909,11 +909,8 @@ define i1 @negative_not_less_than_logical(i32 %arg) {
 
 define i1 @negative_not_power_of_two(i32 %arg) {
 ; CHECK-LABEL: @negative_not_power_of_two(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 255
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %t1 = icmp sgt i32 %arg, -1
   %t2 = add i32 %arg, 255 ; should be power of two
@@ -924,11 +921,8 @@ define i1 @negative_not_power_of_two(i32 %arg) {
 
 define i1 @negative_not_power_of_two_logical(i32 %arg) {
 ; CHECK-LABEL: @negative_not_power_of_two_logical(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 255
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG:%.*]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %t1 = icmp sgt i32 %arg, -1
   %t2 = add i32 %arg, 255 ; should be power of two
@@ -939,11 +933,8 @@ define i1 @negative_not_power_of_two_logical(i32 %arg) {
 
 define i1 @negative_not_next_power_of_two(i32 %arg) {
 ; CHECK-LABEL: @negative_not_next_power_of_two(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 64
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[ARG:%.*]], 192
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %t1 = icmp sgt i32 %arg, -1
   %t2 = add i32 %arg, 64 ; should be 256 >> 1
@@ -954,11 +945,8 @@ define i1 @negative_not_next_power_of_two(i32 %arg) {
 
 define i1 @negative_not_next_power_of_two_logical(i32 %arg) {
 ; CHECK-LABEL: @negative_not_next_power_of_two_logical(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 64
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[ARG:%.*]], 192
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %t1 = icmp sgt i32 %arg, -1
   %t2 = add i32 %arg, 64 ; should be 256 >> 1
@@ -967,15 +955,11 @@ define i1 @negative_not_next_power_of_two_logical(i32 %arg) {
   ret i1 %t4
 }
 
-; I don't think this can be folded, at least not into single instruction.
 define i1 @two_signed_truncation_checks(i32 %arg) {
 ; CHECK-LABEL: @two_signed_truncation_checks(
-; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG:%.*]], 512
-; CHECK-NEXT:    [[T2:%.*]] = icmp ult i32 [[T1]], 1024
-; CHECK-NEXT:    [[T3:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T4:%.*]] = icmp ult i32 [[T3]], 256
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 256
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t1 = add i32 %arg, 512
   %t2 = icmp ult i32 %t1, 1024
@@ -987,12 +971,9 @@ define i1 @two_signed_truncation_checks(i32 %arg) {
 
 define i1 @two_signed_truncation_checks_logical(i32 %arg) {
 ; CHECK-LABEL: @two_signed_truncation_checks_logical(
-; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG:%.*]], 512
-; CHECK-NEXT:    [[T2:%.*]] = icmp ult i32 [[T1]], 1024
-; CHECK-NEXT:    [[T3:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T4:%.*]] = icmp ult i32 [[T3]], 256
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 256
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %t1 = add i32 %arg, 512
   %t2 = icmp ult i32 %t1, 1024