[InstCombine] Canonicalize ssub.with.overflow with clamp to ssub.sat

Working on top of D69252, this adds canonicalisation patterns that fold an ssub.with.overflow whose result is clamped on overflow into ssub.sat.

Differential Revision: https://reviews.llvm.org/D69753
2019-11-17 10:45:00 +00:00 · 2019-11-17 10:45:00 +00:00 · 08390c52a2
parent 03fce6b12e
commit 08390c52a2
2 changed files with 60 additions and 70 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@ -1736,6 +1736,7 @@ static Instruction *foldAddSubSelect(SelectInst &SI,
 /// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y
 /// Along with a number of patterns similar to:
 /// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+/// X - Y overflows ? (X > -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
 static Instruction *
 foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
  Value *CondVal = SI.getCondition();
@ -1750,7 +1751,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
  Value *X = II->getLHS();
  Value *Y = II->getRHS();

-  auto IsSignedSaturateLimit = [&](Value *Limit) {
+  auto IsSignedSaturateLimit = [&](Value *Limit, bool IsAdd) {
    Type *Ty = Limit->getType();

    ICmpInst::Predicate Pred;
@ -1773,20 +1774,43 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
    if (Op != X && Op != Y)
      return false;

-    // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
-    // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
-    // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
-    // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
-    if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&
-        IsMinMax(TrueVal, FalseVal))
-      return true;
-    // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
-    // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
-    // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
-    // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
-    if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&
-        IsMinMax(FalseVal, TrueVal))
-      return true;
+    if (IsAdd) {
+      // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+      // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+      // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+      // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
+      if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&
+          IsMinMax(TrueVal, FalseVal))
+        return true;
+      // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+      // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+      // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+      // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
+      if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&
+          IsMinMax(FalseVal, TrueVal))
+        return true;
+    } else {
+      // X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+      // X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+      if (Op == X && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C + 1) &&
+          IsMinMax(TrueVal, FalseVal))
+        return true;
+      // X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+      // X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+      if (Op == X && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 2) &&
+          IsMinMax(FalseVal, TrueVal))
+        return true;
+      // X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+      // X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+      if (Op == Y && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&
+          IsMinMax(FalseVal, TrueVal))
+        return true;
+      // X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+      // X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+      if (Op == Y && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&
+          IsMinMax(TrueVal, FalseVal))
+        return true;
+    }

    return false;
  };
@ -1801,7 +1825,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
    // X - Y overflows ? 0 : X - Y -> usub_sat X, Y
    NewIntrinsicID = Intrinsic::usub_sat;
  else if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow &&
-           IsSignedSaturateLimit(TrueVal))
+           IsSignedSaturateLimit(TrueVal, /*IsAdd=*/true))
    // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
    // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
    // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
@ -1811,6 +1835,17 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
    // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
    // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
    NewIntrinsicID = Intrinsic::sadd_sat;
+  else if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow &&
+           IsSignedSaturateLimit(TrueVal, /*IsAdd=*/false))
+    // X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+    // X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+    // X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+    // X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+    // X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+    // X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
+    // X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+    // X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
+    NewIntrinsicID = Intrinsic::ssub_sat;
  else
    return nullptr;

--- a/llvm/test/Transforms/InstCombine/overflow_to_sat.ll
+++ b/llvm/test/Transforms/InstCombine/overflow_to_sat.ll
@ -315,12 +315,7 @@ define i8 @ssub_x_lt_min(i8 %x, i8 %y) {

 define i8 @ssub_x_lt_max(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_x_lt_max(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 [[X]], 0
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -391,12 +386,7 @@ define i8 @ssub_x_lt2_min(i8 %x, i8 %y) {

 define i8 @ssub_x_lt2_max(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_x_lt2_max(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 [[X]], -1
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -448,12 +438,7 @@ define i8 @ssub_x_gt_max(i8 %x, i8 %y) {

 define i8 @ssub_x_ge_min(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_x_ge_min(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i8 [[X]], -1
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -486,12 +471,7 @@ define i8 @ssub_x_ge_max(i8 %x, i8 %y) {

 define i8 @ssub_x_gt2_min(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_x_gt2_min(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i8 [[X]], -2
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -525,12 +505,7 @@ define i8 @ssub_x_gt2_max(i8 %x, i8 %y) {

 define i8 @ssub_y_lt_min(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_y_lt_min(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 [[Y]], 0
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -563,12 +538,7 @@ define i8 @ssub_y_lt_max(i8 %x, i8 %y) {

 define i8 @ssub_y_le_min(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_y_le_min(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 [[Y]], 1
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -620,12 +590,7 @@ define i8 @ssub_y_gt_min(i8 %x, i8 %y) {

 define i8 @ssub_y_gt_max(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_y_gt_max(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i8 [[Y]], 0
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -658,12 +623,7 @@ define i8 @ssub_y_ge_min(i8 %x, i8 %y) {

 define i8 @ssub_y_ge_max(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ssub_y_ge_max(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i8 [[Y]], -1
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
  %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -692,12 +652,7 @@ define i32 @sadd_i32(i32 %x, i32 %y) {

 define i32 @ssub_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @ssub_i32(
-; CHECK-NEXT:    [[AO:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
-; CHECK-NEXT:    [[O:%.*]] = extractvalue { i32, i1 } [[AO]], 1
-; CHECK-NEXT:    [[A:%.*]] = extractvalue { i32, i1 } [[AO]], 0
-; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[X]], 0
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i32 -2147483648, i32 2147483647
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[O]], i32 [[S]], i32 [[A]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
  %ao = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)