[InstCombine] Canonicalize ssub.with.overflow with clamp to ssub.sat

Building on D69252, this adds canonicalisation patterns that fold an ssub.with.overflow whose result is clamped by a select on overflow into a single ssub.sat.

Differential Revision: https://reviews.llvm.org/D69753
This commit is contained in:
David Green 2019-11-17 10:45:00 +00:00
parent 03fce6b12e
commit 08390c52a2
2 changed files with 60 additions and 70 deletions

View File

@ -1736,6 +1736,7 @@ static Instruction *foldAddSubSelect(SelectInst &SI,
/// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y
/// Along with a number of patterns similar to:
/// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
/// X - Y overflows ? (X >= 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
static Instruction *
foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
Value *CondVal = SI.getCondition();
@ -1750,7 +1751,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
Value *X = II->getLHS();
Value *Y = II->getRHS();
auto IsSignedSaturateLimit = [&](Value *Limit) {
auto IsSignedSaturateLimit = [&](Value *Limit, bool IsAdd) {
Type *Ty = Limit->getType();
ICmpInst::Predicate Pred;
@ -1773,6 +1774,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
if (Op != X && Op != Y)
return false;
if (IsAdd) {
// X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
// X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
// X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
@ -1787,6 +1789,28 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&
IsMinMax(FalseVal, TrueVal))
return true;
} else {
// X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
if (Op == X && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C + 1) &&
IsMinMax(TrueVal, FalseVal))
return true;
// X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
if (Op == X && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 2) &&
IsMinMax(FalseVal, TrueVal))
return true;
// X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
if (Op == Y && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&
IsMinMax(FalseVal, TrueVal))
return true;
// X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
if (Op == Y && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&
IsMinMax(TrueVal, FalseVal))
return true;
}
return false;
};
@ -1801,7 +1825,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
// X - Y overflows ? 0 : X - Y -> usub_sat X, Y
NewIntrinsicID = Intrinsic::usub_sat;
else if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow &&
IsSignedSaturateLimit(TrueVal))
IsSignedSaturateLimit(TrueVal, /*IsAdd=*/true))
// X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
// X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y
// X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
@ -1811,6 +1835,17 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
// X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
// X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y
NewIntrinsicID = Intrinsic::sadd_sat;
else if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow &&
IsSignedSaturateLimit(TrueVal, /*IsAdd=*/false))
// X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
// X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y
NewIntrinsicID = Intrinsic::ssub_sat;
else
return nullptr;

View File

@ -315,12 +315,7 @@ define i8 @ssub_x_lt_min(i8 %x, i8 %y) {
define i8 @ssub_x_lt_max(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_x_lt_max(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 0
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -391,12 +386,7 @@ define i8 @ssub_x_lt2_min(i8 %x, i8 %y) {
define i8 @ssub_x_lt2_max(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_x_lt2_max(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], -1
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -448,12 +438,7 @@ define i8 @ssub_x_gt_max(i8 %x, i8 %y) {
define i8 @ssub_x_ge_min(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_x_ge_min(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], -1
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -486,12 +471,7 @@ define i8 @ssub_x_ge_max(i8 %x, i8 %y) {
define i8 @ssub_x_gt2_min(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_x_gt2_min(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], -2
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -525,12 +505,7 @@ define i8 @ssub_x_gt2_max(i8 %x, i8 %y) {
define i8 @ssub_y_lt_min(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_y_lt_min(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 0
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -563,12 +538,7 @@ define i8 @ssub_y_lt_max(i8 %x, i8 %y) {
define i8 @ssub_y_le_min(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_y_le_min(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 1
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -620,12 +590,7 @@ define i8 @ssub_y_gt_min(i8 %x, i8 %y) {
define i8 @ssub_y_gt_max(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_y_gt_max(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], 0
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -658,12 +623,7 @@ define i8 @ssub_y_ge_min(i8 %x, i8 %y) {
define i8 @ssub_y_ge_max(i8 %x, i8 %y) {
; CHECK-LABEL: @ssub_y_ge_max(
; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], -1
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y)
@ -692,12 +652,7 @@ define i32 @sadd_i32(i32 %x, i32 %y) {
define i32 @ssub_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @ssub_i32(
; CHECK-NEXT: [[AO:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
; CHECK-NEXT: [[O:%.*]] = extractvalue { i32, i1 } [[AO]], 1
; CHECK-NEXT: [[A:%.*]] = extractvalue { i32, i1 } [[AO]], 0
; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X]], 0
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 -2147483648, i32 2147483647
; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i32 [[S]], i32 [[A]]
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
; CHECK-NEXT: ret i32 [[R]]
;
%ao = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)