[InstCombine] move add after min/max intrinsic

This is another regression noted with the proposal to canonicalize
to the min/max intrinsics in D98152.

Here are Alive2 attempts to show correctness without specifying
exact constants:
https://alive2.llvm.org/ce/z/bvfCwh (smax)
https://alive2.llvm.org/ce/z/of7eqy (smin)
https://alive2.llvm.org/ce/z/2Xtxoh (umax)
https://alive2.llvm.org/ce/z/Rm4Ad8 (umin)
(if you comment out the assume and/or no-wrap, you should see failures)

The different output for the umin test is due to a fold added with
c4fc2cb5b2 :

// umin(x, 1) == zext(x != 0)

We probably want to adjust that, so it applies more generally
(umax --> sext or patterns where we can fold to select-of-constants).
Some folds that were ok when starting with cmp+select may increase
instruction count for the equivalent intrinsic, so we have to decide
if it's worth altering a min/max.

Differential Revision: https://reviews.llvm.org/D110038
This commit is contained in:
Sanjay Patel 2021-09-26 09:47:01 -04:00
parent 3538ee763d
commit 6063e6b499
2 changed files with 86 additions and 8 deletions

View File

@ -754,6 +754,45 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
Intrinsic::ID MinMaxID = II->getIntrinsicID();
assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
"Expected a min or max intrinsic");
// TODO: Match vectors with undef elements, but undef may not propagate.
Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
Value *X;
const APInt *C0, *C1;
if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
!match(Op1, m_APInt(C1)))
return nullptr;
// Check for necessary no-wrap and overflow constraints.
bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
auto *Add = cast<BinaryOperator>(Op0);
if ((IsSigned && !Add->hasNoSignedWrap()) ||
(!IsSigned && !Add->hasNoUnsignedWrap()))
return nullptr;
// If the constant difference overflows, then instsimplify should reduce the
// min/max to the add or C1.
bool Overflow;
APInt CDiff =
IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
assert(!Overflow && "Expected simplify of min/max");
// min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
// Note: the "mismatched" no-overflow setting does not propagate.
Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
: BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
@ -1101,6 +1140,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *I = moveNotAfterMinMax(I1, I0))
return I;
if (Instruction *I = moveAddAfterMinMax(II, Builder))
return I;
// smax(X, -X) --> abs(X)
// smin(X, -X) --> -abs(X)
// umax(X, -X) --> -abs(X)

View File

@ -1869,8 +1869,8 @@ define void @cmyk_commute11(i8 %r, i8 %g, i8 %b) {
define i8 @smax_offset(i8 %x) {
; CHECK-LABEL: @smax_offset(
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 -124)
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 -127)
; CHECK-NEXT: [[M:%.*]] = add nsw i8 [[TMP1]], 3
; CHECK-NEXT: ret i8 [[M]]
;
%a = add nsw i8 %x, 3
@ -1878,6 +1878,8 @@ define i8 @smax_offset(i8 %x) {
ret i8 %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @smax_offset_limit(i8 %x) {
; CHECK-LABEL: @smax_offset_limit(
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@ -1888,6 +1890,8 @@ define i8 @smax_offset_limit(i8 %x) {
ret i8 %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @smax_offset_overflow(i8 %x) {
; CHECK-LABEL: @smax_offset_overflow(
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@ -1898,6 +1902,8 @@ define i8 @smax_offset_overflow(i8 %x) {
ret i8 %m
}
; negative test - require nsw
define i8 @smax_offset_may_wrap(i8 %x) {
; CHECK-LABEL: @smax_offset_may_wrap(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 3
@ -1909,6 +1915,8 @@ define i8 @smax_offset_may_wrap(i8 %x) {
ret i8 %m
}
; negative test
define i8 @smax_offset_uses(i8 %x) {
; CHECK-LABEL: @smax_offset_uses(
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@ -1924,8 +1932,8 @@ define i8 @smax_offset_uses(i8 %x) {
define <3 x i8> @smin_offset(<3 x i8> %x) {
; CHECK-LABEL: @smin_offset(
; CHECK-NEXT: [[A:%.*]] = add nuw nsw <3 x i8> [[X:%.*]], <i8 124, i8 124, i8 124>
; CHECK-NEXT: [[M:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[A]], <3 x i8> <i8 -3, i8 -3, i8 -3>)
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 -127, i8 -127, i8 -127>)
; CHECK-NEXT: [[M:%.*]] = or <3 x i8> [[TMP1]], <i8 124, i8 124, i8 124>
; CHECK-NEXT: ret <3 x i8> [[M]]
;
%a = add nsw nuw <3 x i8> %x, <i8 124, i8 124, i8 124>
@ -1933,6 +1941,8 @@ define <3 x i8> @smin_offset(<3 x i8> %x) {
ret <3 x i8> %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @smin_offset_limit(i8 %x) {
; CHECK-LABEL: @smin_offset_limit(
; CHECK-NEXT: ret i8 -3
@ -1942,6 +1952,8 @@ define i8 @smin_offset_limit(i8 %x) {
ret i8 %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @smin_offset_overflow(i8 %x) {
; CHECK-LABEL: @smin_offset_overflow(
; CHECK-NEXT: ret i8 -3
@ -1951,6 +1963,8 @@ define i8 @smin_offset_overflow(i8 %x) {
ret i8 %m
}
; negative test - require nsw
define i8 @smin_offset_may_wrap(i8 %x) {
; CHECK-LABEL: @smin_offset_may_wrap(
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 124
@ -1962,6 +1976,8 @@ define i8 @smin_offset_may_wrap(i8 %x) {
ret i8 %m
}
; negative test
define i8 @smin_offset_uses(i8 %x) {
; CHECK-LABEL: @smin_offset_uses(
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], 124
@ -1975,10 +1991,12 @@ define i8 @smin_offset_uses(i8 %x) {
ret i8 %m
}
; Note: 'nsw' must not propagate here.
define <3 x i8> @umax_offset(<3 x i8> %x) {
; CHECK-LABEL: @umax_offset(
; CHECK-NEXT: [[A:%.*]] = add nuw nsw <3 x i8> [[X:%.*]], <i8 127, i8 127, i8 127>
; CHECK-NEXT: [[M:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[A]], <3 x i8> <i8 -126, i8 -126, i8 -126>)
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 3, i8 3, i8 3>)
; CHECK-NEXT: [[M:%.*]] = add nuw <3 x i8> [[TMP1]], <i8 127, i8 127, i8 127>
; CHECK-NEXT: ret <3 x i8> [[M]]
;
%a = add nsw nuw <3 x i8> %x, <i8 127, i8 127, i8 127>
@ -1986,6 +2004,8 @@ define <3 x i8> @umax_offset(<3 x i8> %x) {
ret <3 x i8> %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @umax_offset_limit(i8 %x) {
; CHECK-LABEL: @umax_offset_limit(
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@ -1996,6 +2016,8 @@ define i8 @umax_offset_limit(i8 %x) {
ret i8 %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @umax_offset_overflow(i8 %x) {
; CHECK-LABEL: @umax_offset_overflow(
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@ -2006,6 +2028,8 @@ define i8 @umax_offset_overflow(i8 %x) {
ret i8 %m
}
; negative test - require nuw
define i8 @umax_offset_may_wrap(i8 %x) {
; CHECK-LABEL: @umax_offset_may_wrap(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 3
@ -2017,6 +2041,8 @@ define i8 @umax_offset_may_wrap(i8 %x) {
ret i8 %m
}
; negative test
define i8 @umax_offset_uses(i8 %x) {
; CHECK-LABEL: @umax_offset_uses(
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@ -2032,8 +2058,8 @@ define i8 @umax_offset_uses(i8 %x) {
define i8 @umin_offset(i8 %x) {
; CHECK-LABEL: @umin_offset(
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], -5
; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[A]], i8 -4)
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[X:%.*]], 0
; CHECK-NEXT: [[M:%.*]] = select i1 [[DOTNOT]], i8 -5, i8 -4
; CHECK-NEXT: ret i8 [[M]]
;
%a = add nuw i8 %x, 251
@ -2041,6 +2067,8 @@ define i8 @umin_offset(i8 %x) {
ret i8 %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @umin_offset_limit(i8 %x) {
; CHECK-LABEL: @umin_offset_limit(
; CHECK-NEXT: ret i8 -4
@ -2050,6 +2078,8 @@ define i8 @umin_offset_limit(i8 %x) {
ret i8 %m
}
; This is handled by InstSimplify; testing here to confirm assert.
define i8 @umin_offset_overflow(i8 %x) {
; CHECK-LABEL: @umin_offset_overflow(
; CHECK-NEXT: ret i8 -4
@ -2059,6 +2089,8 @@ define i8 @umin_offset_overflow(i8 %x) {
ret i8 %m
}
; negative test - require nuw
define i8 @umin_offset_may_wrap(i8 %x) {
; CHECK-LABEL: @umin_offset_may_wrap(
; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X:%.*]], -5
@ -2070,6 +2102,8 @@ define i8 @umin_offset_may_wrap(i8 %x) {
ret i8 %m
}
; negative test
define i8 @umin_offset_uses(i8 %x) {
; CHECK-LABEL: @umin_offset_uses(
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X:%.*]], -5
@ -2083,6 +2117,8 @@ define i8 @umin_offset_uses(i8 %x) {
ret i8 %m
}
; TODO: This could transform, but undef element must not propagate to the new add.
define <3 x i8> @umax_vector_splat_undef(<3 x i8> %x) {
; CHECK-LABEL: @umax_vector_splat_undef(
; CHECK-NEXT: [[A:%.*]] = add nuw <3 x i8> [[X:%.*]], <i8 undef, i8 64, i8 64>