[InstCombine] move add after min/max intrinsic

This is another regression noted with the proposal to canonicalize to the min/max intrinsics in D98152. Here are Alive2 attempts to show correctness without specifying exact constants: https://alive2.llvm.org/ce/z/bvfCwh (smax) https://alive2.llvm.org/ce/z/of7eqy (smin) https://alive2.llvm.org/ce/z/2Xtxoh (umax) https://alive2.llvm.org/ce/z/Rm4Ad8 (umin) (if you comment out the assume and/or no-wrap, you should see failures) The different output for the umin test is due to a fold added with c4fc2cb5b2 : // umin(x, 1) == zext(x != 0) We probably want to adjust that, so it applies more generally (umax --> sext or patterns where we can fold to select-of-constants). Some folds that were ok when starting with cmp+select may increase instruction count for the equivalent intrinsic, so we have to decide if it's worth altering a min/max. Differential Revision: https://reviews.llvm.org/D110038
2021-09-26 09:47:01 -04:00 · 2021-09-26 09:47:01 -04:00 · 6063e6b499
parent 3538ee763d
commit 6063e6b499
2 changed files with 86 additions and 8 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@ -754,6 +754,45 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
      ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
 }

+/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
+/// can trigger other combines.
+static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
+                                       InstCombiner::BuilderTy &Builder) {
+  Intrinsic::ID MinMaxID = II->getIntrinsicID();
+  assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
+          MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
+         "Expected a min or max intrinsic");
+
+  // TODO: Match vectors with undef elements, but undef may not propagate.
+  Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+  Value *X;
+  const APInt *C0, *C1;
+  if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
+      !match(Op1, m_APInt(C1)))
+    return nullptr;
+
+  // Check for necessary no-wrap and overflow constraints.
+  bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
+  auto *Add = cast<BinaryOperator>(Op0);
+  if ((IsSigned && !Add->hasNoSignedWrap()) ||
+      (!IsSigned && !Add->hasNoUnsignedWrap()))
+    return nullptr;
+
+  // If the constant difference overflows, then instsimplify should reduce the
+  // min/max to the add or C1.
+  bool Overflow;
+  APInt CDiff =
+      IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
+  assert(!Overflow && "Expected simplify of min/max");
+
+  // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
+  // Note: the "mismatched" no-overflow setting does not propagate.
+  Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
+  Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
+  return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
+                  : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
+}
+
 /// If we have a clamp pattern like max (min X, 42), 41 -- where the output
 /// can only be one of two possible constant values -- turn that into a select
 /// of constants.
@ -1101,6 +1140,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
    if (Instruction *I = moveNotAfterMinMax(I1, I0))
      return I;

+    if (Instruction *I = moveAddAfterMinMax(II, Builder))
+      return I;
+
    // smax(X, -X) --> abs(X)
    // smin(X, -X) --> -abs(X)
    // umax(X, -X) --> -abs(X)
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@ -1869,8 +1869,8 @@ define void @cmyk_commute11(i8 %r, i8 %g, i8 %b) {

 define i8 @smax_offset(i8 %x) {
 ; CHECK-LABEL: @smax_offset(
-; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 -124)
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 -127)
+; CHECK-NEXT:    [[M:%.*]] = add nsw i8 [[TMP1]], 3
 ; CHECK-NEXT:    ret i8 [[M]]
 ;
  %a = add nsw i8 %x, 3
@ -1878,6 +1878,8 @@ define i8 @smax_offset(i8 %x) {
  ret i8 %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @smax_offset_limit(i8 %x) {
 ; CHECK-LABEL: @smax_offset_limit(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@ -1888,6 +1890,8 @@ define i8 @smax_offset_limit(i8 %x) {
  ret i8 %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @smax_offset_overflow(i8 %x) {
 ; CHECK-LABEL: @smax_offset_overflow(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@ -1898,6 +1902,8 @@ define i8 @smax_offset_overflow(i8 %x) {
  ret i8 %m
 }

+; negative test - require nsw
+
 define i8 @smax_offset_may_wrap(i8 %x) {
 ; CHECK-LABEL: @smax_offset_may_wrap(
 ; CHECK-NEXT:    [[A:%.*]] = add i8 [[X:%.*]], 3
@ -1909,6 +1915,8 @@ define i8 @smax_offset_may_wrap(i8 %x) {
  ret i8 %m
 }

+; negative test
+
 define i8 @smax_offset_uses(i8 %x) {
 ; CHECK-LABEL: @smax_offset_uses(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], 3
@ -1924,8 +1932,8 @@ define i8 @smax_offset_uses(i8 %x) {

 define <3 x i8> @smin_offset(<3 x i8> %x) {
 ; CHECK-LABEL: @smin_offset(
-; CHECK-NEXT:    [[A:%.*]] = add nuw nsw <3 x i8> [[X:%.*]], <i8 124, i8 124, i8 124>
-; CHECK-NEXT:    [[M:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[A]], <3 x i8> <i8 -3, i8 -3, i8 -3>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 -127, i8 -127, i8 -127>)
+; CHECK-NEXT:    [[M:%.*]] = or <3 x i8> [[TMP1]], <i8 124, i8 124, i8 124>
 ; CHECK-NEXT:    ret <3 x i8> [[M]]
 ;
  %a = add nsw nuw <3 x i8> %x, <i8 124, i8 124, i8 124>
@ -1933,6 +1941,8 @@ define <3 x i8> @smin_offset(<3 x i8> %x) {
  ret <3 x i8> %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @smin_offset_limit(i8 %x) {
 ; CHECK-LABEL: @smin_offset_limit(
 ; CHECK-NEXT:    ret i8 -3
@ -1942,6 +1952,8 @@ define i8 @smin_offset_limit(i8 %x) {
  ret i8 %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @smin_offset_overflow(i8 %x) {
 ; CHECK-LABEL: @smin_offset_overflow(
 ; CHECK-NEXT:    ret i8 -3
@ -1951,6 +1963,8 @@ define i8 @smin_offset_overflow(i8 %x) {
  ret i8 %m
 }

+; negative test - require nsw
+
 define i8 @smin_offset_may_wrap(i8 %x) {
 ; CHECK-LABEL: @smin_offset_may_wrap(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[X:%.*]], 124
@ -1962,6 +1976,8 @@ define i8 @smin_offset_may_wrap(i8 %x) {
  ret i8 %m
 }

+; negative test
+
 define i8 @smin_offset_uses(i8 %x) {
 ; CHECK-LABEL: @smin_offset_uses(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], 124
@ -1975,10 +1991,12 @@ define i8 @smin_offset_uses(i8 %x) {
  ret i8 %m
 }

+; Note: 'nsw' must not propagate here.
+
 define <3 x i8> @umax_offset(<3 x i8> %x) {
 ; CHECK-LABEL: @umax_offset(
-; CHECK-NEXT:    [[A:%.*]] = add nuw nsw <3 x i8> [[X:%.*]], <i8 127, i8 127, i8 127>
-; CHECK-NEXT:    [[M:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[A]], <3 x i8> <i8 -126, i8 -126, i8 -126>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 3, i8 3, i8 3>)
+; CHECK-NEXT:    [[M:%.*]] = add nuw <3 x i8> [[TMP1]], <i8 127, i8 127, i8 127>
 ; CHECK-NEXT:    ret <3 x i8> [[M]]
 ;
  %a = add nsw nuw <3 x i8> %x, <i8 127, i8 127, i8 127>
@ -1986,6 +2004,8 @@ define <3 x i8> @umax_offset(<3 x i8> %x) {
  ret <3 x i8> %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @umax_offset_limit(i8 %x) {
 ; CHECK-LABEL: @umax_offset_limit(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@ -1996,6 +2016,8 @@ define i8 @umax_offset_limit(i8 %x) {
  ret i8 %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @umax_offset_overflow(i8 %x) {
 ; CHECK-LABEL: @umax_offset_overflow(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@ -2006,6 +2028,8 @@ define i8 @umax_offset_overflow(i8 %x) {
  ret i8 %m
 }

+; negative test - require nuw
+
 define i8 @umax_offset_may_wrap(i8 %x) {
 ; CHECK-LABEL: @umax_offset_may_wrap(
 ; CHECK-NEXT:    [[A:%.*]] = add i8 [[X:%.*]], 3
@ -2017,6 +2041,8 @@ define i8 @umax_offset_may_wrap(i8 %x) {
  ret i8 %m
 }

+; negative test
+
 define i8 @umax_offset_uses(i8 %x) {
 ; CHECK-LABEL: @umax_offset_uses(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[X:%.*]], 3
@ -2032,8 +2058,8 @@ define i8 @umax_offset_uses(i8 %x) {

 define i8 @umin_offset(i8 %x) {
 ; CHECK-LABEL: @umin_offset(
-; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[X:%.*]], -5
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[A]], i8 -4)
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[DOTNOT]], i8 -5, i8 -4
 ; CHECK-NEXT:    ret i8 [[M]]
 ;
  %a = add nuw i8 %x, 251
@ -2041,6 +2067,8 @@ define i8 @umin_offset(i8 %x) {
  ret i8 %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @umin_offset_limit(i8 %x) {
 ; CHECK-LABEL: @umin_offset_limit(
 ; CHECK-NEXT:    ret i8 -4
@ -2050,6 +2078,8 @@ define i8 @umin_offset_limit(i8 %x) {
  ret i8 %m
 }

+; This is handled by InstSimplify; testing here to confirm assert.
+
 define i8 @umin_offset_overflow(i8 %x) {
 ; CHECK-LABEL: @umin_offset_overflow(
 ; CHECK-NEXT:    ret i8 -4
@ -2059,6 +2089,8 @@ define i8 @umin_offset_overflow(i8 %x) {
  ret i8 %m
 }

+; negative test - require nuw
+
 define i8 @umin_offset_may_wrap(i8 %x) {
 ; CHECK-LABEL: @umin_offset_may_wrap(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], -5
@ -2070,6 +2102,8 @@ define i8 @umin_offset_may_wrap(i8 %x) {
  ret i8 %m
 }

+; negative test
+
 define i8 @umin_offset_uses(i8 %x) {
 ; CHECK-LABEL: @umin_offset_uses(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[X:%.*]], -5
@ -2083,6 +2117,8 @@ define i8 @umin_offset_uses(i8 %x) {
  ret i8 %m
 }

+; TODO: This could transform, but undef element must not propagate to the new add.
+
 define <3 x i8> @umax_vector_splat_undef(<3 x i8> %x) {
 ; CHECK-LABEL: @umax_vector_splat_undef(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw <3 x i8> [[X:%.*]], <i8 undef, i8 64, i8 64>