From 2cd23eb2438238b1297ff7b4368d673c449ff24f Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 25 Jun 2021 10:24:10 -0700 Subject: [PATCH] [instcombine] Fold overflow check using umulo to comparison If we have a umul.with.overflow where the multiply result is not used and one of the operands is a constant, we can perform the overflow check more cheaply with a comparison than by performing the multiply and extracting the overflow flag. (Noticed when looking at the conditions SCEV emits for overflow checks.) Differential Revision: https://reviews.llvm.org/D104665 --- .../InstCombine/InstructionCombining.cpp | 31 ++++++++++++++++--- llvm/test/Transforms/InstCombine/umulo.ll | 18 ++++------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 67aca4e345ad..2dee94392c21 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3083,13 +3083,36 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { return BinaryOperator::Create(BinOp, LHS, RHS); } - // If the normal result of the add is dead, and the RHS is a constant, - // we can transform this into a range comparison. - // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 - if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow) + assert(*EV.idx_begin() == 1 && + "unexpected extract index for overflow inst"); + + // If the normal result of the computation is dead, and the RHS is a + // constant, we can transform this into a range comparison for many cases. + // TODO: We can generalize these for non-constant rhs when the newly + // formed expressions are known to simplify. Constants are merely one + // such case. + // TODO: Handle vector splats. 
+ switch (WO->getIntrinsicID()) { + default: + break; + case Intrinsic::uadd_with_overflow: + // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 if (ConstantInt *CI = dyn_cast(WO->getRHS())) return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(), ConstantExpr::getNot(CI)); + break; + case Intrinsic::umul_with_overflow: + // overflow for umul a, C --> a > UINT_MAX udiv C + // (unless C == 0, in which case no overflow ever occurs) + if (ConstantInt *CI = dyn_cast(WO->getRHS())) { + assert(!CI->isZero() && "handled by instruction simplify"); + auto UMax = APInt::getMaxValue(CI->getType()->getBitWidth()); + auto *Op = + ConstantExpr::getUDiv(ConstantInt::get(CI->getType(), UMax), CI); + return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(), Op); + } + break; + }; } } if (LoadInst *L = dyn_cast(Agg)) diff --git a/llvm/test/Transforms/InstCombine/umulo.ll b/llvm/test/Transforms/InstCombine/umulo.ll index cce455e84288..3fb0b9443a90 100644 --- a/llvm/test/Transforms/InstCombine/umulo.ll +++ b/llvm/test/Transforms/InstCombine/umulo.ll @@ -35,8 +35,7 @@ define i1 @test_constant1(i8 %a) { define i1 @test_constant2(i8 %a) { ; CHECK-LABEL: @test_constant2( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 2) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp slt i8 [[A:%.*]], 0 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 2) @@ -46,8 +45,7 @@ define i1 @test_constant2(i8 %a) { define i1 @test_constant3(i8 %a) { ; CHECK-LABEL: @test_constant3( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 3) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 85 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 3) @@ -57,8 +55,7 @@ define i1 @test_constant3(i8 %a) { 
define i1 @test_constant4(i8 %a) { ; CHECK-LABEL: @test_constant4( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 4) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 63 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 4) @@ -69,8 +66,7 @@ define i1 @test_constant4(i8 %a) { define i1 @test_constant127(i8 %a) { ; CHECK-LABEL: @test_constant127( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 127) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 2 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 127) @@ -80,8 +76,7 @@ define i1 @test_constant127(i8 %a) { define i1 @test_constant128(i8 %a) { ; CHECK-LABEL: @test_constant128( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 -128) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 1 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 128) @@ -91,8 +86,7 @@ define i1 @test_constant128(i8 %a) { define i1 @test_constant255(i8 %a) { ; CHECK-LABEL: @test_constant255( -; CHECK-NEXT: [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 -1) -; CHECK-NEXT: [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 1 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 255)