[InstCombine] make icmp-mul fold more efficient

There are probably many more cases like this (see also the comments in D33338 about responsibility), but I suspect we don't usually get a visible manifestation. Given the recent interest in improving InstCombine efficiency, here is another potential micro-optimization that could be repeated several times in this function: morph the existing icmp predicate/operands instead of creating a new instruction. llvm-svn: 303860
2017-05-25 14:13:57 +00:00 · 2017-05-25 14:13:57 +00:00 · 5150612012
parent 32d0d38679
commit 5150612012
2 changed files with 8 additions and 6 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3057,19 +3057,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
        break;

      const APInt *C;
-      if (match(BO0->getOperand(1), m_APInt(C))) {
+      if (match(BO0->getOperand(1), m_APInt(C)) && *C != 0 && *C != 1) {
        // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
        // Mask = -1 >> count-trailing-zeros(C).
-        if (*C != 0 && *C != 1) {
-          // FIXME: If trailing zeros is 0, don't bother creating Mask.
+        if (unsigned TZs = C->countTrailingZeros()) {
          Constant *Mask = ConstantInt::get(
              BO0->getType(),
-              APInt::getLowBitsSet(C->getBitWidth(),
-                                   C->getBitWidth() - C->countTrailingZeros()));
+              APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
          Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
          Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
          return new ICmpInst(Pred, And1, And2);
        }
+        // If there are no trailing zeros in the multiplier, just eliminate
+        // the multiplies (no masking is needed):
+        // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
+        return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
      }
      break;
    }
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -2920,7 +2920,7 @@ define i1 @eq_mul_constants(i32 %x, i32 %y) {

 define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @eq_mul_constants_splat(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> %y, %x
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> %x, %y
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
  %A = mul <2 x i32> %x, <i32 5, i32 5>