[LICM] Hoist fp division from the loops and replace by a reciprocal

When allowed, we can hoist a division out of a loop in favor of a multiplication by the reciprocal. Fixes PR32157. Patch by vit9696! Differential Revision: https://reviews.llvm.org/D30819 llvm-svn: 299911
2017-04-11 02:22:54 +00:00 · 2017-04-11 02:22:54 +00:00 · b63ed91549
parent cef9e52736
commit b63ed91549
2 changed files with 57 additions and 0 deletions
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@ -431,6 +431,29 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
        continue;
      }

+      // Attempt to remove floating point division out of the loop by converting
+      // it to a reciprocal multiplication.
+      if (I.getOpcode() == Instruction::FDiv &&
+          CurLoop->isLoopInvariant(I.getOperand(1)) &&
+          I.hasAllowReciprocal()) {
+        auto Divisor = I.getOperand(1);
+        auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
+        auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
+        ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
+        ReciprocalDivisor->insertBefore(&I);
+
+        auto Product = BinaryOperator::CreateFMul(I.getOperand(0),
+                                                  ReciprocalDivisor);
+        Product->setFastMathFlags(I.getFastMathFlags());
+        Product->insertAfter(&I);
+        I.replaceAllUsesWith(Product);
+        I.eraseFromParent();
+
+        hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
+        Changed = true;
+        continue;
+      }
+
      // Try hoisting the instruction out to the preheader.  We can only do this
      // if all of the operands of the instruction are loop invariant and if it
      // is safe to hoist the instruction.
--- a/llvm/test/Transforms/LICM/hoist-fast-fdiv.ll
+++ b/llvm/test/Transforms/LICM/hoist-fast-fdiv.ll
@ -0,0 +1,34 @@
+; RUN: opt -licm -S < %s | FileCheck %s
+
+; Function Attrs: noinline norecurse nounwind readnone ssp uwtable
+define zeroext i1 @f(double %v) #0 {
+entry:
+; CHECK-LABEL: @f(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: fdiv fast double 1.000000e+00, %v
+  br label %loop
+
+loop:                                       ; preds = %entry, %loop
+  %v3 = phi i32 [ 0, %entry ], [ %v11, %loop ]
+  %v4 = phi i32 [ 0, %entry ], [ %v12, %loop ]
+  %v5 = uitofp i32 %v4 to double
+
+; CHECK-LABEL: loop:
+; CHECK: fmul fast double
+; CHECK-NOT: fdiv
+  %v6 = fdiv fast double %v5, %v
+  %v7 = fptoui double %v6 to i64
+  %v8 = and i64 %v7, 1
+  %v9 = xor i64 %v8, 1
+  %v10 = trunc i64 %v9 to i32
+  %v11 = add i32 %v10, %v3
+  %v12 = add nuw i32 %v4, 1
+  %v13 = icmp eq i32 %v12, -1
+  br i1 %v13, label %end, label %loop
+
+end:                                      ; preds = %loop
+  %v15 = phi i32 [ %v11, %loop ]
+  %v16 = icmp ne i32 %v15, 0
+  ret i1 %v16
+}
+