[LICM] Hoist fp division from the loops and replace by a reciprocal

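When an fdiv has a loop-invariant divisor and its fast-math flags allow a
reciprocal, we can hoist the division out of the loop (as 1.0 / divisor) and
multiply by that reciprocal inside the loop instead. Fixes PR32157.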

Patch by vit9696!

Differential Revision: https://reviews.llvm.org/D30819

llvm-svn: 299911
This commit is contained in:
Hal Finkel 2017-04-11 02:22:54 +00:00
parent cef9e52736
commit b63ed91549
2 changed files with 57 additions and 0 deletions

View File

@ -431,6 +431,29 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
// Attempt to move a floating-point division out of the loop by converting
// it to a multiplication by the reciprocal of its divisor: X / D becomes
// X * (1.0 / D), and only the (1.0 / D) part is handed to hoist().
// This is done only when I is an fdiv, its divisor (operand 1) is loop
// invariant, and I carries the allow-reciprocal fast-math flag. The opcode
// test comes first, so the other two conditions are evaluated only for an
// fdiv.
if (I.getOpcode() == Instruction::FDiv &&
CurLoop->isLoopInvariant(I.getOperand(1)) &&
I.hasAllowReciprocal()) {
auto Divisor = I.getOperand(1);
// The constant 1.0 is created with the divisor's own type.
auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
// Build 1.0 / Divisor, copy I's fast-math flags onto it, and place it
// immediately before I for now; it is passed to hoist() below.
auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
ReciprocalDivisor->insertBefore(&I);
// Build Dividend * (1.0 / Divisor) with the same fast-math flags and place
// it immediately after I.
auto Product = BinaryOperator::CreateFMul(I.getOperand(0),
ReciprocalDivisor);
Product->setFastMathFlags(I.getFastMathFlags());
Product->insertAfter(&I);
// Redirect every user of the division to the product, then delete the
// now-unused division. I must not be touched after this point.
I.replaceAllUsesWith(Product);
I.eraseFromParent();
// Only the reciprocal is hoisted; the multiplication stays where the
// division used to be.
hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
Changed = true;
// I has been fully handled (and erased), so skip the generic hoisting
// logic that follows.
continue;
}
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.

View File

@ -0,0 +1,34 @@
; RUN: opt -licm -S < %s | FileCheck %s
; Regression test for LICM's fdiv-to-reciprocal transform.
; The loop divides a loop-varying value (%v5) by the function argument %v,
; which is loop invariant, using an fdiv with the `fast` flags. The expected
; output has the reciprocal `fdiv fast double 1.0, %v` as the first
; instruction of the entry block, and a `fast` fmul inside the loop with no
; fdiv anywhere after it.
; Function Attrs: noinline norecurse nounwind readnone ssp uwtable
define zeroext i1 @f(double %v) #0 {
entry:
; The reciprocal must appear on the line directly after the `entry:` label.
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: fdiv fast double 1.000000e+00, %v
br label %loop
loop: ; preds = %entry, %loop
; %v3 accumulates the per-iteration result (%v11); %v4 is the counter that
; starts at 0 and is incremented by 1 each iteration (%v12).
%v3 = phi i32 [ 0, %entry ], [ %v11, %loop ]
%v4 = phi i32 [ 0, %entry ], [ %v12, %loop ]
; The dividend depends on the counter, so it changes every iteration.
%v5 = uitofp i32 %v4 to double
; Inside the loop the division must have become a fast multiplication, and
; no fdiv may follow that multiplication in the rest of the output.
; CHECK-LABEL: loop:
; CHECK: fmul fast double
; CHECK-NOT: fdiv
; The division under test: loop-varying dividend, loop-invariant divisor.
%v6 = fdiv fast double %v5, %v
; Consume the quotient: convert to i64, keep the low bit, invert it, truncate
; to i32, and add it to the accumulator.
%v7 = fptoui double %v6 to i64
%v8 = and i64 %v7, 1
%v9 = xor i64 %v8, 1
%v10 = trunc i64 %v9 to i32
%v11 = add i32 %v10, %v3
; Advance the counter and leave the loop once the incremented value equals -1.
%v12 = add nuw i32 %v4, 1
%v13 = icmp eq i32 %v12, -1
br i1 %v13, label %end, label %loop
end: ; preds = %loop
; Return whether the accumulated sum is non-zero.
%v15 = phi i32 [ %v11, %loop ]
%v16 = icmp ne i32 %v15, 0
ret i1 %v16
}