forked from OSchip/llvm-project
[LICM] Hoist FP division out of loops and replace it with multiplication by a reciprocal
When allowed, we can hoist a division out of a loop in favor of a multiplication by the reciprocal. Fixes PR32157. Patch by vit9696! Differential Revision: https://reviews.llvm.org/D30819 llvm-svn: 299911
This commit is contained in:
parent
cef9e52736
commit
b63ed91549
|
@ -431,6 +431,29 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
|
|||
continue;
|
||||
}
|
||||
|
||||
// Attempt to remove floating point division from the loop by converting
|
||||
// it to a reciprocal multiplication.
|
||||
if (I.getOpcode() == Instruction::FDiv &&
|
||||
CurLoop->isLoopInvariant(I.getOperand(1)) &&
|
||||
I.hasAllowReciprocal()) {
|
||||
auto Divisor = I.getOperand(1);
|
||||
auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
|
||||
auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
|
||||
ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
|
||||
ReciprocalDivisor->insertBefore(&I);
|
||||
|
||||
auto Product = BinaryOperator::CreateFMul(I.getOperand(0),
|
||||
ReciprocalDivisor);
|
||||
Product->setFastMathFlags(I.getFastMathFlags());
|
||||
Product->insertAfter(&I);
|
||||
I.replaceAllUsesWith(Product);
|
||||
I.eraseFromParent();
|
||||
|
||||
hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try hoisting the instruction out to the preheader. We can only do this
|
||||
// if all of the operands of the instruction are loop invariant and if it
|
||||
// is safe to hoist the instruction.
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
; RUN: opt -licm -S < %s | FileCheck %s
|
||||
|
||||
; Function Attrs: noinline norecurse nounwind readnone ssp uwtable
|
||||
; Regression input for the LICM reciprocal transform: the loop divides a
; loop-varying value by %v, which is a function argument and therefore
; invariant in the loop. The FileCheck directives below expect the pass to
; emit `1.0 / %v` once in the entry block and to leave only a multiplication
; (and no division) inside the loop.
define zeroext i1 @f(double %v) #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: @f(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: fdiv fast double 1.000000e+00, %v
|
||||
br label %loop
|
||||
|
||||
loop: ; preds = %entry, %loop
|
||||
; %v3 accumulates the per-iteration result; %v4 is the iteration counter.
%v3 = phi i32 [ 0, %entry ], [ %v11, %loop ]
|
||||
%v4 = phi i32 [ 0, %entry ], [ %v12, %loop ]
|
||||
%v5 = uitofp i32 %v4 to double
|
||||
|
||||
; CHECK-LABEL: loop:
|
||||
; CHECK: fmul fast double
|
||||
; CHECK-NOT: fdiv
|
||||
; The division under test: the dividend %v5 changes every iteration, the
; divisor %v does not. The `fast` flag is what permits rewriting it as a
; multiplication by the reciprocal.
%v6 = fdiv fast double %v5, %v
|
||||
%v7 = fptoui double %v6 to i64
|
||||
%v8 = and i64 %v7, 1
|
||||
; Invert the low bit of the truncated quotient and add it to the accumulator.
%v9 = xor i64 %v8, 1
|
||||
%v10 = trunc i64 %v9 to i32
|
||||
%v11 = add i32 %v10, %v3
|
||||
%v12 = add nuw i32 %v4, 1
|
||||
; Leave the loop once the incremented counter equals -1 (all bits set).
%v13 = icmp eq i32 %v12, -1
|
||||
br i1 %v13, label %end, label %loop
|
||||
|
||||
end: ; preds = %loop
|
||||
; Single-incoming phi that carries the final accumulator value out of the loop.
%v15 = phi i32 [ %v11, %loop ]
|
||||
; Return true when the accumulated count is non-zero.
%v16 = icmp ne i32 %v15, 0
|
||||
ret i1 %v16
|
||||
}
|
||||
|
Loading…
Reference in New Issue