forked from OSchip/llvm-project
Fold fcmp in cases where value is provably non-negative. By Arch Robison.
This patch folds fcmp in some cases of interest in Julia. The patch adds a function CannotBeOrderedLessThanZero that returns true if a value is provably not less than zero. I.e. the function returns true if the value is provably -0, +0, positive, or a NaN. The patch extends InstructionSimplify.cpp to fold instances of fcmp where: - the predicate is olt or uge - the first operand is provably not less than zero - the second operand is zero The motivation for handling these cases optimizing away domain checks for sqrt in Julia for common idioms such as sqrt(x*x+y*y).. http://reviews.llvm.org/D6972 llvm-svn: 227298
This commit is contained in:
parent
24ea09c20c
commit
45f0448081
|
@ -116,6 +116,11 @@ namespace llvm {
|
|||
///
|
||||
bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0);
|
||||
|
||||
/// CannotBeOrderedLessThanZero - Return true if we can prove that the
|
||||
/// specified FP value is either a NaN or never less than 0.0.
|
||||
///
|
||||
bool CannotBeOrderedLessThanZero(const Value *V, unsigned Depth = 0);
|
||||
|
||||
/// isBytewiseValue - If the specified value can be set by repeating the same
|
||||
/// byte in memory, return the i8 value that it is represented with. This is
|
||||
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
|
||||
|
|
|
@ -3087,6 +3087,20 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
|
|||
}
|
||||
}
|
||||
}
|
||||
if (CFP->getValueAPF().isZero()) {
|
||||
switch (Pred) {
|
||||
case FCmpInst::FCMP_UGE:
|
||||
if (CannotBeOrderedLessThanZero(LHS))
|
||||
return ConstantInt::getTrue(CFP->getContext());
|
||||
break;
|
||||
case FCmpInst::FCMP_OLT:
|
||||
if (CannotBeOrderedLessThanZero(LHS))
|
||||
return ConstantInt::getFalse(CFP->getContext());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2044,6 +2044,59 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
|
||||
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
|
||||
return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
|
||||
|
||||
if (Depth == 6)
|
||||
return false; // Limit search depth.
|
||||
|
||||
const Operator *I = dyn_cast<Operator>(V);
|
||||
if (!I) return false;
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
default: break;
|
||||
case Instruction::FMul:
|
||||
// x*x is always non-negative or a NaN.
|
||||
if (I->getOperand(0) == I->getOperand(1))
|
||||
return true;
|
||||
// Fall through
|
||||
case Instruction::FAdd:
|
||||
case Instruction::FDiv:
|
||||
case Instruction::FRem:
|
||||
return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
|
||||
CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
|
||||
case Instruction::FPExt:
|
||||
case Instruction::FPTrunc:
|
||||
// Widening/narrowing never change sign.
|
||||
return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
|
||||
case Instruction::Call:
|
||||
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
|
||||
switch (II->getIntrinsicID()) {
|
||||
default: break;
|
||||
case Intrinsic::exp:
|
||||
case Intrinsic::exp2:
|
||||
case Intrinsic::fabs:
|
||||
case Intrinsic::sqrt:
|
||||
return true;
|
||||
case Intrinsic::powi:
|
||||
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
|
||||
// powi(x,n) is non-negative if n is even.
|
||||
if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
|
||||
return true;
|
||||
}
|
||||
return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
|
||||
case Intrinsic::fma:
|
||||
case Intrinsic::fmuladd:
|
||||
// x*x+y is non-negative if y is non-negative.
|
||||
return I->getOperand(0) == I->getOperand(1) &&
|
||||
CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// If the specified value can be set by repeating the same byte in memory,
|
||||
/// return the i8 value that it is represented with. This is
|
||||
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
; RUN: opt < %s -instsimplify -S | FileCheck %s
|
||||
|
||||
; These tests choose arbitrarily between float and double,
|
||||
; and between uge and olt, to give reasonble coverage
|
||||
; without combinatorial explosion.
|
||||
|
||||
declare float @llvm.fabs.f32(float)
|
||||
declare float @llvm.sqrt.f32(float)
|
||||
declare double @llvm.powi.f64(double,i32)
|
||||
declare float @llvm.exp.f32(float)
|
||||
declare double @llvm.exp2.f64(double)
|
||||
declare float @llvm.fma.f32(float,float,float)
|
||||
|
||||
declare void @expect_equal(i1,i1)
|
||||
|
||||
; CHECK-LABEL: @orderedLessZeroTree(
|
||||
define i1 @orderedLessZeroTree(float,float,float,float) {
|
||||
%square = fmul float %0, %0
|
||||
%abs = call float @llvm.fabs.f32(float %1)
|
||||
%sqrt = call float @llvm.sqrt.f32(float %2)
|
||||
%fma = call float @llvm.fma.f32(float %3, float %3, float %sqrt)
|
||||
%div = fdiv float %square, %abs
|
||||
%rem = frem float %sqrt, %fma
|
||||
%add = fadd float %div, %rem
|
||||
%uge = fcmp uge float %add, 0.000000e+00
|
||||
; CHECK: ret i1 true
|
||||
ret i1 %uge
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @orderedLessZeroExpExt(
|
||||
define i1 @orderedLessZeroExpExt(float) {
|
||||
%a = call float @llvm.exp.f32(float %0)
|
||||
%b = fpext float %a to double
|
||||
%uge = fcmp uge double %b, 0.000000e+00
|
||||
; CHECK: ret i1 true
|
||||
ret i1 %uge
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @orderedLessZeroExp2Trunc(
|
||||
define i1 @orderedLessZeroExp2Trunc(double) {
|
||||
%a = call double @llvm.exp2.f64(double %0)
|
||||
%b = fptrunc double %a to float
|
||||
%olt = fcmp olt float %b, 0.000000e+00
|
||||
; CHECK: ret i1 false
|
||||
ret i1 %olt
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @orderedLessZeroPowi(
|
||||
define i1 @orderedLessZeroPowi(double,double) {
|
||||
; Even constant exponent
|
||||
%a = call double @llvm.powi.f64(double %0, i32 2)
|
||||
%square = fmul double %1, %1
|
||||
; Odd constant exponent with provably non-negative base
|
||||
%b = call double @llvm.powi.f64(double %square, i32 3)
|
||||
%c = fadd double %a, %b
|
||||
%olt = fcmp olt double %b, 0.000000e+00
|
||||
; CHECK: ret i1 false
|
||||
ret i1 %olt
|
||||
}
|
||||
|
Loading…
Reference in New Issue