forked from OSchip/llvm-project
Optimize away fabs() calls when input is squared (known positive).
Eliminate library calls and intrinsic calls to fabs when the input is a squared value (x * x). Note that no unsafe-math / fast-math assumptions are needed for this optimization.
Differential Revision: http://reviews.llvm.org/D5777
llvm-svn: 219717
This commit is contained in:
parent
cd11a2806b
commit
0ca42bb5a8
|
@ -92,6 +92,7 @@ private:
|
||||||
Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
|
Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
|
||||||
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
|
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
|
||||||
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
|
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
|
||||||
|
Value *optimizeFabs(CallInst *CI, IRBuilder<> &B);
|
||||||
Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B);
|
Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B);
|
||||||
|
|
||||||
// Integer Library Call Optimizations
|
// Integer Library Call Optimizations
|
||||||
|
|
|
@ -1230,6 +1230,30 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
|
||||||
return Ret;
|
return Ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to simplify a call to fabs. The optimizeCall dispatch routes the
// fabsf/fabs/fabsl library functions and the llvm.fabs intrinsic here.
//
// CI is the call being examined; B is the IRBuilder handed on to
// optimizeUnaryDoubleFP.
//
// Returns the squared operand itself when the argument is `x * x`, otherwise
// whatever optimizeUnaryDoubleFP produced for a callee named "fabs",
// otherwise nullptr.
// NOTE(review): presumably the caller substitutes a non-null result for the
// call -- confirm against the users of optimizeCall.
Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
|
||||||
|
Function *Callee = CI->getCalledFunction();
|
||||||
|
|
||||||
|
Value *Ret = nullptr;
|
||||||
|
// Only a callee literally named "fabs" is handed to optimizeUnaryDoubleFP,
// and only when TLI reports that fabsf is available.
// NOTE(review): presumably this is the double -> float shrink exercised by
// the square_fabs_shrink_call tests -- confirm in optimizeUnaryDoubleFP.
if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) {
|
||||||
|
Ret = optimizeUnaryDoubleFP(CI, B, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
FunctionType *FT = Callee->getFunctionType();
|
||||||
|
// Make sure this has 1 argument of FP type which matches the result type.
|
||||||
|
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
|
||||||
|
!FT->getParamType(0)->isFloatingPointTy())
|
||||||
|
// Unexpected signature: skip the square fold but keep any result computed
// above.
return Ret;
|
||||||
|
|
||||||
|
Value *Op = CI->getArgOperand(0);
|
||||||
|
if (Instruction *I = dyn_cast<Instruction>(Op)) {
|
||||||
|
// Fold fabs(x * x) -> x * x: a value multiplied by itself is never
// negative, so the fabs is redundant and the multiply is returned as-is.
// The two operands are compared by identity, so only an FMul whose operands
// are the very same Value matches.
|
||||||
|
if (I->getOpcode() == Instruction::FMul)
|
||||||
|
if (I->getOperand(0) == I->getOperand(1))
|
||||||
|
return Op;
|
||||||
|
}
|
||||||
|
// No fold applied: Ret is still nullptr unless the "fabs" branch above set it.
return Ret;
|
||||||
|
}
|
||||||
|
|
||||||
static bool isTrigLibCall(CallInst *CI);
|
static bool isTrigLibCall(CallInst *CI);
|
||||||
static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
|
static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
|
||||||
bool UseFloat, Value *&Sin, Value *&Cos,
|
bool UseFloat, Value *&Sin, Value *&Cos,
|
||||||
|
@ -1893,6 +1917,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
|
||||||
return optimizePow(CI, Builder);
|
return optimizePow(CI, Builder);
|
||||||
case Intrinsic::exp2:
|
case Intrinsic::exp2:
|
||||||
return optimizeExp2(CI, Builder);
|
return optimizeExp2(CI, Builder);
|
||||||
|
case Intrinsic::fabs:
|
||||||
|
return optimizeFabs(CI, Builder);
|
||||||
default:
|
default:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -1965,6 +1991,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
|
||||||
case LibFunc::exp2:
|
case LibFunc::exp2:
|
||||||
case LibFunc::exp2f:
|
case LibFunc::exp2f:
|
||||||
return optimizeExp2(CI, Builder);
|
return optimizeExp2(CI, Builder);
|
||||||
|
case LibFunc::fabsf:
|
||||||
|
case LibFunc::fabs:
|
||||||
|
case LibFunc::fabsl:
|
||||||
|
return optimizeFabs(CI, Builder);
|
||||||
case LibFunc::ffs:
|
case LibFunc::ffs:
|
||||||
case LibFunc::ffsl:
|
case LibFunc::ffsl:
|
||||||
case LibFunc::ffsll:
|
case LibFunc::ffsll:
|
||||||
|
@ -1999,7 +2029,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
|
||||||
case LibFunc::fputc:
|
case LibFunc::fputc:
|
||||||
return optimizeErrorReporting(CI, Builder, 1);
|
return optimizeErrorReporting(CI, Builder, 1);
|
||||||
case LibFunc::ceil:
|
case LibFunc::ceil:
|
||||||
case LibFunc::fabs:
|
|
||||||
case LibFunc::floor:
|
case LibFunc::floor:
|
||||||
case LibFunc::rint:
|
case LibFunc::rint:
|
||||||
case LibFunc::round:
|
case LibFunc::round:
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||||
|
|
||||||
|
; Make sure all library calls are eliminated when the input is a squared value (known non-negative).
|
||||||
|
|
||||||
|
declare float @fabsf(float)
|
||||||
|
declare double @fabs(double)
|
||||||
|
declare fp128 @fabsl(fp128)
|
||||||
|
|
||||||
|
define float @square_fabs_call_f32(float %x) {
|
||||||
|
%mul = fmul float %x, %x
|
||||||
|
%fabsf = tail call float @fabsf(float %mul)
|
||||||
|
ret float %fabsf
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_call_f32(
|
||||||
|
; CHECK-NEXT: %mul = fmul float %x, %x
|
||||||
|
; CHECK-NEXT: ret float %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @square_fabs_call_f64(double %x) {
|
||||||
|
%mul = fmul double %x, %x
|
||||||
|
%fabs = tail call double @fabs(double %mul)
|
||||||
|
ret double %fabs
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_call_f64(
|
||||||
|
; CHECK-NEXT: %mul = fmul double %x, %x
|
||||||
|
; CHECK-NEXT: ret double %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
define fp128 @square_fabs_call_f128(fp128 %x) {
|
||||||
|
%mul = fmul fp128 %x, %x
|
||||||
|
%fabsl = tail call fp128 @fabsl(fp128 %mul)
|
||||||
|
ret fp128 %fabsl
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_call_f128(
|
||||||
|
; CHECK-NEXT: %mul = fmul fp128 %x, %x
|
||||||
|
; CHECK-NEXT: ret fp128 %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
; Make sure all intrinsic calls are eliminated when the input is a squared value (known non-negative).
|
||||||
|
|
||||||
|
declare float @llvm.fabs.f32(float)
|
||||||
|
declare double @llvm.fabs.f64(double)
|
||||||
|
declare fp128 @llvm.fabs.f128(fp128)
|
||||||
|
|
||||||
|
define float @square_fabs_intrinsic_f32(float %x) {
|
||||||
|
%mul = fmul float %x, %x
|
||||||
|
%fabsf = tail call float @llvm.fabs.f32(float %mul)
|
||||||
|
ret float %fabsf
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_intrinsic_f32(
|
||||||
|
; CHECK-NEXT: %mul = fmul float %x, %x
|
||||||
|
; CHECK-NEXT: ret float %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @square_fabs_intrinsic_f64(double %x) {
|
||||||
|
%mul = fmul double %x, %x
|
||||||
|
%fabs = tail call double @llvm.fabs.f64(double %mul)
|
||||||
|
ret double %fabs
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_intrinsic_f64(
|
||||||
|
; CHECK-NEXT: %mul = fmul double %x, %x
|
||||||
|
; CHECK-NEXT: ret double %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
|
||||||
|
%mul = fmul fp128 %x, %x
|
||||||
|
%fabsl = tail call fp128 @llvm.fabs.f128(fp128 %mul)
|
||||||
|
ret fp128 %fabsl
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_intrinsic_f128(
|
||||||
|
; CHECK-NEXT: %mul = fmul fp128 %x, %x
|
||||||
|
; CHECK-NEXT: ret fp128 %mul
|
||||||
|
}
|
||||||
|
|
||||||
|
; Shrinking a library call to a smaller type must neither block nor be blocked by the square optimization.
|
||||||
|
|
||||||
|
define float @square_fabs_shrink_call1(float %x) {
|
||||||
|
%ext = fpext float %x to double
|
||||||
|
%sq = fmul double %ext, %ext
|
||||||
|
%fabs = call double @fabs(double %sq)
|
||||||
|
%trunc = fptrunc double %fabs to float
|
||||||
|
ret float %trunc
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_shrink_call1(
|
||||||
|
; CHECK-NEXT: %trunc = fmul float %x, %x
|
||||||
|
; CHECK-NEXT: ret float %trunc
|
||||||
|
}
|
||||||
|
|
||||||
|
define float @square_fabs_shrink_call2(float %x) {
|
||||||
|
%sq = fmul float %x, %x
|
||||||
|
%ext = fpext float %sq to double
|
||||||
|
%fabs = call double @fabs(double %ext)
|
||||||
|
%trunc = fptrunc double %fabs to float
|
||||||
|
ret float %trunc
|
||||||
|
|
||||||
|
; CHECK-LABEL: square_fabs_shrink_call2(
|
||||||
|
; CHECK-NEXT: %sq = fmul float %x, %x
|
||||||
|
; CHECK-NEXT: ret float %sq
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue