forked from OSchip/llvm-project
[SimplifyLibCalls] refactor pow(x, n) expansion where n is a constant integer value
Since the backend's codegen is capable of expanding powi into fmul's, it is no longer necessary to do so in the ::optimizePow() function of SimplifyLibCalls.cpp. It is sufficient to always turn pow(x, n) into powi(x, n) in the cases where n is a constant integer value. Dropping the current expansion code allows the folding conditions to be relaxed, so this fold can now also happen at optimization levels below Ofast. The added CodeGen/AArch64/powi.ll test case ensures that powi is actually expanded into fmul's, confirming that this refactor does not cause any performance degradation. Following an idea proposed by David Sherwood <david.sherwood@arm.com>. Differential Revision: https://reviews.llvm.org/D128591
This commit is contained in:
parent
976de7130b
commit
b17754bcaa
|
@ -1638,31 +1638,6 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) {
|
||||
// Multiplications calculated using Addition Chains.
|
||||
// Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
|
||||
|
||||
assert(Exp != 0 && "Incorrect exponent 0 not handled");
|
||||
|
||||
if (InnerChain[Exp])
|
||||
return InnerChain[Exp];
|
||||
|
||||
static const unsigned AddChain[33][2] = {
|
||||
{0, 0}, // Unused.
|
||||
{0, 0}, // Unused (base case = pow1).
|
||||
{1, 1}, // Unused (pre-computed).
|
||||
{1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
|
||||
{1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
|
||||
{3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
|
||||
{6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
|
||||
{3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
|
||||
};
|
||||
|
||||
InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
|
||||
getPow(InnerChain, AddChain[Exp][1], B));
|
||||
return InnerChain[Exp];
|
||||
}
|
||||
|
||||
// Return a properly extended integer (DstWidth bits wide) if the operation is
|
||||
// an itofp.
|
||||
static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
|
||||
|
@ -1963,70 +1938,52 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
|
|||
if (Value *Sqrt = replacePowWithSqrt(Pow, B))
|
||||
return Sqrt;
|
||||
|
||||
// pow(x, n) -> x * x * x * ...
|
||||
// pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
|
||||
const APFloat *ExpoF;
|
||||
if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
|
||||
!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
|
||||
// We limit to a max of 7 multiplications, thus the maximum exponent is 32.
|
||||
// If the exponent is an integer+0.5 we generate a call to sqrt and an
|
||||
// additional fmul.
|
||||
// TODO: This whole transformation should be backend specific (e.g. some
|
||||
// backends might prefer libcalls or the limit for the exponent might
|
||||
// be different) and it should also consider optimizing for size.
|
||||
APFloat LimF(ExpoF->getSemantics(), 33),
|
||||
ExpoA(abs(*ExpoF));
|
||||
if (ExpoA < LimF) {
|
||||
// This transformation applies to integer or integer+0.5 exponents only.
|
||||
// For integer+0.5, we create a sqrt(Base) call.
|
||||
Value *Sqrt = nullptr;
|
||||
if (!ExpoA.isInteger()) {
|
||||
APFloat Expo2 = ExpoA;
|
||||
// To check if ExpoA is an integer + 0.5, we add it to itself. If there
|
||||
// is no floating point exception and the result is an integer, then
|
||||
// ExpoA == integer + 0.5
|
||||
if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
|
||||
return nullptr;
|
||||
if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) &&
|
||||
!ExpoF->isExactlyValue(-0.5)) {
|
||||
APFloat ExpoA(abs(*ExpoF));
|
||||
APFloat ExpoI(*ExpoF);
|
||||
Value *Sqrt = nullptr;
|
||||
if (AllowApprox && !ExpoA.isInteger()) {
|
||||
APFloat Expo2 = ExpoA;
|
||||
// To check if ExpoA is an integer + 0.5, we add it to itself. If there
|
||||
// is no floating point exception and the result is an integer, then
|
||||
// ExpoA == integer + 0.5
|
||||
if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
|
||||
return nullptr;
|
||||
|
||||
if (!Expo2.isInteger())
|
||||
return nullptr;
|
||||
if (!Expo2.isInteger())
|
||||
return nullptr;
|
||||
|
||||
Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
|
||||
Pow->doesNotAccessMemory(), M, B, TLI);
|
||||
if (!Sqrt)
|
||||
return nullptr;
|
||||
}
|
||||
if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) !=
|
||||
APFloat::opInexact)
|
||||
return nullptr;
|
||||
if (!ExpoI.isInteger())
|
||||
return nullptr;
|
||||
ExpoF = &ExpoI;
|
||||
|
||||
// We will memoize intermediate products of the Addition Chain.
|
||||
Value *InnerChain[33] = {nullptr};
|
||||
InnerChain[1] = Base;
|
||||
InnerChain[2] = B.CreateFMul(Base, Base, "square");
|
||||
|
||||
// We cannot readily convert a non-double type (like float) to a double.
|
||||
// So we first convert it to something which could be converted to double.
|
||||
ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
|
||||
Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
|
||||
|
||||
// Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
|
||||
if (Sqrt)
|
||||
FMul = B.CreateFMul(FMul, Sqrt);
|
||||
|
||||
// If the exponent is negative, then get the reciprocal.
|
||||
if (ExpoF->isNegative())
|
||||
FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
|
||||
|
||||
return FMul;
|
||||
Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
|
||||
Pow->doesNotAccessMemory(), M, B, TLI);
|
||||
if (!Sqrt)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// pow(x, n) -> powi(x, n) if n is a constant signed integer value
|
||||
APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
|
||||
// powf(x, n) -> powi(x, n) if n is a constant signed integer value
|
||||
if (ExpoF->isInteger() &&
|
||||
ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
|
||||
APFloat::opOK) {
|
||||
return copyFlags(
|
||||
Value *PowI = copyFlags(
|
||||
*Pow,
|
||||
createPowWithIntegerExponent(
|
||||
Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
|
||||
M, B));
|
||||
|
||||
if (PowI && Sqrt)
|
||||
return B.CreateFMul(PowI, Sqrt);
|
||||
|
||||
return PowI;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
|
||||
|
||||
declare double @llvm.powi.f64.i32(double, i32)
|
||||
declare float @llvm.powi.f32.i32(float, i32)
|
||||
declare float @pow(double noundef, double noundef)
|
||||
|
||||
define float @powi_f32(float %x) nounwind {
|
||||
; CHECK-LABEL: powi_f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmul s0, s0, s0
|
||||
; CHECK-NEXT: fmul s0, s0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call float @llvm.powi.f32.i32(float %x, i32 4)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
define double @powi_f64(double %x) nounwind {
|
||||
; CHECK-LABEL: powi_f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmul d1, d0, d0
|
||||
; CHECK-NEXT: fmul d0, d0, d1
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call double @llvm.powi.f64.i32(double %x, i32 3)
|
||||
ret double %1
|
||||
}
|
|
@ -13,10 +13,13 @@ declare double @pow(double, double)
|
|||
|
||||
; pow(x, 3.0)
|
||||
define double @test_simplify_3(double %x) {
|
||||
; CHECK-LABEL: @test_simplify_3(
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[X]]
|
||||
; CHECK-NEXT: ret double [[TMP1]]
|
||||
; CHECKI32-LABEL: @test_simplify_3(
|
||||
; CHECKI32-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[X:%.*]], i32 3)
|
||||
; CHECKI32-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
; CHECKI16-LABEL: @test_simplify_3(
|
||||
; CHECKI16-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[X:%.*]], i16 3)
|
||||
; CHECKI16-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
%1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00)
|
||||
ret double %1
|
||||
|
@ -24,10 +27,13 @@ define double @test_simplify_3(double %x) {
|
|||
|
||||
; powf(x, 4.0)
|
||||
define float @test_simplify_4f(float %x) {
|
||||
; CHECK-LABEL: @test_simplify_4f(
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: ret float [[TMP1]]
|
||||
; CHECKI32-LABEL: @test_simplify_4f(
|
||||
; CHECKI32-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i32(float [[X:%.*]], i32 4)
|
||||
; CHECKI32-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
; CHECKI16-LABEL: @test_simplify_4f(
|
||||
; CHECKI16-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i16(float [[X:%.*]], i16 4)
|
||||
; CHECKI16-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00)
|
||||
ret float %1
|
||||
|
@ -35,10 +41,13 @@ define float @test_simplify_4f(float %x) {
|
|||
|
||||
; pow(x, 4.0)
|
||||
define double @test_simplify_4(double %x) {
|
||||
; CHECK-LABEL: @test_simplify_4(
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: ret double [[TMP1]]
|
||||
; CHECKI32-LABEL: @test_simplify_4(
|
||||
; CHECKI32-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i32(double [[X:%.*]], i32 4)
|
||||
; CHECKI32-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
; CHECKI16-LABEL: @test_simplify_4(
|
||||
; CHECKI16-NEXT: [[TMP1:%.*]] = call fast double @llvm.powi.f64.i16(double [[X:%.*]], i16 4)
|
||||
; CHECKI16-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
%1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00)
|
||||
ret double %1
|
||||
|
@ -46,13 +55,13 @@ define double @test_simplify_4(double %x) {
|
|||
|
||||
; powf(x, <15.0, 15.0>)
|
||||
define <2 x float> @test_simplify_15(<2 x float> %x) {
|
||||
; CHECK-LABEL: @test_simplify_15(
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x float> [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> [[SQUARE]], [[X]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]
|
||||
; CHECK-NEXT: ret <2 x float> [[TMP4]]
|
||||
; CHECKI32-LABEL: @test_simplify_15(
|
||||
; CHECKI32-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[X:%.*]], i32 15)
|
||||
; CHECKI32-NEXT: ret <2 x float> [[TMP1]]
|
||||
;
|
||||
; CHECKI16-LABEL: @test_simplify_15(
|
||||
; CHECKI16-NEXT: [[TMP1:%.*]] = call fast <2 x float> @llvm.powi.v2f32.i16(<2 x float> [[X:%.*]], i16 15)
|
||||
; CHECKI16-NEXT: ret <2 x float> [[TMP1]]
|
||||
;
|
||||
%1 = call fast <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 1.500000e+01, float 1.500000e+01>)
|
||||
ret <2 x float> %1
|
||||
|
@ -60,13 +69,13 @@ define <2 x float> @test_simplify_15(<2 x float> %x) {
|
|||
|
||||
; pow(x, -7.0)
|
||||
define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
|
||||
; CHECK-LABEL: @test_simplify_neg_7(
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]]
|
||||
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP3]]
|
||||
; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]]
|
||||
; CHECKI32-LABEL: @test_simplify_neg_7(
|
||||
; CHECKI32-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[X:%.*]], i32 -7)
|
||||
; CHECKI32-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
; CHECKI16-LABEL: @test_simplify_neg_7(
|
||||
; CHECKI16-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i16(<2 x double> [[X:%.*]], i16 -7)
|
||||
; CHECKI16-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -7.000000e+00, double -7.000000e+00>)
|
||||
ret <2 x double> %1
|
||||
|
@ -74,15 +83,13 @@ define <2 x double> @test_simplify_neg_7(<2 x double> %x) {
|
|||
|
||||
; powf(x, -19.0)
|
||||
define float @test_simplify_neg_19(float %x) {
|
||||
; CHECK-LABEL: @test_simplify_neg_19(
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[SQUARE]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[X]]
|
||||
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP5]]
|
||||
; CHECK-NEXT: ret float [[RECIPROCAL]]
|
||||
; CHECKI32-LABEL: @test_simplify_neg_19(
|
||||
; CHECKI32-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i32(float [[X:%.*]], i32 -19)
|
||||
; CHECKI32-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
; CHECKI16-LABEL: @test_simplify_neg_19(
|
||||
; CHECKI16-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i16(float [[X:%.*]], i16 -19)
|
||||
; CHECKI16-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%1 = call fast float @llvm.pow.f32(float %x, float -1.900000e+01)
|
||||
ret float %1
|
||||
|
@ -100,13 +107,13 @@ define double @test_simplify_11_23(double %x) {
|
|||
|
||||
; powf(x, 32.0)
|
||||
define float @test_simplify_32(float %x) {
|
||||
; CHECK-LABEL: @test_simplify_32(
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X:%.*]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float [[TMP3]], [[TMP3]]
|
||||
; CHECK-NEXT: ret float [[TMP4]]
|
||||
; CHECKI32-LABEL: @test_simplify_32(
|
||||
; CHECKI32-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i32(float [[X:%.*]], i32 32)
|
||||
; CHECKI32-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
; CHECKI16-LABEL: @test_simplify_32(
|
||||
; CHECKI16-NEXT: [[TMP1:%.*]] = call fast float @llvm.powi.f32.i16(float [[X:%.*]], i16 32)
|
||||
; CHECKI16-NEXT: ret float [[TMP1]]
|
||||
;
|
||||
%1 = call fast float @llvm.pow.f32(float %x, float 3.200000e+01)
|
||||
ret float %1
|
||||
|
@ -128,14 +135,17 @@ define double @test_simplify_33(double %x) {
|
|||
|
||||
; pow(x, 16.5) with double
|
||||
define double @test_simplify_16_5(double %x) {
|
||||
; CHECK-LABEL: @test_simplify_16_5(
|
||||
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
|
||||
; CHECK-NEXT: ret double [[TMP4]]
|
||||
; CHECK32-LABEL: @test_simplify_16_5(
|
||||
; CHECK32-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
|
||||
; CHECK32-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i32(double [[X]], i32 16)
|
||||
; CHECK32-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]]
|
||||
; CHECK32-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
; CHECK16-LABEL: @test_simplify_16_5(
|
||||
; CHECK16-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
|
||||
; CHECK16-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i16(double [[X]], i16 16)
|
||||
; CHECK16-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]]
|
||||
; CHECK16-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
%1 = call fast double @llvm.pow.f64(double %x, double 1.650000e+01)
|
||||
ret double %1
|
||||
|
@ -143,112 +153,80 @@ define double @test_simplify_16_5(double %x) {
|
|||
|
||||
; pow(x, -16.5) with double
|
||||
define double @test_simplify_neg_16_5(double %x) {
|
||||
; CHECK-LABEL: @test_simplify_neg_16_5(
|
||||
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
|
||||
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
|
||||
; CHECK-NEXT: ret double [[RECIPROCAL]]
|
||||
; CHECK32-LABEL: @test_simplify_neg_16_5(
|
||||
; CHECK32-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
|
||||
; CHECK32-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i32(double [[X]], i32 -17)
|
||||
; CHECK32-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]]
|
||||
; CHECK32-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
; CHECK16-LABEL: @test_simplify_neg_16_5(
|
||||
; CHECK16-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[X:%.*]])
|
||||
; CHECK16-NEXT: [[POWI:%.*]] = call fast double @llvm.powi.f64.i16(double [[X]], i16 -17)
|
||||
; CHECK16-NEXT: [[TMP1:%.*]] = fmul fast double [[POWI]], [[SQRT]]
|
||||
; CHECK16-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
%1 = call fast double @llvm.pow.f64(double %x, double -1.650000e+01)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
; pow(x, 16.5) with double
|
||||
; pow(x, 0.5) with double
|
||||
|
||||
define double @test_simplify_16_5_libcall(double %x) {
|
||||
; SQRT-LABEL: @test_simplify_16_5_libcall(
|
||||
; SQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
|
||||
; SQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
|
||||
; SQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
|
||||
; SQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
|
||||
; SQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
|
||||
; SQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
|
||||
; SQRT-NEXT: ret double [[TMP4]]
|
||||
;
|
||||
; NOSQRT-LABEL: @test_simplify_16_5_libcall(
|
||||
; NOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double 1.650000e+01)
|
||||
; NOSQRT-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
; CHECKSQRT-LABEL: @test_simplify_16_5_libcall(
|
||||
define double @test_simplify_0_5_libcall(double %x) {
|
||||
; CHECKSQRT-LABEL: @test_simplify_0_5_libcall(
|
||||
; CHECKSQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
|
||||
; CHECKSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
|
||||
; CHECKSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
|
||||
; CHECKSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
|
||||
; CHECKSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
|
||||
; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
|
||||
; CHECKSQRT-NEXT: ret double [[TMP4]]
|
||||
; CHECKSQRT-NEXT: ret double [[SQRT]]
|
||||
;
|
||||
; CHECKNOSQRT-LABEL: @test_simplify_16_5_libcall(
|
||||
; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double 1.650000e+01)
|
||||
; CHECKNOSQRT-LABEL: @test_simplify_0_5_libcall(
|
||||
; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double 5.000000e-01)
|
||||
; CHECKNOSQRT-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
%1 = call fast double @pow(double %x, double 1.650000e+01)
|
||||
%1 = call fast double @pow(double %x, double 5.000000e-01)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
; pow(x, -16.5) with double
|
||||
; pow(x, -0.5) with double
|
||||
|
||||
define double @test_simplify_neg_16_5_libcall(double %x) {
|
||||
; SQRT-LABEL: @test_simplify_neg_16_5_libcall(
|
||||
; SQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
|
||||
; SQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
|
||||
; SQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
|
||||
; SQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
|
||||
; SQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
|
||||
; SQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
|
||||
; SQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
|
||||
; SQRT-NEXT: ret double [[RECIPROCAL]]
|
||||
;
|
||||
; NOSQRT-LABEL: @test_simplify_neg_16_5_libcall(
|
||||
; NOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double -1.650000e+01)
|
||||
; NOSQRT-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
; CHECKSQRT-LABEL: @test_simplify_neg_16_5_libcall(
|
||||
define double @test_simplify_neg_0_5_libcall(double %x) {
|
||||
; CHECKSQRT-LABEL: @test_simplify_neg_0_5_libcall(
|
||||
; CHECKSQRT-NEXT: [[SQRT:%.*]] = call fast double @sqrt(double [[X:%.*]])
|
||||
; CHECKSQRT-NEXT: [[SQUARE:%.*]] = fmul fast double [[X]], [[X]]
|
||||
; CHECKSQRT-NEXT: [[TMP1:%.*]] = fmul fast double [[SQUARE]], [[SQUARE]]
|
||||
; CHECKSQRT-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[TMP1]]
|
||||
; CHECKSQRT-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
|
||||
; CHECKSQRT-NEXT: [[TMP4:%.*]] = fmul fast double [[TMP3]], [[SQRT]]
|
||||
; CHECKSQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[TMP4]]
|
||||
; CHECKSQRT-NEXT: [[RECIPROCAL:%.*]] = fdiv fast double 1.000000e+00, [[SQRT]]
|
||||
; CHECKSQRT-NEXT: ret double [[RECIPROCAL]]
|
||||
;
|
||||
; CHECKNOSQRT-LABEL: @test_simplify_neg_16_5_libcall(
|
||||
; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double -1.650000e+01)
|
||||
; CHECKNOSQRT-LABEL: @test_simplify_neg_0_5_libcall(
|
||||
; CHECKNOSQRT-NEXT: [[TMP1:%.*]] = call fast double @pow(double [[X:%.*]], double -5.000000e-01)
|
||||
; CHECKNOSQRT-NEXT: ret double [[TMP1]]
|
||||
;
|
||||
%1 = call fast double @pow(double %x, double -1.650000e+01)
|
||||
%1 = call fast double @pow(double %x, double -5.000000e-01)
|
||||
ret double %1
|
||||
}
|
||||
|
||||
; pow(x, -8.5) with float
|
||||
define float @test_simplify_neg_8_5(float %x) {
|
||||
; CHECK-LABEL: @test_simplify_neg_8_5(
|
||||
; CHECK-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast float [[X]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[SQRT]]
|
||||
; CHECK-NEXT: [[RECIPROCAL:%.*]] = fdiv fast float 1.000000e+00, [[TMP2]]
|
||||
; CHECK-NEXT: ret float [[RECIPROCAL]]
|
||||
; CHECK32-LABEL: @test_simplify_neg_8_5(
|
||||
; CHECK32-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
|
||||
; CHECK32-NEXT: [[POWI:%.*]] = call fast float @llvm.powi.f32.i32(float [[X]], i32 -9)
|
||||
; CHECK32-NEXT: [[TMP1:%.*]] = fmul fast float [[POWI]], [[SQRT]]
|
||||
;
|
||||
%1 = call fast float @llvm.pow.f32(float %x, float -0.450000e+01)
|
||||
; CHECK16-LABEL: @test_simplify_neg_8_5(
|
||||
; CHECK16-NEXT: [[SQRT:%.*]] = call fast float @llvm.sqrt.f32(float [[X:%.*]])
|
||||
; CHECK16-NEXT: [[POWI:%.*]] = call fast float @llvm.powi.f32.i16(float [[X]], i16 -9)
|
||||
; CHECK16-NEXT: [[TMP1:%.*]] = fmul fast float [[POWI]], [[SQRT]]
|
||||
;
|
||||
%1 = call fast float @llvm.pow.f32(float %x, float -0.850000e+01)
|
||||
ret float %1
|
||||
}
|
||||
|
||||
; pow(x, 7.5) with <2 x double>
|
||||
define <2 x double> @test_simplify_7_5(<2 x double> %x) {
|
||||
; CHECK-LABEL: @test_simplify_7_5(
|
||||
; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <2 x double> [[X]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[SQUARE]], [[SQUARE]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[TMP1]], [[X]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[SQUARE]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP3]], [[SQRT]]
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP4]]
|
||||
; CHECK32-LABEL: @test_simplify_7_5(
|
||||
; CHECK32-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
|
||||
; CHECK32-NEXT: [[POWI:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[X]], i32 7)
|
||||
; CHECK32-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[POWI]], [[SQRT]]
|
||||
;
|
||||
; CHECK16-LABEL: @test_simplify_7_5(
|
||||
; CHECK16-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
|
||||
; CHECK16-NEXT: [[POWI:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i16(<2 x double> [[X]], i16 7)
|
||||
; CHECK16-NEXT: [[TMP1:%.*]] = fmul fast <2 x double> [[POWI]], [[SQRT]]
|
||||
;
|
||||
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 7.500000e+00, double 7.500000e+00>)
|
||||
ret <2 x double> %1
|
||||
|
@ -256,12 +234,15 @@ define <2 x double> @test_simplify_7_5(<2 x double> %x) {
|
|||
|
||||
; pow(x, 3.5) with <4 x float>
|
||||
define <4 x float> @test_simplify_3_5(<4 x float> %x) {
|
||||
; CHECK-LABEL: @test_simplify_3_5(
|
||||
; CHECK-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
|
||||
; CHECK-NEXT: [[SQUARE:%.*]] = fmul fast <4 x float> [[X]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[SQUARE]], [[X]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[SQRT]]
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP2]]
|
||||
; CHECK32-LABEL: @test_simplify_3_5(
|
||||
; CHECK32-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
|
||||
; CHECK32-NEXT: [[POWI:%.*]] = call fast <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[X]], i32 3)
|
||||
; CHECK32-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[POWI]], [[SQRT]]
|
||||
;
|
||||
; CHECK16-LABEL: @test_simplify_3_5(
|
||||
; CHECK16-NEXT: [[SQRT:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[X:%.*]])
|
||||
; CHECK16-NEXT: [[POWI:%.*]] = call fast <4 x float> @llvm.powi.v4f32.i16(<4 x float> [[X]], i16 3)
|
||||
; CHECK16-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[POWI]], [[SQRT]]
|
||||
;
|
||||
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 3.500000e+00, float 3.500000e+00, float 3.500000e+00, float 3.500000e+00>)
|
||||
ret <4 x float> %1
|
||||
|
|
|
@ -444,7 +444,7 @@ define double @pow_uitofp_double_base_no_fast(double %base, i32 %x) {
|
|||
|
||||
define double @powf_exp_const_int_no_fast(double %base) {
|
||||
; CHECK-LABEL: @powf_exp_const_int_no_fast(
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01)
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 40)
|
||||
; CHECK-NEXT: ret double [[RES]]
|
||||
;
|
||||
%res = tail call double @llvm.pow.f64(double %base, double 4.000000e+01)
|
||||
|
@ -453,7 +453,9 @@ define double @powf_exp_const_int_no_fast(double %base) {
|
|||
|
||||
define double @powf_exp_const_not_int_fast(double %base) {
|
||||
; CHECK-LABEL: @powf_exp_const_not_int_fast(
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 3.750000e+01)
|
||||
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[BASE:%.*]])
|
||||
; CHECK-NEXT: [[POWI:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[BASE]], i32 37)
|
||||
; CHECK-NEXT: [[RES:%.*]] = fmul fast double [[POWI]], [[SQRT]]
|
||||
; CHECK-NEXT: ret double [[RES]]
|
||||
;
|
||||
%res = tail call fast double @llvm.pow.f64(double %base, double 3.750000e+01)
|
||||
|
@ -471,7 +473,7 @@ define double @powf_exp_const_not_int_no_fast(double %base) {
|
|||
|
||||
define double @powf_exp_const2_int_no_fast(double %base) {
|
||||
; CHECK-LABEL: @powf_exp_const2_int_no_fast(
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double -4.000000e+01)
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.powi.f64.i32(double [[BASE:%.*]], i32 -40)
|
||||
; CHECK-NEXT: ret double [[RES]]
|
||||
;
|
||||
%res = tail call double @llvm.pow.f64(double %base, double -4.000000e+01)
|
||||
|
|
|
@ -414,7 +414,7 @@ define double @pow_uitofp_double_base_no_fast(double %base, i16 %x) {
|
|||
|
||||
define double @powf_exp_const_int_no_fast(double %base) {
|
||||
; CHECK-LABEL: @powf_exp_const_int_no_fast(
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double 4.000000e+01)
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 40)
|
||||
; CHECK-NEXT: ret double [[RES]]
|
||||
;
|
||||
%res = tail call double @llvm.pow.f64(double %base, double 4.000000e+01)
|
||||
|
@ -423,7 +423,9 @@ define double @powf_exp_const_int_no_fast(double %base) {
|
|||
|
||||
define double @powf_exp_const_not_int_fast(double %base) {
|
||||
; CHECK-LABEL: @powf_exp_const_not_int_fast(
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call fast double @llvm.pow.f64(double [[BASE:%.*]], double 3.750000e+01)
|
||||
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[BASE:%.*]])
|
||||
; CHECK-NEXT: [[POWI:%.*]] = tail call fast double @llvm.powi.f64.i16(double [[BASE]], i16 37)
|
||||
; CHECK-NEXT: [[RES:%.*]] = fmul fast double [[POWI]], [[SQRT]]
|
||||
; CHECK-NEXT: ret double [[RES]]
|
||||
;
|
||||
%res = tail call fast double @llvm.pow.f64(double %base, double 3.750000e+01)
|
||||
|
@ -441,7 +443,7 @@ define double @powf_exp_const_not_int_no_fast(double %base) {
|
|||
|
||||
define double @powf_exp_const2_int_no_fast(double %base) {
|
||||
; CHECK-LABEL: @powf_exp_const2_int_no_fast(
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.pow.f64(double [[BASE:%.*]], double -4.000000e+01)
|
||||
; CHECK-NEXT: [[RES:%.*]] = tail call double @llvm.powi.f64.i16(double [[BASE:%.*]], i16 -40)
|
||||
; CHECK-NEXT: ret double [[RES]]
|
||||
;
|
||||
%res = tail call double @llvm.pow.f64(double %base, double -4.000000e+01)
|
||||
|
|
Loading…
Reference in New Issue