forked from OSchip/llvm-project
Vectorize intrinsic math function calls in SLPVectorizer.
This patch adds support to recognize and vectorize intrinsic math functions in SLPVectorizer. Review: http://reviews.llvm.org/D3560 and http://reviews.llvm.org/D3559 llvm-svn: 207901
This commit is contained in:
parent
e6c980c41d
commit
ddd0cb5ecf
|
@ -15,6 +15,7 @@
|
|||
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
|
||||
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/Target/TargetLibraryInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -51,6 +52,129 @@ static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/// Return \p ValidIntrinsicID if the call \p I has the shape of a unary
/// floating-point math routine: exactly one FP argument, a result of the
/// same type, and no memory writes. Otherwise return
/// Intrinsic::not_intrinsic.
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
                                              Intrinsic::ID ValidIntrinsicID) {
  // A libm-style unary function takes exactly one operand.
  if (I.getNumArgOperands() != 1)
    return Intrinsic::not_intrinsic;

  // Argument and result must share one floating-point type, and the call
  // must be side-effect free so replacing it with an intrinsic is safe.
  Type *ArgTy = I.getArgOperand(0)->getType();
  if (!ArgTy->isFloatingPointTy() || I.getType() != ArgTy ||
      !I.onlyReadsMemory())
    return Intrinsic::not_intrinsic;

  return ValidIntrinsicID;
}
|
||||
|
||||
/// Return \p ValidIntrinsicID if the call \p I has the shape of a binary
/// floating-point math routine: two FP arguments that both match the return
/// type, and no memory writes. Otherwise return Intrinsic::not_intrinsic.
static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
                                               Intrinsic::ID ValidIntrinsicID) {
  // A libm-style binary function takes exactly two operands.
  if (I.getNumArgOperands() != 2)
    return Intrinsic::not_intrinsic;

  // Both operands and the result must share one floating-point type, and
  // the call must not write memory.
  Type *Arg0Ty = I.getArgOperand(0)->getType();
  Type *Arg1Ty = I.getArgOperand(1)->getType();
  if (!Arg0Ty->isFloatingPointTy() || !Arg1Ty->isFloatingPointTy() ||
      I.getType() != Arg0Ty || I.getType() != Arg1Ty || !I.onlyReadsMemory())
    return Intrinsic::not_intrinsic;

  return ValidIntrinsicID;
}
|
||||
|
||||
/// Map the call \p CI onto a vectorizable intrinsic ID.
/// Intrinsic calls are accepted when trivially vectorizable (lifetime
/// markers are passed through as well); otherwise known libm functions,
/// validated through \p TLI, are translated to their intrinsic
/// counterparts. Everything else yields Intrinsic::not_intrinsic.
static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  // If we have an intrinsic call, check if it is trivially vectorizable.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
    Intrinsic::ID IID = II->getIntrinsicID();
    if (isTriviallyVectorizable(IID) || IID == Intrinsic::lifetime_start ||
        IID == Intrinsic::lifetime_end)
      return IID;
    return Intrinsic::not_intrinsic;
  }

  // Without library info we cannot reason about libm calls.
  if (!TLI)
    return Intrinsic::not_intrinsic;

  // We're going to make assumptions on the semantics of the functions, check
  // that the target knows that it's available in this environment and it does
  // not have local linkage.
  Function *Callee = CI->getCalledFunction();
  LibFunc::Func LF;
  if (!Callee || Callee->hasLocalLinkage() ||
      !TLI->getLibFunc(Callee->getName(), LF))
    return Intrinsic::not_intrinsic;

  // Otherwise check if we have a call to a function that can be turned into a
  // vector intrinsic.
  switch (LF) {
  case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl:
    return checkUnaryFloatSignature(*CI, Intrinsic::sin);
  case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl:
    return checkUnaryFloatSignature(*CI, Intrinsic::cos);
  case LibFunc::exp: case LibFunc::expf: case LibFunc::expl:
    return checkUnaryFloatSignature(*CI, Intrinsic::exp);
  case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l:
    return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
  case LibFunc::log: case LibFunc::logf: case LibFunc::logl:
    return checkUnaryFloatSignature(*CI, Intrinsic::log);
  case LibFunc::log10: case LibFunc::log10f: case LibFunc::log10l:
    return checkUnaryFloatSignature(*CI, Intrinsic::log10);
  case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l:
    return checkUnaryFloatSignature(*CI, Intrinsic::log2);
  case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl:
    return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
  case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl:
    return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
  case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl:
    return checkUnaryFloatSignature(*CI, Intrinsic::floor);
  case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill:
    return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
  case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl:
    return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
  case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl:
    return checkUnaryFloatSignature(*CI, Intrinsic::rint);
  case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl:
    return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
  case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl:
    return checkUnaryFloatSignature(*CI, Intrinsic::round);
  case LibFunc::pow: case LibFunc::powf: case LibFunc::powl:
    return checkBinaryFloatSignature(*CI, Intrinsic::pow);
  default:
    break;
  }

  return Intrinsic::not_intrinsic;
}
|
||||
|
||||
|
||||
} // llvm namespace
|
||||
|
||||
#endif
|
||||
|
|
|
@ -85,7 +85,6 @@
|
|||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetLibraryInfo.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
|
@ -2300,128 +2299,6 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns ValidIntrinsicID when the call site I matches the signature of a
// unary floating-point libm routine (single FP argument equal to the result
// type, readonly call); Intrinsic::not_intrinsic otherwise.
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
                                              Intrinsic::ID ValidIntrinsicID) {
  const bool SignatureOK =
      I.getNumArgOperands() == 1 &&
      I.getArgOperand(0)->getType()->isFloatingPointTy() &&
      I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory();
  return SignatureOK ? ValidIntrinsicID : Intrinsic::not_intrinsic;
}
|
||||
|
||||
// Returns ValidIntrinsicID when the call site I matches the signature of a
// binary floating-point libm routine (two FP arguments, both equal to the
// result type, readonly call); Intrinsic::not_intrinsic otherwise.
static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
                                               Intrinsic::ID ValidIntrinsicID) {
  const bool SignatureOK =
      I.getNumArgOperands() == 2 &&
      I.getArgOperand(0)->getType()->isFloatingPointTy() &&
      I.getArgOperand(1)->getType()->isFloatingPointTy() &&
      I.getType() == I.getArgOperand(0)->getType() &&
      I.getType() == I.getArgOperand(1)->getType() && I.onlyReadsMemory();
  return SignatureOK ? ValidIntrinsicID : Intrinsic::not_intrinsic;
}
|
||||
|
||||
|
||||
// Translate the call CI into an intrinsic ID the vectorizers can widen.
// Trivially vectorizable intrinsics (and lifetime markers) are returned
// as-is; recognized libm functions, checked against TLI for availability
// and non-local linkage, map to their intrinsic equivalents; anything else
// maps to Intrinsic::not_intrinsic.
static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  // If we have an intrinsic call, check if it is trivially vectorizable.
  if (IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(CI)) {
    Intrinsic::ID IntrID = Intr->getIntrinsicID();
    bool Vectorizable = isTriviallyVectorizable(IntrID) ||
                        IntrID == Intrinsic::lifetime_start ||
                        IntrID == Intrinsic::lifetime_end;
    return Vectorizable ? IntrID : Intrinsic::not_intrinsic;
  }

  // No library information means no libm recognition.
  if (!TLI)
    return Intrinsic::not_intrinsic;

  // We're going to make assumptions on the semantics of the functions, check
  // that the target knows that it's available in this environment and it does
  // not have local linkage.
  LibFunc::Func FK;
  Function *CalledFn = CI->getCalledFunction();
  if (!CalledFn || CalledFn->hasLocalLinkage() ||
      !TLI->getLibFunc(CalledFn->getName(), FK))
    return Intrinsic::not_intrinsic;

  // Otherwise check if we have a call to a function that can be turned into a
  // vector intrinsic. Each libm family maps to one intrinsic, subject to a
  // signature check.
  switch (FK) {
  default:
    return Intrinsic::not_intrinsic;
  case LibFunc::sin:
  case LibFunc::sinf:
  case LibFunc::sinl:
    return checkUnaryFloatSignature(*CI, Intrinsic::sin);
  case LibFunc::cos:
  case LibFunc::cosf:
  case LibFunc::cosl:
    return checkUnaryFloatSignature(*CI, Intrinsic::cos);
  case LibFunc::exp:
  case LibFunc::expf:
  case LibFunc::expl:
    return checkUnaryFloatSignature(*CI, Intrinsic::exp);
  case LibFunc::exp2:
  case LibFunc::exp2f:
  case LibFunc::exp2l:
    return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
  case LibFunc::log:
  case LibFunc::logf:
  case LibFunc::logl:
    return checkUnaryFloatSignature(*CI, Intrinsic::log);
  case LibFunc::log10:
  case LibFunc::log10f:
  case LibFunc::log10l:
    return checkUnaryFloatSignature(*CI, Intrinsic::log10);
  case LibFunc::log2:
  case LibFunc::log2f:
  case LibFunc::log2l:
    return checkUnaryFloatSignature(*CI, Intrinsic::log2);
  case LibFunc::fabs:
  case LibFunc::fabsf:
  case LibFunc::fabsl:
    return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
  case LibFunc::copysign:
  case LibFunc::copysignf:
  case LibFunc::copysignl:
    return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
  case LibFunc::floor:
  case LibFunc::floorf:
  case LibFunc::floorl:
    return checkUnaryFloatSignature(*CI, Intrinsic::floor);
  case LibFunc::ceil:
  case LibFunc::ceilf:
  case LibFunc::ceill:
    return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
  case LibFunc::trunc:
  case LibFunc::truncf:
  case LibFunc::truncl:
    return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
  case LibFunc::rint:
  case LibFunc::rintf:
  case LibFunc::rintl:
    return checkUnaryFloatSignature(*CI, Intrinsic::rint);
  case LibFunc::nearbyint:
  case LibFunc::nearbyintf:
  case LibFunc::nearbyintl:
    return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
  case LibFunc::round:
  case LibFunc::roundf:
  case LibFunc::roundl:
    return checkUnaryFloatSignature(*CI, Intrinsic::round);
  case LibFunc::pow:
  case LibFunc::powf:
  case LibFunc::powl:
    return checkBinaryFloatSignature(*CI, Intrinsic::pow);
  }
}
|
||||
|
||||
/// This function translates the reduction kind to an LLVM binary operator.
|
||||
static unsigned
|
||||
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
|
||||
|
|
|
@ -346,9 +346,9 @@ public:
|
|||
typedef SmallVector<StoreInst *, 8> StoreList;
|
||||
|
||||
BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li,
|
||||
TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
|
||||
DominatorTree *Dt) :
|
||||
F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
|
||||
F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
|
||||
Builder(Se->getContext()) {
|
||||
// Setup the block numbering utility for all of the blocks in the
|
||||
// function.
|
||||
|
@ -536,6 +536,7 @@ private:
|
|||
ScalarEvolution *SE;
|
||||
const DataLayout *DL;
|
||||
TargetTransformInfo *TTI;
|
||||
TargetLibraryInfo *TLI;
|
||||
AliasAnalysis *AA;
|
||||
LoopInfo *LI;
|
||||
DominatorTree *DT;
|
||||
|
@ -949,34 +950,36 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||
}
|
||||
case Instruction::Call: {
|
||||
// Check if the calls are all to the same vectorizable intrinsic.
|
||||
IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
|
||||
Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
|
||||
|
||||
CallInst *CI = cast<CallInst>(VL[0]);
|
||||
// Check if this is an Intrinsic call or something that can be
|
||||
// represented by an intrinsic call
|
||||
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
|
||||
if (!isTriviallyVectorizable(ID)) {
|
||||
newTreeEntry(VL, false);
|
||||
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
Function *Int = II->getCalledFunction();
|
||||
Function *Int = CI->getCalledFunction();
|
||||
|
||||
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
|
||||
IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]);
|
||||
if (!II2 || II2->getCalledFunction() != Int) {
|
||||
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
|
||||
if (!CI2 || CI2->getCalledFunction() != Int ||
|
||||
getIntrinsicIDForCall(CI2, TLI) != ID) {
|
||||
newTreeEntry(VL, false);
|
||||
DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i]
|
||||
DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
|
||||
<< "\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
newTreeEntry(VL, true);
|
||||
for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) {
|
||||
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
|
||||
ValueList Operands;
|
||||
// Prepare the operand vector.
|
||||
for (unsigned j = 0; j < VL.size(); ++j) {
|
||||
IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]);
|
||||
Operands.push_back(II2->getArgOperand(i));
|
||||
CallInst *CI2 = dyn_cast<CallInst>(VL[j]);
|
||||
Operands.push_back(CI2->getArgOperand(i));
|
||||
}
|
||||
buildTree_rec(Operands, Depth + 1);
|
||||
}
|
||||
|
@ -1132,12 +1135,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
}
|
||||
case Instruction::Call: {
|
||||
CallInst *CI = cast<CallInst>(VL0);
|
||||
IntrinsicInst *II = cast<IntrinsicInst>(CI);
|
||||
Intrinsic::ID ID = II->getIntrinsicID();
|
||||
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
|
||||
|
||||
// Calculate the cost of the scalar and vector calls.
|
||||
SmallVector<Type*, 4> ScalarTys, VecTys;
|
||||
for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) {
|
||||
for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
|
||||
ScalarTys.push_back(CI->getArgOperand(op)->getType());
|
||||
VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
|
||||
VecTy->getNumElements()));
|
||||
|
@ -1150,7 +1152,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
|||
|
||||
DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
|
||||
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
|
||||
<< " for " << *II << "\n");
|
||||
<< " for " << *CI << "\n");
|
||||
|
||||
return VecCallCost - ScalarCallCost;
|
||||
}
|
||||
|
@ -1643,7 +1645,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
}
|
||||
case Instruction::Call: {
|
||||
CallInst *CI = cast<CallInst>(VL0);
|
||||
|
||||
setInsertPointAfterBundle(E->Scalars);
|
||||
std::vector<Value *> OpVecs;
|
||||
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
|
||||
|
@ -1659,8 +1660,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
|||
}
|
||||
|
||||
Module *M = F->getParent();
|
||||
IntrinsicInst *II = cast<IntrinsicInst>(CI);
|
||||
Intrinsic::ID ID = II->getIntrinsicID();
|
||||
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
|
||||
Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
|
||||
Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
|
||||
Value *V = Builder.CreateCall(CF, OpVecs);
|
||||
|
@ -1867,6 +1867,7 @@ struct SLPVectorizer : public FunctionPass {
|
|||
ScalarEvolution *SE;
|
||||
const DataLayout *DL;
|
||||
TargetTransformInfo *TTI;
|
||||
TargetLibraryInfo *TLI;
|
||||
AliasAnalysis *AA;
|
||||
LoopInfo *LI;
|
||||
DominatorTree *DT;
|
||||
|
@ -1879,6 +1880,7 @@ struct SLPVectorizer : public FunctionPass {
|
|||
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
|
||||
DL = DLP ? &DLP->getDataLayout() : nullptr;
|
||||
TTI = &getAnalysis<TargetTransformInfo>();
|
||||
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
|
||||
AA = &getAnalysis<AliasAnalysis>();
|
||||
LI = &getAnalysis<LoopInfo>();
|
||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
|
@ -1904,7 +1906,7 @@ struct SLPVectorizer : public FunctionPass {
|
|||
|
||||
// Use the bottom up slp vectorizer to construct chains that start with
|
||||
// he store instructions.
|
||||
BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT);
|
||||
BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT);
|
||||
|
||||
// Scan the blocks in the function in post order.
|
||||
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
declare double @sin(double)
|
||||
declare double @cos(double)
|
||||
declare double @pow(double, double)
|
||||
declare double @exp2(double)
|
||||
declare i64 @round(i64)
|
||||
|
||||
|
||||
; CHECK: sin_libm
|
||||
; CHECK: call <2 x double> @llvm.sin.v2f64
|
||||
; CHECK: ret void
|
||||
; Two adjacent readnone calls to libm @sin should be SLP-vectorized into a
; single call of the @llvm.sin.v2f64 intrinsic.
define void @sin_libm(double* %a, double* %b, double* %c) {
entry:
  %a0 = load double* %a, align 8
  %b0 = load double* %b, align 8
  %prod0 = fmul double %a0, %b0
  %sin0 = tail call double @sin(double %prod0) nounwind readnone
  %a.gep.1 = getelementptr inbounds double* %a, i64 1
  %a1 = load double* %a.gep.1, align 8
  %b.gep.1 = getelementptr inbounds double* %b, i64 1
  %b1 = load double* %b.gep.1, align 8
  %prod1 = fmul double %a1, %b1
  %sin1 = tail call double @sin(double %prod1) nounwind readnone
  store double %sin0, double* %c, align 8
  %c.gep.1 = getelementptr inbounds double* %c, i64 1
  store double %sin1, double* %c.gep.1, align 8
  ret void
}
|
||||
|
||||
; CHECK: cos_libm
|
||||
; CHECK: call <2 x double> @llvm.cos.v2f64
|
||||
; CHECK: ret void
|
||||
; Two adjacent readnone calls to libm @cos should be SLP-vectorized into a
; single call of the @llvm.cos.v2f64 intrinsic.
define void @cos_libm(double* %a, double* %b, double* %c) {
entry:
  %a0 = load double* %a, align 8
  %b0 = load double* %b, align 8
  %prod0 = fmul double %a0, %b0
  %cos0 = tail call double @cos(double %prod0) nounwind readnone
  %a.gep.1 = getelementptr inbounds double* %a, i64 1
  %a1 = load double* %a.gep.1, align 8
  %b.gep.1 = getelementptr inbounds double* %b, i64 1
  %b1 = load double* %b.gep.1, align 8
  %prod1 = fmul double %a1, %b1
  %cos1 = tail call double @cos(double %prod1) nounwind readnone
  store double %cos0, double* %c, align 8
  %c.gep.1 = getelementptr inbounds double* %c, i64 1
  store double %cos1, double* %c.gep.1, align 8
  ret void
}
|
||||
|
||||
; CHECK: pow_libm
|
||||
; CHECK: call <2 x double> @llvm.pow.v2f64
|
||||
; CHECK: ret void
|
||||
; Two adjacent readnone calls to the binary libm @pow should be
; SLP-vectorized into a single call of the @llvm.pow.v2f64 intrinsic.
define void @pow_libm(double* %a, double* %b, double* %c) {
entry:
  %a0 = load double* %a, align 8
  %b0 = load double* %b, align 8
  %prod0 = fmul double %a0, %b0
  %pow0 = tail call double @pow(double %prod0,double %prod0) nounwind readnone
  %a.gep.1 = getelementptr inbounds double* %a, i64 1
  %a1 = load double* %a.gep.1, align 8
  %b.gep.1 = getelementptr inbounds double* %b, i64 1
  %b1 = load double* %b.gep.1, align 8
  %prod1 = fmul double %a1, %b1
  %pow1 = tail call double @pow(double %prod1,double %prod1) nounwind readnone
  store double %pow0, double* %c, align 8
  %c.gep.1 = getelementptr inbounds double* %c, i64 1
  store double %pow1, double* %c.gep.1, align 8
  ret void
}
|
||||
|
||||
|
||||
; CHECK: exp2_libm
|
||||
; CHECK: call <2 x double> @llvm.exp2.v2f64
|
||||
; CHECK: ret void
|
||||
; Two adjacent readnone calls to libm @exp2 should be SLP-vectorized into a
; single call of the @llvm.exp2.v2f64 intrinsic.
define void @exp2_libm(double* %a, double* %b, double* %c) {
entry:
  %a0 = load double* %a, align 8
  %b0 = load double* %b, align 8
  %prod0 = fmul double %a0, %b0
  %e0 = tail call double @exp2(double %prod0) nounwind readnone
  %a.gep.1 = getelementptr inbounds double* %a, i64 1
  %a1 = load double* %a.gep.1, align 8
  %b.gep.1 = getelementptr inbounds double* %b, i64 1
  %b1 = load double* %b.gep.1, align 8
  %prod1 = fmul double %a1, %b1
  %e1 = tail call double @exp2(double %prod1) nounwind readnone
  store double %e0, double* %c, align 8
  %c.gep.1 = getelementptr inbounds double* %c, i64 1
  store double %e1, double* %c.gep.1, align 8
  ret void
}
|
||||
|
||||
|
||||
; Negative test case
|
||||
; CHECK: round_custom
|
||||
; CHECK-NOT: load <4 x i64>
|
||||
; CHECK: ret void
|
||||
; Negative test: @round here is a user function on i64, not the libm
; double routine, so it must NOT be treated as a vectorizable intrinsic.
define void @round_custom(i64* %a, i64* %b, i64* %c) {
entry:
  %a0 = load i64* %a, align 8
  %b0 = load i64* %b, align 8
  %prod0 = mul i64 %a0, %b0
  %r0 = tail call i64 @round(i64 %prod0) nounwind readnone
  %a.gep.1 = getelementptr inbounds i64* %a, i64 1
  %a1 = load i64* %a.gep.1, align 8
  %b.gep.1 = getelementptr inbounds i64* %b, i64 1
  %b1 = load i64* %b.gep.1, align 8
  %prod1 = mul i64 %a1, %b1
  %r1 = tail call i64 @round(i64 %prod1) nounwind readnone
  store i64 %r0, i64* %c, align 8
  %c.gep.1 = getelementptr inbounds i64* %c, i64 1
  store i64 %r1, i64* %c.gep.1, align 8
  ret void
}
|
||||
|
||||
|
||||
; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) #0
|
||||
; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
|
||||
; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) #0
|
||||
; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) #0
|
||||
|
||||
; CHECK: attributes #0 = { nounwind readnone }
|
||||
|
Loading…
Reference in New Issue