Begin incorporating target information into BBVectorize.

This is the first of several steps to incorporate information from the new
TargetTransformInfo infrastructure into BBVectorize. Two things are done here:

 1. Target information is used to determine whether it is profitable to fuse
    two instructions. This means that the vector operation must not cost more
    than the two original operations combined. Pairs that are not profitable
    are no longer considered. (Because current cost information is incomplete,
    for intrinsics for example, equal-cost pairs are still considered.)

 2. The 'cost savings' computed for the profitability check are also used to
    rank the DAGs that represent the potential vectorization plans. Specifically,
    for nodes of non-trivial depth, the cost savings are used as the node
    weight.

The next step will be to incorporate the shuffle costs into the DAG weighting;
this will give weights to the edges of the DAG as well. Once that is done, when
target information is available, we should be able to dispense with the
depth heuristic.

llvm-svn: 166716
This commit is contained in:
Hal Finkel 2012-10-25 21:12:23 +00:00
parent 579042f71b
commit cbf9365f4c
3 changed files with 144 additions and 43 deletions

View File

@ -43,12 +43,17 @@
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h" #include "llvm/Support/ValueHandle.h"
#include "llvm/DataLayout.h" #include "llvm/DataLayout.h"
#include "llvm/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h" #include "llvm/Transforms/Vectorize.h"
#include <algorithm> #include <algorithm>
#include <map> #include <map>
using namespace llvm; using namespace llvm;
static cl::opt<bool>
IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false),
cl::Hidden, cl::desc("Ignore target information"));
static cl::opt<unsigned> static cl::opt<unsigned>
ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
cl::desc("The required chain depth for vectorization")); cl::desc("The required chain depth for vectorization"));
@ -181,9 +186,13 @@ namespace {
DT = &P->getAnalysis<DominatorTree>(); DT = &P->getAnalysis<DominatorTree>();
SE = &P->getAnalysis<ScalarEvolution>(); SE = &P->getAnalysis<ScalarEvolution>();
TD = P->getAnalysisIfAvailable<DataLayout>(); TD = P->getAnalysisIfAvailable<DataLayout>();
TTI = IgnoreTargetInfo ? 0 :
P->getAnalysisIfAvailable<TargetTransformInfo>();
VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
} }
typedef std::pair<Value *, Value *> ValuePair; typedef std::pair<Value *, Value *> ValuePair;
typedef std::pair<ValuePair, int> ValuePairWithCost;
typedef std::pair<ValuePair, size_t> ValuePairWithDepth; typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
typedef std::pair<std::multimap<Value *, Value *>::iterator, typedef std::pair<std::multimap<Value *, Value *>::iterator,
@ -196,6 +205,8 @@ namespace {
DominatorTree *DT; DominatorTree *DT;
ScalarEvolution *SE; ScalarEvolution *SE;
DataLayout *TD; DataLayout *TD;
TargetTransformInfo *TTI;
const VectorTargetTransformInfo *VTTI;
// FIXME: const correct? // FIXME: const correct?
@ -204,6 +215,7 @@ namespace {
bool getCandidatePairs(BasicBlock &BB, bool getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start, BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs, std::multimap<Value *, Value *> &CandidatePairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len); std::vector<Value *> &PairableInsts, bool NonPow2Len);
void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
@ -216,6 +228,7 @@ namespace {
DenseSet<ValuePair> &PairableInstUsers); DenseSet<ValuePair> &PairableInstUsers);
void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, std::vector<Value *> &PairableInsts,
std::multimap<ValuePair, ValuePair> &ConnectedPairs, std::multimap<ValuePair, ValuePair> &ConnectedPairs,
DenseSet<ValuePair> &PairableInstUsers, DenseSet<ValuePair> &PairableInstUsers,
@ -228,7 +241,8 @@ namespace {
bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
bool areInstsCompatible(Instruction *I, Instruction *J, bool areInstsCompatible(Instruction *I, Instruction *J,
bool IsSimpleLoadStore, bool NonPow2Len); bool IsSimpleLoadStore, bool NonPow2Len,
int &CostSavings);
bool trackUsesOfI(DenseSet<Value *> &Users, bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I, AliasSetTracker &WriteSet, Instruction *I,
@ -270,13 +284,14 @@ namespace {
void findBestTreeFor( void findBestTreeFor(
std::multimap<Value *, Value *> &CandidatePairs, std::multimap<Value *, Value *> &CandidatePairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, std::vector<Value *> &PairableInsts,
std::multimap<ValuePair, ValuePair> &ConnectedPairs, std::multimap<ValuePair, ValuePair> &ConnectedPairs,
DenseSet<ValuePair> &PairableInstUsers, DenseSet<ValuePair> &PairableInstUsers,
std::multimap<ValuePair, ValuePair> &PairableInstUserMap, std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
DenseMap<Value *, Value *> &ChosenPairs, DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
size_t &BestEffSize, VPIteratorPair ChoiceRange, int &BestEffSize, VPIteratorPair ChoiceRange,
bool UseCycleCheck); bool UseCycleCheck);
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
@ -339,13 +354,16 @@ namespace {
return false; return false;
} }
DEBUG(if (VTTI) dbgs() << "BBV: using target information\n");
bool changed = false; bool changed = false;
// Iterate a sufficient number of times to merge types of size 1 bit, // Iterate a sufficient number of times to merge types of size 1 bit,
// then 2 bits, then 4, etc. up to half of the target vector width of the // then 2 bits, then 4, etc. up to half of the target vector width of the
// target vector register. // target vector register.
unsigned n = 1; unsigned n = 1;
for (unsigned v = 2; for (unsigned v = 2;
v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter); (VTTI || v <= Config.VectorBits) &&
(!Config.MaxIter || n <= Config.MaxIter);
v *= 2, ++n) { v *= 2, ++n) {
DEBUG(dbgs() << "BBV: fusing loop #" << n << DEBUG(dbgs() << "BBV: fusing loop #" << n <<
" for " << BB.getName() << " in " << " for " << BB.getName() << " in " <<
@ -375,6 +393,9 @@ namespace {
DT = &getAnalysis<DominatorTree>(); DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>(); SE = &getAnalysis<ScalarEvolution>();
TD = getAnalysisIfAvailable<DataLayout>(); TD = getAnalysisIfAvailable<DataLayout>();
TTI = IgnoreTargetInfo ? 0 :
getAnalysisIfAvailable<TargetTransformInfo>();
VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
return vectorizeBB(BB); return vectorizeBB(BB);
} }
@ -427,6 +448,10 @@ namespace {
T2 = cast<CastInst>(I)->getSrcTy(); T2 = cast<CastInst>(I)->getSrcTy();
else else
T2 = T1; T2 = T1;
if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
T2 = SI->getCondition()->getType();
}
} }
// Returns the weight associated with the provided value. A chain of // Returns the weight associated with the provided value. A chain of
@ -465,18 +490,25 @@ namespace {
// directly after J. // directly after J.
bool getPairPtrInfo(Instruction *I, Instruction *J, bool getPairPtrInfo(Instruction *I, Instruction *J,
Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
unsigned &IAddressSpace, unsigned &JAddressSpace,
int64_t &OffsetInElmts) { int64_t &OffsetInElmts) {
OffsetInElmts = 0; OffsetInElmts = 0;
if (isa<LoadInst>(I)) { if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
IPtr = cast<LoadInst>(I)->getPointerOperand(); LoadInst *LJ = cast<LoadInst>(J);
JPtr = cast<LoadInst>(J)->getPointerOperand(); IPtr = LI->getPointerOperand();
IAlignment = cast<LoadInst>(I)->getAlignment(); JPtr = LJ->getPointerOperand();
JAlignment = cast<LoadInst>(J)->getAlignment(); IAlignment = LI->getAlignment();
JAlignment = LJ->getAlignment();
IAddressSpace = LI->getPointerAddressSpace();
JAddressSpace = LJ->getPointerAddressSpace();
} else { } else {
IPtr = cast<StoreInst>(I)->getPointerOperand(); StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J);
JPtr = cast<StoreInst>(J)->getPointerOperand(); IPtr = SI->getPointerOperand();
IAlignment = cast<StoreInst>(I)->getAlignment(); JPtr = SJ->getPointerOperand();
JAlignment = cast<StoreInst>(J)->getAlignment(); IAlignment = SI->getAlignment();
JAlignment = SJ->getAlignment();
IAddressSpace = SI->getPointerAddressSpace();
JAddressSpace = SJ->getPointerAddressSpace();
} }
const SCEV *IPtrSCEV = SE->getSCEV(IPtr); const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
@ -562,7 +594,9 @@ namespace {
do { do {
std::vector<Value *> PairableInsts; std::vector<Value *> PairableInsts;
std::multimap<Value *, Value *> CandidatePairs; std::multimap<Value *, Value *> CandidatePairs;
DenseMap<ValuePair, int> CandidatePairCostSavings;
ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
CandidatePairCostSavings,
PairableInsts, NonPow2Len); PairableInsts, NonPow2Len);
if (PairableInsts.empty()) continue; if (PairableInsts.empty()) continue;
@ -590,7 +624,8 @@ namespace {
// variables. // variables.
DenseMap<Value *, Value *> ChosenPairs; DenseMap<Value *, Value *> ChosenPairs;
choosePairs(CandidatePairs, PairableInsts, ConnectedPairs, choosePairs(CandidatePairs, CandidatePairCostSavings,
PairableInsts, ConnectedPairs,
PairableInstUsers, ChosenPairs); PairableInstUsers, ChosenPairs);
if (ChosenPairs.empty()) continue; if (ChosenPairs.empty()) continue;
@ -679,15 +714,22 @@ namespace {
!(VectorType::isValidElementType(T2) || T2->isVectorTy())) !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
return false; return false;
if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) { if (T1->getScalarSizeInBits() == 1) {
if (!Config.VectorizeBools) if (!Config.VectorizeBools)
return false; return false;
} else { } else {
if (!Config.VectorizeInts if (!Config.VectorizeInts && T1->isIntOrIntVectorTy())
&& (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
return false; return false;
} }
if (T2->getScalarSizeInBits() == 1) {
if (!Config.VectorizeBools)
return false;
} else {
if (!Config.VectorizeInts && T2->isIntOrIntVectorTy())
return false;
}
if (!Config.VectorizeFloats if (!Config.VectorizeFloats
&& (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
return false; return false;
@ -703,8 +745,8 @@ namespace {
T2->getScalarType()->isPointerTy())) T2->getScalarType()->isPointerTy()))
return false; return false;
if (T1->getPrimitiveSizeInBits() >= Config.VectorBits || if (!VTTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
T2->getPrimitiveSizeInBits() >= Config.VectorBits) T2->getPrimitiveSizeInBits() >= Config.VectorBits))
return false; return false;
return true; return true;
@ -715,10 +757,13 @@ namespace {
// that I has already been determined to be vectorizable and that J is not // that I has already been determined to be vectorizable and that J is not
// in the use tree of I. // in the use tree of I.
bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
bool IsSimpleLoadStore, bool NonPow2Len) { bool IsSimpleLoadStore, bool NonPow2Len,
int &CostSavings) {
DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
" <-> " << *J << "\n"); " <-> " << *J << "\n");
CostSavings = 0;
// Loads and stores can be merged if they have different alignments, // Loads and stores can be merged if they have different alignments,
// but are otherwise the same. // but are otherwise the same.
if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment |
@ -731,38 +776,62 @@ namespace {
unsigned MaxTypeBits = std::max( unsigned MaxTypeBits = std::max(
IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
if (MaxTypeBits > Config.VectorBits) if (!VTTI && MaxTypeBits > Config.VectorBits)
return false; return false;
// FIXME: handle addsub-type operations! // FIXME: handle addsub-type operations!
if (IsSimpleLoadStore) { if (IsSimpleLoadStore) {
Value *IPtr, *JPtr; Value *IPtr, *JPtr;
unsigned IAlignment, JAlignment; unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts = 0; int64_t OffsetInElmts = 0;
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
IAddressSpace, JAddressSpace,
OffsetInElmts) && abs64(OffsetInElmts) == 1) { OffsetInElmts) && abs64(OffsetInElmts) == 1) {
if (Config.AlignedOnly) { unsigned BottomAlignment = IAlignment;
Type *aTypeI = isa<StoreInst>(I) ? if (OffsetInElmts < 0) BottomAlignment = JAlignment;
cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
Type *aTypeJ = isa<StoreInst>(J) ?
cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
Type *aTypeI = isa<StoreInst>(I) ?
cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
Type *aTypeJ = isa<StoreInst>(J) ?
cast<StoreInst>(J)->getValueOperand()->getType() : J->getType();
Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
if (Config.AlignedOnly) {
// An aligned load or store is possible only if the instruction // An aligned load or store is possible only if the instruction
// with the lower offset has an alignment suitable for the // with the lower offset has an alignment suitable for the
// vector type. // vector type.
unsigned BottomAlignment = IAlignment;
if (OffsetInElmts < 0) BottomAlignment = JAlignment;
Type *VType = getVecTypeForPair(aTypeI, aTypeJ);
unsigned VecAlignment = TD->getPrefTypeAlignment(VType); unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment) if (BottomAlignment < VecAlignment)
return false; return false;
} }
if (VTTI) {
unsigned ICost = VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
IAlignment, IAddressSpace);
unsigned JCost = VTTI->getMemoryOpCost(J->getOpcode(), J->getType(),
JAlignment, JAddressSpace);
unsigned VCost = VTTI->getMemoryOpCost(I->getOpcode(), VType,
BottomAlignment,
IAddressSpace);
if (VCost > ICost + JCost)
return false;
CostSavings = ICost + JCost - VCost;
}
} else { } else {
return false; return false;
} }
} else if (VTTI) {
unsigned ICost = VTTI->getInstrCost(I->getOpcode(), IT1, IT2);
unsigned JCost = VTTI->getInstrCost(J->getOpcode(), JT1, JT2);
Type *VT1 = getVecTypeForPair(IT1, JT1),
*VT2 = getVecTypeForPair(IT2, JT2);
unsigned VCost = VTTI->getInstrCost(I->getOpcode(), VT1, VT2);
if (VCost > ICost + JCost)
return false;
CostSavings = ICost + JCost - VCost;
} }
// The powi intrinsic is special because only the first argument is // The powi intrinsic is special because only the first argument is
@ -845,6 +914,7 @@ namespace {
bool BBVectorize::getCandidatePairs(BasicBlock &BB, bool BBVectorize::getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start, BasicBlock::iterator &Start,
std::multimap<Value *, Value *> &CandidatePairs, std::multimap<Value *, Value *> &CandidatePairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len) { std::vector<Value *> &PairableInsts, bool NonPow2Len) {
BasicBlock::iterator E = BB.end(); BasicBlock::iterator E = BB.end();
if (Start == E) return false; if (Start == E) return false;
@ -881,7 +951,9 @@ namespace {
// J does not use I, and comes before the first use of I, so it can be // J does not use I, and comes before the first use of I, so it can be
// merged with I if the instructions are compatible. // merged with I if the instructions are compatible.
if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue; int CostSavings;
if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len,
CostSavings)) continue;
// J is a candidate for merging with I. // J is a candidate for merging with I.
if (!PairableInsts.size() || if (!PairableInsts.size() ||
@ -890,6 +962,9 @@ namespace {
} }
CandidatePairs.insert(ValuePair(I, J)); CandidatePairs.insert(ValuePair(I, J));
if (VTTI)
CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
CostSavings));
// The next call to this function must start after the last instruction // The next call to this function must start after the last instruction
// selected during this invocation. // selected during this invocation.
@ -899,7 +974,8 @@ namespace {
} }
DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
<< *I << " <-> " << *J << "\n"); << *I << " <-> " << *J << " (cost savings: " <<
CostSavings << ")\n");
// If we have already found too many pairs, break here and this function // If we have already found too many pairs, break here and this function
// will be called again starting after the last instruction selected // will be called again starting after the last instruction selected
@ -1353,13 +1429,14 @@ namespace {
// pairs, given the choice of root pairs as an iterator range. // pairs, given the choice of root pairs as an iterator range.
void BBVectorize::findBestTreeFor( void BBVectorize::findBestTreeFor(
std::multimap<Value *, Value *> &CandidatePairs, std::multimap<Value *, Value *> &CandidatePairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, std::vector<Value *> &PairableInsts,
std::multimap<ValuePair, ValuePair> &ConnectedPairs, std::multimap<ValuePair, ValuePair> &ConnectedPairs,
DenseSet<ValuePair> &PairableInstUsers, DenseSet<ValuePair> &PairableInstUsers,
std::multimap<ValuePair, ValuePair> &PairableInstUserMap, std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
DenseMap<Value *, Value *> &ChosenPairs, DenseMap<Value *, Value *> &ChosenPairs,
DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
size_t &BestEffSize, VPIteratorPair ChoiceRange, int &BestEffSize, VPIteratorPair ChoiceRange,
bool UseCycleCheck) { bool UseCycleCheck) {
for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first; for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
J != ChoiceRange.second; ++J) { J != ChoiceRange.second; ++J) {
@ -1409,17 +1486,26 @@ namespace {
PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree, PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree,
PrunedTree, *J, UseCycleCheck); PrunedTree, *J, UseCycleCheck);
size_t EffSize = 0; int EffSize = 0;
for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), if (VTTI) {
E = PrunedTree.end(); S != E; ++S) for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
EffSize += getDepthFactor(S->first); E = PrunedTree.end(); S != E; ++S) {
if (getDepthFactor(S->first))
EffSize += CandidatePairCostSavings.find(*S)->second;
}
} else {
for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
E = PrunedTree.end(); S != E; ++S)
EffSize += (int) getDepthFactor(S->first);
}
DEBUG(if (DebugPairSelection) DEBUG(if (DebugPairSelection)
dbgs() << "BBV: found pruned Tree for pair {" dbgs() << "BBV: found pruned Tree for pair {"
<< *J->first << " <-> " << *J->second << "} of depth " << << *J->first << " <-> " << *J->second << "} of depth " <<
MaxDepth << " and size " << PrunedTree.size() << MaxDepth << " and size " << PrunedTree.size() <<
" (effective size: " << EffSize << ")\n"); " (effective size: " << EffSize << ")\n");
if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) { if (MaxDepth >= Config.ReqChainDepth &&
EffSize > 0 && EffSize > BestEffSize) {
BestMaxDepth = MaxDepth; BestMaxDepth = MaxDepth;
BestEffSize = EffSize; BestEffSize = EffSize;
BestTree = PrunedTree; BestTree = PrunedTree;
@ -1431,6 +1517,7 @@ namespace {
// that will be fused into vector instructions. // that will be fused into vector instructions.
void BBVectorize::choosePairs( void BBVectorize::choosePairs(
std::multimap<Value *, Value *> &CandidatePairs, std::multimap<Value *, Value *> &CandidatePairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, std::vector<Value *> &PairableInsts,
std::multimap<ValuePair, ValuePair> &ConnectedPairs, std::multimap<ValuePair, ValuePair> &ConnectedPairs,
DenseSet<ValuePair> &PairableInstUsers, DenseSet<ValuePair> &PairableInstUsers,
@ -1447,9 +1534,11 @@ namespace {
VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
// The best pair to choose and its tree: // The best pair to choose and its tree:
size_t BestMaxDepth = 0, BestEffSize = 0; size_t BestMaxDepth = 0;
int BestEffSize = 0;
DenseSet<ValuePair> BestTree; DenseSet<ValuePair> BestTree;
findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, findBestTreeFor(CandidatePairs, CandidatePairCostSavings,
PairableInsts, ConnectedPairs,
PairableInstUsers, PairableInstUserMap, ChosenPairs, PairableInstUsers, PairableInstUserMap, ChosenPairs,
BestTree, BestMaxDepth, BestEffSize, ChoiceRange, BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
UseCycleCheck); UseCycleCheck);
@ -1505,12 +1594,13 @@ namespace {
Instruction *I, Instruction *J, unsigned o, Instruction *I, Instruction *J, unsigned o,
bool FlipMemInputs) { bool FlipMemInputs) {
Value *IPtr, *JPtr; Value *IPtr, *JPtr;
unsigned IAlignment, JAlignment; unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts; int64_t OffsetInElmts;
// Note: the analysis might fail here, that is why FlipMemInputs has // Note: the analysis might fail here, that is why FlipMemInputs has
// been precomputed (OffsetInElmts must be unused here). // been precomputed (OffsetInElmts must be unused here).
(void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
IAddressSpace, JAddressSpace,
OffsetInElmts); OffsetInElmts);
// The pointer value is taken to be the one with the lowest offset. // The pointer value is taken to be the one with the lowest offset.
@ -2212,9 +2302,10 @@ namespace {
continue; continue;
Value *IPtr, *JPtr; Value *IPtr, *JPtr;
unsigned IAlignment, JAlignment; unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts; int64_t OffsetInElmts;
if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
IAddressSpace, JAddressSpace,
OffsetInElmts) || abs64(OffsetInElmts) != 1) OffsetInElmts) || abs64(OffsetInElmts) != 1)
llvm_unreachable("Pre-fusion pointer analysis failed"); llvm_unreachable("Pre-fusion pointer analysis failed");

View File

@ -1,8 +1,11 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu" target triple = "x86_64-unknown-linux-gnu"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL ; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
; The second check covers the use of alias analysis (with loop unrolling). ; The second check covers the use of alias analysis (with loop unrolling).
; Both checks are run with and without target information.
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable { define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
entry: entry:

View File

@ -1,5 +1,6 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-TI
; Basic depth-3 chain ; Basic depth-3 chain
define double @test1(double %A1, double %A2, double %B1, double %B2) { define double @test1(double %A1, double %A2, double %B1, double %B2) {
@ -23,6 +24,9 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 ; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
ret double %R ret double %R
; CHECK: ret double %R ; CHECK: ret double %R
; CHECK-TI: @test1
; CHECK-TI: fsub <2 x double>
; CHECK-TI: ret double
} }
; Basic depth-3 chain (last pair permuted) ; Basic depth-3 chain (last pair permuted)
@ -146,6 +150,9 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2 ; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2
ret <8 x i8> %R ret <8 x i8> %R
; CHECK: ret <8 x i8> %R ; CHECK: ret <8 x i8> %R
; CHECK-TI: @test6
; CHECK-TI-NOT: sub <16 x i8>
; CHECK-TI: ret <8 x i8>
} }