forked from OSchip/llvm-project
[LIR] General refactoring to simplify code and the ease future code review.
Move several checks into isLegalStores. Also, delineate between those stores that are memset-able and those that are memcpy-able. http://reviews.llvm.org/D15683 Patch by Haicheng Wu <haicheng@codeaurora.org>! llvm-svn: 256336
This commit is contained in:
parent
42bd26f29d
commit
fba65d2fd3
|
@ -108,7 +108,11 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
typedef SmallVector<StoreInst *, 8> StoreList;
|
typedef SmallVector<StoreInst *, 8> StoreList;
|
||||||
StoreList StoreRefs;
|
StoreList StoreRefsForMemset;
|
||||||
|
StoreList StoreRefsForMemcpy;
|
||||||
|
bool HasMemset;
|
||||||
|
bool HasMemsetPattern;
|
||||||
|
bool HasMemcpy;
|
||||||
|
|
||||||
/// \name Countable Loop Idiom Handling
|
/// \name Countable Loop Idiom Handling
|
||||||
/// @{
|
/// @{
|
||||||
|
@ -118,17 +122,15 @@ private:
|
||||||
SmallVectorImpl<BasicBlock *> &ExitBlocks);
|
SmallVectorImpl<BasicBlock *> &ExitBlocks);
|
||||||
|
|
||||||
void collectStores(BasicBlock *BB);
|
void collectStores(BasicBlock *BB);
|
||||||
bool isLegalStore(StoreInst *SI);
|
bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
|
||||||
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
|
bool processLoopStore(StoreInst *SI, const SCEV *BECount);
|
||||||
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
|
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
|
||||||
|
|
||||||
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
||||||
unsigned StoreAlignment, Value *SplatValue,
|
unsigned StoreAlignment, Value *StoredVal,
|
||||||
Instruction *TheStore, const SCEVAddRecExpr *Ev,
|
Instruction *TheStore, const SCEVAddRecExpr *Ev,
|
||||||
const SCEV *BECount, bool NegStride);
|
const SCEV *BECount, bool NegStride);
|
||||||
bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
|
||||||
const SCEVAddRecExpr *StoreEv,
|
|
||||||
const SCEV *BECount, bool NegStride);
|
|
||||||
|
|
||||||
/// @}
|
/// @}
|
||||||
/// \name Noncountable Loop Idiom Handling
|
/// \name Noncountable Loop Idiom Handling
|
||||||
|
@ -207,6 +209,11 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||||
*CurLoop->getHeader()->getParent());
|
*CurLoop->getHeader()->getParent());
|
||||||
DL = &CurLoop->getHeader()->getModule()->getDataLayout();
|
DL = &CurLoop->getHeader()->getModule()->getDataLayout();
|
||||||
|
|
||||||
|
HasMemset = TLI->has(LibFunc::memset);
|
||||||
|
HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
|
||||||
|
HasMemcpy = TLI->has(LibFunc::memcpy);
|
||||||
|
|
||||||
|
if (HasMemset || HasMemsetPattern || HasMemcpy)
|
||||||
if (SE->hasLoopInvariantBackedgeTakenCount(L))
|
if (SE->hasLoopInvariantBackedgeTakenCount(L))
|
||||||
return runOnCountableLoop();
|
return runOnCountableLoop();
|
||||||
|
|
||||||
|
@ -297,7 +304,8 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
|
||||||
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
|
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
|
bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
|
||||||
|
bool &ForMemcpy) {
|
||||||
// Don't touch volatile stores.
|
// Don't touch volatile stores.
|
||||||
if (!SI->isSimple())
|
if (!SI->isSimple())
|
||||||
return false;
|
return false;
|
||||||
|
@ -322,22 +330,86 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
|
||||||
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
|
if (!isa<SCEVConstant>(StoreEv->getOperand(1)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// See if the store can be turned into a memset.
|
||||||
|
|
||||||
|
// If the stored value is a byte-wise value (like i32 -1), then it may be
|
||||||
|
// turned into a memset of i8 -1, assuming that all the consecutive bytes
|
||||||
|
// are stored. A store of i32 0x01020304 can never be turned into a memset,
|
||||||
|
// but it can be turned into memset_pattern if the target supports it.
|
||||||
|
Value *SplatValue = isBytewiseValue(StoredVal);
|
||||||
|
Constant *PatternValue = nullptr;
|
||||||
|
|
||||||
|
// If we're allowed to form a memset, and the stored value would be
|
||||||
|
// acceptable for memset, use it.
|
||||||
|
if (HasMemset && SplatValue &&
|
||||||
|
// Verify that the stored value is loop invariant. If not, we can't
|
||||||
|
// promote the memset.
|
||||||
|
CurLoop->isLoopInvariant(SplatValue)) {
|
||||||
|
// It looks like we can use SplatValue.
|
||||||
|
ForMemset = true;
|
||||||
|
return true;
|
||||||
|
} else if (HasMemsetPattern &&
|
||||||
|
// Don't create memset_pattern16s with address spaces.
|
||||||
|
StorePtr->getType()->getPointerAddressSpace() == 0 &&
|
||||||
|
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
|
||||||
|
// It looks like we can use PatternValue!
|
||||||
|
ForMemset = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Otherwise, see if the store can be turned into a memcpy.
|
||||||
|
if (HasMemcpy) {
|
||||||
|
// Check to see if the stride matches the size of the store. If so, then we
|
||||||
|
// know that every byte is touched in the loop.
|
||||||
|
unsigned Stride = getStoreStride(StoreEv);
|
||||||
|
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
|
||||||
|
if (StoreSize != Stride && StoreSize != -Stride)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// The store must be feeding a non-volatile load.
|
||||||
|
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
|
||||||
|
if (!LI || !LI->isSimple())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// See if the pointer expression is an AddRec like {base,+,1} on the current
|
||||||
|
// loop, which indicates a strided load. If we have something else, it's a
|
||||||
|
// random load we can't handle.
|
||||||
|
const SCEVAddRecExpr *LoadEv =
|
||||||
|
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
|
||||||
|
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// The store and load must share the same stride.
|
||||||
|
if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Success. This store can be converted into a memcpy.
|
||||||
|
ForMemcpy = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// This store can't be transformed into a memset/memcpy.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
|
void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
|
||||||
StoreRefs.clear();
|
StoreRefsForMemset.clear();
|
||||||
|
StoreRefsForMemcpy.clear();
|
||||||
for (Instruction &I : *BB) {
|
for (Instruction &I : *BB) {
|
||||||
StoreInst *SI = dyn_cast<StoreInst>(&I);
|
StoreInst *SI = dyn_cast<StoreInst>(&I);
|
||||||
if (!SI)
|
if (!SI)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
bool ForMemset = false;
|
||||||
|
bool ForMemcpy = false;
|
||||||
// Make sure this is a strided store with a constant stride.
|
// Make sure this is a strided store with a constant stride.
|
||||||
if (!isLegalStore(SI))
|
if (!isLegalStore(SI, ForMemset, ForMemcpy))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Save the store locations.
|
// Save the store locations.
|
||||||
StoreRefs.push_back(SI);
|
if (ForMemset)
|
||||||
|
StoreRefsForMemset.push_back(SI);
|
||||||
|
else if (ForMemcpy)
|
||||||
|
StoreRefsForMemcpy.push_back(SI);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -357,9 +429,15 @@ bool LoopIdiomRecognize::runOnLoopBlock(
|
||||||
bool MadeChange = false;
|
bool MadeChange = false;
|
||||||
// Look for store instructions, which may be optimized to memset/memcpy.
|
// Look for store instructions, which may be optimized to memset/memcpy.
|
||||||
collectStores(BB);
|
collectStores(BB);
|
||||||
for (auto &SI : StoreRefs)
|
|
||||||
|
// Look for a single store which can be optimized into a memset.
|
||||||
|
for (auto &SI : StoreRefsForMemset)
|
||||||
MadeChange |= processLoopStore(SI, BECount);
|
MadeChange |= processLoopStore(SI, BECount);
|
||||||
|
|
||||||
|
// Optimize the store into a memcpy, if it feeds an similarly strided load.
|
||||||
|
for (auto &SI : StoreRefsForMemcpy)
|
||||||
|
MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
|
||||||
|
|
||||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
|
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
|
||||||
Instruction *Inst = &*I++;
|
Instruction *Inst = &*I++;
|
||||||
// Look for memset instructions, which may be optimized to a larger memset.
|
// Look for memset instructions, which may be optimized to a larger memset.
|
||||||
|
@ -380,7 +458,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
|
||||||
return MadeChange;
|
return MadeChange;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// processLoopStore - See if this store can be promoted to a memset or memcpy.
|
/// processLoopStore - See if this store can be promoted to a memset.
|
||||||
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
|
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
|
||||||
assert(SI->isSimple() && "Expected only non-volatile stores.");
|
assert(SI->isSimple() && "Expected only non-volatile stores.");
|
||||||
|
|
||||||
|
@ -398,12 +476,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
|
||||||
bool NegStride = StoreSize == -Stride;
|
bool NegStride = StoreSize == -Stride;
|
||||||
|
|
||||||
// See if we can optimize just this store in isolation.
|
// See if we can optimize just this store in isolation.
|
||||||
if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
|
return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
|
||||||
StoredVal, SI, StoreEv, BECount, NegStride))
|
StoredVal, SI, StoreEv, BECount, NegStride);
|
||||||
return true;
|
|
||||||
|
|
||||||
// Optimize the store into a memcpy, if it feeds an similarly strided load.
|
|
||||||
return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// processLoopMemSet - See if this memset can be promoted to a large memset.
|
/// processLoopMemSet - See if this memset can be promoted to a large memset.
|
||||||
|
@ -496,37 +570,19 @@ bool LoopIdiomRecognize::processLoopStridedStore(
|
||||||
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
|
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
|
||||||
Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
|
Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
|
||||||
const SCEV *BECount, bool NegStride) {
|
const SCEV *BECount, bool NegStride) {
|
||||||
|
|
||||||
// If the stored value is a byte-wise value (like i32 -1), then it may be
|
|
||||||
// turned into a memset of i8 -1, assuming that all the consecutive bytes
|
|
||||||
// are stored. A store of i32 0x01020304 can never be turned into a memset,
|
|
||||||
// but it can be turned into memset_pattern if the target supports it.
|
|
||||||
Value *SplatValue = isBytewiseValue(StoredVal);
|
Value *SplatValue = isBytewiseValue(StoredVal);
|
||||||
Constant *PatternValue = nullptr;
|
Constant *PatternValue = nullptr;
|
||||||
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
|
|
||||||
|
|
||||||
// If we're allowed to form a memset, and the stored value would be acceptable
|
if (!SplatValue)
|
||||||
// for memset, use it.
|
PatternValue = getMemSetPatternValue(StoredVal, DL);
|
||||||
if (SplatValue && TLI->has(LibFunc::memset) &&
|
|
||||||
// Verify that the stored value is loop invariant. If not, we can't
|
assert((SplatValue || PatternValue) &&
|
||||||
// promote the memset.
|
"Expected either splat value or pattern value.");
|
||||||
CurLoop->isLoopInvariant(SplatValue)) {
|
|
||||||
// Keep and use SplatValue.
|
|
||||||
PatternValue = nullptr;
|
|
||||||
} else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) &&
|
|
||||||
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
|
|
||||||
// Don't create memset_pattern16s with address spaces.
|
|
||||||
// It looks like we can use PatternValue!
|
|
||||||
SplatValue = nullptr;
|
|
||||||
} else {
|
|
||||||
// Otherwise, this isn't an idiom we can transform. For example, we can't
|
|
||||||
// do anything with a 3-byte store.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The trip count of the loop and the base pointer of the addrec SCEV is
|
// The trip count of the loop and the base pointer of the addrec SCEV is
|
||||||
// guaranteed to be loop invariant, which means that it should dominate the
|
// guaranteed to be loop invariant, which means that it should dominate the
|
||||||
// header. This allows us to insert code for it in the preheader.
|
// header. This allows us to insert code for it in the preheader.
|
||||||
|
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
|
||||||
BasicBlock *Preheader = CurLoop->getLoopPreheader();
|
BasicBlock *Preheader = CurLoop->getLoopPreheader();
|
||||||
IRBuilder<> Builder(Preheader->getTerminator());
|
IRBuilder<> Builder(Preheader->getTerminator());
|
||||||
SCEVExpander Expander(*SE, *DL, "loop-idiom");
|
SCEVExpander Expander(*SE, *DL, "loop-idiom");
|
||||||
|
@ -608,29 +664,25 @@ bool LoopIdiomRecognize::processLoopStridedStore(
|
||||||
/// If the stored value is a strided load in the same loop with the same stride
|
/// If the stored value is a strided load in the same loop with the same stride
|
||||||
/// this may be transformable into a memcpy. This kicks in for stuff like
|
/// this may be transformable into a memcpy. This kicks in for stuff like
|
||||||
/// for (i) A[i] = B[i];
|
/// for (i) A[i] = B[i];
|
||||||
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
|
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
|
||||||
StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv,
|
const SCEV *BECount) {
|
||||||
const SCEV *BECount, bool NegStride) {
|
assert(SI->isSimple() && "Expected only non-volatile stores.");
|
||||||
// If we're not allowed to form memcpy, we fail.
|
|
||||||
if (!TLI->has(LibFunc::memcpy))
|
Value *StorePtr = SI->getPointerOperand();
|
||||||
return false;
|
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
|
||||||
|
unsigned Stride = getStoreStride(StoreEv);
|
||||||
|
unsigned StoreSize = getStoreSizeInBytes(SI, DL);
|
||||||
|
bool NegStride = StoreSize == -Stride;
|
||||||
|
|
||||||
// The store must be feeding a non-volatile load.
|
// The store must be feeding a non-volatile load.
|
||||||
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
|
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
|
||||||
if (!LI || !LI->isSimple())
|
assert(LI->isSimple() && "Expected only non-volatile stores.");
|
||||||
return false;
|
|
||||||
|
|
||||||
// See if the pointer expression is an AddRec like {base,+,1} on the current
|
// See if the pointer expression is an AddRec like {base,+,1} on the current
|
||||||
// loop, which indicates a strided load. If we have something else, it's a
|
// loop, which indicates a strided load. If we have something else, it's a
|
||||||
// random load we can't handle.
|
// random load we can't handle.
|
||||||
const SCEVAddRecExpr *LoadEv =
|
const SCEVAddRecExpr *LoadEv =
|
||||||
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
|
cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
|
||||||
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// The store and load must share the same stride.
|
|
||||||
if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// The trip count of the loop and the base pointer of the addrec SCEV is
|
// The trip count of the loop and the base pointer of the addrec SCEV is
|
||||||
// guaranteed to be loop invariant, which means that it should dominate the
|
// guaranteed to be loop invariant, which means that it should dominate the
|
||||||
|
|
Loading…
Reference in New Issue