More conservatively report status from LoopIdiomRecognize
Being "precise" here is getting us into trouble with one of the EXPENSIVE_CHECKS buildbots, see [1]. Rather than reporting IR additions that later get rolled back as "no change", instead we now conservatively report that there was. 1: http://lists.llvm.org/pipermail/llvm-dev/2020-July/143509.html Differential Revision: https://reviews.llvm.org/D84071
This commit is contained in:
parent 28da5759bd
commit 4d75cc4b0a
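For context, the shape of the fix is easier to see outside the diff. Below is a minimal, self-contained sketch of the reporting pattern the patch adopts; the helper names (expandBasePointer, transformIsLegal, cleanUpExpandedCode, emitMemIntrinsic) are hypothetical stand-ins for the LLVM machinery, not real API:

#include <cstdio>

// Hypothetical stand-ins for the SCEV expander and the legality checks.
static void expandBasePointer() {}   // may insert instructions into the IR
static bool transformIsLegal() { return false; }
static void cleanUpExpandedCode() {} // deletes the inserted instructions
static void emitMemIntrinsic() {}

static bool runTransform() {
  bool Changed = false;

  expandBasePointer();
  // From here on, the IR can differ from the input in ways a textual dump
  // will not show (e.g. use-list order), so report a change on every path.
  Changed = true;

  if (!transformIsLegal()) {
    cleanUpExpandedCode(); // rollback may still leave use lists reordered...
    return Changed;        // ...so still tell the pass manager we changed IR
  }

  emitMemIntrinsic();
  return Changed;
}

int main() { std::printf("changed: %d\n", runTransform()); }

Statically, every "return Changed;" after the assignment could be spelled "return true;"; the variable exists, as the in-tree comment below says, to steer future editors away from "more precise" reporting.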
@@ -912,6 +912,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
   Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
   Type *IntIdxTy = DL->getIndexType(DestPtr->getType());
 
+  bool Changed = false;
   const SCEV *Start = Ev->getStart();
   // Handle negative strided loops.
   if (NegStride)
@@ -920,7 +921,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
   // TODO: ideally we should still be able to generate memset if SCEV expander
   // is taught to generate the dependencies at the latest point.
   if (!isSafeToExpand(Start, *SE))
-    return false;
+    return Changed;
 
   // Okay, we have a strided store "p[i]" of a splattable value. We can turn
   // this into a memset in the loop preheader now if we want. However, this
@@ -929,16 +930,26 @@ bool LoopIdiomRecognize::processLoopStridedStore(
   // base pointer and checking the region.
   Value *BasePtr =
       Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
+
+  // From here on out, conservatively report to the pass manager that we've
+  // changed the IR, even if we later clean up these added instructions. There
+  // may be structural differences e.g. in the order of use lists not accounted
+  // for in just a textual dump of the IR. This is written as a variable, even
+  // though statically all the places this dominates could be replaced with
+  // 'true', with the hope that anyone trying to be clever / "more precise" with
+  // the return value will read this comment, and leave them alone.
+  Changed = true;
+
   if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
-    return false;
+    return Changed;
   }
 
   if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
-    return false;
+    return Changed;
 
   // Okay, everything looks good, insert the memset.
 
@@ -948,7 +959,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
   // TODO: ideally we should still be able to generate memset if SCEV expander
   // is taught to generate the dependencies at the latest point.
   if (!isSafeToExpand(NumBytesS, *SE))
-    return false;
+    return Changed;
 
   Value *NumBytes =
       Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
@@ -1067,6 +1078,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
 
   ExpandedValuesCleaner EVC(Expander, TLI);
 
+  bool Changed = false;
   const SCEV *StrStart = StoreEv->getStart();
   unsigned StrAS = SI->getPointerAddressSpace();
   Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
@@ -1085,11 +1097,20 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
   EVC.add(StoreBasePtr);
 
+  // From here on out, conservatively report to the pass manager that we've
+  // changed the IR, even if we later clean up these added instructions. There
+  // may be structural differences e.g. in the order of use lists not accounted
+  // for in just a textual dump of the IR. This is written as a variable, even
+  // though statically all the places this dominates could be replaced with
+  // 'true', with the hope that anyone trying to be clever / "more precise" with
+  // the return value will read this comment, and leave them alone.
+  Changed = true;
+
   SmallPtrSet<Instruction *, 1> Stores;
   Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores))
-    return false;
+    return Changed;
 
   const SCEV *LdStart = LoadEv->getStart();
   unsigned LdAS = LI->getPointerAddressSpace();
@@ -1106,10 +1127,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
 
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                             StoreSize, *AA, Stores))
-    return false;
+    return Changed;
 
   if (avoidLIRForMultiBlockLoop())
-    return false;
+    return Changed;
 
   // Okay, everything is safe, we can transform this!
 
@@ -1133,14 +1154,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
   const Align StoreAlign = SI->getAlign();
   const Align LoadAlign = LI->getAlign();
   if (StoreAlign < StoreSize || LoadAlign < StoreSize)
-    return false;
+    return Changed;
 
   // If the element.atomic memcpy is not lowered into explicit
   // loads/stores later, then it will be lowered into an element-size
   // specific lib call. If the lib call doesn't exist for our store size, then
   // we shouldn't generate the memcpy.
   if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
-    return false;
+    return Changed;
 
   // Create the call.
   // Note that unordered atomic loads/stores are *required* by the spec to