enhance memcpyopt to merge a store and a subsequent

memset into a single larger memset.

llvm-svn: 123086
This commit is contained in:
Chris Lattner 2011-01-08 20:54:51 +00:00
parent 2f2c3351e1
commit 4dc1fd938f
2 changed files with 101 additions and 53 deletions

View File

@ -121,7 +121,7 @@ struct MemsetRange {
unsigned Alignment;
/// TheStores - The actual stores (and memsets) that make up this range.
SmallVector<StoreInst*, 16> TheStores;
SmallVector<Instruction*, 16> TheStores;
bool isProfitableToUseMemset(const TargetData &TD) const;
@ -131,10 +131,19 @@ struct MemsetRange {
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// If we found at least 8 stores to merge or at least 64 bytes, use memset.
if (TheStores.size() >= 8 || End-Start >= 64) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
// If any of the stores are a memset, then it is always good to extend the
// memset.
for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
if (!isa<StoreInst>(TheStores[i]))
return true;
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
if (TheStores.size() <= 2) return false;
if (TheStores.size() == 2) return false;
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
@ -174,26 +183,44 @@ public:
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
void addStore(int64_t OffsetFromFirst, StoreInst *SI);
/// addInst - Add Inst to the range set at OffsetFromFirst by forwarding it to
/// addStore (for a StoreInst) or addMemSet (for anything else, which must then
/// be a MemSetInst).
void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
// NOTE(review): the next line looks like the pre-change body retained by the
// diff view. As written it casts every Inst to StoreInst before the dispatch
// below runs, so a store would be added twice -- confirm it is superseded.
addStore(OffsetFromFirst, cast<StoreInst>(Inst));
// Stores are sized from their stored type; everything else is treated as a
// memset and sized from its length operand.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
addStore(OffsetFromFirst, SI);
else
addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
}
/// addStore - Register the store SI, located OffsetFromFirst bytes from the
/// first pointer, as a range. The range length is the store size of the
/// stored value's type as reported by TD; the store's own pointer operand and
/// alignment are handed on to addRange.
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
  const int64_t NumBytes = TD.getTypeStoreSize(SI->getOperand(0)->getType());
  addRange(OffsetFromFirst, NumBytes, SI->getPointerOperand(),
           SI->getAlignment(), SI);
}
/// addMemSet - Register the memset MSI, located OffsetFromFirst bytes from the
/// first pointer, as a range. The length operand is cast to ConstantInt, so
/// callers must only pass memsets whose length is a constant.
void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
  ConstantInt *Len = cast<ConstantInt>(MSI->getLength());
  int64_t NumBytes = Len->getZExtValue();
  addRange(OffsetFromFirst, NumBytes, MSI->getDest(), MSI->getAlignment(),
           MSI);
}
/// addRange - Record that Inst writes Size bytes beginning at offset Start,
/// merging with existing ranges as appropriate. Ptr and Alignment become the
/// range's start pointer and alignment when Inst begins the range.
void addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst);
};
} // end anon namespace
/// addStore - Add a new store to the MemsetRanges data structure. This adds a
/// addRange - Add a new store or memset to the MemsetRanges data structure.
/// This adds a new range for the specified instruction at the specified
/// offset, merging into existing ranges as appropriate.
void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
// Do a linear search of the ranges to see if this can be joined and/or to
// find the insertion point in the list. We keep the ranges sorted for
// simplicity here. This is a linear search of a linked list, which is ugly,
// however the number of ranges is limited, so this won't get crazy slow.
///
/// Do a linear search of the ranges to see if this can be joined and/or to
/// find the insertion point in the list. We keep the ranges sorted for
/// simplicity here. This is a linear search of a linked list, which is ugly,
/// however the number of ranges is limited, so this won't get crazy slow.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
range_iterator I = Ranges.begin(), E = Ranges.end();
while (I != E && Start > I->End)
@ -206,14 +233,14 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
R.StartPtr = SI->getPointerOperand();
R.Alignment = SI->getAlignment();
R.TheStores.push_back(SI);
R.StartPtr = Ptr;
R.Alignment = Alignment;
R.TheStores.push_back(Inst);
return;
}
// This store overlaps with I, add it.
I->TheStores.push_back(SI);
I->TheStores.push_back(Inst);
// At this point, we may have an interval that completely contains our store.
// If so, just add it to the interval and return.
@ -228,8 +255,8 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
// stopped on *it*.
if (Start < I->Start) {
I->Start = Start;
I->StartPtr = SI->getPointerOperand();
I->Alignment = SI->getAlignment();
I->StartPtr = Ptr;
I->Alignment = Alignment;
}
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@ -314,8 +341,6 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
if (TD == 0) return 0;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
@ -324,37 +349,43 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
// If the call is readnone, ignore it, otherwise bail out. We don't even
// allow readonly here because we don't want something like:
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
// If the instruction is readnone, ignore it, otherwise bail out. We
// don't even allow readonly here because we don't want something like:
// A[1] = 2; strlen(A); A[2] = 2; -> memset(A, ...); strlen(A).
if (AA.getModRefBehavior(CallSite(BI)) ==
AliasAnalysis::DoesNotAccessMemory)
continue;
if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
break;
continue;
}
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
if (NextStore->isVolatile()) break;
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
break;
// TODO: If this is a memset, try to join it in.
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
break;
break;
} else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
break;
// If this is a non-store instruction it is fine, ignore it.
StoreInst *NextStore = dyn_cast<StoreInst>(BI);
if (NextStore == 0) continue;
// If this is a store, see if we can merge it in.
if (NextStore->isVolatile()) break;
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
break;
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
break;
Ranges.addStore(Offset, NextStore);
Ranges.addStore(Offset, NextStore);
} else {
MemSetInst *MSI = cast<MemSetInst>(BI);
if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
!isa<ConstantInt>(MSI->getLength()))
break;
// Check to see if this memset is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
break;
Ranges.addMemSet(Offset, MSI);
}
}
// If we have no ranges, then we just had a single store with nothing that
@ -406,7 +437,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
dbgs() << "With: " << *AMemSet << '\n');
// Zap all the stores.
for (SmallVector<StoreInst*, 16>::const_iterator
for (SmallVector<Instruction*, 16>::const_iterator
SI = Range.TheStores.begin(),
SE = Range.TheStores.end(); SI != SE; ++SI)
(*SI)->eraseFromParent();
@ -573,8 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
if (AA.getModRefInfo(C, cpyDest, srcSize) !=
AliasAnalysis::NoModRef)
if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
return false;
// All the checks have passed, so do the transformation.

View File

@ -162,3 +162,21 @@ entry:
}
declare void @foo(%struct.MV*, %struct.MV*, i8*)
; test3: a 4-byte zero write to P[1] is followed by an 11-byte zero memset
; that begins at P[2]. The two are adjacent, so they are expected to be merged
; into a single 15-byte memset that keeps the 4-byte alignment of the first
; write.
define void @test3(i32* nocapture %P) nounwind ssp {
entry:
%arrayidx = getelementptr inbounds i32* %P, i64 1
store i32 0, i32* %arrayidx, align 4
%add.ptr = getelementptr inbounds i32* %P, i64 2
%0 = bitcast i32* %add.ptr to i8*
tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false)
ret void
; CHECK: @test3
; CHECK-NOT: store
; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false)
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind