Fix a really nasty SROA bug with how we handled out-of-bounds memcpy

intrinsics.

Reported on the list by Evan with a couple of attempts to fix, but it
took a while to dig down to the root cause. There are two overlapping
bugs here, both centering around the circumstance of discovering
a memcpy operand which is known to be completely outside the bounds of
the alloca.

First, we need to kill the *other* side of the memcpy if it was added to
this alloca. Otherwise we'll factor it into our slicing and try to
rewrite it even though we know for a fact that it is dead. This is made
more tricky because we can visit the sides in either order. So we have
to both kill the other side and skip instructions marked as dead. The
latter really should be goodness in every case, but here is a matter of
correctness.

Second, we need to actually remove the *uses* of the alloca by the
memcpy when queuing it for later deletion. Otherwise it may still be
using the alloca when we go to promote it (if the rewrite re-uses the
existing alloca instruction). Do this by factoring out the
use-clobbering used when for nixing a Phi argument and re-using it
across the operands of a to-be-deleted instruction.

llvm-svn: 199590
This commit is contained in:
Chandler Carruth 2014-01-19 12:16:54 +00:00
parent ba496ff394
commit 1bf38c6a71
2 changed files with 64 additions and 12 deletions

View File

@ -461,14 +461,30 @@ private:
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if ((Length && Length->getValue() == 0) ||
(IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
if (Length && Length->getValue() == 0)
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
// Because we can visit these intrinsics twice, also check to see if the
// first time marked this instruction as dead. If so, skip it.
if (VisitedDeadInsts.count(&II))
return;
if (!IsOffsetKnown)
return PI.setAborted(&II);
// This side of the transfer is completely out-of-bounds, and so we can
// nuke the entire transfer. However, we also need to nuke the other side
// if already added to our partitions.
// FIXME: Yet another place we really should bypass this when
// instrumenting for ASan.
if (!Offset.isNegative() && Offset.uge(AllocSize)) {
SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
if (MTPI != MemTransferSliceMap.end())
S.Slices[MTPI->second].kill();
return markAsDead(II);
}
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
@ -917,6 +933,7 @@ private:
ArrayRef<AllocaSlices::iterator> SplitUses);
bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
bool runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
bool promoteAllocas(Function &F);
};
@ -2497,8 +2514,11 @@ private:
// alloca that should be re-examined after rewriting this instruction.
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
assert(AI != &OldAI && AI != &NewAI &&
"Splittable transfers cannot reach the same alloca on both ends.");
Pass.Worklist.insert(AI);
}
if (EmitMemCpy) {
Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
@ -3328,6 +3348,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
return Changed;
}
/// \brief Clobber a use with undef, deleting the used value if it becomes dead.
void SROA::clobberUse(Use &U) {
Value *OldV = U;
// Replace the use with an undef value.
U = UndefValue::get(OldV->getType());
// Check for this making an instruction dead. We have to garbage collect
// all the dead instructions to ensure the uses of any alloca end up being
// minimal.
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
DeadInsts.insert(OldI);
}
}
/// \brief Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
@ -3365,21 +3400,23 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
DE = S.dead_user_end();
DI != DE; ++DI) {
Changed = true;
// Free up everything used by this instruction.
for (User::op_iterator DOI = (*DI)->op_begin(), DOE = (*DI)->op_end();
DOI != DOE; ++DOI)
clobberUse(*DOI);
// Now replace the uses of this instruction.
(*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
// And mark it for deletion.
DeadInsts.insert(*DI);
Changed = true;
}
for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
DE = S.dead_op_end();
DO != DE; ++DO) {
Value *OldV = **DO;
// Clobber the use with an undef value.
**DO = UndefValue::get(OldV->getType());
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
clobberUse(**DO);
Changed = true;
DeadInsts.insert(OldI);
}
}
// No slices to split. Leave the dead alloca for a later pass to clean up.

View File

@ -1356,3 +1356,18 @@ entry:
%cond105.i.i = load float* %cond105.in.i.i, align 8
ret void
}
define void @test23(i32 %x) {
; CHECK-LABEL: @test23(
; CHECK-NOT: alloca
; CHECK: ret void
entry:
%a = alloca i32, align 4
store i32 %x, i32* %a, align 4
%gep1 = getelementptr inbounds i32* %a, i32 1
%gep0 = getelementptr inbounds i32* %a, i32 0
%cast1 = bitcast i32* %gep1 to i8*
%cast0 = bitcast i32* %gep0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)
ret void
}