forked from OSchip/llvm-project
Fix several issues with alignment. We weren't always accounting for type
alignment requirements of the new alloca. As one consequence which was reported as a bug by Duncan, we overaligned memcpy calls to ranges of allocas after they were rewritten to types with lower alignment requirements. Other consquences are possible, but I don't have any test cases for them. llvm-svn: 164937
This commit is contained in:
parent
8f97281b99
commit
176ca71a82
|
@ -2145,6 +2145,20 @@ private:
|
|||
return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
|
||||
}
|
||||
|
||||
unsigned getAdjustedAlign(uint64_t Offset) {
|
||||
unsigned NewAIAlign = NewAI.getAlignment();
|
||||
if (!NewAIAlign)
|
||||
NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType());
|
||||
return MinAlign(NewAIAlign, Offset);
|
||||
}
|
||||
unsigned getAdjustedAlign() {
|
||||
return getAdjustedAlign(BeginOffset - NewAllocaBeginOffset);
|
||||
}
|
||||
|
||||
bool isTypeAlignSufficient(Type *Ty) {
|
||||
return TD.getABITypeAlignment(Ty) >= getAdjustedAlign();
|
||||
}
|
||||
|
||||
ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) {
|
||||
assert(VecTy && "Can only call getIndex when rewriting a vector");
|
||||
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
|
||||
|
@ -2253,9 +2267,8 @@ private:
|
|||
Value *NewPtr = getAdjustedAllocaPtr(IRB,
|
||||
LI.getPointerOperand()->getType());
|
||||
LI.setOperand(0, NewPtr);
|
||||
if (LI.getAlignment())
|
||||
LI.setAlignment(MinAlign(NewAI.getAlignment(),
|
||||
BeginOffset - NewAllocaBeginOffset));
|
||||
if (LI.getAlignment() || !isTypeAlignSufficient(LI.getType()))
|
||||
LI.setAlignment(getAdjustedAlign());
|
||||
DEBUG(dbgs() << " to: " << LI << "\n");
|
||||
|
||||
deleteIfTriviallyDead(OldOp);
|
||||
|
@ -2307,6 +2320,9 @@ private:
|
|||
Value *NewPtr = getAdjustedAllocaPtr(IRB,
|
||||
SI.getPointerOperand()->getType());
|
||||
SI.setOperand(1, NewPtr);
|
||||
if (SI.getAlignment() ||
|
||||
!isTypeAlignSufficient(SI.getValueOperand()->getType()))
|
||||
SI.setAlignment(getAdjustedAlign());
|
||||
if (SI.getAlignment())
|
||||
SI.setAlignment(MinAlign(NewAI.getAlignment(),
|
||||
BeginOffset - NewAllocaBeginOffset));
|
||||
|
@ -2325,14 +2341,8 @@ private:
|
|||
// pointer to the new alloca.
|
||||
if (!isa<Constant>(II.getLength())) {
|
||||
II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
|
||||
|
||||
Type *CstTy = II.getAlignmentCst()->getType();
|
||||
if (!NewAI.getAlignment())
|
||||
II.setAlignment(ConstantInt::get(CstTy, 0));
|
||||
else
|
||||
II.setAlignment(
|
||||
ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(),
|
||||
BeginOffset - NewAllocaBeginOffset)));
|
||||
II.setAlignment(ConstantInt::get(CstTy, getAdjustedAlign()));
|
||||
|
||||
deleteIfTriviallyDead(OldPtr);
|
||||
return false;
|
||||
|
@ -2353,15 +2363,10 @@ private:
|
|||
!TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
|
||||
Type *SizeTy = II.getLength()->getType();
|
||||
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
|
||||
unsigned Align = 1;
|
||||
if (NewAI.getAlignment())
|
||||
Align = MinAlign(NewAI.getAlignment(),
|
||||
BeginOffset - NewAllocaBeginOffset);
|
||||
|
||||
CallInst *New
|
||||
= IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
|
||||
II.getRawDest()->getType()),
|
||||
II.getValue(), Size, Align,
|
||||
II.getValue(), Size, getAdjustedAlign(),
|
||||
II.isVolatile());
|
||||
(void)New;
|
||||
DEBUG(dbgs() << " to: " << *New << "\n");
|
||||
|
@ -2443,6 +2448,16 @@ private:
|
|||
const AllocaPartitioning::MemTransferOffsets &MTO
|
||||
= P.getMemTransferOffsets(II);
|
||||
|
||||
// Compute the relative offset within the transfer.
|
||||
unsigned IntPtrWidth = TD.getPointerSizeInBits();
|
||||
APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
|
||||
: MTO.SourceBegin));
|
||||
|
||||
unsigned Align = II.getAlignment();
|
||||
if (Align > 1)
|
||||
Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
|
||||
MinAlign(II.getAlignment(), getAdjustedAlign()));
|
||||
|
||||
// For unsplit intrinsics, we simply modify the source and destination
|
||||
// pointers in place. This isn't just an optimization, it is a matter of
|
||||
// correctness. With unsplit intrinsics we may be dealing with transfers
|
||||
|
@ -2458,11 +2473,7 @@ private:
|
|||
II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));
|
||||
|
||||
Type *CstTy = II.getAlignmentCst()->getType();
|
||||
if (II.getAlignment() > 1)
|
||||
II.setAlignment(ConstantInt::get(
|
||||
CstTy, MinAlign(II.getAlignment(),
|
||||
MinAlign(NewAI.getAlignment(),
|
||||
BeginOffset - NewAllocaBeginOffset))));
|
||||
II.setAlignment(ConstantInt::get(CstTy, Align));
|
||||
|
||||
DEBUG(dbgs() << " to: " << II << "\n");
|
||||
deleteIfTriviallyDead(OldOp);
|
||||
|
@ -2474,11 +2485,6 @@ private:
|
|||
// memmove with memcpy, and we don't need to worry about all manner of
|
||||
// downsides to splitting and transforming the operations.
|
||||
|
||||
// Compute the relative offset within the transfer.
|
||||
unsigned IntPtrWidth = TD.getPointerSizeInBits();
|
||||
APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
|
||||
: MTO.SourceBegin));
|
||||
|
||||
// If this doesn't map cleanly onto the alloca type, and that type isn't
|
||||
// a single value type, just emit a memcpy.
|
||||
bool EmitMemCpy
|
||||
|
@ -2521,11 +2527,6 @@ private:
|
|||
OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
|
||||
getName("." + OtherPtr->getName()));
|
||||
|
||||
unsigned Align = II.getAlignment();
|
||||
if (Align > 1)
|
||||
Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
|
||||
MinAlign(II.getAlignment(), NewAI.getAlignment()));
|
||||
|
||||
// Strip all inbounds GEPs and pointer casts to try to dig out any root
|
||||
// alloca that should be re-examined after rewriting this instruction.
|
||||
if (AllocaInst *AI
|
||||
|
|
|
@ -83,3 +83,34 @@ entry:
|
|||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
%struct.S = type { i8, { i64 } }
|
||||
|
||||
define void @test4() {
|
||||
; This test case triggered very strange alginment behavior with memcpy due to
|
||||
; strang splitting. Reported by Duncan.
|
||||
; CHECK: @test4
|
||||
|
||||
entry:
|
||||
%D.2113 = alloca %struct.S
|
||||
%Op = alloca %struct.S
|
||||
%D.2114 = alloca %struct.S
|
||||
%gep1 = getelementptr inbounds %struct.S* %Op, i32 0, i32 0
|
||||
store i8 0, i8* %gep1, align 8
|
||||
%gep2 = getelementptr inbounds %struct.S* %Op, i32 0, i32 1, i32 0
|
||||
%cast = bitcast i64* %gep2 to double*
|
||||
store double 0.000000e+00, double* %cast, align 8
|
||||
store i64 0, i64* %gep2, align 8
|
||||
%dst1 = bitcast %struct.S* %D.2114 to i8*
|
||||
%src1 = bitcast %struct.S* %Op to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst1, i8* %src1, i32 16, i32 8, i1 false)
|
||||
%dst2 = bitcast %struct.S* %D.2113 to i8*
|
||||
%src2 = bitcast %struct.S* %D.2114 to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %src2, i32 16, i32 8, i1 false)
|
||||
; We get 3 memcpy calls with various reasons to shrink their alignment to 1.
|
||||
; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 3, i32 1, i1 false)
|
||||
; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 1, i1 false)
|
||||
; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 11, i32 1, i1 false)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue