Change uses of getTypeSize to getABITypeSize, getTypeStoreSize

or getTypeSizeInBits as appropriate in ScalarReplAggregates.
The right change to make was not always obvious, so it would
be good to have an SROA guru review this.  While there I noticed
and fixed some bugs: (1) arrays of x86 long double have
holes due to alignment padding, but this wasn't being spotted
by HasStructPadding (renamed to HasPadding); the same goes
for arrays of oddly sized ints.  Vectors also suffer from this;
in fact the problem for vectors is much worse, because basic
vector assumptions seem to be broken by vectors whose element
type has alignment padding.  I didn't try to fix any of these
vector problems.  (2) The code for extracting smaller integers
from larger ones (in the "int union" case) was wrong on
big-endian machines for integers whose size is not a multiple
of 8 bits, like i1.  This is probably impossible to hit via
llvm-gcc, but I fixed it anyway and added a testcase.  I also
removed some trailing whitespace and fixed an obvious typo in
a function name.

llvm-svn: 43672
Duncan Sands 2007-11-04 14:43:57 +00:00
parent 5c1b044899
commit 399d97987b
3 changed files with 91 additions and 45 deletions
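
For context, here is a minimal sketch of the three size queries involved. This is an illustration, not part of the commit, and the values assume a typical 32-bit x86 TargetData:

    // x86 long double: 80 bits of data in a 96-bit allocation slot.
    const Type *Ty = Type::X86_FP80Ty;
    TD.getTypeSizeInBits(Ty);      // 80: bits of actual data
    TD.getTypeStoreSizeInBits(Ty); // 80: data size rounded up to whole bytes
    TD.getABITypeSizeInBits(Ty);   // 96: allocation size, including
                                   //     alignment padding
    // An [N x x86_fp80] array strides by the ABI size, so each element is
    // followed by a 16-bit hole; this is the padding HasPadding must detect.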


@@ -277,7 +277,11 @@ public:
     assert(Idx < NumElements && "Invalid element idx!");
     return MemberOffsets[Idx];
   }
 
+  uint64_t getElementOffsetInBits(unsigned Idx) const {
+    return getElementOffset(Idx)*8;
+  }
+
 private:
   friend class TargetData;   // Only TargetData can create this class
   StructLayout(const StructType *ST, const TargetData &TD);


@@ -220,7 +220,7 @@ bool SROA::performScalarRepl(Function &F) {
         (isa<StructType>(AI->getAllocatedType()) ||
          isa<ArrayType>(AI->getAllocatedType())) &&
         AI->getAllocatedType()->isSized() &&
-        TD.getTypeSize(AI->getAllocatedType()) < SRThreshold) {
+        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold) {
       // Check that all of the users of the allocation are capable of being
       // transformed.
       switch (isSafeAllocaToScalarRepl(AI)) {
@@ -519,7 +519,8 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
   // If not the whole aggregate, give up.
   const TargetData &TD = getAnalysis<TargetData>();
-  if (Length->getZExtValue() != TD.getTypeSize(AI->getType()->getElementType()))
+  if (Length->getZExtValue() !=
+      TD.getABITypeSize(AI->getType()->getElementType()))
     return MarkUnsafe(Info);
 
   // We only know about memcpy/memset/memmove.
@@ -658,17 +659,17 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
       const Type *ValTy = EltTy;
       if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
         ValTy = VTy->getElementType();
 
       // Construct an integer with the right value.
-      unsigned EltSize = TD.getTypeSize(ValTy);
-      APInt OneVal(EltSize*8, CI->getZExtValue());
+      unsigned EltSize = TD.getTypeSizeInBits(ValTy);
+      APInt OneVal(EltSize, CI->getZExtValue());
       APInt TotalVal(OneVal);
       // Set each byte.
-      for (unsigned i = 0; i != EltSize-1; ++i) {
+      for (unsigned i = 0; 8*i < EltSize; ++i) {
         TotalVal = TotalVal.shl(8);
         TotalVal |= OneVal;
       }
 
       // Convert the integer value to the appropriate type.
       StoreVal = ConstantInt::get(TotalVal);
       if (isa<PointerType>(ValTy))
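
As a worked instance of the rewritten byte-replication loop above (EltSize is now in bits, not bytes), consider a memset of an i32 element with the byte value 0xAB, so OneVal = 0xAB and the loop runs while 8*i < 32:

    // i = 0: TotalVal = (0xAB       << 8) | 0xAB = 0xABAB
    // i = 1: TotalVal = (0xABAB     << 8) | 0xAB = 0xABABAB
    // i = 2: TotalVal = (0xABABAB   << 8) | 0xAB = 0xABABABAB
    // i = 3: TotalVal = (0xABABABAB << 8) | 0xAB = 0xABABABAB
    // The final iteration is a no-op: the high byte shifts out of the
    // 32-bit APInt, so the value is already saturated with copies of
    // the byte.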
@@ -701,7 +702,7 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
         OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getNameStr(),
                                    MI);
 
-    unsigned EltSize = TD.getTypeSize(EltTy);
+    unsigned EltSize = TD.getABITypeSize(EltTy);
 
     // Finally, insert the meminst for this element.
     if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
@@ -730,43 +731,45 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
     }
   }
 }
 
-/// HasStructPadding - Return true if the specified type has any structure
-/// padding, false otherwise.
-static bool HasStructPadding(const Type *Ty, const TargetData &TD) {
+/// HasPadding - Return true if the specified type has any structure or
+/// alignment padding, false otherwise.
+static bool HasPadding(const Type *Ty, const TargetData &TD) {
   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
     const StructLayout *SL = TD.getStructLayout(STy);
     unsigned PrevFieldBitOffset = 0;
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      unsigned FieldBitOffset = SL->getElementOffset(i)*8;
+      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
 
       // Padding in sub-elements?
-      if (HasStructPadding(STy->getElementType(i), TD))
+      if (HasPadding(STy->getElementType(i), TD))
         return true;
 
       // Check to see if there is any padding between this element and the
       // previous one.
       if (i) {
         unsigned PrevFieldEnd =
           PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
         if (PrevFieldEnd < FieldBitOffset)
           return true;
       }
 
       PrevFieldBitOffset = FieldBitOffset;
     }
 
     // Check for tail padding.
     if (unsigned EltCount = STy->getNumElements()) {
       unsigned PrevFieldEnd = PrevFieldBitOffset +
         TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
-      if (PrevFieldEnd < SL->getSizeInBytes()*8)
+      if (PrevFieldEnd < SL->getSizeInBits())
         return true;
     }
 
   } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    return HasStructPadding(ATy->getElementType(), TD);
+    return HasPadding(ATy->getElementType(), TD);
+  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    return HasPadding(VTy->getElementType(), TD);
   }
-  return false;
+  return TD.getTypeSizeInBits(Ty) != TD.getABITypeSizeInBits(Ty);
 }
 
 /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
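
The new fallback return in HasPadding is what catches the oddly sized integers from the commit message. A rough worked instance, assuming a TargetData that gives i36 an 8-byte ABI slot (the exact numbers depend on the target description):

    // TD.getTypeSizeInBits(i36)    == 36  (bits of data)
    // TD.getABITypeSizeInBits(i36) == 64  (allocated size)
    // 36 != 64, so HasPadding(i36, TD) is now true, and via the array and
    // vector cases above, [N x i36] and <N x i36> report padding as well.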
@@ -793,10 +796,9 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
   // types, but may actually be used.  In these cases, we refuse to promote the
   // struct.
   if (Info.isMemCpySrc && Info.isMemCpyDst &&
-      HasStructPadding(AI->getType()->getElementType(),
-                       getAnalysis<TargetData>()))
+      HasPadding(AI->getType()->getElementType(), getAnalysis<TargetData>()))
     return 0;
 
   // If we require cleanup, return 1, otherwise return 3.
   return Info.needsCanon ? 1 : 3;
 }
@@ -929,10 +931,10 @@ static bool MergeInType(const Type *In, const Type *&Accum,
     return false;
   }
 }
 
-/// getUIntAtLeastAsBitAs - Return an unsigned integer type that is at least
+/// getUIntAtLeastAsBigAs - Return an unsigned integer type that is at least
 /// as big as the specified type.  If there is no suitable type, this returns
 /// null.
-const Type *getUIntAtLeastAsBitAs(unsigned NumBits) {
+const Type *getUIntAtLeastAsBigAs(unsigned NumBits) {
   if (NumBits > 64) return 0;
   if (NumBits > 32) return Type::Int64Ty;
   if (NumBits > 16) return Type::Int32Ty;
@@ -974,7 +976,7 @@ const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
       // Check to see if this is stepping over an element: GEP Ptr, int C
       if (GEP->getNumOperands() == 2 && isa<ConstantInt>(GEP->getOperand(1))) {
         unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue();
-        unsigned ElSize = TD.getTypeSize(PTy->getElementType());
+        unsigned ElSize = TD.getABITypeSize(PTy->getElementType());
         unsigned BitOffset = Idx*ElSize*8;
         if (BitOffset > 64 || !isPowerOf2_32(ElSize)) return 0;
@@ -983,7 +985,7 @@ const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
         if (SubElt == 0) return 0;
         if (SubElt != Type::VoidTy && SubElt->isInteger()) {
           const Type *NewTy =
-            getUIntAtLeastAsBitAs(TD.getTypeSize(SubElt)*8+BitOffset);
+            getUIntAtLeastAsBigAs(TD.getABITypeSizeInBits(SubElt)+BitOffset);
           if (NewTy == 0 || MergeInType(NewTy, UsedType, TD)) return 0;
           continue;
         }
@@ -1020,7 +1022,7 @@ const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
       } else {
         return 0;
       }
-      const Type *NTy = getUIntAtLeastAsBitAs(TD.getTypeSize(AggTy)*8);
+      const Type *NTy = getUIntAtLeastAsBigAs(TD.getABITypeSizeInBits(AggTy));
       if (NTy == 0 || MergeInType(NTy, UsedType, TD)) return 0;
       const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial);
       if (SubTy == 0) return 0;
@@ -1083,7 +1085,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
         NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
       } else {
         // Must be an element access.
-        unsigned Elt = Offset/(TD.getTypeSize(PTy->getElementType())*8);
+        unsigned Elt = Offset/TD.getABITypeSizeInBits(PTy->getElementType());
         NV = new ExtractElementInst(
                        NV, ConstantInt::get(Type::Int32Ty, Elt), "tmp", LI);
       }
@@ -1094,17 +1096,20 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
       NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
     } else {
       const IntegerType *NTy = cast<IntegerType>(NV->getType());
-      unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType());
 
       // If this is a big-endian system and the load is narrower than the
       // full alloca type, we need to do a shift to get the right bits.
       int ShAmt = 0;
       if (TD.isBigEndian()) {
-        ShAmt = NTy->getBitWidth()-LIBitWidth-Offset;
+        // On big-endian machines, the lowest bit is stored at the bit offset
+        // from the pointer given by getTypeStoreSizeInBits.  This matters for
+        // integers with a bitwidth that is not a multiple of 8.
+        ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+                TD.getTypeStoreSizeInBits(LI->getType()) - Offset;
       } else {
         ShAmt = Offset;
       }
 
       // Note: we support negative bitwidths (with shl) which are not defined.
      // We do this to support (f.e.) loads off the end of a structure where
      // only some bits are used.
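
To see why store sizes are the right quantities here, work through the load from the new testcase at the end of this commit: the alloca is converted to an i16 (NTy = i16), and element 1 of a [2 x i1] overlay is loaded, so LI->getType() is i1 and Offset is 8. On a big-endian machine:

    // old: ShAmt = NTy->getBitWidth() - LIBitWidth - Offset = 16 - 1 - 8 = 7
    // new: ShAmt = TD.getTypeStoreSizeInBits(i16)             // 16
    //            - TD.getTypeStoreSizeInBits(i1) - Offset     // - 8 - 8
    //            = 0
    // The old formula mixed a store-layout offset with the raw 1-bit width
    // and produced a bogus 7-bit shift; the corrected one emits no shift.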
@@ -1118,6 +1123,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
                          LI->getName(), LI);
 
       // Finally, unconditionally truncate the integer to the right width.
+      unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType());
       if (LIBitWidth < NTy->getBitWidth())
         NV = new TruncInst(NV, IntegerType::get(LIBitWidth),
                            LI->getName(), LI);
@@ -1151,9 +1157,9 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
         // access or a bitcast to another vector type.
         if (isa<VectorType>(SV->getType())) {
           SV = new BitCastInst(SV, AllocaType, SV->getName(), SI);
         } else {
           // Must be an element insertion.
-          unsigned Elt = Offset/(TD.getTypeSize(PTy->getElementType())*8);
+          unsigned Elt = Offset/TD.getABITypeSizeInBits(PTy->getElementType());
           SV = new InsertElementInst(Old, SV,
                                      ConstantInt::get(Type::Int32Ty, Elt),
                                      "tmp", SI);
@@ -1170,7 +1176,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
         // If it is a pointer, do the same, and also handle ptr->ptr casts
         // here.
         unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
-        unsigned DestWidth = AllocaType->getPrimitiveSizeInBits();
+        unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
         if (SV->getType()->isFloatingPoint())
           SV = new BitCastInst(SV, IntegerType::get(SrcWidth),
                                SV->getName(), SI);
@@ -1180,16 +1186,20 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
        // Always zero extend the value if needed.
        if (SV->getType() != AllocaType)
          SV = new ZExtInst(SV, AllocaType, SV->getName(), SI);
 
        // If this is a big-endian system and the store is narrower than the
        // full alloca type, we need to do a shift to get the right bits.
        int ShAmt = 0;
        if (TD.isBigEndian()) {
-         ShAmt = DestWidth-SrcWidth-Offset;
+         // On big-endian machines, the lowest bit is stored at the bit offset
+         // from the pointer given by getTypeStoreSizeInBits.  This matters for
+         // integers with a bitwidth that is not a multiple of 8.
+         ShAmt = TD.getTypeStoreSizeInBits(AllocaType) -
+                 TD.getTypeStoreSizeInBits(SV->getType()) - Offset;
        } else {
          ShAmt = Offset;
        }
 
        // Note: we support negative bitwidths (with shr) which are not defined.
        // We do this to support (f.e.) stores off the end of a structure where
        // only some bits in the structure are set.
@@ -1225,8 +1235,9 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
     const PointerType *AggPtrTy =
       cast<PointerType>(GEP->getOperand(0)->getType());
     const TargetData &TD = getAnalysis<TargetData>();
-    unsigned AggSizeInBits = TD.getTypeSize(AggPtrTy->getElementType())*8;
+    unsigned AggSizeInBits =
+      TD.getABITypeSizeInBits(AggPtrTy->getElementType());
 
     // Check to see if this is stepping over an element: GEP Ptr, int C
     unsigned NewOffset = Offset;
     if (GEP->getNumOperands() == 2) {
@@ -1239,12 +1250,13 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
       unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
       const Type *AggTy = AggPtrTy->getElementType();
       if (const SequentialType *SeqTy = dyn_cast<SequentialType>(AggTy)) {
-        unsigned ElSizeBits = TD.getTypeSize(SeqTy->getElementType())*8;
+        unsigned ElSizeBits =
+          TD.getABITypeSizeInBits(SeqTy->getElementType());
         NewOffset += ElSizeBits*Idx;
       } else if (const StructType *STy = dyn_cast<StructType>(AggTy)) {
         unsigned EltBitOffset =
-          TD.getStructLayout(STy)->getElementOffset(Idx)*8;
+          TD.getStructLayout(STy)->getElementOffsetInBits(Idx);
         NewOffset += EltBitOffset;
       } else {
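
A brief worked instance of the stride computation above, reusing the long double example (illustrative 32-bit x86 values):

    // Stepping to element Idx of [4 x x86_fp80]:
    //   old: NewOffset += Idx * TD.getTypeSize(x86_fp80)*8         // Idx * 80
    //   new: NewOffset += Idx * TD.getABITypeSizeInBits(x86_fp80)  // Idx * 96
    // Element 2 really lives at bit offset 192, not 160, because each
    // element occupies a padded 96-bit slot.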


@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep shr
+
+%struct.S = type { i16 }
+
+define i1 @f(i16 signext %b) zeroext {
+entry:
+  %b_addr = alloca i16                          ; <i16*> [#uses=2]
+  %retval = alloca i32                          ; <i32*> [#uses=2]
+  %s = alloca %struct.S                         ; <%struct.S*> [#uses=2]
+  %tmp = alloca i32                             ; <i32*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32        ; <i32> [#uses=0]
+  store i16 %b, i16* %b_addr
+  %tmp1 = getelementptr %struct.S* %s, i32 0, i32 0     ; <i16*> [#uses=1]
+  %tmp2 = load i16* %b_addr, align 2            ; <i16> [#uses=1]
+  store i16 %tmp2, i16* %tmp1, align 2
+  %tmp3 = getelementptr %struct.S* %s, i32 0, i32 0     ; <i16*> [#uses=1]
+  %tmp34 = bitcast i16* %tmp3 to [2 x i1]*      ; <[2 x i1]*> [#uses=1]
+  %tmp5 = getelementptr [2 x i1]* %tmp34, i32 0, i32 1  ; <i1*> [#uses=1]
+  %tmp6 = load i1* %tmp5, align 1               ; <i1> [#uses=1]
+  %tmp67 = zext i1 %tmp6 to i32                 ; <i32> [#uses=1]
+  store i32 %tmp67, i32* %tmp, align 4
+  %tmp8 = load i32* %tmp, align 4               ; <i32> [#uses=1]
+  store i32 %tmp8, i32* %retval, align 4
+  br label %return
+
+return:                                         ; preds = %entry
+  %retval9 = load i32* %retval                  ; <i32> [#uses=1]
+  %retval910 = trunc i32 %retval9 to i1         ; <i1> [#uses=1]
+  ret i1 %retval910
+}
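
Assuming the TargetData in effect here is big-endian (the configuration the old code got wrong), the load of element 1 of the [2 x i1] overlay has Offset = 8, so the corrected shift amount is 16 - 8 - 8 = 0 and scalarrepl emits no shr at all; the RUN line's "not grep shr" verifies exactly that.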