Change uses of getTypeSize to getABITypeSize, getTypeStoreSize

or getTypeSizeInBits as appropriate in ScalarReplAggregates.
The right change to make was not always obvious, so it would
be good to have an SROA guru review this.  While there I noticed
and fixed some bugs: (1) arrays of x86 long double have
holes due to alignment padding, but this wasn't being spotted
by HasStructPadding (renamed to HasPadding); the same goes
for arrays of oddly sized ints.  Vectors also suffer from this;
in fact the problem for vectors is much worse, because basic
vector assumptions seem to be broken by vectors whose element
type has alignment padding.  I didn't try to fix any of these
vector problems.  (2) The code for extracting smaller integers
from larger ones (in the "int union" case) was wrong on
big-endian machines for integers whose size is not a multiple
of 8 bits, like i1.  This is probably impossible to hit via
llvm-gcc, but I fixed it anyway and added a testcase.  I also
removed some trailing whitespace and fixed an obvious typo in
a function name.

llvm-svn: 43672
Duncan Sands 2007-11-04 14:43:57 +00:00
parent 5c1b044899
commit 399d97987b
3 changed files with 91 additions and 45 deletions
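
For context, here is a minimal sketch of the three size queries involved. This is an illustration, not part of the commit, and the values assume a typical 32-bit x86 TargetData:

    // x86 long double: 80 bits of data in a 96-bit allocation slot.
    const Type *Ty = Type::X86_FP80Ty;
    TD.getTypeSizeInBits(Ty);      // 80: bits of actual data
    TD.getTypeStoreSizeInBits(Ty); // 80: data size rounded up to whole bytes
    TD.getABITypeSizeInBits(Ty);   // 96: allocation size, including
                                   //     alignment padding
    // An [N x x86_fp80] array strides by the ABI size, so each element is
    // followed by a 16-bit hole; this is the padding HasPadding must detect.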


@@ -277,7 +277,11 @@ public:
     assert(Idx < NumElements && "Invalid element idx!");
     return MemberOffsets[Idx];
   }
 
+  uint64_t getElementOffsetInBits(unsigned Idx) const {
+    return getElementOffset(Idx)*8;
+  }
+
 private:
   friend class TargetData;   // Only TargetData can create this class
   StructLayout(const StructType *ST, const TargetData &TD);


@@ -220,7 +220,7 @@ bool SROA::performScalarRepl(Function &F) {
         (isa<StructType>(AI->getAllocatedType()) ||
          isa<ArrayType>(AI->getAllocatedType())) &&
         AI->getAllocatedType()->isSized() &&
-        TD.getTypeSize(AI->getAllocatedType()) < SRThreshold) {
+        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold) {
       // Check that all of the users of the allocation are capable of being
       // transformed.
       switch (isSafeAllocaToScalarRepl(AI)) {
@@ -519,7 +519,8 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI,
   // If not the whole aggregate, give up.
   const TargetData &TD = getAnalysis<TargetData>();
-  if (Length->getZExtValue() != TD.getTypeSize(AI->getType()->getElementType()))
+  if (Length->getZExtValue() !=
+      TD.getABITypeSize(AI->getType()->getElementType()))
     return MarkUnsafe(Info);
 
   // We only know about memcpy/memset/memmove.
@@ -658,17 +659,17 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
       const Type *ValTy = EltTy;
       if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
         ValTy = VTy->getElementType();
 
       // Construct an integer with the right value.
-      unsigned EltSize = TD.getTypeSize(ValTy);
-      APInt OneVal(EltSize*8, CI->getZExtValue());
+      unsigned EltSize = TD.getTypeSizeInBits(ValTy);
+      APInt OneVal(EltSize, CI->getZExtValue());
       APInt TotalVal(OneVal);
       // Set each byte.
-      for (unsigned i = 0; i != EltSize-1; ++i) {
+      for (unsigned i = 0; 8*i < EltSize; ++i) {
         TotalVal = TotalVal.shl(8);
         TotalVal |= OneVal;
       }
 
       // Convert the integer value to the appropriate type.
       StoreVal = ConstantInt::get(TotalVal);
       if (isa<PointerType>(ValTy))
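
As a worked instance of the rewritten byte-replication loop above (EltSize is now in bits, not bytes), consider a memset of an i32 element with the byte value 0xAB, so OneVal = 0xAB and the loop runs while 8*i < 32:

    // i = 0: TotalVal = (0xAB       << 8) | 0xAB = 0xABAB
    // i = 1: TotalVal = (0xABAB     << 8) | 0xAB = 0xABABAB
    // i = 2: TotalVal = (0xABABAB   << 8) | 0xAB = 0xABABABAB
    // i = 3: TotalVal = (0xABABABAB << 8) | 0xAB = 0xABABABAB
    // The final iteration is a no-op: the high byte shifts out of the
    // 32-bit APInt, so the value is already saturated with copies of
    // the byte.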
@@ -701,7 +702,7 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
         OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getNameStr(),
                                    MI);
 
-    unsigned EltSize = TD.getTypeSize(EltTy);
+    unsigned EltSize = TD.getABITypeSize(EltTy);
 
     // Finally, insert the meminst for this element.
     if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
@@ -730,43 +731,45 @@ void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI,
     }
   }
 }
 
-/// HasStructPadding - Return true if the specified type has any structure
-/// padding, false otherwise.
-static bool HasStructPadding(const Type *Ty, const TargetData &TD) {
+/// HasPadding - Return true if the specified type has any structure or
+/// alignment padding, false otherwise.
+static bool HasPadding(const Type *Ty, const TargetData &TD) {
   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
     const StructLayout *SL = TD.getStructLayout(STy);
     unsigned PrevFieldBitOffset = 0;
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      unsigned FieldBitOffset = SL->getElementOffset(i)*8;
+      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
 
       // Padding in sub-elements?
-      if (HasStructPadding(STy->getElementType(i), TD))
+      if (HasPadding(STy->getElementType(i), TD))
         return true;
 
       // Check to see if there is any padding between this element and the
       // previous one.
       if (i) {
         unsigned PrevFieldEnd =
           PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
         if (PrevFieldEnd < FieldBitOffset)
           return true;
       }
 
       PrevFieldBitOffset = FieldBitOffset;
     }
 
     // Check for tail padding.
     if (unsigned EltCount = STy->getNumElements()) {
       unsigned PrevFieldEnd = PrevFieldBitOffset +
         TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
-      if (PrevFieldEnd < SL->getSizeInBytes()*8)
+      if (PrevFieldEnd < SL->getSizeInBits())
         return true;
     }
 
   } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    return HasStructPadding(ATy->getElementType(), TD);
+    return HasPadding(ATy->getElementType(), TD);
+  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    return HasPadding(VTy->getElementType(), TD);
   }
-  return false;
+  return TD.getTypeSizeInBits(Ty) != TD.getABITypeSizeInBits(Ty);
 }
 
 /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
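
The new fallback return in HasPadding is what catches the oddly sized integers from the commit message. A rough worked instance, assuming a TargetData that gives i36 an 8-byte ABI slot (the exact numbers depend on the target description):

    // TD.getTypeSizeInBits(i36)    == 36  (bits of data)
    // TD.getABITypeSizeInBits(i36) == 64  (allocated size)
    // 36 != 64, so HasPadding(i36, TD) is now true, and via the array and
    // vector cases above, [N x i36] and <N x i36> report padding as well.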
@@ -793,10 +796,9 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {
   // types, but may actually be used.  In these cases, we refuse to promote the
   // struct.
   if (Info.isMemCpySrc && Info.isMemCpyDst &&
-      HasStructPadding(AI->getType()->getElementType(),
-                       getAnalysis<TargetData>()))
+      HasPadding(AI->getType()->getElementType(), getAnalysis<TargetData>()))
     return 0;
 
   // If we require cleanup, return 1, otherwise return 3.
   return Info.needsCanon ? 1 : 3;
 }
@@ -929,10 +931,10 @@ static bool MergeInType(const Type *In, const Type *&Accum,
     return false;
   }
 }
 
-/// getUIntAtLeastAsBitAs - Return an unsigned integer type that is at least
+/// getUIntAtLeastAsBigAs - Return an unsigned integer type that is at least
 /// as big as the specified type.  If there is no suitable type, this returns
 /// null.
-const Type *getUIntAtLeastAsBitAs(unsigned NumBits) {
+const Type *getUIntAtLeastAsBigAs(unsigned NumBits) {
   if (NumBits > 64) return 0;
   if (NumBits > 32) return Type::Int64Ty;
   if (NumBits > 16) return Type::Int32Ty;
@@ -974,7 +976,7 @@ const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
       // Check to see if this is stepping over an element: GEP Ptr, int C
       if (GEP->getNumOperands() == 2 && isa<ConstantInt>(GEP->getOperand(1))) {
         unsigned Idx = cast<ConstantInt>(GEP->getOperand(1))->getZExtValue();
-        unsigned ElSize = TD.getTypeSize(PTy->getElementType());
+        unsigned ElSize = TD.getABITypeSize(PTy->getElementType());
         unsigned BitOffset = Idx*ElSize*8;
         if (BitOffset > 64 || !isPowerOf2_32(ElSize)) return 0;
@@ -983,7 +985,7 @@ const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
         if (SubElt == 0) return 0;
         if (SubElt != Type::VoidTy && SubElt->isInteger()) {
           const Type *NewTy =
-            getUIntAtLeastAsBitAs(TD.getTypeSize(SubElt)*8+BitOffset);
+            getUIntAtLeastAsBigAs(TD.getABITypeSizeInBits(SubElt)+BitOffset);
           if (NewTy == 0 || MergeInType(NewTy, UsedType, TD)) return 0;
           continue;
         }
@@ -1020,7 +1022,7 @@ const Type *SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial) {
       } else {
         return 0;
       }
-      const Type *NTy = getUIntAtLeastAsBitAs(TD.getTypeSize(AggTy)*8);
+      const Type *NTy = getUIntAtLeastAsBigAs(TD.getABITypeSizeInBits(AggTy));
       if (NTy == 0 || MergeInType(NTy, UsedType, TD)) return 0;
       const Type *SubTy = CanConvertToScalar(GEP, IsNotTrivial);
       if (SubTy == 0) return 0;
@@ -1083,7 +1085,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
         NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
       } else {
         // Must be an element access.
-        unsigned Elt = Offset/(TD.getTypeSize(PTy->getElementType())*8);
+        unsigned Elt = Offset/TD.getABITypeSizeInBits(PTy->getElementType());
         NV = new ExtractElementInst(
                        NV, ConstantInt::get(Type::Int32Ty, Elt), "tmp", LI);
       }
@@ -1094,17 +1096,20 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
       NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI);
     } else {
       const IntegerType *NTy = cast<IntegerType>(NV->getType());
-      unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType());
 
       // If this is a big-endian system and the load is narrower than the
       // full alloca type, we need to do a shift to get the right bits.
       int ShAmt = 0;
       if (TD.isBigEndian()) {
-        ShAmt = NTy->getBitWidth()-LIBitWidth-Offset;
+        // On big-endian machines, the lowest bit is stored at the bit offset
+        // from the pointer given by getTypeStoreSizeInBits.  This matters for
+        // integers with a bitwidth that is not a multiple of 8.
+        ShAmt = TD.getTypeStoreSizeInBits(NTy) -
+                TD.getTypeStoreSizeInBits(LI->getType()) - Offset;
       } else {
         ShAmt = Offset;
       }
 
       // Note: we support negative bitwidths (with shl) which are not defined.
      // We do this to support (f.e.) loads off the end of a structure where
      // only some bits are used.
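
To see why store sizes are the right quantities here, work through the load from the new testcase at the end of this commit: the alloca is converted to an i16 (NTy = i16), and element 1 of a [2 x i1] overlay is loaded, so LI->getType() is i1 and Offset is 8. On a big-endian machine:

    // old: ShAmt = NTy->getBitWidth() - LIBitWidth - Offset = 16 - 1 - 8 = 7
    // new: ShAmt = TD.getTypeStoreSizeInBits(i16)             // 16
    //            - TD.getTypeStoreSizeInBits(i1) - Offset     // - 8 - 8
    //            = 0
    // The old formula mixed a store-layout offset with the raw 1-bit width
    // and produced a bogus 7-bit shift; the corrected one emits no shift.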
@@ -1118,6 +1123,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
                          LI->getName(), LI);
 
       // Finally, unconditionally truncate the integer to the right width.
+      unsigned LIBitWidth = TD.getTypeSizeInBits(LI->getType());
       if (LIBitWidth < NTy->getBitWidth())
         NV = new TruncInst(NV, IntegerType::get(LIBitWidth),
                            LI->getName(), LI);
@@ -1151,9 +1157,9 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
         // access or a bitcast to another vector type.
         if (isa<VectorType>(SV->getType())) {
           SV = new BitCastInst(SV, AllocaType, SV->getName(), SI);
         } else {
           // Must be an element insertion.
-          unsigned Elt = Offset/(TD.getTypeSize(PTy->getElementType())*8);
+          unsigned Elt = Offset/TD.getABITypeSizeInBits(PTy->getElementType());
           SV = new InsertElementInst(Old, SV,
                                      ConstantInt::get(Type::Int32Ty, Elt),
                                      "tmp", SI);
@@ -1170,7 +1176,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
         // If it is a pointer, do the same, and also handle ptr->ptr casts
         // here.
         unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
-        unsigned DestWidth = AllocaType->getPrimitiveSizeInBits();
+        unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
         if (SV->getType()->isFloatingPoint())
           SV = new BitCastInst(SV, IntegerType::get(SrcWidth),
                                SV->getName(), SI);
@@ -1180,16 +1186,20 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
        // Always zero extend the value if needed.
        if (SV->getType() != AllocaType)
          SV = new ZExtInst(SV, AllocaType, SV->getName(), SI);
 
        // If this is a big-endian system and the store is narrower than the
        // full alloca type, we need to do a shift to get the right bits.
        int ShAmt = 0;
        if (TD.isBigEndian()) {
-         ShAmt = DestWidth-SrcWidth-Offset;
+         // On big-endian machines, the lowest bit is stored at the bit offset
+         // from the pointer given by getTypeStoreSizeInBits.  This matters for
+         // integers with a bitwidth that is not a multiple of 8.
+         ShAmt = TD.getTypeStoreSizeInBits(AllocaType) -
+                 TD.getTypeStoreSizeInBits(SV->getType()) - Offset;
        } else {
          ShAmt = Offset;
        }
 
        // Note: we support negative bitwidths (with shr) which are not defined.
        // We do this to support (f.e.) stores off the end of a structure where
        // only some bits in the structure are set.
@@ -1225,8 +1235,9 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
     const PointerType *AggPtrTy =
       cast<PointerType>(GEP->getOperand(0)->getType());
     const TargetData &TD = getAnalysis<TargetData>();
-    unsigned AggSizeInBits = TD.getTypeSize(AggPtrTy->getElementType())*8;
+    unsigned AggSizeInBits =
+      TD.getABITypeSizeInBits(AggPtrTy->getElementType());
 
     // Check to see if this is stepping over an element: GEP Ptr, int C
     unsigned NewOffset = Offset;
     if (GEP->getNumOperands() == 2) {
@@ -1239,12 +1250,13 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, unsigned Offset) {
       unsigned Idx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
       const Type *AggTy = AggPtrTy->getElementType();
       if (const SequentialType *SeqTy = dyn_cast<SequentialType>(AggTy)) {
-        unsigned ElSizeBits = TD.getTypeSize(SeqTy->getElementType())*8;
+        unsigned ElSizeBits =
+          TD.getABITypeSizeInBits(SeqTy->getElementType());
         NewOffset += ElSizeBits*Idx;
       } else if (const StructType *STy = dyn_cast<StructType>(AggTy)) {
         unsigned EltBitOffset =
-          TD.getStructLayout(STy)->getElementOffset(Idx)*8;
+          TD.getStructLayout(STy)->getElementOffsetInBits(Idx);
         NewOffset += EltBitOffset;
       } else {
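
A brief worked instance of the stride computation above, reusing the long double example (illustrative 32-bit x86 values):

    // Stepping to element Idx of [4 x x86_fp80]:
    //   old: NewOffset += Idx * TD.getTypeSize(x86_fp80)*8         // Idx * 80
    //   new: NewOffset += Idx * TD.getABITypeSizeInBits(x86_fp80)  // Idx * 96
    // Element 2 really lives at bit offset 192, not 160, because each
    // element occupies a padded 96-bit slot.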


@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep shr
+
+%struct.S = type { i16 }
+
+define i1 @f(i16 signext %b) zeroext {
+entry:
+  %b_addr = alloca i16                          ; <i16*> [#uses=2]
+  %retval = alloca i32                          ; <i32*> [#uses=2]
+  %s = alloca %struct.S                         ; <%struct.S*> [#uses=2]
+  %tmp = alloca i32                             ; <i32*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32        ; <i32> [#uses=0]
+  store i16 %b, i16* %b_addr
+  %tmp1 = getelementptr %struct.S* %s, i32 0, i32 0     ; <i16*> [#uses=1]
+  %tmp2 = load i16* %b_addr, align 2            ; <i16> [#uses=1]
+  store i16 %tmp2, i16* %tmp1, align 2
+  %tmp3 = getelementptr %struct.S* %s, i32 0, i32 0     ; <i16*> [#uses=1]
+  %tmp34 = bitcast i16* %tmp3 to [2 x i1]*      ; <[2 x i1]*> [#uses=1]
+  %tmp5 = getelementptr [2 x i1]* %tmp34, i32 0, i32 1  ; <i1*> [#uses=1]
+  %tmp6 = load i1* %tmp5, align 1               ; <i1> [#uses=1]
+  %tmp67 = zext i1 %tmp6 to i32                 ; <i32> [#uses=1]
+  store i32 %tmp67, i32* %tmp, align 4
+  %tmp8 = load i32* %tmp, align 4               ; <i32> [#uses=1]
+  store i32 %tmp8, i32* %retval, align 4
+  br label %return
+
+return:                                         ; preds = %entry
+  %retval9 = load i32* %retval                  ; <i32> [#uses=1]
+  %retval910 = trunc i32 %retval9 to i1         ; <i1> [#uses=1]
+  ret i1 %retval910
+}
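
Assuming the TargetData in effect here is big-endian (the configuration the old code got wrong), the load of element 1 of the [2 x i1] overlay has Offset = 8, so the corrected shift amount is 16 - 8 - 8 = 0 and scalarrepl emits no shr at all; the RUN line's "not grep shr" verifies exactly that.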