[GlobalOpt] Remove heap SROA

GlobalOpt implements heap SROA (SROA for a malloc-allocated struct or array
of structs), which is largely undertested (heap-sra-[1234].ll are basically the
same test with very little difference) and does not trigger at all when
bootstrapping clang (it only supports the case of a single store).

The heap SROA implementation also causes PR50027 (a GEP is not properly
handled, leading to a crash or miscompile). Just drop the implementation.
I have deleted some obviously duplicated tests but kept
`heap-sra-[12]{,-no-nullopt}.ll`.
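
For reference, the removed transformation matched a pattern like the one in
the heap-sra tests (a minimal sketch; only a single store of the malloc into
the global, plus simple loads, GEPs, and null icmps of the loaded pointer,
were supported):

    %struct.foo = type { i32, i32 }
    @X = internal global %struct.foo* null

    declare noalias i8* @malloc(i64)

    define void @bar(i64 %Size) {
    entry:
      %mallocsize = mul i64 8, %Size
      %malloccall = tail call i8* @malloc(i64 %mallocsize)
      %.sub = bitcast i8* %malloccall to %struct.foo*
      store %struct.foo* %.sub, %struct.foo** @X, align 4
      ret void
    }

It would split @X into per-field globals @X.f0 and @X.f1, each initialized
from its own malloc, and rewrite loads and GEPs of @X to use the per-field
pointers; the GEP rewriting is the part PR50027 reports as broken.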

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D102257

Fangrui Song, 2021-05-11 11:34:37 -07:00
commit 129f466e22 (parent ae2b36e8bd)
9 changed files with 15 additions and 721 deletions

llvm/lib/Transforms/IPO/GlobalOpt.cpp

@@ -78,7 +78,6 @@ using namespace llvm;
STATISTIC(NumMarked , "Number of globals marked constant");
STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted , "Number of globals deleted");
STATISTIC(NumGlobUses , "Number of global uses devirtualized");
@@ -996,9 +995,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
/// Scan the use-list of V checking to make sure that there are no complex uses
/// of V. We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV,
SmallPtrSetImpl<const PHINode*> &PHIs) {
static bool
valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV) {
for (const User *U : V->users()) {
const Instruction *Inst = cast<Instruction>(U);
@@ -1012,24 +1011,8 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
continue; // Otherwise, storing through it, or storing into GV... fine.
}
// Must index into the array and into the struct.
if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
return false;
continue;
}
if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
// PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
// cycles.
if (PHIs.insert(PN).second)
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
return false;
continue;
}
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV))
return false;
continue;
}
@@ -1039,465 +1022,6 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
return true;
}
/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
/// allocation into loads from the global and uses of the resultant pointer.
/// Further, delete the store into GV. This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
while (!Alloc->use_empty()) {
Instruction *U = cast<Instruction>(*Alloc->user_begin());
Instruction *InsertPt = U;
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// If this is the store of the allocation into the global, remove it.
if (SI->getOperand(1) == GV) {
SI->eraseFromParent();
continue;
}
} else if (PHINode *PN = dyn_cast<PHINode>(U)) {
// Insert the load in the corresponding predecessor, not right before the
// PHI.
InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
} else if (isa<BitCastInst>(U)) {
// Must be bitcast between the malloc and store to initialize the global.
ReplaceUsesOfMallocWithGlobal(U, GV);
U->eraseFromParent();
continue;
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
// If this is a "GEP bitcast" and the user is a store to the global, then
// just process it as a bitcast.
if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
if (SI->getOperand(1) == GV) {
// Must be bitcast GEP between the malloc and store to initialize
// the global.
ReplaceUsesOfMallocWithGlobal(GEPI, GV);
GEPI->eraseFromParent();
continue;
}
}
// Insert a load from the global, and use it instead of the malloc.
Value *NL =
new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
U->replaceUsesOfWith(Alloc, NL);
}
}
/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
/// perform heap SRA on. This permits GEP's that index through the array and
/// struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
for (const User *U : V->users()) {
const Instruction *UI = cast<Instruction>(U);
// Comparison against null is ok.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
continue;
}
// getelementptr is also ok, but only a simple form.
if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
// Must index into the array and into the struct.
if (GEPI->getNumOperands() < 3)
return false;
// Otherwise the GEP is ok.
continue;
}
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
if (!LoadUsingPHIsPerLoad.insert(PN).second)
// This means some phi nodes are dependent on each other.
// Avoid infinite looping!
return false;
if (!LoadUsingPHIs.insert(PN).second)
// If we have already analyzed this PHI, then it is safe.
continue;
// Make sure all uses of the PHI are simple enough to transform.
if (!LoadUsesSimpleEnoughForHeapSRA(PN,
LoadUsingPHIs, LoadUsingPHIsPerLoad))
return false;
continue;
}
// Otherwise we don't know what this is, not ok.
return false;
}
return true;
}
/// If all users of values loaded from GV are simple enough to perform HeapSRA,
/// return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
Instruction *StoredVal) {
SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
for (const User *U : GV->users())
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
LoadUsingPHIsPerLoad))
return false;
LoadUsingPHIsPerLoad.clear();
}
// If we reach here, we know that all uses of the loads and transitive uses
// (through PHI nodes) are simple enough to transform. However, we don't know
// that all inputs to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
for (const PHINode *PN : LoadUsingPHIs) {
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
// PHI of the stored value itself is ok.
if (InVal == StoredVal) continue;
if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
// One of the PHIs in our set is (optimistically) ok.
if (LoadUsingPHIs.count(InPN))
continue;
return false;
}
// Load from GV is ok.
if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
if (LI->getOperand(0) == GV)
continue;
// UNDEF? NULL?
// Anything else is rejected.
return false;
}
}
return true;
}
static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
std::vector<Value *> &FieldVals = InsertedScalarizedValues[V];
if (FieldNo >= FieldVals.size())
FieldVals.resize(FieldNo+1);
// If we already have this value, just reuse the previously scalarized
// version.
if (Value *FieldVal = FieldVals[FieldNo])
return FieldVal;
// Depending on what instruction this is, we have several cases.
Value *Result;
if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
// This is a scalarized version of the load from the global. Just create
// a new Load of the scalarized global.
Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
InsertedScalarizedValues, PHIsToRewrite);
Result = new LoadInst(V->getType()->getPointerElementType(), V,
LI->getName() + ".f" + Twine(FieldNo), LI);
} else {
PHINode *PN = cast<PHINode>(V);
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
PointerType *PTy = cast<PointerType>(PN->getType());
StructType *ST = cast<StructType>(PTy->getElementType());
unsigned AS = PTy->getAddressSpace();
PHINode *NewPN =
PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
PN->getNumIncomingValues(),
PN->getName()+".f"+Twine(FieldNo), PN);
Result = NewPN;
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
}
return FieldVals[FieldNo] = Result;
}
/// Given a load instruction and a value derived from the load, rewrite the
/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
// If this is a comparison against null, handle it.
if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
// If we have a setcc of the loaded pointer, we can use a setcc of any
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
InsertedScalarizedValues, PHIsToRewrite);
Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
Constant::getNullValue(NPtr->getType()),
SCI->getName());
SCI->replaceAllUsesWith(New);
SCI->eraseFromParent();
return;
}
// Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
&& "Unexpected GEPI!");
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
InsertedScalarizedValues, PHIsToRewrite);
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
return;
}
// Recursively transform the users of PHI nodes. This will lazily create the
// PHIs that are needed for individual elements. Keep track of what PHIs we
// see in InsertedScalarizedValues so that we don't get infinite loops (very
// antisocial). If the PHI is already in InsertedScalarizedValues, it has
// already been seen first by another load, so its uses have already been
// processed.
PHINode *PN = cast<PHINode>(LoadUser);
if (!InsertedScalarizedValues.insert(std::make_pair(PN,
std::vector<Value *>())).second)
return;
// If this is the first time we've seen this PHI, recursively process all
// users.
for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
}
/// We are performing Heap SRoA on a global. Ptr is a value loaded from the
/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead.
/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
std::vector<std::pair<PHINode *, unsigned> > &PHIsToRewrite) {
for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
if (Load->use_empty()) {
Load->eraseFromParent();
InsertedScalarizedValues.erase(Load);
}
}
/// CI is an allocation of an array of structures. Break it up into multiple
/// allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI
<< '\n');
Type *MAT = getMallocAllocatedType(CI, TLI);
StructType *STy = cast<StructType>(MAT);
// There is guaranteed to be at least one use of the malloc (storing
// it into GV). If there are other uses, change them to be uses of
// the global to simplify later code. This also deletes the store
// into GV.
ReplaceUsesOfMallocWithGlobal(CI, GV);
// Okay, at this point, there are no users of the malloc. Insert N
// new mallocs at the same place as CI, and N globals.
std::vector<Value *> FieldGlobals;
std::vector<Value *> FieldMallocs;
SmallVector<OperandBundleDef, 1> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
unsigned AS = GV->getType()->getPointerAddressSpace();
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
Type *FieldTy = STy->getElementType(FieldNo);
PointerType *PFieldTy = PointerType::get(FieldTy, AS);
GlobalVariable *NGV = new GlobalVariable(
*GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
nullptr, GV->getThreadLocalMode());
NGV->copyAttributesFrom(GV);
FieldGlobals.push_back(NGV);
unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
Type *IntPtrTy = DL.getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, OpBundles, nullptr,
CI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
}
// The tricky aspect of this transformation is handling the case when malloc
// fails. In the original code, malloc failing would set the result pointer
// of malloc to null. In this case, some mallocs could succeed and others
// could fail. As such, we emit code that looks like this:
// F0 = malloc(field0)
// F1 = malloc(field1)
// F2 = malloc(field2)
// if (F0 == 0 || F1 == 0 || F2 == 0) {
// if (F0) { free(F0); F0 = 0; }
// if (F1) { free(F1); F1 = 0; }
// if (F2) { free(F2); F2 = 0; }
// }
// The malloc can also fail if its argument is too large.
Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
ConstantZero, "isneg");
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
Constant::getNullValue(FieldMallocs[i]->getType()),
"isnull");
RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
}
// Split the basic block at the old malloc.
BasicBlock *OrigBB = CI->getParent();
BasicBlock *ContBB =
OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
"malloc_ret_null",
OrigBB->getParent());
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
// branch on RunningOr.
OrigBB->getTerminator()->eraseFromParent();
BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
// Within the NullPtrBlock, we need to emit a comparison and branch for each
// pointer, because some may be null while others are not.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Value *GVVal =
new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
FieldGlobals[i], "tmp", NullPtrBlock);
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()));
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
OrigBB->getParent());
BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
OrigBB->getParent());
Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
Cmp, NullPtrBlock);
// Fill in FreeBlock.
CallInst::CreateFree(GVVal, OpBundles, BI);
new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
FreeBlock);
BranchInst::Create(NextBlock, FreeBlock);
NullPtrBlock = NextBlock;
}
BranchInst::Create(ContBB, NullPtrBlock);
// CI is no longer needed, remove it.
CI->eraseFromParent();
/// As we process loads, if we can't immediately update all uses of the load,
/// keep track of what scalarized loads are inserted for a given load.
DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite;
// Okay, the malloc site is completely handled. All of the uses of GV are now
// loads, and all uses of those loads are simple. Rewrite them to use loads
// of the per-field globals instead.
for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
continue;
}
// Must be a store of null.
StoreInst *SI = cast<StoreInst>(User);
assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
"Unexpected heap-sra user!");
// Insert a store of null into each global.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
Constant *Null = Constant::getNullValue(ValTy);
new StoreInst(Null, FieldGlobals[i], SI);
}
// Erase the original store.
SI->eraseFromParent();
}
// While we have PHIs that are interesting to rewrite, do it.
while (!PHIsToRewrite.empty()) {
PHINode *PN = PHIsToRewrite.back().first;
unsigned FieldNo = PHIsToRewrite.back().second;
PHIsToRewrite.pop_back();
PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
// Add all the incoming values. This can materialize more phis.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
PHIsToRewrite);
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
// Drop all inter-phi links and any loads that made it this far.
for (DenseMap<Value *, std::vector<Value *>>::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I->first))
PN->dropAllReferences();
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->dropAllReferences();
}
// Delete all the phis and loads now that inter-references are dead.
for (DenseMap<Value *, std::vector<Value *>>::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I->first))
PN->eraseFromParent();
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->eraseFromParent();
}
// The old global is now dead, remove it.
GV->eraseFromParent();
++NumHeapSRA;
return cast<GlobalVariable>(FieldGlobals[0]);
}
/// This function is called when we see a pointer global variable with a single
/// value stored into it that is a malloc or a cast of malloc.
static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
@@ -1520,11 +1044,9 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This allows the
// malloc to be stored into the specified global, loaded icmp'd, and
// GEP'd. These are all things we could transform to using the global
// for.
SmallPtrSet<const PHINode*, 8> PHIs;
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
// malloc to be stored into the specified global, loaded icmp'd.
// These are all things we could transform to using the global for.
if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
return false;
// If we have a global that is only initialized with a fixed size malloc,
@@ -1545,54 +1067,6 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
return true;
}
// If the allocation is an array of structures, consider transforming this
// into multiple malloc'd arrays, one for each field. This is basically
// SRoA for malloc'd memory.
if (Ordering != AtomicOrdering::NotAtomic)
return false;
// If this is an allocation of a fixed size array of structs, analyze as a
// variable size array. malloc [100 x struct],1 -> malloc struct, 100
if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
AllocTy = AT->getElementType();
StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
if (!AllocSTy)
return false;
// If the structure has an unreasonable number of fields, leave it
// alone.
if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
Type *IntPtrTy = DL.getIntPtrType(CI->getType());
unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
SmallVector<OperandBundleDef, 1> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
Instruction *Malloc =
CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
OpBundles, nullptr, CI->getName());
Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
CI = cast<CallInst>(BCI->getOperand(0));
else
CI = cast<CallInst>(Malloc);
}
PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
TLI);
return true;
}
return false;
}
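
The trickiest part of the deleted code is the malloc-failure handling
sketched in the comment inside PerformHeapAllocSRoA. In IR terms, the guard
emitted after the per-field mallocs looked roughly like this (a sketch:
%isneg, the "isnull"/"tmp" names, and the malloc_ret_null/malloc_cont blocks
come from the deleted code; the types and the %F0/%F1 field pointers are
illustrative):

    %isneg = icmp slt i64 %size, 0          ; malloc argument too large?
    %isnull = icmp eq i32* %F0, null        ; did field 0's malloc fail?
    %tmp = or i1 %isneg, %isnull
    %isnull1 = icmp eq i32* %F1, null       ; did field 1's malloc fail?
    %tmp1 = or i1 %tmp, %isnull1
    br i1 %tmp1, label %malloc_ret_null, label %malloc_cont

  malloc_ret_null:
    ; for each field global: load it, compare against null, and if non-null
    ; free the allocation and store null back (the free_it/next blocks),
    ; then fall through to malloc_cont.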


@@ -1,8 +1,5 @@
; RUN: opt -globalopt -S < %s | FileCheck %s
; CHECK: @Y.f0
; CHECK: section ".foo"
; CHECK: @Y.f1
; CHECK: section ".foo"
; CHECK: @Y = {{.*}} section ".foo"
%struct.xyz = type { double, i32 }


@@ -1,10 +1,10 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
;; Heap SROA has been removed. This tests that we don't perform heap SROA.
; CHECK: @X =
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X.f0
; CHECK: @X.f1
define void @bar(i64 %Size) nounwind noinline {
entry:


@@ -1,10 +1,10 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
;; Heap SROA has been removed. This tests that we don't perform heap SROA.
; CHECK: @X =
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2]
; CHECK: @X.f0
; CHECK: @X.f1
define void @bar(i32 %Size) nounwind noinline {
entry:


@@ -1,41 +0,0 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X
; CHECK-NOT: @X.f0
define void @bar(i64 %Size) nounwind noinline #0 {
entry:
%mallocsize = mul i64 8, %Size ; <i64> [#uses=1]
; CHECK: mul i64 8, %Size
%malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
%.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
store %struct.foo* %.sub, %struct.foo** @X, align 4
ret void
}
declare noalias i8* @malloc(i64)
define i32 @baz() nounwind readonly noinline #0 {
bb1.thread:
; CHECK: load %struct.foo*, %struct.foo** @X, align 4
%0 = load %struct.foo*, %struct.foo** @X, align 4
br label %bb1
bb1: ; preds = %bb1, %bb1.thread
%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
%2 = load i32, i32* %1, align 4
%3 = add i32 %2, %sum.0.reg2mem.0
%indvar.next = add i32 %i.0.reg2mem.0, 1
%exitcond = icmp eq i32 %indvar.next, 1200
br i1 %exitcond, label %bb2, label %bb1
bb2: ; preds = %bb1
ret i32 %3
}
attributes #0 = { null_pointer_is_valid }


@@ -1,46 +0,0 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X.f0
; CHECK: @X.f1
define void @bar(i64 %Size) nounwind noinline {
entry:
%mallocsize = mul i64 8, %Size ; <i64> [#uses=1]
; CHECK: mul i64 %Size, 4
%malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
%.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
store %struct.foo* %.sub, %struct.foo** @X, align 4
ret void
}
declare noalias i8* @malloc(i64)
define i32 @baz() nounwind readonly noinline {
bb1.thread:
%0 = load %struct.foo*, %struct.foo** @X, align 4
br label %bb1
bb1: ; preds = %bb1, %bb1.thread
%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
%2 = load i32, i32* %1, align 4
%3 = add i32 %2, %sum.0.reg2mem.0
%indvar.next = add i32 %i.0.reg2mem.0, 1
%exitcond = icmp eq i32 %indvar.next, 1200
br i1 %exitcond, label %bb2, label %bb1
bb2: ; preds = %bb1
ret i32 %3
}
define void @bam(i64 %Size) nounwind noinline #0 {
entry:
%0 = load %struct.foo*, %struct.foo** @X, align 4
ret void
}
attributes #0 = { null_pointer_is_valid }


@@ -1,44 +0,0 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X
; CHECK-NOT: @X.f0
define void @bar(i64 %Size) nounwind noinline #0 {
entry:
%mallocsize = shl i64 %Size, 3 ; <i64> [#uses=1]
%malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
; CHECK: shl i64 %Size, 3
%.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
store %struct.foo* %.sub, %struct.foo** @X, align 4
ret void
}
declare noalias i8* @malloc(i64)
define i32 @baz() nounwind readonly noinline #0 {
; CHECK-LABEL: @baz(
bb1.thread:
; CHECK: load %struct.foo*, %struct.foo** @X, align 4
%0 = load %struct.foo*, %struct.foo** @X, align 4
br label %bb1
bb1: ; preds = %bb1, %bb1.thread
%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
%2 = load i32, i32* %1, align 4
; CHECK: load i32, i32* %1, align 4
%3 = add i32 %2, %sum.0.reg2mem.0
%indvar.next = add i32 %i.0.reg2mem.0, 1
%exitcond = icmp eq i32 %indvar.next, 1200
br i1 %exitcond, label %bb2, label %bb1
bb2: ; preds = %bb1
ret i32 %3
}
attributes #0 = { null_pointer_is_valid }


@@ -1,47 +0,0 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null
; CHECK: @X.f0
; CHECK: @X.f1
define void @bar(i64 %Size) nounwind noinline {
entry:
%mallocsize = shl i64 %Size, 3 ; <i64> [#uses=1]
%malloccall = tail call i8* @malloc(i64 %mallocsize) ; <i8*> [#uses=1]
; CHECK: mul i64 %Size, 4
%.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1]
store %struct.foo* %.sub, %struct.foo** @X, align 4
ret void
}
declare noalias i8* @malloc(i64)
define i32 @baz() nounwind readonly noinline {
bb1.thread:
%0 = load %struct.foo*, %struct.foo** @X, align 4
br label %bb1
bb1: ; preds = %bb1, %bb1.thread
%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
%2 = load i32, i32* %1, align 4
%3 = add i32 %2, %sum.0.reg2mem.0
%indvar.next = add i32 %i.0.reg2mem.0, 1
%exitcond = icmp eq i32 %indvar.next, 1200
br i1 %exitcond, label %bb2, label %bb1
bb2: ; preds = %bb1
ret i32 %3
}
define void @bam(i64 %Size) nounwind noinline #0 {
entry:
%0 = load %struct.foo*, %struct.foo** @X, align 4
ret void
}
attributes #0 = { null_pointer_is_valid }


@@ -1,8 +1,9 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
; CHECK: tmp.f1 = phi i32*
; CHECK: tmp.f0 = phi i32*
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
;; Heap SROA has been removed. This tests that we don't perform heap SROA.
; CHECK: @X =
%struct.foo = type { i32, i32 }
@X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2]