@@ -78,7 +78,6 @@ using namespace llvm;
STATISTIC(NumMarked , "Number of globals marked constant");
STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted , "Number of globals deleted");
STATISTIC(NumGlobUses , "Number of global uses devirtualized");
@@ -996,9 +995,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
/// Scan the use-list of V checking to make sure that there are no complex uses
/// of V. We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                                      const GlobalVariable *GV,
                                                      SmallPtrSetImpl<const PHINode*> &PHIs) {
static bool
valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                          const GlobalVariable *GV) {
  for (const User *U : V->users()) {
    const Instruction *Inst = cast<Instruction>(U);
@@ -1012,24 +1011,8 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN).second)
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
      if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV))
        return false;
      continue;
    }
@@ -1039,465 +1022,6 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
  return true;
}
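
// Illustrative sketch (not from the original source; the IR names below are
// hypothetical) of the kind of use-lists this predicate is meant to accept or
// reject, per its documentation and the checks above:
//
//   %p = call i8* @malloc(i64 %n)
//   store i8* %p, i8** @GV         ; ok: stored into the one permitted global
//   %x = load i8, i8* %p           ; ok: simple dereference of the pointer
//   %c = icmp eq i8* %p, null      ; ok: local comparison
//   store i8* %p, i8** @Other      ; rejected: pointer escapes to another global
//   call void @use(i8* %p)         ; rejected: unknown/complex use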

/// The Alloc pointer is stored into GV somewhere. Transform all uses of the
/// allocation into loads from the global and uses of the resultant pointer.
/// Further, delete the store into GV. This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->user_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL =
        new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}
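
// A rough before/after sketch of what the helper above produces (illustrative
// only; the IR names are hypothetical, not taken from the original source):
//
//   before:                          after:
//     %p = call i8* @malloc(...)       %p = call i8* @malloc(...)
//     store i8* %p, i8** @GV           ; the store of %p into @GV is deleted
//     %v = load i8, i8* %p             %GV.val = load i8*, i8** @GV
//                                      %v = load i8, i8* %GV.val
//
// i.e. every surviving use of the allocation is routed through a fresh load of
// the global, which is the shape the heap-SRA rewriting below expects.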

/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
/// perform heap SRA on. This permits GEP's that index through the array and
/// struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
                                           SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
                                           SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (const User *U : V->users()) {
    const Instruction *UI = cast<Instruction>(U);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      if (!LoadUsingPHIsPerLoad.insert(PN).second)
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN).second)
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}

/// If all users of values loaded from GV are simple enough to perform HeapSRA,
/// return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
                                                    Instruction *StoredVal) {
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform. However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIs that can be
  // transformed, loads from GV, or MI itself.
  for (const PHINode *PN : LoadUsingPHIs) {
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == StoredVal) continue;

      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;

      // UNDEF? NULL?

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}

static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
    DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
    std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
  std::vector<Value *> &FieldVals = InsertedScalarizedValues[V];

  if (FieldNo >= FieldVals.size())
    FieldVals.resize(FieldNo+1);

  // If we already have this value, just reuse the previously scalarized
  // version.
  if (Value *FieldVal = FieldVals[FieldNo])
    return FieldVal;

  // Depending on what instruction this is, we have several cases.
  Value *Result;
  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
    // This is a scalarized version of the load from the global. Just create
    // a new Load of the scalarized global.
    Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                InsertedScalarizedValues, PHIsToRewrite);
    Result = new LoadInst(V->getType()->getPointerElementType(), V,
                          LI->getName() + ".f" + Twine(FieldNo), LI);
  } else {
    PHINode *PN = cast<PHINode>(V);
    // PN's type is pointer to struct. Make a new PHI of pointer to struct
    // field.

    PointerType *PTy = cast<PointerType>(PN->getType());
    StructType *ST = cast<StructType>(PTy->getElementType());

    unsigned AS = PTy->getAddressSpace();
    PHINode *NewPN =
        PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
                        PN->getNumIncomingValues(),
                        PN->getName()+".f"+Twine(FieldNo), PN);
    Result = NewPN;
    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
  }

  return FieldVals[FieldNo] = Result;
}

/// Given a load instruction and a value derived from the load, rewrite the
/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
    DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
    std::vector<std::pair<PHINode *, unsigned>> &PHIsToRewrite) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              Constant::getNullValue(NPtr->getType()),
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite);

    // Create the new GEP idx vector.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx,
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes. This will lazily create the
  // PHIs that are needed for individual elements. Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial). If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
                                       std::vector<Value *>())).second)
    return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }
}

/// We are performing Heap SRoA on a global. Ptr is a value loaded from the
/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead.
/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
    DenseMap<Value *, std::vector<Value *>> &InsertedScalarizedValues,
    std::vector<std::pair<PHINode *, unsigned> > &PHIsToRewrite) {
  for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }

  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}

/// CI is an allocation of an array of structures. Break it up into multiple
/// allocations of arrays of the fields.
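///
/// For example (an illustrative sketch only; the names below are hypothetical
/// and not taken from the original source), an allocation of N elements of
/// { i32, double } stored into @GV conceptually becomes:
///
///   @GV.f0 = internal global i32*    null   ; array of field 0
///   @GV.f1 = internal global double* null   ; array of field 1
///   ...
///   %f0 = malloc(N * sizeof(i32));    store %f0, @GV.f0
///   %f1 = malloc(N * sizeof(double)); store %f1, @GV.f1
///
/// and every 'GEP(load @GV, Idx, Field)' over the old allocation is rewritten
/// to 'GEP(load @GV.fField, Idx)'.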
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
                                            Value *NElems, const DataLayout &DL,
                                            const TargetLibraryInfo *TLI) {
  LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI
                    << '\n');
  Type *MAT = getMallocAllocatedType(CI, TLI);
  StructType *STy = cast<StructType>(MAT);

  // There is guaranteed to be at least one use of the malloc (storing
  // it into GV). If there are other uses, change them to be uses of
  // the global to simplify later code. This also deletes the store
  // into GV.
  ReplaceUsesOfMallocWithGlobal(CI, GV);

  // Okay, at this point, there are no users of the malloc. Insert N
  // new mallocs at the same place as CI, and N globals.
  std::vector<Value *> FieldGlobals;
  std::vector<Value *> FieldMallocs;

  SmallVector<OperandBundleDef, 1> OpBundles;
  CI->getOperandBundlesAsDefs(OpBundles);

  unsigned AS = GV->getType()->getPointerAddressSpace();
  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
    Type *FieldTy = STy->getElementType(FieldNo);
    PointerType *PFieldTy = PointerType::get(FieldTy, AS);

    GlobalVariable *NGV = new GlobalVariable(
        *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
        Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
        nullptr, GV->getThreadLocalMode());
    NGV->copyAttributesFrom(GV);
    FieldGlobals.push_back(NGV);

    unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
    if (StructType *ST = dyn_cast<StructType>(FieldTy))
      TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
    Type *IntPtrTy = DL.getIntPtrType(CI->getType());
    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                        ConstantInt::get(IntPtrTy, TypeSize),
                                        NElems, OpBundles, nullptr,
                                        CI->getName() + ".f" + Twine(FieldNo));
    FieldMallocs.push_back(NMI);
    new StoreInst(NMI, NGV, CI);
  }

  // The tricky aspect of this transformation is handling the case when malloc
  // fails. In the original code, malloc failing would set the result pointer
  // of malloc to null. In this case, some mallocs could succeed and others
  // could fail. As such, we emit code that looks like this:
  //    F0 = malloc(field0)
  //    F1 = malloc(field1)
  //    F2 = malloc(field2)
  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
  //      if (F0) { free(F0); F0 = 0; }
  //      if (F1) { free(F1); F1 = 0; }
  //      if (F2) { free(F2); F2 = 0; }
  //    }
  // The malloc can also fail if its argument is too large.
  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
                                  ConstantZero, "isneg");
  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                               Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
  }

  // Split the basic block at the old malloc.
  BasicBlock *OrigBB = CI->getParent();
  BasicBlock *ContBB =
      OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");

  // Create the block to check the first condition. Put all these blocks at the
  // end of the function as they are unlikely to be executed.
  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
                                                "malloc_ret_null",
                                                OrigBB->getParent());

  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
  // branch on RunningOr.
  OrigBB->getTerminator()->eraseFromParent();
  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);

  // Within the NullPtrBlock, we need to emit a comparison and branch for each
  // pointer, because some may be null while others are not.
  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
    Value *GVVal =
        new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
                     FieldGlobals[i], "tmp", NullPtrBlock);
    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                              Constant::getNullValue(GVVal->getType()));
    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
                                               OrigBB->getParent());
    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
                                               OrigBB->getParent());
    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
                                         Cmp, NullPtrBlock);

    // Fill in FreeBlock.
    CallInst::CreateFree(GVVal, OpBundles, BI);
    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                  FreeBlock);
    BranchInst::Create(NextBlock, FreeBlock);

    NullPtrBlock = NextBlock;
  }

  BranchInst::Create(ContBB, NullPtrBlock);

  // CI is no longer needed, remove it.
  CI->eraseFromParent();

  /// As we process loads, if we can't immediately update all uses of the load,
  /// keep track of what scalarized loads are inserted for a given load.
  DenseMap<Value *, std::vector<Value *>> InsertedScalarizedValues;
  InsertedScalarizedValues[GV] = FieldGlobals;

  std::vector<std::pair<PHINode *, unsigned>> PHIsToRewrite;

  // Okay, the malloc site is completely handled. All of the uses of GV are now
  // loads, and all uses of those loads are simple. Rewrite them to use loads
  // of the per-field globals instead.
  for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
      continue;
    }

    // Must be a store of null.
    StoreInst *SI = cast<StoreInst>(User);
    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
           "Unexpected heap-sra user!");

    // Insert a store of null into each global.
    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
      Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
      Constant *Null = Constant::getNullValue(ValTy);
      new StoreInst(Null, FieldGlobals[i], SI);
    }
    // Erase the original store.
    SI->eraseFromParent();
  }

  // While we have PHIs that are interesting to rewrite, do it.
  while (!PHIsToRewrite.empty()) {
    PHINode *PN = PHIsToRewrite.back().first;
    unsigned FieldNo = PHIsToRewrite.back().second;
    PHIsToRewrite.pop_back();
    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");

    // Add all the incoming values. This can materialize more phis.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
                               PHIsToRewrite);
      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
    }
  }

  // Drop all inter-phi links and any loads that made it this far.
  for (DenseMap<Value *, std::vector<Value *>>::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->dropAllReferences();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->dropAllReferences();
  }

  // Delete all the phis and loads now that inter-references are dead.
  for (DenseMap<Value *, std::vector<Value *>>::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->eraseFromParent();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->eraseFromParent();
  }

  // The old global is now dead, remove it.
  GV->eraseFromParent();

  ++NumHeapSRA;
  return cast<GlobalVariable>(FieldGlobals[0]);
}

/// This function is called when we see a pointer global variable with a single
/// value stored into it that is a malloc or cast of malloc.
static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
@@ -1520,11 +1044,9 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals. This allows the
  // malloc to be stored into the specified global, loaded icmp'd, and
  // GEP'd. These are all things we could transform to using the global
  // for.
  SmallPtrSet<const PHINode*, 8> PHIs;
  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
  // malloc to be stored into the specified global, loaded icmp'd.
  // These are all things we could transform to using the global for.
  if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
    return false;

  // If we have a global that is only initialized with a fixed size malloc,
@@ -1545,54 +1067,6 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
    return true;
  }

  // If the allocation is an array of structures, consider transforming this
  // into multiple malloc'd arrays, one for each field. This is basically
  // SRoA for malloc'd memory.

  if (Ordering != AtomicOrdering::NotAtomic)
    return false;

  // If this is an allocation of a fixed size array of structs, analyze as a
  // variable size array. malloc [100 x struct],1 -> malloc struct, 100
  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
      AllocTy = AT->getElementType();

  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
  if (!AllocSTy)
    return false;

  // If the structure has an unreasonable number of fields, leave it
  // alone.
  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {

    // If this is a fixed size array, transform the Malloc to be an alloc of
    // structs. malloc [100 x struct],1 -> malloc struct, 100
    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
      Type *IntPtrTy = DL.getIntPtrType(CI->getType());
      unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
      SmallVector<OperandBundleDef, 1> OpBundles;
      CI->getOperandBundlesAsDefs(OpBundles);
      Instruction *Malloc =
          CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
                                 OpBundles, nullptr, CI->getName());
      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
      CI->replaceAllUsesWith(Cast);
      CI->eraseFromParent();
      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
        CI = cast<CallInst>(BCI->getOperand(0));
      else
        CI = cast<CallInst>(Malloc);
    }

    PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
                         TLI);
    return true;
  }

  return false;
}