forked from OSchip/llvm-project
parent
456a806692
commit
24d3d4280a
|
@ -36,6 +36,7 @@ namespace {
|
||||||
Statistic<> NumMarked ("globalopt", "Number of globals marked constant");
|
Statistic<> NumMarked ("globalopt", "Number of globals marked constant");
|
||||||
Statistic<> NumSRA ("globalopt", "Number of aggregate globals broken "
|
Statistic<> NumSRA ("globalopt", "Number of aggregate globals broken "
|
||||||
"into scalars");
|
"into scalars");
|
||||||
|
Statistic<> NumHeapSRA ("globalopt", "Number of heap objects SRA'd");
|
||||||
Statistic<> NumSubstitute("globalopt",
|
Statistic<> NumSubstitute("globalopt",
|
||||||
"Number of globals with initializers stored into them");
|
"Number of globals with initializers stored into them");
|
||||||
Statistic<> NumDeleted ("globalopt", "Number of globals deleted");
|
Statistic<> NumDeleted ("globalopt", "Number of globals deleted");
|
||||||
|
@ -794,9 +795,235 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
|
||||||
|
/// somewhere. Transform all uses of the allocation into loads from the
|
||||||
|
/// global and uses of the resultant pointer. Further, delete the store into
|
||||||
|
/// GV. This assumes that these value pass the
|
||||||
|
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
|
||||||
|
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
|
||||||
|
GlobalVariable *GV) {
|
||||||
|
while (!Alloc->use_empty()) {
|
||||||
|
Instruction *U = Alloc->use_back();
|
||||||
|
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
|
||||||
|
// If this is the store of the allocation into the global, remove it.
|
||||||
|
if (SI->getOperand(1) == GV) {
|
||||||
|
SI->eraseFromParent();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert a load from the global, and use it instead of the malloc.
|
||||||
|
Value *NL = new LoadInst(GV, GV->getName()+".val", U);
|
||||||
|
U->replaceUsesOfWith(Alloc, NL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
|
||||||
|
/// GV are simple enough to perform HeapSRA, return true.
|
||||||
|
static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) {
|
||||||
|
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
|
||||||
|
++UI)
|
||||||
|
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
|
||||||
|
// We permit two users of the load: setcc comparing against the null
|
||||||
|
// pointer, and a getelementptr of a specific form.
|
||||||
|
for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end(); UI != E;
|
||||||
|
++UI) {
|
||||||
|
// Comparison against null is ok.
|
||||||
|
if (SetCondInst *SCI = dyn_cast<SetCondInst>(*UI)) {
|
||||||
|
if (!isa<ConstantPointerNull>(SCI->getOperand(1)))
|
||||||
|
return false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// getelementptr is also ok, but only a simple form.
|
||||||
|
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI);
|
||||||
|
if (!GEPI) return false;
|
||||||
|
|
||||||
|
// Must index into the array and into the struct.
|
||||||
|
if (GEPI->getNumOperands() < 3)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Otherwise the GEP is ok.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr
|
||||||
|
/// is a value loaded from the global. Eliminate all uses of Ptr, making them
|
||||||
|
/// use FieldGlobals instead. All uses of loaded values satisfy
|
||||||
|
/// GlobalLoadUsesSimpleEnoughForHeapSRA.
|
||||||
|
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Ptr,
|
||||||
|
const std::vector<GlobalVariable*> &FieldGlobals) {
|
||||||
|
std::vector<Value *> InsertedLoadsForPtr;
|
||||||
|
//InsertedLoadsForPtr.resize(FieldGlobals.size());
|
||||||
|
while (!Ptr->use_empty()) {
|
||||||
|
Instruction *User = Ptr->use_back();
|
||||||
|
|
||||||
|
// If this is a comparison against null, handle it.
|
||||||
|
if (SetCondInst *SCI = dyn_cast<SetCondInst>(User)) {
|
||||||
|
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
|
||||||
|
// If we have a setcc of the loaded pointer, we can use a setcc of any
|
||||||
|
// field.
|
||||||
|
Value *NPtr;
|
||||||
|
if (InsertedLoadsForPtr.empty()) {
|
||||||
|
NPtr = new LoadInst(FieldGlobals[0], Ptr->getName()+".f0", Ptr);
|
||||||
|
InsertedLoadsForPtr.push_back(Ptr);
|
||||||
|
} else {
|
||||||
|
NPtr = InsertedLoadsForPtr.back();
|
||||||
|
}
|
||||||
|
|
||||||
|
Value *New = new SetCondInst(SCI->getOpcode(), NPtr,
|
||||||
|
Constant::getNullValue(NPtr->getType()),
|
||||||
|
SCI->getName(), SCI);
|
||||||
|
SCI->replaceAllUsesWith(New);
|
||||||
|
SCI->eraseFromParent();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, this should be: 'getelementptr Ptr, Idx, uint FieldNo ...'
|
||||||
|
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
|
||||||
|
assert(GEPI->getNumOperands() >= 3 && isa<ConstantUInt>(GEPI->getOperand(2))
|
||||||
|
&& "Unexpected GEPI!");
|
||||||
|
|
||||||
|
// Load the pointer for this field.
|
||||||
|
unsigned FieldNo = cast<ConstantUInt>(GEPI->getOperand(2))->getValue();
|
||||||
|
if (InsertedLoadsForPtr.size() <= FieldNo)
|
||||||
|
InsertedLoadsForPtr.resize(FieldNo+1);
|
||||||
|
if (InsertedLoadsForPtr[FieldNo] == 0)
|
||||||
|
InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo],
|
||||||
|
Ptr->getName()+".f" +
|
||||||
|
utostr(FieldNo), Ptr);
|
||||||
|
Value *NewPtr = InsertedLoadsForPtr[FieldNo];
|
||||||
|
|
||||||
|
// Create the new GEP idx vector.
|
||||||
|
std::vector<Value*> GEPIdx;
|
||||||
|
GEPIdx.push_back(GEPI->getOperand(1));
|
||||||
|
GEPIdx.insert(GEPIdx.end(), GEPI->op_begin()+3, GEPI->op_end());
|
||||||
|
|
||||||
|
Value *NGEPI = new GetElementPtrInst(NewPtr, GEPIdx, GEPI->getName(), GEPI);
|
||||||
|
GEPI->replaceAllUsesWith(NGEPI);
|
||||||
|
GEPI->eraseFromParent();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break
|
||||||
|
/// it up into multiple allocations of arrays of the fields.
|
||||||
|
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
|
||||||
|
/*DEBUG*/(std::cerr << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI);
|
||||||
|
const StructType *STy = cast<StructType>(MI->getAllocatedType());
|
||||||
|
|
||||||
|
// There is guaranteed to be at least one use of the malloc (storing
|
||||||
|
// it into GV). If there are other uses, change them to be uses of
|
||||||
|
// the global to simplify later code. This also deletes the store
|
||||||
|
// into GV.
|
||||||
|
ReplaceUsesOfMallocWithGlobal(MI, GV);
|
||||||
|
|
||||||
|
// Okay, at this point, there are no users of the malloc. Insert N
|
||||||
|
// new mallocs at the same place as MI, and N globals.
|
||||||
|
std::vector<GlobalVariable*> FieldGlobals;
|
||||||
|
std::vector<MallocInst*> FieldMallocs;
|
||||||
|
|
||||||
|
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
|
||||||
|
const Type *FieldTy = STy->getElementType(FieldNo);
|
||||||
|
const Type *PFieldTy = PointerType::get(FieldTy);
|
||||||
|
|
||||||
|
GlobalVariable *NGV =
|
||||||
|
new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage,
|
||||||
|
Constant::getNullValue(PFieldTy),
|
||||||
|
GV->getName() + ".f" + utostr(FieldNo), GV);
|
||||||
|
FieldGlobals.push_back(NGV);
|
||||||
|
|
||||||
|
MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
|
||||||
|
MI->getName() + ".f" + utostr(FieldNo),MI);
|
||||||
|
FieldMallocs.push_back(NMI);
|
||||||
|
new StoreInst(NMI, NGV, MI);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The tricky aspect of this transformation is handling the case when malloc
|
||||||
|
// fails. In the original code, malloc failing would set the result pointer
|
||||||
|
// of malloc to null. In this case, some mallocs could succeed and others
|
||||||
|
// could fail. As such, we emit code that looks like this:
|
||||||
|
// F0 = malloc(field0)
|
||||||
|
// F1 = malloc(field1)
|
||||||
|
// F2 = malloc(field2)
|
||||||
|
// if (F0 == 0 || F1 == 0 || F2 == 0) {
|
||||||
|
// if (F0) { free(F0); F0 = 0; }
|
||||||
|
// if (F1) { free(F1); F1 = 0; }
|
||||||
|
// if (F2) { free(F2); F2 = 0; }
|
||||||
|
// }
|
||||||
|
Value *RunningOr = 0;
|
||||||
|
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
|
||||||
|
Value *Cond = new SetCondInst(Instruction::SetEQ, FieldMallocs[i],
|
||||||
|
Constant::getNullValue(FieldMallocs[i]->getType()),
|
||||||
|
"isnull", MI);
|
||||||
|
if (!RunningOr)
|
||||||
|
RunningOr = Cond; // First seteq
|
||||||
|
else
|
||||||
|
RunningOr = BinaryOperator::createOr(RunningOr, Cond, "tmp", MI);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split the basic block at the old malloc.
|
||||||
|
BasicBlock *OrigBB = MI->getParent();
|
||||||
|
BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont");
|
||||||
|
|
||||||
|
// Create the block to check the first condition. Put all these blocks at the
|
||||||
|
// end of the function as they are unlikely to be executed.
|
||||||
|
BasicBlock *NullPtrBlock = new BasicBlock("malloc_ret_null",
|
||||||
|
OrigBB->getParent());
|
||||||
|
|
||||||
|
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
|
||||||
|
// branch on RunningOr.
|
||||||
|
OrigBB->getTerminator()->eraseFromParent();
|
||||||
|
new BranchInst(NullPtrBlock, ContBB, RunningOr, OrigBB);
|
||||||
|
|
||||||
|
// Within the NullPtrBlock, we need to emit a comparison and branch for each
|
||||||
|
// pointer, because some may be null while others are not.
|
||||||
|
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
|
||||||
|
Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
|
||||||
|
Value *Cmp = new SetCondInst(Instruction::SetNE, GVVal,
|
||||||
|
Constant::getNullValue(GVVal->getType()),
|
||||||
|
"tmp", NullPtrBlock);
|
||||||
|
BasicBlock *FreeBlock = new BasicBlock("free_it", OrigBB->getParent());
|
||||||
|
BasicBlock *NextBlock = new BasicBlock("next", OrigBB->getParent());
|
||||||
|
new BranchInst(FreeBlock, NextBlock, Cmp, NullPtrBlock);
|
||||||
|
|
||||||
|
// Fill in FreeBlock.
|
||||||
|
new FreeInst(GVVal, FreeBlock);
|
||||||
|
new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
|
||||||
|
FreeBlock);
|
||||||
|
new BranchInst(NextBlock, FreeBlock);
|
||||||
|
|
||||||
|
NullPtrBlock = NextBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
new BranchInst(ContBB, NullPtrBlock);
|
||||||
|
|
||||||
|
|
||||||
|
// MI is no longer needed, remove it.
|
||||||
|
MI->eraseFromParent();
|
||||||
|
|
||||||
|
|
||||||
|
// Okay, the malloc site is completely handled. All of the uses of GV are now
|
||||||
|
// loads, and all uses of those loads are simple. Rewrite them to use loads
|
||||||
|
// of the per-field globals instead.
|
||||||
|
while (!GV->use_empty()) {
|
||||||
|
LoadInst *LI = cast<LoadInst>(GV->use_back());
|
||||||
|
RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals);
|
||||||
|
LI->eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The old global is now dead, remove it.
|
||||||
|
GV->eraseFromParent();
|
||||||
|
|
||||||
|
++NumHeapSRA;
|
||||||
|
return FieldGlobals[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
|
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
|
||||||
// that only one value (besides its initializer) is ever stored to the global.
|
// that only one value (besides its initializer) is ever stored to the global.
|
||||||
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
|
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
|
||||||
|
@ -835,23 +1062,52 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
|
||||||
if (!MI->getAllocatedType()->isSized())
|
if (!MI->getAllocatedType()->isSized())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// We can't optimize this global unless all uses of it are *known* to be
|
||||||
|
// of the malloc value, not of the null initializer value (consider a use
|
||||||
|
// that compares the global's value against zero to see if the malloc has
|
||||||
|
// been reached). To do this, we check to see if all uses of the global
|
||||||
|
// would trap if the global were null: this proves that they must all
|
||||||
|
// happen after the malloc.
|
||||||
|
if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// We can't optimize this if the malloc itself is used in a complex way,
|
||||||
|
// for example, being stored into multiple globals. This allows the
|
||||||
|
// malloc to be stored into the specified global, loaded setcc'd, and
|
||||||
|
// GEP'd. These are all things we could transform to using the global
|
||||||
|
// for.
|
||||||
|
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
|
||||||
// If we have a global that is only initialized with a fixed size malloc,
|
// If we have a global that is only initialized with a fixed size malloc,
|
||||||
// and if all users of the malloc trap, and if the malloc'd address is not
|
// transform the program to use global memory instead of malloc'd memory.
|
||||||
// put anywhere else, transform the program to use global memory instead
|
// This eliminates dynamic allocation, avoids an indirection accessing the
|
||||||
// of malloc'd memory. This eliminates dynamic allocation (good) and
|
// data, and exposes the resultant global to further GlobalOpt.
|
||||||
// exposes the resultant global to further GlobalOpt (even better). Note
|
|
||||||
// that we restrict this transformation to only working on small
|
|
||||||
// allocations (2048 bytes currently), as we don't want to introduce a 16M
|
|
||||||
// global or something.
|
|
||||||
if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
|
if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
|
||||||
|
// Restrict this transformation to only working on small allocations
|
||||||
|
// (2048 bytes currently), as we don't want to introduce a 16M global or
|
||||||
|
// something.
|
||||||
if (NElements->getRawValue()*
|
if (NElements->getRawValue()*
|
||||||
TD.getTypeSize(MI->getAllocatedType()) < 2048 &&
|
TD.getTypeSize(MI->getAllocatedType()) < 2048) {
|
||||||
AllUsesOfLoadedValueWillTrapIfNull(GV) &&
|
|
||||||
ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV)) {
|
|
||||||
GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
|
GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the allocation is an array of structures, consider transforming this
|
||||||
|
// into multiple malloc'd arrays, one for each field. This is basically
|
||||||
|
// SRoA for malloc'd memory.
|
||||||
|
if (const StructType *AllocTy =
|
||||||
|
dyn_cast<StructType>(MI->getAllocatedType())) {
|
||||||
|
// If the structure has an unreasonable number of fields, leave it
|
||||||
|
// alone.
|
||||||
|
if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 &&
|
||||||
|
GlobalLoadUsesSimpleEnoughForHeapSRA(GV)) {
|
||||||
|
GVI = PerformHeapAllocSRoA(GV, MI);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue