llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp

667 lines
25 KiB
C++
Raw Normal View History

//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines late ObjC ARC optimizations. ARC stands for Automatic
/// Reference Counting and is a system for managing reference counts for objects
/// in Objective C.
///
/// This specific file mainly deals with ``contracting'' multiple lower level
/// operations into singular higher level operations through pattern matching.
///
/// WARNING: This file knows about certain library functions. It recognizes them
/// by name, and hardwires knowledge of their semantics.
///
/// WARNING: This file knows about how certain Objective-C library functions are
/// used. Naive LLVM IR transformations which would otherwise be
/// behavior-preserving may break these assumptions.
///
//===----------------------------------------------------------------------===//
// TODO: ObjCARCContract could insert PHI nodes when uses aren't
// dominated by single calls.
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
#include "DependencyAnalysis.h"
#include "ProvenanceAnalysis.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::objcarc;
#define DEBUG_TYPE "objc-arc-contract"
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
//===----------------------------------------------------------------------===//
// Declarations
//===----------------------------------------------------------------------===//
namespace {
/// \brief Late ARC optimizations
///
/// These change the IR in a way that makes it difficult to be analyzed by
/// ObjCARCOpt, so it's run late.
class ObjCARCContract : public FunctionPass {
bool Changed;
AliasAnalysis *AA;
DominatorTree *DT;
ProvenanceAnalysis PA;
ARCRuntimeEntryPoints EP;
/// A flag indicating whether this optimization pass should run.
bool Run;
/// The inline asm string to insert between calls and RetainRV calls to make
/// the optimization work on targets which need it.
const MDString *RVInstMarker;
/// The set of inserted objc_storeStrong calls. If at the end of walking the
/// function we have found no alloca instructions, these calls can be marked
/// "tail".
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
/// Returns true if we eliminated Inst.
bool tryToPeepholeInstruction(Function &F, Instruction *Inst,
inst_iterator &Iter,
SmallPtrSetImpl<Instruction *> &DepInsts,
SmallPtrSetImpl<const BasicBlock *> &Visited,
bool &TailOkForStoreStrong);
bool optimizeRetainCall(Function &F, Instruction *Retain);
bool
contractAutorelease(Function &F, Instruction *Autorelease,
ARCInstKind Class,
SmallPtrSetImpl<Instruction *> &DependingInstructions,
SmallPtrSetImpl<const BasicBlock *> &Visited);
void tryToContractReleaseIntoStoreStrong(Instruction *Release,
inst_iterator &Iter);
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
public:
static char ID;
ObjCARCContract() : FunctionPass(ID) {
initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
}
};
}
//===----------------------------------------------------------------------===//
// Implementation
//===----------------------------------------------------------------------===//
/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
/// return value. We do this late so we do not disrupt the dataflow analysis in
/// ObjCARCOpt.
bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
ImmutableCallSite CS(GetArgRCIdentityRoot(Retain));
const Instruction *Call = CS.getInstruction();
if (!Call)
return false;
if (Call->getParent() != Retain->getParent())
return false;
// Check that the call is next to the retain.
BasicBlock::const_iterator I = ++Call->getIterator();
while (IsNoopInstruction(&*I))
++I;
if (&*I != Retain)
return false;
// Turn it to an objc_retainAutoreleasedReturnValue.
Changed = true;
++NumPeeps;
DEBUG(dbgs() << "Transforming objc_retain => "
"objc_retainAutoreleasedReturnValue since the operand is a "
"return value.\nOld: "<< *Retain << "\n");
// We do not have to worry about tail calls/does not throw since
// retain/retainRV have the same properties.
Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
cast<CallInst>(Retain)->setCalledFunction(Decl);
DEBUG(dbgs() << "New: " << *Retain << "\n");
return true;
}
/// Merge an autorelease with a retain into a fused call.
bool ObjCARCContract::contractAutorelease(
Function &F, Instruction *Autorelease, ARCInstKind Class,
SmallPtrSetImpl<Instruction *> &DependingInstructions,
SmallPtrSetImpl<const BasicBlock *> &Visited) {
const Value *Arg = GetArgRCIdentityRoot(Autorelease);
// Check that there are no instructions between the retain and the autorelease
// (such as an autorelease_pop) which may change the count.
CallInst *Retain = nullptr;
if (Class == ARCInstKind::AutoreleaseRV)
FindDependencies(RetainAutoreleaseRVDep, Arg,
Autorelease->getParent(), Autorelease,
DependingInstructions, Visited, PA);
else
FindDependencies(RetainAutoreleaseDep, Arg,
Autorelease->getParent(), Autorelease,
DependingInstructions, Visited, PA);
Visited.clear();
if (DependingInstructions.size() != 1) {
DependingInstructions.clear();
return false;
}
Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
DependingInstructions.clear();
if (!Retain || GetBasicARCInstKind(Retain) != ARCInstKind::Retain ||
GetArgRCIdentityRoot(Retain) != Arg)
return false;
Changed = true;
++NumPeeps;
DEBUG(dbgs() << " Fusing retain/autorelease!\n"
" Autorelease:" << *Autorelease << "\n"
" Retain: " << *Retain << "\n");
Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
: ARCRuntimeEntryPointKind::RetainAutorelease);
Retain->setCalledFunction(Decl);
DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n");
EraseInstruction(Autorelease);
return true;
}
static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
Instruction *Release,
ProvenanceAnalysis &PA,
AliasAnalysis *AA) {
StoreInst *Store = nullptr;
bool SawRelease = false;
// Get the location associated with Load.
MemoryLocation Loc = MemoryLocation::get(Load);
auto *LocPtr = Loc.Ptr->stripPointerCasts();
// Walk down to find the store and the release, which may be in either order.
for (auto I = std::next(BasicBlock::iterator(Load)),
E = Load->getParent()->end();
I != E; ++I) {
// If we found the store we were looking for and saw the release,
// break. There is no more work to be done.
if (Store && SawRelease)
break;
// Now we know that we have not seen either the store or the release. If I
// is the release, mark that we saw the release and continue.
Instruction *Inst = &*I;
if (Inst == Release) {
SawRelease = true;
continue;
}
// Otherwise, we check if Inst is a "good" store. Grab the instruction class
// of Inst.
ARCInstKind Class = GetBasicARCInstKind(Inst);
// If Inst is an unrelated retain, we don't care about it.
//
// TODO: This is one area where the optimization could be made more
// aggressive.
if (IsRetain(Class))
continue;
// If we have seen the store, but not the release...
if (Store) {
// We need to make sure that it is safe to move the release from its
// current position to the store. This implies proving that any
// instruction in between Store and the Release conservatively can not use
// the RCIdentityRoot of Release. If we can prove we can ignore Inst, so
// continue...
if (!CanUse(Inst, Load, PA, Class)) {
continue;
}
// Otherwise, be conservative and return nullptr.
return nullptr;
}
// Ok, now we know we have not seen a store yet. See if Inst can write to
// our load location, if it can not, just ignore the instruction.
if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod))
continue;
Store = dyn_cast<StoreInst>(Inst);
// If Inst can, then check if Inst is a simple store. If Inst is not a
// store or a store that is not simple, then we have some we do not
// understand writing to this memory implying we can not move the load
// over the write to any subsequent store that we may find.
if (!Store || !Store->isSimple())
return nullptr;
// Then make sure that the pointer we are storing to is Ptr. If so, we
// found our Store!
if (Store->getPointerOperand()->stripPointerCasts() == LocPtr)
continue;
// Otherwise, we have an unknown store to some other ptr that clobbers
// Loc.Ptr. Bail!
return nullptr;
}
// If we did not find the store or did not see the release, fail.
if (!Store || !SawRelease)
return nullptr;
// We succeeded!
return Store;
}
static Instruction *
findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
Instruction *Release,
ProvenanceAnalysis &PA) {
// Walk up from the Store to find the retain.
BasicBlock::iterator I = Store->getIterator();
BasicBlock::iterator Begin = Store->getParent()->begin();
while (I != Begin && GetBasicARCInstKind(&*I) != ARCInstKind::Retain) {
Instruction *Inst = &*I;
// It is only safe to move the retain to the store if we can prove
// conservatively that nothing besides the release can decrement reference
// counts in between the retain and the store.
if (CanDecrementRefCount(Inst, New, PA) && Inst != Release)
return nullptr;
--I;
}
Instruction *Retain = &*I;
if (GetBasicARCInstKind(Retain) != ARCInstKind::Retain)
return nullptr;
if (GetArgRCIdentityRoot(Retain) != New)
return nullptr;
return Retain;
}
/// Attempt to merge an objc_release with a store, load, and objc_retain to form
/// an objc_storeStrong. An objc_storeStrong:
///
/// objc_storeStrong(i8** %old_ptr, i8* new_value)
///
/// is equivalent to the following IR sequence:
///
/// ; Load old value.
/// %old_value = load i8** %old_ptr (1)
///
/// ; Increment the new value and then release the old value. This must occur
/// ; in order in case old_value releases new_value in its destructor causing
/// ; us to potentially have a dangling ptr.
/// tail call i8* @objc_retain(i8* %new_value) (2)
/// tail call void @objc_release(i8* %old_value) (3)
///
/// ; Store the new_value into old_ptr
/// store i8* %new_value, i8** %old_ptr (4)
///
/// The safety of this optimization is based around the following
/// considerations:
///
/// 1. We are forming the store strong at the store. Thus to perform this
/// optimization it must be safe to move the retain, load, and release to
/// (4).
/// 2. We need to make sure that any re-orderings of (1), (2), (3), (4) are
/// safe.
void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release,
inst_iterator &Iter) {
// See if we are releasing something that we just loaded.
auto *Load = dyn_cast<LoadInst>(GetArgRCIdentityRoot(Release));
if (!Load || !Load->isSimple())
return;
// For now, require everything to be in one basic block.
BasicBlock *BB = Release->getParent();
if (Load->getParent() != BB)
return;
// First scan down the BB from Load, looking for a store of the RCIdentityRoot
// of Load's
StoreInst *Store =
findSafeStoreForStoreStrongContraction(Load, Release, PA, AA);
// If we fail, bail.
if (!Store)
return;
// Then find what new_value's RCIdentity Root is.
Value *New = GetRCIdentityRoot(Store->getValueOperand());
// Then walk up the BB and look for a retain on New without any intervening
// instructions which conservatively might decrement ref counts.
Instruction *Retain =
findRetainForStoreStrongContraction(New, Store, Release, PA);
// If we fail, bail.
if (!Retain)
return;
Changed = true;
++NumStoreStrongs;
DEBUG(
llvm::dbgs() << " Contracting retain, release into objc_storeStrong.\n"
<< " Old:\n"
<< " Store: " << *Store << "\n"
<< " Release: " << *Release << "\n"
<< " Retain: " << *Retain << "\n"
<< " Load: " << *Load << "\n");
LLVMContext &C = Release->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *I8XX = PointerType::getUnqual(I8X);
Value *Args[] = { Load->getPointerOperand(), New };
if (Args[0]->getType() != I8XX)
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
if (Args[1]->getType() != I8X)
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store);
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
// We can't set the tail flag yet, because we haven't yet determined
// whether there are any escaping allocas. Remember this call, so that
// we can set the tail flag once we know it's safe.
StoreStrongCalls.insert(StoreStrong);
DEBUG(llvm::dbgs() << " New Store Strong: " << *StoreStrong << "\n");
if (&*Iter == Store) ++Iter;
Store->eraseFromParent();
Release->eraseFromParent();
EraseInstruction(Retain);
if (Load->use_empty())
Load->eraseFromParent();
}
bool ObjCARCContract::tryToPeepholeInstruction(
Function &F, Instruction *Inst, inst_iterator &Iter,
SmallPtrSetImpl<Instruction *> &DependingInsts,
SmallPtrSetImpl<const BasicBlock *> &Visited,
bool &TailOkForStoreStrongs) {
// Only these library routines return their argument. In particular,
// objc_retainBlock does not necessarily return its argument.
ARCInstKind Class = GetBasicARCInstKind(Inst);
switch (Class) {
case ARCInstKind::FusedRetainAutorelease:
case ARCInstKind::FusedRetainAutoreleaseRV:
return false;
case ARCInstKind::Autorelease:
case ARCInstKind::AutoreleaseRV:
return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
case ARCInstKind::Retain:
// Attempt to convert retains to retainrvs if they are next to function
// calls.
if (!optimizeRetainCall(F, Inst))
return false;
// If we succeed in our optimization, fall through.
// FALLTHROUGH
case ARCInstKind::RetainRV:
case ARCInstKind::ClaimRV: {
// If we're compiling for a target which needs a special inline-asm
// marker to do the return value optimization, insert it now.
if (!RVInstMarker)
return false;
BasicBlock::iterator BBI = Inst->getIterator();
BasicBlock *InstParent = Inst->getParent();
// Step up to see if the call immediately precedes the RV call.
// If it's an invoke, we have to cross a block boundary. And we have
// to carefully dodge no-op instructions.
do {
if (BBI == InstParent->begin()) {
BasicBlock *Pred = InstParent->getSinglePredecessor();
if (!Pred)
goto decline_rv_optimization;
BBI = Pred->getTerminator()->getIterator();
break;
}
--BBI;
} while (IsNoopInstruction(&*BBI));
if (&*BBI == GetArgRCIdentityRoot(Inst)) {
DEBUG(dbgs() << "Adding inline asm marker for the return value "
"optimization.\n");
Changed = true;
InlineAsm *IA = InlineAsm::get(
FunctionType::get(Type::getVoidTy(Inst->getContext()),
/*isVarArg=*/false),
RVInstMarker->getString(),
/*Constraints=*/"", /*hasSideEffects=*/true);
CallInst::Create(IA, "", Inst);
}
decline_rv_optimization:
return false;
}
case ARCInstKind::InitWeak: {
// objc_initWeak(p, null) => *p = null
CallInst *CI = cast<CallInst>(Inst);
if (IsNullOrUndef(CI->getArgOperand(1))) {
Value *Null =
ConstantPointerNull::get(cast<PointerType>(CI->getType()));
Changed = true;
new StoreInst(Null, CI->getArgOperand(0), CI);
DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
<< " New = " << *Null << "\n");
CI->replaceAllUsesWith(Null);
CI->eraseFromParent();
}
return true;
}
case ARCInstKind::Release:
// Try to form an objc store strong from our release. If we fail, there is
// nothing further to do below, so continue.
tryToContractReleaseIntoStoreStrong(Inst, Iter);
return true;
case ARCInstKind::User:
// Be conservative if the function has any alloca instructions.
// Technically we only care about escaping alloca instructions,
// but this is sufficient to handle some interesting cases.
if (isa<AllocaInst>(Inst))
TailOkForStoreStrongs = false;
return true;
case ARCInstKind::IntrinsicUser:
// Remove calls to @clang.arc.use(...).
Inst->eraseFromParent();
return true;
default:
return true;
}
}
//===----------------------------------------------------------------------===//
// Top Level Driver
//===----------------------------------------------------------------------===//
bool ObjCARCContract::runOnFunction(Function &F) {
if (!EnableARCOpts)
return false;
// If nothing in the Module uses ARC, don't do anything.
if (!Run)
return false;
Changed = false;
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible with the new pass manager, and no longer relying on analysis groups. This builds essentially a ground-up new AA infrastructure stack for LLVM. The core ideas are the same that are used throughout the new pass manager: type erased polymorphism and direct composition. The design is as follows: - FunctionAAResults is a type-erasing alias analysis results aggregation interface to walk a single query across a range of results from different alias analyses. Currently this is function-specific as we always assume that aliasing queries are *within* a function. - AAResultBase is a CRTP utility providing stub implementations of various parts of the alias analysis result concept, notably in several cases in terms of other more general parts of the interface. This can be used to implement only a narrow part of the interface rather than the entire interface. This isn't really ideal, this logic should be hoisted into FunctionAAResults as currently it will cause a significant amount of redundant work, but it faithfully models the behavior of the prior infrastructure. - All the alias analysis passes are ported to be wrapper passes for the legacy PM and new-style analysis passes for the new PM with a shared result object. In some cases (most notably CFL), this is an extremely naive approach that we should revisit when we can specialize for the new pass manager. - BasicAA has been restructured to reflect that it is much more fundamentally a function analysis because it uses dominator trees and loop info that need to be constructed for each function. All of the references to getting alias analysis results have been updated to use the new aggregation interface. All the preservation and other pass management code has been updated accordingly. The way the FunctionAAResultsWrapperPass works is to detect the available alias analyses when run, and add them to the results object. This means that we should be able to continue to respect when various passes are added to the pipeline, for example adding CFL or adding TBAA passes should just cause their results to be available and to get folded into this. The exception to this rule is BasicAA which really needs to be a function pass due to using dominator trees and loop info. As a consequence, the FunctionAAResultsWrapperPass directly depends on BasicAA and always includes it in the aggregation. This has significant implications for preserving analyses. Generally, most passes shouldn't bother preserving FunctionAAResultsWrapperPass because rebuilding the results just updates the set of known AA passes. The exception to this rule are LoopPass instances which need to preserve all the function analyses that the loop pass manager will end up needing. This means preserving both BasicAAWrapperPass and the aggregating FunctionAAResultsWrapperPass. Now, when preserving an alias analysis, you do so by directly preserving that analysis. This is only necessary for non-immutable-pass-provided alias analyses though, and there are only three of interest: BasicAA, GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is preserved when needed because it (like DominatorTree and LoopInfo) is marked as a CFG-only pass. I've expanded GlobalsAA into the preserved set everywhere we previously were preserving all of AliasAnalysis, and I've added SCEVAA in the intersection of that with where we preserve SCEV itself. One significant challenge to all of this is that the CGSCC passes were actually using the alias analysis implementations by taking advantage of a pretty amazing set of loop holes in the old pass manager's analysis management code which allowed analysis groups to slide through in many cases. Moving away from analysis groups makes this problem much more obvious. To fix it, I've leveraged the flexibility the design of the new PM components provides to just directly construct the relevant alias analyses for the relevant functions in the IPO passes that need them. This is a bit hacky, but should go away with the new pass manager, and is already in many ways cleaner than the prior state. Another significant challenge is that various facilities of the old alias analysis infrastructure just don't fit any more. The most significant of these is the alias analysis 'counter' pass. That pass relied on the ability to snoop on AA queries at different points in the analysis group chain. Instead, I'm planning to build printing functionality directly into the aggregation layer. I've not included that in this patch merely to keep it smaller. Note that all of this needs a nearly complete rewrite of the AA documentation. I'm planning to do that, but I'd like to make sure the new design settles, and to flesh out a bit more of what it looks like in the new pass manager first. Differential Revision: http://reviews.llvm.org/D12080 llvm-svn: 247167
2015-09-10 01:55:00 +08:00
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible with the new pass manager, and no longer relying on analysis groups. This builds essentially a ground-up new AA infrastructure stack for LLVM. The core ideas are the same that are used throughout the new pass manager: type erased polymorphism and direct composition. The design is as follows: - FunctionAAResults is a type-erasing alias analysis results aggregation interface to walk a single query across a range of results from different alias analyses. Currently this is function-specific as we always assume that aliasing queries are *within* a function. - AAResultBase is a CRTP utility providing stub implementations of various parts of the alias analysis result concept, notably in several cases in terms of other more general parts of the interface. This can be used to implement only a narrow part of the interface rather than the entire interface. This isn't really ideal, this logic should be hoisted into FunctionAAResults as currently it will cause a significant amount of redundant work, but it faithfully models the behavior of the prior infrastructure. - All the alias analysis passes are ported to be wrapper passes for the legacy PM and new-style analysis passes for the new PM with a shared result object. In some cases (most notably CFL), this is an extremely naive approach that we should revisit when we can specialize for the new pass manager. - BasicAA has been restructured to reflect that it is much more fundamentally a function analysis because it uses dominator trees and loop info that need to be constructed for each function. All of the references to getting alias analysis results have been updated to use the new aggregation interface. All the preservation and other pass management code has been updated accordingly. The way the FunctionAAResultsWrapperPass works is to detect the available alias analyses when run, and add them to the results object. This means that we should be able to continue to respect when various passes are added to the pipeline, for example adding CFL or adding TBAA passes should just cause their results to be available and to get folded into this. The exception to this rule is BasicAA which really needs to be a function pass due to using dominator trees and loop info. As a consequence, the FunctionAAResultsWrapperPass directly depends on BasicAA and always includes it in the aggregation. This has significant implications for preserving analyses. Generally, most passes shouldn't bother preserving FunctionAAResultsWrapperPass because rebuilding the results just updates the set of known AA passes. The exception to this rule are LoopPass instances which need to preserve all the function analyses that the loop pass manager will end up needing. This means preserving both BasicAAWrapperPass and the aggregating FunctionAAResultsWrapperPass. Now, when preserving an alias analysis, you do so by directly preserving that analysis. This is only necessary for non-immutable-pass-provided alias analyses though, and there are only three of interest: BasicAA, GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is preserved when needed because it (like DominatorTree and LoopInfo) is marked as a CFG-only pass. I've expanded GlobalsAA into the preserved set everywhere we previously were preserving all of AliasAnalysis, and I've added SCEVAA in the intersection of that with where we preserve SCEV itself. One significant challenge to all of this is that the CGSCC passes were actually using the alias analysis implementations by taking advantage of a pretty amazing set of loop holes in the old pass manager's analysis management code which allowed analysis groups to slide through in many cases. Moving away from analysis groups makes this problem much more obvious. To fix it, I've leveraged the flexibility the design of the new PM components provides to just directly construct the relevant alias analyses for the relevant functions in the IPO passes that need them. This is a bit hacky, but should go away with the new pass manager, and is already in many ways cleaner than the prior state. Another significant challenge is that various facilities of the old alias analysis infrastructure just don't fit any more. The most significant of these is the alias analysis 'counter' pass. That pass relied on the ability to snoop on AA queries at different points in the analysis group chain. Instead, I'm planning to build printing functionality directly into the aggregation layer. I've not included that in this patch merely to keep it smaller. Note that all of this needs a nearly complete rewrite of the AA documentation. I'm planning to do that, but I'd like to make sure the new design settles, and to flesh out a bit more of what it looks like in the new pass manager first. Differential Revision: http://reviews.llvm.org/D12080 llvm-svn: 247167
2015-09-10 01:55:00 +08:00
PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");
// Track whether it's ok to mark objc_storeStrong calls with the "tail"
// keyword. Be conservative if the function has variadic arguments.
// It seems that functions which "return twice" are also unsafe for the
// "tail" argument, because they are setjmp, which could need to
// return to an earlier stack state.
bool TailOkForStoreStrongs =
!F.isVarArg() && !F.callsFunctionThatReturnsTwice();
// For ObjC library calls which return their argument, replace uses of the
// argument with uses of the call return value, if it dominates the use. This
// reduces register pressure.
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
Instruction *Inst = &*I++;
DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
// First try to peephole Inst. If there is nothing further we can do in
// terms of undoing objc-arc-expand, process the next inst.
if (tryToPeepholeInstruction(F, Inst, I, DependingInstructions, Visited,
TailOkForStoreStrongs))
continue;
// Otherwise, try to undo objc-arc-expand.
// Don't use GetArgRCIdentityRoot because we don't want to look through bitcasts
// and such; to do the replacement, the argument must have type i8*.
[C++11] Add range based accessors for the Use-Def chain of a Value. This requires a number of steps. 1) Move value_use_iterator into the Value class as an implementation detail 2) Change it to actually be a *Use* iterator rather than a *User* iterator. 3) Add an adaptor which is a User iterator that always looks through the Use to the User. 4) Wrap these in Value::use_iterator and Value::user_iterator typedefs. 5) Add the range adaptors as Value::uses() and Value::users(). 6) Update *all* of the callers to correctly distinguish between whether they wanted a use_iterator (and to explicitly dig out the User when needed), or a user_iterator which makes the Use itself totally opaque. Because #6 requires churning essentially everything that walked the Use-Def chains, I went ahead and added all of the range adaptors and switched them to range-based loops where appropriate. Also because the renaming requires at least churning every line of code, it didn't make any sense to split these up into multiple commits -- all of which would touch all of the same lies of code. The result is still not quite optimal. The Value::use_iterator is a nice regular iterator, but Value::user_iterator is an iterator over User*s rather than over the User objects themselves. As a consequence, it fits a bit awkwardly into the range-based world and it has the weird extra-dereferencing 'operator->' that so many of our iterators have. I think this could be fixed by providing something which transforms a range of T&s into a range of T*s, but that *can* be separated into another patch, and it isn't yet 100% clear whether this is the right move. However, this change gets us most of the benefit and cleans up a substantial amount of code around Use and User. =] llvm-svn: 203364
2014-03-09 11:16:01 +08:00
Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
// TODO: Change this to a do-while.
for (;;) {
// If we're compiling bugpointed code, don't get in trouble.
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
break;
// Look through the uses of the pointer.
[C++11] Add range based accessors for the Use-Def chain of a Value. This requires a number of steps. 1) Move value_use_iterator into the Value class as an implementation detail 2) Change it to actually be a *Use* iterator rather than a *User* iterator. 3) Add an adaptor which is a User iterator that always looks through the Use to the User. 4) Wrap these in Value::use_iterator and Value::user_iterator typedefs. 5) Add the range adaptors as Value::uses() and Value::users(). 6) Update *all* of the callers to correctly distinguish between whether they wanted a use_iterator (and to explicitly dig out the User when needed), or a user_iterator which makes the Use itself totally opaque. Because #6 requires churning essentially everything that walked the Use-Def chains, I went ahead and added all of the range adaptors and switched them to range-based loops where appropriate. Also because the renaming requires at least churning every line of code, it didn't make any sense to split these up into multiple commits -- all of which would touch all of the same lies of code. The result is still not quite optimal. The Value::use_iterator is a nice regular iterator, but Value::user_iterator is an iterator over User*s rather than over the User objects themselves. As a consequence, it fits a bit awkwardly into the range-based world and it has the weird extra-dereferencing 'operator->' that so many of our iterators have. I think this could be fixed by providing something which transforms a range of T&s into a range of T*s, but that *can* be separated into another patch, and it isn't yet 100% clear whether this is the right move. However, this change gets us most of the benefit and cleans up a substantial amount of code around Use and User. =] llvm-svn: 203364
2014-03-09 11:16:01 +08:00
for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
UI != UE; ) {
[C++11] Add range based accessors for the Use-Def chain of a Value. This requires a number of steps. 1) Move value_use_iterator into the Value class as an implementation detail 2) Change it to actually be a *Use* iterator rather than a *User* iterator. 3) Add an adaptor which is a User iterator that always looks through the Use to the User. 4) Wrap these in Value::use_iterator and Value::user_iterator typedefs. 5) Add the range adaptors as Value::uses() and Value::users(). 6) Update *all* of the callers to correctly distinguish between whether they wanted a use_iterator (and to explicitly dig out the User when needed), or a user_iterator which makes the Use itself totally opaque. Because #6 requires churning essentially everything that walked the Use-Def chains, I went ahead and added all of the range adaptors and switched them to range-based loops where appropriate. Also because the renaming requires at least churning every line of code, it didn't make any sense to split these up into multiple commits -- all of which would touch all of the same lies of code. The result is still not quite optimal. The Value::use_iterator is a nice regular iterator, but Value::user_iterator is an iterator over User*s rather than over the User objects themselves. As a consequence, it fits a bit awkwardly into the range-based world and it has the weird extra-dereferencing 'operator->' that so many of our iterators have. I think this could be fixed by providing something which transforms a range of T&s into a range of T*s, but that *can* be separated into another patch, and it isn't yet 100% clear whether this is the right move. However, this change gets us most of the benefit and cleans up a substantial amount of code around Use and User. =] llvm-svn: 203364
2014-03-09 11:16:01 +08:00
// Increment UI now, because we may unlink its element.
Use &U = *UI++;
unsigned OperandNo = U.getOperandNo();
// If the call's return value dominates a use of the call's argument
// value, rewrite the use to use the return value. We check for
// reachability here because an unreachable call is considered to
// trivially dominate itself, which would lead us to rewriting its
// argument in terms of its return value, which would lead to
// infinite loops in GetArgRCIdentityRoot.
if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
Changed = true;
Instruction *Replacement = Inst;
Type *UseTy = U.get()->getType();
if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
// For PHI nodes, insert the bitcast in the predecessor block.
unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
BasicBlock *BB = PHI->getIncomingBlock(ValNo);
if (Replacement->getType() != UseTy)
Replacement = new BitCastInst(Replacement, UseTy, "",
&BB->back());
// While we're here, rewrite all edges for this PHI, rather
// than just one use at a time, to minimize the number of
// bitcasts we emit.
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
if (PHI->getIncomingBlock(i) == BB) {
// Keep the UI iterator valid.
if (UI != UE &&
&PHI->getOperandUse(
PHINode::getOperandNumForIncomingValue(i)) == &*UI)
++UI;
PHI->setIncomingValue(i, Replacement);
}
} else {
if (Replacement->getType() != UseTy)
Replacement = new BitCastInst(Replacement, UseTy, "",
cast<Instruction>(U.getUser()));
U.set(Replacement);
}
}
}
// If Arg is a no-op casted pointer, strip one level of casts and iterate.
if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
Arg = BI->getOperand(0);
else if (isa<GEPOperator>(Arg) &&
cast<GEPOperator>(Arg)->hasAllZeroIndices())
Arg = cast<GEPOperator>(Arg)->getPointerOperand();
else if (isa<GlobalAlias>(Arg) &&
Don't IPO over functions that can be de-refined Summary: Fixes PR26774. If you're aware of the issue, feel free to skip the "Motivation" section and jump directly to "This patch". Motivation: I define "refinement" as discarding behaviors from a program that the optimizer has license to discard. So transforming: ``` void f(unsigned x) { unsigned t = 5 / x; (void)t; } ``` to ``` void f(unsigned x) { } ``` is refinement, since the behavior went from "if x == 0 then undefined else nothing" to "nothing" (the optimizer has license to discard undefined behavior). Refinement is a fundamental aspect of many mid-level optimizations done by LLVM. For instance, transforming `x == (x + 1)` to `false` also involves refinement since the expression's value went from "if x is `undef` then { `true` or `false` } else { `false` }" to "`false`" (by definition, the optimizer has license to fold `undef` to any non-`undef` value). Unfortunately, refinement implies that the optimizer cannot assume that the implementation of a function it can see has all of the behavior an unoptimized or a differently optimized version of the same function can have. This is a problem for functions with comdat linkage, where a function can be replaced by an unoptimized or a differently optimized version of the same source level function. For instance, FunctionAttrs cannot assume a comdat function is actually `readnone` even if it does not have any loads or stores in it; since there may have been loads and stores in the "original function" that were refined out in the currently visible variant, and at the link step the linker may in fact choose an implementation with a load or a store. As an example, consider a function that does two atomic loads from the same memory location, and writes to memory only if the two values are not equal. The optimizer is allowed to refine this function by first CSE'ing the two loads, and the folding the comparision to always report that the two values are equal. Such a refined variant will look like it is `readonly`. However, the unoptimized version of the function can still write to memory (since the two loads //can// result in different values), and selecting the unoptimized version at link time will retroactively invalidate transforms we may have done under the assumption that the function does not write to memory. Note: this is not just a problem with atomics or with linking differently optimized object files. See PR26774 for more realistic examples that involved neither. This patch: This change introduces a new set of linkage types, predicated as `GlobalValue::mayBeDerefined` that returns true if the linkage type allows a function to be replaced by a differently optimized variant at link time. It then changes a set of IPO passes to bail out if they see such a function. Reviewers: chandlerc, hfinkel, dexonsmith, joker.eph, rnk Subscribers: mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D18634 llvm-svn: 265762
2016-04-08 08:48:30 +08:00
!cast<GlobalAlias>(Arg)->isInterposable())
Arg = cast<GlobalAlias>(Arg)->getAliasee();
else
break;
}
}
// If this function has no escaping allocas or suspicious vararg usage,
// objc_storeStrong calls can be marked with the "tail" keyword.
if (TailOkForStoreStrongs)
for (CallInst *CI : StoreStrongCalls)
CI->setTailCall();
StoreStrongCalls.clear();
return Changed;
}
//===----------------------------------------------------------------------===//
// Misc Pass Manager
//===----------------------------------------------------------------------===//
char ObjCARCContract::ID = 0;
INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract",
"ObjC ARC contraction", false, false)
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible with the new pass manager, and no longer relying on analysis groups. This builds essentially a ground-up new AA infrastructure stack for LLVM. The core ideas are the same that are used throughout the new pass manager: type erased polymorphism and direct composition. The design is as follows: - FunctionAAResults is a type-erasing alias analysis results aggregation interface to walk a single query across a range of results from different alias analyses. Currently this is function-specific as we always assume that aliasing queries are *within* a function. - AAResultBase is a CRTP utility providing stub implementations of various parts of the alias analysis result concept, notably in several cases in terms of other more general parts of the interface. This can be used to implement only a narrow part of the interface rather than the entire interface. This isn't really ideal, this logic should be hoisted into FunctionAAResults as currently it will cause a significant amount of redundant work, but it faithfully models the behavior of the prior infrastructure. - All the alias analysis passes are ported to be wrapper passes for the legacy PM and new-style analysis passes for the new PM with a shared result object. In some cases (most notably CFL), this is an extremely naive approach that we should revisit when we can specialize for the new pass manager. - BasicAA has been restructured to reflect that it is much more fundamentally a function analysis because it uses dominator trees and loop info that need to be constructed for each function. All of the references to getting alias analysis results have been updated to use the new aggregation interface. All the preservation and other pass management code has been updated accordingly. The way the FunctionAAResultsWrapperPass works is to detect the available alias analyses when run, and add them to the results object. This means that we should be able to continue to respect when various passes are added to the pipeline, for example adding CFL or adding TBAA passes should just cause their results to be available and to get folded into this. The exception to this rule is BasicAA which really needs to be a function pass due to using dominator trees and loop info. As a consequence, the FunctionAAResultsWrapperPass directly depends on BasicAA and always includes it in the aggregation. This has significant implications for preserving analyses. Generally, most passes shouldn't bother preserving FunctionAAResultsWrapperPass because rebuilding the results just updates the set of known AA passes. The exception to this rule are LoopPass instances which need to preserve all the function analyses that the loop pass manager will end up needing. This means preserving both BasicAAWrapperPass and the aggregating FunctionAAResultsWrapperPass. Now, when preserving an alias analysis, you do so by directly preserving that analysis. This is only necessary for non-immutable-pass-provided alias analyses though, and there are only three of interest: BasicAA, GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is preserved when needed because it (like DominatorTree and LoopInfo) is marked as a CFG-only pass. I've expanded GlobalsAA into the preserved set everywhere we previously were preserving all of AliasAnalysis, and I've added SCEVAA in the intersection of that with where we preserve SCEV itself. One significant challenge to all of this is that the CGSCC passes were actually using the alias analysis implementations by taking advantage of a pretty amazing set of loop holes in the old pass manager's analysis management code which allowed analysis groups to slide through in many cases. Moving away from analysis groups makes this problem much more obvious. To fix it, I've leveraged the flexibility the design of the new PM components provides to just directly construct the relevant alias analyses for the relevant functions in the IPO passes that need them. This is a bit hacky, but should go away with the new pass manager, and is already in many ways cleaner than the prior state. Another significant challenge is that various facilities of the old alias analysis infrastructure just don't fit any more. The most significant of these is the alias analysis 'counter' pass. That pass relied on the ability to snoop on AA queries at different points in the analysis group chain. Instead, I'm planning to build printing functionality directly into the aggregation layer. I've not included that in this patch merely to keep it smaller. Note that all of this needs a nearly complete rewrite of the AA documentation. I'm planning to do that, but I'd like to make sure the new design settles, and to flesh out a bit more of what it looks like in the new pass manager first. Differential Revision: http://reviews.llvm.org/D12080 llvm-svn: 247167
2015-09-10 01:55:00 +08:00
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract",
"ObjC ARC contraction", false, false)
void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible with the new pass manager, and no longer relying on analysis groups. This builds essentially a ground-up new AA infrastructure stack for LLVM. The core ideas are the same that are used throughout the new pass manager: type erased polymorphism and direct composition. The design is as follows: - FunctionAAResults is a type-erasing alias analysis results aggregation interface to walk a single query across a range of results from different alias analyses. Currently this is function-specific as we always assume that aliasing queries are *within* a function. - AAResultBase is a CRTP utility providing stub implementations of various parts of the alias analysis result concept, notably in several cases in terms of other more general parts of the interface. This can be used to implement only a narrow part of the interface rather than the entire interface. This isn't really ideal, this logic should be hoisted into FunctionAAResults as currently it will cause a significant amount of redundant work, but it faithfully models the behavior of the prior infrastructure. - All the alias analysis passes are ported to be wrapper passes for the legacy PM and new-style analysis passes for the new PM with a shared result object. In some cases (most notably CFL), this is an extremely naive approach that we should revisit when we can specialize for the new pass manager. - BasicAA has been restructured to reflect that it is much more fundamentally a function analysis because it uses dominator trees and loop info that need to be constructed for each function. All of the references to getting alias analysis results have been updated to use the new aggregation interface. All the preservation and other pass management code has been updated accordingly. The way the FunctionAAResultsWrapperPass works is to detect the available alias analyses when run, and add them to the results object. This means that we should be able to continue to respect when various passes are added to the pipeline, for example adding CFL or adding TBAA passes should just cause their results to be available and to get folded into this. The exception to this rule is BasicAA which really needs to be a function pass due to using dominator trees and loop info. As a consequence, the FunctionAAResultsWrapperPass directly depends on BasicAA and always includes it in the aggregation. This has significant implications for preserving analyses. Generally, most passes shouldn't bother preserving FunctionAAResultsWrapperPass because rebuilding the results just updates the set of known AA passes. The exception to this rule are LoopPass instances which need to preserve all the function analyses that the loop pass manager will end up needing. This means preserving both BasicAAWrapperPass and the aggregating FunctionAAResultsWrapperPass. Now, when preserving an alias analysis, you do so by directly preserving that analysis. This is only necessary for non-immutable-pass-provided alias analyses though, and there are only three of interest: BasicAA, GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is preserved when needed because it (like DominatorTree and LoopInfo) is marked as a CFG-only pass. I've expanded GlobalsAA into the preserved set everywhere we previously were preserving all of AliasAnalysis, and I've added SCEVAA in the intersection of that with where we preserve SCEV itself. One significant challenge to all of this is that the CGSCC passes were actually using the alias analysis implementations by taking advantage of a pretty amazing set of loop holes in the old pass manager's analysis management code which allowed analysis groups to slide through in many cases. Moving away from analysis groups makes this problem much more obvious. To fix it, I've leveraged the flexibility the design of the new PM components provides to just directly construct the relevant alias analyses for the relevant functions in the IPO passes that need them. This is a bit hacky, but should go away with the new pass manager, and is already in many ways cleaner than the prior state. Another significant challenge is that various facilities of the old alias analysis infrastructure just don't fit any more. The most significant of these is the alias analysis 'counter' pass. That pass relied on the ability to snoop on AA queries at different points in the analysis group chain. Instead, I'm planning to build printing functionality directly into the aggregation layer. I've not included that in this patch merely to keep it smaller. Note that all of this needs a nearly complete rewrite of the AA documentation. I'm planning to do that, but I'd like to make sure the new design settles, and to flesh out a bit more of what it looks like in the new pass manager first. Differential Revision: http://reviews.llvm.org/D12080 llvm-svn: 247167
2015-09-10 01:55:00 +08:00
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
Pass *llvm::createObjCARCContractPass() { return new ObjCARCContract(); }
bool ObjCARCContract::doInitialization(Module &M) {
// If nothing in the Module uses ARC, don't do anything.
Run = ModuleHasARC(M);
if (!Run)
return false;
EP.init(&M);
// Initialize RVInstMarker.
RVInstMarker = nullptr;
if (NamedMDNode *NMD =
M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
if (NMD->getNumOperands() == 1) {
const MDNode *N = NMD->getOperand(0);
if (N->getNumOperands() == 1)
if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
RVInstMarker = S;
}
return false;
}