2013-01-29 13:07:18 +08:00
|
|
|
//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
|
2013-01-29 11:03:03 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// \file
|
|
|
|
/// This file defines late ObjC ARC optimizations. ARC stands for Automatic
|
|
|
|
/// Reference Counting and is a system for managing reference counts for objects
|
|
|
|
/// in Objective C.
|
|
|
|
///
|
2013-02-07 12:12:57 +08:00
|
|
|
/// This specific file mainly deals with ``contracting'' multiple lower level
|
|
|
|
/// operations into singular higher level operations through pattern matching.
|
|
|
|
///
|
2013-01-29 11:03:03 +08:00
|
|
|
/// WARNING: This file knows about certain library functions. It recognizes them
|
|
|
|
/// by name, and hardwires knowledge of their semantics.
|
|
|
|
///
|
|
|
|
/// WARNING: This file knows about how certain Objective-C library functions are
|
|
|
|
/// used. Naive LLVM IR transformations which would otherwise be
|
|
|
|
/// behavior-preserving may break these assumptions.
|
|
|
|
///
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// TODO: ObjCARCContract could insert PHI nodes when uses aren't
|
|
|
|
// dominated by single calls.
|
|
|
|
|
2013-07-06 09:39:26 +08:00
|
|
|
#include "ARCRuntimeEntryPoints.h"
|
2013-01-29 11:03:03 +08:00
|
|
|
#include "DependencyAnalysis.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "ObjCARC.h"
|
2013-01-29 12:20:52 +08:00
|
|
|
#include "ProvenanceAnalysis.h"
|
2013-01-29 11:03:03 +08:00
|
|
|
#include "llvm/ADT/Statistic.h"
|
2014-01-13 17:26:24 +08:00
|
|
|
#include "llvm/IR/Dominators.h"
|
2013-01-29 11:03:03 +08:00
|
|
|
#include "llvm/IR/InlineAsm.h"
|
|
|
|
#include "llvm/IR/Operator.h"
|
2013-01-29 12:51:59 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2015-03-24 03:32:43 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::objcarc;
|
2014-04-22 10:55:47 +08:00
|
|
|
|
|
|
|
#define DEBUG_TYPE "objc-arc-contract"
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
|
|
|
|
STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
|
|
|
|
|
2015-02-19 08:42:27 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Declarations
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2013-01-29 11:03:03 +08:00
|
|
|
namespace {
|
|
|
|
/// \brief Late ARC optimizations
|
|
|
|
///
|
|
|
|
/// These change the IR in a way that makes it difficult to be analyzed by
|
|
|
|
/// ObjCARCOpt, so it's run late.
|
|
|
|
class ObjCARCContract : public FunctionPass {
|
|
|
|
bool Changed;
|
|
|
|
AliasAnalysis *AA;
|
|
|
|
DominatorTree *DT;
|
|
|
|
ProvenanceAnalysis PA;
|
2013-07-06 09:39:26 +08:00
|
|
|
ARCRuntimeEntryPoints EP;
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
/// A flag indicating whether this optimization pass should run.
|
|
|
|
bool Run;
|
|
|
|
|
|
|
|
/// The inline asm string to insert between calls and RetainRV calls to make
|
|
|
|
/// the optimization work on targets which need it.
|
2016-01-28 03:05:08 +08:00
|
|
|
const MDString *RVInstMarker;
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
/// The set of inserted objc_storeStrong calls. If at the end of walking the
|
|
|
|
/// function we have found no alloca instructions, these calls can be marked
|
|
|
|
/// "tail".
|
|
|
|
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
|
|
|
|
|
2015-02-19 08:42:30 +08:00
|
|
|
/// Returns true if we eliminated Inst.
|
|
|
|
bool tryToPeepholeInstruction(Function &F, Instruction *Inst,
|
|
|
|
inst_iterator &Iter,
|
|
|
|
SmallPtrSetImpl<Instruction *> &DepInsts,
|
|
|
|
SmallPtrSetImpl<const BasicBlock *> &Visited,
|
|
|
|
bool &TailOkForStoreStrong);
|
|
|
|
|
2015-02-19 08:42:27 +08:00
|
|
|
bool optimizeRetainCall(Function &F, Instruction *Retain);
|
2013-04-29 14:53:53 +08:00
|
|
|
|
2015-02-19 08:42:27 +08:00
|
|
|
bool
|
|
|
|
contractAutorelease(Function &F, Instruction *Autorelease,
|
2015-02-20 03:51:32 +08:00
|
|
|
ARCInstKind Class,
|
2015-02-19 08:42:27 +08:00
|
|
|
SmallPtrSetImpl<Instruction *> &DependingInstructions,
|
|
|
|
SmallPtrSetImpl<const BasicBlock *> &Visited);
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2015-02-19 08:42:34 +08:00
|
|
|
void tryToContractReleaseIntoStoreStrong(Instruction *Release,
|
|
|
|
inst_iterator &Iter);
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2014-03-05 17:10:37 +08:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
|
|
|
bool doInitialization(Module &M) override;
|
|
|
|
bool runOnFunction(Function &F) override;
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
public:
|
|
|
|
static char ID;
|
|
|
|
ObjCARCContract() : FunctionPass(ID) {
|
|
|
|
initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
|
|
|
};
|
2015-06-23 17:49:53 +08:00
|
|
|
}
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2015-02-19 08:42:27 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Implementation
|
|
|
|
//===----------------------------------------------------------------------===//
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2013-04-29 14:53:53 +08:00
|
|
|
/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
|
|
|
|
/// return value. We do this late so we do not disrupt the dataflow analysis in
|
|
|
|
/// ObjCARCOpt.
|
2015-02-19 08:42:27 +08:00
|
|
|
bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
|
2015-02-19 08:42:38 +08:00
|
|
|
ImmutableCallSite CS(GetArgRCIdentityRoot(Retain));
|
2013-04-29 14:53:53 +08:00
|
|
|
const Instruction *Call = CS.getInstruction();
|
|
|
|
if (!Call)
|
|
|
|
return false;
|
|
|
|
if (Call->getParent() != Retain->getParent())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check that the call is next to the retain.
|
2015-10-20 07:20:14 +08:00
|
|
|
BasicBlock::const_iterator I = ++Call->getIterator();
|
|
|
|
while (IsNoopInstruction(&*I))
|
|
|
|
++I;
|
2013-04-29 14:53:53 +08:00
|
|
|
if (&*I != Retain)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Turn it to an objc_retainAutoreleasedReturnValue.
|
|
|
|
Changed = true;
|
|
|
|
++NumPeeps;
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "Transforming objc_retain => "
|
|
|
|
"objc_retainAutoreleasedReturnValue since the operand is a "
|
|
|
|
"return value.\nOld: "<< *Retain << "\n");
|
|
|
|
|
|
|
|
// We do not have to worry about tail calls/does not throw since
|
|
|
|
// retain/retainRV have the same properties.
|
2015-03-16 15:02:24 +08:00
|
|
|
Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
|
2013-07-06 09:39:26 +08:00
|
|
|
cast<CallInst>(Retain)->setCalledFunction(Decl);
|
2013-04-29 14:53:53 +08:00
|
|
|
|
|
|
|
DEBUG(dbgs() << "New: " << *Retain << "\n");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-01-29 11:03:03 +08:00
|
|
|
/// Merge an autorelease with a retain into a fused call.
|
2015-02-19 08:42:27 +08:00
|
|
|
bool ObjCARCContract::contractAutorelease(
|
2015-02-20 03:51:32 +08:00
|
|
|
Function &F, Instruction *Autorelease, ARCInstKind Class,
|
2015-02-19 08:42:27 +08:00
|
|
|
SmallPtrSetImpl<Instruction *> &DependingInstructions,
|
|
|
|
SmallPtrSetImpl<const BasicBlock *> &Visited) {
|
2015-02-19 08:42:38 +08:00
|
|
|
const Value *Arg = GetArgRCIdentityRoot(Autorelease);
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
// Check that there are no instructions between the retain and the autorelease
|
|
|
|
// (such as an autorelease_pop) which may change the count.
|
2014-04-25 13:29:35 +08:00
|
|
|
CallInst *Retain = nullptr;
|
2015-02-20 03:51:32 +08:00
|
|
|
if (Class == ARCInstKind::AutoreleaseRV)
|
2013-01-29 11:03:03 +08:00
|
|
|
FindDependencies(RetainAutoreleaseRVDep, Arg,
|
|
|
|
Autorelease->getParent(), Autorelease,
|
|
|
|
DependingInstructions, Visited, PA);
|
|
|
|
else
|
|
|
|
FindDependencies(RetainAutoreleaseDep, Arg,
|
|
|
|
Autorelease->getParent(), Autorelease,
|
|
|
|
DependingInstructions, Visited, PA);
|
|
|
|
|
|
|
|
Visited.clear();
|
|
|
|
if (DependingInstructions.size() != 1) {
|
|
|
|
DependingInstructions.clear();
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
|
|
|
|
DependingInstructions.clear();
|
|
|
|
|
2015-02-20 03:51:32 +08:00
|
|
|
if (!Retain || GetBasicARCInstKind(Retain) != ARCInstKind::Retain ||
|
2015-02-19 08:42:38 +08:00
|
|
|
GetArgRCIdentityRoot(Retain) != Arg)
|
2013-01-29 11:03:03 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
Changed = true;
|
|
|
|
++NumPeeps;
|
|
|
|
|
2015-02-19 08:42:30 +08:00
|
|
|
DEBUG(dbgs() << " Fusing retain/autorelease!\n"
|
|
|
|
" Autorelease:" << *Autorelease << "\n"
|
|
|
|
" Retain: " << *Retain << "\n");
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2015-02-20 03:51:32 +08:00
|
|
|
Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
|
2015-03-16 15:02:24 +08:00
|
|
|
? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
|
|
|
|
: ARCRuntimeEntryPointKind::RetainAutorelease);
|
2013-07-06 09:39:26 +08:00
|
|
|
Retain->setCalledFunction(Decl);
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2015-02-19 08:42:30 +08:00
|
|
|
DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n");
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
EraseInstruction(Autorelease);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-02-20 08:02:49 +08:00
|
|
|
/// Scan forward from \p Load to the end of its basic block looking for the
/// simple store that writes back to the location \p Load read from, together
/// with \p Release. The two may appear in either order.
///
/// \returns the store when both it and \p Release were found and nothing in
/// between blocks the contraction; nullptr otherwise.
static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
                                                         Instruction *Release,
                                                         ProvenanceAnalysis &PA,
                                                         AliasAnalysis *AA) {
  // The location read by Load, plus its pointer with casts stripped so it can
  // be compared against candidate store destinations.
  MemoryLocation Loc = MemoryLocation::get(Load);
  auto *LocPtr = Loc.Ptr->stripPointerCasts();

  StoreInst *Store = nullptr;
  bool SawRelease = false;

  // Walk down from the load. Once both the store and the release have been
  // seen there is no more work to be done, so the walk stops.
  auto I = std::next(BasicBlock::iterator(Load));
  auto E = Load->getParent()->end();
  for (; I != E && !(Store && SawRelease); ++I) {
    Instruction *Inst = &*I;

    // At most one of {store, release} has been seen so far. If this is the
    // release, record that and move on.
    if (Inst == Release) {
      SawRelease = true;
      continue;
    }

    // Otherwise decide whether Inst is a "good" store, starting from its ARC
    // instruction class.
    ARCInstKind Class = GetBasicARCInstKind(Inst);

    // An unrelated retain is of no interest.
    //
    // TODO: This is one area where the optimization could be made more
    // aggressive.
    if (IsRetain(Class))
      continue;

    // The store has been seen, but the release has not.
    if (Store) {
      // The release will be moved from its current position up to the store,
      // so every instruction between the two must conservatively be unable to
      // use the RCIdentityRoot of the release. If Inst might use it, be
      // conservative and give up; otherwise Inst can be ignored.
      if (CanUse(Inst, Load, PA, Class))
        return nullptr;
      continue;
    }

    // No store seen yet. Instructions that cannot write to the loaded
    // location are irrelevant.
    if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod))
      continue;

    // Inst may write the location. Unless it is a simple store, something we
    // do not understand is writing to this memory, so the load cannot be
    // moved over it to any later store.
    Store = dyn_cast<StoreInst>(Inst);
    if (!Store || !Store->isSimple())
      return nullptr;

    // The store must target the same pointer the load read from. A store to
    // some other pointer that clobbers Loc.Ptr is unknown territory: bail.
    if (Store->getPointerOperand()->stripPointerCasts() != LocPtr)
      return nullptr;
  }

  // Success requires having found both the store and the release.
  return (Store && SawRelease) ? Store : nullptr;
}
|
|
|
|
|
|
|
|
/// Walk backwards from \p Store towards the start of its basic block looking
/// for a retain whose RCIdentityRoot is \p New.
///
/// \returns that retain, or nullptr if an instruction other than \p Release
/// that may decrement the reference count of \p New is encountered first, or
/// if the walk does not end on a retain of \p New.
static Instruction *
findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
                                    Instruction *Release,
                                    ProvenanceAnalysis &PA) {
  BasicBlock::iterator Cursor = Store->getIterator();
  BasicBlock::iterator First = Store->getParent()->begin();

  for (; Cursor != First &&
         GetBasicARCInstKind(&*Cursor) != ARCInstKind::Retain;
       --Cursor) {
    Instruction *Inst = &*Cursor;

    // Moving the retain down to the store is only safe if we can prove
    // conservatively that nothing besides the release can decrement reference
    // counts in between the retain and the store.
    if (CanDecrementRefCount(Inst, New, PA) && Inst != Release)
      return nullptr;
  }

  // The walk stopped either on a retain or on the first instruction of the
  // block; accept it only if it is a retain of the value being stored.
  Instruction *Candidate = &*Cursor;
  if (GetBasicARCInstKind(Candidate) != ARCInstKind::Retain ||
      GetArgRCIdentityRoot(Candidate) != New)
    return nullptr;
  return Candidate;
}
|
|
|
|
|
|
|
|
/// Attempt to merge an objc_release with a store, load, and objc_retain to form
|
|
|
|
/// an objc_storeStrong. An objc_storeStrong:
|
|
|
|
///
|
|
|
|
/// objc_storeStrong(i8** %old_ptr, i8* new_value)
|
|
|
|
///
|
|
|
|
/// is equivalent to the following IR sequence:
|
|
|
|
///
|
|
|
|
/// ; Load old value.
|
|
|
|
/// %old_value = load i8** %old_ptr (1)
|
|
|
|
///
|
|
|
|
/// ; Increment the new value and then release the old value. This must occur
|
|
|
|
/// ; in order in case old_value releases new_value in its destructor causing
|
|
|
|
/// ; us to potentially have a dangling ptr.
|
|
|
|
/// tail call i8* @objc_retain(i8* %new_value) (2)
|
|
|
|
/// tail call void @objc_release(i8* %old_value) (3)
|
|
|
|
///
|
|
|
|
/// ; Store the new_value into old_ptr
|
|
|
|
/// store i8* %new_value, i8** %old_ptr (4)
|
|
|
|
///
|
|
|
|
/// The safety of this optimization is based around the following
|
|
|
|
/// considerations:
|
|
|
|
///
|
|
|
|
/// 1. We are forming the store strong at the store. Thus to perform this
|
|
|
|
/// optimization it must be safe to move the retain, load, and release to
|
|
|
|
/// (4).
|
|
|
|
/// 2. We need to make sure that any re-orderings of (1), (2), (3), (4) are
|
|
|
|
/// safe.
|
|
|
|
void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release,
|
|
|
|
inst_iterator &Iter) {
|
|
|
|
// See if we are releasing something that we just loaded.
|
|
|
|
auto *Load = dyn_cast<LoadInst>(GetArgRCIdentityRoot(Release));
|
|
|
|
if (!Load || !Load->isSimple())
|
|
|
|
return;
|
|
|
|
|
|
|
|
// For now, require everything to be in one basic block.
|
|
|
|
BasicBlock *BB = Release->getParent();
|
|
|
|
if (Load->getParent() != BB)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// First scan down the BB from Load, looking for a store of the RCIdentityRoot
|
|
|
|
// of Load's
|
|
|
|
StoreInst *Store =
|
|
|
|
findSafeStoreForStoreStrongContraction(Load, Release, PA, AA);
|
|
|
|
// If we fail, bail.
|
|
|
|
if (!Store)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Then find what new_value's RCIdentity Root is.
|
|
|
|
Value *New = GetRCIdentityRoot(Store->getValueOperand());
|
|
|
|
|
|
|
|
// Then walk up the BB and look for a retain on New without any intervening
|
|
|
|
// instructions which conservatively might decrement ref counts.
|
|
|
|
Instruction *Retain =
|
|
|
|
findRetainForStoreStrongContraction(New, Store, Release, PA);
|
|
|
|
|
|
|
|
// If we fail, bail.
|
|
|
|
if (!Retain)
|
2015-02-20 03:51:32 +08:00
|
|
|
return;
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
Changed = true;
|
|
|
|
++NumStoreStrongs;
|
|
|
|
|
2015-02-19 08:42:30 +08:00
|
|
|
DEBUG(
|
|
|
|
llvm::dbgs() << " Contracting retain, release into objc_storeStrong.\n"
|
|
|
|
<< " Old:\n"
|
|
|
|
<< " Store: " << *Store << "\n"
|
|
|
|
<< " Release: " << *Release << "\n"
|
|
|
|
<< " Retain: " << *Retain << "\n"
|
|
|
|
<< " Load: " << *Load << "\n");
|
|
|
|
|
2013-01-29 11:03:03 +08:00
|
|
|
LLVMContext &C = Release->getContext();
|
|
|
|
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
|
|
|
|
Type *I8XX = PointerType::getUnqual(I8X);
|
|
|
|
|
|
|
|
Value *Args[] = { Load->getPointerOperand(), New };
|
|
|
|
if (Args[0]->getType() != I8XX)
|
|
|
|
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
|
|
|
|
if (Args[1]->getType() != I8X)
|
|
|
|
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
|
2015-03-16 15:02:24 +08:00
|
|
|
Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
|
2013-07-06 09:39:26 +08:00
|
|
|
CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store);
|
2013-01-29 11:03:03 +08:00
|
|
|
StoreStrong->setDoesNotThrow();
|
|
|
|
StoreStrong->setDebugLoc(Store->getDebugLoc());
|
|
|
|
|
|
|
|
// We can't set the tail flag yet, because we haven't yet determined
|
|
|
|
// whether there are any escaping allocas. Remember this call, so that
|
|
|
|
// we can set the tail flag once we know it's safe.
|
|
|
|
StoreStrongCalls.insert(StoreStrong);
|
|
|
|
|
2015-02-19 08:42:30 +08:00
|
|
|
DEBUG(llvm::dbgs() << " New Store Strong: " << *StoreStrong << "\n");
|
|
|
|
|
2017-04-05 11:44:09 +08:00
|
|
|
if (&*Iter == Retain) ++Iter;
|
2013-01-29 11:03:03 +08:00
|
|
|
if (&*Iter == Store) ++Iter;
|
|
|
|
Store->eraseFromParent();
|
|
|
|
Release->eraseFromParent();
|
|
|
|
EraseInstruction(Retain);
|
|
|
|
if (Load->use_empty())
|
|
|
|
Load->eraseFromParent();
|
|
|
|
}
|
|
|
|
|
2015-02-19 08:42:30 +08:00
|
|
|
bool ObjCARCContract::tryToPeepholeInstruction(
|
|
|
|
Function &F, Instruction *Inst, inst_iterator &Iter,
|
|
|
|
SmallPtrSetImpl<Instruction *> &DependingInsts,
|
|
|
|
SmallPtrSetImpl<const BasicBlock *> &Visited,
|
|
|
|
bool &TailOkForStoreStrongs) {
|
2013-01-29 11:03:03 +08:00
|
|
|
// Only these library routines return their argument. In particular,
|
|
|
|
// objc_retainBlock does not necessarily return its argument.
|
2015-02-20 03:51:32 +08:00
|
|
|
ARCInstKind Class = GetBasicARCInstKind(Inst);
|
2013-01-29 11:03:03 +08:00
|
|
|
switch (Class) {
|
2015-02-20 03:51:32 +08:00
|
|
|
case ARCInstKind::FusedRetainAutorelease:
|
|
|
|
case ARCInstKind::FusedRetainAutoreleaseRV:
|
2015-02-19 08:42:30 +08:00
|
|
|
return false;
|
2015-02-20 03:51:32 +08:00
|
|
|
case ARCInstKind::Autorelease:
|
|
|
|
case ARCInstKind::AutoreleaseRV:
|
2015-02-19 08:42:30 +08:00
|
|
|
return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
|
2015-02-20 03:51:32 +08:00
|
|
|
case ARCInstKind::Retain:
|
2013-04-29 14:53:53 +08:00
|
|
|
// Attempt to convert retains to retainrvs if they are next to function
|
|
|
|
// calls.
|
2015-02-19 08:42:27 +08:00
|
|
|
if (!optimizeRetainCall(F, Inst))
|
2015-02-19 08:42:30 +08:00
|
|
|
return false;
|
2013-04-29 14:53:53 +08:00
|
|
|
// If we succeed in our optimization, fall through.
|
2016-08-17 13:10:15 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
2016-01-28 03:05:08 +08:00
|
|
|
case ARCInstKind::RetainRV:
|
|
|
|
case ARCInstKind::ClaimRV: {
|
2013-01-29 11:03:03 +08:00
|
|
|
// If we're compiling for a target which needs a special inline-asm
|
2016-01-28 03:05:08 +08:00
|
|
|
// marker to do the return value optimization, insert it now.
|
|
|
|
if (!RVInstMarker)
|
2015-02-19 08:42:30 +08:00
|
|
|
return false;
|
2015-10-20 07:20:14 +08:00
|
|
|
BasicBlock::iterator BBI = Inst->getIterator();
|
2013-01-29 11:03:03 +08:00
|
|
|
BasicBlock *InstParent = Inst->getParent();
|
|
|
|
|
2016-01-28 03:05:08 +08:00
|
|
|
// Step up to see if the call immediately precedes the RV call.
|
2013-01-29 11:03:03 +08:00
|
|
|
// If it's an invoke, we have to cross a block boundary. And we have
|
|
|
|
// to carefully dodge no-op instructions.
|
|
|
|
do {
|
2016-02-22 04:39:50 +08:00
|
|
|
if (BBI == InstParent->begin()) {
|
2013-01-29 11:03:03 +08:00
|
|
|
BasicBlock *Pred = InstParent->getSinglePredecessor();
|
|
|
|
if (!Pred)
|
|
|
|
goto decline_rv_optimization;
|
2015-10-20 07:20:14 +08:00
|
|
|
BBI = Pred->getTerminator()->getIterator();
|
2013-01-29 11:03:03 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
--BBI;
|
2015-10-20 07:20:14 +08:00
|
|
|
} while (IsNoopInstruction(&*BBI));
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2015-02-19 08:42:38 +08:00
|
|
|
if (&*BBI == GetArgRCIdentityRoot(Inst)) {
|
2016-01-28 03:05:08 +08:00
|
|
|
DEBUG(dbgs() << "Adding inline asm marker for the return value "
|
|
|
|
"optimization.\n");
|
2013-01-29 11:03:03 +08:00
|
|
|
Changed = true;
|
2016-01-28 03:05:08 +08:00
|
|
|
InlineAsm *IA = InlineAsm::get(
|
|
|
|
FunctionType::get(Type::getVoidTy(Inst->getContext()),
|
|
|
|
/*isVarArg=*/false),
|
|
|
|
RVInstMarker->getString(),
|
|
|
|
/*Constraints=*/"", /*hasSideEffects=*/true);
|
2013-01-29 11:03:03 +08:00
|
|
|
CallInst::Create(IA, "", Inst);
|
|
|
|
}
|
|
|
|
decline_rv_optimization:
|
2015-02-19 08:42:30 +08:00
|
|
|
return false;
|
2013-01-29 11:03:03 +08:00
|
|
|
}
|
2015-02-20 03:51:32 +08:00
|
|
|
case ARCInstKind::InitWeak: {
|
2013-01-29 11:03:03 +08:00
|
|
|
// objc_initWeak(p, null) => *p = null
|
|
|
|
CallInst *CI = cast<CallInst>(Inst);
|
2013-03-25 17:27:43 +08:00
|
|
|
if (IsNullOrUndef(CI->getArgOperand(1))) {
|
2013-01-29 11:03:03 +08:00
|
|
|
Value *Null =
|
|
|
|
ConstantPointerNull::get(cast<PointerType>(CI->getType()));
|
|
|
|
Changed = true;
|
|
|
|
new StoreInst(Null, CI->getArgOperand(0), CI);
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
|
|
|
|
<< " New = " << *Null << "\n");
|
|
|
|
|
|
|
|
CI->replaceAllUsesWith(Null);
|
|
|
|
CI->eraseFromParent();
|
|
|
|
}
|
2015-02-19 08:42:30 +08:00
|
|
|
return true;
|
2013-01-29 11:03:03 +08:00
|
|
|
}
|
2015-02-20 03:51:32 +08:00
|
|
|
case ARCInstKind::Release:
|
2015-02-19 08:42:34 +08:00
|
|
|
// Try to form an objc store strong from our release. If we fail, there is
|
|
|
|
// nothing further to do below, so continue.
|
|
|
|
tryToContractReleaseIntoStoreStrong(Inst, Iter);
|
2015-02-19 08:42:30 +08:00
|
|
|
return true;
|
2015-02-20 03:51:32 +08:00
|
|
|
case ARCInstKind::User:
|
2013-01-29 11:03:03 +08:00
|
|
|
// Be conservative if the function has any alloca instructions.
|
|
|
|
// Technically we only care about escaping alloca instructions,
|
|
|
|
// but this is sufficient to handle some interesting cases.
|
|
|
|
if (isa<AllocaInst>(Inst))
|
|
|
|
TailOkForStoreStrongs = false;
|
2015-02-19 08:42:30 +08:00
|
|
|
return true;
|
2015-02-20 03:51:32 +08:00
|
|
|
case ARCInstKind::IntrinsicUser:
|
2013-03-23 05:38:36 +08:00
|
|
|
// Remove calls to @clang.arc.use(...).
|
|
|
|
Inst->eraseFromParent();
|
2015-02-19 08:42:30 +08:00
|
|
|
return true;
|
2013-01-29 11:03:03 +08:00
|
|
|
default:
|
2015-02-19 08:42:30 +08:00
|
|
|
return true;
|
2013-01-29 11:03:03 +08:00
|
|
|
}
|
2015-02-19 08:42:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Top Level Driver
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
bool ObjCARCContract::runOnFunction(Function &F) {
|
|
|
|
if (!EnableARCOpts)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// If nothing in the Module uses ARC, don't do anything.
|
|
|
|
if (!Run)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Changed = false;
|
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible
with the new pass manager, and no longer relying on analysis groups.
This builds essentially a ground-up new AA infrastructure stack for
LLVM. The core ideas are the same that are used throughout the new pass
manager: type erased polymorphism and direct composition. The design is
as follows:
- FunctionAAResults is a type-erasing alias analysis results aggregation
interface to walk a single query across a range of results from
different alias analyses. Currently this is function-specific as we
always assume that aliasing queries are *within* a function.
- AAResultBase is a CRTP utility providing stub implementations of
various parts of the alias analysis result concept, notably in several
cases in terms of other more general parts of the interface. This can
be used to implement only a narrow part of the interface rather than
the entire interface. This isn't really ideal, this logic should be
hoisted into FunctionAAResults as currently it will cause
a significant amount of redundant work, but it faithfully models the
behavior of the prior infrastructure.
- All the alias analysis passes are ported to be wrapper passes for the
legacy PM and new-style analysis passes for the new PM with a shared
result object. In some cases (most notably CFL), this is an extremely
naive approach that we should revisit when we can specialize for the
new pass manager.
- BasicAA has been restructured to reflect that it is much more
fundamentally a function analysis because it uses dominator trees and
loop info that need to be constructed for each function.
All of the references to getting alias analysis results have been
updated to use the new aggregation interface. All the preservation and
other pass management code has been updated accordingly.
The way the FunctionAAResultsWrapperPass works is to detect the
available alias analyses when run, and add them to the results object.
This means that we should be able to continue to respect when various
passes are added to the pipeline, for example adding CFL or adding TBAA
passes should just cause their results to be available and to get folded
into this. The exception to this rule is BasicAA which really needs to
be a function pass due to using dominator trees and loop info. As
a consequence, the FunctionAAResultsWrapperPass directly depends on
BasicAA and always includes it in the aggregation.
This has significant implications for preserving analyses. Generally,
most passes shouldn't bother preserving FunctionAAResultsWrapperPass
because rebuilding the results just updates the set of known AA passes.
The exception to this rule are LoopPass instances which need to preserve
all the function analyses that the loop pass manager will end up
needing. This means preserving both BasicAAWrapperPass and the
aggregating FunctionAAResultsWrapperPass.
Now, when preserving an alias analysis, you do so by directly preserving
that analysis. This is only necessary for non-immutable-pass-provided
alias analyses though, and there are only three of interest: BasicAA,
GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is
preserved when needed because it (like DominatorTree and LoopInfo) is
marked as a CFG-only pass. I've expanded GlobalsAA into the preserved
set everywhere we previously were preserving all of AliasAnalysis, and
I've added SCEVAA in the intersection of that with where we preserve
SCEV itself.
One significant challenge to all of this is that the CGSCC passes were
actually using the alias analysis implementations by taking advantage of
a pretty amazing set of loop holes in the old pass manager's analysis
management code which allowed analysis groups to slide through in many
cases. Moving away from analysis groups makes this problem much more
obvious. To fix it, I've leveraged the flexibility the design of the new
PM components provides to just directly construct the relevant alias
analyses for the relevant functions in the IPO passes that need them.
This is a bit hacky, but should go away with the new pass manager, and
is already in many ways cleaner than the prior state.
Another significant challenge is that various facilities of the old
alias analysis infrastructure just don't fit any more. The most
significant of these is the alias analysis 'counter' pass. That pass
relied on the ability to snoop on AA queries at different points in the
analysis group chain. Instead, I'm planning to build printing
functionality directly into the aggregation layer. I've not included
that in this patch merely to keep it smaller.
Note that all of this needs a nearly complete rewrite of the AA
documentation. I'm planning to do that, but I'd like to make sure the
new design settles, and to flesh out a bit more of what it looks like in
the new pass manager first.
Differential Revision: http://reviews.llvm.org/D12080
llvm-svn: 247167
2015-09-10 01:55:00 +08:00
|
|
|
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
|
2015-02-19 08:42:30 +08:00
|
|
|
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
|
|
|
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible
with the new pass manager, and no longer relying on analysis groups.
This builds essentially a ground-up new AA infrastructure stack for
LLVM. The core ideas are the same that are used throughout the new pass
manager: type erased polymorphism and direct composition. The design is
as follows:
- FunctionAAResults is a type-erasing alias analysis results aggregation
interface to walk a single query across a range of results from
different alias analyses. Currently this is function-specific as we
always assume that aliasing queries are *within* a function.
- AAResultBase is a CRTP utility providing stub implementations of
various parts of the alias analysis result concept, notably in several
cases in terms of other more general parts of the interface. This can
be used to implement only a narrow part of the interface rather than
the entire interface. This isn't really ideal, this logic should be
hoisted into FunctionAAResults as currently it will cause
a significant amount of redundant work, but it faithfully models the
behavior of the prior infrastructure.
- All the alias analysis passes are ported to be wrapper passes for the
legacy PM and new-style analysis passes for the new PM with a shared
result object. In some cases (most notably CFL), this is an extremely
naive approach that we should revisit when we can specialize for the
new pass manager.
- BasicAA has been restructured to reflect that it is much more
fundamentally a function analysis because it uses dominator trees and
loop info that need to be constructed for each function.
All of the references to getting alias analysis results have been
updated to use the new aggregation interface. All the preservation and
other pass management code has been updated accordingly.
The way the FunctionAAResultsWrapperPass works is to detect the
available alias analyses when run, and add them to the results object.
This means that we should be able to continue to respect when various
passes are added to the pipeline, for example adding CFL or adding TBAA
passes should just cause their results to be available and to get folded
into this. The exception to this rule is BasicAA which really needs to
be a function pass due to using dominator trees and loop info. As
a consequence, the FunctionAAResultsWrapperPass directly depends on
BasicAA and always includes it in the aggregation.
This has significant implications for preserving analyses. Generally,
most passes shouldn't bother preserving FunctionAAResultsWrapperPass
because rebuilding the results just updates the set of known AA passes.
The exception to this rule are LoopPass instances which need to preserve
all the function analyses that the loop pass manager will end up
needing. This means preserving both BasicAAWrapperPass and the
aggregating FunctionAAResultsWrapperPass.
Now, when preserving an alias analysis, you do so by directly preserving
that analysis. This is only necessary for non-immutable-pass-provided
alias analyses though, and there are only three of interest: BasicAA,
GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is
preserved when needed because it (like DominatorTree and LoopInfo) is
marked as a CFG-only pass. I've expanded GlobalsAA into the preserved
set everywhere we previously were preserving all of AliasAnalysis, and
I've added SCEVAA in the intersection of that with where we preserve
SCEV itself.
One significant challenge to all of this is that the CGSCC passes were
actually using the alias analysis implementations by taking advantage of
a pretty amazing set of loopholes in the old pass manager's analysis
management code which allowed analysis groups to slide through in many
cases. Moving away from analysis groups makes this problem much more
obvious. To fix it, I've leveraged the flexibility the design of the new
PM components provides to just directly construct the relevant alias
analyses for the relevant functions in the IPO passes that need them.
This is a bit hacky, but should go away with the new pass manager, and
is already in many ways cleaner than the prior state.
Another significant challenge is that various facilities of the old
alias analysis infrastructure just don't fit any more. The most
significant of these is the alias analysis 'counter' pass. That pass
relied on the ability to snoop on AA queries at different points in the
analysis group chain. Instead, I'm planning to build printing
functionality directly into the aggregation layer. I've not included
that in this patch merely to keep it smaller.
Note that all of this needs a nearly complete rewrite of the AA
documentation. I'm planning to do that, but I'd like to make sure the
new design settles, and to flesh out a bit more of what it looks like in
the new pass manager first.
Differential Revision: http://reviews.llvm.org/D12080
llvm-svn: 247167
2015-09-10 01:55:00 +08:00
|
|
|
PA.setAA(&getAnalysis<AAResultsWrapperPass>().getAAResults());
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2015-02-19 08:42:30 +08:00
|
|
|
DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n");
|
|
|
|
|
|
|
|
// Track whether it's ok to mark objc_storeStrong calls with the "tail"
|
|
|
|
// keyword. Be conservative if the function has variadic arguments.
|
|
|
|
// It seems that functions which "return twice" are also unsafe for the
|
|
|
|
// "tail" argument, because they are setjmp, which could need to
|
|
|
|
// return to an earlier stack state.
|
|
|
|
bool TailOkForStoreStrongs =
|
|
|
|
!F.isVarArg() && !F.callsFunctionThatReturnsTwice();
|
|
|
|
|
|
|
|
// For ObjC library calls which return their argument, replace uses of the
|
|
|
|
// argument with uses of the call return value, if it dominates the use. This
|
|
|
|
// reduces register pressure.
|
|
|
|
SmallPtrSet<Instruction *, 4> DependingInstructions;
|
|
|
|
SmallPtrSet<const BasicBlock *, 4> Visited;
|
|
|
|
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
|
|
|
|
Instruction *Inst = &*I++;
|
|
|
|
|
|
|
|
DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
|
|
|
|
|
|
|
|
// First try to peephole Inst. If there is nothing further we can do in
|
|
|
|
// terms of undoing objc-arc-expand, process the next inst.
|
|
|
|
if (tryToPeepholeInstruction(F, Inst, I, DependingInstructions, Visited,
|
|
|
|
TailOkForStoreStrongs))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Otherwise, try to undo objc-arc-expand.
|
2013-01-29 11:03:03 +08:00
|
|
|
|
2015-02-19 08:42:38 +08:00
|
|
|
// Don't use GetArgRCIdentityRoot because we don't want to look through bitcasts
|
2013-01-29 11:03:03 +08:00
|
|
|
// and such; to do the replacement, the argument must have type i8*.
|
2015-02-19 08:42:30 +08:00
|
|
|
|
2016-09-14 07:43:11 +08:00
|
|
|
// Function for replacing uses of Arg dominated by Inst.
|
|
|
|
auto ReplaceArgUses = [Inst, this](Value *Arg) {
|
2013-01-29 11:03:03 +08:00
|
|
|
// If we're compiling bugpointed code, don't get in trouble.
|
|
|
|
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
|
2016-09-14 07:43:11 +08:00
|
|
|
return;
|
|
|
|
|
2013-01-29 11:03:03 +08:00
|
|
|
// Look through the uses of the pointer.
|
2014-03-09 11:16:01 +08:00
|
|
|
for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
|
2013-01-29 11:03:03 +08:00
|
|
|
UI != UE; ) {
|
2014-03-09 11:16:01 +08:00
|
|
|
// Increment UI now, because we may unlink its element.
|
|
|
|
Use &U = *UI++;
|
|
|
|
unsigned OperandNo = U.getOperandNo();
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
// If the call's return value dominates a use of the call's argument
|
|
|
|
// value, rewrite the use to use the return value. We check for
|
|
|
|
// reachability here because an unreachable call is considered to
|
|
|
|
// trivially dominate itself, which would lead us to rewriting its
|
|
|
|
// argument in terms of its return value, which would lead to
|
2015-02-19 08:42:38 +08:00
|
|
|
// infinite loops in GetArgRCIdentityRoot.
|
2013-01-29 11:03:03 +08:00
|
|
|
if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
|
|
|
|
Changed = true;
|
|
|
|
Instruction *Replacement = Inst;
|
|
|
|
Type *UseTy = U.get()->getType();
|
|
|
|
if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
|
|
|
|
// For PHI nodes, insert the bitcast in the predecessor block.
|
|
|
|
unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
|
|
|
|
BasicBlock *BB = PHI->getIncomingBlock(ValNo);
|
|
|
|
if (Replacement->getType() != UseTy)
|
|
|
|
Replacement = new BitCastInst(Replacement, UseTy, "",
|
|
|
|
&BB->back());
|
|
|
|
// While we're here, rewrite all edges for this PHI, rather
|
|
|
|
// than just one use at a time, to minimize the number of
|
|
|
|
// bitcasts we emit.
|
|
|
|
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
|
|
|
|
if (PHI->getIncomingBlock(i) == BB) {
|
|
|
|
// Keep the UI iterator valid.
|
2014-03-19 06:32:43 +08:00
|
|
|
if (UI != UE &&
|
|
|
|
&PHI->getOperandUse(
|
|
|
|
PHINode::getOperandNumForIncomingValue(i)) == &*UI)
|
2013-01-29 11:03:03 +08:00
|
|
|
++UI;
|
|
|
|
PHI->setIncomingValue(i, Replacement);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (Replacement->getType() != UseTy)
|
|
|
|
Replacement = new BitCastInst(Replacement, UseTy, "",
|
|
|
|
cast<Instruction>(U.getUser()));
|
|
|
|
U.set(Replacement);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-09-14 07:43:11 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2016-09-14 07:53:43 +08:00
|
|
|
Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
|
|
|
|
Value *OrigArg = Arg;
|
2016-09-14 07:43:11 +08:00
|
|
|
|
|
|
|
// TODO: Change this to a do-while.
|
|
|
|
for (;;) {
|
|
|
|
ReplaceArgUses(Arg);
|
2013-01-29 11:03:03 +08:00
|
|
|
|
|
|
|
// If Arg is a no-op casted pointer, strip one level of casts and iterate.
|
|
|
|
if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
|
|
|
|
Arg = BI->getOperand(0);
|
|
|
|
else if (isa<GEPOperator>(Arg) &&
|
|
|
|
cast<GEPOperator>(Arg)->hasAllZeroIndices())
|
|
|
|
Arg = cast<GEPOperator>(Arg)->getPointerOperand();
|
|
|
|
else if (isa<GlobalAlias>(Arg) &&
|
Don't IPO over functions that can be de-refined
Summary:
Fixes PR26774.
If you're aware of the issue, feel free to skip the "Motivation"
section and jump directly to "This patch".
Motivation:
I define "refinement" as discarding behaviors from a program that the
optimizer has license to discard. So transforming:
```
void f(unsigned x) {
unsigned t = 5 / x;
(void)t;
}
```
to
```
void f(unsigned x) { }
```
is refinement, since the behavior went from "if x == 0 then undefined
else nothing" to "nothing" (the optimizer has license to discard
undefined behavior).
Refinement is a fundamental aspect of many mid-level optimizations done
by LLVM. For instance, transforming `x == (x + 1)` to `false` also
involves refinement since the expression's value went from "if x is
`undef` then { `true` or `false` } else { `false` }" to "`false`" (by
definition, the optimizer has license to fold `undef` to any non-`undef`
value).
Unfortunately, refinement implies that the optimizer cannot assume
that the implementation of a function it can see has all of the
behavior an unoptimized or a differently optimized version of the same
function can have. This is a problem for functions with comdat
linkage, where a function can be replaced by an unoptimized or a
differently optimized version of the same source level function.
For instance, FunctionAttrs cannot assume a comdat function is
actually `readnone` even if it does not have any loads or stores in
it; since there may have been loads and stores in the "original
function" that were refined out in the currently visible variant, and
at the link step the linker may in fact choose an implementation with
a load or a store. As an example, consider a function that does two
atomic loads from the same memory location, and writes to memory only
if the two values are not equal. The optimizer is allowed to refine
this function by first CSE'ing the two loads, and then folding the
comparison to always report that the two values are equal. Such a
refined variant will look like it is `readonly`. However, the
unoptimized version of the function can still write to memory (since
the two loads //can// result in different values), and selecting the
unoptimized version at link time will retroactively invalidate
transforms we may have done under the assumption that the function
does not write to memory.
Note: this is not just a problem with atomics or with linking
differently optimized object files. See PR26774 for more realistic
examples that involved neither.
This patch:
This change introduces a new set of linkage types, predicated as
`GlobalValue::mayBeDerefined` that returns true if the linkage type
allows a function to be replaced by a differently optimized variant at
link time. It then changes a set of IPO passes to bail out if they see
such a function.
Reviewers: chandlerc, hfinkel, dexonsmith, joker.eph, rnk
Subscribers: mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D18634
llvm-svn: 265762
2016-04-08 08:48:30 +08:00
|
|
|
!cast<GlobalAlias>(Arg)->isInterposable())
|
2013-01-29 11:03:03 +08:00
|
|
|
Arg = cast<GlobalAlias>(Arg)->getAliasee();
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
2016-09-14 07:43:11 +08:00
|
|
|
|
|
|
|
// Replace bitcast users of Arg that are dominated by Inst.
|
|
|
|
SmallVector<BitCastInst *, 2> BitCastUsers;
|
|
|
|
|
|
|
|
// Add all bitcast users of the function argument first.
|
|
|
|
for (User *U : OrigArg->users())
|
|
|
|
if (auto *BC = dyn_cast<BitCastInst>(U))
|
|
|
|
BitCastUsers.push_back(BC);
|
|
|
|
|
|
|
|
// Replace the bitcasts with the call return. Iterate until list is empty.
|
|
|
|
while (!BitCastUsers.empty()) {
|
|
|
|
auto *BC = BitCastUsers.pop_back_val();
|
|
|
|
for (User *U : BC->users())
|
|
|
|
if (auto *B = dyn_cast<BitCastInst>(U))
|
|
|
|
BitCastUsers.push_back(B);
|
|
|
|
|
|
|
|
ReplaceArgUses(BC);
|
|
|
|
}
|
2013-01-29 11:03:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// If this function has no escaping allocas or suspicious vararg usage,
|
|
|
|
// objc_storeStrong calls can be marked with the "tail" keyword.
|
|
|
|
if (TailOkForStoreStrongs)
|
2014-08-25 07:23:06 +08:00
|
|
|
for (CallInst *CI : StoreStrongCalls)
|
|
|
|
CI->setTailCall();
|
2013-01-29 11:03:03 +08:00
|
|
|
StoreStrongCalls.clear();
|
|
|
|
|
|
|
|
return Changed;
|
|
|
|
}
|
2015-02-19 08:42:27 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Misc Pass Manager
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
char ObjCARCContract::ID = 0;
|
|
|
|
INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract",
|
|
|
|
"ObjC ARC contraction", false, false)
|
[PM/AA] Rebuild LLVM's alias analysis infrastructure in a way compatible
with the new pass manager, and no longer relying on analysis groups.
This builds essentially a ground-up new AA infrastructure stack for
LLVM. The core ideas are the same that are used throughout the new pass
manager: type erased polymorphism and direct composition. The design is
as follows:
- FunctionAAResults is a type-erasing alias analysis results aggregation
interface to walk a single query across a range of results from
different alias analyses. Currently this is function-specific as we
always assume that aliasing queries are *within* a function.
- AAResultBase is a CRTP utility providing stub implementations of
various parts of the alias analysis result concept, notably in several
cases in terms of other more general parts of the interface. This can
be used to implement only a narrow part of the interface rather than
the entire interface. This isn't really ideal, this logic should be
hoisted into FunctionAAResults as currently it will cause
a significant amount of redundant work, but it faithfully models the
behavior of the prior infrastructure.
- All the alias analysis passes are ported to be wrapper passes for the
legacy PM and new-style analysis passes for the new PM with a shared
result object. In some cases (most notably CFL), this is an extremely
naive approach that we should revisit when we can specialize for the
new pass manager.
- BasicAA has been restructured to reflect that it is much more
fundamentally a function analysis because it uses dominator trees and
loop info that need to be constructed for each function.
All of the references to getting alias analysis results have been
updated to use the new aggregation interface. All the preservation and
other pass management code has been updated accordingly.
The way the FunctionAAResultsWrapperPass works is to detect the
available alias analyses when run, and add them to the results object.
This means that we should be able to continue to respect when various
passes are added to the pipeline, for example adding CFL or adding TBAA
passes should just cause their results to be available and to get folded
into this. The exception to this rule is BasicAA which really needs to
be a function pass due to using dominator trees and loop info. As
a consequence, the FunctionAAResultsWrapperPass directly depends on
BasicAA and always includes it in the aggregation.
This has significant implications for preserving analyses. Generally,
most passes shouldn't bother preserving FunctionAAResultsWrapperPass
because rebuilding the results just updates the set of known AA passes.
The exception to this rule are LoopPass instances which need to preserve
all the function analyses that the loop pass manager will end up
needing. This means preserving both BasicAAWrapperPass and the
aggregating FunctionAAResultsWrapperPass.
Now, when preserving an alias analysis, you do so by directly preserving
that analysis. This is only necessary for non-immutable-pass-provided
alias analyses though, and there are only three of interest: BasicAA,
GlobalsAA (formerly GlobalsModRef), and SCEVAA. Usually BasicAA is
preserved when needed because it (like DominatorTree and LoopInfo) is
marked as a CFG-only pass. I've expanded GlobalsAA into the preserved
set everywhere we previously were preserving all of AliasAnalysis, and
I've added SCEVAA in the intersection of that with where we preserve
SCEV itself.
One significant challenge to all of this is that the CGSCC passes were
actually using the alias analysis implementations by taking advantage of
a pretty amazing set of loop holes in the old pass manager's analysis
management code which allowed analysis groups to slide through in many
cases. Moving away from analysis groups makes this problem much more
obvious. To fix it, I've leveraged the flexibility the design of the new
PM components provides to just directly construct the relevant alias
analyses for the relevant functions in the IPO passes that need them.
This is a bit hacky, but should go away with the new pass manager, and
is already in many ways cleaner than the prior state.
Another significant challenge is that various facilities of the old
alias analysis infrastructure just don't fit any more. The most
significant of these is the alias analysis 'counter' pass. That pass
relied on the ability to snoop on AA queries at different points in the
analysis group chain. Instead, I'm planning to build printing
functionality directly into the aggregation layer. I've not included
that in this patch merely to keep it smaller.
Note that all of this needs a nearly complete rewrite of the AA
documentation. I'm planning to do that, but I'd like to make sure the
new design settles, and to flesh out a bit more of what it looks like in
the new pass manager first.
Differential Revision: http://reviews.llvm.org/D12080
llvm-svn: 247167
2015-09-10 01:55:00 +08:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
2015-02-19 08:42:27 +08:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
|
|
INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract",
|
|
|
|
"ObjC ARC contraction", false, false)
|
|
|
|
|
|
|
|
/// Declare the analyses this pass consumes and what it leaves intact.
///
/// \param AU  Usage record to fill in: alias-analysis results and the
///            dominator tree are marked as required, and the pass is marked
///            as preserving the CFG.
void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
  // Required inputs, fetched in runOnFunction via getAnalysis<>().
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();

  // Mark the pass as CFG-preserving.
  AU.setPreservesCFG();
}
|
|
|
|
|
|
|
|
/// Factory for the ObjC ARC contraction pass.
///
/// \returns a newly heap-allocated ObjCARCContract instance.
Pass *llvm::createObjCARCContractPass() {
  return new ObjCARCContract();
}
|
|
|
|
|
|
|
|
/// Per-module setup for the contraction pass.
///
/// Records in \c Run whether the module uses ARC at all (as reported by
/// ModuleHasARC). When it does, the runtime entry points are initialized for
/// \p M and \c RVInstMarker is populated from the
/// "clang.arc.retainAutoreleasedReturnValueMarker" named metadata, provided
/// that node has exactly one operand which in turn has exactly one MDString
/// operand. In every other case \c RVInstMarker is left null.
///
/// \param M  The module about to be processed.
/// \returns false on every path.
bool ObjCARCContract::doInitialization(Module &M) {
  // If nothing in the Module uses ARC, don't do anything.
  Run = ModuleHasARC(M);
  if (!Run)
    return false;

  EP.init(&M);

  // Initialize RVInstMarker; it stays null unless the metadata below has
  // exactly the expected shape.
  RVInstMarker = nullptr;

  NamedMDNode *MarkerMD =
      M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
  if (!MarkerMD || MarkerMD->getNumOperands() != 1)
    return false;

  const MDNode *MarkerNode = MarkerMD->getOperand(0);
  if (MarkerNode->getNumOperands() != 1)
    return false;

  if (const MDString *MarkerStr =
          dyn_cast<MDString>(MarkerNode->getOperand(0)))
    RVInstMarker = MarkerStr;

  return false;
}
|