forked from OSchip/llvm-project
Switch GVN and memdep to use PHITransAddr, which correctly handles
phi translation of complex expressions like &A[i+1]. This has the following benefits: 1. The phi translation logic is all contained in its own class with a strong interface and verification that it is self consistent. 2. The logic is more correct than before. Previously, if intermediate expressions got PHI translated, we'd miss the update and scan for the wrong pointers in predecessor blocks. @phi_trans2 is a testcase for this. 3. We have a lot less code in memdep. We can handle phi translation across blocks of things like @phi_trans3, which is pretty insane :). This patch should fix the miscompiles of 255.vortex, and I tested it with a bootstrap of llvm-gcc, llvm-test and dejagnu of course. llvm-svn: 90926
This commit is contained in:
parent
d938faff4b
commit
972e6d8d00
|
@ -31,6 +31,7 @@ namespace llvm {
|
||||||
class MemoryDependenceAnalysis;
|
class MemoryDependenceAnalysis;
|
||||||
class PredIteratorCache;
|
class PredIteratorCache;
|
||||||
class DominatorTree;
|
class DominatorTree;
|
||||||
|
class PHITransAddr;
|
||||||
|
|
||||||
/// MemDepResult - A memory dependence query can return one of three different
|
/// MemDepResult - A memory dependence query can return one of three different
|
||||||
/// answers, described below.
|
/// answers, described below.
|
||||||
|
@ -245,29 +246,6 @@ namespace llvm {
|
||||||
BasicBlock *BB,
|
BasicBlock *BB,
|
||||||
SmallVectorImpl<NonLocalDepEntry> &Result);
|
SmallVectorImpl<NonLocalDepEntry> &Result);
|
||||||
|
|
||||||
/// GetPHITranslatedValue - Find an available version of the specified value
|
|
||||||
/// PHI translated across the specified edge. If MemDep isn't able to
|
|
||||||
/// satisfy this request, it returns null.
|
|
||||||
Value *GetPHITranslatedValue(Value *V,
|
|
||||||
BasicBlock *CurBB, BasicBlock *PredBB,
|
|
||||||
const TargetData *TD) const;
|
|
||||||
|
|
||||||
/// GetAvailablePHITranslatedValue - Return the value computed by
|
|
||||||
/// PHITranslatePointer if it dominates PredBB, otherwise return null.
|
|
||||||
Value *GetAvailablePHITranslatedValue(Value *V,
|
|
||||||
BasicBlock *CurBB, BasicBlock *PredBB,
|
|
||||||
const TargetData *TD,
|
|
||||||
const DominatorTree &DT) const;
|
|
||||||
|
|
||||||
/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
|
|
||||||
/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
|
|
||||||
/// block. All newly created instructions are added to the NewInsts list.
|
|
||||||
Value *InsertPHITranslatedPointer(Value *V,
|
|
||||||
BasicBlock *CurBB, BasicBlock *PredBB,
|
|
||||||
const TargetData *TD,
|
|
||||||
const DominatorTree &DT,
|
|
||||||
SmallVectorImpl<Instruction*> &NewInsts) const;
|
|
||||||
|
|
||||||
/// removeInstruction - Remove an instruction from the dependence analysis,
|
/// removeInstruction - Remove an instruction from the dependence analysis,
|
||||||
/// updating the dependence of instructions that previously depended on it.
|
/// updating the dependence of instructions that previously depended on it.
|
||||||
void removeInstruction(Instruction *InstToRemove);
|
void removeInstruction(Instruction *InstToRemove);
|
||||||
|
@ -288,7 +266,7 @@ namespace llvm {
|
||||||
MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
|
MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
|
||||||
BasicBlock::iterator ScanIt,
|
BasicBlock::iterator ScanIt,
|
||||||
BasicBlock *BB);
|
BasicBlock *BB);
|
||||||
bool getNonLocalPointerDepFromBB(Value *Pointer, uint64_t Size,
|
bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t Size,
|
||||||
bool isLoad, BasicBlock *BB,
|
bool isLoad, BasicBlock *BB,
|
||||||
SmallVectorImpl<NonLocalDepEntry> &Result,
|
SmallVectorImpl<NonLocalDepEntry> &Result,
|
||||||
DenseMap<BasicBlock*, Value*> &Visited,
|
DenseMap<BasicBlock*, Value*> &Visited,
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "llvm/Analysis/Dominators.h"
|
#include "llvm/Analysis/Dominators.h"
|
||||||
#include "llvm/Analysis/InstructionSimplify.h"
|
#include "llvm/Analysis/InstructionSimplify.h"
|
||||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||||
|
#include "llvm/Analysis/PHITransAddr.h"
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
#include "llvm/Support/PredIteratorCache.h"
|
#include "llvm/Support/PredIteratorCache.h"
|
||||||
|
@ -587,12 +588,14 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
|
||||||
const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
|
const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
|
||||||
uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
|
uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
|
||||||
|
|
||||||
|
PHITransAddr Address(Pointer, TD);
|
||||||
|
|
||||||
// This is the set of blocks we've inspected, and the pointer we consider in
|
// This is the set of blocks we've inspected, and the pointer we consider in
|
||||||
// each block. Because of critical edges, we currently bail out if querying
|
// each block. Because of critical edges, we currently bail out if querying
|
||||||
// a block with multiple different pointers. This can happen during PHI
|
// a block with multiple different pointers. This can happen during PHI
|
||||||
// translation.
|
// translation.
|
||||||
DenseMap<BasicBlock*, Value*> Visited;
|
DenseMap<BasicBlock*, Value*> Visited;
|
||||||
if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
|
if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB,
|
||||||
Result, Visited, true))
|
Result, Visited, true))
|
||||||
return;
|
return;
|
||||||
Result.clear();
|
Result.clear();
|
||||||
|
@ -707,275 +710,6 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// isPHITranslatable - Return true if the specified computation is derived from
|
|
||||||
/// a PHI node in the current block and if it is simple enough for us to handle.
|
|
||||||
static bool isPHITranslatable(Instruction *Inst) {
|
|
||||||
if (isa<PHINode>(Inst))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// We can handle bitcast of a PHI, but the PHI needs to be in the same block
|
|
||||||
// as the bitcast.
|
|
||||||
if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
|
|
||||||
Instruction *OpI = dyn_cast<Instruction>(BC->getOperand(0));
|
|
||||||
if (OpI == 0 || OpI->getParent() != Inst->getParent())
|
|
||||||
return true;
|
|
||||||
return isPHITranslatable(OpI);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We can translate a GEP if all of its operands defined in this block are phi
|
|
||||||
// translatable.
|
|
||||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
|
|
||||||
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
|
|
||||||
Instruction *OpI = dyn_cast<Instruction>(GEP->getOperand(i));
|
|
||||||
if (OpI == 0 || OpI->getParent() != Inst->getParent())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!isPHITranslatable(OpI))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Inst->getOpcode() == Instruction::Add &&
|
|
||||||
isa<ConstantInt>(Inst->getOperand(1))) {
|
|
||||||
Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
|
|
||||||
if (OpI == 0 || OpI->getParent() != Inst->getParent())
|
|
||||||
return true;
|
|
||||||
return isPHITranslatable(OpI);
|
|
||||||
}
|
|
||||||
|
|
||||||
// cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
|
|
||||||
// if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
|
|
||||||
// cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// GetPHITranslatedValue - Given a computation that satisfied the
|
|
||||||
/// isPHITranslatable predicate, see if we can translate the computation into
|
|
||||||
/// the specified predecessor block. If so, return that value.
|
|
||||||
Value *MemoryDependenceAnalysis::
|
|
||||||
GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred,
|
|
||||||
const TargetData *TD) const {
|
|
||||||
// If the input value is not an instruction, or if it is not defined in CurBB,
|
|
||||||
// then we don't need to phi translate it.
|
|
||||||
Instruction *Inst = dyn_cast<Instruction>(InVal);
|
|
||||||
if (Inst == 0 || Inst->getParent() != CurBB)
|
|
||||||
return InVal;
|
|
||||||
|
|
||||||
if (PHINode *PN = dyn_cast<PHINode>(Inst))
|
|
||||||
return PN->getIncomingValueForBlock(Pred);
|
|
||||||
|
|
||||||
// Handle bitcast of PHI.
|
|
||||||
if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
|
|
||||||
// PHI translate the input operand.
|
|
||||||
Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD);
|
|
||||||
if (PHIIn == 0) return 0;
|
|
||||||
|
|
||||||
// Constants are trivial to phi translate.
|
|
||||||
if (Constant *C = dyn_cast<Constant>(PHIIn))
|
|
||||||
return ConstantExpr::getBitCast(C, BC->getType());
|
|
||||||
|
|
||||||
// Otherwise we have to see if a bitcasted version of the incoming pointer
|
|
||||||
// is available. If so, we can use it, otherwise we have to fail.
|
|
||||||
for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
|
|
||||||
UI != E; ++UI) {
|
|
||||||
if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
|
|
||||||
if (BCI->getType() == BC->getType())
|
|
||||||
return BCI;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle getelementptr with at least one PHI translatable operand.
|
|
||||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
|
|
||||||
SmallVector<Value*, 8> GEPOps;
|
|
||||||
BasicBlock *CurBB = GEP->getParent();
|
|
||||||
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
|
|
||||||
Value *GEPOp = GEP->getOperand(i);
|
|
||||||
// No PHI translation is needed of operands whose values are live in to
|
|
||||||
// the predecessor block.
|
|
||||||
if (!isa<Instruction>(GEPOp) ||
|
|
||||||
cast<Instruction>(GEPOp)->getParent() != CurBB) {
|
|
||||||
GEPOps.push_back(GEPOp);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the operand is a phi node, do phi translation.
|
|
||||||
Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD);
|
|
||||||
if (InOp == 0) return 0;
|
|
||||||
|
|
||||||
GEPOps.push_back(InOp);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simplify the GEP to handle 'gep x, 0' -> x etc.
|
|
||||||
if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD))
|
|
||||||
return V;
|
|
||||||
|
|
||||||
// Scan to see if we have this GEP available.
|
|
||||||
Value *APHIOp = GEPOps[0];
|
|
||||||
for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
|
|
||||||
UI != E; ++UI) {
|
|
||||||
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
|
|
||||||
if (GEPI->getType() == GEP->getType() &&
|
|
||||||
GEPI->getNumOperands() == GEPOps.size() &&
|
|
||||||
GEPI->getParent()->getParent() == CurBB->getParent()) {
|
|
||||||
bool Mismatch = false;
|
|
||||||
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
|
|
||||||
if (GEPI->getOperand(i) != GEPOps[i]) {
|
|
||||||
Mismatch = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!Mismatch)
|
|
||||||
return GEPI;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle add with a constant RHS.
|
|
||||||
if (Inst->getOpcode() == Instruction::Add &&
|
|
||||||
isa<ConstantInt>(Inst->getOperand(1))) {
|
|
||||||
// PHI translate the LHS.
|
|
||||||
Value *LHS;
|
|
||||||
Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
|
|
||||||
Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
|
|
||||||
bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
|
|
||||||
bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
|
|
||||||
|
|
||||||
if (OpI == 0 || OpI->getParent() != Inst->getParent())
|
|
||||||
LHS = Inst->getOperand(0);
|
|
||||||
else {
|
|
||||||
LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD);
|
|
||||||
if (LHS == 0)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the PHI translated LHS is an add of a constant, fold the immediates.
|
|
||||||
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
|
|
||||||
if (BOp->getOpcode() == Instruction::Add)
|
|
||||||
if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
|
|
||||||
LHS = BOp->getOperand(0);
|
|
||||||
RHS = ConstantExpr::getAdd(RHS, CI);
|
|
||||||
isNSW = isNUW = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// See if the add simplifies away.
|
|
||||||
if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD))
|
|
||||||
return Res;
|
|
||||||
|
|
||||||
// Otherwise, see if we have this add available somewhere.
|
|
||||||
for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
|
|
||||||
UI != E; ++UI) {
|
|
||||||
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
|
|
||||||
if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
|
|
||||||
BO->getParent()->getParent() == CurBB->getParent())
|
|
||||||
return BO;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// GetAvailablePHITranslatePointer - Return the value computed by
|
|
||||||
/// PHITranslatePointer if it dominates PredBB, otherwise return null.
|
|
||||||
Value *MemoryDependenceAnalysis::
|
|
||||||
GetAvailablePHITranslatedValue(Value *V,
|
|
||||||
BasicBlock *CurBB, BasicBlock *PredBB,
|
|
||||||
const TargetData *TD,
|
|
||||||
const DominatorTree &DT) const {
|
|
||||||
// See if PHI translation succeeds.
|
|
||||||
V = GetPHITranslatedValue(V, CurBB, PredBB, TD);
|
|
||||||
if (V == 0) return 0;
|
|
||||||
|
|
||||||
// Make sure the value is live in the predecessor.
|
|
||||||
if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
|
|
||||||
if (!DT.dominates(Inst->getParent(), PredBB))
|
|
||||||
return 0;
|
|
||||||
return V;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
|
|
||||||
/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
|
|
||||||
/// block. All newly created instructions are added to the NewInsts list.
|
|
||||||
///
|
|
||||||
Value *MemoryDependenceAnalysis::
|
|
||||||
InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
|
|
||||||
BasicBlock *PredBB, const TargetData *TD,
|
|
||||||
const DominatorTree &DT,
|
|
||||||
SmallVectorImpl<Instruction*> &NewInsts) const {
|
|
||||||
// See if we have a version of this value already available and dominating
|
|
||||||
// PredBB. If so, there is no need to insert a new copy.
|
|
||||||
if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT))
|
|
||||||
return Res;
|
|
||||||
|
|
||||||
// If we don't have an available version of this value, it must be an
|
|
||||||
// instruction.
|
|
||||||
Instruction *Inst = cast<Instruction>(InVal);
|
|
||||||
|
|
||||||
// Handle bitcast of PHI translatable value.
|
|
||||||
if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
|
|
||||||
Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0),
|
|
||||||
CurBB, PredBB, TD, DT, NewInsts);
|
|
||||||
if (OpVal == 0) return 0;
|
|
||||||
|
|
||||||
// Otherwise insert a bitcast at the end of PredBB.
|
|
||||||
BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
|
|
||||||
InVal->getName()+".phi.trans.insert",
|
|
||||||
PredBB->getTerminator());
|
|
||||||
NewInsts.push_back(New);
|
|
||||||
return New;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle getelementptr with at least one PHI operand.
|
|
||||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
|
|
||||||
SmallVector<Value*, 8> GEPOps;
|
|
||||||
BasicBlock *CurBB = GEP->getParent();
|
|
||||||
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
|
|
||||||
Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i),
|
|
||||||
CurBB, PredBB, TD, DT, NewInsts);
|
|
||||||
if (OpVal == 0) return 0;
|
|
||||||
GEPOps.push_back(OpVal);
|
|
||||||
}
|
|
||||||
|
|
||||||
GetElementPtrInst *Result =
|
|
||||||
GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
|
|
||||||
InVal->getName()+".phi.trans.insert",
|
|
||||||
PredBB->getTerminator());
|
|
||||||
Result->setIsInBounds(GEP->isInBounds());
|
|
||||||
NewInsts.push_back(Result);
|
|
||||||
return Result;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
// FIXME: This code works, but it is unclear that we actually want to insert
|
|
||||||
// a big chain of computation in order to make a value available in a block.
|
|
||||||
// This needs to be evaluated carefully to consider its cost trade offs.
|
|
||||||
|
|
||||||
// Handle add with a constant RHS.
|
|
||||||
if (Inst->getOpcode() == Instruction::Add &&
|
|
||||||
isa<ConstantInt>(Inst->getOperand(1))) {
|
|
||||||
// PHI translate the LHS.
|
|
||||||
Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0),
|
|
||||||
CurBB, PredBB, TD, DT, NewInsts);
|
|
||||||
if (OpVal == 0) return 0;
|
|
||||||
|
|
||||||
BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
|
|
||||||
InVal->getName()+".phi.trans.insert",
|
|
||||||
PredBB->getTerminator());
|
|
||||||
Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
|
|
||||||
Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
|
|
||||||
NewInsts.push_back(Res);
|
|
||||||
return Res;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
|
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
|
||||||
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
|
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
|
||||||
/// results to the results vector and keep track of which blocks are visited in
|
/// results to the results vector and keep track of which blocks are visited in
|
||||||
|
@ -989,14 +723,14 @@ InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
|
||||||
/// not compute dependence information for some reason. This should be treated
|
/// not compute dependence information for some reason. This should be treated
|
||||||
/// as a clobber dependence on the first instruction in the predecessor block.
|
/// as a clobber dependence on the first instruction in the predecessor block.
|
||||||
bool MemoryDependenceAnalysis::
|
bool MemoryDependenceAnalysis::
|
||||||
getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
|
||||||
bool isLoad, BasicBlock *StartBB,
|
bool isLoad, BasicBlock *StartBB,
|
||||||
SmallVectorImpl<NonLocalDepEntry> &Result,
|
SmallVectorImpl<NonLocalDepEntry> &Result,
|
||||||
DenseMap<BasicBlock*, Value*> &Visited,
|
DenseMap<BasicBlock*, Value*> &Visited,
|
||||||
bool SkipFirstBlock) {
|
bool SkipFirstBlock) {
|
||||||
|
|
||||||
// Look up the cached info for Pointer.
|
// Look up the cached info for Pointer.
|
||||||
ValueIsLoadPair CacheKey(Pointer, isLoad);
|
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
|
||||||
|
|
||||||
std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
|
std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
|
||||||
&NonLocalPointerDeps[CacheKey];
|
&NonLocalPointerDeps[CacheKey];
|
||||||
|
@ -1014,7 +748,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
|
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
|
DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
|
||||||
if (VI == Visited.end() || VI->second == Pointer) continue;
|
if (VI == Visited.end() || VI->second == Pointer.getAddr())
|
||||||
|
continue;
|
||||||
|
|
||||||
// We have a pointer mismatch in a block. Just return clobber, saying
|
// We have a pointer mismatch in a block. Just return clobber, saying
|
||||||
// that something was clobbered in this result. We could also do a
|
// that something was clobbered in this result. We could also do a
|
||||||
|
@ -1025,7 +760,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
|
|
||||||
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
|
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
Visited.insert(std::make_pair(I->first, Pointer));
|
Visited.insert(std::make_pair(I->first, Pointer.getAddr()));
|
||||||
if (!I->second.isNonLocal())
|
if (!I->second.isNonLocal())
|
||||||
Result.push_back(*I);
|
Result.push_back(*I);
|
||||||
}
|
}
|
||||||
|
@ -1065,8 +800,9 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
// Get the dependency info for Pointer in BB. If we have cached
|
// Get the dependency info for Pointer in BB. If we have cached
|
||||||
// information, we will use it, otherwise we compute it.
|
// information, we will use it, otherwise we compute it.
|
||||||
DEBUG(AssertSorted(*Cache, NumSortedEntries));
|
DEBUG(AssertSorted(*Cache, NumSortedEntries));
|
||||||
MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad,
|
MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize,
|
||||||
BB, Cache, NumSortedEntries);
|
isLoad, BB, Cache,
|
||||||
|
NumSortedEntries);
|
||||||
|
|
||||||
// If we got a Def or Clobber, add this to the list of results.
|
// If we got a Def or Clobber, add this to the list of results.
|
||||||
if (!Dep.isNonLocal()) {
|
if (!Dep.isNonLocal()) {
|
||||||
|
@ -1077,18 +813,14 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
|
|
||||||
// If 'Pointer' is an instruction defined in this block, then we need to do
|
// If 'Pointer' is an instruction defined in this block, then we need to do
|
||||||
// phi translation to change it into a value live in the predecessor block.
|
// phi translation to change it into a value live in the predecessor block.
|
||||||
// If phi translation fails, then we can't continue dependence analysis.
|
// If not, we just add the predecessors to the worklist and scan them with
|
||||||
Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
|
// the same Pointer.
|
||||||
bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
|
if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
|
||||||
|
|
||||||
// If no PHI translation is needed, just add all the predecessors of this
|
|
||||||
// block to scan them as well.
|
|
||||||
if (!NeedsPHITranslation) {
|
|
||||||
SkipFirstBlock = false;
|
SkipFirstBlock = false;
|
||||||
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
|
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
|
||||||
// Verify that we haven't looked at this block yet.
|
// Verify that we haven't looked at this block yet.
|
||||||
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
|
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
|
||||||
InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
|
InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
|
||||||
if (InsertRes.second) {
|
if (InsertRes.second) {
|
||||||
// First time we've looked at *PI.
|
// First time we've looked at *PI.
|
||||||
Worklist.push_back(*PI);
|
Worklist.push_back(*PI);
|
||||||
|
@ -1098,16 +830,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
// If we have seen this block before, but it was with a different
|
// If we have seen this block before, but it was with a different
|
||||||
// pointer then we have a phi translation failure and we have to treat
|
// pointer then we have a phi translation failure and we have to treat
|
||||||
// this as a clobber.
|
// this as a clobber.
|
||||||
if (InsertRes.first->second != Pointer)
|
if (InsertRes.first->second != Pointer.getAddr())
|
||||||
goto PredTranslationFailure;
|
goto PredTranslationFailure;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we do need to do phi translation, then there are a bunch of different
|
// We do need to do phi translation, if we know ahead of time we can't phi
|
||||||
// cases, because we have to find a Value* live in the predecessor block. We
|
// translate this value, don't even try.
|
||||||
// know that PtrInst is defined in this block at least.
|
if (!Pointer.IsPotentiallyPHITranslatable())
|
||||||
|
goto PredTranslationFailure;
|
||||||
|
|
||||||
// We may have added values to the cache list before this PHI translation.
|
// We may have added values to the cache list before this PHI translation.
|
||||||
// If so, we haven't done anything to ensure that the cache remains sorted.
|
// If so, we haven't done anything to ensure that the cache remains sorted.
|
||||||
// Sort it now (if needed) so that recursive invocations of
|
// Sort it now (if needed) so that recursive invocations of
|
||||||
|
@ -1117,19 +850,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
|
SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
|
||||||
NumSortedEntries = Cache->size();
|
NumSortedEntries = Cache->size();
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this is a computation derived from a PHI node, use the suitably
|
|
||||||
// translated incoming values for each pred as the phi translated version.
|
|
||||||
if (!isPHITranslatable(PtrInst))
|
|
||||||
goto PredTranslationFailure;
|
|
||||||
|
|
||||||
Cache = 0;
|
Cache = 0;
|
||||||
|
|
||||||
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
|
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
|
||||||
BasicBlock *Pred = *PI;
|
BasicBlock *Pred = *PI;
|
||||||
// Get the PHI translated pointer in this predecessor. This can fail and
|
|
||||||
// return null if not translatable.
|
// Get the PHI translated pointer in this predecessor. This can fail if
|
||||||
Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD);
|
// not translatable, in which case the getAddr() returns null.
|
||||||
|
PHITransAddr PredPointer(Pointer);
|
||||||
|
PredPointer.PHITranslateValue(BB, Pred);
|
||||||
|
|
||||||
|
Value *PredPtrVal = PredPointer.getAddr();
|
||||||
|
|
||||||
// Check to see if we have already visited this pred block with another
|
// Check to see if we have already visited this pred block with another
|
||||||
// pointer. If so, we can't do this lookup. This failure can occur
|
// pointer. If so, we can't do this lookup. This failure can occur
|
||||||
|
@ -1137,12 +868,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
// the successor translates to a pointer value different than the
|
// the successor translates to a pointer value different than the
|
||||||
// pointer the block was first analyzed with.
|
// pointer the block was first analyzed with.
|
||||||
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
|
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
|
||||||
InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
|
InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
|
||||||
|
|
||||||
if (!InsertRes.second) {
|
if (!InsertRes.second) {
|
||||||
// If the predecessor was visited with PredPtr, then we already did
|
// If the predecessor was visited with PredPtr, then we already did
|
||||||
// the analysis and can ignore it.
|
// the analysis and can ignore it.
|
||||||
if (InsertRes.first->second == PredPtr)
|
if (InsertRes.first->second == PredPtrVal)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Otherwise, the block was previously analyzed with a different
|
// Otherwise, the block was previously analyzed with a different
|
||||||
|
@ -1155,7 +886,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
// predecessor, then we have to assume that the pointer is clobbered in
|
// predecessor, then we have to assume that the pointer is clobbered in
|
||||||
// that predecessor. We can still do PRE of the load, which would insert
|
// that predecessor. We can still do PRE of the load, which would insert
|
||||||
// a computation of the pointer in this predecessor.
|
// a computation of the pointer in this predecessor.
|
||||||
if (PredPtr == 0) {
|
if (PredPtrVal == 0) {
|
||||||
// Add the entry to the Result list.
|
// Add the entry to the Result list.
|
||||||
NonLocalDepEntry Entry(Pred,
|
NonLocalDepEntry Entry(Pred,
|
||||||
MemDepResult::getClobber(Pred->getTerminator()));
|
MemDepResult::getClobber(Pred->getTerminator()));
|
||||||
|
@ -1201,7 +932,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
|
||||||
|
|
||||||
// If we have a problem phi translating, fall through to the code below
|
// If we have a problem phi translating, fall through to the code below
|
||||||
// to handle the failure condition.
|
// to handle the failure condition.
|
||||||
if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
|
if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred,
|
||||||
Result, Visited))
|
Result, Visited))
|
||||||
goto PredTranslationFailure;
|
goto PredTranslationFailure;
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
#include "llvm/Analysis/Dominators.h"
|
#include "llvm/Analysis/Dominators.h"
|
||||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||||
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
|
||||||
|
#include "llvm/Analysis/PHITransAddr.h"
|
||||||
#include "llvm/Support/CFG.h"
|
#include "llvm/Support/CFG.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
|
@ -1597,38 +1598,42 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
|
||||||
// Do PHI translation to get its value in the predecessor if necessary. The
|
// Do PHI translation to get its value in the predecessor if necessary. The
|
||||||
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
|
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
|
||||||
//
|
//
|
||||||
// FIXME: This may insert a computation, but we don't tell scalar GVN
|
|
||||||
// optimization stuff about it. How do we do this?
|
|
||||||
SmallVector<Instruction*, 8> NewInsts;
|
SmallVector<Instruction*, 8> NewInsts;
|
||||||
Value *LoadPtr = 0;
|
|
||||||
|
|
||||||
// If all preds have a single successor, then we know it is safe to insert the
|
// If all preds have a single successor, then we know it is safe to insert the
|
||||||
// load on the pred (?!?), so we can insert code to materialize the pointer if
|
// load on the pred (?!?), so we can insert code to materialize the pointer if
|
||||||
// it is not available.
|
// it is not available.
|
||||||
|
PHITransAddr Address(LI->getOperand(0), TD);
|
||||||
|
Value *LoadPtr = 0;
|
||||||
if (allSingleSucc) {
|
if (allSingleSucc) {
|
||||||
LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB,
|
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
|
||||||
UnavailablePred, TD, *DT,NewInsts);
|
*DT, NewInsts);
|
||||||
} else {
|
} else {
|
||||||
LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB,
|
Address.PHITranslateValue(LoadBB, UnavailablePred);
|
||||||
UnavailablePred, TD, *DT);
|
LoadPtr = Address.getAddr();
|
||||||
|
|
||||||
|
// Make sure the value is live in the predecessor.
|
||||||
|
if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
|
||||||
|
if (!DT->dominates(Inst->getParent(), UnavailablePred))
|
||||||
|
LoadPtr = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we couldn't find or insert a computation of this phi translated value,
|
||||||
|
// we fail PRE.
|
||||||
|
if (LoadPtr == 0) {
|
||||||
|
assert(NewInsts.empty() && "Shouldn't insert insts on failure");
|
||||||
|
DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
|
||||||
|
<< *LI->getOperand(0) << "\n");
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assign value numbers to these new instructions.
|
// Assign value numbers to these new instructions.
|
||||||
for (SmallVector<Instruction*, 8>::iterator NI = NewInsts.begin(),
|
for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
|
||||||
NE = NewInsts.end(); NI != NE; ++NI) {
|
|
||||||
// FIXME: We really _ought_ to insert these value numbers into their
|
// FIXME: We really _ought_ to insert these value numbers into their
|
||||||
// parent's availability map. However, in doing so, we risk getting into
|
// parent's availability map. However, in doing so, we risk getting into
|
||||||
// ordering issues. If a block hasn't been processed yet, we would be
|
// ordering issues. If a block hasn't been processed yet, we would be
|
||||||
// marking a value as AVAIL-IN, which isn't what we intend.
|
// marking a value as AVAIL-IN, which isn't what we intend.
|
||||||
VN.lookup_or_add(*NI);
|
VN.lookup_or_add(NewInsts[i]);
|
||||||
}
|
|
||||||
|
|
||||||
// If we couldn't find or insert a computation of this phi translated value,
|
|
||||||
// we fail PRE.
|
|
||||||
if (LoadPtr == 0) {
|
|
||||||
DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
|
|
||||||
<< *LI->getOperand(0) << "\n");
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure it is valid to move this load here. We have to watch out for:
|
// Make sure it is valid to move this load here. We have to watch out for:
|
||||||
|
|
|
@ -388,6 +388,7 @@ declare i1 @cond() readonly
|
||||||
declare i1 @cond2() readonly
|
declare i1 @cond2() readonly
|
||||||
|
|
||||||
define i32 @phi_trans2() {
|
define i32 @phi_trans2() {
|
||||||
|
; CHECK: @phi_trans2
|
||||||
entry:
|
entry:
|
||||||
%P = alloca i32, i32 400
|
%P = alloca i32, i32 400
|
||||||
br label %F1
|
br label %F1
|
||||||
|
@ -411,9 +412,57 @@ F:
|
||||||
br label %F1
|
br label %F1
|
||||||
|
|
||||||
TX:
|
TX:
|
||||||
ret i32 %x ;; SHOULD NOT BE COMPILED TO 'ret i32 42'.
|
; This load should not be compiled to 'ret i32 42'. An overly clever
|
||||||
|
; implementation of GVN would see that we're returning 17 if the loop
|
||||||
|
; executes once or 42 if it executes more than that, but we'd have to do
|
||||||
|
; loop restructuring to expose this, and GVN shouldn't do this sort of CFG
|
||||||
|
; transformation.
|
||||||
|
|
||||||
|
; CHECK: TX:
|
||||||
|
; CHECK: ret i32 %x
|
||||||
|
ret i32 %x
|
||||||
TY:
|
TY:
|
||||||
ret i32 0
|
ret i32 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i32 @phi_trans3(i32* %p) {
|
||||||
|
; CHECK: @phi_trans3
|
||||||
|
block1:
|
||||||
|
br i1 true, label %block2, label %block3
|
||||||
|
|
||||||
|
block2:
|
||||||
|
store i32 87, i32* %p
|
||||||
|
br label %block4
|
||||||
|
|
||||||
|
block3:
|
||||||
|
%p2 = getelementptr i32* %p, i32 43
|
||||||
|
store i32 97, i32* %p2
|
||||||
|
br label %block4
|
||||||
|
|
||||||
|
block4:
|
||||||
|
%A = phi i32 [-1, %block2], [42, %block3]
|
||||||
|
br i1 true, label %block5, label %exit
|
||||||
|
|
||||||
|
; CHECK: block4:
|
||||||
|
; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]
|
||||||
|
; CHECK-NOT: load
|
||||||
|
|
||||||
|
block5:
|
||||||
|
%B = add i32 %A, 1
|
||||||
|
br i1 true, label %block6, label %exit
|
||||||
|
|
||||||
|
block6:
|
||||||
|
%C = getelementptr i32* %p, i32 %B
|
||||||
|
br i1 true, label %block7, label %exit
|
||||||
|
|
||||||
|
block7:
|
||||||
|
%D = load i32* %C
|
||||||
|
ret i32 %D
|
||||||
|
|
||||||
|
; CHECK: block7:
|
||||||
|
; CHECK-NEXT: ret i32 %D
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret i32 -1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue