forked from OSchip/llvm-project
Re-apply the patch to improve the optimizations of memcpy's, with several
bugs fixed. This now passes PPC bootstrap. llvm-svn: 47026
This commit is contained in:
parent
f213e82bc5
commit
00dba4f734
|
@ -100,6 +100,11 @@ class MemoryDependenceAnalysis : public FunctionPass {
|
|||
/// updating the dependence of instructions that previously depended on it.
|
||||
void removeInstruction(Instruction* rem);
|
||||
|
||||
/// dropInstruction - Remove an instruction from the analysis, making
|
||||
/// absolutely conservative assumptions when updating the cache. This is
|
||||
/// useful, for example when an instruction is changed rather than removed.
|
||||
void dropInstruction(Instruction* drop);
|
||||
|
||||
private:
|
||||
Instruction* getCallSiteDependency(CallSite C, Instruction* start,
|
||||
BasicBlock* block);
|
||||
|
|
|
@ -457,6 +457,46 @@ Instruction* MemoryDependenceAnalysis::getDependency(Instruction* query,
|
|||
return NonLocal;
|
||||
}
|
||||
|
||||
/// dropInstruction - Remove an instruction from the analysis, making
|
||||
/// absolutely conservative assumptions when updating the cache. This is
|
||||
/// useful, for example when an instruction is changed rather than removed.
|
||||
void MemoryDependenceAnalysis::dropInstruction(Instruction* drop) {
|
||||
depMapType::iterator depGraphEntry = depGraphLocal.find(drop);
|
||||
if (depGraphEntry != depGraphLocal.end())
|
||||
reverseDep[depGraphEntry->second.first].erase(drop);
|
||||
|
||||
// Drop dependency information for things that depended on this instr
|
||||
SmallPtrSet<Instruction*, 4>& set = reverseDep[drop];
|
||||
for (SmallPtrSet<Instruction*, 4>::iterator I = set.begin(), E = set.end();
|
||||
I != E; ++I)
|
||||
depGraphLocal.erase(*I);
|
||||
|
||||
depGraphLocal.erase(drop);
|
||||
reverseDep.erase(drop);
|
||||
|
||||
for (DenseMap<BasicBlock*, Value*>::iterator DI =
|
||||
depGraphNonLocal[drop].begin(), DE = depGraphNonLocal[drop].end();
|
||||
DI != DE; ++DI)
|
||||
if (DI->second != None)
|
||||
reverseDepNonLocal[DI->second].erase(drop);
|
||||
|
||||
if (reverseDepNonLocal.count(drop)) {
|
||||
SmallPtrSet<Instruction*, 4>& set = reverseDepNonLocal[drop];
|
||||
for (SmallPtrSet<Instruction*, 4>::iterator I = set.begin(), E = set.end();
|
||||
I != E; ++I)
|
||||
for (DenseMap<BasicBlock*, Value*>::iterator DI =
|
||||
depGraphNonLocal[*I].begin(), DE = depGraphNonLocal[*I].end();
|
||||
DI != DE; ++DI)
|
||||
if (DI->second == drop)
|
||||
DI->second = Dirty;
|
||||
}
|
||||
|
||||
reverseDepNonLocal.erase(drop);
|
||||
nonLocalDepMapType::iterator I = depGraphNonLocal.find(drop);
|
||||
if (I != depGraphNonLocal.end())
|
||||
depGraphNonLocal.erase(I);
|
||||
}
|
||||
|
||||
/// removeInstruction - Remove an instruction from the dependence analysis,
|
||||
/// updating the dependence of instructions that previously depended on it.
|
||||
/// This method attempts to keep the cache coherent using the reverse map.
|
||||
|
@ -473,7 +513,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction* rem) {
|
|||
depMapType::iterator depGraphEntry = depGraphLocal.find(rem);
|
||||
|
||||
if (depGraphEntry != depGraphLocal.end()) {
|
||||
reverseDep[depGraphLocal[rem].first].erase(rem);
|
||||
reverseDep[depGraphEntry->second.first].erase(rem);
|
||||
|
||||
if (depGraphEntry->second.first != NonLocal &&
|
||||
depGraphEntry->second.first != None &&
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "llvm/Constants.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Value.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
|
@ -736,6 +737,7 @@ namespace {
|
|||
SmallVector<Instruction*, 4>& toErase);
|
||||
bool processNonLocalLoad(LoadInst* L,
|
||||
SmallVector<Instruction*, 4>& toErase);
|
||||
bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
|
||||
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
|
||||
DenseMap<BasicBlock*, Value*> &Phis,
|
||||
bool top_level = false);
|
||||
|
@ -1044,6 +1046,79 @@ bool GVN::processLoad(LoadInst* L,
|
|||
return deletedLoad;
|
||||
}
|
||||
|
||||
/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
|
||||
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
|
||||
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
|
||||
/// This allows later passes to remove the first memcpy altogether.
|
||||
bool GVN::processMemCpy(MemCpyInst* M,
|
||||
SmallVector<Instruction*, 4>& toErase) {
|
||||
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
|
||||
|
||||
// First, we have to check that the dependency is another memcpy
|
||||
Instruction* dep = MD.getDependency(M);
|
||||
if (dep == MemoryDependenceAnalysis::None ||
|
||||
dep == MemoryDependenceAnalysis::NonLocal ||
|
||||
!isa<MemCpyInst>(dep))
|
||||
return false;
|
||||
|
||||
// We can only transforms memcpy's where the dest of one is the source of the
|
||||
// other
|
||||
MemCpyInst* MDep = cast<MemCpyInst>(dep);
|
||||
if (M->getSource() != MDep->getDest())
|
||||
return false;
|
||||
|
||||
// Second, the length of the memcpy's must be the same, or the preceeding one
|
||||
// must be larger than the following one.
|
||||
ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
|
||||
ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
|
||||
if (!C1 || !C2)
|
||||
return false;
|
||||
|
||||
uint64_t CpySize = C1->getValue().getZExtValue();
|
||||
uint64_t DepSize = C2->getValue().getZExtValue();
|
||||
|
||||
if (DepSize < CpySize)
|
||||
return false;
|
||||
|
||||
// Finally, we have to make sure that the dest of the second does not
|
||||
// alias the source of the first
|
||||
AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
|
||||
if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
|
||||
AliasAnalysis::NoAlias)
|
||||
return false;
|
||||
else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
|
||||
AliasAnalysis::NoAlias)
|
||||
return false;
|
||||
else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
|
||||
!= AliasAnalysis::NoAlias)
|
||||
return false;
|
||||
|
||||
// If all checks passed, then we can transform these memcpy's
|
||||
bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
|
||||
Function* MemMoveFun = Intrinsic::getDeclaration(
|
||||
M->getParent()->getParent()->getParent(),
|
||||
is32bit ? Intrinsic::memcpy_i32 :
|
||||
Intrinsic::memcpy_i64);
|
||||
|
||||
std::vector<Value*> args;
|
||||
args.push_back(M->getRawDest());
|
||||
args.push_back(MDep->getRawSource());
|
||||
args.push_back(M->getLength());
|
||||
args.push_back(M->getAlignment());
|
||||
|
||||
CallInst* C = new CallInst(MemMoveFun, args.begin(), args.end(), "", M);
|
||||
|
||||
if (MD.getDependency(C) == MDep) {
|
||||
MD.dropInstruction(M);
|
||||
toErase.push_back(M);
|
||||
return true;
|
||||
} else {
|
||||
MD.removeInstruction(C);
|
||||
toErase.push_back(C);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// processInstruction - When calculating availability, handle an instruction
|
||||
/// by inserting it into the appropriate sets
|
||||
bool GVN::processInstruction(Instruction* I,
|
||||
|
@ -1052,6 +1127,8 @@ bool GVN::processInstruction(Instruction* I,
|
|||
SmallVector<Instruction*, 4>& toErase) {
|
||||
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
|
||||
return processLoad(L, lastSeenLoad, toErase);
|
||||
} else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
|
||||
return processMemCpy(M, toErase);
|
||||
}
|
||||
|
||||
unsigned num = VN.lookup_or_add(I);
|
||||
|
@ -1161,8 +1238,9 @@ bool GVN::iterateOnFunction(Function &F) {
|
|||
++BI;
|
||||
|
||||
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
|
||||
E = toErase.end(); I != E; ++I)
|
||||
E = toErase.end(); I != E; ++I) {
|
||||
(*I)->eraseFromParent();
|
||||
}
|
||||
|
||||
toErase.clear();
|
||||
}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | grep memcpy | count 2
|
||||
; XFAIL: *
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "i686-apple-darwin9"
|
||||
|
|
Loading…
Reference in New Issue