forked from OSchip/llvm-project
Allow GVN to hack on memcpy's, making them open to further optimization.
llvm-svn: 46693
This commit is contained in:
parent
da8e5d979e
commit
c4a7c41869
|
@ -19,6 +19,7 @@
|
|||
#include "llvm/Constants.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Value.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
|
@ -736,6 +737,7 @@ namespace {
|
|||
SmallVector<Instruction*, 4>& toErase);
|
||||
bool processNonLocalLoad(LoadInst* L,
|
||||
SmallVector<Instruction*, 4>& toErase);
|
||||
bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
|
||||
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
|
||||
DenseMap<BasicBlock*, Value*> &Phis,
|
||||
bool top_level = false);
|
||||
|
@ -1017,6 +1019,84 @@ bool GVN::processLoad(LoadInst* L,
|
|||
return deletedLoad;
|
||||
}
|
||||
|
||||
/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
|
||||
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
|
||||
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
|
||||
/// This allows later passes to remove the first memcpy altogether.
|
||||
bool GVN::processMemCpy(MemCpyInst* M,
|
||||
SmallVector<Instruction*, 4>& toErase) {
|
||||
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
|
||||
|
||||
// First, we have to check that the dependency is another memcpy
|
||||
Instruction* dep = MD.getDependency(M);
|
||||
if (dep == MemoryDependenceAnalysis::None ||
|
||||
dep == MemoryDependenceAnalysis::NonLocal ||
|
||||
!isa<MemCpyInst>(dep))
|
||||
return false;
|
||||
|
||||
// We can only transforms memcpy's where the dest of one is the source of the
|
||||
// other
|
||||
MemCpyInst* MDep = cast<MemCpyInst>(dep);
|
||||
if (M->getSource() != MDep->getDest())
|
||||
return false;
|
||||
|
||||
// Second, the length of the memcpy's must be the same, or the preceeding one
|
||||
// must be larger than the following one.
|
||||
Value* DepLength = MDep->getLength();
|
||||
uint64_t CpySize = ~0UL;
|
||||
uint64_t DepSize = ~0UL;
|
||||
if (isa<ConstantInt>(DepLength)) {
|
||||
if (isa<ConstantInt>(M->getLength())) {
|
||||
if (cast<ConstantInt>(DepLength)->getLimitedValue() <
|
||||
cast<ConstantInt>(M->getLength())->getLimitedValue()) {
|
||||
return false;
|
||||
} else {
|
||||
CpySize = cast<ConstantInt>(M->getLength())->getLimitedValue();
|
||||
DepSize = cast<ConstantInt>(DepLength)->getLimitedValue();
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Finally, we have to make sure that the dest of the second does not
|
||||
// alias the source of the first
|
||||
AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
|
||||
if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
|
||||
AliasAnalysis::NoAlias) {
|
||||
// If they don't, we can still make the transformation by first turning M
|
||||
// into a memmove rather than a memcpy.
|
||||
bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
|
||||
Function* MemMoveFun = Intrinsic::getDeclaration(
|
||||
M->getParent()->getParent()->getParent(),
|
||||
is32bit ? Intrinsic::memmove_i32 :
|
||||
Intrinsic::memmove_i64);
|
||||
|
||||
std::vector<Value*> args;
|
||||
args.push_back(M->getRawDest());
|
||||
args.push_back(MDep->getRawSource());
|
||||
args.push_back(M->getLength());
|
||||
args.push_back(M->getAlignment());
|
||||
|
||||
new CallInst(MemMoveFun, args.begin(), args.end(), "", M);
|
||||
|
||||
MD.removeInstruction(M);
|
||||
toErase.push_back(M);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// If all checks passed, then we can transform these memcpy's
|
||||
M->setSource(MDep->getRawSource());
|
||||
|
||||
// Reset dependence information for the memcpy
|
||||
MD.removeInstruction(M);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// processInstruction - When calculating availability, handle an instruction
|
||||
/// by inserting it into the appropriate sets
|
||||
bool GVN::processInstruction(Instruction* I,
|
||||
|
@ -1025,6 +1105,8 @@ bool GVN::processInstruction(Instruction* I,
|
|||
SmallVector<Instruction*, 4>& toErase) {
|
||||
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
|
||||
return processLoad(L, lastSeenLoad, toErase);
|
||||
} else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
|
||||
return processMemCpy(M, toErase);
|
||||
}
|
||||
|
||||
unsigned num = VN.lookup_or_add(I);
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | not grep {i8* %agg.result21, i8* %tmp219}
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "i686-apple-darwin9"
|
||||
|
||||
; Regression input for forwarding the source of one memcpy into the next.
; @ccoshl writes its sret result into %memtmp; the first memcpy copies
; %memtmp into %tmp2, and the second copies %tmp2 into the caller's sret slot
; %agg.result.  Both copies are 32 bytes, so the second can read from %memtmp
; directly.  The RUN line requires that no call taking the operand pair
; (i8* %agg.result21, i8* %tmp219) remains after -gvn -dse.
define void @ccosl({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind {
|
||||
entry:
|
||||
%tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
|
||||
%memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
|
||||
; -0.0 - %z.1, i.e. the negation of %z.1.
%tmp5 = sub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1]
|
||||
call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind
|
||||
%tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; <i8*> [#uses=2]
|
||||
%memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
|
||||
; First copy: %memtmp -> %tmp2.
call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
|
||||
%agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
|
||||
; Second copy: %tmp2 -> %agg.result.  This is the call the RUN line greps for.
call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
|
||||
|
||||
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
|
Loading…
Reference in New Issue