Allow GVN to hack on memcpy's, making them open to further optimization.

llvm-svn: 46693
This commit is contained in:
Owen Anderson 2008-02-04 02:59:58 +00:00
parent da8e5d979e
commit c4a7c41869
2 changed files with 104 additions and 0 deletions

View File

@ -19,6 +19,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
#include "llvm/Value.h"
#include "llvm/ADT/BitVector.h"
@ -736,6 +737,7 @@ namespace {
SmallVector<Instruction*, 4>& toErase);
bool processNonLocalLoad(LoadInst* L,
SmallVector<Instruction*, 4>& toErase);
bool processMemCpy(MemCpyInst* M, SmallVector<Instruction*, 4>& toErase);
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
DenseMap<BasicBlock*, Value*> &Phis,
bool top_level = false);
@ -1017,6 +1019,84 @@ bool GVN::processLoad(LoadInst* L,
return deletedLoad;
}
/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or a memmove, when Z may overlap X).
/// This allows later passes to remove the first memcpy altogether.
///
/// M is the later copy (B).  When M has to be replaced by a memmove, the old
/// call is appended to toErase rather than deleted here.  Returns true if M
/// was rewritten in place or replaced, false if nothing was changed.
bool GVN::processMemCpy(MemCpyInst* M,
                        SmallVector<Instruction*, 4>& toErase) {
  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();

  // First, we have to check that the dependency is another memcpy.  The
  // None/NonLocal markers are compared before the dyn_cast, exactly as the
  // original compared them before its isa<> check.
  Instruction* dep = MD.getDependency(M);
  if (dep == MemoryDependenceAnalysis::None ||
      dep == MemoryDependenceAnalysis::NonLocal)
    return false;

  MemCpyInst* MDep = dyn_cast<MemCpyInst>(dep);
  if (!MDep)
    return false;

  // We can only transform memcpy's where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest())
    return false;

  // Second, both lengths must be compile-time constants, and the preceding
  // memcpy must copy at least as many bytes as the following one; otherwise
  // M would read bytes of Y that MDep never wrote.
  ConstantInt* DepLen = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt* CpyLen = dyn_cast<ConstantInt>(M->getLength());
  if (!DepLen || !CpyLen)
    return false;

  uint64_t DepSize = DepLen->getLimitedValue();
  uint64_t CpySize = CpyLen->getLimitedValue();
  if (DepSize < CpySize)
    return false;

  // NOTE(review): nothing here checks for writes to MDep's source between
  // MDep and M.  This presumably relies on MD.getDependency(M) reporting such
  // a write instead of MDep -- confirm, since M itself never touched X.

  // The alignment operand of M only describes M's original source and
  // destination.  The rewritten copy reads from MDep's source instead, which
  // is only known to satisfy MDep's alignment, so use the smaller of the two.
  ConstantInt* Align = cast<ConstantInt>(M->getAlignment());
  ConstantInt* DepAlign = cast<ConstantInt>(MDep->getAlignment());
  if (DepAlign->getZExtValue() < Align->getZExtValue())
    Align = DepAlign;

  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first.
  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias) {
    // The regions may overlap, so a memcpy from X to Z is not allowed.  We can
    // still make the transformation by replacing M with a memmove of the same
    // width (i32 or i64 length operand).
    bool is32bit = M->getIntrinsicID() == Intrinsic::memcpy_i32;
    Function* MemMoveFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 is32bit ? Intrinsic::memmove_i32 :
                                           Intrinsic::memmove_i64);

    std::vector<Value*> args;
    args.push_back(M->getRawDest());
    args.push_back(MDep->getRawSource());
    args.push_back(M->getLength());
    args.push_back(Align);

    // Insert the memmove immediately before M, then retire M.
    new CallInst(MemMoveFun, args.begin(), args.end(), "", M);

    MD.removeInstruction(M);
    toErase.push_back(M);

    return true;
  }

  // If all checks passed, then we can transform these memcpy's in place.
  M->setSource(MDep->getRawSource());
  M->setAlignment(Align);

  // Reset dependence information for the memcpy: its operands changed, so any
  // cached dependency is stale.
  MD.removeInstruction(M);

  return true;
}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction* I,
@ -1025,6 +1105,8 @@ bool GVN::processInstruction(Instruction* I,
SmallVector<Instruction*, 4>& toErase) {
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
return processLoad(L, lastSeenLoad, toErase);
} else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
return processMemCpy(M, toErase);
}
unsigned num = VN.lookup_or_add(I);

View File

@ -0,0 +1,22 @@
; RUN: llvm-as < %s | opt -gvn -dse | llvm-dis | not grep {i8* %agg.result21, i8* %tmp219}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
; @ccosl has @ccoshl write its sret result into %memtmp, then copies
; %memtmp -> %tmp2 -> %agg.result with two back-to-back 32-byte memcpy's.
; The second memcpy reads exactly what the first one wrote, so it is the
; "memcpy of a memcpy" shape. The RUN line requires that, after -gvn -dse,
; no call remains whose destination is %agg.result21 and whose source is the
; intermediate %tmp219.
define void @ccosl({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind {
entry:
; %tmp2 is the intermediate buffer that sits between the two copies.
%tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
; %memtmp receives the sret result of the @ccoshl call below.
%memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
%tmp5 = sub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1]
call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind
%tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; <i8*> [#uses=2]
%memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
; First copy: %memtmp -> %tmp2 (32 bytes, align 16).
call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
%agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
; Second copy: %tmp2 -> %agg.result (32 bytes, align 16); its source is the
; destination of the first copy.
call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
ret void
}
declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind