forked from OSchip/llvm-project
Atomic load/store handling for the passes using memdep (GVN, DSE, memcpyopt).
llvm-svn: 137888
This commit is contained in:
parent
5b8ad40664
commit
9a468153e1
|
@ -213,9 +213,9 @@ getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
|
|||
/// isRemovable - If the value of this instruction and the memory it writes to
|
||||
/// is unused, may we delete this instruction?
|
||||
static bool isRemovable(Instruction *I) {
|
||||
// Don't remove volatile stores.
|
||||
// Don't remove volatile/atomic stores.
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(I))
|
||||
return !SI->isVolatile();
|
||||
return SI->isUnordered();
|
||||
|
||||
IntrinsicInst *II = cast<IntrinsicInst>(I);
|
||||
switch (II->getIntrinsicID()) {
|
||||
|
@ -447,7 +447,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
|
|||
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
|
||||
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
|
||||
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
|
||||
SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
|
||||
SI->getOperand(0) == DepLoad && isRemovable(SI)) {
|
||||
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
|
||||
<< "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
|
||||
|
||||
|
@ -670,6 +670,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
|
|||
|
||||
// If we encounter a use of the pointer, it is no longer considered dead
|
||||
if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
|
||||
if (!L->isUnordered()) // Be conservative with atomic/volatile load
|
||||
break;
|
||||
LoadedLoc = AA->getLocation(L);
|
||||
} else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
|
||||
LoadedLoc = AA->getLocation(V);
|
||||
|
|
|
@ -982,8 +982,8 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
|
|||
unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
|
||||
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
|
||||
if (Offset+LoadSize > SrcValSize) {
|
||||
assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
|
||||
assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
|
||||
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
|
||||
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
|
||||
// If we have a load/load clobber an DepLI can be widened to cover this
|
||||
// load, then we should widen it to the next power of 2 size big enough!
|
||||
unsigned NewLoadSize = Offset+LoadSize;
|
||||
|
@ -1669,7 +1669,7 @@ bool GVN::processLoad(LoadInst *L) {
|
|||
if (!MD)
|
||||
return false;
|
||||
|
||||
if (L->isVolatile())
|
||||
if (!L->isSimple())
|
||||
return false;
|
||||
|
||||
if (L->use_empty()) {
|
||||
|
|
|
@ -384,7 +384,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
|
|||
|
||||
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
|
||||
// If this is a store, see if we can merge it in.
|
||||
if (NextStore->isVolatile()) break;
|
||||
if (!NextStore->isSimple()) break;
|
||||
|
||||
// Check to see if this stored value is of the same byte-splattable value.
|
||||
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
|
||||
|
@ -479,7 +479,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
|
|||
|
||||
|
||||
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
||||
if (SI->isVolatile()) return false;
|
||||
if (!SI->isSimple()) return false;
|
||||
|
||||
if (TD == 0) return false;
|
||||
|
||||
|
@ -487,7 +487,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
|
|||
// happen to be using a load-store pair to implement it, rather than
|
||||
// a memcpy.
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
|
||||
if (!LI->isVolatile() && LI->hasOneUse() &&
|
||||
if (LI->isSimple() && LI->hasOneUse() &&
|
||||
LI->getParent() == SI->getParent()) {
|
||||
MemDepResult ldep = MD->getDependency(LI);
|
||||
CallInst *C = 0;
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
; RUN: opt -basicaa -dse -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-macosx10.7.0"
|
||||
|
||||
; Sanity tests for atomic stores.
|
||||
; Note that it turns out essentially every transformation DSE does is legal on
|
||||
; atomic ops, just some transformations are not allowed across them.
|
||||
|
||||
@x = common global i32 0, align 4
|
||||
@y = common global i32 0, align 4
|
||||
|
||||
declare void @randomop(i32*)
|
||||
|
||||
; DSE across unordered store (allowed)
|
||||
define void @test1() nounwind uwtable ssp {
|
||||
; CHECK: test1
|
||||
; CHECK-NOT: store i32 0
|
||||
; CHECK: store i32 1
|
||||
entry:
|
||||
store i32 0, i32* @x
|
||||
store atomic i32 0, i32* @y unordered, align 4
|
||||
store i32 1, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
; DSE across seq_cst load (allowed in theory; not implemented ATM)
|
||||
define i32 @test2() nounwind uwtable ssp {
|
||||
; CHECK: test2
|
||||
; CHECK: store i32 0
|
||||
; CHECK: store i32 1
|
||||
entry:
|
||||
store i32 0, i32* @x
|
||||
%x = load atomic i32* @y seq_cst, align 4
|
||||
store i32 1, i32* @x
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
; DSE across seq_cst store (store before atomic store must not be removed)
|
||||
define void @test3() nounwind uwtable ssp {
|
||||
; CHECK: test3
|
||||
; CHECK: store i32
|
||||
; CHECK: store atomic i32 2
|
||||
entry:
|
||||
store i32 0, i32* @x
|
||||
store atomic i32 2, i32* @y seq_cst, align 4
|
||||
store i32 1, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
; DSE remove unordered store (allowed)
|
||||
define void @test4() nounwind uwtable ssp {
|
||||
; CHECK: test4
|
||||
; CHECK-NOT: store atomic
|
||||
; CHECK: store i32 1
|
||||
entry:
|
||||
store atomic i32 0, i32* @x unordered, align 4
|
||||
store i32 1, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
; DSE unordered store overwriting non-atomic store (allowed)
|
||||
define void @test5() nounwind uwtable ssp {
|
||||
; CHECK: test5
|
||||
; CHECK: store atomic i32 1
|
||||
entry:
|
||||
store i32 0, i32* @x
|
||||
store atomic i32 1, i32* @x unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; DSE no-op unordered atomic store (allowed)
|
||||
define void @test6() nounwind uwtable ssp {
|
||||
; CHECK: test6
|
||||
; CHECK-NOT: store
|
||||
; CHECK: ret void
|
||||
entry:
|
||||
%x = load atomic i32* @x unordered, align 4
|
||||
store atomic i32 %x, i32* @x unordered, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
|
||||
; to reason about atomic operations).
|
||||
define void @test7() nounwind uwtable ssp {
|
||||
; CHECK: test7
|
||||
; CHECK: store atomic
|
||||
entry:
|
||||
%a = alloca i32
|
||||
store atomic i32 0, i32* %a seq_cst, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
|
||||
; to reason about atomic operations).
|
||||
define i32 @test8() nounwind uwtable ssp {
|
||||
; CHECK: test8
|
||||
; CHECK: store
|
||||
; CHECK: load atomic
|
||||
entry:
|
||||
%a = alloca i32
|
||||
call void @randomop(i32* %a)
|
||||
store i32 0, i32* %a, align 4
|
||||
%x = load atomic i32* @x seq_cst, align 4
|
||||
ret i32 %x
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-macosx10.7.0"
|
||||
|
||||
@x = common global i32 0, align 4
|
||||
@y = common global i32 0, align 4
|
||||
|
||||
; GVN across unordered store (allowed)
|
||||
define i32 @test1() nounwind uwtable ssp {
|
||||
; CHECK: test1
|
||||
; CHECK: add i32 %x, %x
|
||||
entry:
|
||||
%x = load i32* @y
|
||||
store atomic i32 %x, i32* @x unordered, align 4
|
||||
%y = load i32* @y
|
||||
%z = add i32 %x, %y
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
; GVN across seq_cst store (allowed in theory; not implemented ATM)
|
||||
define i32 @test2() nounwind uwtable ssp {
|
||||
; CHECK: test2
|
||||
; CHECK: add i32 %x, %y
|
||||
entry:
|
||||
%x = load i32* @y
|
||||
store atomic i32 %x, i32* @x seq_cst, align 4
|
||||
%y = load i32* @y
|
||||
%z = add i32 %x, %y
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
; GVN across unordered load (allowed)
|
||||
define i32 @test3() nounwind uwtable ssp {
|
||||
; CHECK: test3
|
||||
; CHECK: add i32 %x, %x
|
||||
entry:
|
||||
%x = load i32* @y
|
||||
%y = load atomic i32* @x unordered, align 4
|
||||
%z = load i32* @y
|
||||
%a = add i32 %x, %z
|
||||
%b = add i32 %y, %a
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
; GVN across acquire load (load after atomic load must not be removed)
|
||||
define i32 @test4() nounwind uwtable ssp {
|
||||
; CHECK: test4
|
||||
; CHECK: load atomic i32* @x
|
||||
; CHECK: load i32* @y
|
||||
entry:
|
||||
%x = load i32* @y
|
||||
%y = load atomic i32* @x seq_cst, align 4
|
||||
%x2 = load i32* @y
|
||||
%x3 = add i32 %x, %x2
|
||||
%y2 = add i32 %y, %x3
|
||||
ret i32 %y2
|
||||
}
|
||||
|
||||
; GVN load to unordered load (allowed)
|
||||
define i32 @test5() nounwind uwtable ssp {
|
||||
; CHECK: test5
|
||||
; CHECK: add i32 %x, %x
|
||||
entry:
|
||||
%x = load atomic i32* @x unordered, align 4
|
||||
%y = load i32* @x
|
||||
%z = add i32 %x, %y
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
; GVN unordered load to load (unordered load must not be removed)
|
||||
define i32 @test6() nounwind uwtable ssp {
|
||||
; CHECK: test6
|
||||
; CHECK: load atomic i32* @x unordered
|
||||
entry:
|
||||
%x = load i32* @x
|
||||
%x2 = load atomic i32* @x unordered, align 4
|
||||
%x3 = add i32 %x, %x2
|
||||
ret i32 %x3
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
; RUN: opt -basicaa -memcpyopt -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-macosx10.7.0"
|
||||
|
||||
@x = global i32 0
|
||||
|
||||
declare void @otherf(i32*)
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
||||
|
||||
; memcpyopt should not touch atomic ops
|
||||
define void @test1() nounwind uwtable ssp {
|
||||
; CHECK: test1
|
||||
; CHECK: store atomic
|
||||
%x = alloca [101 x i32], align 16
|
||||
%bc = bitcast [101 x i32]* %x to i8*
|
||||
call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false)
|
||||
%gep1 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 100
|
||||
store atomic i32 0, i32* %gep1 unordered, align 4
|
||||
%gep2 = getelementptr inbounds [101 x i32]* %x, i32 0, i32 0
|
||||
call void @otherf(i32* %gep2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; memcpyopt across unordered store
|
||||
define void @test2() nounwind uwtable ssp {
|
||||
; CHECK: test2
|
||||
; CHECK: call
|
||||
; CHECK-NEXT: store atomic
|
||||
; CHECK-NEXT: call
|
||||
%old = alloca i32
|
||||
%new = alloca i32
|
||||
call void @otherf(i32* nocapture %old)
|
||||
store atomic i32 0, i32* @x unordered, align 4
|
||||
%v = load i32* %old
|
||||
store i32 %v, i32* %new
|
||||
call void @otherf(i32* nocapture %new)
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue