Teach the load analysis driving core instcombine logic and other bits of

logic to look through pointer casts, making them trivially stronger in
the face of loads and stores with intervening pointer casts.

I've included a few test cases that demonstrate the kind of folding
instcombine can do without pointer casts and then variations which
obfuscate the logic through bitcasts. Without this patch, the variations
all fail to optimize fully.

This is more important now than it has been in the past as I've started
moving the load canonicialization to more closely follow the value type
requirements rather than the pointer type requirements and thus this
needs to be prepared for more pointer casts. When I made the same change
to stores several test cases regressed without logic along these lines
so I wanted to systematically improve matters first.

llvm-svn: 220178
This commit is contained in:
Chandler Carruth 2014-10-20 00:24:14 +00:00
parent 25d50758f3
commit eeec35ae1c
4 changed files with 164 additions and 23 deletions

View File

@ -86,6 +86,9 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
}
}
PointerType *AddrTy = cast<PointerType>(V->getType());
uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
// If we found a base allocated type from either an alloca or global variable,
// try to see if we are definitively within the allocated region. We need to
// know the size of the base type and the loaded type to do anything in this
@ -96,8 +99,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
PointerType *AddrTy = cast<PointerType>(V->getType());
uint64_t LoadSize = DL->getTypeStoreSize(AddrTy->getElementType());
if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
@ -111,6 +112,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// the load entirely).
BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
// We can at least always strip pointer casts even though we can't use the
// base here.
V = V->stripPointerCasts();
while (BBI != E) {
--BBI;
@ -120,13 +125,25 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
!isa<DbgInfoIntrinsic>(BBI))
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
if (AreEquivalentAddressValues(LI->getOperand(0), V))
return true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
if (AreEquivalentAddressValues(SI->getOperand(1), V))
return true;
}
Value *AccessedPtr;
if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
AccessedPtr = LI->getPointerOperand();
else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
AccessedPtr = SI->getPointerOperand();
else
continue;
// Handle trivial cases even w/o DataLayout or other work.
if (AccessedPtr == V)
return true;
if (!DL)
continue;
auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
return true;
}
return false;
}
@ -157,12 +174,12 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
// If we're using alias analysis to disambiguate get the size of *Ptr.
uint64_t AccessSize = 0;
if (AA) {
Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
AccessSize = AA->getTypeStoreSize(AccessTy);
}
uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0;
Value *StrippedPtr = Ptr->stripPointerCasts();
while (ScanFrom != ScanBB->begin()) {
// We must ignore debug info directives when counting (otherwise they
@ -183,17 +200,21 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
if (AreEquivalentAddressValues(
LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
CastInst::isBitCastable(LI->getType(), AccessTy)) {
if (AATags)
LI->getAAMetadata(*AATags);
return LI;
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) {
if (AATags)
SI->getAAMetadata(*AATags);
return SI->getOperand(0);
@ -202,15 +223,15 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If Ptr is an alloca and this is a store to a different alloca, ignore
// the store. This is a trivial form of alias analysis that is important
// for reg2mem'd code.
if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
(isa<AllocaInst>(SI->getOperand(1)) ||
isa<GlobalVariable>(SI->getOperand(1))))
if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
(isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)))
continue;
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
if (AA &&
(AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
(AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
AliasAnalysis::Mod) == 0)
continue;
// Otherwise the store that may or may not alias the pointer, bail out.
@ -223,7 +244,8 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// If alias analysis claims that it really won't modify the load,
// ignore it.
if (AA &&
(AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
(AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
AliasAnalysis::Mod) == 0)
continue;
// May modify the pointer, bail out.

View File

@ -420,7 +420,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
BasicBlock::iterator BBI = &LI;
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
return ReplaceInstUsesWith(LI, AvailableVal);
return ReplaceInstUsesWith(
LI, Builder->CreateBitCast(AvailableVal, LI.getType()));
// load(gep null, ...) -> unreachable
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {

View File

@ -901,6 +901,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
if (AvailableVal->getType() != LI->getType())
AvailableVal = CastInst::Create(CastInst::BitCast, AvailableVal,
LI->getType(), "", LI);
LI->replaceAllUsesWith(AvailableVal);
LI->eraseFromParent();
return true;
@ -1031,7 +1034,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
PN->addIncoming(I->second, I->first);
// If we have an available predecessor but it requires casting, insert the
// cast in the predecessor and use the cast.
Value *PredV = I->second;
if (PredV->getType() != LI->getType())
PredV = CastInst::Create(CastInst::BitCast, PredV, LI->getType(), "", P);
PN->addIncoming(PredV, I->first);
}
//cerr << "PRE: " << *LI << *PN << "\n";

View File

@ -1276,3 +1276,112 @@ define i32 @test77(i1 %flag, i32* %x) {
%v = load i32* %p
ret i32 %v
}
define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
; Test that we can speculate the loads around the select even when we can't
; fold the load completely away.
; CHECK-LABEL: @test78(
; CHECK: %[[V1:.*]] = load i32* %x
; CHECK-NEXT: %[[V2:.*]] = load i32* %y
; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
; CHECK-NEXT: ret i32 %[[S]]
entry:
store i32 0, i32* %x
store i32 0, i32* %y
; Block forwarding by storing to %z which could alias either %x or %y.
store i32 42, i32* %z
%p = select i1 %flag, i32* %x, i32* %y
%v = load i32* %p
ret i32 %v
}
define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
; Test that we can speculate the loads around the select even when we can't
; fold the load completely away.
; CHECK-LABEL: @test79(
; CHECK: %[[V1:.*]] = load float* %x
; CHECK-NEXT: %[[V2:.*]] = load float* %y
; CHECK-NEXT: %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
; CHECK-NEXT: ret float %[[S]]
entry:
%x1 = bitcast float* %x to i32*
%y1 = bitcast i32* %y to float*
store i32 0, i32* %x1
store i32 0, i32* %y
; Block forwarding by storing to %z which could alias either %x or %y.
store i32 42, i32* %z
%p = select i1 %flag, float* %x, float* %y1
%v = load float* %p
ret float %v
}
define i32 @test80(i1 %flag) {
; Test that when we speculate the loads around the select they fold throug
; load->load folding and load->store folding.
; CHECK-LABEL: @test80(
; CHECK: %[[X:.*]] = alloca i32
; CHECK-NEXT: %[[Y:.*]] = alloca i32
; CHECK: %[[V:.*]] = load i32* %[[X]]
; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
; CHECK-NEXT: ret i32 %[[V]]
entry:
%x = alloca i32
%y = alloca i32
call void @scribble_on_memory(i32* %x)
call void @scribble_on_memory(i32* %y)
%tmp = load i32* %x
store i32 %tmp, i32* %y
%p = select i1 %flag, i32* %x, i32* %y
%v = load i32* %p
ret i32 %v
}
define float @test81(i1 %flag) {
; Test that we can speculate the load around the select even though they use
; differently typed pointers.
; CHECK-LABEL: @test81(
; CHECK: %[[X:.*]] = alloca i32
; CHECK-NEXT: %[[Y:.*]] = alloca i32
; CHECK: %[[V:.*]] = load i32* %[[X]]
; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
; CHECK-NEXT: %[[C:.*]] = bitcast i32 %[[V]] to float
; CHECK-NEXT: ret float %[[C]]
entry:
%x = alloca float
%y = alloca i32
%x1 = bitcast float* %x to i32*
%y1 = bitcast i32* %y to float*
call void @scribble_on_memory(i32* %x1)
call void @scribble_on_memory(i32* %y)
%tmp = load i32* %x1
store i32 %tmp, i32* %y
%p = select i1 %flag, float* %x, float* %y1
%v = load float* %p
ret float %v
}
define i32 @test82(i1 %flag) {
; Test that we can speculate the load around the select even though they use
; differently typed pointers.
; CHECK-LABEL: @test82(
; CHECK: %[[X:.*]] = alloca float
; CHECK-NEXT: %[[Y:.*]] = alloca i32
; CHECK-NEXT: %[[X1:.*]] = bitcast float* %[[X]] to i32*
; CHECK-NEXT: %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
; CHECK: %[[V:.*]] = load float* %[[X]]
; CHECK-NEXT: store float %[[V]], float* %[[Y1]]
; CHECK-NEXT: %[[C:.*]] = bitcast float %[[V]] to i32
; CHECK-NEXT: ret i32 %[[C]]
entry:
%x = alloca float
%y = alloca i32
%x1 = bitcast float* %x to i32*
%y1 = bitcast i32* %y to float*
call void @scribble_on_memory(i32* %x1)
call void @scribble_on_memory(i32* %y)
%tmp = load float* %x
store float %tmp, float* %y1
%p = select i1 %flag, i32* %x1, i32* %y
%v = load i32* %p
ret i32 %v
}