[MemDep] Handle gep with zeros for invariant.group

Summary:
A gep with all-zero indices (gep 0, 0) is equivalent to a bitcast. LLVM
canonicalizes such a bitcast to getelementptr because SROA can then handle it.

A simple case like

    void g(A &a) {
        z(a);
        if (glob)
            a.foo();
    }
    void testG() {
        A a;
        g(a);
    }

was not devirtualized with -fstrict-vtable-pointers because of the lack of
handling for gep 0 in Memory Dependence Analysis.

Reviewers: dberlin, nlewycky, chandlerc

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D28126

llvm-svn: 290763
This commit is contained in:
Piotr Padlewski 2016-12-30 18:45:07 +00:00
parent 20dfba0d70
commit da36215017
4 changed files with 90 additions and 20 deletions

View File

@ -4904,7 +4904,8 @@ The existence of the ``invariant.group`` metadata on the instruction tells
the optimizer that every ``load`` and ``store`` to the same pointer operand the optimizer that every ``load`` and ``store`` to the same pointer operand
within the same invariant group can be assumed to load or store the same within the same invariant group can be assumed to load or store the same
value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects
when two pointers are considered the same). when two pointers are considered the same). Pointers returned by bitcast or
getelementptr with only zero indices are considered the same.
Examples: Examples:

View File

@ -339,43 +339,62 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
MemDepResult MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
BasicBlock *BB) { BasicBlock *BB) {
auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
if (!InvariantGroupMD)
return MemDepResult::getUnknown();
Value *LoadOperand = LI->getPointerOperand(); Value *LoadOperand = LI->getPointerOperand();
// It's is not safe to walk the use list of global value, because function // It's is not safe to walk the use list of global value, because function
// passes aren't allowed to look outside their functions. // passes aren't allowed to look outside their functions.
if (isa<GlobalValue>(LoadOperand)) if (isa<GlobalValue>(LoadOperand))
return MemDepResult::getUnknown(); return MemDepResult::getUnknown();
auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
if (!InvariantGroupMD)
return MemDepResult::getUnknown();
SmallSet<Value *, 14> Seen;
// Queue to process all pointers that are equivalent to load operand. // Queue to process all pointers that are equivalent to load operand.
SmallVector<Value *, 8> LoadOperandsQueue; SmallVector<const Value *, 8> LoadOperandsQueue;
LoadOperandsQueue.push_back(LoadOperand); SmallSet<const Value *, 14> SeenValues;
Seen.insert(LoadOperand); auto TryInsertToQueue = [&](Value *V) {
if (SeenValues.insert(V).second)
LoadOperandsQueue.push_back(V);
};
TryInsertToQueue(LoadOperand);
while (!LoadOperandsQueue.empty()) { while (!LoadOperandsQueue.empty()) {
Value *Ptr = LoadOperandsQueue.pop_back_val(); const Value *Ptr = LoadOperandsQueue.pop_back_val();
assert(Ptr);
if (isa<GlobalValue>(Ptr)) if (isa<GlobalValue>(Ptr))
continue; continue;
if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) { // Value comes from bitcast: Ptr = bitcast x. Insert x.
if (Seen.insert(BCI->getOperand(0)).second) { if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
LoadOperandsQueue.push_back(BCI->getOperand(0)); TryInsertToQueue(BCI->getOperand(0));
} // Gep with zeros is equivalent to bitcast.
} // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
// or gep 0 to bitcast because of SROA, so there are 2 forms. When typeless
// pointers will be upstream then both cases will be gone (and this BFS
// also won't be needed).
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
if (GEP->hasAllZeroIndices())
TryInsertToQueue(GEP->getOperand(0));
for (Use &Us : Ptr->uses()) { for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser()); auto *U = dyn_cast<Instruction>(Us.getUser());
if (!U || U == LI || !DT.dominates(U, LI)) if (!U || U == LI || !DT.dominates(U, LI))
continue; continue;
if (auto *BCI = dyn_cast<BitCastInst>(U)) { // Bitcast or gep with zeros are using Ptr. Add to queue to check it's
if (Seen.insert(BCI).second) { // users. U = bitcast Ptr
LoadOperandsQueue.push_back(BCI); if (isa<BitCastInst>(U)) {
} TryInsertToQueue(U);
continue; continue;
} }
// U = getelementptr Ptr, 0, 0...
if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
if (GEP->hasAllZeroIndices()) {
TryInsertToQueue(U);
continue;
}
// If we hit load/store with the same invariant.group metadata (and the // If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer // same pointer operand) we can assume that value pointed by pointer
// operand didn't change. // operand didn't change.

View File

@ -319,6 +319,31 @@ entry:
ret i8 %d ret i8 %d
} }
; This test checks if invariant.group understands gep with zeros
; The vptr store below goes through an all-zero-index getelementptr of %a,
; while the later vptr load goes through a bitcast of the same %a. Both
; accesses carry !invariant.group !0.
; CHECK-LABEL: define void @testGEP0() {
define void @testGEP0() {
%a = alloca %struct.A, align 8
%1 = bitcast %struct.A* %a to i8*
; %2 addresses field 0 of %a using only zero indices.
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
; Store the address of element 2 of @_ZTV1A (presumably A's vtable - confirm
; against the test file's globals) through %2, tagged with invariant.group.
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a)
call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a) ; This call may change vptr
; Branch on a value loaded from @unknownPtr: skip to the exit block when it is
; zero, otherwise fall into the unnamed block (implicitly numbered %5) below.
%3 = load i8, i8* @unknownPtr, align 4
%4 = icmp eq i8 %3, 0
br i1 %4, label %_Z1gR1A.exit, label %5
; This should be devirtualized by invariant.group
; %6 is a bitcast of %a; the load through it uses the same invariant.group !0
; as the store through the zero-index gep %2 above.
%6 = bitcast %struct.A* %a to void (%struct.A*)***
%7 = load void (%struct.A*)**, void (%struct.A*)*** %6, align 8, !invariant.group !0
%8 = load void (%struct.A*)*, void (%struct.A*)** %7, align 8
; The indirect call through %8 is expected to become a direct call.
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
call void %8(%struct.A* nonnull %a)
br label %_Z1gR1A.exit
_Z1gR1A.exit: ; preds = %0, %5
ret void
}
declare void @foo(i8*) declare void @foo(i8*)
declare void @bar(i8) declare void @bar(i8)
declare i8* @getPointer(i8*) declare i8* @getPointer(i8*)

View File

@ -320,6 +320,31 @@ entry:
ret i8 %d ret i8 %d
} }
; This test checks if invariant.group understands gep with zeros
; The vptr store below goes through an all-zero-index getelementptr of %a,
; while the later vptr load goes through a bitcast of the same %a. Both
; accesses carry !invariant.group !0.
; CHECK-LABEL: define void @testGEP0() {
define void @testGEP0() {
%a = alloca %struct.A, align 8
%1 = bitcast %struct.A* %a to i8*
; %2 addresses field 0 of %a using only zero indices.
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
; Store the address of element 2 of @_ZTV1A (presumably A's vtable - confirm
; against the test file's globals) through %2, tagged with invariant.group.
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a)
call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a) ; This call may change vptr
; Branch on a value loaded from @unknownPtr: skip to the exit block when it is
; zero, otherwise fall into the unnamed block (implicitly numbered %5) below.
%3 = load i8, i8* @unknownPtr, align 4
%4 = icmp eq i8 %3, 0
br i1 %4, label %_Z1gR1A.exit, label %5
; This should be devirtualized by invariant.group
; %6 is a bitcast of %a; the load through it uses the same invariant.group !0
; as the store through the zero-index gep %2 above.
%6 = bitcast %struct.A* %a to void (%struct.A*)***
%7 = load void (%struct.A*)**, void (%struct.A*)*** %6, align 8, !invariant.group !0
%8 = load void (%struct.A*)*, void (%struct.A*)** %7, align 8
; The indirect call through %8 is expected to become a direct call.
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
call void %8(%struct.A* nonnull %a)
br label %_Z1gR1A.exit
_Z1gR1A.exit: ; preds = %0, %5
ret void
}
declare void @foo(i8*) declare void @foo(i8*)
declare void @bar(i8) declare void @bar(i8)
declare i8* @getPointer(i8*) declare i8* @getPointer(i8*)