forked from OSchip/llvm-project
[MemDep] Handle gep with zeros for invariant.group
Summary: gep 0, 0 is equivalent to bitcast. LLVM canonicalizes it to getelementptr because SROA can then handle it. A simple case like void g(A &a) { z(a); if (glob) a.foo(); } void testG() { A a; g(a); } was not devirtualized with -fstrict-vtable-pointers because of the lack of handling for gep 0 in Memory Dependence Analysis. Reviewers: dberlin, nlewycky, chandlerc Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D28126 llvm-svn: 290763
This commit is contained in:
parent
20dfba0d70
commit
da36215017
|
@ -4904,7 +4904,8 @@ The existence of the ``invariant.group`` metadata on the instruction tells
|
|||
the optimizer that every ``load`` and ``store`` to the same pointer operand
|
||||
within the same invariant group can be assumed to load or store the same
|
||||
value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects
|
||||
when two pointers are considered the same).
|
||||
when two pointers are considered the same). Pointers returned by bitcast or
|
||||
getelementptr with only zero indices are considered the same.
|
||||
|
||||
Examples:
|
||||
|
||||
|
|
|
@ -339,43 +339,62 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
|
|||
MemDepResult
|
||||
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
|
||||
BasicBlock *BB) {
|
||||
|
||||
auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
|
||||
if (!InvariantGroupMD)
|
||||
return MemDepResult::getUnknown();
|
||||
|
||||
Value *LoadOperand = LI->getPointerOperand();
|
||||
// It is not safe to walk the use list of a global value, because function
|
||||
// passes aren't allowed to look outside their functions.
|
||||
if (isa<GlobalValue>(LoadOperand))
|
||||
return MemDepResult::getUnknown();
|
||||
|
||||
auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
|
||||
if (!InvariantGroupMD)
|
||||
return MemDepResult::getUnknown();
|
||||
|
||||
SmallSet<Value *, 14> Seen;
|
||||
// Queue to process all pointers that are equivalent to load operand.
|
||||
SmallVector<Value *, 8> LoadOperandsQueue;
|
||||
LoadOperandsQueue.push_back(LoadOperand);
|
||||
Seen.insert(LoadOperand);
|
||||
SmallVector<const Value *, 8> LoadOperandsQueue;
|
||||
SmallSet<const Value *, 14> SeenValues;
|
||||
auto TryInsertToQueue = [&](Value *V) {
|
||||
if (SeenValues.insert(V).second)
|
||||
LoadOperandsQueue.push_back(V);
|
||||
};
|
||||
|
||||
TryInsertToQueue(LoadOperand);
|
||||
while (!LoadOperandsQueue.empty()) {
|
||||
Value *Ptr = LoadOperandsQueue.pop_back_val();
|
||||
const Value *Ptr = LoadOperandsQueue.pop_back_val();
|
||||
assert(Ptr);
|
||||
if (isa<GlobalValue>(Ptr))
|
||||
continue;
|
||||
|
||||
if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
|
||||
if (Seen.insert(BCI->getOperand(0)).second) {
|
||||
LoadOperandsQueue.push_back(BCI->getOperand(0));
|
||||
}
|
||||
}
|
||||
// Value comes from bitcast: Ptr = bitcast x. Insert x.
|
||||
if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
|
||||
TryInsertToQueue(BCI->getOperand(0));
|
||||
// Gep with zeros is equivalent to bitcast.
|
||||
// FIXME: we are not sure if some bitcast should be canonicalized to gep 0
|
||||
// or gep 0 to bitcast because of SROA, so there are 2 forms. When typeless
|
||||
// pointers will be upstream then both cases will be gone (and this BFS
|
||||
// also won't be needed).
|
||||
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
|
||||
if (GEP->hasAllZeroIndices())
|
||||
TryInsertToQueue(GEP->getOperand(0));
|
||||
|
||||
for (Use &Us : Ptr->uses()) {
|
||||
for (const Use &Us : Ptr->uses()) {
|
||||
auto *U = dyn_cast<Instruction>(Us.getUser());
|
||||
if (!U || U == LI || !DT.dominates(U, LI))
|
||||
continue;
|
||||
|
||||
if (auto *BCI = dyn_cast<BitCastInst>(U)) {
|
||||
if (Seen.insert(BCI).second) {
|
||||
LoadOperandsQueue.push_back(BCI);
|
||||
}
|
||||
// Bitcast or gep with zeros are using Ptr. Add to queue to check its
|
||||
// users. U = bitcast Ptr
|
||||
if (isa<BitCastInst>(U)) {
|
||||
TryInsertToQueue(U);
|
||||
continue;
|
||||
}
|
||||
// U = getelementptr Ptr, 0, 0...
|
||||
if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
|
||||
if (GEP->hasAllZeroIndices()) {
|
||||
TryInsertToQueue(U);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we hit load/store with the same invariant.group metadata (and the
|
||||
// same pointer operand) we can assume that value pointed by pointer
|
||||
// operand didn't change.
|
||||
|
|
|
@ -319,6 +319,31 @@ entry:
|
|||
ret i8 %d
|
||||
}
|
||||
|
||||
; This test checks if invariant.group understands gep with zeros
|
||||
; CHECK-LABEL: define void @testGEP0() {
|
||||
define void @testGEP0() {
|
||||
%a = alloca %struct.A, align 8
|
||||
%1 = bitcast %struct.A* %a to i8*
|
||||
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
|
||||
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
|
||||
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a)
|
||||
call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a) ; This call may change vptr
|
||||
%3 = load i8, i8* @unknownPtr, align 4
|
||||
%4 = icmp eq i8 %3, 0
|
||||
br i1 %4, label %_Z1gR1A.exit, label %5
|
||||
|
||||
; This should be devirtualized by invariant.group
|
||||
%6 = bitcast %struct.A* %a to void (%struct.A*)***
|
||||
%7 = load void (%struct.A*)**, void (%struct.A*)*** %6, align 8, !invariant.group !0
|
||||
%8 = load void (%struct.A*)*, void (%struct.A*)** %7, align 8
|
||||
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
|
||||
call void %8(%struct.A* nonnull %a)
|
||||
br label %_Z1gR1A.exit
|
||||
|
||||
_Z1gR1A.exit: ; preds = %0, %5
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo(i8*)
|
||||
declare void @bar(i8)
|
||||
declare i8* @getPointer(i8*)
|
||||
|
|
|
@ -320,6 +320,31 @@ entry:
|
|||
ret i8 %d
|
||||
}
|
||||
|
||||
; This test checks if invariant.group understands gep with zeros
|
||||
; CHECK-LABEL: define void @testGEP0() {
|
||||
define void @testGEP0() {
|
||||
%a = alloca %struct.A, align 8
|
||||
%1 = bitcast %struct.A* %a to i8*
|
||||
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
|
||||
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
|
||||
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a)
|
||||
call void @_ZN1A3fooEv(%struct.A* nonnull dereferenceable(8) %a) ; This call may change vptr
|
||||
%3 = load i8, i8* @unknownPtr, align 4
|
||||
%4 = icmp eq i8 %3, 0
|
||||
br i1 %4, label %_Z1gR1A.exit, label %5
|
||||
|
||||
; This should be devirtualized by invariant.group
|
||||
%6 = bitcast %struct.A* %a to void (%struct.A*)***
|
||||
%7 = load void (%struct.A*)**, void (%struct.A*)*** %6, align 8, !invariant.group !0
|
||||
%8 = load void (%struct.A*)*, void (%struct.A*)** %7, align 8
|
||||
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
|
||||
call void %8(%struct.A* nonnull %a)
|
||||
br label %_Z1gR1A.exit
|
||||
|
||||
_Z1gR1A.exit: ; preds = %0, %5
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo(i8*)
|
||||
declare void @bar(i8)
|
||||
declare i8* @getPointer(i8*)
|
||||
|
|
Loading…
Reference in New Issue