[GVN] Do PHI translations across all edges between the load and the unavailable pred.

Currently we do not properly translate addresses with PHIs if LoadBB !=
LI->getParent(), because PHITranslateAddr expects a direct predecessor as argument,
because it considers all instructions outside of the current block to
not requiring translation.

The amount of cases that trigger this should be very low, as most single
predecessor blocks should be folded into their predecessor by GVN before
we actually start with value numbering. It is still not guaranteed to
happen, so we should do PHI translation along all edges between the
loads' block and the predecessor where we have to place a load.

There are a few test cases showing current limits of the PHI translation, which
could be improved later.

Reviewers: spatel, reames, efriedma, john.brawn

Reviewed By: efriedma

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65020

llvm-svn: 369570
This commit is contained in:
Florian Hahn 2019-08-21 20:06:50 +00:00
parent 6a29ff1754
commit b5e52bfd83
2 changed files with 150 additions and 6 deletions

View File

@ -1164,15 +1164,30 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
// We do the translation for each edge we skipped by going from LI's block
// to LoadBB, otherwise we might miss pieces needing translation.
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), DL, AC);
Value *LoadPtr = nullptr;
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
Value *LoadPtr = LI->getPointerOperand();
BasicBlock *Cur = LI->getParent();
while (Cur != LoadBB) {
PHITransAddr Address(LoadPtr, DL, AC);
LoadPtr = Address.PHITranslateWithInsertion(
Cur, Cur->getSinglePredecessor(), *DT, NewInsts);
if (!LoadPtr) {
CanDoPRE = false;
break;
}
Cur = Cur->getSinglePredecessor();
}
if (LoadPtr) {
PHITransAddr Address(LoadPtr, DL, AC);
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT,
NewInsts);
}
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
if (!LoadPtr) {
@ -1187,8 +1202,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (!CanDoPRE) {
while (!NewInsts.empty()) {
Instruction *I = NewInsts.pop_back_val();
markInstructionForDeletion(I);
// Erase instructions generated by the failed PHI translation before
// trying to number them. PHI translation might insert instructions
// in basic blocks other than the current one, and we delete them
// directly, as markInstructionForDeletion only allows removing from the
// current basic block.
NewInsts.pop_back_val()->eraseFromParent();
}
// HINT: Don't revert the edge-splitting as following transformation may
// also need to split these critical edges.

View File

@ -546,6 +546,130 @@ out:
ret i8 %R
}
declare void @use_i32(i32) readonly
; indirectbr currently prevents MergeBlockIntoPredecessor from merging latch
; into header. Make sure we translate the address for %l1 correctly where
; parts of the address computations are in different basic blocks.
define i32 @phi_trans6(i32* noalias nocapture readonly %x, i1 %cond) {
; CHECK-LABEL: define i32 @phi_trans6(
; CHECK-LABEL: entry:
; CHECK-NEXT: %l0 = load i32, i32* %x
;
; CHECK-LABEL: header:
; CHECK-NEXT: %l1 = phi i32 [ %l0, %entry ], [ %l1.pre, %latch.header_crit_edge ]
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %latch.header_crit_edge ]
; CHECK-NEXT: indirectbr i8* blockaddress(@phi_trans6, %latch), [label %latch]
;
; CHECK-LABEL: latch:
; CHECK-NEXT: %iv.next = add i32 %iv, 1
; CHECK-NEXT: br i1 %cond, label %exit, label %latch.header_crit_edge
;
; CHECK-LABEL: latch.header_crit_edge:
; CHECK-NEXT: %gep.1.phi.trans.insert.phi.trans.insert = getelementptr i32, i32* %x, i32 %iv.next
; CHECK-NEXT: %l1.pre = load i32, i32* %gep.1.phi.trans.insert.phi.trans.insert
; CHECK-LABEL: br label %header
;
entry:
%l0 = load i32, i32* %x
call void @use_i32(i32 %l0)
br label %header
header:
%iv = phi i32 [0, %entry], [ %iv.next, %latch]
indirectbr i8* blockaddress(@phi_trans6, %latch), [label %latch]
latch:
%gep.1 = getelementptr i32, i32* %x, i32 %iv
%l1 = load i32, i32* %gep.1
%iv.next = add i32 %iv, 1
br i1 %cond, label %exit, label %header
exit:
ret i32 %l1
}
; FIXME: Currently we fail to translate the PHI in this case.
define i32 @phi_trans7(i32* noalias nocapture readonly %x, i1 %cond) {
; CHECK-LABEL: define i32 @phi_trans7(
; CHECK-LABEL: entry:
; CHECK-NEXT: %l0 = load i32, i32* %x
;
; CHECK-LABEL: header:
; CHECK-NEXT: %iv = phi i32 [ 2, %entry ], [ %iv.next, %latch.header_crit_edge ]
; CHECK-NEXT: %offset = add i32 %iv, -2
; CHECK-NEXT: indirectbr i8* blockaddress(@phi_trans7, %latch), [label %latch]
;
; CHECK-LABEL: latch:
; CHECK-NEXT: %gep.1 = getelementptr i32, i32* %x, i32 %offset
; CHECK-NEXT: %l1 = load i32, i32* %gep.1
; CHECK-NEXT: %iv.next = add i32 %iv, 1
; CHECK-NEXT: br i1 %cond, label %exit, label %latch.header_crit_edge
;
; CHECK-LABEL: latch.header_crit_edge:
; CHECK-LABEL: br label %header
;
entry:
%l0 = load i32, i32* %x
call void @use_i32(i32 %l0)
br label %header
header:
%iv = phi i32 [2, %entry], [ %iv.next, %latch]
%offset = add i32 %iv, -2
indirectbr i8* blockaddress(@phi_trans7, %latch), [label %latch]
latch:
%gep.1 = getelementptr i32, i32* %x, i32 %offset
%l1 = load i32, i32* %gep.1
%iv.next = add i32 %iv, 1
br i1 %cond, label %exit, label %header
exit:
ret i32 %l1
}
; FIXME: Currently we fail to translate the PHI in this case.
define i32 @phi_trans8(i32* noalias nocapture readonly %x, i1 %cond) {
; CHECK-LABEL: define i32 @phi_trans8(
; CHECK-LABEL: entry:
; CHECK-NEXT: %l0 = load i32, i32* %x
;
; CHECK-LABEL: header:
; CHECK-NEXT: %iv = phi i32 [ 2, %entry ], [ %iv.next, %latch.header_crit_edge ]
; CHECK-NEXT: indirectbr i8* blockaddress(@phi_trans8, %latch), [label %latch]
;
; CHECK-LABEL: latch:
; CHECK-NEXT: %offset = add i32 %iv, -2
; CHECK-NEXT: %gep.1 = getelementptr i32, i32* %x, i32 %offset
; CHECK-NEXT: %l1 = load i32, i32* %gep.1
; CHECK-NEXT: %iv.next = add i32 %iv, 1
; CHECK-NEXT: br i1 %cond, label %exit, label %latch.header_crit_edge
;
; CHECK-LABEL: latch.header_crit_edge:
; CHECK-LABEL: br label %header
;
entry:
%l0 = load i32, i32* %x
call void @use_i32(i32 %l0)
br label %header
header:
%iv = phi i32 [2, %entry], [ %iv.next, %latch]
indirectbr i8* blockaddress(@phi_trans8, %latch), [label %latch]
latch:
%offset = add i32 %iv, -2
%gep.1 = getelementptr i32, i32* %x, i32 %offset
%l1 = load i32, i32* %gep.1
%iv.next = add i32 %iv, 1
br i1 %cond, label %exit, label %header
exit:
ret i32 %l1
}
; PR6642
define i32 @memset_to_load() nounwind readnone {
@ -661,6 +785,7 @@ entry:
; CHECK: ret i32
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind