[LoopPeel] Peel loops with exits followed by an unreachable or deopt block

Added support for peeling loops with exits that are followed either by an
unreachable-terminated block or by a block that has a terminating deoptimize
call. All blocks in the sequence must have a unique successor, except possibly
for the last one.

Reviewed By: mkazantsev

Differential Revision: https://reviews.llvm.org/D110922
This commit is contained in:
Dmitry Makogon 2021-11-02 23:09:37 +07:00
parent e2024d72fa
commit e09958d5eb
4 changed files with 71 additions and 15 deletions

View File

@ -129,6 +129,13 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
/// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc.
void ReplaceInstWithInst(Instruction *From, Instruction *To);
/// Check if we can prove that all paths starting from this block converge
/// to a block that either has a @llvm.experimental.deoptimize call
/// prior to its terminating return instruction or is terminated by unreachable.
/// All blocks in the traversed sequence must have a unique successor, except
/// possibly for the last one.
///
/// The walk starts at \p BB and follows unique successors. It gives up and
/// returns false when a block has no unique successor, when a block is seen a
/// second time, or when the maximum check depth
/// (max-deopt-or-unreachable-succ-check-depth) is exceeded.
///
/// \param BB the block to start from; a null pointer yields false.
/// \returns true if a deoptimizing or unreachable-terminated block was
/// reached within those limits, false otherwise.
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB);
/// Option class for critical edge splitting.
///
/// This provides a builder interface for overriding the default options used

View File

@ -39,6 +39,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@ -52,6 +53,12 @@ using namespace llvm;
#define DEBUG_TYPE "basicblock-utils"
// Upper bound on how many blocks IsBlockFollowedByDeoptOrUnreachable visits
// along a chain of unique successors before it stops searching and reports
// failure. Registered as a hidden command-line option with an initial value
// of 8.
static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
"max-deopt-or-unreachable-succ-check-depth", cl::init(8), cl::Hidden,
cl::desc("Set the maximum path length when checking whether a basic block "
"is followed by a block that either has a terminating "
"deoptimizing call or is terminated with an unreachable"));
void llvm::DetatchDeadBlocks(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
@ -485,6 +492,20 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
BI = New;
}
bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) {
// Remember visited blocks to avoid infinite loop
SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
unsigned Depth = 0;
while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth &&
VisitedBlocks.insert(BB).second) {
if (BB->getTerminatingDeoptimizeCall() ||
isa<UnreachableInst>(BB->getTerminator()))
return true;
BB = BB->getUniqueSuccessor();
}
return false;
}
void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
BasicBlock::iterator BI(From);
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);

View File

@ -103,15 +103,15 @@ bool llvm::canPeel(Loop *L) {
SmallVector<BasicBlock *, 4> Exits;
L->getUniqueNonLatchExitBlocks(Exits);
// The latch must either be the only exiting block or all non-latch exit
// blocks have either a deopt or unreachable terminator. Both deopt and
// unreachable terminators are a strong indication they are not taken. Note
// that this is a profitability check, not a legality check. Also note that
// LoopPeeling currently can only update the branch weights of latch blocks
// and branch weights to blocks with deopt or unreachable do not need
// blocks have either a deopt or unreachable terminator or compose a chain of
// blocks where the last one is either deopt or unreachable terminated. Both
// deopt and unreachable terminators are a strong indication they are not
// taken. Note that this is a profitability check, not a legality check. Also
// note that LoopPeeling currently can only update the branch weights of latch
// blocks and branch weights to blocks with deopt or unreachable do not need
// updating.
return all_of(Exits, [](const BasicBlock *BB) {
return BB->getTerminatingDeoptimizeCall() ||
isa<UnreachableInst>(BB->getTerminator());
return IsBlockFollowedByDeoptOrUnreachable(BB);
});
}

View File

@ -193,28 +193,56 @@ unreachable.exit:
define void @peel_exits_to_blocks_branch_to_unreachable_block(i32* %ptr, i32 %N, i32 %x, i1 %c.1) {
; CHECK-LABEL: @peel_exits_to_blocks_branch_to_unreachable_block(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]]
; CHECK: loop.header.peel.begin:
; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]]
; CHECK: loop.header.peel:
; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ult i32 1, 2
; CHECK-NEXT: br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]]
; CHECK: else.peel:
; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]]
; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[EXIT_2:%.*]], label [[LOOP_LATCH_PEEL:%.*]]
; CHECK: then.peel:
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH_PEEL]]
; CHECK: loop.latch.peel:
; CHECK-NEXT: [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ]
; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1
; CHECK-NEXT: store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4
; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1
; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000
; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
; CHECK: loop.header.peel.next:
; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]]
; CHECK: loop.header.peel.next1:
; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
; CHECK: entry.peel.newph:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], 2
; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]]
; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
; CHECK: else:
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X:%.*]]
; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_LATCH]]
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X]]
; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]]
; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000
; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: exit.loopexit:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: exit.1.loopexit:
; CHECK-NEXT: br label [[EXIT_1]]
; CHECK: exit.1:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[UNREACHABLE_TERM:%.*]]
; CHECK: exit.2.loopexit:
; CHECK-NEXT: br label [[EXIT_2]]
; CHECK: exit.2:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label [[UNREACHABLE_TERM]]