[LoopUnswitch] Add shortcut if unswitched path is a no-op.

If we determine that the invariant path through the loop has no effects,
we can directly branch to the exit block, instead to unswitching first.

Besides avoiding some extra work (unswitching first, then deleting the
loop again) this allows to be more aggressive than regular unswitching
with respect to cost-modeling. This approach should always be be
desirable.

This is similar in spirit to D93734, just that it uses the previously
added checks for loop-unswitching.

I tried to add the required no-op checks from scratch, as we only check
a subset of the loop. There is potential to unify the checks with
LoopDeletion, at the cost of adding a predicate whether a block should
be considered.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95468
This commit is contained in:
Florian Hahn 2021-02-01 08:51:22 +00:00
parent 80cdd30eb9
commit b8c81fa5c7
No known key found for this signature in database
GPG Key ID: 61D7554B5CECDC0D
2 changed files with 204 additions and 75 deletions

View File

@ -640,6 +640,26 @@ static bool equalityPropUnSafe(Value &LoopCond) {
return false;
}
namespace {
/// Struct to hold information about a partially invariant condition.
struct IVConditionInfo {
/// Instructions that need to be duplicated and checked for the unswitching
/// condition.
SmallVector<Instruction *, 4> InstToDuplicate;
/// Constant to indicate for which value the condition is invariant.
Constant *KnownValue = nullptr;
/// True if the partially invariant path is no-op (=does not have any
/// side-effects and no loop value is used outside the loop).
bool PathIsNoop = true;
/// If the partially invariant path reaches a single exit block, ExitForPath
/// is set to that block. Otherwise it is nullptr.
BasicBlock *ExitForPath = nullptr;
};
} // namespace
/// Check if the loop header has a conditional branch that is not
/// loop-invariant, because it involves load instructions. If all paths from
/// either the true or false successor to the header or loop exists do not
@ -651,9 +671,8 @@ static bool equalityPropUnSafe(Value &LoopCond) {
/// If the branch condition of the header is partially invariant, return a pair
/// containing the instructions to duplicate and a boolean Constant to update
/// the condition in the loops created for the true or false successors.
static std::pair<SmallVector<Instruction *, 4>, Constant *>
hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
SmallVector<Instruction *, 4> ToDuplicate;
static Optional<IVConditionInfo> hasPartialIVCondition(Loop *L, MemorySSA &MSSA,
AAResults *AA) {
auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
if (!TI || !TI->isConditional())
@ -665,7 +684,8 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
if (!CondI || !L->contains(CondI))
return {};
ToDuplicate.push_back(CondI);
SmallVector<Instruction *, 4> InstToDuplicate;
InstToDuplicate.push_back(CondI);
SmallVector<Value *, 4> WorkList;
WorkList.append(CondI->op_begin(), CondI->op_end());
@ -686,7 +706,7 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
if (LI->isVolatile() || LI->isAtomic())
return {};
ToDuplicate.push_back(I);
InstToDuplicate.push_back(I);
if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
// Queue the defining access to check for alias checks.
@ -701,80 +721,126 @@ hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
WorkList.append(I->op_begin(), I->op_end());
}
if (ToDuplicate.size() <= 1)
if (InstToDuplicate.size() <= 1)
return {};
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
auto HasNoClobbersOnPath =
[L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
SmallVector<MemoryAccess *, 4> AccessesToCheck) {
// First, collect all blocks in the loop that are on a patch from Succ
// to the header.
SmallVector<BasicBlock *, 4> WorkList;
WorkList.push_back(Succ);
WorkList.push_back(Header);
SmallPtrSet<BasicBlock *, 4> Seen;
Seen.insert(Header);
while (!WorkList.empty()) {
BasicBlock *Current = WorkList.pop_back_val();
if (!L->contains(Current))
continue;
const auto &SeenIns = Seen.insert(Current);
if (!SeenIns.second)
[L, AA, &AccessedLocs, &ExitingBlocks,
&InstToDuplicate](BasicBlock *Succ, BasicBlock *Header,
SmallVector<MemoryAccess *, 4> AccessesToCheck)
-> Optional<IVConditionInfo> {
IVConditionInfo Info;
// First, collect all blocks in the loop that are on a patch from Succ
// to the header.
SmallVector<BasicBlock *, 4> WorkList;
WorkList.push_back(Succ);
WorkList.push_back(Header);
SmallPtrSet<BasicBlock *, 4> Seen;
Seen.insert(Header);
Info.PathIsNoop &=
all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });
while (!WorkList.empty()) {
BasicBlock *Current = WorkList.pop_back_val();
if (!L->contains(Current))
continue;
const auto &SeenIns = Seen.insert(Current);
if (!SeenIns.second)
continue;
Info.PathIsNoop &= all_of(
*Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });
WorkList.append(succ_begin(Current), succ_end(Current));
}
// Require at least 2 blocks on a path through the loop. This skips
// paths that directly exit the loop.
if (Seen.size() < 2)
return {};
// Next, check if there are any MemoryDefs that are on the path through
// the loop (in the Seen set) and they may-alias any of the locations in
// AccessedLocs. If that is the case, they may modify the condition and
// partial unswitching is not possible.
SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
while (!AccessesToCheck.empty()) {
MemoryAccess *Current = AccessesToCheck.pop_back_val();
auto SeenI = SeenAccesses.insert(Current);
if (!SeenI.second || !Seen.contains(Current->getBlock()))
continue;
// Bail out if exceeded the threshold.
if (SeenAccesses.size() >= MSSAThreshold)
return {};
// MemoryUse are read-only accesses.
if (isa<MemoryUse>(Current))
continue;
// For a MemoryDef, check if is aliases any of the location feeding
// the original condition.
if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
return isModSet(
AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
}))
return {};
}
for (Use &U : Current->uses())
AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
}
// We could also allow loops with known trip counts without mustprogress,
// but ScalarEvolution may not be available.
Info.PathIsNoop &=
L->getHeader()->getParent()->mustProgress() || hasMustProgress(L);
// If the path is considered a no-op so far, check if it reaches a
// single exit block without any phis. This ensures no values from the
// loop are used outside of the loop.
if (Info.PathIsNoop) {
for (auto *Exiting : ExitingBlocks) {
if (!Seen.contains(Exiting))
continue;
for (auto *Succ : successors(Exiting)) {
if (L->contains(Succ))
continue;
WorkList.append(succ_begin(Current), succ_end(Current));
Info.PathIsNoop &= empty(Succ->phis()) &&
(!Info.ExitForPath || Info.ExitForPath == Succ);
if (!Info.PathIsNoop)
break;
assert(!Info.ExitForPath || Info.ExitForPath == Succ &&
"cannot have multiple exit blocks");
Info.ExitForPath = Succ;
}
}
}
if (!Info.ExitForPath)
Info.PathIsNoop = false;
// Require at least 2 blocks on a path through the loop. This skips
// paths that directly exit the loop.
if (Seen.size() < 2)
return false;
// Next, check if there are any MemoryDefs that are on the path through
// the loop (in the Seen set) and they may-alias any of the locations in
// AccessedLocs. If that is the case, they may modify the condition and
// partial unswitching is not possible.
SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
while (!AccessesToCheck.empty()) {
MemoryAccess *Current = AccessesToCheck.pop_back_val();
auto SeenI = SeenAccesses.insert(Current);
if (!SeenI.second || !Seen.contains(Current->getBlock()))
continue;
// Bail out if exceeded the threshold.
if (SeenAccesses.size() >= MSSAThreshold)
return false;
// MemoryUse are read-only accesses.
if (isa<MemoryUse>(Current))
continue;
// For a MemoryDef, check if is aliases any of the location feeding
// the original condition.
if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
return isModSet(
AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
}))
return false;
}
for (Use &U : Current->uses())
AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
}
return true;
};
Info.InstToDuplicate = InstToDuplicate;
return Info;
};
// If we branch to the same successor, partial unswitching will not be
// beneficial.
if (TI->getSuccessor(0) == TI->getSuccessor(1))
return {};
if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(),
AccessesToCheck)) {
Info->KnownValue = ConstantInt::getTrue(TI->getContext());
return Info;
}
if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(),
AccessesToCheck)) {
Info->KnownValue = ConstantInt::getFalse(TI->getContext());
return Info;
}
return {};
}
@ -986,17 +1052,56 @@ bool LoopUnswitch::processCurrentLoop() {
// metadata, to avoid unswitching the same loop multiple times.
if (MSSA &&
!findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
if (!ToDuplicate.first.empty()) {
if (auto Info = hasPartialIVCondition(CurrentLoop, *MSSA, AA)) {
assert(!Info->InstToDuplicate.empty() &&
"need at least a partially invariant condition");
LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
<< *ToDuplicate.first[0] << "\n");
++NumBranches;
unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
CurrentLoop->getHeader()->getTerminator(),
ToDuplicate.first);
<< *Info->InstToDuplicate[0] << "\n");
RedoLoop = false;
return true;
Instruction *TI = CurrentLoop->getHeader()->getTerminator();
Value *LoopCond = Info->InstToDuplicate[0];
// If the partially unswitched path is a no-op and has a single exit
// block, we do not need to do full unswitching. Instead, we can directly
// branch to the exit.
// TODO: Instead of duplicating the checks, we could also just directly
// branch to the exit from the conditional branch in the loop.
if (Info->PathIsNoop) {
if (HasBranchDivergence &&
getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
<< CurrentLoop->getHeader()->getName()
<< " at non-trivial condition '"
<< *Info->KnownValue << "' == " << *LoopCond << "\n"
<< ". Condition is divergent.\n");
return false;
}
++NumBranches;
BasicBlock *TrueDest = LoopHeader;
BasicBlock *FalseDest = Info->ExitForPath;
if (Info->KnownValue->isOneValue())
std::swap(TrueDest, FalseDest);
auto *OldBr =
cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator());
emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest,
FalseDest, OldBr, TI,
Info->InstToDuplicate);
delete OldBr;
RedoLoop = false;
return true;
}
// Otherwise, the path is not a no-op. Run regular unswitching.
if (unswitchIfProfitable(LoopCond, Info->KnownValue,
CurrentLoop->getHeader()->getTerminator(),
Info->InstToDuplicate)) {
++NumBranches;
RedoLoop = false;
return true;
}
}
}

View File

@ -48,8 +48,16 @@ exit:
define i32 @partial_unswitch_shortcut_mustprogress(i32* %ptr, i32 %N) mustprogress {
; CHECK-LABEL: @partial_unswitch_shortcut_mustprogress
; CHECK-LABEL: entry:
; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
; CHECK-NEXT: br i1 [[C]], label %[[CRIT_TO_EXIT:[a-z._]+]], label %[[CRIT_TO_HEADER:[a-z._]+]]
;
; CHECK: [[CRIT_TO_HEADER]]:
; CHECK-NEXT: br label %loop.header
;
; CHECK: [[CRIT_TO_EXIT]]:
; CHECK-NEXT: br label %exit
;
entry:
br label %loop.header
@ -86,8 +94,16 @@ exit:
define i32 @partial_unswitch_shortcut_mustprogress_single_exit_on_path(i32* %ptr, i32 %N) mustprogress {
; CHECK-LABEL: @partial_unswitch_shortcut_mustprogress_single_exit_on_path
; CHECK-LABEL: entry:
; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
; CHECK-NEXT: br i1 [[C]], label %[[CRIT_TO_EXIT:.+]], label %[[CRIT_TO_HEADER:[a-z._]+]]
;
; CHECK: [[CRIT_TO_HEADER]]:
; CHECK-NEXT: br label %loop.header
;
; CHECK: [[CRIT_TO_EXIT]]:
; CHECK-NEXT: br label %exit
;
entry:
br label %loop.header
@ -208,8 +224,16 @@ exit:
define i32 @partial_unswitch_shortcut_multiple_exiting_blocks(i32* %ptr, i32 %N, i1 %ec.1) mustprogress {
; CHECK-LABEL: @partial_unswitch_shortcut_multiple_exiting_blocks
; CHECK-LABEL: entry:
; CHECK-NEXT: [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
; CHECK-NEXT: [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
; CHECK-NEXT: br i1 [[C]], label %[[CRIT_TO_EXIT:.+]], label %[[CRIT_TO_HEADER:[a-z._]+]]
;
; CHECK: [[CRIT_TO_HEADER]]:
; CHECK-NEXT: br label %loop.header
;
; CHECK: [[CRIT_TO_EXIT]]:
; CHECK-NEXT: br label %exit
;
entry:
br label %loop.header