Add jump-threading optimization for deterministic finite automata

The current JumpThreading pass does not jump thread loops since it can
result in irreducible control flow that harms other optimizations. This
prevents switch statements inside a loop from being optimized to use
unconditional branches.

This code pattern occurs in the core_state_transition function of
Coremark. The state machine can be implemented manually with goto
statements resulting in a large runtime improvement, and this transform
makes the switch implementation match the goto version in performance.

This patch specifically targets switch statements inside a loop that
have the opportunity to be threaded. Once it identifies an opportunity,
it creates new paths that branch directly to the correct code block.
For example, the left CFG could be transformed to the right CFG:

```
          sw.bb                        sw.bb
        /   |   \                    /   |   \
   case1  case2  case3          case1  case2  case3
        \   |   /                /       |       \
        latch.bb             latch.2  latch.3  latch.1
         br sw.bb              /         |         \
                           sw.bb.2     sw.bb.3     sw.bb.1
                            br case2    br case3    br case1
```

Co-author: Justin Kreiner @jkreiner
Co-author: Ehsan Amiri @amehsan

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D99205
This commit is contained in:
Alexey Zhikhartsev 2021-07-27 14:26:49 -04:00 committed by Danilo C. Grael
parent 8e8701abca
commit 02077da7e7
16 changed files with 2395 additions and 0 deletions

View File

@ -124,6 +124,7 @@ void initializeCrossDSOCFIPass(PassRegistry&);
void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
void initializeDCELegacyPassPass(PassRegistry&);
void initializeDFAJumpThreadingLegacyPassPass(PassRegistry &);
void initializeDSELegacyPassPass(PassRegistry&);
void initializeDataFlowSanitizerLegacyPassPass(PassRegistry &);
void initializeDeadMachineInstructionElimPass(PassRegistry&);

View File

@ -174,6 +174,7 @@ namespace {
(void) llvm::createStripDeadPrototypesPass();
(void) llvm::createTailCallEliminationPass();
(void) llvm::createJumpThreadingPass();
(void) llvm::createDFAJumpThreadingPass();
(void) llvm::createUnifyFunctionExitNodesPass();
(void) llvm::createInstCountPass();
(void) llvm::createConstantHoistingPass();

View File

@ -253,6 +253,14 @@ FunctionPass *createReassociatePass();
FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false,
int Threshold = -1);
//===----------------------------------------------------------------------===//
//
// DFAJumpThreading - When a switch statement inside a loop is used to
// implement a deterministic finite automata we can jump thread the switch
// statement reducing number of conditional jumps.
//
FunctionPass *createDFAJumpThreadingPass();
//===----------------------------------------------------------------------===//
//
// CFGSimplification - Merge basic blocks, eliminate unreachable blocks,

View File

@ -0,0 +1,27 @@
//===- DFAJumpThreading.h - Threads a switch statement inside a loop ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the interface for the DFAJumpThreading pass.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_SCALAR_DFAJUMPTHREADING_H
#define LLVM_TRANSFORMS_SCALAR_DFAJUMPTHREADING_H
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
struct DFAJumpThreadingPass : PassInfoMixin<DFAJumpThreadingPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_SCALAR_DFAJUMPTHREADING_H

View File

@ -145,6 +145,7 @@
#include "llvm/Transforms/Scalar/ConstraintElimination.h"
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/Transforms/Scalar/DivRemPairs.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
@ -302,6 +303,7 @@ extern cl::opt<bool> EnableCHR;
extern cl::opt<bool> EnableLoopInterchange;
extern cl::opt<bool> EnableUnrollAndJam;
extern cl::opt<bool> EnableLoopFlatten;
extern cl::opt<bool> EnableDFAJumpThreading;
extern cl::opt<bool> RunNewGVN;
extern cl::opt<bool> RunPartialInlining;
extern cl::opt<bool> ExtraVectorizerPasses;
@ -829,6 +831,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Re-consider control flow based optimizations after redundancy elimination,
// redo DCE, etc.
if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
FPM.addPass(DFAJumpThreadingPass());
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());

View File

@ -212,6 +212,7 @@ FUNCTION_PASS("coro-elide", CoroElidePass())
FUNCTION_PASS("coro-cleanup", CoroCleanupPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("dfa-jump-threading", DFAJumpThreadingPass())
FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("dot-cfg", CFGPrinterPass())

View File

@ -99,6 +99,10 @@ cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFlatten Pass"));
cl::opt<bool> EnableDFAJumpThreading("enable-dfa-jump-thread",
cl::desc("Enable DFA jump threading."),
cl::init(false), cl::Hidden);
static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
@ -500,6 +504,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1) {
if (EnableDFAJumpThreading && SizeLevel == 0)
MPM.add(createDFAJumpThreadingPass());
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
}

View File

@ -9,6 +9,7 @@ add_llvm_component_library(LLVMScalarOpts
CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
DFAJumpThreading.cpp
DivRemPairs.cpp
EarlyCSE.cpp
FlattenCFGPass.cpp

File diff suppressed because it is too large Load Diff

View File

@ -60,6 +60,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeInferAddressSpacesPass(Registry);
initializeInstSimplifyLegacyPassPass(Registry);
initializeJumpThreadingPass(Registry);
initializeDFAJumpThreadingLegacyPassPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLegacyLoopSinkPassPass(Registry);
initializeLoopFuseLegacyPass(Registry);

View File

@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -dfa-jump-threading -sccp -simplifycfg %s | FileCheck %s
; This test checks that a constant propagation is applied for a basic loop.
; Related to bug 44679.
define i32 @test(i32 %a) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 3
;
entry:
br label %while.cond
while.cond:
%num = phi i32 [ 0, %entry ], [ %add, %case1 ]
%state = phi i32 [ 1, %entry ], [ %state.next, %case1 ]
switch i32 %state, label %end [
i32 1, label %case1
i32 2, label %case2
]
case1:
%state.next = phi i32 [ 3, %case2 ], [ 2, %while.cond ]
%add = add nsw i32 %num, %state
br label %while.cond
case2:
br label %case1
end:
ret i32 %num
}

View File

@ -0,0 +1,180 @@
; REQUIRES: asserts
; RUN: opt -S -dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s
; This test checks that the analysis identifies all threadable paths in a
; simple CFG. A threadable path includes a list of basic blocks, the exit
; state, and the block that determines the next state.
; < path of BBs that form a cycle > [ state, determinator ]
define i32 @test1(i32 %num) {
; CHECK: < for.body for.inc > [ 1, for.inc ]
; CHECK-NEXT: < for.body case1 for.inc > [ 2, for.inc ]
; CHECK-NEXT: < for.body case2 for.inc > [ 1, for.inc ]
; CHECK-NEXT: < for.body case2 si.unfold.false for.inc > [ 2, for.inc ]
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
; This test checks that the analysis finds threadable paths in a more
; complicated CFG. Here the FSM is represented as a nested loop, with
; fallthrough cases.
define i32 @test2(i32 %init) {
; CHECK: < loop.3 case2 > [ 3, loop.3 ]
; CHECK-NEXT: < loop.3 case2 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ]
; CHECK-NEXT: < loop.3 case2 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 4, loop.1.backedge ]
; CHECK-NEXT: < loop.3 case3 loop.2.backedge loop.2 > [ 0, loop.2.backedge ]
; CHECK-NEXT: < loop.3 case3 case4 loop.2.backedge loop.2 > [ 3, loop.2.backedge ]
; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ]
; CHECK-NEXT: < loop.3 case3 case4 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 2, loop.1.backedge ]
; CHECK-NEXT: < loop.3 case4 loop.2.backedge loop.2 > [ 3, loop.2.backedge ]
; CHECK-NEXT: < loop.3 case4 loop.1.backedge loop.1 loop.2 > [ 1, loop.1 ]
; CHECK-NEXT: < loop.3 case4 loop.1.backedge si.unfold.false loop.1 loop.2 > [ 2, loop.1.backedge ]
entry:
%cmp = icmp eq i32 %init, 0
%sel = select i1 %cmp, i32 0, i32 2
br label %loop.1
loop.1:
%state.1 = phi i32 [ %sel, %entry ], [ %state.1.be2, %loop.1.backedge ]
br label %loop.2
loop.2:
%state.2 = phi i32 [ %state.1, %loop.1 ], [ %state.2.be, %loop.2.backedge ]
br label %loop.3
loop.3:
%state = phi i32 [ %state.2, %loop.2 ], [ 3, %case2 ]
switch i32 %state, label %infloop.i [
i32 2, label %case2
i32 3, label %case3
i32 4, label %case4
i32 0, label %case0
i32 1, label %case1
]
case2:
br i1 %cmp, label %loop.3, label %loop.1.backedge
case3:
br i1 %cmp, label %loop.2.backedge, label %case4
case4:
br i1 %cmp, label %loop.2.backedge, label %loop.1.backedge
loop.1.backedge:
%state.1.be = phi i32 [ 2, %case4 ], [ 4, %case2 ]
%state.1.be2 = select i1 %cmp, i32 1, i32 %state.1.be
br label %loop.1
loop.2.backedge:
%state.2.be = phi i32 [ 3, %case4 ], [ 0, %case3 ]
br label %loop.2
case0:
br label %exit
case1:
br label %exit
infloop.i:
br label %infloop.i
exit:
ret i32 0
}
declare void @baz()
; Verify that having the switch block as a determinator is handled correctly.
define i32 @main() {
; CHECK: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ]
; CHECK-NEXT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ]
bb:
%i = alloca [420 x i8], align 1
%i2 = getelementptr inbounds [420 x i8], [420 x i8]* %i, i64 0, i64 390
br label %bb3
bb3: ; preds = %bb59, %bb
%i4 = phi i8* [ %i2, %bb ], [ %i60, %bb59 ]
%i5 = phi i8 [ 77, %bb ], [ %i64, %bb59 ]
%i6 = phi i32 [ 2, %bb ], [ %i63, %bb59 ]
%i7 = phi i32 [ 26, %bb ], [ %i62, %bb59 ]
%i8 = phi i32 [ 25, %bb ], [ %i61, %bb59 ]
%i9 = icmp sgt i32 %i7, 2
%i10 = select i1 %i9, i32 %i7, i32 2
%i11 = add i32 %i8, 2
%i12 = sub i32 %i11, %i10
%i13 = mul nsw i32 %i12, 3
%i14 = add nsw i32 %i13, %i6
%i15 = sext i32 %i14 to i64
%i16 = getelementptr inbounds i8, i8* %i4, i64 %i15
%i17 = load i8, i8* %i16, align 1
%i18 = icmp sgt i8 %i17, 0
br i1 %i18, label %bb21, label %bb31
bb21: ; preds = %bb3
br i1 true, label %bb59, label %bb43
bb59: ; preds = %bb49, %bb43, %bb31, %bb21
%i60 = phi i8* [ %i44, %bb49 ], [ %i44, %bb43 ], [ %i34, %bb31 ], [ %i4, %bb21 ]
%i61 = phi i32 [ %i45, %bb49 ], [ %i45, %bb43 ], [ %i33, %bb31 ], [ %i8, %bb21 ]
%i62 = phi i32 [ %i47, %bb49 ], [ %i47, %bb43 ], [ %i32, %bb31 ], [ %i7, %bb21 ]
%i63 = phi i32 [ %i48, %bb49 ], [ %i48, %bb43 ], [ 2, %bb31 ], [ %i6, %bb21 ]
%i64 = phi i8 [ %i46, %bb49 ], [ %i46, %bb43 ], [ 77, %bb31 ], [ %i5, %bb21 ]
%i65 = icmp sgt i32 %i62, 0
br i1 %i65, label %bb3, label %bb66
bb31: ; preds = %bb3
%i32 = add nsw i32 %i7, -1
%i33 = add nsw i32 %i8, -1
%i34 = getelementptr inbounds i8, i8* %i4, i64 -15
%i35 = icmp eq i8 %i5, 77
br i1 %i35, label %bb59, label %bb41
bb41: ; preds = %bb31
tail call void @baz()
br label %bb43
bb43: ; preds = %bb41, %bb21
%i44 = phi i8* [ %i34, %bb41 ], [ %i4, %bb21 ]
%i45 = phi i32 [ %i33, %bb41 ], [ %i8, %bb21 ]
%i46 = phi i8 [ 77, %bb41 ], [ %i5, %bb21 ]
%i47 = phi i32 [ %i32, %bb41 ], [ %i7, %bb21 ]
%i48 = phi i32 [ 2, %bb41 ], [ %i6, %bb21 ]
tail call void @baz()
switch i8 %i5, label %bb59 [
i8 68, label %bb49
i8 73, label %bb49
]
bb49: ; preds = %bb43, %bb43
tail call void @baz()
br label %bb59
bb66: ; preds = %bb59
ret i32 0
}

View File

@ -0,0 +1,234 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -dfa-jump-threading %s | FileCheck %s
; These tests check that the DFA jump threading transformation is applied
; properly to two CFGs. It checks that blocks are cloned, branches are updated,
; and SSA form is restored.
define i32 @test1(i32 %num) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ undef, [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT2:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT1:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[COUNT1]], 50
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 undef, 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT1]], [[SI_UNFOLD_FALSE]] ], [ [[COUNT2]], [[CASE1]] ]
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[CASE1]] ], [ 2, [[SI_UNFOLD_FALSE]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT1]], [[CASE2]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[CASE2]] ], [ 1, [[FOR_BODY]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT3]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
define i32 @test2(i32 %init) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INIT:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_1:%.*]], label [[SI_UNFOLD_FALSE1:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br label [[LOOP_1]]
; CHECK: si.unfold.false.jt2:
; CHECK-NEXT: br label [[LOOP_1_JT2:%.*]]
; CHECK: si.unfold.false.jt4:
; CHECK-NEXT: br label [[LOOP_1_JT4:%.*]]
; CHECK: si.unfold.false1:
; CHECK-NEXT: br label [[LOOP_1]]
; CHECK: loop.1:
; CHECK-NEXT: [[STATE_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ undef, [[SI_UNFOLD_FALSE:%.*]] ], [ 2, [[SI_UNFOLD_FALSE1]] ]
; CHECK-NEXT: br label [[LOOP_2:%.*]]
; CHECK: loop.1.jt2:
; CHECK-NEXT: [[STATE_1_JT2:%.*]] = phi i32 [ [[STATE_1_BE_JT2:%.*]], [[SI_UNFOLD_FALSE_JT2:%.*]] ]
; CHECK-NEXT: br label [[LOOP_2_JT2:%.*]]
; CHECK: loop.1.jt4:
; CHECK-NEXT: [[STATE_1_JT4:%.*]] = phi i32 [ [[STATE_1_BE_JT4:%.*]], [[SI_UNFOLD_FALSE_JT4:%.*]] ]
; CHECK-NEXT: br label [[LOOP_2_JT4:%.*]]
; CHECK: loop.1.jt1:
; CHECK-NEXT: [[STATE_1_JT1:%.*]] = phi i32 [ 1, [[LOOP_1_BACKEDGE:%.*]] ], [ 1, [[LOOP_1_BACKEDGE_JT4:%.*]] ], [ 1, [[LOOP_1_BACKEDGE_JT2:%.*]] ]
; CHECK-NEXT: br label [[LOOP_2_JT1:%.*]]
; CHECK: loop.2:
; CHECK-NEXT: [[STATE_2:%.*]] = phi i32 [ [[STATE_1]], [[LOOP_1]] ], [ undef, [[LOOP_2_BACKEDGE:%.*]] ]
; CHECK-NEXT: br label [[LOOP_3:%.*]]
; CHECK: loop.2.jt2:
; CHECK-NEXT: [[STATE_2_JT2:%.*]] = phi i32 [ [[STATE_1_JT2]], [[LOOP_1_JT2]] ]
; CHECK-NEXT: br label [[LOOP_3_JT2:%.*]]
; CHECK: loop.2.jt3:
; CHECK-NEXT: [[STATE_2_JT3:%.*]] = phi i32 [ [[STATE_2_BE_JT3:%.*]], [[LOOP_2_BACKEDGE_JT3:%.*]] ]
; CHECK-NEXT: br label [[LOOP_3_JT3:%.*]]
; CHECK: loop.2.jt0:
; CHECK-NEXT: [[STATE_2_JT0:%.*]] = phi i32 [ [[STATE_2_BE_JT0:%.*]], [[LOOP_2_BACKEDGE_JT0:%.*]] ]
; CHECK-NEXT: br label [[LOOP_3_JT0:%.*]]
; CHECK: loop.2.jt4:
; CHECK-NEXT: [[STATE_2_JT4:%.*]] = phi i32 [ [[STATE_1_JT4]], [[LOOP_1_JT4]] ]
; CHECK-NEXT: br label [[LOOP_3_JT4:%.*]]
; CHECK: loop.2.jt1:
; CHECK-NEXT: [[STATE_2_JT1:%.*]] = phi i32 [ [[STATE_1_JT1]], [[LOOP_1_JT1:%.*]] ]
; CHECK-NEXT: br label [[LOOP_3_JT1:%.*]]
; CHECK: loop.3:
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ [[STATE_2]], [[LOOP_2]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[INFLOOP_I:%.*]] [
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: i32 3, label [[CASE3:%.*]]
; CHECK-NEXT: i32 4, label [[CASE4:%.*]]
; CHECK-NEXT: i32 0, label [[CASE0:%.*]]
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: ]
; CHECK: loop.3.jt2:
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_2_JT2]], [[LOOP_2_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: loop.3.jt0:
; CHECK-NEXT: [[STATE_JT0:%.*]] = phi i32 [ [[STATE_2_JT0]], [[LOOP_2_JT0:%.*]] ]
; CHECK-NEXT: br label [[CASE0]]
; CHECK: loop.3.jt4:
; CHECK-NEXT: [[STATE_JT4:%.*]] = phi i32 [ [[STATE_2_JT4]], [[LOOP_2_JT4]] ]
; CHECK-NEXT: br label [[CASE4]]
; CHECK: loop.3.jt1:
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_2_JT1]], [[LOOP_2_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: loop.3.jt3:
; CHECK-NEXT: [[STATE_JT3:%.*]] = phi i32 [ 3, [[CASE2]] ], [ [[STATE_2_JT3]], [[LOOP_2_JT3:%.*]] ]
; CHECK-NEXT: br label [[CASE3]]
; CHECK: case2:
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_3_JT3]], label [[LOOP_1_BACKEDGE_JT4]]
; CHECK: case3:
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_2_BACKEDGE_JT0]], label [[CASE4]]
; CHECK: case4:
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_2_BACKEDGE_JT3]], label [[LOOP_1_BACKEDGE_JT2]]
; CHECK: loop.1.backedge:
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_1_JT1]], label [[SI_UNFOLD_FALSE]]
; CHECK: loop.1.backedge.jt2:
; CHECK-NEXT: [[STATE_1_BE_JT2]] = phi i32 [ 2, [[CASE4]] ]
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_1_JT1]], label [[SI_UNFOLD_FALSE_JT2]]
; CHECK: loop.1.backedge.jt4:
; CHECK-NEXT: [[STATE_1_BE_JT4]] = phi i32 [ 4, [[CASE2]] ]
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_1_JT1]], label [[SI_UNFOLD_FALSE_JT4]]
; CHECK: loop.2.backedge:
; CHECK-NEXT: br label [[LOOP_2]]
; CHECK: loop.2.backedge.jt3:
; CHECK-NEXT: [[STATE_2_BE_JT3]] = phi i32 [ 3, [[CASE4]] ]
; CHECK-NEXT: br label [[LOOP_2_JT3]]
; CHECK: loop.2.backedge.jt0:
; CHECK-NEXT: [[STATE_2_BE_JT0]] = phi i32 [ 0, [[CASE3]] ]
; CHECK-NEXT: br label [[LOOP_2_JT0]]
; CHECK: case0:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: case1:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: infloop.i:
; CHECK-NEXT: br label [[INFLOOP_I]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
entry:
%cmp = icmp eq i32 %init, 0
%sel = select i1 %cmp, i32 0, i32 2
br label %loop.1
loop.1:
%state.1 = phi i32 [ %sel, %entry ], [ %state.1.be2, %loop.1.backedge ]
br label %loop.2
loop.2:
%state.2 = phi i32 [ %state.1, %loop.1 ], [ %state.2.be, %loop.2.backedge ]
br label %loop.3
loop.3:
%state = phi i32 [ %state.2, %loop.2 ], [ 3, %case2 ]
switch i32 %state, label %infloop.i [
i32 2, label %case2
i32 3, label %case3
i32 4, label %case4
i32 0, label %case0
i32 1, label %case1
]
case2:
br i1 %cmp, label %loop.3, label %loop.1.backedge
case3:
br i1 %cmp, label %loop.2.backedge, label %case4
case4:
br i1 %cmp, label %loop.2.backedge, label %loop.1.backedge
loop.1.backedge:
%state.1.be = phi i32 [ 2, %case4 ], [ 4, %case2 ]
%state.1.be2 = select i1 %cmp, i32 1, i32 %state.1.be
br label %loop.1
loop.2.backedge:
%state.2.be = phi i32 [ 3, %case4 ], [ 0, %case3 ]
br label %loop.2
case0:
br label %exit
case1:
br label %exit
infloop.i:
br label %infloop.i
exit:
ret i32 0
}

View File

@ -0,0 +1,293 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -dfa-jump-threading %s | FileCheck %s
; These tests check if selects are unfolded properly for jump threading
; opportunities. There are three different patterns to consider:
; 1) Both operands are constant and the false branch is unfolded by default
; 2) One operand is constant and the other is another select to be unfolded. In
; this case a single select is sunk to a new block to unfold.
; 3) Both operands are a select, and both should be sunk to new blocks.
define i32 @test1(i32 %num) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ undef, [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT2:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT1:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[COUNT1]], 50
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 undef, 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT1]], [[SI_UNFOLD_FALSE]] ], [ [[COUNT2]], [[CASE1]] ]
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[CASE1]] ], [ 2, [[SI_UNFOLD_FALSE]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT1]], [[CASE2]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[CASE2]] ], [ 1, [[FOR_BODY]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT3]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp slt i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
define i32 @test2(i32 %num) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ undef, [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt3:
; CHECK-NEXT: [[COUNT_JT3:%.*]] = phi i32 [ [[INC_JT3:%.*]], [[FOR_INC_JT3:%.*]] ]
; CHECK-NEXT: [[STATE_JT3:%.*]] = phi i32 [ [[STATE_NEXT_JT3:%.*]], [[FOR_INC_JT3]] ]
; CHECK-NEXT: br label [[FOR_INC_JT1]]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT5:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP_C1:%.*]] = icmp slt i32 [[COUNT5]], 50
; CHECK-NEXT: [[CMP2_C1:%.*]] = icmp slt i32 [[COUNT5]], 100
; CHECK-NEXT: br i1 [[CMP2_C1]], label [[SI_UNFOLD_TRUE:%.*]], label [[FOR_INC_JT3]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP_C2:%.*]] = icmp slt i32 [[COUNT3]], 50
; CHECK-NEXT: [[CMP2_C2:%.*]] = icmp sgt i32 [[COUNT3]], 100
; CHECK-NEXT: br i1 [[CMP2_C2]], label [[FOR_INC_JT3]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br i1 [[CMP_C2]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE1:%.*]]
; CHECK: si.unfold.false1:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: si.unfold.true:
; CHECK-NEXT: br i1 [[CMP_C1]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE2:%.*]]
; CHECK: si.unfold.false2:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 undef, 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt3:
; CHECK-NEXT: [[COUNT6:%.*]] = phi i32 [ [[COUNT3]], [[CASE2]] ], [ [[COUNT5]], [[CASE1]] ]
; CHECK-NEXT: [[STATE_NEXT_JT3]] = phi i32 [ 3, [[CASE1]] ], [ 3, [[CASE2]] ]
; CHECK-NEXT: [[INC_JT3]] = add nsw i32 [[COUNT6]], 1
; CHECK-NEXT: [[CMP_EXIT_JT3:%.*]] = icmp slt i32 [[INC_JT3]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT3]], label [[FOR_BODY_JT3:%.*]], label [[FOR_END]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[COUNT7:%.*]] = phi i32 [ [[COUNT3]], [[SI_UNFOLD_FALSE1]] ], [ [[COUNT5]], [[SI_UNFOLD_FALSE2]] ]
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[SI_UNFOLD_FALSE1]] ], [ 2, [[SI_UNFOLD_FALSE2]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT7]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT3]], [[SI_UNFOLD_FALSE]] ], [ [[COUNT5]], [[SI_UNFOLD_TRUE]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[SI_UNFOLD_FALSE]] ], [ 1, [[SI_UNFOLD_TRUE]] ], [ 1, [[FOR_BODY_JT3]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
%cmp.c1 = icmp slt i32 %count, 50
%cmp2.c1 = icmp slt i32 %count, 100
%state1.1 = select i1 %cmp.c1, i32 1, i32 2
%state1.2 = select i1 %cmp2.c1, i32 %state1.1, i32 3
br label %for.inc
case2:
%cmp.c2 = icmp slt i32 %count, 50
%cmp2.c2 = icmp sgt i32 %count, 100
%state2.1 = select i1 %cmp.c2, i32 1, i32 2
%state2.2 = select i1 %cmp2.c2, i32 3, i32 %state2.1
br label %for.inc
for.inc:
%state.next = phi i32 [ %state1.2, %case1 ], [ %state2.2, %case2 ], [ 1, %for.body ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
define i32 @test3(i32 %num) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ undef, [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt4:
; CHECK-NEXT: [[COUNT_JT4:%.*]] = phi i32 [ [[INC_JT4:%.*]], [[FOR_INC_JT4:%.*]] ]
; CHECK-NEXT: [[STATE_JT4:%.*]] = phi i32 [ [[STATE_NEXT_JT4:%.*]], [[FOR_INC_JT4]] ]
; CHECK-NEXT: br label [[FOR_INC_JT1]]
; CHECK: for.body.jt3:
; CHECK-NEXT: [[COUNT_JT3:%.*]] = phi i32 [ [[INC_JT3:%.*]], [[FOR_INC_JT3:%.*]] ]
; CHECK-NEXT: [[STATE_JT3:%.*]] = phi i32 [ [[STATE_NEXT_JT3:%.*]], [[FOR_INC_JT3]] ]
; CHECK-NEXT: br label [[FOR_INC_JT1]]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT5:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT4:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[COUNT4]], 50
; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[COUNT4]], 100
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP_3]], label [[SI_UNFOLD_TRUE:%.*]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.true:
; CHECK-NEXT: br i1 [[CMP_1]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE2:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br i1 [[CMP_2]], label [[FOR_INC_JT3]], label [[SI_UNFOLD_FALSE1:%.*]]
; CHECK: si.unfold.false1:
; CHECK-NEXT: br label [[FOR_INC_JT4]]
; CHECK: si.unfold.false2:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 undef, 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt4:
; CHECK-NEXT: [[STATE_NEXT_JT4]] = phi i32 [ 4, [[SI_UNFOLD_FALSE1]] ]
; CHECK-NEXT: [[INC_JT4]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT4:%.*]] = icmp slt i32 [[INC_JT4]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT4]], label [[FOR_BODY_JT4:%.*]], label [[FOR_END]]
; CHECK: for.inc.jt3:
; CHECK-NEXT: [[STATE_NEXT_JT3]] = phi i32 [ 3, [[SI_UNFOLD_FALSE]] ]
; CHECK-NEXT: [[INC_JT3]] = add nsw i32 [[COUNT4]], 1
; CHECK-NEXT: [[CMP_EXIT_JT3:%.*]] = icmp slt i32 [[INC_JT3]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT3]], label [[FOR_BODY_JT3:%.*]], label [[FOR_END]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[COUNT6:%.*]] = phi i32 [ [[COUNT4]], [[SI_UNFOLD_FALSE2]] ], [ [[COUNT5]], [[CASE1]] ]
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[CASE1]] ], [ 2, [[SI_UNFOLD_FALSE2]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT6]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT_JT4]], [[FOR_BODY_JT4]] ], [ [[COUNT_JT3]], [[FOR_BODY_JT3]] ], [ [[COUNT4]], [[SI_UNFOLD_TRUE]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[FOR_BODY]] ], [ 1, [[SI_UNFOLD_TRUE]] ], [ 1, [[FOR_BODY_JT3]] ], [ 1, [[FOR_BODY_JT4]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT3]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp.1 = icmp slt i32 %count, 50
%cmp.2 = icmp slt i32 %count, 100
%0 = and i32 %count, 1
%cmp.3 = icmp eq i32 %0, 0
%sel.1 = select i1 %cmp.1, i32 1, i32 2
%sel.2 = select i1 %cmp.2, i32 3, i32 4
%sel.3 = select i1 %cmp.3, i32 %sel.1, i32 %sel.2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel.3, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}

View File

@ -0,0 +1,101 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -dfa-jump-threading -dfa-max-path-length=6 %s | FileCheck %s
; Make the path
; <%for.body %case1 %case1.1 %case1.2 %case1.3 %case1.4 %for.inc %for.end>
; too long so that it is not jump-threaded.
define i32 @max_path_length(i32 %num) {
; CHECK-LABEL: @max_path_length(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[STATE_NEXT:%.*]], [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC_JT1:%.*]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: for.body.jt2:
; CHECK-NEXT: [[COUNT_JT2:%.*]] = phi i32 [ [[INC_JT2:%.*]], [[FOR_INC_JT2:%.*]] ]
; CHECK-NEXT: [[STATE_JT2:%.*]] = phi i32 [ [[STATE_NEXT_JT2:%.*]], [[FOR_INC_JT2]] ]
; CHECK-NEXT: br label [[CASE2]]
; CHECK: for.body.jt1:
; CHECK-NEXT: [[COUNT_JT1:%.*]] = phi i32 [ [[INC_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: [[STATE_JT1:%.*]] = phi i32 [ [[STATE_NEXT_JT1:%.*]], [[FOR_INC_JT1]] ]
; CHECK-NEXT: br label [[CASE1]]
; CHECK: case1:
; CHECK-NEXT: [[COUNT2:%.*]] = phi i32 [ [[COUNT_JT1]], [[FOR_BODY_JT1:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[CASE1_1:%.*]]
; CHECK: case1.1:
; CHECK-NEXT: br label [[CASE1_2:%.*]]
; CHECK: case1.2:
; CHECK-NEXT: br label [[CASE1_3:%.*]]
; CHECK: case1.3:
; CHECK-NEXT: br label [[CASE1_4:%.*]]
; CHECK: case1.4:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: case2:
; CHECK-NEXT: [[COUNT1:%.*]] = phi i32 [ [[COUNT_JT2]], [[FOR_BODY_JT2:%.*]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[COUNT1]], 50
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_INC_JT1]], label [[SI_UNFOLD_FALSE:%.*]]
; CHECK: si.unfold.false:
; CHECK-NEXT: br label [[FOR_INC_JT2]]
; CHECK: for.inc:
; CHECK-NEXT: [[STATE_NEXT]] = phi i32 [ 2, [[CASE1_4]] ]
; CHECK-NEXT: [[INC]] = add nsw i32 [[COUNT2]], 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.inc.jt2:
; CHECK-NEXT: [[STATE_NEXT_JT2]] = phi i32 [ 2, [[SI_UNFOLD_FALSE]] ]
; CHECK-NEXT: [[INC_JT2]] = add nsw i32 [[COUNT1]], 1
; CHECK-NEXT: [[CMP_EXIT_JT2:%.*]] = icmp slt i32 [[INC_JT2]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT2]], label [[FOR_BODY_JT2]], label [[FOR_END]]
; CHECK: for.inc.jt1:
; CHECK-NEXT: [[COUNT3:%.*]] = phi i32 [ [[COUNT1]], [[CASE2]] ], [ [[COUNT]], [[FOR_BODY]] ]
; CHECK-NEXT: [[STATE_NEXT_JT1]] = phi i32 [ 1, [[CASE2]] ], [ 1, [[FOR_BODY]] ]
; CHECK-NEXT: [[INC_JT1]] = add nsw i32 [[COUNT3]], 1
; CHECK-NEXT: [[CMP_EXIT_JT1:%.*]] = icmp slt i32 [[INC_JT1]], [[NUM]]
; CHECK-NEXT: br i1 [[CMP_EXIT_JT1]], label [[FOR_BODY_JT1]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %case1.1
case1.1:
br label %case1.2
case1.2:
br label %case1.3
case1.3:
br label %case1.4
case1.4:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1.4 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}

View File

@ -0,0 +1,216 @@
; RUN: opt -dfa-jump-threading -dfa-cost-threshold=25 -pass-remarks-missed='dfa-jump-threading' -pass-remarks-output=%t -disable-output %s
; RUN: FileCheck --input-file %t --check-prefix=REMARK %s
; RUN: opt -S -dfa-jump-threading %s | FileCheck %s
; This negative test case checks that the optimization doesn't trigger
; when the code size cost is too high.
define i32 @negative1(i32 %num) {
; REMARK: NotProfitable
; REMARK-NEXT: negative1
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%add1 = add i32 %num, %num
%add2 = add i32 %add1, %add1
%add3 = add i32 %add2, %add2
%add4 = add i32 %add3, %add3
%add5 = add i32 %add4, %add4
%add6 = add i32 %add5, %add5
%add7 = add i32 %add6, %add6
%add8 = add i32 %add7, %add7
%add9 = add i32 %add8, %add8
%add10 = add i32 %add9, %add9
%add11 = add i32 %add10, %add10
%add12 = add i32 %add11, %add11
%add13 = add i32 %add12, %add12
%add14 = add i32 %add13, %add13
%add15 = add i32 %add14, %add14
%add16 = add i32 %add15, %add15
%add17 = add i32 %add16, %add16
%add18 = add i32 %add17, %add17
%add19 = add i32 %add18, %add18
%add20 = add i32 %add19, %add19
%add21 = add i32 %add20, %add20
%add22 = add i32 %add21, %add21
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 %add22
}
declare void @func()
define i32 @negative2(i32 %num) {
; REMARK: NonDuplicatableInst
; REMARK-NEXT: negative2
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
call void @func() noduplicate
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
define i32 @negative3(i32 %num) {
; REMARK: ConvergentInst
; REMARK-NEXT: negative3
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
call void @func() convergent
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
define i32 @negative4(i32 %num) {
; REMARK: SwitchNotPredictable
; REMARK-NEXT: negative4
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
; the switch variable is not predictable since the exit value for %case1
; is defined through a non-instruction (function argument).
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ %num, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}
; Do not optimize if marked minsize.
define i32 @negative5(i32 %num) minsize {
; CHECK-LABEL: @negative5(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[STATE:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[STATE_NEXT:%.*]], [[FOR_INC]] ]
; CHECK-NEXT: switch i32 [[STATE]], label [[FOR_INC]] [
; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
; CHECK-NEXT: i32 2, label [[CASE2:%.*]]
; CHECK-NEXT: ]
; CHECK: case1:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: case2:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[COUNT]], 50
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 1, i32 2
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[STATE_NEXT]] = phi i32 [ [[SEL]], [[CASE2]] ], [ 1, [[FOR_BODY]] ], [ 2, [[CASE1]] ]
; CHECK-NEXT: [[INC]] = add nsw i32 [[COUNT]], 1
; CHECK-NEXT: [[CMP_EXIT:%.*]] = icmp slt i32 [[INC]], [[NUM:%.*]]
; CHECK-NEXT: br i1 [[CMP_EXIT]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
entry:
br label %for.body
for.body:
%count = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%state = phi i32 [ 1, %entry ], [ %state.next, %for.inc ]
switch i32 %state, label %for.inc [
i32 1, label %case1
i32 2, label %case2
]
case1:
br label %for.inc
case2:
%cmp = icmp eq i32 %count, 50
%sel = select i1 %cmp, i32 1, i32 2
br label %for.inc
for.inc:
%state.next = phi i32 [ %sel, %case2 ], [ 1, %for.body ], [ 2, %case1 ]
%inc = add nsw i32 %count, 1
%cmp.exit = icmp slt i32 %inc, %num
br i1 %cmp.exit, label %for.body, label %for.end
for.end:
ret i32 0
}