[SimplifyCFG] use profile metadata to refine merging branch conditions

2nd try (original: 27ae17a6b0) with fix/test for crash. We must make
sure that TTI is available (non-null) before trying to use it, because
it is not required to be provided (which might itself be a bug).

Original commit message:

This is one step towards solving:
https://llvm.org/PR49336

In that example, we disregard the recommended usage of builtin_expect,
so an expensive (unpredictable) branch is folded into another branch
that is guarding it.
Here, before merging the conditions with logic ops, we read the profile
metadata to see whether the 1st (predecessor) condition is likely to
cause execution to bypass the 2nd (successor) condition.

Differential Revision: https://reviews.llvm.org/D98898
This commit is contained in:
Sanjay Patel 2021-03-23 09:55:04 -04:00
parent ed0558a09d
commit 1bf8f9e228
4 changed files with 220 additions and 72 deletions

View File

@ -63,6 +63,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@ -2840,31 +2841,53 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
}
}
// Determine if the two branches share a common destination,
// and deduce a glue that we need to use to join branch's conditions
// to arrive at the common destination.
/// Determine if the two branches share a common destination and deduce a glue
/// that joins the branches' conditions to arrive at the common destination if
/// that would be profitable.
static Optional<std::pair<Instruction::BinaryOps, bool>>
CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
const TargetTransformInfo *TTI) {
assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
"Both blocks must end with a conditional branches.");
assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
"PredBB must be a predecessor of BB.");
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
return {{Instruction::Or, false}};
else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
return {{Instruction::And, false}};
else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
return {{Instruction::And, true}};
else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
return {{Instruction::Or, true}};
// We have the potential to fold the conditions together, but if the
// predecessor branch is predictable, we may not want to merge them.
uint64_t PTWeight, PFWeight;
BranchProbability PBITrueProb, Likely;
if (TTI && PBI->extractProfMetadata(PTWeight, PFWeight) &&
(PTWeight + PFWeight) != 0) {
PBITrueProb =
BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
Likely = TTI->getPredictableBranchThreshold();
}
if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
// Speculate the 2nd condition unless the 1st is probably true.
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
return {{Instruction::Or, false}};
} else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
// Speculate the 2nd condition unless the 1st is probably false.
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
return {{Instruction::And, false}};
} else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
// Speculate the 2nd condition unless the 1st is probably true.
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
return {{Instruction::And, true}};
} else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
// Speculate the 2nd condition unless the 1st is probably false.
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
return {{Instruction::Or, true}};
}
return None;
}
static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
bool PoisonSafe) {
bool PoisonSafe,
const TargetTransformInfo *TTI) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();
@ -2872,7 +2895,7 @@ static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
Instruction::BinaryOps Opc;
bool InvertPredCond;
std::tie(Opc, InvertPredCond) =
*CheckIfCondBranchesShareCommonDestination(BI, PBI);
*shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
@ -3070,8 +3093,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Determine if the two branches share a common destination.
Instruction::BinaryOps Opc;
bool InvertPredCond;
if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI))
std::tie(Opc, InvertPredCond) = *Recepie;
if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
std::tie(Opc, InvertPredCond) = *Recipe;
else
continue;
@ -3088,7 +3111,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
continue;
}
return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe);
return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, PoisonSafe,
TTI);
}
return Changed;
}

View File

@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info | FileCheck %s
; Loopsimplify should be able to merge the two loop exits
@ -7,42 +8,143 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
; CHECK-LABEL: @test1
; CHECK: bb:
; CHECK: phi i64
; CHECK-NOT: phi i64
; CHECK-NOT: sext
define float @test1(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4
; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1
; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0
; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]]
; CHECK: bb.lr.ph:
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64
; CHECK-NEXT: br label [[BB:%.*]]
; CHECK: bb:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ]
; CHECK-NEXT: [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ]
; CHECK-NEXT: [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ]
; CHECK-NEXT: [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[T3:%.*]] = load float, float* [[T2]], align 4
; CHECK-NEXT: [[T4]] = fadd float [[T3]], [[DISTERBHI_04]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[T8:%.*]] = load float, float* [[T7]], align 4
; CHECK-NEXT: [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]]
; CHECK-NEXT: [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00
; CHECK-NEXT: [[T12:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[T10]], [[T12]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]]
; CHECK: bb1.bb3_crit_edge:
; CHECK-NEXT: [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ]
; CHECK-NEXT: [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT: [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]]
; CHECK-NEXT: ret float [[T13]]
;
entry:
%t0 = load float, float* %peakWeight, align 4
br label %bb1
%t0 = load float, float* %peakWeight, align 4
br label %bb1
bb: ; preds = %bb2
%t1 = sext i32 %hiPart.0 to i64
%t2 = getelementptr float, float* %pTmp1, i64 %t1
%t3 = load float, float* %t2, align 4
%t4 = fadd float %t3, %distERBhi.0
%t5 = add i32 %hiPart.0, 1
%t6 = sext i32 %t5 to i64
%t7 = getelementptr float, float* %peakWeight, i64 %t6
%t8 = load float, float* %t7, align 4
%t9 = fadd float %t8, %peakCount.0
br label %bb1
%t1 = sext i32 %hiPart.0 to i64
%t2 = getelementptr float, float* %pTmp1, i64 %t1
%t3 = load float, float* %t2, align 4
%t4 = fadd float %t3, %distERBhi.0
%t5 = add i32 %hiPart.0, 1
%t6 = sext i32 %t5 to i64
%t7 = getelementptr float, float* %peakWeight, i64 %t6
%t8 = load float, float* %t7, align 4
%t9 = fadd float %t8, %peakCount.0
br label %bb1
bb1: ; preds = %bb, %entry
%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
br i1 %t10, label %bb3, label %bb2
%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
br i1 %t10, label %bb3, label %bb2
bb2: ; preds = %bb1
%t11 = add i32 %bandEdgeIndex, -1
%t12 = icmp sgt i32 %t11, %hiPart.0
br i1 %t12, label %bb, label %bb3
%t11 = add i32 %bandEdgeIndex, -1
%t12 = icmp sgt i32 %t11, %hiPart.0
br i1 %t12, label %bb, label %bb3
bb3: ; preds = %bb2, %bb1
%t13 = fdiv float %peakCount.0, %distERBhi.0
ret float %t13
%t13 = fdiv float %peakCount.0, %distERBhi.0
ret float %t13
}
; Same test as above.
; This would crash because we assumed TTI was available to process the metadata.
define float @merge_branches_profile_metadata(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
; CHECK-LABEL: @merge_branches_profile_metadata(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T0:%.*]] = load float, float* [[PEAKWEIGHT:%.*]], align 4
; CHECK-NEXT: [[T11:%.*]] = add i32 [[BANDEDGEINDEX:%.*]], -1
; CHECK-NEXT: [[T121:%.*]] = icmp sgt i32 [[T11]], 0
; CHECK-NEXT: br i1 [[T121]], label [[BB_LR_PH:%.*]], label [[BB3:%.*]], !prof !0
; CHECK: bb.lr.ph:
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[T11]] to i64
; CHECK-NEXT: br label [[BB:%.*]]
; CHECK: bb:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[BB_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BB]] ]
; CHECK-NEXT: [[DISTERBHI_04:%.*]] = phi float [ 0.000000e+00, [[BB_LR_PH]] ], [ [[T4:%.*]], [[BB]] ]
; CHECK-NEXT: [[PEAKCOUNT_02:%.*]] = phi float [ [[T0]], [[BB_LR_PH]] ], [ [[T9:%.*]], [[BB]] ]
; CHECK-NEXT: [[T2:%.*]] = getelementptr float, float* [[PTMP1:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[T3:%.*]] = load float, float* [[T2]], align 4
; CHECK-NEXT: [[T4]] = fadd float [[T3]], [[DISTERBHI_04]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[T7:%.*]] = getelementptr float, float* [[PEAKWEIGHT]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[T8:%.*]] = load float, float* [[T7]], align 4
; CHECK-NEXT: [[T9]] = fadd float [[T8]], [[PEAKCOUNT_02]]
; CHECK-NEXT: [[T10:%.*]] = fcmp olt float [[T4]], 2.500000e+00
; CHECK-NEXT: [[T12:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[T10]], [[T12]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB]], label [[BB1_BB3_CRIT_EDGE:%.*]], !prof !0
; CHECK: bb1.bb3_crit_edge:
; CHECK-NEXT: [[T4_LCSSA:%.*]] = phi float [ [[T4]], [[BB]] ]
; CHECK-NEXT: [[T9_LCSSA:%.*]] = phi float [ [[T9]], [[BB]] ]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[PEAKCOUNT_0_LCSSA:%.*]] = phi float [ [[T9_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ [[T0]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[DISTERBHI_0_LCSSA:%.*]] = phi float [ [[T4_LCSSA]], [[BB1_BB3_CRIT_EDGE]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT: [[T13:%.*]] = fdiv float [[PEAKCOUNT_0_LCSSA]], [[DISTERBHI_0_LCSSA]]
; CHECK-NEXT: ret float [[T13]]
;
entry:
%t0 = load float, float* %peakWeight, align 4
br label %bb1
bb: ; preds = %bb2
%t1 = sext i32 %hiPart.0 to i64
%t2 = getelementptr float, float* %pTmp1, i64 %t1
%t3 = load float, float* %t2, align 4
%t4 = fadd float %t3, %distERBhi.0
%t5 = add i32 %hiPart.0, 1
%t6 = sext i32 %t5 to i64
%t7 = getelementptr float, float* %peakWeight, i64 %t6
%t8 = load float, float* %t7, align 4
%t9 = fadd float %t8, %peakCount.0
br label %bb1
bb1: ; preds = %bb, %entry
%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
br i1 %t10, label %bb3, label %bb2, !prof !0
bb2: ; preds = %bb1
%t11 = add i32 %bandEdgeIndex, -1
%t12 = icmp sgt i32 %t11, %hiPart.0
br i1 %t12, label %bb, label %bb3
bb3: ; preds = %bb2, %bb1
%t13 = fdiv float %peakCount.0, %distERBhi.0
ret float %t13
}
!0 = !{!"branch_weights", i32 2000, i32 1}

View File

@ -1277,11 +1277,12 @@ define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
; CHECK-LABEL: @test_chr_14(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1
; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
; CHECK: entry.split.nonchr:
; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[V1]], i1 [[V3_NONCHR]], i1 false
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16
; CHECK: bb0.nonchr:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB1]]
@ -1912,7 +1913,7 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [
; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]]
; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]]
; CHECK-NEXT: ], !prof !20
; CHECK-NEXT: ], !prof !19
; CHECK: bb2:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: call void @foo()
@ -2489,14 +2490,14 @@ define void @test_chr_24(i32* %i) !prof !14 {
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20
; CHECK: bb0:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB1]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20
; CHECK: bb2:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB3]]
@ -2550,4 +2551,3 @@ bb3:
; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}

View File

@ -636,16 +636,17 @@ exit:
ret i32 %outval
}
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.
define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_harmful(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -668,16 +669,17 @@ exit:
ret void
}
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.
define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_harmful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -700,7 +702,8 @@ exit:
ret void
}
; The probability threshold is set by a builtin_expect setting.
; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.
define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_not_that_harmful(
@ -708,7 +711,7 @@ define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -731,13 +734,16 @@ exit:
ret void
}
; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.
define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_not_that_harmful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -760,13 +766,15 @@ exit:
ret void
}
; The 1st cmp is probably true, so speculating the 2nd is probably a win.
define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_useful(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 true, i1 [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -789,13 +797,15 @@ exit:
ret void
}
; The 1st cmp is probably false, so speculating the 2nd is probably a win.
define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_useful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 true, i1 [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -849,16 +859,17 @@ exit:
ret void
}
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.
define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_harmful(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_FALSE]], i1 [[EXPENSIVE]], i1 false
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -881,16 +892,17 @@ exit:
ret void
}
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.
define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_harmful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EXPECTED_TRUE]], i1 [[EXPENSIVE]], i1 false
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
@ -913,6 +925,9 @@ exit:
ret void
}
; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.
define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_not_that_harmful(
; CHECK-NEXT: entry:
@ -942,6 +957,9 @@ exit:
ret void
}
; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.
define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_not_that_harmful_inverted(
; CHECK-NEXT: entry:
@ -971,6 +989,8 @@ exit:
ret void
}
; The 1st cmp is probably true, so speculating the 2nd is probably a win.
define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_useful(
; CHECK-NEXT: entry:
@ -1000,6 +1020,8 @@ exit:
ret void
}
; The 1st cmp is probably false, so speculating the 2nd is probably a win.
define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_useful_inverted(
; CHECK-NEXT: entry: