forked from OSchip/llvm-project
[PGO][CHR] A bug fix.
Summary: Fix a transformation bug where two scopes share a common instrution to hoist. Reviewers: davidxl Reviewed By: davidxl Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D61405 llvm-svn: 359736
This commit is contained in:
parent
42a3b4ff0e
commit
1620104034
|
@ -1424,7 +1424,8 @@ void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
|
|||
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
|
||||
HoistStopMapTy &HoistStopMap,
|
||||
DenseSet<Instruction *> &HoistedSet,
|
||||
DenseSet<PHINode *> &TrivialPHIs) {
|
||||
DenseSet<PHINode *> &TrivialPHIs,
|
||||
DominatorTree &DT) {
|
||||
auto IT = HoistStopMap.find(R);
|
||||
assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
|
||||
DenseSet<Instruction *> &HoistStops = IT->second;
|
||||
|
@ -1444,8 +1445,21 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
|
|||
// Already hoisted, return.
|
||||
return;
|
||||
assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
|
||||
assert(DT.getNode(I->getParent()) && "DT must contain I's block");
|
||||
assert(DT.getNode(HoistPoint->getParent()) &&
|
||||
"DT must contain HoistPoint block");
|
||||
if (DT.dominates(I, HoistPoint))
|
||||
// We are already above the hoist point. Stop here. This may be necessary
|
||||
// when multiple scopes would independently hoist the same
|
||||
// instruction. Since an outer (dominating) scope would hoist it to its
|
||||
// entry before an inner (dominated) scope would to its entry, the inner
|
||||
// scope may see the instruction already hoisted, in which case it
|
||||
// potentially wrong for the inner scope to hoist it and could cause bad
|
||||
// IR (non-dominating def), but safe to skip hoisting it instead because
|
||||
// it's already in a block that dominates the inner scope.
|
||||
return;
|
||||
for (Value *Op : I->operands()) {
|
||||
hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
|
||||
hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
|
||||
}
|
||||
I->moveBefore(HoistPoint);
|
||||
HoistedSet.insert(I);
|
||||
|
@ -1456,7 +1470,8 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
|
|||
// Hoist the dependent condition values of the branches and the selects in the
|
||||
// scope to the insert point.
|
||||
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
|
||||
DenseSet<PHINode *> &TrivialPHIs) {
|
||||
DenseSet<PHINode *> &TrivialPHIs,
|
||||
DominatorTree &DT) {
|
||||
DenseSet<Instruction *> HoistedSet;
|
||||
for (const RegInfo &RI : Scope->CHRRegions) {
|
||||
Region *R = RI.R;
|
||||
|
@ -1465,7 +1480,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
|
|||
if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
|
||||
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
|
||||
hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
|
||||
HoistedSet, TrivialPHIs);
|
||||
HoistedSet, TrivialPHIs, DT);
|
||||
}
|
||||
for (SelectInst *SI : RI.Selects) {
|
||||
bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
|
||||
|
@ -1473,7 +1488,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
|
|||
if (!(IsTrueBiased || IsFalseBiased))
|
||||
continue;
|
||||
hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
|
||||
HoistedSet, TrivialPHIs);
|
||||
HoistedSet, TrivialPHIs, DT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1707,7 +1722,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
|
|||
#endif
|
||||
|
||||
// Hoist the conditional values of the branches/selects.
|
||||
hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
|
||||
hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
|
||||
|
||||
#ifndef NDEBUG
|
||||
assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
|
||||
|
|
|
@ -1886,6 +1886,125 @@ bb4:
|
|||
ret i32 %v13
|
||||
}
|
||||
|
||||
; Test the case where two scopes share a common instruction to hoist (%cmp.i).
|
||||
; Two scopes would hoist it to their hoist points, but since the outer scope
|
||||
; hoists (entry/bb6-9) it first to its hoist point, it'd be wrong (causing bad
|
||||
; IR) for the inner scope (bb1-4) to hoist the same instruction to its hoist
|
||||
; point.
|
||||
; Roughly,
|
||||
; if (j != k) {
|
||||
; if (i != 2)
|
||||
; foo();
|
||||
; cmp.i = i == 86
|
||||
; if (!cmp.i)
|
||||
; foo();
|
||||
; if (j != i)
|
||||
; foo();
|
||||
; if (!cmp.i)
|
||||
; foo();
|
||||
; }
|
||||
; return 45;
|
||||
define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
|
||||
; CHECK-LABEL: @test_chr_21(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i64 [[J:%.*]], [[K:%.*]]
|
||||
; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i64 [[J]], [[I:%.*]]
|
||||
; CHECK-NEXT: [[CMP_I:%.*]] = icmp ne i64 [[I]], 86
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CMP0]], [[CMP3]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[CMP_I]]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
|
||||
; CHECK: bb1:
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[I]], 2
|
||||
; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [
|
||||
; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]]
|
||||
; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]]
|
||||
; CHECK-NEXT: ], !prof !20
|
||||
; CHECK: bb2:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB7:%.*]]
|
||||
; CHECK: bb2.nonchr1:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB3_NONCHR2]]
|
||||
; CHECK: bb3.nonchr2:
|
||||
; CHECK-NEXT: br i1 [[CMP_I]], label [[BB4_NONCHR3:%.*]], label [[BB7]], !prof !18
|
||||
; CHECK: bb4.nonchr3:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB7]]
|
||||
; CHECK: bb7:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB10:%.*]]
|
||||
; CHECK: entry.split.nonchr:
|
||||
; CHECK-NEXT: br i1 [[CMP0]], label [[BB1_NONCHR:%.*]], label [[BB10]], !prof !18
|
||||
; CHECK: bb1.nonchr:
|
||||
; CHECK-NEXT: [[CMP2_NONCHR:%.*]] = icmp eq i64 [[I]], 2
|
||||
; CHECK-NEXT: br i1 [[CMP2_NONCHR]], label [[BB3_NONCHR:%.*]], label [[BB2_NONCHR:%.*]], !prof !16
|
||||
; CHECK: bb3.nonchr:
|
||||
; CHECK-NEXT: [[CMP_I_NONCHR:%.*]] = icmp eq i64 [[I]], 86
|
||||
; CHECK-NEXT: br i1 [[CMP_I_NONCHR]], label [[BB6_NONCHR:%.*]], label [[BB4_NONCHR:%.*]], !prof !16
|
||||
; CHECK: bb6.nonchr:
|
||||
; CHECK-NEXT: [[CMP3_NONCHR:%.*]] = icmp eq i64 [[J]], [[I]]
|
||||
; CHECK-NEXT: br i1 [[CMP3_NONCHR]], label [[BB8_NONCHR:%.*]], label [[BB7_NONCHR:%.*]], !prof !16
|
||||
; CHECK: bb8.nonchr:
|
||||
; CHECK-NEXT: br i1 [[CMP_I_NONCHR]], label [[BB10]], label [[BB9_NONCHR:%.*]], !prof !16
|
||||
; CHECK: bb9.nonchr:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB10]]
|
||||
; CHECK: bb7.nonchr:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB8_NONCHR]]
|
||||
; CHECK: bb4.nonchr:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB6_NONCHR]]
|
||||
; CHECK: bb2.nonchr:
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: br label [[BB3_NONCHR]]
|
||||
; CHECK: bb10:
|
||||
; CHECK-NEXT: ret i32 45
|
||||
;
|
||||
entry:
|
||||
%cmp0 = icmp eq i64 %j, %k
|
||||
br i1 %cmp0, label %bb10, label %bb1, !prof !15
|
||||
|
||||
bb1:
|
||||
%cmp2 = icmp eq i64 %i, 2
|
||||
br i1 %cmp2, label %bb3, label %bb2, !prof !15
|
||||
|
||||
bb2:
|
||||
call void @foo()
|
||||
br label %bb3
|
||||
|
||||
bb3:
|
||||
%cmp.i = icmp eq i64 %i, 86
|
||||
br i1 %cmp.i, label %bb5, label %bb4, !prof !15
|
||||
|
||||
bb4:
|
||||
call void @foo()
|
||||
br label %bb5
|
||||
|
||||
bb5:
|
||||
br label %bb6
|
||||
|
||||
bb6:
|
||||
%cmp3 = icmp eq i64 %j, %i
|
||||
br i1 %cmp3, label %bb8, label %bb7, !prof !15
|
||||
|
||||
bb7:
|
||||
call void @foo()
|
||||
br label %bb8
|
||||
|
||||
bb8:
|
||||
br i1 %cmp.i, label %bb10, label %bb9, !prof !15
|
||||
|
||||
bb9:
|
||||
call void @foo()
|
||||
br label %bb10
|
||||
|
||||
bb10:
|
||||
ret i32 45
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
|
|
Loading…
Reference in New Issue