[PGO][PGSO] Handle MBFIWrapper

Some code gen passes use MBFIWrapper to keep track of the frequency of new
blocks. This was not taken into account and could lead to incorrect frequencies
as MBFI silently returns zero frequency for unknown/new blocks.

Add a variant for MBFIWrapper in the PGSO query interface.

Depends on D73494.
This commit is contained in:
Hiroshi Yamauchi 2020-01-29 09:36:31 -08:00
parent 2a1b5af299
commit ac8da31a0f
10 changed files with 134 additions and 16 deletions

View File

@ -21,6 +21,7 @@ class ProfileSummaryInfo;
class MachineBasicBlock;
class MachineBlockFrequencyInfo;
class MachineFunction;
class MBFIWrapper;
/// Returns true if machine function \p MF is suggested to be size-optimized
/// based on the profile.
@ -33,6 +34,12 @@ bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType = PGSOQueryType::Other);
/// Returns true if machine basic block \p MBB is suggested to be size-optimized
/// based on the profile.
bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
MBFIWrapper *MBFIWrapper,
PGSOQueryType QueryType = PGSOQueryType::Other);
} // end namespace llvm

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <utility>
#include <vector>
@ -42,7 +43,7 @@ class TailDuplicator {
const MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
MachineFunction *MF;
const MachineBlockFrequencyInfo *MBFI;
MBFIWrapper *MBFI;
ProfileSummaryInfo *PSI;
bool PreRegAlloc;
bool LayoutMode;
@ -69,7 +70,7 @@ public:
/// default implies using the command line value TailDupSize.
void initMF(MachineFunction &MF, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPI,
const MachineBlockFrequencyInfo *MBFI,
MBFIWrapper *MBFI,
ProfileSummaryInfo *PSI,
bool LayoutMode, unsigned TailDupSize = 0);

View File

@ -63,10 +63,9 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
F, PSI, *BFI);
}
template<typename AdapterT, typename BlockT, typename BFIT>
bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
template<typename AdapterT, typename BlockTOrBlockFreq, typename BFIT>
bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI,
BFIT *BFI, PGSOQueryType QueryType) {
assert(BB);
if (!PSI || !BFI || !PSI->hasProfileSummary())
return false;
if (ForcePGSO)
@ -81,11 +80,11 @@ bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
if (PGSOColdCodeOnly ||
(PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
// Even if the working set size isn't large, size-optimize cold code.
return AdapterT::isColdBlock(BB, PSI, BFI);
return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
}
return !AdapterT::isHotBlockNthPercentile(
PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
BB, PSI, BFI);
BBOrBlockFreq, PSI, BFI);
}
/// Returns true if function \p F is suggested to be size-optimized based on the

View File

@ -655,8 +655,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineFunction *MF = MBB1->getParent();
bool OptForSize =
MF->getFunction().hasOptSize() ||
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
return EffectiveTailLen >= 2 && OptForSize &&
(FullBlockTail1 || FullBlockTail2);
}
@ -1511,7 +1511,7 @@ ReoptimizeBlock:
bool OptForSize =
MF.getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing

View File

@ -2082,8 +2082,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
bool OptForSize = F->getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(L.getHeader(), PSI,
&MBFI->getMBFI());
llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
if (OptForSize)
return L.getHeader();
@ -2841,7 +2840,7 @@ void MachineBlockPlacement::alignBlocks() {
continue;
// If the global profiles indicates so, don't align it.
if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
!TLI->alignLoopsWithOptSize())
continue;
@ -3088,7 +3087,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
/* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}

View File

@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@ -33,6 +34,13 @@ bool isColdBlock(const MachineBasicBlock *MBB,
return Count && PSI->isColdCount(*Count);
}
bool isColdBlock(BlockFrequency BlockFreq,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI) {
auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
return Count && PSI->isColdCount(*Count);
}
/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
static bool isHotBlockNthPercentile(int PercentileCutoff,
const MachineBasicBlock *MBB,
@ -42,6 +50,14 @@ static bool isHotBlockNthPercentile(int PercentileCutoff,
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
}
static bool isHotBlockNthPercentile(int PercentileCutoff,
BlockFrequency BlockFreq,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI) {
auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
}
/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
/// MachineFunction.
bool isFunctionColdInCallGraph(
@ -95,6 +111,11 @@ struct MachineBasicBlockBFIAdapter {
const MachineBlockFrequencyInfo *MBFI) {
return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
}
static bool isColdBlock(BlockFrequency BlockFreq,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI) {
return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
}
static bool isHotBlockNthPercentile(int CutOff,
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
@ -102,6 +123,13 @@ struct MachineBasicBlockBFIAdapter {
return machine_size_opts_detail::isHotBlockNthPercentile(
CutOff, MBB, PSI, MBFI);
}
static bool isHotBlockNthPercentile(int CutOff,
BlockFrequency BlockFreq,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI) {
return machine_size_opts_detail::isHotBlockNthPercentile(
CutOff, BlockFreq, PSI, MBFI);
}
};
} // end anonymous namespace
@ -117,6 +145,19 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
assert(MBB);
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
MBB, PSI, MBFI, QueryType);
}
bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
MBFIWrapper *MBFIW,
PGSOQueryType QueryType) {
assert(MBB);
if (!PSI || !MBFIW)
return false;
BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
BlockFreq, PSI, &MBFIW->getMBFI(), QueryType);
}

View File

@ -31,6 +31,7 @@ namespace {
class TailDuplicateBase : public MachineFunctionPass {
TailDuplicator Duplicator;
std::unique_ptr<MBFIWrapper> MBFIW;
bool PreRegAlloc;
public:
TailDuplicateBase(char &PassID, bool PreRegAlloc)
@ -88,7 +89,10 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
if (MBFI)
MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
/*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())

View File

@ -80,7 +80,7 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
const MachineBlockFrequencyInfo *MBFIin,
MBFIWrapper *MBFIin,
ProfileSummaryInfo *PSIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;

View File

@ -84,6 +84,7 @@ bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI,
PGSOQueryType QueryType) {
assert(BB);
return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
QueryType);
}

View File

@ -846,6 +846,72 @@ cont4:
ret void
}
; This triggers a situation where a new block (bb4 is split) is created and then
; would be passed to the PGSO interface llvm::shouldOptimizeForSize().
@GV = global i32 0
define void @bfi_new_block_pgso(i32 %c) nounwind {
; CHECK-LABEL: bfi_new_block_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB14_4
; CHECK-NEXT: # %bb.1: # %bb1
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB14_6
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: cmpl $17, %edi
; CHECK-NEXT: je .LBB14_7
; CHECK-NEXT: # %bb.3: # %bb4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB14_4: # %bb5
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne .LBB14_8
; CHECK-NEXT: # %bb.5: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB14_6: # %bb3
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: .LBB14_7: # %bb4
; CHECK-NEXT: callq func
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB14_8: # %bb6
; CHECK-NEXT: jmp tail_call_me # TAILCALL
entry:
%0 = icmp eq i32 %c, 0
br i1 %0, label %bb5, label %bb1
bb1:
switch i32 %c, label %bb4 [
i32 16, label %bb3
i32 17, label %bb2
]
bb2:
call void @func()
br label %bb4
bb3:
store i32 0, i32* @GV
call void @func()
br label %bb4
bb4:
tail call void @tail_call_me()
br label %return
bb5:
switch i32 %c, label %bb6 [
i32 128, label %return
]
bb6:
tail call void @tail_call_me()
br label %return
return:
ret void
}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}