forked from OSchip/llvm-project
[PGO][PGSO] Handle MBFIWrapper
Some code gen passes use MBFIWrapper to keep track of the frequency of new blocks. This was not taken into account and could lead to incorrect frequencies as MBFI silently returns zero frequency for unknown/new blocks. Add a variant for MBFIWrapper in the PGSO query interface. Depends on D73494.
This commit is contained in:
parent
2a1b5af299
commit
ac8da31a0f
|
@ -21,6 +21,7 @@ class ProfileSummaryInfo;
|
|||
class MachineBasicBlock;
|
||||
class MachineBlockFrequencyInfo;
|
||||
class MachineFunction;
|
||||
class MBFIWrapper;
|
||||
|
||||
/// Returns true if machine function \p MF is suggested to be size-optimized
|
||||
/// based on the profile.
|
||||
|
@ -33,6 +34,12 @@ bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
|
|||
ProfileSummaryInfo *PSI,
|
||||
const MachineBlockFrequencyInfo *MBFI,
|
||||
PGSOQueryType QueryType = PGSOQueryType::Other);
|
||||
/// Returns true if machine basic block \p MBB is suggested to be size-optimized
|
||||
/// based on the profile.
|
||||
bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
|
||||
ProfileSummaryInfo *PSI,
|
||||
MBFIWrapper *MBFIWrapper,
|
||||
PGSOQueryType QueryType = PGSOQueryType::Other);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/MBFIWrapper.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
@ -42,7 +43,7 @@ class TailDuplicator {
|
|||
const MachineModuleInfo *MMI;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineFunction *MF;
|
||||
const MachineBlockFrequencyInfo *MBFI;
|
||||
MBFIWrapper *MBFI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
bool PreRegAlloc;
|
||||
bool LayoutMode;
|
||||
|
@ -69,7 +70,7 @@ public:
|
|||
/// default implies using the command line value TailDupSize.
|
||||
void initMF(MachineFunction &MF, bool PreRegAlloc,
|
||||
const MachineBranchProbabilityInfo *MBPI,
|
||||
const MachineBlockFrequencyInfo *MBFI,
|
||||
MBFIWrapper *MBFI,
|
||||
ProfileSummaryInfo *PSI,
|
||||
bool LayoutMode, unsigned TailDupSize = 0);
|
||||
|
||||
|
|
|
@ -63,10 +63,9 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
|
|||
F, PSI, *BFI);
|
||||
}
|
||||
|
||||
template<typename AdapterT, typename BlockT, typename BFIT>
|
||||
bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
|
||||
template<typename AdapterT, typename BlockTOrBlockFreq, typename BFIT>
|
||||
bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI,
|
||||
BFIT *BFI, PGSOQueryType QueryType) {
|
||||
assert(BB);
|
||||
if (!PSI || !BFI || !PSI->hasProfileSummary())
|
||||
return false;
|
||||
if (ForcePGSO)
|
||||
|
@ -81,11 +80,11 @@ bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
|
|||
if (PGSOColdCodeOnly ||
|
||||
(PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
|
||||
// Even if the working set size isn't large, size-optimize cold code.
|
||||
return AdapterT::isColdBlock(BB, PSI, BFI);
|
||||
return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
|
||||
}
|
||||
return !AdapterT::isHotBlockNthPercentile(
|
||||
PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
|
||||
BB, PSI, BFI);
|
||||
BBOrBlockFreq, PSI, BFI);
|
||||
}
|
||||
|
||||
/// Returns true if function \p F is suggested to be size-optimized based on the
|
||||
|
|
|
@ -655,8 +655,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
|
|||
MachineFunction *MF = MBB1->getParent();
|
||||
bool OptForSize =
|
||||
MF->getFunction().hasOptSize() ||
|
||||
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
|
||||
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
|
||||
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
|
||||
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
|
||||
return EffectiveTailLen >= 2 && OptForSize &&
|
||||
(FullBlockTail1 || FullBlockTail2);
|
||||
}
|
||||
|
@ -1511,7 +1511,7 @@ ReoptimizeBlock:
|
|||
|
||||
bool OptForSize =
|
||||
MF.getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
|
||||
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
|
||||
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
|
||||
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
|
||||
// direction, thereby defeating careful block placement and regressing
|
||||
|
|
|
@ -2082,8 +2082,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
|
|||
// In practice this never happens though: there always seems to be a preheader
|
||||
// that can fallthrough and that is also placed before the header.
|
||||
bool OptForSize = F->getFunction().hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(L.getHeader(), PSI,
|
||||
&MBFI->getMBFI());
|
||||
llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
|
||||
if (OptForSize)
|
||||
return L.getHeader();
|
||||
|
||||
|
@ -2841,7 +2840,7 @@ void MachineBlockPlacement::alignBlocks() {
|
|||
continue;
|
||||
|
||||
// If the global profiles indicates so, don't align it.
|
||||
if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
|
||||
if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
|
||||
!TLI->alignLoopsWithOptSize())
|
||||
continue;
|
||||
|
||||
|
@ -3088,7 +3087,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (OptForSize)
|
||||
TailDupSize = 1;
|
||||
bool PreRegAlloc = false;
|
||||
TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
|
||||
TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
|
||||
/* LayoutMode */ true, TailDupSize);
|
||||
precomputeTriangleChains();
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||
#include "llvm/CodeGen/MBFIWrapper.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
|
||||
|
@ -33,6 +34,13 @@ bool isColdBlock(const MachineBasicBlock *MBB,
|
|||
return Count && PSI->isColdCount(*Count);
|
||||
}
|
||||
|
||||
bool isColdBlock(BlockFrequency BlockFreq,
|
||||
ProfileSummaryInfo *PSI,
|
||||
const MachineBlockFrequencyInfo *MBFI) {
|
||||
auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
|
||||
return Count && PSI->isColdCount(*Count);
|
||||
}
|
||||
|
||||
/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
|
||||
static bool isHotBlockNthPercentile(int PercentileCutoff,
|
||||
const MachineBasicBlock *MBB,
|
||||
|
@ -42,6 +50,14 @@ static bool isHotBlockNthPercentile(int PercentileCutoff,
|
|||
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
|
||||
}
|
||||
|
||||
static bool isHotBlockNthPercentile(int PercentileCutoff,
|
||||
BlockFrequency BlockFreq,
|
||||
ProfileSummaryInfo *PSI,
|
||||
const MachineBlockFrequencyInfo *MBFI) {
|
||||
auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
|
||||
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
|
||||
}
|
||||
|
||||
/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
|
||||
/// MachineFunction.
|
||||
bool isFunctionColdInCallGraph(
|
||||
|
@ -95,6 +111,11 @@ struct MachineBasicBlockBFIAdapter {
|
|||
const MachineBlockFrequencyInfo *MBFI) {
|
||||
return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
|
||||
}
|
||||
static bool isColdBlock(BlockFrequency BlockFreq,
|
||||
ProfileSummaryInfo *PSI,
|
||||
const MachineBlockFrequencyInfo *MBFI) {
|
||||
return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
|
||||
}
|
||||
static bool isHotBlockNthPercentile(int CutOff,
|
||||
const MachineBasicBlock *MBB,
|
||||
ProfileSummaryInfo *PSI,
|
||||
|
@ -102,6 +123,13 @@ struct MachineBasicBlockBFIAdapter {
|
|||
return machine_size_opts_detail::isHotBlockNthPercentile(
|
||||
CutOff, MBB, PSI, MBFI);
|
||||
}
|
||||
static bool isHotBlockNthPercentile(int CutOff,
|
||||
BlockFrequency BlockFreq,
|
||||
ProfileSummaryInfo *PSI,
|
||||
const MachineBlockFrequencyInfo *MBFI) {
|
||||
return machine_size_opts_detail::isHotBlockNthPercentile(
|
||||
CutOff, BlockFreq, PSI, MBFI);
|
||||
}
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
|
@ -117,6 +145,19 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
|
|||
ProfileSummaryInfo *PSI,
|
||||
const MachineBlockFrequencyInfo *MBFI,
|
||||
PGSOQueryType QueryType) {
|
||||
assert(MBB);
|
||||
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
|
||||
MBB, PSI, MBFI, QueryType);
|
||||
}
|
||||
|
||||
bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
|
||||
ProfileSummaryInfo *PSI,
|
||||
MBFIWrapper *MBFIW,
|
||||
PGSOQueryType QueryType) {
|
||||
assert(MBB);
|
||||
if (!PSI || !MBFIW)
|
||||
return false;
|
||||
BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
|
||||
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
|
||||
BlockFreq, PSI, &MBFIW->getMBFI(), QueryType);
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@ namespace {
|
|||
|
||||
class TailDuplicateBase : public MachineFunctionPass {
|
||||
TailDuplicator Duplicator;
|
||||
std::unique_ptr<MBFIWrapper> MBFIW;
|
||||
bool PreRegAlloc;
|
||||
public:
|
||||
TailDuplicateBase(char &PassID, bool PreRegAlloc)
|
||||
|
@ -88,7 +89,10 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
|
|||
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
|
||||
if (MBFI)
|
||||
MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
|
||||
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
|
||||
/*LayoutMode=*/false);
|
||||
|
||||
bool MadeChange = false;
|
||||
while (Duplicator.tailDuplicateBlocks())
|
||||
|
|
|
@ -80,7 +80,7 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
|
|||
|
||||
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
|
||||
const MachineBranchProbabilityInfo *MBPIin,
|
||||
const MachineBlockFrequencyInfo *MBFIin,
|
||||
MBFIWrapper *MBFIin,
|
||||
ProfileSummaryInfo *PSIin,
|
||||
bool LayoutModeIn, unsigned TailDupSizeIn) {
|
||||
MF = &MFin;
|
||||
|
|
|
@ -84,6 +84,7 @@ bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
|
|||
bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI,
|
||||
PGSOQueryType QueryType) {
|
||||
assert(BB);
|
||||
return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
|
||||
QueryType);
|
||||
}
|
||||
|
|
|
@ -846,6 +846,72 @@ cont4:
|
|||
ret void
|
||||
}
|
||||
|
||||
; This triggers a situation where a new block (bb4 is split) is created and then
|
||||
; would be passed to the PGSO interface llvm::shouldOptimizeForSize().
|
||||
@GV = global i32 0
|
||||
define void @bfi_new_block_pgso(i32 %c) nounwind {
|
||||
; CHECK-LABEL: bfi_new_block_pgso:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: je .LBB14_4
|
||||
; CHECK-NEXT: # %bb.1: # %bb1
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: cmpl $16, %edi
|
||||
; CHECK-NEXT: je .LBB14_6
|
||||
; CHECK-NEXT: # %bb.2: # %bb1
|
||||
; CHECK-NEXT: cmpl $17, %edi
|
||||
; CHECK-NEXT: je .LBB14_7
|
||||
; CHECK-NEXT: # %bb.3: # %bb4
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: jmp tail_call_me # TAILCALL
|
||||
; CHECK-NEXT: .LBB14_4: # %bb5
|
||||
; CHECK-NEXT: cmpl $128, %edi
|
||||
; CHECK-NEXT: jne .LBB14_8
|
||||
; CHECK-NEXT: # %bb.5: # %return
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB14_6: # %bb3
|
||||
; CHECK-NEXT: movl $0, {{.*}}(%rip)
|
||||
; CHECK-NEXT: .LBB14_7: # %bb4
|
||||
; CHECK-NEXT: callq func
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: .LBB14_8: # %bb6
|
||||
; CHECK-NEXT: jmp tail_call_me # TAILCALL
|
||||
entry:
|
||||
%0 = icmp eq i32 %c, 0
|
||||
br i1 %0, label %bb5, label %bb1
|
||||
|
||||
bb1:
|
||||
switch i32 %c, label %bb4 [
|
||||
i32 16, label %bb3
|
||||
i32 17, label %bb2
|
||||
]
|
||||
|
||||
bb2:
|
||||
call void @func()
|
||||
br label %bb4
|
||||
|
||||
bb3:
|
||||
store i32 0, i32* @GV
|
||||
call void @func()
|
||||
br label %bb4
|
||||
|
||||
bb4:
|
||||
tail call void @tail_call_me()
|
||||
br label %return
|
||||
|
||||
bb5:
|
||||
switch i32 %c, label %bb6 [
|
||||
i32 128, label %return
|
||||
]
|
||||
|
||||
bb6:
|
||||
tail call void @tail_call_me()
|
||||
br label %return
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
|
|
Loading…
Reference in New Issue