Revert "[MBP] Reduce code size by running tail merging in MBP."

This reverts commit r271930, r271915, r271923.  They break a thumb selfhosting
bot.

llvm-svn: 272017
This commit is contained in:
Haicheng Wu 2016-06-07 15:17:21 +00:00
parent 15c6ab5fac
commit 4fa9f3ae45
6 changed files with 35 additions and 185 deletions

View File

@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@ -100,9 +99,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge();
BranchFolder::MBFIWrapper MBBFreqInfo(
getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true,
getAnalysis<MachineBlockFrequencyInfo>(),
getAnalysis<MachineBranchProbabilityInfo>());
return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
MF.getSubtarget().getRegisterInfo(),
@ -110,7 +108,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
}
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBlockFrequencyInfo &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo)
: EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
MBPI(ProbInfo) {
@ -138,8 +136,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Remove the block.
MF->erase(MBB);
FuncletMembership.erase(MBB);
if (MLI)
MLI->removeBlock(MBB);
}
/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
@ -196,22 +192,18 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
}
/// OptimizeFunction - Perhaps branch folding, tail merging and other
/// CFG optimizations on the given function. Block placement changes the layout
/// and may create new tail merging opportunities.
/// CFG optimizations on the given function.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
MachineModuleInfo *mmi,
MachineLoopInfo *mli, bool AfterPlacement) {
MachineModuleInfo *mmi) {
if (!tii) return false;
TriedMerging.clear();
AfterBlockPlacement = AfterPlacement;
TII = tii;
TRI = tri;
MMI = mmi;
MLI = mli;
RS = nullptr;
// Use a RegScavenger to help update liveness when required.
@ -237,10 +229,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = TailMergeBlocks(MF);
// No need to clean up if tail merging does not change anything after the
// block placement.
if (!AfterBlockPlacement || MadeChangeThisIteration)
MadeChangeThisIteration |= OptimizeBranches(MF);
MadeChangeThisIteration |= OptimizeBranches(MF);
if (EnableHoistCommonCode)
MadeChangeThisIteration |= HoistCommonCode(MF);
MadeChange |= MadeChangeThisIteration;
@ -457,11 +446,6 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// NewMBB belongs to the same loop as CurMBB.
if (MLI)
if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
// NewMBB inherits CurMBB's block frequency.
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
@ -556,18 +540,6 @@ void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
MergedBBFreq[MBB] = F;
}
raw_ostream &
BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const {
return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
}
raw_ostream &
BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
const BlockFrequency Freq) const {
return MBFI.printBlockFreq(OS, Freq);
}
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@ -949,28 +921,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return MadeChange;
// First find blocks with no successors.
// Block placement does not create new tail merging opportunities for these
// blocks.
if (!AfterBlockPlacement) {
MergePotentials.clear();
for (MachineBasicBlock &MBB : MF) {
if (MergePotentials.size() == TailMergeThreshold)
break;
if (!TriedMerging.count(&MBB) && MBB.succ_empty())
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
}
// If this is a large problem, avoid visiting the same basic blocks
// multiple times.
MergePotentials.clear();
for (MachineBasicBlock &MBB : MF) {
if (MergePotentials.size() == TailMergeThreshold)
for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
TriedMerging.insert(MergePotentials[i].getBlock());
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
break;
if (!TriedMerging.count(&MBB) && MBB.succ_empty())
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
}
// If this is a large problem, avoid visiting the same basic blocks
// multiple times.
if (MergePotentials.size() == TailMergeThreshold)
for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
TriedMerging.insert(MergePotentials[i].getBlock());
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
@ -1016,17 +984,6 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (PBB->hasEHPadSuccessor())
continue;
// Bail out if the loop header (IBB) is not the top of the loop chain
// after the block placement. Otherwise, the common tail of IBB's
// predecessors may become the loop top if block placement is called again
// and the predecessors may branch to this common tail.
// FIXME: Relaxed this check if the algorithm of finding loop top is
// changed in MBP.
if (AfterBlockPlacement && MLI)
if (MachineLoop *ML = MLI->getLoopFor(IBB))
if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB))
continue;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {

View File

@ -20,24 +20,20 @@ namespace llvm {
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineModuleInfo;
class MachineLoopInfo;
class RegScavenger;
class TargetInstrInfo;
class TargetRegisterInfo;
class LLVM_LIBRARY_VISIBILITY BranchFolder {
public:
class MBFIWrapper;
explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &MBFI,
const MachineBlockFrequencyInfo &MBFI,
const MachineBranchProbabilityInfo &MBPI);
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
MachineLoopInfo *mli = nullptr,
bool AfterPlacement = false);
bool OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
MachineModuleInfo *mmi);
private:
class MergePotentialsElt {
unsigned Hash;
@ -95,16 +91,13 @@ namespace llvm {
};
std::vector<SameTailElt> SameTails;
bool AfterBlockPlacement;
bool EnableTailMerge;
bool EnableHoistCommonCode;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
MachineLoopInfo *MLI;
RegScavenger *RS;
public:
/// \brief This class keeps track of branch frequencies of newly created
/// blocks and tail-merged blocks.
class MBFIWrapper {
@ -112,18 +105,13 @@ namespace llvm {
MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
raw_ostream &printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const;
raw_ostream &printBlockFreq(raw_ostream &OS,
const BlockFrequency Freq) const;
private:
const MachineBlockFrequencyInfo &MBFI;
DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq;
};
private:
MBFIWrapper &MBBFreqInfo;
MBFIWrapper MBBFreqInfo;
const MachineBranchProbabilityInfo &MBPI;
bool TailMergeBlocks(MachineFunction &MF);

View File

@ -163,6 +163,7 @@ namespace {
const TargetLoweringBase *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
MachineRegisterInfo *MRI;
@ -290,7 +291,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = ST.getTargetLowering();
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
SchedModel.init(ST.getSchedModel(), &ST, TII);
@ -302,7 +303,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool BFChange = false;
if (!PreRegAlloc) {
// Tail merge tend to expose more if-conversion opportunities.
BranchFolder BF(true, false, MBFI, *MBPI);
BranchFolder BF(true, false, *MBFI, *MBPI);
BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@ -426,7 +427,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
BranchFolder BF(false, false, MBFI, *MBPI);
BranchFolder BF(false, false, *MBFI, *MBPI);
BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}

View File

@ -26,8 +26,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "BranchFolding.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@ -118,12 +116,6 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
static cl::opt<bool>
BranchFoldPlacement("branch-fold-placement",
cl::desc("Perform branch folding during placement. "
"Reduces code size."),
cl::init(true), cl::Hidden);
extern cl::opt<unsigned> StaticLikelyProb;
namespace {
@ -240,10 +232,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBranchProbabilityInfo *MBPI;
/// \brief A handle to the function-wide block frequency pass.
std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
const MachineBlockFrequencyInfo *MBFI;
/// \brief A handle to the loop info.
MachineLoopInfo *MLI;
const MachineLoopInfo *MLI;
/// \brief A handle to the target's instruction info.
const TargetInstrInfo *TII;
@ -331,7 +323,6 @@ public:
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@ -1471,8 +1462,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
return false;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
getAnalysis<MachineBlockFrequencyInfo>());
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
TII = F.getSubtarget().getInstrInfo();
TLI = F.getSubtarget().getTargetLowering();
@ -1480,29 +1470,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
assert(BlockToChain.empty());
buildCFGChains(F);
// Changing the layout can create new tail merging opportunities.
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structurized CFG.
bool EnableTailMerge = !F.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge() &&
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (F.size() > 3 && EnableTailMerge) {
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI);
if (BF.OptimizeFunction(F, TII, F.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
ChainAllocator.DestroyAll();
buildCFGChains(F);
}
}
optimizeBranches(F);
alignBlocks(F);

View File

@ -1,63 +0,0 @@
; RUN: llc <%s -march=aarch64 | FileCheck %s
; CHECK-LABEL: test:
; CHECK: LBB0_7:
; CHECK: b.hi
; CHECK-NEXT: b
; CHECK-NEXT: LBB0_8:
; CHECK-NEXT: mov x8, x9
; CHECK-NEXT: LBB0_9:
define i64 @test(i64 %n, i64* %a, i64* %b, i64* %c, i64* %d, i64* %e, i64* %f) {
entry:
%cmp28 = icmp sgt i64 %n, 1
br i1 %cmp28, label %for.body, label %for.end
for.body: ; preds = %for.body.lr.ph, %if.end
%j = phi i64 [ %n, %entry ], [ %div, %if.end ]
%div = lshr i64 %j, 1
%a.arrayidx = getelementptr inbounds i64, i64* %a, i64 %div
%a.j = load i64, i64* %a.arrayidx
%b.arrayidx = getelementptr inbounds i64, i64* %b, i64 %div
%b.j = load i64, i64* %b.arrayidx
%cmp.i = icmp slt i64 %a.j, %b.j
br i1 %cmp.i, label %for.end.loopexit, label %cond.false.i
cond.false.i: ; preds = %for.body
%cmp4.i = icmp sgt i64 %a.j, %b.j
br i1 %cmp4.i, label %if.end, label %cond.false6.i
cond.false6.i: ; preds = %cond.false.i
%c.arrayidx = getelementptr inbounds i64, i64* %c, i64 %div
%c.j = load i64, i64* %c.arrayidx
%d.arrayidx = getelementptr inbounds i64, i64* %d, i64 %div
%d.j = load i64, i64* %d.arrayidx
%cmp9.i = icmp slt i64 %c.j, %d.j
br i1 %cmp9.i, label %for.end.loopexit, label %cond.false11.i
cond.false11.i: ; preds = %cond.false6.i
%cmp14.i = icmp sgt i64 %c.j, %d.j
br i1 %cmp14.i, label %if.end, label %cond.false12.i
cond.false12.i: ; preds = %cond.false11.i
%e.arrayidx = getelementptr inbounds i64, i64* %e, i64 %div
%e.j = load i64, i64* %e.arrayidx
%f.arrayidx = getelementptr inbounds i64, i64* %f, i64 %div
%f.j = load i64, i64* %f.arrayidx
%cmp19.i = icmp sgt i64 %e.j, %f.j
br i1 %cmp19.i, label %if.end, label %for.end.loopexit
if.end: ; preds = %cond.false12.i, %cond.false11.i, %cond.false.i
%cmp = icmp ugt i64 %j, 3
br i1 %cmp, label %for.body, label %for.end.loopexit
for.end.loopexit: ; preds = %cond.false12.i, %cond.false6.i, %for.body, %if.end
%j.0.lcssa.ph = phi i64 [ %j, %cond.false12.i ], [ %j, %cond.false6.i ], [ %j, %for.body ], [ %div, %if.end ]
br label %for.end
for.end: ; preds = %for.end.loopexit, %entry
%j.0.lcssa = phi i64 [ %n, %entry ], [ %j.0.lcssa.ph, %for.end.loopexit ]
%j.2 = add i64 %j.0.lcssa, %n
%j.3 = mul i64 %j.2, %n
%j.4 = add i64 %j.3, 10
ret i64 %j.4
}

View File

@ -49,7 +49,7 @@ tailrecurse.switch: ; preds = %tailrecurse
; V8-NEXT: beq
; V8-NEXT: %tailrecurse.switch
; V8: cmp
; V8-NEXT: beq
; V8-NEXT: bne
; V8-NEXT: b
; The trailing space in the last line checks that the branch is unconditional
switch i32 %and, label %sw.epilog [