diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 63ffc1fbb8fd..c4c060fba2e9 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -100,9 +99,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge(); - BranchFolder::MBFIWrapper MBBFreqInfo( - getAnalysis()); - BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, + getAnalysis(), getAnalysis()); return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), @@ -110,7 +108,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { } BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, - MBFIWrapper &FreqInfo, + const MachineBlockFrequencyInfo &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo) : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { @@ -138,8 +136,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block. MF->erase(MBB); FuncletMembership.erase(MBB); - if (MLI) - MLI->removeBlock(MBB); } /// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def @@ -196,22 +192,18 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { } /// OptimizeFunction - Perhaps branch folding, tail merging and other -/// CFG optimizations on the given function. Block placement changes the layout -/// and may create new tail merging opportunities. +/// CFG optimizations on the given function. bool BranchFolder::OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, - MachineModuleInfo *mmi, - MachineLoopInfo *mli, bool AfterPlacement) { + MachineModuleInfo *mmi) { if (!tii) return false; TriedMerging.clear(); - AfterBlockPlacement = AfterPlacement; TII = tii; TRI = tri; MMI = mmi; - MLI = mli; RS = nullptr; // Use a RegScavenger to help update liveness when required. @@ -237,10 +229,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = TailMergeBlocks(MF); - // No need to clean up if tail merging does not change anything after the - // block placement. - if (!AfterBlockPlacement || MadeChangeThisIteration) - MadeChangeThisIteration |= OptimizeBranches(MF); + MadeChangeThisIteration |= OptimizeBranches(MF); if (EnableHoistCommonCode) MadeChangeThisIteration |= HoistCommonCode(MF); MadeChange |= MadeChangeThisIteration; @@ -457,11 +446,6 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Splice the code over. NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); - // NewMBB belongs to the same loop as CurMBB. - if (MLI) - if (MachineLoop *ML = MLI->getLoopFor(&CurMBB)) - ML->addBasicBlockToLoop(NewMBB, MLI->getBase()); - // NewMBB inherits CurMBB's block frequency. MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); @@ -556,18 +540,6 @@ void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, MergedBBFreq[MBB] = F; } -raw_ostream & -BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const { - return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); -} - -raw_ostream & -BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const { - return MBFI.printBlockFreq(OS, Freq); -} - /// CountTerminators - Count the number of terminators in the given /// block and set I to the position of the first non-terminator, if there /// is one, or MBB->end() otherwise. @@ -949,28 +921,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!EnableTailMerge) return MadeChange; // First find blocks with no successors. - // Block placement does not create new tail merging opportunities for these - // blocks. - if (!AfterBlockPlacement) { - MergePotentials.clear(); - for (MachineBasicBlock &MBB : MF) { - if (MergePotentials.size() == TailMergeThreshold) - break; - if (!TriedMerging.count(&MBB) && MBB.succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB)); - } - - // If this is a large problem, avoid visiting the same basic blocks - // multiple times. + MergePotentials.clear(); + for (MachineBasicBlock &MBB : MF) { if (MergePotentials.size() == TailMergeThreshold) - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - TriedMerging.insert(MergePotentials[i].getBlock()); - - // See if we can do any tail merging on those. - if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(nullptr, nullptr); + break; + if (!TriedMerging.count(&MBB) && MBB.succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB)); } + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + // See if we can do any tail merging on those. + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(nullptr, nullptr); + // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by // (1) temporarily removing any unconditional branch from the predecessor @@ -1016,17 +984,6 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (PBB->hasEHPadSuccessor()) continue; - // Bail out if the loop header (IBB) is not the top of the loop chain - // after the block placement. Otherwise, the common tail of IBB's - // predecessors may become the loop top if block placement is called again - // and the predecessors may branch to this common tail. - // FIXME: Relaxed this check if the algorithm of finding loop top is - // changed in MBP. - if (AfterBlockPlacement && MLI) - if (MachineLoop *ML = MLI->getLoopFor(IBB)) - if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB)) - continue; - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index f7040990f131..d759d53e27f2 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -20,24 +20,20 @@ namespace llvm { class MachineBranchProbabilityInfo; class MachineFunction; class MachineModuleInfo; - class MachineLoopInfo; class RegScavenger; class TargetInstrInfo; class TargetRegisterInfo; class LLVM_LIBRARY_VISIBILITY BranchFolder { public: - class MBFIWrapper; - explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, - MBFIWrapper &MBFI, + const MachineBlockFrequencyInfo &MBFI, const MachineBranchProbabilityInfo &MBPI); - bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, - const TargetRegisterInfo *tri, MachineModuleInfo *mmi, - MachineLoopInfo *mli = nullptr, - bool AfterPlacement = false); - + bool OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi); private: class MergePotentialsElt { unsigned Hash; @@ -95,16 +91,13 @@ namespace llvm { }; std::vector SameTails; - bool AfterBlockPlacement; bool EnableTailMerge; bool EnableHoistCommonCode; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; - MachineLoopInfo *MLI; RegScavenger *RS; - public: /// \brief This class keeps track of branch frequencies of newly created /// blocks and tail-merged blocks. class MBFIWrapper { @@ -112,18 +105,13 @@ namespace llvm { MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); - raw_ostream &printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const; - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const; private: const MachineBlockFrequencyInfo &MBFI; DenseMap MergedBBFreq; }; - private: - MBFIWrapper &MBBFreqInfo; + MBFIWrapper MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; bool TailMergeBlocks(MachineFunction &MF); diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index 5fd18ed2fe2c..c2586b5bf8ce 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -163,6 +163,7 @@ namespace { const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; @@ -290,7 +291,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = ST.getTargetLowering(); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); - BranchFolder::MBFIWrapper MBFI(getAnalysis()); + MBFI = &getAnalysis(); MBPI = &getAnalysis(); MRI = &MF.getRegInfo(); SchedModel.init(ST.getSchedModel(), &ST, TII); @@ -302,7 +303,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool BFChange = false; if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true, false, MBFI, *MBPI); + BranchFolder BF(true, false, *MBFI, *MBPI); BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), getAnalysisIfAvailable()); } @@ -426,7 +427,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false, false, MBFI, *MBPI); + BranchFolder BF(false, false, *MBFI, *MBPI); BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable()); } diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 42bad4c73018..c562af9d9648 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -26,8 +26,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "BranchFolding.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -118,12 +116,6 @@ static cl::opt JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); -static cl::opt -BranchFoldPlacement("branch-fold-placement", - cl::desc("Perform branch folding during placement. " - "Reduces code size."), - cl::init(true), cl::Hidden); - extern cl::opt StaticLikelyProb; namespace { @@ -240,10 +232,10 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBranchProbabilityInfo *MBPI; /// \brief A handle to the function-wide block frequency pass. - std::unique_ptr MBFI; + const MachineBlockFrequencyInfo *MBFI; /// \brief A handle to the loop info. - MachineLoopInfo *MLI; + const MachineLoopInfo *MLI; /// \brief A handle to the target's instruction info. const TargetInstrInfo *TII; @@ -331,7 +323,6 @@ public: AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -1471,8 +1462,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { return false; MBPI = &getAnalysis(); - MBFI = llvm::make_unique( - getAnalysis()); + MBFI = &getAnalysis(); MLI = &getAnalysis(); TII = F.getSubtarget().getInstrInfo(); TLI = F.getSubtarget().getTargetLowering(); @@ -1480,29 +1470,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { assert(BlockToChain.empty()); buildCFGChains(F); - - // Changing the layout can create new tail merging opportunities. - TargetPassConfig *PassConfig = &getAnalysis(); - // TailMerge can create jump into if branches that make CFG irreducible for - // HW that requires structurized CFG. - bool EnableTailMerge = !F.getTarget().requiresStructuredCFG() && - PassConfig->getEnableTailMerge() && - BranchFoldPlacement; - // No tail merging opportunities if the block number is less than four. - if (F.size() > 3 && EnableTailMerge) { - BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI); - - if (BF.OptimizeFunction(F, TII, F.getSubtarget().getRegisterInfo(), - getAnalysisIfAvailable(), MLI, - /*AfterBlockPlacement=*/true)) { - // Redo the layout if tail merging creates/removes/moves blocks. - BlockToChain.clear(); - ChainAllocator.DestroyAll(); - buildCFGChains(F); - } - } - optimizeBranches(F); alignBlocks(F); diff --git a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll deleted file mode 100644 index 29df9def992e..000000000000 --- a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: llc <%s -march=aarch64 | FileCheck %s - -; CHECK-LABEL: test: -; CHECK: LBB0_7: -; CHECK: b.hi -; CHECK-NEXT: b -; CHECK-NEXT: LBB0_8: -; CHECK-NEXT: mov x8, x9 -; CHECK-NEXT: LBB0_9: -define i64 @test(i64 %n, i64* %a, i64* %b, i64* %c, i64* %d, i64* %e, i64* %f) { -entry: - %cmp28 = icmp sgt i64 %n, 1 - br i1 %cmp28, label %for.body, label %for.end - -for.body: ; preds = %for.body.lr.ph, %if.end - %j = phi i64 [ %n, %entry ], [ %div, %if.end ] - %div = lshr i64 %j, 1 - %a.arrayidx = getelementptr inbounds i64, i64* %a, i64 %div - %a.j = load i64, i64* %a.arrayidx - %b.arrayidx = getelementptr inbounds i64, i64* %b, i64 %div - %b.j = load i64, i64* %b.arrayidx - %cmp.i = icmp slt i64 %a.j, %b.j - br i1 %cmp.i, label %for.end.loopexit, label %cond.false.i - -cond.false.i: ; preds = %for.body - %cmp4.i = icmp sgt i64 %a.j, %b.j - br i1 %cmp4.i, label %if.end, label %cond.false6.i - -cond.false6.i: ; preds = %cond.false.i - %c.arrayidx = getelementptr inbounds i64, i64* %c, i64 %div - %c.j = load i64, i64* %c.arrayidx - %d.arrayidx = getelementptr inbounds i64, i64* %d, i64 %div - %d.j = load i64, i64* %d.arrayidx - %cmp9.i = icmp slt i64 %c.j, %d.j - br i1 %cmp9.i, label %for.end.loopexit, label %cond.false11.i - -cond.false11.i: ; preds = %cond.false6.i - %cmp14.i = icmp sgt i64 %c.j, %d.j - br i1 %cmp14.i, label %if.end, label %cond.false12.i - -cond.false12.i: ; preds = %cond.false11.i - %e.arrayidx = getelementptr inbounds i64, i64* %e, i64 %div - %e.j = load i64, i64* %e.arrayidx - %f.arrayidx = getelementptr inbounds i64, i64* %f, i64 %div - %f.j = load i64, i64* %f.arrayidx - %cmp19.i = icmp sgt i64 %e.j, %f.j - br i1 %cmp19.i, label %if.end, label %for.end.loopexit - -if.end: ; preds = %cond.false12.i, %cond.false11.i, %cond.false.i - %cmp = icmp ugt i64 %j, 3 - br i1 %cmp, label %for.body, label %for.end.loopexit - -for.end.loopexit: ; preds = %cond.false12.i, %cond.false6.i, %for.body, %if.end - %j.0.lcssa.ph = phi i64 [ %j, %cond.false12.i ], [ %j, %cond.false6.i ], [ %j, %for.body ], [ %div, %if.end ] - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - %j.0.lcssa = phi i64 [ %n, %entry ], [ %j.0.lcssa.ph, %for.end.loopexit ] - %j.2 = add i64 %j.0.lcssa, %n - %j.3 = mul i64 %j.2, %n - %j.4 = add i64 %j.3, 10 - ret i64 %j.4 -} diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll index 151cc1b12ed2..04eae8f9afec 100644 --- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -49,7 +49,7 @@ tailrecurse.switch: ; preds = %tailrecurse ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: beq +; V8-NEXT: bne ; V8-NEXT: b ; The trailing space in the last line checks that the branch is unconditional switch i32 %and, label %sw.epilog [