forked from OSchip/llvm-project
[MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs.
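Summary: The current PRE hoists common computations into CMBB = DT->findNearestCommonDominator(MBB, MBB1). However, if CMBB is in a hot loop body, the hoisted code runs more often than it did before and performance can degrade. This change hoists only when the block frequency of CMBB does not exceed the combined block frequency of MBB and MBB1; functions marked minsize are always hoisted. Differential Revision: https://reviews.llvm.org/D64394 llvm-svn: 366570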
This commit is contained in:
parent
e238a4c757
commit
dec624682e
|
@ -21,6 +21,7 @@
|
|||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
@ -66,6 +67,7 @@ namespace {
|
|||
AliasAnalysis *AA;
|
||||
MachineDominatorTree *DT;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification
|
||||
|
@ -83,6 +85,8 @@ namespace {
|
|||
AU.addPreservedID(MachineLoopInfoID);
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addPreserved<MachineBlockFrequencyInfo>();
|
||||
}
|
||||
|
||||
void releaseMemory() override {
|
||||
|
@ -133,6 +137,11 @@ namespace {
|
|||
bool isPRECandidate(MachineInstr *MI);
|
||||
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
|
||||
bool PerformSimplePRE(MachineDominatorTree *DT);
|
||||
/// Heuristics to see if it's beneficial to move common computations of MBB
|
||||
/// and MBB1 to CandidateBB.
|
||||
bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
|
||||
MachineBasicBlock *MBB,
|
||||
MachineBasicBlock *MBB1);
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
|
|||
if (!CMBB->isLegalToHoistInto())
|
||||
continue;
|
||||
|
||||
if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
|
||||
continue;
|
||||
|
||||
// Two instrs are partially redundant if their basic blocks are reachable
|
||||
// from one to another but one doesn't dominate another.
|
||||
if (CMBB != MBB1) {
|
||||
|
@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
|
|||
return Changed;
|
||||
}
|
||||
|
||||
/// Profitability check for PRE hoisting: decide whether a computation shared
/// by MBB and MBB1 should be moved up into CandidateBB.
///
/// Functions marked minsize always accept the hoist. Otherwise the hoist is
/// accepted only when the block frequency of CandidateBB does not exceed the
/// sum of the block frequencies of MBB and MBB1.
///
/// \param CandidateBB block the common computation would be hoisted into;
///        asserted to dominate both MBB and MBB1.
/// \param MBB first block containing the common computation.
/// \param MBB1 second block containing the common computation.
/// \returns true if hoisting into CandidateBB is considered beneficial.
bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
                                        MachineBasicBlock *MBB,
                                        MachineBasicBlock *MBB1) {
  const bool OptForMinSize =
      CandidateBB->getParent()->getFunction().hasMinSize();
  if (OptForMinSize)
    return true;

  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
  assert(DT->dominates(CandidateBB, MBB1) &&
         "CandidateBB should dominate MBB1");

  // Compare how often the hoisted copy would be reached against how often
  // the two original copies are reached together.
  const auto HoistedFreq = MBFI->getBlockFreq(CandidateBB);
  const auto OriginalFreq =
      MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
  return HoistedFreq <= OriginalFreq;
}
|
||||
|
||||
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
|
|||
MRI = &MF.getRegInfo();
|
||||
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
|
||||
DT = &getAnalysis<MachineDominatorTree>();
|
||||
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
|
||||
LookAheadLimit = TII->getMachineCSELookAheadLimit();
|
||||
bool ChangedPRE, ChangedCSE;
|
||||
ChangedPRE = PerformSimplePRE(DT);
|
||||
|
|
|
@ -98,9 +98,9 @@
|
|||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
; CHECK-NEXT: Early Machine Loop Invariant Code Motion
|
||||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -72,9 +72,9 @@
|
|||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
; CHECK-NEXT: Early Machine Loop Invariant Code Motion
|
||||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -8,25 +8,25 @@ define i32 @t(i32 %n, i32 %delta, i32 %a) {
|
|||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: lis r7, 0
|
||||
; CHECK-P9-NEXT: li r6, 0
|
||||
; CHECK-P9-NEXT: li r8, 0
|
||||
; CHECK-P9-NEXT: li r9, 0
|
||||
; CHECK-P9-NEXT: li r10, 0
|
||||
; CHECK-P9-NEXT: ori r7, r7, 65535
|
||||
; CHECK-P9-NEXT: .p2align 5
|
||||
; CHECK-P9-NEXT: .LBB0_1: # %header
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: addi r10, r10, 1
|
||||
; CHECK-P9-NEXT: cmpw r10, r3
|
||||
; CHECK-P9-NEXT: addi r8, r5, 1024
|
||||
; CHECK-P9-NEXT: addi r9, r9, 1
|
||||
; CHECK-P9-NEXT: cmpw r9, r3
|
||||
; CHECK-P9-NEXT: blt cr0, .LBB0_4
|
||||
; CHECK-P9-NEXT: # %bb.2: # %cont
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: add r9, r9, r4
|
||||
; CHECK-P9-NEXT: cmpw r9, r7
|
||||
; CHECK-P9-NEXT: add r8, r8, r4
|
||||
; CHECK-P9-NEXT: cmpw r8, r7
|
||||
; CHECK-P9-NEXT: bgt cr0, .LBB0_1
|
||||
; CHECK-P9-NEXT: # %bb.3: # %cont.1
|
||||
; CHECK-P9-NEXT: mr r6, r8
|
||||
; CHECK-P9-NEXT: addi r6, r5, 1024
|
||||
; CHECK-P9-NEXT: .LBB0_4: # %return
|
||||
; CHECK-P9-NEXT: mullw r3, r6, r8
|
||||
; CHECK-P9-NEXT: addi r3, r5, 1024
|
||||
; CHECK-P9-NEXT: mullw r3, r6, r3
|
||||
; CHECK-P9-NEXT: blr
|
||||
entry:
|
||||
br label %header
|
||||
|
@ -75,16 +75,19 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) local_unnamed_
|
|||
; CHECK-P9-NEXT: lis r3, 21845
|
||||
; CHECK-P9-NEXT: add r28, r30, r29
|
||||
; CHECK-P9-NEXT: ori r27, r3, 21846
|
||||
; CHECK-P9-NEXT: b .LBB1_3
|
||||
; CHECK-P9-NEXT: b .LBB1_4
|
||||
; CHECK-P9-NEXT: .p2align 4
|
||||
; CHECK-P9-NEXT: .LBB1_1: # %sw.bb3
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: add r28, r3, r28
|
||||
; CHECK-P9-NEXT: mulli r3, r30, 23
|
||||
; CHECK-P9-NEXT: .LBB1_2: # %sw.epilog
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: add r28, r3, r28
|
||||
; CHECK-P9-NEXT: .LBB1_3: # %sw.epilog
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: cmpwi r28, 1025
|
||||
; CHECK-P9-NEXT: bge cr0, .LBB1_6
|
||||
; CHECK-P9-NEXT: .LBB1_3: # %while.cond
|
||||
; CHECK-P9-NEXT: bge cr0, .LBB1_7
|
||||
; CHECK-P9-NEXT: .LBB1_4: # %while.cond
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: extsw r3, r29
|
||||
; CHECK-P9-NEXT: bl bar
|
||||
|
@ -101,41 +104,40 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) local_unnamed_
|
|||
; CHECK-P9-NEXT: add r4, r4, r5
|
||||
; CHECK-P9-NEXT: slwi r5, r4, 1
|
||||
; CHECK-P9-NEXT: add r4, r4, r5
|
||||
; CHECK-P9-NEXT: subf r5, r4, r3
|
||||
; CHECK-P9-NEXT: mulli r4, r29, 13
|
||||
; CHECK-P9-NEXT: mulli r3, r30, 23
|
||||
; CHECK-P9-NEXT: cmplwi r5, 1
|
||||
; CHECK-P9-NEXT: subf r3, r4, r3
|
||||
; CHECK-P9-NEXT: cmplwi r3, 1
|
||||
; CHECK-P9-NEXT: beq cr0, .LBB1_1
|
||||
; CHECK-P9-NEXT: # %bb.4: # %while.cond
|
||||
; CHECK-P9-NEXT: # %bb.5: # %while.cond
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: cmplwi r5, 0
|
||||
; CHECK-P9-NEXT: bne cr0, .LBB1_2
|
||||
; CHECK-P9-NEXT: # %bb.5: # %sw.bb
|
||||
; CHECK-P9-NEXT: cmplwi r3, 0
|
||||
; CHECK-P9-NEXT: bne cr0, .LBB1_3
|
||||
; CHECK-P9-NEXT: # %bb.6: # %sw.bb
|
||||
; CHECK-P9-NEXT: #
|
||||
; CHECK-P9-NEXT: add r28, r4, r28
|
||||
; CHECK-P9-NEXT: cmpwi r28, 1025
|
||||
; CHECK-P9-NEXT: blt cr0, .LBB1_3
|
||||
; CHECK-P9-NEXT: .LBB1_6: # %while.end
|
||||
; CHECK-P9-NEXT: lis r5, -13108
|
||||
; CHECK-P9-NEXT: ori r5, r5, 52429
|
||||
; CHECK-P9-NEXT: mullw r5, r28, r5
|
||||
; CHECK-P9-NEXT: lis r6, 13107
|
||||
; CHECK-P9-NEXT: ori r6, r6, 13108
|
||||
; CHECK-P9-NEXT: cmplw r5, r6
|
||||
; CHECK-P9-NEXT: blt cr0, .LBB1_8
|
||||
; CHECK-P9-NEXT: # %bb.7: # %if.then8
|
||||
; CHECK-P9-NEXT: extsw r4, r4
|
||||
; CHECK-P9-NEXT: extsw r5, r28
|
||||
; CHECK-P9-NEXT: mulli r3, r29, 13
|
||||
; CHECK-P9-NEXT: b .LBB1_2
|
||||
; CHECK-P9-NEXT: .LBB1_7: # %while.end
|
||||
; CHECK-P9-NEXT: lis r3, -13108
|
||||
; CHECK-P9-NEXT: ori r3, r3, 52429
|
||||
; CHECK-P9-NEXT: mullw r3, r28, r3
|
||||
; CHECK-P9-NEXT: lis r4, 13107
|
||||
; CHECK-P9-NEXT: ori r4, r4, 13108
|
||||
; CHECK-P9-NEXT: cmplw r3, r4
|
||||
; CHECK-P9-NEXT: blt cr0, .LBB1_9
|
||||
; CHECK-P9-NEXT: # %bb.8: # %if.then8
|
||||
; CHECK-P9-NEXT: mulli r3, r29, 13
|
||||
; CHECK-P9-NEXT: mulli r5, r30, 23
|
||||
; CHECK-P9-NEXT: extsw r4, r28
|
||||
; CHECK-P9-NEXT: extsw r3, r3
|
||||
; CHECK-P9-NEXT: extsw r5, r5
|
||||
; CHECK-P9-NEXT: sub r3, r4, r3
|
||||
; CHECK-P9-NEXT: sub r4, r5, r4
|
||||
; CHECK-P9-NEXT: sub r3, r3, r5
|
||||
; CHECK-P9-NEXT: rldicl r4, r4, 1, 63
|
||||
; CHECK-P9-NEXT: rldicl r3, r3, 1, 63
|
||||
; CHECK-P9-NEXT: or r3, r4, r3
|
||||
; CHECK-P9-NEXT: b .LBB1_9
|
||||
; CHECK-P9-NEXT: .LBB1_8: # %cleanup20
|
||||
; CHECK-P9-NEXT: li r3, 0
|
||||
; CHECK-P9-NEXT: rldicl r4, r4, 1, 63
|
||||
; CHECK-P9-NEXT: or r3, r3, r4
|
||||
; CHECK-P9-NEXT: b .LBB1_10
|
||||
; CHECK-P9-NEXT: .LBB1_9: # %cleanup20
|
||||
; CHECK-P9-NEXT: li r3, 0
|
||||
; CHECK-P9-NEXT: .LBB1_10: # %cleanup20
|
||||
; CHECK-P9-NEXT: addi r1, r1, 80
|
||||
; CHECK-P9-NEXT: ld r0, 16(r1)
|
||||
; CHECK-P9-NEXT: mtlr r0
|
||||
|
|
|
@ -84,9 +84,9 @@
|
|||
; CHECK-NEXT: MachineDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Natural Loop Construction
|
||||
; CHECK-NEXT: Early Machine Loop Invariant Code Motion
|
||||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
Loading…
Reference in New Issue