forked from OSchip/llvm-project
[PGO][PGSO] Add profile guided size optimization to the X86 LEA fixup.
Differential Revision: https://reviews.llvm.org/D83330
This commit is contained in:
parent
8779b11410
commit
153a0b8906
|
@@ -16,8 +16,11 @@
|
||||||
#include "X86InstrInfo.h"
|
#include "X86InstrInfo.h"
|
||||||
#include "X86Subtarget.h"
|
#include "X86Subtarget.h"
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
|
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||||
|
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||||
#include "llvm/CodeGen/Passes.h"
|
#include "llvm/CodeGen/Passes.h"
|
||||||
#include "llvm/CodeGen/TargetSchedule.h"
|
#include "llvm/CodeGen/TargetSchedule.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
|
@@ -111,6 +114,12 @@ public:
|
||||||
MachineFunctionProperties::Property::NoVRegs);
|
MachineFunctionProperties::Property::NoVRegs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||||
|
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||||
|
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||||
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TargetSchedModel TSM;
|
TargetSchedModel TSM;
|
||||||
const X86InstrInfo *TII = nullptr;
|
const X86InstrInfo *TII = nullptr;
|
||||||
|
@@ -205,21 +214,27 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
||||||
TSM.init(&ST);
|
TSM.init(&ST);
|
||||||
TII = ST.getInstrInfo();
|
TII = ST.getInstrInfo();
|
||||||
TRI = ST.getRegisterInfo();
|
TRI = ST.getRegisterInfo();
|
||||||
|
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||||
|
auto *MBFI = (PSI && PSI->hasProfileSummary())
|
||||||
|
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
|
||||||
|
: nullptr;
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
|
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
|
||||||
for (MachineBasicBlock &MBB : MF) {
|
for (MachineBasicBlock &MBB : MF) {
|
||||||
// First pass. Try to remove or optimize existing LEAs.
|
// First pass. Try to remove or optimize existing LEAs.
|
||||||
|
bool OptIncDecPerBB =
|
||||||
|
OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
|
||||||
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
|
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
|
||||||
if (!isLEA(I->getOpcode()))
|
if (!isLEA(I->getOpcode()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
|
if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (IsSlowLEA)
|
if (IsSlowLEA)
|
||||||
processInstructionForSlowLEA(I, MBB);
|
processInstructionForSlowLEA(I, MBB);
|
||||||
else if (IsSlow3OpsLEA)
|
else if (IsSlow3OpsLEA)
|
||||||
processInstrForSlow3OpLEA(I, MBB, OptIncDec);
|
processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Second pass for creating LEAs. This may reverse some of the
|
// Second pass for creating LEAs. This may reverse some of the
|
||||||
|
|
|
@@ -58,6 +58,7 @@ namespace {
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||||
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
|
||||||
|
AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
|
||||||
MachineFunctionPass::getAnalysisUsage(AU);
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -109,31 +109,18 @@ for.end:
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @foo_pgso(i32 inreg %dns) !prof !14 {
|
define void @foo_pgso(i32 inreg %dns) !prof !14 {
|
||||||
; SLOW-LABEL: foo_pgso:
|
; CHECK-LABEL: foo_pgso:
|
||||||
; SLOW: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; SLOW-NEXT: xorl %ecx, %ecx
|
; CHECK-NEXT: xorl %ecx, %ecx
|
||||||
; SLOW-NEXT: decl %ecx
|
; CHECK-NEXT: decl %ecx
|
||||||
; SLOW-NEXT: .LBB4_1: # %for.body
|
; CHECK-NEXT: .LBB4_1: # %for.body
|
||||||
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
|
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; SLOW-NEXT: movzwl %cx, %edx
|
; CHECK-NEXT: movzwl %cx, %edx
|
||||||
; SLOW-NEXT: decl %ecx
|
; CHECK-NEXT: decl %ecx
|
||||||
; SLOW-NEXT: cmpl %eax, %edx
|
; CHECK-NEXT: cmpl %eax, %edx
|
||||||
; SLOW-NEXT: jl .LBB4_1
|
; CHECK-NEXT: jl .LBB4_1
|
||||||
; SLOW-NEXT: # %bb.2: # %for.end
|
; CHECK-NEXT: # %bb.2: # %for.end
|
||||||
; SLOW-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
;
|
|
||||||
; FAST-LABEL: foo_pgso:
|
|
||||||
; FAST: # %bb.0: # %entry
|
|
||||||
; FAST-NEXT: xorl %ecx, %ecx
|
|
||||||
; FAST-NEXT: decl %ecx
|
|
||||||
; FAST-NEXT: .LBB4_1: # %for.body
|
|
||||||
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
|
|
||||||
; FAST-NEXT: movzwl %cx, %edx
|
|
||||||
; FAST-NEXT: addl $-1, %ecx
|
|
||||||
; FAST-NEXT: cmpl %eax, %edx
|
|
||||||
; FAST-NEXT: jl .LBB4_1
|
|
||||||
; FAST-NEXT: # %bb.2: # %for.end
|
|
||||||
; FAST-NEXT: retl
|
|
||||||
entry:
|
entry:
|
||||||
br label %for.body
|
br label %for.body
|
||||||
|
|
||||||
|
@@ -149,31 +136,18 @@ for.end:
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @bar_pgso(i32 inreg %dns) !prof !14 {
|
define void @bar_pgso(i32 inreg %dns) !prof !14 {
|
||||||
; SLOW-LABEL: bar_pgso:
|
; CHECK-LABEL: bar_pgso:
|
||||||
; SLOW: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; SLOW-NEXT: xorl %ecx, %ecx
|
; CHECK-NEXT: xorl %ecx, %ecx
|
||||||
; SLOW-NEXT: incl %ecx
|
; CHECK-NEXT: incl %ecx
|
||||||
; SLOW-NEXT: .LBB5_1: # %for.body
|
; CHECK-NEXT: .LBB5_1: # %for.body
|
||||||
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1
|
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; SLOW-NEXT: movzwl %cx, %edx
|
; CHECK-NEXT: movzwl %cx, %edx
|
||||||
; SLOW-NEXT: incl %ecx
|
; CHECK-NEXT: incl %ecx
|
||||||
; SLOW-NEXT: cmpl %eax, %edx
|
; CHECK-NEXT: cmpl %eax, %edx
|
||||||
; SLOW-NEXT: jl .LBB5_1
|
; CHECK-NEXT: jl .LBB5_1
|
||||||
; SLOW-NEXT: # %bb.2: # %for.end
|
; CHECK-NEXT: # %bb.2: # %for.end
|
||||||
; SLOW-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
;
|
|
||||||
; FAST-LABEL: bar_pgso:
|
|
||||||
; FAST: # %bb.0: # %entry
|
|
||||||
; FAST-NEXT: xorl %ecx, %ecx
|
|
||||||
; FAST-NEXT: incl %ecx
|
|
||||||
; FAST-NEXT: .LBB5_1: # %for.body
|
|
||||||
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
|
|
||||||
; FAST-NEXT: movzwl %cx, %edx
|
|
||||||
; FAST-NEXT: addl $1, %ecx
|
|
||||||
; FAST-NEXT: cmpl %eax, %edx
|
|
||||||
; FAST-NEXT: jl .LBB5_1
|
|
||||||
; FAST-NEXT: # %bb.2: # %for.end
|
|
||||||
; FAST-NEXT: retl
|
|
||||||
entry:
|
entry:
|
||||||
br label %for.body
|
br label %for.body
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue