[PGO][PGSO] Add profile guided size optimization to the X86 LEA fixup.

Differential Revision: https://reviews.llvm.org/D83330
This commit is contained in:
Hiroshi Yamauchi 2020-07-07 10:19:54 -07:00
parent 8779b11410
commit 153a0b8906
3 changed files with 42 additions and 52 deletions

View File

@ -16,8 +16,11 @@
#include "X86InstrInfo.h" #include "X86InstrInfo.h"
#include "X86Subtarget.h" #include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
@ -111,6 +114,12 @@ public:
MachineFunctionProperties::Property::NoVRegs); MachineFunctionProperties::Property::NoVRegs);
} }
/// Registers the analyses this pass requires: ProfileSummaryInfoWrapperPass
/// and LazyMachineBlockFrequencyInfoPass. runOnMachineFunction fetches both
/// and passes them to llvm::shouldOptimizeForSize for each basic block.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
// Forward to the base-class implementation after adding our own requirements.
MachineFunctionPass::getAnalysisUsage(AU);
}
private: private:
TargetSchedModel TSM; TargetSchedModel TSM;
const X86InstrInfo *TII = nullptr; const X86InstrInfo *TII = nullptr;
@ -205,21 +214,27 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
TSM.init(&ST); TSM.init(&ST);
TII = ST.getInstrInfo(); TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo(); TRI = ST.getRegisterInfo();
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *MBFI = (PSI && PSI->hasProfileSummary())
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
: nullptr;
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";); LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
for (MachineBasicBlock &MBB : MF) { for (MachineBasicBlock &MBB : MF) {
// First pass. Try to remove or optimize existing LEAs. // First pass. Try to remove or optimize existing LEAs.
bool OptIncDecPerBB =
OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
if (!isLEA(I->getOpcode())) if (!isLEA(I->getOpcode()))
continue; continue;
if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP)) if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
continue; continue;
if (IsSlowLEA) if (IsSlowLEA)
processInstructionForSlowLEA(I, MBB); processInstructionForSlowLEA(I, MBB);
else if (IsSlow3OpsLEA) else if (IsSlow3OpsLEA)
processInstrForSlow3OpLEA(I, MBB, OptIncDec); processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
} }
// Second pass for creating LEAs. This may reverse some of the // Second pass for creating LEAs. This may reverse some of the

View File

@ -58,6 +58,7 @@ namespace {
/// Registers ProfileSummaryInfoWrapperPass and
/// LazyMachineBlockFrequencyInfoPass as required analyses, and additionally
/// marks LazyMachineBlockFrequencyInfoPass as preserved by this pass.
void getAnalysisUsage(AnalysisUsage &AU) const override { void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
// NOTE(review): presumably preserved so a later pass that requires the same
// analysis can reuse it instead of recomputing it -- confirm against the
// pass pipeline.
AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU);
}

View File

@ -109,31 +109,18 @@ for.end:
} }
define void @foo_pgso(i32 inreg %dns) !prof !14 { define void @foo_pgso(i32 inreg %dns) !prof !14 {
; SLOW-LABEL: foo_pgso: ; CHECK-LABEL: foo_pgso:
; SLOW: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; SLOW-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ecx, %ecx
; SLOW-NEXT: decl %ecx ; CHECK-NEXT: decl %ecx
; SLOW-NEXT: .LBB4_1: # %for.body ; CHECK-NEXT: .LBB4_1: # %for.body
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; SLOW-NEXT: movzwl %cx, %edx ; CHECK-NEXT: movzwl %cx, %edx
; SLOW-NEXT: decl %ecx ; CHECK-NEXT: decl %ecx
; SLOW-NEXT: cmpl %eax, %edx ; CHECK-NEXT: cmpl %eax, %edx
; SLOW-NEXT: jl .LBB4_1 ; CHECK-NEXT: jl .LBB4_1
; SLOW-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: # %bb.2: # %for.end
; SLOW-NEXT: retl ; CHECK-NEXT: retl
;
; FAST-LABEL: foo_pgso:
; FAST: # %bb.0: # %entry
; FAST-NEXT: xorl %ecx, %ecx
; FAST-NEXT: decl %ecx
; FAST-NEXT: .LBB4_1: # %for.body
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
; FAST-NEXT: movzwl %cx, %edx
; FAST-NEXT: addl $-1, %ecx
; FAST-NEXT: cmpl %eax, %edx
; FAST-NEXT: jl .LBB4_1
; FAST-NEXT: # %bb.2: # %for.end
; FAST-NEXT: retl
entry: entry:
br label %for.body br label %for.body
@ -149,31 +136,18 @@ for.end:
} }
define void @bar_pgso(i32 inreg %dns) !prof !14 { define void @bar_pgso(i32 inreg %dns) !prof !14 {
; SLOW-LABEL: bar_pgso: ; CHECK-LABEL: bar_pgso:
; SLOW: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; SLOW-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ecx, %ecx
; SLOW-NEXT: incl %ecx ; CHECK-NEXT: incl %ecx
; SLOW-NEXT: .LBB5_1: # %for.body ; CHECK-NEXT: .LBB5_1: # %for.body
; SLOW-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; SLOW-NEXT: movzwl %cx, %edx ; CHECK-NEXT: movzwl %cx, %edx
; SLOW-NEXT: incl %ecx ; CHECK-NEXT: incl %ecx
; SLOW-NEXT: cmpl %eax, %edx ; CHECK-NEXT: cmpl %eax, %edx
; SLOW-NEXT: jl .LBB5_1 ; CHECK-NEXT: jl .LBB5_1
; SLOW-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: # %bb.2: # %for.end
; SLOW-NEXT: retl ; CHECK-NEXT: retl
;
; FAST-LABEL: bar_pgso:
; FAST: # %bb.0: # %entry
; FAST-NEXT: xorl %ecx, %ecx
; FAST-NEXT: incl %ecx
; FAST-NEXT: .LBB5_1: # %for.body
; FAST-NEXT: # =>This Inner Loop Header: Depth=1
; FAST-NEXT: movzwl %cx, %edx
; FAST-NEXT: addl $1, %ecx
; FAST-NEXT: cmpl %eax, %edx
; FAST-NEXT: jl .LBB5_1
; FAST-NEXT: # %bb.2: # %for.end
; FAST-NEXT: retl
entry: entry:
br label %for.body br label %for.body