[AArch64][GlobalISel] Enable use of the optsize predicate in the selector.

To do this while keeping parity with SelectionDAG, which already takes PGO
information into account, we add ProfileSummaryInfo and LazyBlockFrequencyInfo
as analysis dependencies of the instruction selector pass.

Then, use the predicate to generate constant pool loads for f32 materialization,
if we're targeting optsize/minsize.

Differential Revision: https://reviews.llvm.org/D97732
This commit is contained in:
Amara Emerson 2021-02-24 22:45:25 -08:00
parent c0f8115c73
commit 8a316045ed
19 changed files with 185 additions and 39 deletions

View File

@ -17,6 +17,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
/// This pass is responsible for selecting generic machine instructions to
/// target-specific instructions. It relies on the InstructionSelector provided
/// by the target.
@ -43,9 +47,16 @@ public:
MachineFunctionProperties::Property::Selected);
}
InstructionSelect(CodeGenOpt::Level OL);
InstructionSelect();
bool runOnMachineFunction(MachineFunction &MF) override;
protected:
BlockFrequencyInfo *BFI = nullptr;
ProfileSummaryInfo *PSI = nullptr;
CodeGenOpt::Level OptLevel = CodeGenOpt::None;
};
} // End namespace llvm.

View File

@ -18,6 +18,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include <bitset>
@ -429,18 +434,25 @@ public:
CodeGenCoverage *CoverageInfo = nullptr;
GISelKnownBits *KnownBits = nullptr;
MachineFunction *MF = nullptr;
ProfileSummaryInfo *PSI = nullptr;
BlockFrequencyInfo *BFI = nullptr;
// For some predicates, we need to track the current MBB.
MachineBasicBlock *CurMBB = nullptr;
/// Per-function setup hook, called at the end of setupMF(). This default
/// body only calls llvm_unreachable; per its message, the TableGen-emitted
/// selector is expected to provide the real implementation.
virtual void setupGeneratedPerFunctionState(MachineFunction &MF) {
llvm_unreachable("TableGen should have emitted implementation");
}
/// Setup per-MF selector state.
virtual void setupMF(MachineFunction &mf,
GISelKnownBits &KB,
CodeGenCoverage &covinfo) {
virtual void setupMF(MachineFunction &mf, GISelKnownBits *KB,
CodeGenCoverage &covinfo, ProfileSummaryInfo *psi,
BlockFrequencyInfo *bfi) {
CoverageInfo = &covinfo;
KnownBits = &KB;
KnownBits = KB;
MF = &mf;
PSI = psi;
BFI = bfi;
CurMBB = nullptr;
setupGeneratedPerFunctionState(mf);
}
@ -463,6 +475,12 @@ protected:
MatcherState(unsigned MaxRenderers);
};
/// Returns true when selection should favour code size: either the IR
/// function behind \p MF carries the optsize or minsize attribute, or PSI,
/// BFI and CurMBB are all non-null and llvm::shouldOptForSize() reports true
/// for the block currently recorded in CurMBB.
///
/// \p MF is only used to reach the function attributes; the per-block query
/// uses CurMBB, not a block supplied by the caller.
bool shouldOptForSize(const MachineFunction *MF) const {
const auto &F = MF->getFunction();
// The attribute checks come first, so the profile-based query is only
// reached for functions without optsize/minsize.
return F.hasOptSize() || F.hasMinSize() ||
(PSI && BFI && CurMBB && llvm::shouldOptForSize(*CurMBB, PSI, BFI));
}
public:
template <class PredicateBitset, class ComplexMatcherMemFn,
class CustomRendererFn>

View File

@ -15,6 +15,7 @@
#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/LowLevelTypeImpl.h"
@ -23,6 +24,7 @@
namespace llvm {
class AnalysisUsage;
class BlockFrequencyInfo;
class GISelKnownBits;
class MachineFunction;
class MachineInstr;
@ -32,6 +34,7 @@ class MachineOptimizationRemarkMissed;
struct MachinePointerInfo;
class MachineRegisterInfo;
class MCInstrDesc;
class ProfileSummaryInfo;
class RegisterBankInfo;
class TargetInstrInfo;
class TargetLowering;
@ -283,5 +286,9 @@ bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
/// Returns an integer representing true, as defined by the
/// TargetBooleanContents.
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
/// Returns true if the given block should be optimized for size.
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI);
} // End namespace llvm.
#endif

View File

@ -11,7 +11,11 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@ -50,16 +54,29 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { }
InstructionSelect::InstructionSelect(CodeGenOpt::Level OL)
: MachineFunctionPass(ID), OptLevel(OL) {}
// In order not to crash when calling getAnalysis during testing with -run-pass
// we use the default opt level here instead of None, so that the addRequired()
// calls are made in getAnalysisUsage().
InstructionSelect::InstructionSelect()
: MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {}
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
if (OptLevel != CodeGenOpt::None) {
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -71,13 +88,26 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
return false;
LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
CodeGenOpt::Level OldOptLevel = OptLevel;
auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; });
OptLevel = MF.getFunction().hasOptNone() ? CodeGenOpt::None
: MF.getTarget().getOptLevel();
GISelKnownBits *KB = nullptr;
if (OptLevel != CodeGenOpt::None) {
KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (PSI && PSI->hasProfileSummary())
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
}
CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
ISel->setupMF(MF, KB, CoverageInfo);
ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI);
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@ -102,6 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
#endif
for (MachineBasicBlock *MBB : post_order(&MF)) {
ISel->CurMBB = MBB;
if (MBB->empty())
continue;

View File

@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@ -851,3 +852,10 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
}
llvm_unreachable("Invalid boolean contents");
}
// Size-optimization query for a single machine basic block. The result is
// true when the enclosing IR function has the optsize or minsize attribute;
// otherwise it is whatever llvm::shouldOptimizeForSize() returns for the
// block's IR basic block, with PSI and BFI forwarded unchanged.
// NOTE(review): MBB.getBasicBlock() is forwarded without a null check —
// confirm every MBB reaching this point has an associated IR block, or that
// the callee tolerates a null one.
bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
const auto &F = MBB.getParent()->getFunction();
return F.hasOptSize() || F.hasMinSize() ||
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
}

View File

@ -588,7 +588,7 @@ void AArch64PassConfig::addPreGlobalInstructionSelect() {
}
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
addPass(new InstructionSelect(getOptLevel()));
if (getOptLevel() != CodeGenOpt::None)
addPass(createAArch64PostSelectOptimize());
return false;

View File

@ -47,6 +47,11 @@
using namespace llvm;
using namespace MIPatternMatch;
namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
}
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
@ -62,9 +67,10 @@ public:
bool select(MachineInstr &I) override;
static const char *getName() { return DEBUG_TYPE; }
void setupMF(MachineFunction &MF, GISelKnownBits &KB,
CodeGenCoverage &CoverageInfo) override {
InstructionSelector::setupMF(MF, KB, CoverageInfo);
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override {
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
// hasFnAttribute() is expensive to call on every BRCOND selection, so
// cache it here for each run of the selector.
@ -2426,7 +2432,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
: AArch64::FPR128RegClass);
// For 64b values, emit a constant pool load instead.
if (DefSize == 64 || DefSize == 128) {
// For s32, use a cp load if we have optsize/minsize.
if (DefSize == 64 || DefSize == 128 ||
(DefSize == 32 && shouldOptForSize(&MF))) {
auto *FPImm = I.getOperand(1).getFPImm();
MachineIRBuilder MIB(I);
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
@ -4051,10 +4059,18 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
case 8:
LoadMI = &*MIRBuilder
.buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
.addConstantPoolIndex(
CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
LoadMI =
&*MIRBuilder
.buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
.addConstantPoolIndex(CPIdx, 0,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
case 4:
LoadMI =
&*MIRBuilder
.buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
.addConstantPoolIndex(CPIdx, 0,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
default:
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "

View File

@ -59,11 +59,13 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
CodeGenCoverage &CoverageInfo) {
void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
CodeGenCoverage &CoverageInfo,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) {
MRI = &MF.getRegInfo();
Subtarget = &MF.getSubtarget<GCNSubtarget>();
InstructionSelector::setupMF(MF, KB, CoverageInfo);
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
}
bool AMDGPUInstructionSelector::isVCC(Register Reg,

View File

@ -36,6 +36,8 @@ struct ImageDimIntrinsicInfo;
class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo;
class AMDGPUTargetMachine;
class BlockFrequencyInfo;
class ProfileSummaryInfo;
class GCNSubtarget;
class MachineInstr;
class MachineIRBuilder;
@ -60,8 +62,9 @@ public:
bool select(MachineInstr &I) override;
static const char *getName();
void setupMF(MachineFunction &MF, GISelKnownBits &KB,
CodeGenCoverage &CoverageInfo) override;
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override;
private:
struct GEPInfo {

View File

@ -1114,7 +1114,7 @@ bool GCNPassConfig::addRegBankSelect() {
}
bool GCNPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
addPass(new InstructionSelect(getOptLevel()));
// TODO: Fix instruction selection to do the right thing for image
// instructions with tfe or lwe in the first place, instead of running a
// separate pass to fix them up?

View File

@ -487,7 +487,7 @@ bool ARMPassConfig::addRegBankSelect() {
}
bool ARMPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
addPass(new InstructionSelect(getOptLevel()));
return false;
}

View File

@ -335,6 +335,6 @@ bool MipsPassConfig::addRegBankSelect() {
}
bool MipsPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
addPass(new InstructionSelect(getOptLevel()));
return false;
}

View File

@ -568,6 +568,6 @@ bool PPCPassConfig::addRegBankSelect() {
}
bool PPCPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
addPass(new InstructionSelect(getOptLevel()));
return false;
}

View File

@ -174,7 +174,7 @@ bool RISCVPassConfig::addRegBankSelect() {
}
bool RISCVPassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
addPass(new InstructionSelect(getOptLevel()));
return false;
}

View File

@ -465,7 +465,7 @@ bool X86PassConfig::addRegBankSelect() {
}
bool X86PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
addPass(new InstructionSelect(getOptLevel()));
return false;
}

View File

@ -65,7 +65,9 @@
; VERIFY-NEXT: Verify generated machine code
; ENABLED-NEXT: Localizer
; VERIFY-O0-NEXT: Verify generated machine code
; ENABLED-NEXT: Analysis for ComputingKnownBits
; ENABLED-O1-NEXT: Analysis for ComputingKnownBits
; ENABLED-O1-NEXT: Lazy Branch Probability Analysis
; ENABLED-O1-NEXT: Lazy Block Frequency Analysis
; ENABLED-NEXT: InstructionSelect
; ENABLED-O1-NEXT: AArch64 Post Select Optimizer
; VERIFY-NEXT: Verify generated machine code

View File

@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@ -7,6 +7,12 @@
define void @imm_s32_gpr() { ret void }
define void @imm_s64_gpr() { ret void }
define void @test_f64_cp() { ret void }
define void @test_f32_cp_optsize() #0 { ret void }
define void @test_f32_cp_minsize() #1 { ret void }
attributes #0 = { optsize }
attributes #1 = { minsize }
...
@ -76,3 +82,53 @@ body: |
RET_ReallyLR implicit $d0
...
# 32b FP immediates need to be loaded if using optsize.
---
name: test_f32_cp_optsize
legalized: true
regBankSelected: true
liveins:
- { reg: '$s0' }
body: |
bb.1 (%ir-block.0):
liveins: $s0
; CHECK-LABEL: name: test_f32_cp_optsize
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[COPY]], [[LDRSui]]
; CHECK: $s0 = COPY [[FADDSrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%1:fpr(s32) = G_FCONSTANT float 0x3FDB267DE0000000
%2:fpr(s32) = G_FADD %0, %1
$s0 = COPY %2(s32)
RET_ReallyLR implicit $s0
...
# 32b FP immediates need to be loaded if using minsize.
---
name: test_f32_cp_minsize
legalized: true
regBankSelected: true
liveins:
- { reg: '$s0' }
body: |
bb.1 (%ir-block.0):
liveins: $s0
; CHECK-LABEL: name: test_f32_cp_minsize
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[COPY]], [[LDRSui]]
; CHECK: $s0 = COPY [[FADDSrr]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
%1:fpr(s32) = G_FCONSTANT float 0x3FDB267DE0000000
%2:fpr(s32) = G_FADD %0, %1
$s0 = COPY %2(s32)
RET_ReallyLR implicit $s0
...

View File

@ -39,7 +39,6 @@
; CHECK-NEXT: AArch64PostLegalizerLowering
; CHECK-NEXT: RegBankSelect
; CHECK-NEXT: Localizer
; CHECK-NEXT: Analysis for ComputingKnownBits
; CHECK-NEXT: InstructionSelect
; CHECK-NEXT: ResetMachineFunction
; CHECK-NEXT: AArch64 Instruction Selection

View File

@ -5683,13 +5683,6 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
"(const " << Target.getName() << "Subtarget *)&MF.getSubtarget(), &MF);\n"
"}\n";
if (Target.getName() == "X86" || Target.getName() == "AArch64") {
// TODO: Implement PGSO.
OS << "static bool shouldOptForSize(const MachineFunction *MF) {\n";
OS << " return MF->getFunction().hasOptSize();\n";
OS << "}\n\n";
}
SubtargetFeatureInfo::emitComputeAvailableFeatures(
Target.getName(), "InstructionSelector",
"computeAvailableFunctionFeatures", FunctionFeatures, OS,