forked from OSchip/llvm-project
[AArch64][GlobalISel] Enable use of the optsize predicate in the selector.
To do this while matching SelectionDAG's existing use of PGO info, we add the ProfileSummaryInfo and LazyBlockFrequencyInfo analysis dependencies to the instruction selector pass. We then use the predicate to generate constant pool loads for f32 materialization when targeting optsize/minsize. Differential Revision: https://reviews.llvm.org/D97732
This commit is contained in:
parent
c0f8115c73
commit
8a316045ed
|
@ -17,6 +17,10 @@
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|
||||||
|
class BlockFrequencyInfo;
|
||||||
|
class ProfileSummaryInfo;
|
||||||
|
|
||||||
/// This pass is responsible for selecting generic machine instructions to
|
/// This pass is responsible for selecting generic machine instructions to
|
||||||
/// target-specific instructions. It relies on the InstructionSelector provided
|
/// target-specific instructions. It relies on the InstructionSelector provided
|
||||||
/// by the target.
|
/// by the target.
|
||||||
|
@ -43,9 +47,16 @@ public:
|
||||||
MachineFunctionProperties::Property::Selected);
|
MachineFunctionProperties::Property::Selected);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
InstructionSelect(CodeGenOpt::Level OL);
|
||||||
InstructionSelect();
|
InstructionSelect();
|
||||||
|
|
||||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
BlockFrequencyInfo *BFI = nullptr;
|
||||||
|
ProfileSummaryInfo *PSI = nullptr;
|
||||||
|
|
||||||
|
CodeGenOpt::Level OptLevel = CodeGenOpt::None;
|
||||||
};
|
};
|
||||||
} // End namespace llvm.
|
} // End namespace llvm.
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,11 @@
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
#include "llvm/ADT/Optional.h"
|
#include "llvm/ADT/Optional.h"
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||||
|
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||||
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
#include "llvm/Support/CodeGenCoverage.h"
|
#include "llvm/Support/CodeGenCoverage.h"
|
||||||
#include "llvm/Support/LowLevelTypeImpl.h"
|
#include "llvm/Support/LowLevelTypeImpl.h"
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
@ -429,18 +434,25 @@ public:
|
||||||
CodeGenCoverage *CoverageInfo = nullptr;
|
CodeGenCoverage *CoverageInfo = nullptr;
|
||||||
GISelKnownBits *KnownBits = nullptr;
|
GISelKnownBits *KnownBits = nullptr;
|
||||||
MachineFunction *MF = nullptr;
|
MachineFunction *MF = nullptr;
|
||||||
|
ProfileSummaryInfo *PSI = nullptr;
|
||||||
|
BlockFrequencyInfo *BFI = nullptr;
|
||||||
|
// For some predicates, we need to track the current MBB.
|
||||||
|
MachineBasicBlock *CurMBB = nullptr;
|
||||||
|
|
||||||
virtual void setupGeneratedPerFunctionState(MachineFunction &MF) {
|
virtual void setupGeneratedPerFunctionState(MachineFunction &MF) {
|
||||||
llvm_unreachable("TableGen should have emitted implementation");
|
llvm_unreachable("TableGen should have emitted implementation");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Setup per-MF selector state.
|
/// Setup per-MF selector state.
|
||||||
virtual void setupMF(MachineFunction &mf,
|
virtual void setupMF(MachineFunction &mf, GISelKnownBits *KB,
|
||||||
GISelKnownBits &KB,
|
CodeGenCoverage &covinfo, ProfileSummaryInfo *psi,
|
||||||
CodeGenCoverage &covinfo) {
|
BlockFrequencyInfo *bfi) {
|
||||||
CoverageInfo = &covinfo;
|
CoverageInfo = &covinfo;
|
||||||
KnownBits = &KB;
|
KnownBits = KB;
|
||||||
MF = &mf;
|
MF = &mf;
|
||||||
|
PSI = psi;
|
||||||
|
BFI = bfi;
|
||||||
|
CurMBB = nullptr;
|
||||||
setupGeneratedPerFunctionState(mf);
|
setupGeneratedPerFunctionState(mf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -463,6 +475,12 @@ protected:
|
||||||
MatcherState(unsigned MaxRenderers);
|
MatcherState(unsigned MaxRenderers);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool shouldOptForSize(const MachineFunction *MF) const {
|
||||||
|
const auto &F = MF->getFunction();
|
||||||
|
return F.hasOptSize() || F.hasMinSize() ||
|
||||||
|
(PSI && BFI && CurMBB && llvm::shouldOptForSize(*CurMBB, PSI, BFI));
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
template <class PredicateBitset, class ComplexMatcherMemFn,
|
template <class PredicateBitset, class ComplexMatcherMemFn,
|
||||||
class CustomRendererFn>
|
class CustomRendererFn>
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
|
#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
|
||||||
|
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||||
#include "llvm/CodeGen/Register.h"
|
#include "llvm/CodeGen/Register.h"
|
||||||
#include "llvm/Support/Alignment.h"
|
#include "llvm/Support/Alignment.h"
|
||||||
#include "llvm/Support/LowLevelTypeImpl.h"
|
#include "llvm/Support/LowLevelTypeImpl.h"
|
||||||
|
@ -23,6 +24,7 @@
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
|
|
||||||
class AnalysisUsage;
|
class AnalysisUsage;
|
||||||
|
class BlockFrequencyInfo;
|
||||||
class GISelKnownBits;
|
class GISelKnownBits;
|
||||||
class MachineFunction;
|
class MachineFunction;
|
||||||
class MachineInstr;
|
class MachineInstr;
|
||||||
|
@ -32,6 +34,7 @@ class MachineOptimizationRemarkMissed;
|
||||||
struct MachinePointerInfo;
|
struct MachinePointerInfo;
|
||||||
class MachineRegisterInfo;
|
class MachineRegisterInfo;
|
||||||
class MCInstrDesc;
|
class MCInstrDesc;
|
||||||
|
class ProfileSummaryInfo;
|
||||||
class RegisterBankInfo;
|
class RegisterBankInfo;
|
||||||
class TargetInstrInfo;
|
class TargetInstrInfo;
|
||||||
class TargetLowering;
|
class TargetLowering;
|
||||||
|
@ -283,5 +286,9 @@ bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
|
||||||
/// Returns an integer representing true, as defined by the
|
/// Returns an integer representing true, as defined by the
|
||||||
/// TargetBooleanContents.
|
/// TargetBooleanContents.
|
||||||
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
|
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
|
||||||
|
|
||||||
|
/// Returns true if the given block should be optimized for size.
|
||||||
|
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
|
||||||
|
BlockFrequencyInfo *BFI);
|
||||||
} // End namespace llvm.
|
} // End namespace llvm.
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -11,7 +11,11 @@
|
||||||
|
|
||||||
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
|
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
|
||||||
#include "llvm/ADT/PostOrderIterator.h"
|
#include "llvm/ADT/PostOrderIterator.h"
|
||||||
|
#include "llvm/ADT/ScopeExit.h"
|
||||||
#include "llvm/ADT/Twine.h"
|
#include "llvm/ADT/Twine.h"
|
||||||
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||||
|
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||||
|
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
|
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
|
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
|
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
|
||||||
|
@ -50,16 +54,29 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
|
||||||
false, false)
|
false, false)
|
||||||
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
|
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
|
||||||
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
|
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
|
||||||
INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
|
INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
|
||||||
"Select target instructions out of generic instructions",
|
"Select target instructions out of generic instructions",
|
||||||
false, false)
|
false, false)
|
||||||
|
|
||||||
InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { }
|
InstructionSelect::InstructionSelect(CodeGenOpt::Level OL)
|
||||||
|
: MachineFunctionPass(ID), OptLevel(OL) {}
|
||||||
|
|
||||||
|
// In order not to crash when calling getAnalysis during testing with -run-pass
|
||||||
|
// we use the default opt level here instead of None, so that the addRequired()
|
||||||
|
// calls are made in getAnalysisUsage().
|
||||||
|
InstructionSelect::InstructionSelect()
|
||||||
|
: MachineFunctionPass(ID), OptLevel(CodeGenOpt::Default) {}
|
||||||
|
|
||||||
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
|
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||||
AU.addRequired<TargetPassConfig>();
|
AU.addRequired<TargetPassConfig>();
|
||||||
AU.addRequired<GISelKnownBitsAnalysis>();
|
if (OptLevel != CodeGenOpt::None) {
|
||||||
AU.addPreserved<GISelKnownBitsAnalysis>();
|
AU.addRequired<GISelKnownBitsAnalysis>();
|
||||||
|
AU.addPreserved<GISelKnownBitsAnalysis>();
|
||||||
|
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||||
|
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
|
||||||
|
}
|
||||||
getSelectionDAGFallbackAnalysisUsage(AU);
|
getSelectionDAGFallbackAnalysisUsage(AU);
|
||||||
MachineFunctionPass::getAnalysisUsage(AU);
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||||||
}
|
}
|
||||||
|
@ -71,13 +88,26 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
|
LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
|
||||||
GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
|
|
||||||
|
|
||||||
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
|
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
|
||||||
InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
|
InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
|
||||||
|
|
||||||
|
CodeGenOpt::Level OldOptLevel = OptLevel;
|
||||||
|
auto RestoreOptLevel = make_scope_exit([=]() { OptLevel = OldOptLevel; });
|
||||||
|
OptLevel = MF.getFunction().hasOptNone() ? CodeGenOpt::None
|
||||||
|
: MF.getTarget().getOptLevel();
|
||||||
|
|
||||||
|
GISelKnownBits *KB = nullptr;
|
||||||
|
if (OptLevel != CodeGenOpt::None) {
|
||||||
|
KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
|
||||||
|
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||||
|
if (PSI && PSI->hasProfileSummary())
|
||||||
|
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
|
||||||
|
}
|
||||||
|
|
||||||
CodeGenCoverage CoverageInfo;
|
CodeGenCoverage CoverageInfo;
|
||||||
assert(ISel && "Cannot work without InstructionSelector");
|
assert(ISel && "Cannot work without InstructionSelector");
|
||||||
ISel->setupMF(MF, KB, CoverageInfo);
|
ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI);
|
||||||
|
|
||||||
// An optimization remark emitter. Used to report failures.
|
// An optimization remark emitter. Used to report failures.
|
||||||
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
|
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
|
||||||
|
@ -102,6 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (MachineBasicBlock *MBB : post_order(&MF)) {
|
for (MachineBasicBlock *MBB : post_order(&MF)) {
|
||||||
|
ISel->CurMBB = MBB;
|
||||||
if (MBB->empty())
|
if (MBB->empty())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include "llvm/CodeGen/MachineInstr.h"
|
#include "llvm/CodeGen/MachineInstr.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
|
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
|
||||||
|
#include "llvm/CodeGen/MachineSizeOpts.h"
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/CodeGen/StackProtector.h"
|
#include "llvm/CodeGen/StackProtector.h"
|
||||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||||
|
@ -851,3 +852,10 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
|
||||||
}
|
}
|
||||||
llvm_unreachable("Invalid boolean contents");
|
llvm_unreachable("Invalid boolean contents");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
|
||||||
|
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
|
||||||
|
const auto &F = MBB.getParent()->getFunction();
|
||||||
|
return F.hasOptSize() || F.hasMinSize() ||
|
||||||
|
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
|
||||||
|
}
|
||||||
|
|
|
@ -588,7 +588,7 @@ void AArch64PassConfig::addPreGlobalInstructionSelect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AArch64PassConfig::addGlobalInstructionSelect() {
|
bool AArch64PassConfig::addGlobalInstructionSelect() {
|
||||||
addPass(new InstructionSelect());
|
addPass(new InstructionSelect(getOptLevel()));
|
||||||
if (getOptLevel() != CodeGenOpt::None)
|
if (getOptLevel() != CodeGenOpt::None)
|
||||||
addPass(createAArch64PostSelectOptimize());
|
addPass(createAArch64PostSelectOptimize());
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -47,6 +47,11 @@
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
using namespace MIPatternMatch;
|
using namespace MIPatternMatch;
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
class BlockFrequencyInfo;
|
||||||
|
class ProfileSummaryInfo;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
#define GET_GLOBALISEL_PREDICATE_BITSET
|
#define GET_GLOBALISEL_PREDICATE_BITSET
|
||||||
|
@ -62,9 +67,10 @@ public:
|
||||||
bool select(MachineInstr &I) override;
|
bool select(MachineInstr &I) override;
|
||||||
static const char *getName() { return DEBUG_TYPE; }
|
static const char *getName() { return DEBUG_TYPE; }
|
||||||
|
|
||||||
void setupMF(MachineFunction &MF, GISelKnownBits &KB,
|
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
|
||||||
CodeGenCoverage &CoverageInfo) override {
|
CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
|
||||||
InstructionSelector::setupMF(MF, KB, CoverageInfo);
|
BlockFrequencyInfo *BFI) override {
|
||||||
|
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
|
||||||
|
|
||||||
// hasFnAttribute() is expensive to call on every BRCOND selection, so
|
// hasFnAttribute() is expensive to call on every BRCOND selection, so
|
||||||
// cache it here for each run of the selector.
|
// cache it here for each run of the selector.
|
||||||
|
@ -2426,7 +2432,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
|
||||||
: AArch64::FPR128RegClass);
|
: AArch64::FPR128RegClass);
|
||||||
|
|
||||||
// For 64b values, emit a constant pool load instead.
|
// For 64b values, emit a constant pool load instead.
|
||||||
if (DefSize == 64 || DefSize == 128) {
|
// For s32, use a cp load if we have optsize/minsize.
|
||||||
|
if (DefSize == 64 || DefSize == 128 ||
|
||||||
|
(DefSize == 32 && shouldOptForSize(&MF))) {
|
||||||
auto *FPImm = I.getOperand(1).getFPImm();
|
auto *FPImm = I.getOperand(1).getFPImm();
|
||||||
MachineIRBuilder MIB(I);
|
MachineIRBuilder MIB(I);
|
||||||
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
|
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
|
||||||
|
@ -4051,10 +4059,18 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
|
||||||
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
||||||
break;
|
break;
|
||||||
case 8:
|
case 8:
|
||||||
LoadMI = &*MIRBuilder
|
LoadMI =
|
||||||
.buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
|
&*MIRBuilder
|
||||||
.addConstantPoolIndex(
|
.buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
|
||||||
CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
.addConstantPoolIndex(CPIdx, 0,
|
||||||
|
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
LoadMI =
|
||||||
|
&*MIRBuilder
|
||||||
|
.buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
|
||||||
|
.addConstantPoolIndex(CPIdx, 0,
|
||||||
|
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
|
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
|
||||||
|
|
|
@ -59,11 +59,13 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
|
||||||
|
|
||||||
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
|
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
|
||||||
|
|
||||||
void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
|
void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
|
||||||
CodeGenCoverage &CoverageInfo) {
|
CodeGenCoverage &CoverageInfo,
|
||||||
|
ProfileSummaryInfo *PSI,
|
||||||
|
BlockFrequencyInfo *BFI) {
|
||||||
MRI = &MF.getRegInfo();
|
MRI = &MF.getRegInfo();
|
||||||
Subtarget = &MF.getSubtarget<GCNSubtarget>();
|
Subtarget = &MF.getSubtarget<GCNSubtarget>();
|
||||||
InstructionSelector::setupMF(MF, KB, CoverageInfo);
|
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AMDGPUInstructionSelector::isVCC(Register Reg,
|
bool AMDGPUInstructionSelector::isVCC(Register Reg,
|
||||||
|
|
|
@ -36,6 +36,8 @@ struct ImageDimIntrinsicInfo;
|
||||||
class AMDGPUInstrInfo;
|
class AMDGPUInstrInfo;
|
||||||
class AMDGPURegisterBankInfo;
|
class AMDGPURegisterBankInfo;
|
||||||
class AMDGPUTargetMachine;
|
class AMDGPUTargetMachine;
|
||||||
|
class BlockFrequencyInfo;
|
||||||
|
class ProfileSummaryInfo;
|
||||||
class GCNSubtarget;
|
class GCNSubtarget;
|
||||||
class MachineInstr;
|
class MachineInstr;
|
||||||
class MachineIRBuilder;
|
class MachineIRBuilder;
|
||||||
|
@ -60,8 +62,9 @@ public:
|
||||||
bool select(MachineInstr &I) override;
|
bool select(MachineInstr &I) override;
|
||||||
static const char *getName();
|
static const char *getName();
|
||||||
|
|
||||||
void setupMF(MachineFunction &MF, GISelKnownBits &KB,
|
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
|
||||||
CodeGenCoverage &CoverageInfo) override;
|
CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
|
||||||
|
BlockFrequencyInfo *BFI) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct GEPInfo {
|
struct GEPInfo {
|
||||||
|
|
|
@ -1114,7 +1114,7 @@ bool GCNPassConfig::addRegBankSelect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GCNPassConfig::addGlobalInstructionSelect() {
|
bool GCNPassConfig::addGlobalInstructionSelect() {
|
||||||
addPass(new InstructionSelect());
|
addPass(new InstructionSelect(getOptLevel()));
|
||||||
// TODO: Fix instruction selection to do the right thing for image
|
// TODO: Fix instruction selection to do the right thing for image
|
||||||
// instructions with tfe or lwe in the first place, instead of running a
|
// instructions with tfe or lwe in the first place, instead of running a
|
||||||
// separate pass to fix them up?
|
// separate pass to fix them up?
|
||||||
|
|
|
@ -487,7 +487,7 @@ bool ARMPassConfig::addRegBankSelect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARMPassConfig::addGlobalInstructionSelect() {
|
bool ARMPassConfig::addGlobalInstructionSelect() {
|
||||||
addPass(new InstructionSelect());
|
addPass(new InstructionSelect(getOptLevel()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -335,6 +335,6 @@ bool MipsPassConfig::addRegBankSelect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MipsPassConfig::addGlobalInstructionSelect() {
|
bool MipsPassConfig::addGlobalInstructionSelect() {
|
||||||
addPass(new InstructionSelect());
|
addPass(new InstructionSelect(getOptLevel()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -568,6 +568,6 @@ bool PPCPassConfig::addRegBankSelect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PPCPassConfig::addGlobalInstructionSelect() {
|
bool PPCPassConfig::addGlobalInstructionSelect() {
|
||||||
addPass(new InstructionSelect());
|
addPass(new InstructionSelect(getOptLevel()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -174,7 +174,7 @@ bool RISCVPassConfig::addRegBankSelect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RISCVPassConfig::addGlobalInstructionSelect() {
|
bool RISCVPassConfig::addGlobalInstructionSelect() {
|
||||||
addPass(new InstructionSelect());
|
addPass(new InstructionSelect(getOptLevel()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -465,7 +465,7 @@ bool X86PassConfig::addRegBankSelect() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool X86PassConfig::addGlobalInstructionSelect() {
|
bool X86PassConfig::addGlobalInstructionSelect() {
|
||||||
addPass(new InstructionSelect());
|
addPass(new InstructionSelect(getOptLevel()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -65,7 +65,9 @@
|
||||||
; VERIFY-NEXT: Verify generated machine code
|
; VERIFY-NEXT: Verify generated machine code
|
||||||
; ENABLED-NEXT: Localizer
|
; ENABLED-NEXT: Localizer
|
||||||
; VERIFY-O0-NEXT: Verify generated machine code
|
; VERIFY-O0-NEXT: Verify generated machine code
|
||||||
; ENABLED-NEXT: Analysis for ComputingKnownBits
|
; ENABLED-O1-NEXT: Analysis for ComputingKnownBits
|
||||||
|
; ENABLED-O1-NEXT: Lazy Branch Probability Analysis
|
||||||
|
; ENABLED-O1-NEXT: Lazy Block Frequency Analysis
|
||||||
; ENABLED-NEXT: InstructionSelect
|
; ENABLED-NEXT: InstructionSelect
|
||||||
; ENABLED-O1-NEXT: AArch64 Post Select Optimizer
|
; ENABLED-O1-NEXT: AArch64 Post Select Optimizer
|
||||||
; VERIFY-NEXT: Verify generated machine code
|
; VERIFY-NEXT: Verify generated machine code
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
|
||||||
--- |
|
--- |
|
||||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||||
|
@ -7,6 +7,12 @@
|
||||||
define void @imm_s32_gpr() { ret void }
|
define void @imm_s32_gpr() { ret void }
|
||||||
define void @imm_s64_gpr() { ret void }
|
define void @imm_s64_gpr() { ret void }
|
||||||
define void @test_f64_cp() { ret void }
|
define void @test_f64_cp() { ret void }
|
||||||
|
define void @test_f32_cp_optsize() #0 { ret void }
|
||||||
|
define void @test_f32_cp_minsize() #1 { ret void }
|
||||||
|
|
||||||
|
|
||||||
|
attributes #0 = { optsize }
|
||||||
|
attributes #1 = { minsize }
|
||||||
|
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -76,3 +82,53 @@ body: |
|
||||||
RET_ReallyLR implicit $d0
|
RET_ReallyLR implicit $d0
|
||||||
|
|
||||||
...
|
...
|
||||||
|
# 32b FP immediates need to be loaded if using optsize.
|
||||||
|
---
|
||||||
|
name: test_f32_cp_optsize
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
liveins:
|
||||||
|
- { reg: '$s0' }
|
||||||
|
body: |
|
||||||
|
bb.1 (%ir-block.0):
|
||||||
|
liveins: $s0
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_f32_cp_optsize
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
|
||||||
|
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
|
||||||
|
; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
|
||||||
|
; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[COPY]], [[LDRSui]]
|
||||||
|
; CHECK: $s0 = COPY [[FADDSrr]]
|
||||||
|
; CHECK: RET_ReallyLR implicit $s0
|
||||||
|
%0:fpr(s32) = COPY $s0
|
||||||
|
%1:fpr(s32) = G_FCONSTANT float 0x3FDB267DE0000000
|
||||||
|
%2:fpr(s32) = G_FADD %0, %1
|
||||||
|
$s0 = COPY %2(s32)
|
||||||
|
RET_ReallyLR implicit $s0
|
||||||
|
|
||||||
|
...
|
||||||
|
# 32b FP immediates need to be loaded if using minsize.
|
||||||
|
---
|
||||||
|
name: test_f32_cp_minsize
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
liveins:
|
||||||
|
- { reg: '$s0' }
|
||||||
|
body: |
|
||||||
|
bb.1 (%ir-block.0):
|
||||||
|
liveins: $s0
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_f32_cp_minsize
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
|
||||||
|
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
|
||||||
|
; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
|
||||||
|
; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[COPY]], [[LDRSui]]
|
||||||
|
; CHECK: $s0 = COPY [[FADDSrr]]
|
||||||
|
; CHECK: RET_ReallyLR implicit $s0
|
||||||
|
%0:fpr(s32) = COPY $s0
|
||||||
|
%1:fpr(s32) = G_FCONSTANT float 0x3FDB267DE0000000
|
||||||
|
%2:fpr(s32) = G_FADD %0, %1
|
||||||
|
$s0 = COPY %2(s32)
|
||||||
|
RET_ReallyLR implicit $s0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
|
@ -39,7 +39,6 @@
|
||||||
; CHECK-NEXT: AArch64PostLegalizerLowering
|
; CHECK-NEXT: AArch64PostLegalizerLowering
|
||||||
; CHECK-NEXT: RegBankSelect
|
; CHECK-NEXT: RegBankSelect
|
||||||
; CHECK-NEXT: Localizer
|
; CHECK-NEXT: Localizer
|
||||||
; CHECK-NEXT: Analysis for ComputingKnownBits
|
|
||||||
; CHECK-NEXT: InstructionSelect
|
; CHECK-NEXT: InstructionSelect
|
||||||
; CHECK-NEXT: ResetMachineFunction
|
; CHECK-NEXT: ResetMachineFunction
|
||||||
; CHECK-NEXT: AArch64 Instruction Selection
|
; CHECK-NEXT: AArch64 Instruction Selection
|
||||||
|
|
|
@ -5683,13 +5683,6 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
|
||||||
"(const " << Target.getName() << "Subtarget *)&MF.getSubtarget(), &MF);\n"
|
"(const " << Target.getName() << "Subtarget *)&MF.getSubtarget(), &MF);\n"
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
if (Target.getName() == "X86" || Target.getName() == "AArch64") {
|
|
||||||
// TODO: Implement PGSO.
|
|
||||||
OS << "static bool shouldOptForSize(const MachineFunction *MF) {\n";
|
|
||||||
OS << " return MF->getFunction().hasOptSize();\n";
|
|
||||||
OS << "}\n\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
SubtargetFeatureInfo::emitComputeAvailableFeatures(
|
SubtargetFeatureInfo::emitComputeAvailableFeatures(
|
||||||
Target.getName(), "InstructionSelector",
|
Target.getName(), "InstructionSelector",
|
||||||
"computeAvailableFunctionFeatures", FunctionFeatures, OS,
|
"computeAvailableFunctionFeatures", FunctionFeatures, OS,
|
||||||
|
|
Loading…
Reference in New Issue