forked from OSchip/llvm-project
[ARM] Do not test for CPUs, use SubtargetFeatures (Part 1). NFCI
This is a cleanup commit similar to r271555, but for ARM. The end goal is to get rid of the isSwift / isCortexXY / isWhatever methods. Since the ARM backend seems to have quite a lot of calls to these methods, I intend to submit 5-6 subtarget features at a time, instead of one big lump.

Differential Revision: http://reviews.llvm.org/D21432

llvm-svn: 273544
This commit is contained in:
parent
597aa42fec
commit
c5baa43f53
|
@ -106,6 +106,44 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
|
||||||
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
|
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
|
||||||
"Has zero-cycle zeroing instructions">;
|
"Has zero-cycle zeroing instructions">;
|
||||||
|
|
||||||
|
// Whether or not it may be profitable to unpredicate certain instructions
|
||||||
|
// during if conversion.
|
||||||
|
def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr",
|
||||||
|
"IsProfitableToUnpredicate",
|
||||||
|
"true",
|
||||||
|
"Is profitable to unpredicate">;
|
||||||
|
|
||||||
|
// Some targets (e.g. Swift) have microcoded VGETLNi32.
|
||||||
|
def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32",
|
||||||
|
"HasSlowVGETLNi32", "true",
|
||||||
|
"Has slow VGETLNi32 - prefer VMOV">;
|
||||||
|
|
||||||
|
// Some targets (e.g. Swift) have microcoded VDUP32.
|
||||||
|
def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true",
|
||||||
|
"Has slow VDUP32 - prefer VMOV">;
|
||||||
|
|
||||||
|
// Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON
|
||||||
|
// for scalar FP, as this allows more effective execution domain optimization.
|
||||||
|
def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR",
|
||||||
|
"true", "Prefer VMOVSR">;
|
||||||
|
|
||||||
|
// Swift has ISHST barriers compatible with Atomic Release semantics but weaker
|
||||||
|
// than ISH
|
||||||
|
def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST",
|
||||||
|
"true", "Prefer ISHST barriers">;
|
||||||
|
|
||||||
|
// Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
|
||||||
|
// VFP to NEON, as an execution domain optimization.
|
||||||
|
def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs",
|
||||||
|
"true", "Convert VMOVSR, VMOVRS, VMOVS to NEON">;
|
||||||
|
|
||||||
|
// Some processors benefit from using NEON instructions for scalar
|
||||||
|
// single-precision FP operations. This affects instruction selection and should
|
||||||
|
// only be enabled if the handling of denormals is not important.
|
||||||
|
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
|
||||||
|
"true",
|
||||||
|
"Use NEON for single precision FP">;
|
||||||
|
|
||||||
// Some processors have FP multiply-accumulate instructions that don't
|
// Some processors have FP multiply-accumulate instructions that don't
|
||||||
// play nicely with other VFP / NEON instructions, and it's generally better
|
// play nicely with other VFP / NEON instructions, and it's generally better
|
||||||
// to just not use them.
|
// to just not use them.
|
||||||
|
@ -117,12 +155,6 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
|
||||||
"HasVMLxForwarding", "true",
|
"HasVMLxForwarding", "true",
|
||||||
"Has multiplier accumulator forwarding">;
|
"Has multiplier accumulator forwarding">;
|
||||||
|
|
||||||
// Some processors benefit from using NEON instructions for scalar
|
|
||||||
// single-precision FP operations.
|
|
||||||
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
|
|
||||||
"true",
|
|
||||||
"Use NEON for single precision FP">;
|
|
||||||
|
|
||||||
// Disable 32-bit to 16-bit narrowing for experimentation.
|
// Disable 32-bit to 16-bit narrowing for experimentation.
|
||||||
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
|
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
|
||||||
"Prefer 32-bit Thumb instrs">;
|
"Prefer 32-bit Thumb instrs">;
|
||||||
|
@ -533,6 +565,8 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
|
||||||
FeatureT2XtPk,
|
FeatureT2XtPk,
|
||||||
FeatureFP16,
|
FeatureFP16,
|
||||||
FeatureAvoidPartialCPSR,
|
FeatureAvoidPartialCPSR,
|
||||||
|
FeaturePreferVMOVSR,
|
||||||
|
FeatureNEONForFPMovs,
|
||||||
FeatureMP]>;
|
FeatureMP]>;
|
||||||
|
|
||||||
// FIXME: A12 has currently the same Schedule model as A9
|
// FIXME: A12 has currently the same Schedule model as A9
|
||||||
|
@ -596,7 +630,11 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
|
||||||
FeatureHWDivARM,
|
FeatureHWDivARM,
|
||||||
FeatureAvoidPartialCPSR,
|
FeatureAvoidPartialCPSR,
|
||||||
FeatureAvoidMOVsShOp,
|
FeatureAvoidMOVsShOp,
|
||||||
FeatureHasSlowFPVMLx]>;
|
FeatureHasSlowFPVMLx,
|
||||||
|
FeatureProfUnpredicate,
|
||||||
|
FeaturePrefISHSTBarrier,
|
||||||
|
FeatureSlowVGETLNi32,
|
||||||
|
FeatureSlowVDUP32]>;
|
||||||
|
|
||||||
// FIXME: R4 has currently the same ProcessorModel as A8.
|
// FIXME: R4 has currently the same ProcessorModel as A8.
|
||||||
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
|
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
|
||||||
|
|
|
@ -1766,9 +1766,9 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||||
bool
|
bool
|
||||||
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
|
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
|
||||||
MachineBasicBlock &FMBB) const {
|
MachineBasicBlock &FMBB) const {
|
||||||
// Reduce false anti-dependencies to let Swift's out-of-order execution
|
// Reduce false anti-dependencies to let the target's out-of-order execution
|
||||||
// engine do its thing.
|
// engine do its thing.
|
||||||
return Subtarget.isSwift();
|
return Subtarget.isProfitableToUnpredicate();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// getInstrPredicate - If instruction is predicated, returns its predicate
|
/// getInstrPredicate - If instruction is predicated, returns its predicate
|
||||||
|
@ -4178,7 +4178,7 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
|
||||||
|
|
||||||
// CortexA9 is particularly picky about mixing the two and wants these
|
// CortexA9 is particularly picky about mixing the two and wants these
|
||||||
// converted.
|
// converted.
|
||||||
if (Subtarget.isCortexA9() && !isPredicated(*MI) &&
|
if (Subtarget.useNEONForFPMovs() && !isPredicated(*MI) &&
|
||||||
(MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
|
(MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
|
||||||
MI->getOpcode() == ARM::VMOVS))
|
MI->getOpcode() == ARM::VMOVS))
|
||||||
return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
|
return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
|
||||||
|
|
|
@ -3024,7 +3024,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
|
||||||
if (Subtarget->isMClass()) {
|
if (Subtarget->isMClass()) {
|
||||||
// Only a full system barrier exists in the M-class architectures.
|
// Only a full system barrier exists in the M-class architectures.
|
||||||
Domain = ARM_MB::SY;
|
Domain = ARM_MB::SY;
|
||||||
} else if (Subtarget->isSwift() && Ord == AtomicOrdering::Release) {
|
} else if (Subtarget->preferISHSTBarriers() &&
|
||||||
|
Ord == AtomicOrdering::Release) {
|
||||||
// Swift happens to implement ISHST barriers in a way that's compatible with
|
// Swift happens to implement ISHST barriers in a way that's compatible with
|
||||||
// Release semantics but weaker than ISH so we'd be fools not to use
|
// Release semantics but weaker than ISH so we'd be fools not to use
|
||||||
// it. Beware: other processors probably don't!
|
// it. Beware: other processors probably don't!
|
||||||
|
@ -12236,7 +12237,7 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
|
||||||
/*FALLTHROUGH*/
|
/*FALLTHROUGH*/
|
||||||
case AtomicOrdering::Release:
|
case AtomicOrdering::Release:
|
||||||
case AtomicOrdering::AcquireRelease:
|
case AtomicOrdering::AcquireRelease:
|
||||||
if (Subtarget->isSwift())
|
if (Subtarget->preferISHSTBarriers())
|
||||||
return makeDMB(Builder, ARM_MB::ISHST);
|
return makeDMB(Builder, ARM_MB::ISHST);
|
||||||
// FIXME: add a comment with a link to documentation justifying this.
|
// FIXME: add a comment with a link to documentation justifying this.
|
||||||
else
|
else
|
||||||
|
|
|
@ -321,19 +321,16 @@ def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion =="
|
||||||
" Subtarget->hasVFP4()) || "
|
" Subtarget->hasVFP4()) || "
|
||||||
"Subtarget->isTargetDarwin()">;
|
"Subtarget->isTargetDarwin()">;
|
||||||
|
|
||||||
// VGETLNi32 is microcoded on Swift - prefer VMOV.
|
def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
|
||||||
def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">;
|
def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
|
||||||
def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">;
|
|
||||||
|
|
||||||
// VDUP.32 is microcoded on Swift - prefer VMOV.
|
def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
|
||||||
def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">;
|
def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
|
||||||
def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">;
|
|
||||||
|
|
||||||
// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as
|
def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
|
||||||
// this allows more effective execution domain optimization. See
|
"!Subtarget->useNEONForSinglePrecisionFP()">;
|
||||||
// setExecutionDomain().
|
def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
|
||||||
def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">;
|
"Subtarget->useNEONForSinglePrecisionFP()">;
|
||||||
def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">;
|
|
||||||
|
|
||||||
def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
|
def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
|
||||||
def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
|
def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
|
||||||
|
|
|
@ -154,6 +154,12 @@ void ARMSubtarget::initializeEnvironment() {
|
||||||
HasCRC = false;
|
HasCRC = false;
|
||||||
HasRAS = false;
|
HasRAS = false;
|
||||||
HasZeroCycleZeroing = false;
|
HasZeroCycleZeroing = false;
|
||||||
|
IsProfitableToUnpredicate = false;
|
||||||
|
HasSlowVGETLNi32 = false;
|
||||||
|
HasSlowVDUP32 = false;
|
||||||
|
PreferVMOVSR = false;
|
||||||
|
PreferISHST = false;
|
||||||
|
UseNEONForFPMovs = false;
|
||||||
StrictAlign = false;
|
StrictAlign = false;
|
||||||
HasDSP = false;
|
HasDSP = false;
|
||||||
UseNaClTrap = false;
|
UseNaClTrap = false;
|
||||||
|
|
|
@ -218,6 +218,24 @@ protected:
|
||||||
/// particularly effective at zeroing a VFP register.
|
/// particularly effective at zeroing a VFP register.
|
||||||
bool HasZeroCycleZeroing;
|
bool HasZeroCycleZeroing;
|
||||||
|
|
||||||
|
/// If true, if conversion may decide to leave some instructions unpredicated.
|
||||||
|
bool IsProfitableToUnpredicate;
|
||||||
|
|
||||||
|
/// If true, VMOV will be favored over VGETLNi32.
|
||||||
|
bool HasSlowVGETLNi32;
|
||||||
|
|
||||||
|
/// If true, VMOV will be favored over VDUP.
|
||||||
|
bool HasSlowVDUP32;
|
||||||
|
|
||||||
|
/// If true, VMOVSR will be favored over VMOVDRR.
|
||||||
|
bool PreferVMOVSR;
|
||||||
|
|
||||||
|
/// If true, ISHST barriers will be used for Release semantics.
|
||||||
|
bool PreferISHST;
|
||||||
|
|
||||||
|
/// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
|
||||||
|
bool UseNEONForFPMovs;
|
||||||
|
|
||||||
/// StrictAlign - If true, the subtarget disallows unaligned memory
|
/// StrictAlign - If true, the subtarget disallows unaligned memory
|
||||||
/// accesses for some types. For details, see
|
/// accesses for some types. For details, see
|
||||||
/// ARMTargetLowering::allowsMisalignedMemoryAccesses().
|
/// ARMTargetLowering::allowsMisalignedMemoryAccesses().
|
||||||
|
@ -376,6 +394,12 @@ public:
|
||||||
bool hasTrustZone() const { return HasTrustZone; }
|
bool hasTrustZone() const { return HasTrustZone; }
|
||||||
bool has8MSecExt() const { return Has8MSecExt; }
|
bool has8MSecExt() const { return Has8MSecExt; }
|
||||||
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
|
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
|
||||||
|
bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
|
||||||
|
bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
|
||||||
|
bool hasSlowVDUP32() const { return HasSlowVDUP32; }
|
||||||
|
bool preferVMOVSR() const { return PreferVMOVSR; }
|
||||||
|
bool preferISHSTBarriers() const { return PreferISHST; }
|
||||||
|
bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
|
||||||
bool prefers32BitThumb() const { return Pref32BitThumb; }
|
bool prefers32BitThumb() const { return Pref32BitThumb; }
|
||||||
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
|
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
|
||||||
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
|
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
|
||||||
|
|
Loading…
Reference in New Issue