diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 84d388d94e59..872343e8b8f8 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -154,10 +154,6 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
 def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
                                           "Has zero-cycle zeroing instructions for generic registers">;
 
-// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0".
-// as movi is more efficient across all cores. Newer cores can eliminate
-// fmovs early and there is no difference with movi, but this not true for
-// all implementations.
 def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
                                             "Has no zero-cycle zeroing instructions for FP registers">;
 
@@ -172,7 +168,7 @@ def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
                                     "The zero-cycle floating-point zeroing instruction has a bug">;
 
 def FeatureStrictAlign : SubtargetFeature<"strict-align",
-                                          "RequiresStrictAlign", "true",
+                                          "StrictAlign", "true",
                                           "Disallow all unaligned memory "
                                           "access">;
 
@@ -194,11 +190,11 @@ def FeaturePredictableSelectIsExpensive : SubtargetFeature<
     "Prefer likely predicted branches over selects">;
 
 def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
-    "HasCustomCheapAsMoveHandling", "true",
+    "CustomAsCheapAsMove", "true",
     "Use custom handling of cheap instructions">;
 
 def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
-    "HasExynosCheapAsMoveHandling", "true",
+    "ExynosAsCheapAsMove", "true",
     "Use Exynos specific handling of cheap instructions",
     [FeatureCustomCheapAsMoveHandling]>;
 
@@ -206,12 +202,12 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
     "UsePostRAScheduler", "true", "Schedule again after register allocation">;
 
 def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
-    "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">;
+    "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
 
 def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
-    "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;
+    "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
 
-def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
+def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "STRQroIsSlow",
     "true", "STR of Q register with register offset is slow">;
 
 def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index a2ae62b4ec4c..a5a678571fea 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -87,14 +87,191 @@ protected:
   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others; + bool HasV8_0aOps = false; + bool HasV8_1aOps = false; + bool HasV8_2aOps = false; + bool HasV8_3aOps = false; + bool HasV8_4aOps = false; + bool HasV8_5aOps = false; + bool HasV8_6aOps = false; + bool HasV8_7aOps = false; + bool HasV8_8aOps = false; + bool HasV9_0aOps = false; + bool HasV9_1aOps = false; + bool HasV9_2aOps = false; + bool HasV9_3aOps = false; + bool HasV8_0rOps = false; + + bool HasCONTEXTIDREL2 = false; + bool HasEL2VMSA = false; + bool HasEL3 = false; + bool HasFPARMv8 = false; + bool HasNEON = false; + bool HasCrypto = false; + bool HasDotProd = false; + bool HasCRC = false; + bool HasLSE = false; + bool HasLSE2 = false; + bool HasRAS = false; + bool HasRDM = false; + bool HasPerfMon = false; + bool HasFullFP16 = false; + bool HasFP16FML = false; + bool HasSPE = false; + + bool FixCortexA53_835769 = false; + + // ARMv8.1 extensions + bool HasVH = false; + bool HasPAN = false; + bool HasLOR = false; + + // ARMv8.2 extensions + bool HasPsUAO = false; + bool HasPAN_RWV = false; + bool HasCCPP = false; + + // SVE extensions + bool HasSVE = false; + bool UseExperimentalZeroingPseudos = false; + bool UseScalarIncVL = false; + + // Armv8.2 Crypto extensions + bool HasSM4 = false; + bool HasSHA3 = false; + bool HasSHA2 = false; + bool HasAES = false; + + // ARMv8.3 extensions + bool HasPAuth = false; + bool HasJS = false; + bool HasCCIDX = false; + bool HasComplxNum = false; + + // ARMv8.4 extensions + bool HasNV = false; + bool HasMPAM = false; + bool HasDIT = false; + bool HasTRACEV8_4 = false; + bool HasAM = false; + bool HasSEL2 = false; + bool HasTLB_RMI = false; + bool HasFlagM = false; + bool HasRCPC_IMMO = false; + + bool HasLSLFast = false; + bool HasRCPC = false; + bool HasAggressiveFMA = false; + + // Armv8.5-A Extensions + bool HasAlternativeNZCV = false; + bool HasFRInt3264 = false; + bool HasSpecRestrict = false; + bool HasSSBS = false; + bool HasSB = false; + bool HasPredRes = false; + bool HasCCDP = false; + bool HasBTI = false; + bool HasRandGen = false; + bool HasMTE = false; + bool HasTME = false; + + // Armv8.6-A Extensions + bool HasBF16 = false; + bool HasMatMulInt8 = false; + bool HasMatMulFP32 = false; + bool HasMatMulFP64 = false; + bool HasAMVS = false; + bool HasFineGrainedTraps = false; + bool HasEnhancedCounterVirtualization = false; + + // Armv8.7-A Extensions + bool HasXS = false; + bool HasWFxT = false; + bool HasHCX = false; + bool HasLS64 = false; + + // Armv8.8-A Extensions + bool HasHBC = false; + bool HasMOPS = false; + + // Arm SVE2 extensions + bool HasSVE2 = false; + bool HasSVE2AES = false; + bool HasSVE2SM4 = false; + bool HasSVE2SHA3 = false; + bool HasSVE2BitPerm = false; + + // Armv9-A Extensions + bool HasRME = false; + + // Arm Scalable Matrix Extension (SME) + bool HasSME = false; + bool HasSMEF64 = false; + bool HasSMEI64 = false; + bool HasStreamingSVE = false; + + // AppleA7 system register. + bool HasAppleA7SysReg = false; + + // Future architecture extensions. + bool HasETE = false; + bool HasTRBE = false; + bool HasBRBE = false; + bool HasSPE_EEF = false; + + // HasZeroCycleRegMove - Has zero-cycle register mov instructions. + bool HasZeroCycleRegMove = false; + + // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. + bool HasZeroCycleZeroing = false; + bool HasZeroCycleZeroingGP = false; + bool HasZeroCycleZeroingFPWorkaround = false; + + // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0". + // as movi is more efficient across all cores. 
Newer cores can eliminate + // fmovs early and there is no difference with movi, but this not true for + // all implementations. + bool HasZeroCycleZeroingFP = true; + + // StrictAlign - Disallow unaligned memory accesses. + bool StrictAlign = false; + + // NegativeImmediates - transform instructions with negative immediates + bool NegativeImmediates = true; + // Enable 64-bit vectorization in SLP. unsigned MinVectorRegisterBitWidth = 64; -// Bool members corresponding to the SubtargetFeatures defined in tablegen -#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ - bool ATTRIBUTE = DEFAULT; -#include "AArch64GenSubtargetInfo.inc" - + bool OutlineAtomics = false; + bool PredictableSelectIsExpensive = false; + bool BalanceFPOps = false; + bool CustomAsCheapAsMove = false; + bool ExynosAsCheapAsMove = false; + bool UsePostRAScheduler = false; + bool Misaligned128StoreIsSlow = false; + bool Paired128IsSlow = false; + bool STRQroIsSlow = false; + bool UseAlternateSExtLoadCVTF32Pattern = false; + bool HasArithmeticBccFusion = false; + bool HasArithmeticCbzFusion = false; + bool HasCmpBccFusion = false; + bool HasFuseAddress = false; + bool HasFuseAES = false; + bool HasFuseArithmeticLogic = false; + bool HasFuseCCSelect = false; + bool HasFuseCryptoEOR = false; + bool HasFuseLiterals = false; + bool DisableLatencySchedHeuristic = false; + bool UseRSqrt = false; + bool Force32BitJumpTables = false; + bool UseEL1ForTP = false; + bool UseEL2ForTP = false; + bool UseEL3ForTP = false; + bool AllowTaggedGlobals = false; + bool HardenSlsRetBr = false; + bool HardenSlsBlr = false; + bool HardenSlsNoComdat = false; uint8_t MaxInterleaveFactor = 2; uint8_t VectorInsertExtractBaseCost = 3; uint16_t CacheLineSize = 0; @@ -154,11 +331,6 @@ public: unsigned MinSVEVectorSizeInBitsOverride = 0, unsigned MaxSVEVectorSizeInBitsOverride = 0); -// Getters for SubtargetFeatures defined in tablegen -#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ - bool GETTER() const { return ATTRIBUTE; } -#include "AArch64GenSubtargetInfo.inc" - const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } @@ -179,7 +351,9 @@ public: const RegisterBankInfo *getRegBankInfo() const override; const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } - bool enablePostRAScheduler() const override { return usePostRAScheduler(); } + bool enablePostRAScheduler() const override { + return UsePostRAScheduler; + } /// Returns ARM processor family. /// Avoid this function! 
CPU specifics should be kept local to this class @@ -189,6 +363,30 @@ public: return ARMProcFamily; } + bool hasV8_0aOps() const { return HasV8_0aOps; } + bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasV8_2aOps() const { return HasV8_2aOps; } + bool hasV8_3aOps() const { return HasV8_3aOps; } + bool hasV8_4aOps() const { return HasV8_4aOps; } + bool hasV8_5aOps() const { return HasV8_5aOps; } + bool hasV9_0aOps() const { return HasV9_0aOps; } + bool hasV9_1aOps() const { return HasV9_1aOps; } + bool hasV9_2aOps() const { return HasV9_2aOps; } + bool hasV9_3aOps() const { return HasV9_3aOps; } + bool hasV8_0rOps() const { return HasV8_0rOps; } + + bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } + + bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; } + + bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; } + + bool hasZeroCycleZeroingFPWorkaround() const { + return HasZeroCycleZeroingFPWorkaround; + } + + bool requiresStrictAlign() const { return StrictAlign; } + bool isXRaySupported() const override { return true; } unsigned getMinVectorRegisterBitWidth() const { @@ -201,6 +399,41 @@ public: return CustomCallSavedXRegs[i]; } bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } + bool hasFPARMv8() const { return HasFPARMv8; } + bool hasNEON() const { return HasNEON; } + bool hasCrypto() const { return HasCrypto; } + bool hasDotProd() const { return HasDotProd; } + bool hasCRC() const { return HasCRC; } + bool hasLSE() const { return HasLSE; } + bool hasLSE2() const { return HasLSE2; } + bool hasRAS() const { return HasRAS; } + bool hasRDM() const { return HasRDM; } + bool hasSM4() const { return HasSM4; } + bool hasSHA3() const { return HasSHA3; } + bool hasSHA2() const { return HasSHA2; } + bool hasAES() const { return HasAES; } + bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; } + bool balanceFPOps() const { return BalanceFPOps; } + bool predictableSelectIsExpensive() const { + return PredictableSelectIsExpensive; + } + bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } + bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; } + bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } + bool isPaired128Slow() const { return Paired128IsSlow; } + bool isSTRQroSlow() const { return STRQroIsSlow; } + bool useAlternateSExtLoadCVTF32Pattern() const { + return UseAlternateSExtLoadCVTF32Pattern; + } + bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } + bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } + bool hasCmpBccFusion() const { return HasCmpBccFusion; } + bool hasFuseAddress() const { return HasFuseAddress; } + bool hasFuseAES() const { return HasFuseAES; } + bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } + bool hasFuseCCSelect() const { return HasFuseCCSelect; } + bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; } + bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. 
bool hasFusion() const { @@ -209,6 +442,16 @@ public: hasFuseCCSelect() || hasFuseLiterals(); } + bool hardenSlsRetBr() const { return HardenSlsRetBr; } + bool hardenSlsBlr() const { return HardenSlsBlr; } + bool hardenSlsNoComdat() const { return HardenSlsNoComdat; } + + bool useEL1ForTP() const { return UseEL1ForTP; } + bool useEL2ForTP() const { return UseEL2ForTP; } + bool useEL3ForTP() const { return UseEL3ForTP; } + + bool useRSqrt() const { return UseRSqrt; } + bool force32BitJumpTables() const { return Force32BitJumpTables; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } unsigned getVectorInsertExtractBaseCost() const { return VectorInsertExtractBaseCost; @@ -237,10 +480,58 @@ public: unsigned getWideningBaseCost() const { return WideningBaseCost; } + bool useExperimentalZeroingPseudos() const { + return UseExperimentalZeroingPseudos; + } + + bool useScalarIncVL() const { return UseScalarIncVL; } + /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. bool supportsAddressTopByteIgnored() const; + bool hasPerfMon() const { return HasPerfMon; } + bool hasFullFP16() const { return HasFullFP16; } + bool hasFP16FML() const { return HasFP16FML; } + bool hasSPE() const { return HasSPE; } + bool hasLSLFast() const { return HasLSLFast; } + bool hasSVE() const { return HasSVE; } + bool hasSVE2() const { return HasSVE2; } + bool hasRCPC() const { return HasRCPC; } + bool hasAggressiveFMA() const { return HasAggressiveFMA; } + bool hasAlternativeNZCV() const { return HasAlternativeNZCV; } + bool hasFRInt3264() const { return HasFRInt3264; } + bool hasSpecRestrict() const { return HasSpecRestrict; } + bool hasSSBS() const { return HasSSBS; } + bool hasSB() const { return HasSB; } + bool hasPredRes() const { return HasPredRes; } + bool hasCCDP() const { return HasCCDP; } + bool hasBTI() const { return HasBTI; } + bool hasRandGen() const { return HasRandGen; } + bool hasMTE() const { return HasMTE; } + bool hasTME() const { return HasTME; } + // Arm SVE2 extensions + bool hasSVE2AES() const { return HasSVE2AES; } + bool hasSVE2SM4() const { return HasSVE2SM4; } + bool hasSVE2SHA3() const { return HasSVE2SHA3; } + bool hasSVE2BitPerm() const { return HasSVE2BitPerm; } + bool hasMatMulInt8() const { return HasMatMulInt8; } + bool hasMatMulFP32() const { return HasMatMulFP32; } + bool hasMatMulFP64() const { return HasMatMulFP64; } + + // Armv8.6-A Extensions + bool hasBF16() const { return HasBF16; } + bool hasFineGrainedTraps() const { return HasFineGrainedTraps; } + bool hasEnhancedCounterVirtualization() const { + return HasEnhancedCounterVirtualization; + } + + // Arm Scalable Matrix Extension (SME) + bool hasSME() const { return HasSME; } + bool hasSMEF64() const { return HasSMEF64; } + bool hasSMEI64() const { return HasSMEI64; } + bool hasStreamingSVE() const { return HasStreamingSVE; } + bool isLittleEndian() const { return IsLittle; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } @@ -261,6 +552,42 @@ public: bool useAA() const override; + bool outlineAtomics() const { return OutlineAtomics; } + + bool hasVH() const { return HasVH; } + bool hasPAN() const { return HasPAN; } + bool hasLOR() const { return HasLOR; } + + bool hasPsUAO() const { return HasPsUAO; } + bool hasPAN_RWV() const { return HasPAN_RWV; } + bool hasCCPP() const { return HasCCPP; } + + bool hasPAuth() const { return HasPAuth; } + bool hasJS() const { return HasJS; } + bool hasCCIDX() const { return HasCCIDX; } + bool 
hasComplxNum() const { return HasComplxNum; } + + bool hasNV() const { return HasNV; } + bool hasMPAM() const { return HasMPAM; } + bool hasDIT() const { return HasDIT; } + bool hasTRACEV8_4() const { return HasTRACEV8_4; } + bool hasAM() const { return HasAM; } + bool hasAMVS() const { return HasAMVS; } + bool hasXS() const { return HasXS; } + bool hasWFxT() const { return HasWFxT; } + bool hasHCX() const { return HasHCX; } + bool hasLS64() const { return HasLS64; } + bool hasSEL2() const { return HasSEL2; } + bool hasTLB_RMI() const { return HasTLB_RMI; } + bool hasFlagM() const { return HasFlagM; } + bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } + bool hasEL2VMSA() const { return HasEL2VMSA; } + bool hasEL3() const { return HasEL3; } + bool hasHBC() const { return HasHBC; } + bool hasMOPS() const { return HasMOPS; } + + bool fixCortexA53_835769() const { return FixCortexA53_835769; } + bool addrSinkUsingGEPs() const override { // Keeping GEPs inbounds is important for exploiting AArch64 // addressing-modes in ILP32 mode. diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index d9bc2827f7d2..27edf69b4abf 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -19,11 +19,9 @@ include "llvm/Target/Target.td" // ARM Subtarget state. // -// True if compiling for Thumb, false for ARM. -def ModeThumb : SubtargetFeature<"thumb-mode", "IsThumb", +def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true", "Thumb mode">; -// True if we're using software floating point features. def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", "true", "Use software floating " "point features.">; @@ -50,18 +48,14 @@ def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64", "true", "Enable 64-bit FP registers", [FeatureFPRegs]>; -// True if the floating point unit supports double precision. def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true", "Floating point unit supports " "double precision", [FeatureFPRegs64]>; -// True if subtarget has the full 32 double precision FP registers for VFPv3. def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", "Extend FP to 32 double registers">; -/// Versions of the VFP flags restricted to single precision, or to -/// 16 d-registers, or both. multiclass VFPver prev, list otherimplies, @@ -106,7 +100,6 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; -// True if subtarget supports half-precision FP conversions. def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision " "floating point">; @@ -117,211 +110,169 @@ defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions", defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", [FeatureVFP4], []>; -// True if subtarget supports half-precision FP operations. def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Enable full half-precision " "floating point", [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>; -// True if subtarget supports half-precision FP fml operations. def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", "Enable full half-precision " "floating point fml instructions", [FeatureFullFP16]>; -// True if subtarget supports [su]div in Thumb mode. def FeatureHWDivThumb : SubtargetFeature<"hwdiv", - "HasDivideInThumbMode", "true", + "HasHardwareDivideInThumb", "true", "Enable divide instructions in Thumb">; -// True if subtarget supports [su]div in ARM mode. 
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", - "HasDivideInARMMode", "true", + "HasHardwareDivideInARM", "true", "Enable divide instructions in ARM mode">; // Atomic Support - -// True if the subtarget supports DMB / DSB data barrier instructions. def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", "Has data barrier (dmb/dsb) instructions">; -// True if the subtarget supports CLREX instructions. def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", "Has v7 clrex instruction">; -// True if the subtarget supports DFB data barrier instruction. def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", "Has full data barrier (dfb) instruction">; -// True if the subtarget supports v8 atomics (LDA/LDAEX etc) instructions. def FeatureAcquireRelease : SubtargetFeature<"acquire-release", "HasAcquireRelease", "true", "Has v8 acquire/release (lda/ldaex " " etc) instructions">; -// True if floating point compare + branch is slow. -def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "IsFPBrccSlow", "true", +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; -// True if the processor supports the Performance Monitor Extensions. These -// include a generic cycle-counter as well as more fine-grained (often -// implementation-specific) events. def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable support for Performance " "Monitor extensions">; // TrustZone Security Extensions - -// True if processor supports TrustZone security extensions. def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", "Enable support for TrustZone " "security extensions">; -// True if processor supports ARMv8-M Security Extensions. def Feature8MSecExt : SubtargetFeature<"8msecext", "Has8MSecExt", "true", "Enable support for ARMv8-M " "Security Extensions">; -// True if processor supports SHA1 and SHA256. def FeatureSHA2 : SubtargetFeature<"sha2", "HasSHA2", "true", "Enable SHA1 and SHA256 support", [FeatureNEON]>; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES support", [FeatureNEON]>; -// True if processor supports Cryptography extensions. def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", "Enable support for " "Cryptography extensions", [FeatureNEON, FeatureSHA2, FeatureAES]>; -// True if processor supports CRC instructions. def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; -// True if the ARMv8.2A dot product instructions are supported. def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", "Enable support for dot product instructions", [FeatureNEON]>; -// True if the processor supports RAS extensions. -// Not to be confused with FeatureHasRetAddrStack (return address stack). +// Not to be confused with FeatureHasRetAddrStack (return address stack) def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable Reliability, Availability " "and Serviceability extensions">; -// Fast computation of non-negative address offsets. -// True if processor does positive address offset computation faster. +// Fast computation of non-negative address offsets def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", "Enable fast computation of " "positive address offsets">; -// Fast execution of AES crypto operations. -// True if processor executes back to back AES instruction pairs faster. 
+// Fast execution of AES crypto operations def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", "CPU fuses AES crypto operations">; -// Fast execution of bottom and top halves of literal generation. -// True if processor executes back to back bottom and top halves of literal generation faster. +// Fast execution of bottom and top halves of literal generation def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true", "CPU fuses literal generation operations">; -// The way of reading thread pointer. -// True if read thread pointer from coprocessor register. -def FeatureReadTp : SubtargetFeature<"read-tp-hard", "IsReadTPHard", "true", +// The way of reading thread pointer +def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", "Reading thread pointer from register">; // Cyclone can zero VFP registers in 0 cycles. -// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are -// particularly effective at zeroing a VFP register. def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; -// Whether it is profitable to unpredicate certain instructions during if-conversion. -// True if if conversion may decide to leave some instructions unpredicated. +// Whether it is profitable to unpredicate certain instructions during if-conversion def FeatureProfUnpredicate : SubtargetFeature<"prof-unpr", "IsProfitableToUnpredicate", "true", "Is profitable to unpredicate">; // Some targets (e.g. Swift) have microcoded VGETLNi32. -// True if VMOV will be favored over VGETLNi32. def FeatureSlowVGETLNi32 : SubtargetFeature<"slow-vgetlni32", "HasSlowVGETLNi32", "true", "Has slow VGETLNi32 - prefer VMOV">; // Some targets (e.g. Swift) have microcoded VDUP32. -// True if VMOV will be favored over VDUP. def FeatureSlowVDUP32 : SubtargetFeature<"slow-vdup32", "HasSlowVDUP32", "true", "Has slow VDUP32 - prefer VMOV">; // Some targets (e.g. Cortex-A9) prefer VMOVSR to VMOVDRR even when using NEON // for scalar FP, as this allows more effective execution domain optimization. -// True if VMOVSR will be favored over VMOVDRR. def FeaturePreferVMOVSR : SubtargetFeature<"prefer-vmovsr", "PreferVMOVSR", "true", "Prefer VMOVSR">; // Swift has ISHST barriers compatible with Atomic Release semantics but weaker -// than ISH. -// True if ISHST barriers will be used for Release semantics. -def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHSTBarriers", +// than ISH +def FeaturePrefISHSTBarrier : SubtargetFeature<"prefer-ishst", "PreferISHST", "true", "Prefer ISHST barriers">; // Some targets (e.g. Cortex-A9) have muxed AGU and NEON/FPU. -// True if the AGU and NEON/FPU units are multiplexed. def FeatureMuxedUnits : SubtargetFeature<"muxed-units", "HasMuxedUnits", "true", "Has muxed AGU and NEON/FPU">; // Whether VLDM/VSTM starting with odd register number need more microops -// than single VLDRS. -// True if a VLDM/VSTM starting with an odd register number is considered to -// take more microops than single VLDRS/VSTRS. -def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "HasSlowOddRegister", +// than single VLDRS +def FeatureSlowOddRegister : SubtargetFeature<"slow-odd-reg", "SlowOddRegister", "true", "VLDM/VSTM starting " "with an odd register is slow">; // Some targets have a renaming dependency when loading into D subregisters. -// True if loading into a D subregister will be penalized. 
def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg", - "HasSlowLoadDSubregister", "true", + "SlowLoadDSubregister", "true", "Loading into D subregs is slow">; -// True if use a wider stride when allocating VFP registers. def FeatureUseWideStrideVFP : SubtargetFeature<"wide-stride-vfp", "UseWideStrideVFP", "true", "Use a wide stride when allocating VFP registers">; // Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD. -// True if VMOVS will never be widened to VMOVD. def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs", "DontWidenVMOVS", "true", "Don't widen VMOVS to VMOVD">; // Some targets (e.g. Cortex-A15) prefer to avoid mixing operations on different // VFP register widths. -// True if splat a register between VFP and NEON instructions. def FeatureSplatVFPToNeon : SubtargetFeature<"splat-vfp-neon", - "UseSplatVFPToNeon", "true", + "SplatVFPToNeon", "true", "Splat register from VFP to NEON", [FeatureDontWidenVMOVS]>; // Whether or not it is profitable to expand VFP/NEON MLA/MLS instructions. -// True if run the MLx expansion pass. def FeatureExpandMLx : SubtargetFeature<"expand-fp-mlx", "ExpandMLx", "true", "Expand VFP/NEON MLA/MLS instructions">; // Some targets have special RAW hazards for VFP/NEON VMLA/VMLS. -// True if VFP/NEON VMLA/VMLS have special RAW hazards. def FeatureHasVMLxHazards : SubtargetFeature<"vmlx-hazards", "HasVMLxHazards", "true", "Has VMLx hazards">; // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from // VFP to NEON, as an execution domain optimization. -// True if VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", "UseNEONForFPMovs", "true", "Convert VMOVSR, VMOVRS, " @@ -330,21 +281,18 @@ def FeatureNEONForFPMovs : SubtargetFeature<"neon-fpmovs", // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. This affects instruction selection and should // only be enabled if the handling of denormals is not important. -// Use the method useNEONForSinglePrecisionFP() to determine if NEON should actually be used. def FeatureNEONForFP : SubtargetFeature<"neonfp", - "HasNEONForFP", + "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; // On some processors, VLDn instructions that access unaligned data take one // extra cycle. Take that into account when computing operand latencies. -// True if VLDn instructions take an extra cycle for unaligned accesses. -def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAccessAlignment", +def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign", "true", "Check for VLDn unaligned access">; // Some processors have a nonpipelined VFP coprocessor. -// True if VFP instructions are not pipelined. def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", "NonpipelinedVFP", "true", "VFP instructions are not pipelined">; @@ -352,27 +300,20 @@ def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp", // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. -// If the VFP2 / NEON instructions are available, indicates -// whether the FP VML[AS] instructions are slow (if so, don't use them). 
 def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
                                             "Disable VFP / NEON MAC instructions">;
 
-// VFPv4 added VFMA instructions that can similarly be fast or slow.
-// If the VFP4 / NEON instructions are available, indicates
-// whether the FP VFM[AS] instructions are slow (if so, don't use them).
+// VFPv4 added VFMA instructions that can similarly be fast or slow.
 def FeatureHasSlowFPVFMx : SubtargetFeature<"slowfpvfmx", "SlowFPVFMx", "true",
                                             "Disable VFP / NEON FMA instructions">;
 
 // Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
-/// True if NEON has special multiplier accumulator
-/// forwarding to allow mul + mla being issued back to back.
 def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
                                              "HasVMLxForwarding", "true",
                                              "Has multiplier accumulator forwarding">;
 
 // Disable 32-bit to 16-bit narrowing for experimentation.
-// True if codegen would prefer 32-bit Thumb instructions over 16-bit ones.
-def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Prefers32BitThumb", "true",
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
                                              "Prefer 32-bit Thumb instrs">;
 
 def FeaturePrefLoopAlign32 : SubtargetFeature<"loop-align", "PrefLoopLogAlignment","2",
@@ -391,22 +332,17 @@ def FeatureMVEVectorCostFactor4 : SubtargetFeature<"mve4beat", "MVEVectorCostFac
 /// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is
 /// mapped to a separate physical register. Avoid partial CPSR update for these
 /// processors.
-/// True if codegen would avoid using instructions
-/// that partially update CPSR and add false dependency on the previous
-/// CPSR setting instruction.
 def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
                                                "AvoidCPSRPartialUpdate", "true",
                                                "Avoid CPSR partial update for OOO execution">;
 
 /// Disable +1 predication cost for instructions updating CPSR.
 /// Enabled for Cortex-A57.
-/// True if disable +1 predication cost for instructions updating CPSR. Enabled for Cortex-A57.
 def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
                                                   "CheapPredicableCPSRDef", "true",
                                                   "Disable +1 predication cost for instructions updating CPSR">;
 
-// True if codegen should avoid using flag setting movs with shifter operand (i.e. asr, lsl, lsr).
 def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
                                             "AvoidMOVsShifterOperand", "true",
                                             "Avoid movs instructions with "
@@ -421,20 +357,16 @@ def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack",
 // Some processors have no branch predictor, which changes the expected cost of
 // taking a branch which affects the choice of whether to use predicated
 // instructions.
-// True if the subtarget has a branch predictor. Having
-// a branch predictor or not changes the expected cost of taking a branch
-// which affects the choice of whether to use predicated instructions.
 def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
                                                    "HasBranchPredictor", "false",
                                                    "Has no branch predictor">;
 
 /// DSP extension.
-/// True if the subtarget supports the DSP (saturating arith and such) instructions.
 def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
                                   "Supports DSP instructions in "
                                   "ARM and/or Thumb2">;
 
-// True if the subtarget supports Multiprocessing extension (ARMv7 only).
+// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; @@ -446,42 +378,31 @@ def FeatureVirtualization : SubtargetFeature<"virtualization", // Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. // See ARMInstrInfo.td for details. -// True if NaCl TRAP instruction is generated instead of the regular TRAP. def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", "NaCl trap">; -// True if the subtarget disallows unaligned memory -// accesses for some types. For details, see -// ARMTargetLowering::allowsMisalignedMemoryAccesses(). def FeatureStrictAlign : SubtargetFeature<"strict-align", "StrictAlign", "true", "Disallow all unaligned memory " "access">; -// Generate calls via indirect call instructions. def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", "Generate calls via indirect call " "instructions">; -// Generate code that does not contain data access to code sections. def FeatureExecuteOnly : SubtargetFeature<"execute-only", "GenExecuteOnly", "true", "Enable the generation of " "execute only code.">; -// True if R9 is not available as a general purpose register. def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", "Reserve R9, making it unavailable" " as GPR">; -// True if MOVT / MOVW pairs are not used for materialization of -// 32-bit imms (including global addresses). def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", "Don't use movt/movw pairs for " "32-bit imms">; -/// Implicitly convert an instruction to a different one if its immediates -/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "NegativeImmediates", "false", @@ -494,35 +415,28 @@ def FeatureNoNegativeImmediates def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", "Use the MachineScheduler">; -// False if scheduling should happen again after register allocation. def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", "DisablePostRAScheduler", "true", "Don't schedule again after register allocation">; // Armv8.5-A extensions -// Has speculation barrier. def FeatureSB : SubtargetFeature<"sb", "HasSB", "true", "Enable v8.5a Speculation Barrier" >; // Armv8.6-A extensions - -// True if subtarget supports BFloat16 floating point operations. def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true", "Enable support for BFloat16 instructions", [FeatureNEON]>; -// True if subtarget supports 8-bit integer matrix multiply. def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", "true", "Enable Matrix Multiply Int8 Extension", [FeatureNEON]>; // Armv8.1-M extensions -// True if the processor supports the Low Overhead Branch extension. def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true", "Enable Low Overhead Branch " "extensions">; -// Mitigate against the cve-2021-35465 security vulnurability. def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465", "FixCMSE_CVE_2021_35465", "true", "Mitigate against the cve-2021-35465 " @@ -532,7 +446,6 @@ def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true", "Enable Pointer Authentication and Branch " "Target Identification">; -/// Don't place a BTI instruction after return-twice constructs (setjmp). 
def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", "NoBTIAtReturnTwice", "true", "Don't place a BTI instruction " @@ -554,18 +467,16 @@ def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", "Is microcontroller profile ('M' series)">; -// True if Thumb2 instructions are supported. + def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; -// True if subtarget does not support ARM mode execution. def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; //===----------------------------------------------------------------------===// // ARM ISAa. // -// Specify whether target support specific ARM ISA variants. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; @@ -688,16 +599,13 @@ foreach i = {0-7} in // Control codegen mitigation against Straight Line Speculation vulnerability. //===----------------------------------------------------------------------===// -/// Harden against Straight Line Speculation for Returns and Indirect Branches. def FeatureHardenSlsRetBr : SubtargetFeature<"harden-sls-retbr", "HardenSlsRetBr", "true", "Harden against straight line speculation across RETurn and BranchRegister " "instructions">; -/// Harden against Straight Line Speculation for indirect calls. def FeatureHardenSlsBlr : SubtargetFeature<"harden-sls-blr", "HardenSlsBlr", "true", "Harden against straight line speculation across indirect calls">; -/// Generate thunk code for SLS mitigation in the normal text section. def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat", "HardenSlsNoComdat", "true", "Generate thunk code for SLS mitigation in the normal text section">; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index cdf1cbf183aa..52d173439e46 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2337,7 +2337,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Lower 'returns_twice' calls to a pseudo-instruction. if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && - !Subtarget->noBTIAtReturnTwice()) + !Subtarget->getNoBTIAtReturnTwice()) GuardWithBTI = AFI->branchTargetEnforcement(); // Determine whether this is a non-secure function call. 
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index f1040ee8c790..8d335a3ea520 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -245,7 +245,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { const FeatureBitset &Bits = getFeatureBits(); if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters (Options.UnsafeFPMath || isTargetDarwin())) - HasNEONForFP = true; + UseNEONForSinglePrecisionFP = true; if (isRWPI()) ReserveR9 = true; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index b9c6245179a9..13f6d44ebe62 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -150,11 +150,6 @@ public: }; protected: -// Bool members corresponding to the SubtargetFeatures defined in tablegen -#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ - bool ATTRIBUTE = DEFAULT; -#include "ARMGenSubtargetInfo.inc" - /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily = Others; @@ -164,22 +159,343 @@ protected: /// ARMArch - ARM architecture ARMArchEnum ARMArch = ARMv4t; + /// HasV4TOps, HasV5TOps, HasV5TEOps, + /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - + /// Specify whether target support specific ARM ISA variants. + bool HasV4TOps = false; + bool HasV5TOps = false; + bool HasV5TEOps = false; + bool HasV6Ops = false; + bool HasV6MOps = false; + bool HasV6KOps = false; + bool HasV6T2Ops = false; + bool HasV7Ops = false; + bool HasV8Ops = false; + bool HasV8_1aOps = false; + bool HasV8_2aOps = false; + bool HasV8_3aOps = false; + bool HasV8_4aOps = false; + bool HasV8_5aOps = false; + bool HasV8_6aOps = false; + bool HasV8_8aOps = false; + bool HasV8_7aOps = false; + bool HasV9_0aOps = false; + bool HasV9_1aOps = false; + bool HasV9_2aOps = false; + bool HasV9_3aOps = false; + bool HasV8MBaselineOps = false; + bool HasV8MMainlineOps = false; + bool HasV8_1MMainlineOps = false; + bool HasMVEIntegerOps = false; + bool HasMVEFloatOps = false; + bool HasCDEOps = false; + + /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what + /// floating point ISAs are supported. + bool HasVFPv2 = false; + bool HasVFPv3 = false; + bool HasVFPv4 = false; + bool HasFPARMv8 = false; + bool HasNEON = false; + bool HasFPRegs = false; + bool HasFPRegs16 = false; + bool HasFPRegs64 = false; + + /// Versions of the VFP flags restricted to single precision, or to + /// 16 d-registers, or both. + bool HasVFPv2SP = false; + bool HasVFPv3SP = false; + bool HasVFPv4SP = false; + bool HasFPARMv8SP = false; + bool HasVFPv3D16 = false; + bool HasVFPv4D16 = false; + bool HasFPARMv8D16 = false; + bool HasVFPv3D16SP = false; + bool HasVFPv4D16SP = false; + bool HasFPARMv8D16SP = false; + + /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. + bool HasDotProd = false; + + /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been + /// specified. Use the method useNEONForSinglePrecisionFP() to + /// determine if NEON should actually be used. + bool UseNEONForSinglePrecisionFP = false; + /// UseMulOps - True if non-microcoded fused integer multiply-add and /// multiply-subtract instructions should be used. bool UseMulOps = false; + /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates + /// whether the FP VML[AS] instructions are slow (if so, don't use them). 
+ bool SlowFPVMLx = false; + + /// SlowFPVFMx - If the VFP4 / NEON instructions are available, indicates + /// whether the FP VFM[AS] instructions are slow (if so, don't use them). + bool SlowFPVFMx = false; + + /// HasVMLxForwarding - If true, NEON has special multiplier accumulator + /// forwarding to allow mul + mla being issued back to back. + bool HasVMLxForwarding = false; + + /// SlowFPBrcc - True if floating point compare + branch is slow. + bool SlowFPBrcc = false; + + /// InThumbMode - True if compiling for Thumb, false for ARM. + bool InThumbMode = false; + + /// UseSoftFloat - True if we're using software floating point features. + bool UseSoftFloat = false; + + /// UseMISched - True if MachineScheduler should be used for this subtarget. + bool UseMISched = false; + + /// DisablePostRAScheduler - False if scheduling should happen again after + /// register allocation. + bool DisablePostRAScheduler = false; + + /// HasThumb2 - True if Thumb2 instructions are supported. + bool HasThumb2 = false; + + /// NoARM - True if subtarget does not support ARM mode execution. + bool NoARM = false; + + /// ReserveR9 - True if R9 is not available as a general purpose register. + bool ReserveR9 = false; + + /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of + /// 32-bit imms (including global addresses). + bool NoMovt = false; + /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. bool SupportsTailCall = false; + /// HasFP16 - True if subtarget supports half-precision FP conversions + bool HasFP16 = false; + + /// HasFullFP16 - True if subtarget supports half-precision FP operations + bool HasFullFP16 = false; + + /// HasFP16FML - True if subtarget supports half-precision FP fml operations + bool HasFP16FML = false; + + /// HasBF16 - True if subtarget supports BFloat16 floating point operations + bool HasBF16 = false; + + /// HasMatMulInt8 - True if subtarget supports 8-bit integer matrix multiply + bool HasMatMulInt8 = false; + + /// HasD32 - True if subtarget has the full 32 double precision + /// FP registers for VFPv3. + bool HasD32 = false; + + /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode + bool HasHardwareDivideInThumb = false; + + /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode + bool HasHardwareDivideInARM = false; + + /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier + /// instructions. + bool HasDataBarrier = false; + + /// HasFullDataBarrier - True if the subtarget supports DFB data barrier + /// instruction. + bool HasFullDataBarrier = false; + + /// HasV7Clrex - True if the subtarget supports CLREX instructions + bool HasV7Clrex = false; + + /// HasAcquireRelease - True if the subtarget supports v8 atomics (LDA/LDAEX etc) + /// instructions + bool HasAcquireRelease = false; + + /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions + /// over 16-bit ones. + bool Pref32BitThumb = false; + + /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions + /// that partially update CPSR and add false dependency on the previous + /// CPSR setting instruction. + bool AvoidCPSRPartialUpdate = false; + + /// CheapPredicableCPSRDef - If true, disable +1 predication cost + /// for instructions updating CPSR. Enabled for Cortex-A57. 
+ bool CheapPredicableCPSRDef = false; + + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting + /// movs with shifter operand (i.e. asr, lsl, lsr). + bool AvoidMOVsShifterOperand = false; + + /// HasRetAddrStack - Some processors perform return stack prediction. CodeGen should + /// avoid issue "normal" call instructions to callees which do not return. + bool HasRetAddrStack = false; + + /// HasBranchPredictor - True if the subtarget has a branch predictor. Having + /// a branch predictor or not changes the expected cost of taking a branch + /// which affects the choice of whether to use predicated instructions. + bool HasBranchPredictor = true; + + /// HasMPExtension - True if the subtarget supports Multiprocessing + /// extension (ARMv7 only). + bool HasMPExtension = false; + + /// HasVirtualization - True if the subtarget supports the Virtualization + /// extension. + bool HasVirtualization = false; + + /// HasFP64 - If true, the floating point unit supports double + /// precision. + bool HasFP64 = false; + + /// If true, the processor supports the Performance Monitor Extensions. These + /// include a generic cycle-counter as well as more fine-grained (often + /// implementation-specific) events. + bool HasPerfMon = false; + + /// HasTrustZone - if true, processor supports TrustZone security extensions + bool HasTrustZone = false; + + /// Has8MSecExt - if true, processor supports ARMv8-M Security Extensions + bool Has8MSecExt = false; + + /// HasSHA2 - if true, processor supports SHA1 and SHA256 + bool HasSHA2 = false; + + /// HasAES - if true, processor supports AES + bool HasAES = false; + + /// HasCrypto - if true, processor supports Cryptography extensions + bool HasCrypto = false; + + /// HasCRC - if true, processor supports CRC instructions + bool HasCRC = false; + + /// HasRAS - if true, the processor supports RAS extensions + bool HasRAS = false; + + /// HasLOB - if true, the processor supports the Low Overhead Branch extension + bool HasLOB = false; + + bool HasPACBTI = false; + + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are + /// particularly effective at zeroing a VFP register. + bool HasZeroCycleZeroing = false; + + /// HasFPAO - if true, processor does positive address offset computation faster + bool HasFPAO = false; + + /// HasFuseAES - if true, processor executes back to back AES instruction + /// pairs faster. + bool HasFuseAES = false; + + /// HasFuseLiterals - if true, processor executes back to back + /// bottom and top halves of literal generation faster. + bool HasFuseLiterals = false; + + /// If true, if conversion may decide to leave some instructions unpredicated. + bool IsProfitableToUnpredicate = false; + + /// If true, VMOV will be favored over VGETLNi32. + bool HasSlowVGETLNi32 = false; + + /// If true, VMOV will be favored over VDUP. + bool HasSlowVDUP32 = false; + + /// If true, VMOVSR will be favored over VMOVDRR. + bool PreferVMOVSR = false; + + /// If true, ISHST barriers will be used for Release semantics. + bool PreferISHST = false; + + /// If true, a VLDM/VSTM starting with an odd register number is considered to + /// take more microops than single VLDRS/VSTRS. + bool SlowOddRegister = false; + + /// If true, loading into a D subregister will be penalized. + bool SlowLoadDSubregister = false; + + /// If true, use a wider stride when allocating VFP registers. + bool UseWideStrideVFP = false; + + /// If true, the AGU and NEON/FPU units are multiplexed. 
+  bool HasMuxedUnits = false;
+
+  /// If true, VMOVS will never be widened to VMOVD.
+  bool DontWidenVMOVS = false;
+
+  /// If true, splat a register between VFP and NEON instructions.
+  bool SplatVFPToNeon = false;
+
+  /// If true, run the MLx expansion pass.
+  bool ExpandMLx = false;
+
+  /// If true, VFP/NEON VMLA/VMLS have special RAW hazards.
+  bool HasVMLxHazards = false;
+
+  // If true, read thread pointer from coprocessor register.
+  bool ReadTPHard = false;
+
+  /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
+  bool UseNEONForFPMovs = false;
+
+  /// If true, VLDn instructions take an extra cycle for unaligned accesses.
+  bool CheckVLDnAlign = false;
+
+  /// If true, VFP instructions are not pipelined.
+  bool NonpipelinedVFP = false;
+
+  /// StrictAlign - If true, the subtarget disallows unaligned memory
+  /// accesses for some types. For details, see
+  /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
+  bool StrictAlign = false;
+
   /// RestrictIT - If true, the subtarget disallows generation of complex IT
   /// blocks.
   bool RestrictIT = false;
 
+  /// HasDSP - If true, the subtarget supports the DSP (saturating arith
+  /// and such) instructions.
+  bool HasDSP = false;
+
+  /// NaCl TRAP instruction is generated instead of the regular TRAP.
+  bool UseNaClTrap = false;
+
+  /// Generate calls via indirect call instructions.
+  bool GenLongCalls = false;
+
+  /// Generate code that does not contain data access to code sections.
+  bool GenExecuteOnly = false;
+
+  /// Target machine allowed unsafe FP math (such as use of NEON fp)
+  bool UnsafeFPMath = false;
+
   /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
   bool UseSjLjEH = false;
 
+  /// Has speculation barrier
+  bool HasSB = false;
+
+  /// Implicitly convert an instruction to a different one if its immediates
+  /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
+  bool NegativeImmediates = true;
+
+  /// Mitigate against the cve-2021-35465 security vulnerability.
+  bool FixCMSE_CVE_2021_35465 = false;
+
+  /// Harden against Straight Line Speculation for Returns and Indirect
+  /// Branches.
+  bool HardenSlsRetBr = false;
+
+  /// Harden against Straight Line Speculation for indirect calls.
+  bool HardenSlsBlr = false;
+
+  /// Generate thunk code for SLS mitigation in the normal text section.
+  bool HardenSlsNoComdat = false;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   Align stackAlignment = Align(4);
@@ -224,6 +540,10 @@ protected:
   /// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins; + /// NoBTIAtReturnTwice - Don't place a BTI instruction after + /// return-twice constructs (setjmp) + bool NoBTIAtReturnTwice = false; + /// Options passed via command line that could influence the target const TargetOptions &Options; @@ -302,13 +622,39 @@ private: std::bitset<8> CoprocCDE = {}; public: -// Getters for SubtargetFeatures defined in tablegen -#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ - bool GETTER() const { return ATTRIBUTE; } -#include "ARMGenSubtargetInfo.inc" - void computeIssueWidth(); + bool hasV4TOps() const { return HasV4TOps; } + bool hasV5TOps() const { return HasV5TOps; } + bool hasV5TEOps() const { return HasV5TEOps; } + bool hasV6Ops() const { return HasV6Ops; } + bool hasV6MOps() const { return HasV6MOps; } + bool hasV6KOps() const { return HasV6KOps; } + bool hasV6T2Ops() const { return HasV6T2Ops; } + bool hasV7Ops() const { return HasV7Ops; } + bool hasV8Ops() const { return HasV8Ops; } + bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasV8_2aOps() const { return HasV8_2aOps; } + bool hasV8_3aOps() const { return HasV8_3aOps; } + bool hasV8_4aOps() const { return HasV8_4aOps; } + bool hasV8_5aOps() const { return HasV8_5aOps; } + bool hasV8_6aOps() const { return HasV8_6aOps; } + bool hasV8_7aOps() const { return HasV8_7aOps; } + bool hasV8_8aOps() const { return HasV8_8aOps; } + bool hasV9_0aOps() const { return HasV9_0aOps; } + bool hasV9_1aOps() const { return HasV9_1aOps; } + bool hasV9_2aOps() const { return HasV9_2aOps; } + bool hasV9_3aOps() const { return HasV9_3aOps; } + bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } + bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } + bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; } + bool hasMVEIntegerOps() const { return HasMVEIntegerOps; } + bool hasMVEFloatOps() const { return HasMVEFloatOps; } + bool hasCDEOps() const { return HasCDEOps; } + bool hasFPRegs() const { return HasFPRegs; } + bool hasFPRegs16() const { return HasFPRegs16; } + bool hasFPRegs64() const { return HasFPRegs64; } + /// @{ /// These functions are obsolete, please consider adding subtarget features /// or properties instead of calling them. 
@@ -327,14 +673,31 @@ public: bool hasARMOps() const { return !NoARM; } + bool hasVFP2Base() const { return HasVFPv2SP; } + bool hasVFP3Base() const { return HasVFPv3D16SP; } + bool hasVFP4Base() const { return HasVFPv4D16SP; } + bool hasFPARMv8Base() const { return HasFPARMv8D16SP; } + bool hasNEON() const { return HasNEON; } + bool hasSHA2() const { return HasSHA2; } + bool hasAES() const { return HasAES; } + bool hasCrypto() const { return HasCrypto; } + bool hasDotProd() const { return HasDotProd; } + bool hasCRC() const { return HasCRC; } + bool hasRAS() const { return HasRAS; } + bool hasLOB() const { return HasLOB; } + bool hasPACBTI() const { return HasPACBTI; } + bool hasVirtualization() const { return HasVirtualization; } + bool useNEONForSinglePrecisionFP() const { - return hasNEON() && hasNEONForFP(); + return hasNEON() && UseNEONForSinglePrecisionFP; } - bool hasVFP2Base() const { return hasVFPv2SP(); } - bool hasVFP3Base() const { return hasVFPv3D16SP(); } - bool hasVFP4Base() const { return hasVFPv4D16SP(); } - bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); } + bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } + bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } + bool hasDataBarrier() const { return HasDataBarrier; } + bool hasFullDataBarrier() const { return HasFullDataBarrier; } + bool hasV7Clrex() const { return HasV7Clrex; } + bool hasAcquireRelease() const { return HasAcquireRelease; } bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); @@ -347,7 +710,43 @@ public: } bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); } bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); } + bool hasVMLxForwarding() const { return HasVMLxForwarding; } + bool isFPBrccSlow() const { return SlowFPBrcc; } + bool hasFP64() const { return HasFP64; } + bool hasPerfMon() const { return HasPerfMon; } + bool hasTrustZone() const { return HasTrustZone; } + bool has8MSecExt() const { return Has8MSecExt; } + bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool hasFPAO() const { return HasFPAO; } + bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } + bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } + bool hasSlowVDUP32() const { return HasSlowVDUP32; } + bool preferVMOVSR() const { return PreferVMOVSR; } + bool preferISHSTBarriers() const { return PreferISHST; } + bool expandMLx() const { return ExpandMLx; } + bool hasVMLxHazards() const { return HasVMLxHazards; } + bool hasSlowOddRegister() const { return SlowOddRegister; } + bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; } + bool useWideStrideVFP() const { return UseWideStrideVFP; } + bool hasMuxedUnits() const { return HasMuxedUnits; } + bool dontWidenVMOVS() const { return DontWidenVMOVS; } + bool useSplatVFPToNeon() const { return SplatVFPToNeon; } + bool useNEONForFPMovs() const { return UseNEONForFPMovs; } + bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; } + bool nonpipelinedVFP() const { return NonpipelinedVFP; } + bool prefers32BitThumb() const { return Pref32BitThumb; } + bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } + bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } + bool hasRetAddrStack() const { return HasRetAddrStack; } + bool hasBranchPredictor() const { return HasBranchPredictor; } + bool hasMPExtension() const { return 
HasMPExtension; } + bool hasDSP() const { return HasDSP; } + bool useNaClTrap() const { return UseNaClTrap; } bool useSjLjEH() const { return UseSjLjEH; } + bool hasSB() const { return HasSB; } + bool genLongCalls() const { return GenLongCalls; } + bool genExecuteOnly() const { return GenExecuteOnly; } bool hasBaseDSP() const { if (isThumb()) return hasDSP(); @@ -355,9 +754,19 @@ public: return hasV5TEOps(); } + bool hasFP16() const { return HasFP16; } + bool hasD32() const { return HasD32; } + bool hasFullFP16() const { return HasFullFP16; } + bool hasFP16FML() const { return HasFP16FML; } + bool hasBF16() const { return HasBF16; } + + bool hasFuseAES() const { return HasFuseAES; } + bool hasFuseLiterals() const { return HasFuseLiterals; } /// Return true if the CPU supports any kind of instruction fusion. bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); } + bool hasMatMulInt8() const { return HasMatMulInt8; } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } @@ -417,12 +826,17 @@ public: bool isRWPI() const; bool useMachineScheduler() const { return UseMISched; } + bool disablePostRAScheduler() const { return DisablePostRAScheduler; } + bool useSoftFloat() const { return UseSoftFloat; } + bool isThumb() const { return InThumbMode; } bool hasMinSize() const { return OptMinSize; } - bool isThumb1Only() const { return isThumb() && !hasThumb2(); } - bool isThumb2() const { return isThumb() && hasThumb2(); } + bool isThumb1Only() const { return InThumbMode && !HasThumb2; } + bool isThumb2() const { return InThumbMode && HasThumb2; } + bool hasThumb2() const { return HasThumb2; } bool isMClass() const { return ARMProcClass == MClass; } bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } + bool isReadTPHard() const { return ReadTPHard; } bool isR9Reserved() const { return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; @@ -543,6 +957,14 @@ public: bool ignoreCSRForAllocationOrder(const MachineFunction &MF, unsigned PhysReg) const override; unsigned getGPRAllocationOrder(const MachineFunction &MF) const; + + bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; } + + bool hardenSlsRetBr() const { return HardenSlsRetBr; } + bool hardenSlsBlr() const { return HardenSlsBlr; } + bool hardenSlsNoComdat() const { return HardenSlsNoComdat; } + + bool getNoBTIAtReturnTwice() const { return NoBTIAtReturnTwice; } }; } // end namespace llvm diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index dad94ccae585..78bbb3196e5c 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -1803,32 +1803,6 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n"; - OS << "\n#ifdef GET_SUBTARGETINFO_MACRO\n"; - std::vector FeatureList = - Records.getAllDerivedDefinitions("SubtargetFeature"); - llvm::sort(FeatureList, LessRecordFieldName()); - for (const Record *Feature : FeatureList) { - const StringRef Attribute = Feature->getValueAsString("Attribute"); - const StringRef Value = Feature->getValueAsString("Value"); - - // Only handle boolean features for now, excluding BitVectors and enums. 
-    const bool IsBool = (Value == "false" || Value == "true") &&
-                        !StringRef(Attribute).contains('[');
-    if (!IsBool)
-      continue;
-
-    // Some features default to true, with values set to false if enabled.
-    const char *Default = Value == "false" ? "true" : "false";
-
-    // Define the getter with lowercased first char: xxxYyy() { return XxxYyy; }
-    const auto Getter = Attribute.substr(0, 1).lower() + Attribute.substr(1);
-
-    OS << "GET_SUBTARGETINFO_MACRO(" << Attribute << ", " << Default << ", "
-       << Getter << ")\n";
-  }
-  OS << "#undef GET_SUBTARGETINFO_MACRO\n";
-  OS << "#endif // GET_SUBTARGETINFO_MACRO\n\n";
-
  OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n";
  OS << "#undef GET_SUBTARGETINFO_MC_DESC\n\n";
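
For context on the mechanism this patch deletes: the emitter code removed above wrote one GET_SUBTARGETINFO_MACRO(Attribute, Default, Getter) invocation per boolean SubtargetFeature into the generated *GenSubtargetInfo.inc, and the subtarget headers expanded that block twice, once for bool members and once for getters. The sketch below is illustrative only and is not part of the patch; the .inc excerpt is hand-written rather than generated, using the FeatureNoZCZeroingFP and FeatureZCZeroingGP records shown earlier (FeatureNoZCZeroingFP sets its attribute to "false", so the member defaults to true).

    // AArch64GenSubtargetInfo.inc (illustrative excerpt of the generated block)
    #ifdef GET_SUBTARGETINFO_MACRO
    GET_SUBTARGETINFO_MACRO(HasZeroCycleZeroingFP, true, hasZeroCycleZeroingFP)
    GET_SUBTARGETINFO_MACRO(HasZeroCycleZeroingGP, false, hasZeroCycleZeroingGP)
    #undef GET_SUBTARGETINFO_MACRO
    #endif // GET_SUBTARGETINFO_MACRO

    // AArch64Subtarget.h: consumer pattern removed by this patch, first for members...
    #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                  \
      bool ATTRIBUTE = DEFAULT;
    #include "AArch64GenSubtargetInfo.inc"
    // ...expands to: bool HasZeroCycleZeroingFP = true;
    //                bool HasZeroCycleZeroingGP = false;

    // ...then for getters:
    #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                  \
      bool GETTER() const { return ATTRIBUTE; }
    #include "AArch64GenSubtargetInfo.inc"
    // ...expands to: bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
    //                bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }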