forked from OSchip/llvm-project
[AArch64] Split out processor/tuning features
Following on from an earlier patch that introduced support for -mtune in the AArch64 backend, this patch splits out the tuning features from the processor features. This gives us the ability to enable architectural feature set A for a given processor with "-mcpu=A" and define the set of tuning features B with "-mtune=B". It's quite difficult to write a test that proves we select the right features according to the tuning attribute, because most of these features relate to scheduling. I have added a test, CodeGen/AArch64/misched-fusion-addr-tune.ll, that demonstrates the different scheduling choices based upon the tuning. Differential Revision: https://reviews.llvm.org/D111551
This commit is contained in:
parent
23db763b7d
commit
5ea35791e6
|
@ -446,6 +446,10 @@ def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true",
|
||||||
def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true",
|
def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true",
|
||||||
"Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>;
|
"Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>;
|
||||||
|
|
||||||
|
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
|
||||||
|
"Apple A7 (the CPU formerly known as Cyclone)">;
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Architectures.
|
// Architectures.
|
||||||
//
|
//
|
||||||
|
@ -597,678 +601,516 @@ include "AArch64SchedA64FX.td"
|
||||||
include "AArch64SchedThunderX3T110.td"
|
include "AArch64SchedThunderX3T110.td"
|
||||||
include "AArch64SchedTSV110.td"
|
include "AArch64SchedTSV110.td"
|
||||||
|
|
||||||
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
|
def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
|
||||||
"Cortex-A35 ARM processors", [
|
"Cortex-A35 ARM processors">;
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
|
def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
|
||||||
"Cortex-A53 ARM processors", [
|
"Cortex-A53 ARM processors", [
|
||||||
|
FeatureFuseAES,
|
||||||
FeatureBalanceFPOps,
|
FeatureBalanceFPOps,
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureCustomCheapAsMoveHandling,
|
FeatureCustomCheapAsMoveHandling,
|
||||||
FeatureFPARMv8,
|
FeaturePostRAScheduler]>;
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
|
def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
|
||||||
"Cortex-A55 ARM processors", [
|
"Cortex-A55 ARM processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureNEON,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureRCPC,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeatureFuseAddress,
|
FeatureFuseAddress]>;
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
|
def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
|
||||||
"Cortex-A510 ARM processors", [
|
"Cortex-A510 ARM processors", [
|
||||||
HasV9_0aOps,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureMatMulInt8,
|
|
||||||
FeatureBF16,
|
|
||||||
FeatureAM,
|
|
||||||
FeatureMTE,
|
|
||||||
FeatureETE,
|
|
||||||
FeatureSVE2BitPerm,
|
|
||||||
FeatureFP16FML,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeaturePostRAScheduler
|
FeaturePostRAScheduler
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
|
def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
|
||||||
"Cortex-A57 ARM processors", [
|
"Cortex-A57 ARM processors", [
|
||||||
|
FeatureFuseAES,
|
||||||
FeatureBalanceFPOps,
|
FeatureBalanceFPOps,
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureCustomCheapAsMoveHandling,
|
FeatureCustomCheapAsMoveHandling,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureFuseLiterals,
|
FeatureFuseLiterals,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive
|
FeaturePredictableSelectIsExpensive]>;
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
|
def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
|
||||||
"Cortex-A65 ARM processors", [
|
"Cortex-A65 ARM processors", [
|
||||||
HasV8_2aOps,
|
FeatureFuseAES,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureFuseAddress,
|
FeatureFuseAddress,
|
||||||
FeatureFuseAES,
|
FeatureFuseLiterals]>;
|
||||||
FeatureFuseLiterals,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureRAS,
|
|
||||||
FeatureRCPC,
|
|
||||||
FeatureSSBS,
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
|
def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
|
||||||
"Cortex-A72 ARM processors", [
|
"Cortex-A72 ARM processors", [
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseLiterals,
|
FeatureFuseLiterals]>;
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
|
def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
|
||||||
"Cortex-A73 ARM processors", [
|
"Cortex-A73 ARM processors", [
|
||||||
FeatureCRC,
|
FeatureFuseAES]>;
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
|
def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
|
||||||
"Cortex-A75 ARM processors", [
|
"Cortex-A75 ARM processors", [
|
||||||
HasV8_2aOps,
|
FeatureFuseAES]>;
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureRCPC,
|
|
||||||
FeaturePerfMon
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
|
def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
|
||||||
"Cortex-A76 ARM processors", [
|
"Cortex-A76 ARM processors", [
|
||||||
HasV8_2aOps,
|
FeatureFuseAES]>;
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureRCPC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureSSBS
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
|
def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
|
||||||
"Cortex-A77 ARM processors", [
|
"Cortex-A77 ARM processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCmpBccFusion,
|
FeatureCmpBccFusion,
|
||||||
FeatureFPARMv8,
|
FeatureFuseAES]>;
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureNEON, FeatureRCPC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureDotProd
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
|
def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
|
||||||
"CortexA78",
|
|
||||||
"Cortex-A78 ARM processors", [
|
"Cortex-A78 ARM processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCmpBccFusion,
|
FeatureCmpBccFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureNEON,
|
FeaturePostRAScheduler]>;
|
||||||
FeatureRCPC,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
|
||||||
FeatureSPE,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureSSBS,
|
|
||||||
FeatureDotProd]>;
|
|
||||||
|
|
||||||
def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily",
|
def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
|
||||||
"CortexA78C",
|
"CortexA78C",
|
||||||
"Cortex-A78C ARM processors", [
|
"Cortex-A78C ARM processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCmpBccFusion,
|
FeatureCmpBccFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureFlagM,
|
|
||||||
FeatureFP16FML,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureNEON,
|
FeaturePostRAScheduler]>;
|
||||||
FeaturePAuth,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
|
||||||
FeatureRCPC,
|
|
||||||
FeatureSPE,
|
|
||||||
FeatureSSBS]>;
|
|
||||||
|
|
||||||
def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
|
def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
|
||||||
"CortexR82",
|
"CortexR82",
|
||||||
"Cortex-R82 ARM Processors", [
|
"Cortex-R82 ARM processors", [
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler]>;
|
||||||
// All other features are implied by v8_0r ops:
|
|
||||||
HasV8_0rOps,
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
|
def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
|
||||||
"Cortex-X1 ARM processors", [
|
"Cortex-X1 ARM processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCmpBccFusion,
|
FeatureCmpBccFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureNEON,
|
FeaturePostRAScheduler]>;
|
||||||
FeatureRCPC,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
|
||||||
FeatureSPE,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureDotProd]>;
|
|
||||||
|
|
||||||
def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
|
def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
|
||||||
"Fujitsu A64FX processors", [
|
"Fujitsu A64FX processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureSHA2,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureSVE,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeatureComplxNum,
|
|
||||||
FeatureAggressiveFMA,
|
FeatureAggressiveFMA,
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeaturePredictableSelectIsExpensive
|
FeaturePredictableSelectIsExpensive
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
|
def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
|
||||||
"Nvidia Carmel processors", [
|
"Nvidia Carmel processors">;
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFullFP16
|
|
||||||
]>;
|
|
||||||
|
|
||||||
// Note that Cyclone does not fuse AES instructions, but newer Apple chips do
|
// Note that Cyclone does not fuse AES instructions, but newer Apple chips do
|
||||||
// perform the fusion and Cyclone is used by default when targeting Apple OSes.
|
// perform the fusion and Cyclone is used by default when targeting Apple OSes.
|
||||||
def ProcAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
|
def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
|
||||||
"Apple A7 (the CPU formerly known as Cyclone)", [
|
"Apple A7 (the CPU formerly known as Cyclone)", [
|
||||||
FeatureAlternateSExtLoadCVTF32Pattern,
|
FeatureAlternateSExtLoadCVTF32Pattern,
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureArithmeticCbzFusion,
|
FeatureArithmeticCbzFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDisableLatencySchedHeuristic,
|
FeatureDisableLatencySchedHeuristic,
|
||||||
FeatureFPARMv8,
|
FeatureFuseAES, FeatureFuseCryptoEOR,
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureFuseCryptoEOR,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureZCRegMove,
|
FeatureZCRegMove,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing,
|
||||||
FeatureZCZeroingFPWorkaround
|
FeatureZCZeroingFPWorkaround]
|
||||||
]>;
|
>;
|
||||||
|
|
||||||
def ProcAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
|
def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
|
||||||
"Apple A10", [
|
"Apple A10", [
|
||||||
FeatureAlternateSExtLoadCVTF32Pattern,
|
FeatureAlternateSExtLoadCVTF32Pattern,
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureArithmeticCbzFusion,
|
FeatureArithmeticCbzFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDisableLatencySchedHeuristic,
|
FeatureDisableLatencySchedHeuristic,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseCryptoEOR,
|
FeatureFuseCryptoEOR,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureZCRegMove,
|
FeatureZCRegMove,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing]
|
||||||
FeatureCRC,
|
>;
|
||||||
FeatureRDM,
|
|
||||||
FeaturePAN,
|
|
||||||
FeatureLOR,
|
|
||||||
FeatureVH,
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
|
def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
|
||||||
"Apple A11", [
|
"Apple A11", [
|
||||||
FeatureAlternateSExtLoadCVTF32Pattern,
|
FeatureAlternateSExtLoadCVTF32Pattern,
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureArithmeticCbzFusion,
|
FeatureArithmeticCbzFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDisableLatencySchedHeuristic,
|
FeatureDisableLatencySchedHeuristic,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseCryptoEOR,
|
FeatureFuseCryptoEOR,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureZCRegMove,
|
FeatureZCRegMove,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing]
|
||||||
FeatureFullFP16,
|
>;
|
||||||
HasV8_2aOps
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
|
def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
|
||||||
"Apple A12", [
|
"Apple A12", [
|
||||||
FeatureAlternateSExtLoadCVTF32Pattern,
|
FeatureAlternateSExtLoadCVTF32Pattern,
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureArithmeticCbzFusion,
|
FeatureArithmeticCbzFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDisableLatencySchedHeuristic,
|
FeatureDisableLatencySchedHeuristic,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseCryptoEOR,
|
FeatureFuseCryptoEOR,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureZCRegMove,
|
FeatureZCRegMove,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing]
|
||||||
FeatureFullFP16,
|
>;
|
||||||
HasV8_3aOps
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
|
def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
|
||||||
"Apple A13", [
|
"Apple A13", [
|
||||||
FeatureAlternateSExtLoadCVTF32Pattern,
|
FeatureAlternateSExtLoadCVTF32Pattern,
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureArithmeticCbzFusion,
|
FeatureArithmeticCbzFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDisableLatencySchedHeuristic,
|
FeatureDisableLatencySchedHeuristic,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseCryptoEOR,
|
FeatureFuseCryptoEOR,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureZCRegMove,
|
FeatureZCRegMove,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing]
|
||||||
FeatureFullFP16,
|
>;
|
||||||
FeatureFP16FML,
|
|
||||||
FeatureSHA3,
|
|
||||||
HasV8_4aOps
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
|
def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
|
||||||
"Apple A14", [
|
"Apple A14", [
|
||||||
FeatureAggressiveFMA,
|
FeatureAggressiveFMA,
|
||||||
FeatureAlternateSExtLoadCVTF32Pattern,
|
FeatureAlternateSExtLoadCVTF32Pattern,
|
||||||
FeatureAltFPCmp,
|
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureArithmeticCbzFusion,
|
FeatureArithmeticCbzFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDisableLatencySchedHeuristic,
|
FeatureDisableLatencySchedHeuristic,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFRInt3264,
|
|
||||||
FeatureFuseAddress,
|
FeatureFuseAddress,
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseArithmeticLogic,
|
FeatureFuseArithmeticLogic,
|
||||||
FeatureFuseCCSelect,
|
FeatureFuseCCSelect,
|
||||||
FeatureFuseCryptoEOR,
|
FeatureFuseCryptoEOR,
|
||||||
FeatureFuseLiterals,
|
FeatureFuseLiterals,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeatureSpecRestrict,
|
|
||||||
FeatureSSBS,
|
|
||||||
FeatureSB,
|
|
||||||
FeaturePredRes,
|
|
||||||
FeatureCacheDeepPersist,
|
|
||||||
FeatureZCRegMove,
|
FeatureZCRegMove,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing]>;
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureFP16FML,
|
|
||||||
FeatureSHA3,
|
|
||||||
HasV8_4aOps
|
|
||||||
]>;
|
|
||||||
|
|
||||||
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
|
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
|
||||||
"Samsung Exynos-M3 processors",
|
"Samsung Exynos-M3 processors",
|
||||||
[FeatureCRC,
|
[FeatureExynosCheapAsMoveHandling,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureExynosCheapAsMoveHandling,
|
|
||||||
FeatureForce32BitJumpTables,
|
FeatureForce32BitJumpTables,
|
||||||
FeatureFuseAddress,
|
FeatureFuseAddress,
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseCCSelect,
|
FeatureFuseCCSelect,
|
||||||
FeatureFuseLiterals,
|
FeatureFuseLiterals,
|
||||||
FeatureLSLFast,
|
FeatureLSLFast,
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive]>;
|
FeaturePredictableSelectIsExpensive]>;
|
||||||
|
|
||||||
def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
|
def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
|
||||||
"Samsung Exynos-M4 processors",
|
"Samsung Exynos-M3 processors",
|
||||||
[HasV8_2aOps,
|
[FeatureArithmeticBccFusion,
|
||||||
FeatureArithmeticBccFusion,
|
|
||||||
FeatureArithmeticCbzFusion,
|
FeatureArithmeticCbzFusion,
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureExynosCheapAsMoveHandling,
|
FeatureExynosCheapAsMoveHandling,
|
||||||
FeatureForce32BitJumpTables,
|
FeatureForce32BitJumpTables,
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureFuseAddress,
|
FeatureFuseAddress,
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureFuseArithmeticLogic,
|
FeatureFuseArithmeticLogic,
|
||||||
FeatureFuseCCSelect,
|
FeatureFuseCCSelect,
|
||||||
FeatureFuseLiterals,
|
FeatureFuseLiterals,
|
||||||
FeatureLSLFast,
|
FeatureLSLFast,
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeatureZCZeroing]>;
|
FeatureZCZeroing]>;
|
||||||
|
|
||||||
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
|
def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
|
||||||
"Qualcomm Kryo processors", [
|
"Qualcomm Kryo processors", [
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureCustomCheapAsMoveHandling,
|
FeatureCustomCheapAsMoveHandling,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing,
|
||||||
FeatureLSLFast
|
FeatureLSLFast]
|
||||||
]>;
|
>;
|
||||||
|
|
||||||
def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
|
def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
|
||||||
"Qualcomm Falkor processors", [
|
"Qualcomm Falkor processors", [
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureCustomCheapAsMoveHandling,
|
FeatureCustomCheapAsMoveHandling,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive,
|
||||||
FeatureRDM,
|
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing,
|
||||||
FeatureLSLFast,
|
FeatureLSLFast,
|
||||||
FeatureSlowSTRQro
|
FeatureSlowSTRQro
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def ProcNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily",
|
def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1",
|
||||||
"NeoverseE1",
|
|
||||||
"Neoverse E1 ARM processors", [
|
"Neoverse E1 ARM processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureRCPC,
|
|
||||||
FeatureSSBS,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeatureFuseAES,
|
FeatureFuseAES
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily",
|
def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1",
|
||||||
"NeoverseN1",
|
|
||||||
"Neoverse N1 ARM processors", [
|
"Neoverse N1 ARM processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureDotProd,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureRCPC,
|
|
||||||
FeatureSPE,
|
|
||||||
FeatureSSBS,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeatureFuseAES,
|
FeatureFuseAES
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily",
|
def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2",
|
||||||
"NeoverseN2",
|
|
||||||
"Neoverse N2 ARM processors", [
|
"Neoverse N2 ARM processors", [
|
||||||
HasV8_5aOps,
|
|
||||||
FeatureBF16,
|
|
||||||
FeatureETE,
|
|
||||||
FeatureMatMulInt8,
|
|
||||||
FeatureMTE,
|
|
||||||
FeatureSVE2,
|
|
||||||
FeatureSVE2BitPerm,
|
|
||||||
FeatureTRBE,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeatureCrypto,
|
FeatureFuseAES
|
||||||
FeatureFuseAES,
|
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily",
|
def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1",
|
||||||
"NeoverseV1",
|
|
||||||
"Neoverse V1 ARM processors", [
|
"Neoverse V1 ARM processors", [
|
||||||
HasV8_4aOps,
|
|
||||||
FeatureBF16,
|
|
||||||
FeatureCacheDeepPersist,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFP16FML,
|
|
||||||
FeatureFullFP16,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureMatMulInt8,
|
FeaturePostRAScheduler]>;
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
|
||||||
FeatureRandGen,
|
|
||||||
FeatureSPE,
|
|
||||||
FeatureSSBS,
|
|
||||||
FeatureSVE]>;
|
|
||||||
|
|
||||||
def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
|
def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
|
||||||
"Qualcomm Saphira processors", [
|
"Qualcomm Saphira processors", [
|
||||||
FeatureCrypto,
|
|
||||||
FeatureCustomCheapAsMoveHandling,
|
FeatureCustomCheapAsMoveHandling,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureNEON,
|
|
||||||
FeatureSPE,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive,
|
||||||
FeatureZCZeroing,
|
FeatureZCZeroing,
|
||||||
FeatureLSLFast,
|
FeatureLSLFast]>;
|
||||||
HasV8_4aOps]>;
|
|
||||||
|
|
||||||
def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
|
def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99",
|
||||||
"ThunderX2T99",
|
|
||||||
"Cavium ThunderX2 processors", [
|
"Cavium ThunderX2 processors", [
|
||||||
FeatureAggressiveFMA,
|
FeatureAggressiveFMA,
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive]>;
|
||||||
FeatureLSE,
|
|
||||||
HasV8_1aOps]>;
|
|
||||||
|
|
||||||
def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
|
def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
|
||||||
"ThunderX3T110",
|
"ThunderX3T110",
|
||||||
"Marvell ThunderX3 processors", [
|
"Marvell ThunderX3 processors", [
|
||||||
FeatureAggressiveFMA,
|
FeatureAggressiveFMA,
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureArithmeticBccFusion,
|
FeatureArithmeticBccFusion,
|
||||||
FeatureNEON,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive,
|
||||||
FeatureLSE,
|
|
||||||
FeaturePAuth,
|
|
||||||
FeatureBalanceFPOps,
|
FeatureBalanceFPOps,
|
||||||
FeaturePerfMon,
|
FeatureStrictAlign]>;
|
||||||
FeatureStrictAlign,
|
|
||||||
HasV8_3aOps]>;
|
|
||||||
|
|
||||||
def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
|
def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
|
||||||
"Cavium ThunderX processors", [
|
"Cavium ThunderX processors", [
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive]>;
|
||||||
FeatureNEON]>;
|
|
||||||
|
|
||||||
def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
|
def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
|
||||||
"ThunderXT88",
|
"ThunderXT88",
|
||||||
"Cavium ThunderX processors", [
|
"Cavium ThunderX processors", [
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive]>;
|
||||||
FeatureNEON]>;
|
|
||||||
|
|
||||||
def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
|
def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
|
||||||
"ThunderXT81",
|
"ThunderXT81",
|
||||||
"Cavium ThunderX processors", [
|
"Cavium ThunderX processors", [
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive]>;
|
||||||
FeatureNEON]>;
|
|
||||||
|
|
||||||
def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
|
def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
|
||||||
"ThunderXT83",
|
"ThunderXT83",
|
||||||
"Cavium ThunderX processors", [
|
"Cavium ThunderX processors", [
|
||||||
FeatureCRC,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
FeaturePostRAScheduler,
|
||||||
FeaturePredictableSelectIsExpensive,
|
FeaturePredictableSelectIsExpensive]>;
|
||||||
FeatureNEON]>;
|
|
||||||
|
|
||||||
def ProcTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
|
def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
|
||||||
"HiSilicon TS-V110 processors", [
|
"HiSilicon TS-V110 processors", [
|
||||||
HasV8_2aOps,
|
|
||||||
FeatureCrypto,
|
|
||||||
FeatureCustomCheapAsMoveHandling,
|
FeatureCustomCheapAsMoveHandling,
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
FeatureFuseAES,
|
||||||
FeatureNEON,
|
FeaturePostRAScheduler]>;
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
|
||||||
FeatureSPE,
|
def ProcessorFeatures {
|
||||||
FeatureFullFP16,
|
list<SubtargetFeature> A53 = [FeatureCRC, FeatureCrypto, FeatureFPARMv8,
|
||||||
FeatureFP16FML,
|
FeatureNEON, FeaturePerfMon];
|
||||||
FeatureDotProd]>;
|
list<SubtargetFeature> A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureFullFP16, FeatureDotProd,
|
||||||
|
FeatureRCPC, FeaturePerfMon];
|
||||||
|
list<SubtargetFeature> A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
|
||||||
|
FeatureMatMulInt8, FeatureBF16, FeatureAM,
|
||||||
|
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
|
||||||
|
FeatureFP16FML];
|
||||||
|
list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureFullFP16, FeatureDotProd,
|
||||||
|
FeatureRCPC, FeatureSSBS, FeatureRAS];
|
||||||
|
list<SubtargetFeature> A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureFullFP16, FeatureDotProd,
|
||||||
|
FeatureRCPC, FeatureSSBS];
|
||||||
|
list<SubtargetFeature> A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureFullFP16, FeatureDotProd,
|
||||||
|
FeatureRCPC];
|
||||||
|
list<SubtargetFeature> A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureFullFP16, FeatureDotProd,
|
||||||
|
FeatureRCPC, FeaturePerfMon, FeatureSPE,
|
||||||
|
FeatureSSBS];
|
||||||
|
list<SubtargetFeature> A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureFullFP16, FeatureDotProd,
|
||||||
|
FeatureFlagM, FeatureFP16FML, FeaturePAuth,
|
||||||
|
FeaturePerfMon, FeatureRCPC, FeatureSPE,
|
||||||
|
FeatureSSBS];
|
||||||
|
list<SubtargetFeature> R82 = [HasV8_0rOps];
|
||||||
|
list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureRCPC, FeaturePerfMon,
|
||||||
|
FeatureSPE, FeatureFullFP16, FeatureDotProd];
|
||||||
|
list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,
|
||||||
|
FeatureSHA2, FeaturePerfMon, FeatureFullFP16,
|
||||||
|
FeatureSVE, FeatureComplxNum];
|
||||||
|
list<SubtargetFeature> Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto,
|
||||||
|
FeatureFullFP16];
|
||||||
|
list<SubtargetFeature> AppleA7 = [FeatureCrypto, FeatureFPARMv8, FeatureNEON,
|
||||||
|
FeaturePerfMon, FeatureAppleA7SysReg];
|
||||||
|
list<SubtargetFeature> AppleA10 = [FeatureCrypto, FeatureFPARMv8, FeatureNEON,
|
||||||
|
FeaturePerfMon, FeatureCRC, FeatureRDM,
|
||||||
|
FeaturePAN, FeatureLOR, FeatureVH];
|
||||||
|
list<SubtargetFeature> AppleA11 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeaturePerfMon, FeatureFullFP16];
|
||||||
|
list<SubtargetFeature> AppleA12 = [HasV8_3aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeaturePerfMon, FeatureFullFP16];
|
||||||
|
list<SubtargetFeature> AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeaturePerfMon, FeatureFullFP16,
|
||||||
|
FeatureFP16FML, FeatureSHA3];
|
||||||
|
list<SubtargetFeature> AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeaturePerfMon, FeatureFRInt3264,
|
||||||
|
FeatureSpecRestrict, FeatureSSBS, FeatureSB,
|
||||||
|
FeaturePredRes, FeatureCacheDeepPersist,
|
||||||
|
FeatureFullFP16, FeatureFP16FML, FeatureSHA3,
|
||||||
|
FeatureAltFPCmp];
|
||||||
|
list<SubtargetFeature> ExynosM3 = [FeatureCRC, FeatureCrypto, FeaturePerfMon];
|
||||||
|
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
|
||||||
|
FeatureFullFP16, FeaturePerfMon];
|
||||||
|
list<SubtargetFeature> Falkor = [FeatureCRC, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeaturePerfMon, FeatureRDM];
|
||||||
|
list<SubtargetFeature> NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
|
||||||
|
FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
|
||||||
|
FeatureRCPC, FeatureSSBS];
|
||||||
|
list<SubtargetFeature> NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
|
||||||
|
FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
|
||||||
|
FeatureRCPC, FeatureSPE, FeatureSSBS];
|
||||||
|
list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE,
|
||||||
|
FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
|
||||||
|
FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto];
|
||||||
|
list<SubtargetFeature> NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
|
||||||
|
FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
|
||||||
|
FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
|
||||||
|
FeaturePerfMon, FeatureRandGen, FeatureSPE,
|
||||||
|
FeatureSSBS, FeatureSVE];
|
||||||
|
list<SubtargetFeature> Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeatureSPE, FeaturePerfMon];
|
||||||
|
list<SubtargetFeature> ThunderX = [FeatureCRC, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeaturePerfMon, FeatureNEON];
|
||||||
|
list<SubtargetFeature> ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto,
|
||||||
|
FeatureFPARMv8, FeatureNEON, FeatureLSE];
|
||||||
|
list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto,
|
||||||
|
FeatureFPARMv8, FeatureNEON, FeatureLSE,
|
||||||
|
FeaturePAuth, FeaturePerfMon];
|
||||||
|
list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||||
|
FeatureNEON, FeaturePerfMon, FeatureSPE,
|
||||||
|
FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
|
||||||
|
|
||||||
def : ProcessorModel<"generic", CortexA55Model, [
|
|
||||||
FeatureFPARMv8,
|
|
||||||
FeatureFuseAES,
|
|
||||||
FeatureNEON,
|
|
||||||
FeaturePerfMon,
|
|
||||||
FeaturePostRAScheduler,
|
|
||||||
// ETE and TRBE are future architecture extensions. We temporarily enable them
|
// ETE and TRBE are future architecture extensions. We temporarily enable them
|
||||||
// by default for users targeting generic AArch64, until it is decided in which
|
// by default for users targeting generic AArch64. The extensions do not
|
||||||
// armv8.x-a architecture revision they will end up. The extensions do not
|
|
||||||
// affect code generated by the compiler and can be used only by explicitly
|
// affect code generated by the compiler and can be used only by explicitly
|
||||||
// mentioning the new system register names in assembly.
|
// mentioning the new system register names in assembly.
|
||||||
FeatureETE
|
list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureETE];
|
||||||
]>;
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,
|
||||||
|
[FeatureFuseAES, FeaturePostRAScheduler]>;
|
||||||
|
def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,
|
||||||
|
[TuneA35]>;
|
||||||
|
def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53,
|
||||||
|
[TuneA35]>;
|
||||||
|
def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53,
|
||||||
|
[TuneA53]>;
|
||||||
|
def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
|
||||||
|
[TuneA55]>;
|
||||||
|
def : ProcessorModel<"cortex-a510", CortexA55Model, ProcessorFeatures.A510,
|
||||||
|
[TuneA510]>;
|
||||||
|
def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
|
||||||
|
[TuneA57]>;
|
||||||
|
def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,
|
||||||
|
[TuneA65]>;
|
||||||
|
def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65,
|
||||||
|
[TuneA65]>;
|
||||||
|
def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53,
|
||||||
|
[TuneA72]>;
|
||||||
|
def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53,
|
||||||
|
[TuneA73]>;
|
||||||
|
def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55,
|
||||||
|
[TuneA75]>;
|
||||||
|
def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76,
|
||||||
|
[TuneA76]>;
|
||||||
|
def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76,
|
||||||
|
[TuneA76]>;
|
||||||
|
def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77,
|
||||||
|
[TuneA77]>;
|
||||||
|
def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78,
|
||||||
|
[TuneA78]>;
|
||||||
|
def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C,
|
||||||
|
[TuneA78C]>;
|
||||||
|
def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
|
||||||
|
[TuneR82]>;
|
||||||
|
def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
|
||||||
|
[TuneX1]>;
|
||||||
|
def : ProcessorModel<"neoverse-e1", CortexA53Model,
|
||||||
|
ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
|
||||||
|
def : ProcessorModel<"neoverse-n1", CortexA57Model,
|
||||||
|
ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>;
|
||||||
|
def : ProcessorModel<"neoverse-n2", CortexA57Model,
|
||||||
|
ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>;
|
||||||
|
def : ProcessorModel<"neoverse-v1", CortexA57Model,
|
||||||
|
ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>;
|
||||||
|
def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3,
|
||||||
|
[TuneExynosM3]>;
|
||||||
|
def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4,
|
||||||
|
[TuneExynosM4]>;
|
||||||
|
def : ProcessorModel<"exynos-m5", ExynosM5Model, ProcessorFeatures.ExynosM4,
|
||||||
|
[TuneExynosM4]>;
|
||||||
|
def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor,
|
||||||
|
[TuneFalkor]>;
|
||||||
|
def : ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira,
|
||||||
|
[TuneSaphira]>;
|
||||||
|
def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>;
|
||||||
|
|
||||||
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
|
|
||||||
def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
|
|
||||||
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
|
|
||||||
def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>;
|
|
||||||
def : ProcessorModel<"cortex-a510", CortexA55Model, [ProcA510]>;
|
|
||||||
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
|
|
||||||
def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>;
|
|
||||||
def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>;
|
|
||||||
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
|
|
||||||
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
|
|
||||||
def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
|
|
||||||
def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
|
|
||||||
def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
|
|
||||||
def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>;
|
|
||||||
def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>;
|
|
||||||
def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>;
|
|
||||||
def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>;
|
|
||||||
def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>;
|
|
||||||
def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
|
|
||||||
def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
|
|
||||||
def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>;
|
|
||||||
def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>;
|
|
||||||
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
|
|
||||||
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
|
|
||||||
def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
|
|
||||||
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
|
|
||||||
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
|
|
||||||
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
|
|
||||||
// Cavium ThunderX/ThunderX T8X Processors
|
// Cavium ThunderX/ThunderX T8X Processors
|
||||||
def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
|
def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX,
|
||||||
def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
|
[TuneThunderX]>;
|
||||||
def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
|
def : ProcessorModel<"thunderxt88", ThunderXT8XModel,
|
||||||
def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
|
ProcessorFeatures.ThunderX, [TuneThunderXT88]>;
|
||||||
|
def : ProcessorModel<"thunderxt81", ThunderXT8XModel,
|
||||||
|
ProcessorFeatures.ThunderX, [TuneThunderXT81]>;
|
||||||
|
def : ProcessorModel<"thunderxt83", ThunderXT8XModel,
|
||||||
|
ProcessorFeatures.ThunderX, [TuneThunderXT83]>;
|
||||||
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
|
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
|
||||||
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
|
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model,
|
||||||
|
ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>;
|
||||||
// Marvell ThunderX3T110 Processors.
|
// Marvell ThunderX3T110 Processors.
|
||||||
def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>;
|
def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
|
||||||
def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>;
|
ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>;
|
||||||
|
def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
|
||||||
|
[TuneTSV110]>;
|
||||||
|
|
||||||
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
|
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
|
||||||
def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>;
|
def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
|
||||||
|
[TuneAppleA7]>;
|
||||||
|
|
||||||
// iPhone and iPad CPUs
|
// iPhone and iPad CPUs
|
||||||
def : ProcessorModel<"apple-a7", CycloneModel, [ProcAppleA7]>;
|
def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7,
|
||||||
def : ProcessorModel<"apple-a8", CycloneModel, [ProcAppleA7]>;
|
[TuneAppleA7]>;
|
||||||
def : ProcessorModel<"apple-a9", CycloneModel, [ProcAppleA7]>;
|
def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7,
|
||||||
def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>;
|
[TuneAppleA7]>;
|
||||||
def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>;
|
def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7,
|
||||||
def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>;
|
[TuneAppleA7]>;
|
||||||
def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>;
|
def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10,
|
||||||
def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>;
|
[TuneAppleA10]>;
|
||||||
|
def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11,
|
||||||
|
[TuneAppleA11]>;
|
||||||
|
def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12,
|
||||||
|
[TuneAppleA12]>;
|
||||||
|
def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13,
|
||||||
|
[TuneAppleA13]>;
|
||||||
|
def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14,
|
||||||
|
[TuneAppleA14]>;
|
||||||
|
|
||||||
// Mac CPUs
|
// Mac CPUs
|
||||||
def : ProcessorModel<"apple-m1", CycloneModel, [ProcAppleA14]>;
|
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
|
||||||
|
[TuneAppleA14]>;
|
||||||
|
|
||||||
// watch CPUs.
|
// watch CPUs.
|
||||||
def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>;
|
def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,
|
||||||
def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
|
[TuneAppleA12]>;
|
||||||
|
def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
|
||||||
|
[TuneAppleA12]>;
|
||||||
|
|
||||||
// Alias for the latest Apple processor model supported by LLVM.
|
// Alias for the latest Apple processor model supported by LLVM.
|
||||||
def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA14]>;
|
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14,
|
||||||
|
[TuneAppleA14]>;
|
||||||
|
|
||||||
// Fujitsu A64FX
|
// Fujitsu A64FX
|
||||||
def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>;
|
def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
|
||||||
|
[TuneA64FX]>;
|
||||||
|
|
||||||
// Nvidia Carmel
|
// Nvidia Carmel
|
||||||
def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>;
|
def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel,
|
||||||
|
[TuneCarmel]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Assembly parser
|
// Assembly parser
|
||||||
|
|
|
@ -197,6 +197,9 @@ protected:
|
||||||
bool HasSMEI64 = false;
|
bool HasSMEI64 = false;
|
||||||
bool HasStreamingSVE = false;
|
bool HasStreamingSVE = false;
|
||||||
|
|
||||||
|
// AppleA7 system register.
|
||||||
|
bool HasAppleA7SysReg = false;
|
||||||
|
|
||||||
// Future architecture extensions.
|
// Future architecture extensions.
|
||||||
bool HasETE = false;
|
bool HasETE = false;
|
||||||
bool HasTRBE = false;
|
bool HasTRBE = false;
|
||||||
|
|
|
@ -1635,7 +1635,7 @@ def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
|
||||||
|
|
||||||
// Cyclone specific system registers
|
// Cyclone specific system registers
|
||||||
// Op0 Op1 CRn CRm Op2
|
// Op0 Op1 CRn CRm Op2
|
||||||
let Requires = [{ {AArch64::ProcAppleA7} }] in
|
let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in
|
||||||
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
|
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
|
||||||
|
|
||||||
// Scalable Matrix Extension (SME)
|
// Scalable Matrix Extension (SME)
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s
|
||||||
|
|
||||||
|
@var_float = dso_local global float 0.0
|
||||||
|
@var_double = dso_local global double 0.0
|
||||||
|
@var_double2 = dso_local global <2 x double> <double 0.0, double 0.0>
|
||||||
|
|
||||||
|
define dso_local void @ldst_double() {
|
||||||
|
%valf = load volatile float, float* @var_float
|
||||||
|
%vale = fpext float %valf to double
|
||||||
|
%vald = load volatile double, double* @var_double
|
||||||
|
%vald1 = insertelement <2 x double> undef, double %vald, i32 0
|
||||||
|
%vald2 = insertelement <2 x double> %vald1, double %vale, i32 1
|
||||||
|
store volatile <2 x double> %vald2, <2 x double>* @var_double2
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: ldst_double:
|
||||||
|
; CHECK: adrp [[RD:x[0-9]+]], var_double
|
||||||
|
; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
|
||||||
|
; CHECK: adrp [[RQ:x[0-9]+]], var_double2
|
||||||
|
; CHECK-NEXT: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
|
||||||
|
}
|
||||||
|
|
||||||
|
define dso_local void @ldst_double_tune_a53() #0 {
|
||||||
|
%valf = load volatile float, float* @var_float
|
||||||
|
%vale = fpext float %valf to double
|
||||||
|
%vald = load volatile double, double* @var_double
|
||||||
|
%vald1 = insertelement <2 x double> undef, double %vald, i32 0
|
||||||
|
%vald2 = insertelement <2 x double> %vald1, double %vale, i32 1
|
||||||
|
store volatile <2 x double> %vald2, <2 x double>* @var_double2
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: ldst_double_tune_a53:
|
||||||
|
; CHECK: adrp [[RD:x[0-9]+]], var_double
|
||||||
|
; CHECK-NEXT: ldr {{d[0-9]+}}, {{\[}}[[RD]], {{#?}}:lo12:var_double{{\]}}
|
||||||
|
; CHECK-NEXT: adrp [[RQ:x[0-9]+]], var_double2
|
||||||
|
; CHECK: fcvt
|
||||||
|
; CHECK: str {{q[0-9]+}}, {{\[}}[[RQ]], {{#?}}:lo12:var_double2{{\]}}
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "tune-cpu"="cortex-a53" }
|
Loading…
Reference in New Issue