AArch64: add support for newer Apple CPUs

They're roughly ARMv8.6. This works in the .td file, but in
AArch64TargetParser.def, marking them v8.6 brings in support for the SM4
cryptographic hash and we don't actually have that. So TargetParser side
they're marked as v8.5, with the extra features (BF16 and I8MM added manually).

Finally, A16 supports the HCX extension in addition to v8.6. This has no
TargetParser implications.
This commit is contained in:
Tim Northover 2022-09-20 15:03:26 +01:00
parent e030be64d8
commit 677da09d02
6 changed files with 90 additions and 5 deletions

View File

@ -5,11 +5,11 @@
// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
// AARCH64: error: unknown target CPU 'not-a-cpu'
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'

View File

@ -253,8 +253,17 @@ AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-a14", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-a15", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("apple-a16", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("apple-m1", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-m2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16))
AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,

View File

@ -857,6 +857,38 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureZCRegMove,
FeatureZCZeroing]>;
def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
"Apple A15", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
FeatureDisableLatencySchedHeuristic,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureZCRegMove,
FeatureZCZeroing
]>;
def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
"Apple A16", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
FeatureDisableLatencySchedHeuristic,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureZCRegMove,
FeatureZCZeroing
]>;
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureExynosCheapAsMoveHandling,
@ -1072,6 +1104,13 @@ def ProcessorFeatures {
FeaturePredRes, FeatureCacheDeepPersist,
FeatureFullFP16, FeatureFP16FML, FeatureSHA3,
FeatureAltFPCmp];
list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML];
list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX];
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
FeaturePerfMon];
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
@ -1229,10 +1268,16 @@ def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13,
[TuneAppleA13]>;
def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;
def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;
// Mac CPUs
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;
// watch CPUs.
def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,
@ -1241,8 +1286,8 @@ def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
[TuneAppleA12]>;
// Alias for the latest Apple processor model supported by LLVM.
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;
// Fujitsu A64FX
def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,

View File

@ -160,6 +160,8 @@ void AArch64Subtarget::initializeProperties() {
case AppleA12:
case AppleA13:
case AppleA14:
case AppleA15:
case AppleA16:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;

View File

@ -47,6 +47,8 @@ public:
AppleA12,
AppleA13,
AppleA14,
AppleA15,
AppleA16,
Carmel,
CortexA35,
CortexA53,

View File

@ -1112,6 +1112,24 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3,
"8.5-A"),
ARMCPUTestParams("apple-a15", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
AArch64::AEK_LSE | AArch64::AEK_RAS |
AArch64::AEK_RDM | AArch64::AEK_RCPC |
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
"8.5-A"),
ARMCPUTestParams("apple-a16", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
AArch64::AEK_LSE | AArch64::AEK_RAS |
AArch64::AEK_RDM | AArch64::AEK_RCPC |
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
"8.5-A"),
ARMCPUTestParams("apple-m1", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
@ -1120,6 +1138,15 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3,
"8.5-A"),
ARMCPUTestParams("apple-m2", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
AArch64::AEK_LSE | AArch64::AEK_RAS |
AArch64::AEK_RDM | AArch64::AEK_RCPC |
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
"8.5-A"),
ARMCPUTestParams("apple-s4", "armv8.3-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
@ -1257,7 +1284,7 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_LSE | AArch64::AEK_RDM,
"8.2-A")));
static constexpr unsigned NumAArch64CPUArchs = 54;
static constexpr unsigned NumAArch64CPUArchs = 57;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;