From bd2cf96c098ed0020f5178eda12cf30d3980d9bd Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 24 Oct 2020 18:00:20 +0200 Subject: [PATCH] [X86] Add a stub for znver3 based on the little public information there is in AMD's manuals No scheduling, no autodetection. Just enough so -march=znver3 works. --- clang/lib/Basic/Targets/X86.cpp | 4 + clang/test/Misc/target-invalid-cpu-note.c | 8 +- .../Preprocessor/predefined-arch-macros.c | 104 ++++++++++++++++++ llvm/include/llvm/Support/X86TargetParser.h | 1 + llvm/lib/Support/X86TargetParser.cpp | 4 + llvm/lib/Target/X86/X86.td | 9 ++ llvm/test/CodeGen/X86/cpus-amd.ll | 1 + 7 files changed, 127 insertions(+), 4 deletions(-) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index c8d96f887e90..3239e3231cc0 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -542,6 +542,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_ZNVER2: defineCPUMacros(Builder, "znver2"); break; + case CK_ZNVER3: + defineCPUMacros(Builder, "znver3"); + break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; @@ -1325,6 +1328,7 @@ Optional X86TargetInfo::getCPUCacheLineSize() const { // Zen case CK_ZNVER1: case CK_ZNVER2: + case CK_ZNVER3: // Deprecated case CK_x86_64: case CK_x86_64_v2: diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index a2d20c31caa4..82165de6f079 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -24,7 +24,7 @@ // X86-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, icelake-server, tigerlake, sapphirerapids, knl, knm, lakemont, k6, k6-2, k6-3, // X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, // X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, -// X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, +// X86-SAME: 
barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, +// X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, // X86-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 @@ -35,7 +35,7 @@ // X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, // X86_64-SAME: icelake-client, icelake-server, tigerlake, sapphirerapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, // X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, -// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, +// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, // X86_64-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 @@ -49,7 +49,7 @@ // TUNE_X86-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, icelake-server, tigerlake, sapphirerapids, knl, knm, lakemont, k6, k6-2, k6-3, // TUNE_X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, // TUNE_X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, -// TUNE_X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, +// TUNE_X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, // TUNE_X86-SAME: x86-64, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 @@ -63,7 +63,7 @@ // TUNE_X86_64-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, icelake-server, tigerlake, sapphirerapids, knl, knm, lakemont, k6, k6-2, k6-3, // TUNE_X86_64-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, // TUNE_X86_64-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, -//
TUNE_X86_64-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, +// TUNE_X86_64-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, // TUNE_X86_64-SAME: x86-64, geode{{$}} // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index f8ee187b15f3..65089e8f97c7 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -3154,6 +3154,110 @@ // CHECK_ZNVER2_M64: #define __znver2 1 // CHECK_ZNVER2_M64: #define __znver2__ 1 +// RUN: %clang -march=znver3 -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M32 +// CHECK_ZNVER3_M32-NOT: #define __3dNOW_A__ 1 +// CHECK_ZNVER3_M32-NOT: #define __3dNOW__ 1 +// CHECK_ZNVER3_M32: #define __ADX__ 1 +// CHECK_ZNVER3_M32: #define __AES__ 1 +// CHECK_ZNVER3_M32: #define __AVX2__ 1 +// CHECK_ZNVER3_M32: #define __AVX__ 1 +// CHECK_ZNVER3_M32: #define __BMI2__ 1 +// CHECK_ZNVER3_M32: #define __BMI__ 1 +// CHECK_ZNVER3_M32: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER3_M32: #define __CLWB__ 1 +// CHECK_ZNVER3_M32: #define __CLZERO__ 1 +// CHECK_ZNVER3_M32: #define __F16C__ 1 +// CHECK_ZNVER3_M32-NOT: #define __FMA4__ 1 +// CHECK_ZNVER3_M32: #define __FMA__ 1 +// CHECK_ZNVER3_M32: #define __FSGSBASE__ 1 +// CHECK_ZNVER3_M32: #define __LZCNT__ 1 +// CHECK_ZNVER3_M32: #define __MMX__ 1 +// CHECK_ZNVER3_M32: #define __PCLMUL__ 1 +// CHECK_ZNVER3_M32: #define __PKU__ 1 +// CHECK_ZNVER3_M32: #define __POPCNT__ 1 +// CHECK_ZNVER3_M32: #define __PRFCHW__ 1 +// CHECK_ZNVER3_M32: #define __RDPID__ 1 +// CHECK_ZNVER3_M32: #define __RDRND__ 1 +// CHECK_ZNVER3_M32: #define __RDSEED__ 1 +// CHECK_ZNVER3_M32: #define __SHA__ 1 +// CHECK_ZNVER3_M32: #define __SSE2_MATH__ 1 
+// CHECK_ZNVER3_M32: #define __SSE2__ 1 +// CHECK_ZNVER3_M32: #define __SSE3__ 1 +// CHECK_ZNVER3_M32: #define __SSE4A__ 1 +// CHECK_ZNVER3_M32: #define __SSE4_1__ 1 +// CHECK_ZNVER3_M32: #define __SSE4_2__ 1 +// CHECK_ZNVER3_M32: #define __SSE_MATH__ 1 +// CHECK_ZNVER3_M32: #define __SSE__ 1 +// CHECK_ZNVER3_M32: #define __SSSE3__ 1 +// CHECK_ZNVER3_M32-NOT: #define __TBM__ 1 +// CHECK_ZNVER3_M32: #define __WBNOINVD__ 1 +// CHECK_ZNVER3_M32-NOT: #define __XOP__ 1 +// CHECK_ZNVER3_M32: #define __XSAVEC__ 1 +// CHECK_ZNVER3_M32: #define __XSAVEOPT__ 1 +// CHECK_ZNVER3_M32: #define __XSAVES__ 1 +// CHECK_ZNVER3_M32: #define __XSAVE__ 1 +// CHECK_ZNVER3_M32: #define __i386 1 +// CHECK_ZNVER3_M32: #define __i386__ 1 +// CHECK_ZNVER3_M32: #define __tune_znver3__ 1 +// CHECK_ZNVER3_M32: #define __znver3 1 +// CHECK_ZNVER3_M32: #define __znver3__ 1 + +// RUN: %clang -march=znver3 -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M64 +// CHECK_ZNVER3_M64-NOT: #define __3dNOW_A__ 1 +// CHECK_ZNVER3_M64-NOT: #define __3dNOW__ 1 +// CHECK_ZNVER3_M64: #define __ADX__ 1 +// CHECK_ZNVER3_M64: #define __AES__ 1 +// CHECK_ZNVER3_M64: #define __AVX2__ 1 +// CHECK_ZNVER3_M64: #define __AVX__ 1 +// CHECK_ZNVER3_M64: #define __BMI2__ 1 +// CHECK_ZNVER3_M64: #define __BMI__ 1 +// CHECK_ZNVER3_M64: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER3_M64: #define __CLWB__ 1 +// CHECK_ZNVER3_M64: #define __CLZERO__ 1 +// CHECK_ZNVER3_M64: #define __F16C__ 1 +// CHECK_ZNVER3_M64-NOT: #define __FMA4__ 1 +// CHECK_ZNVER3_M64: #define __FMA__ 1 +// CHECK_ZNVER3_M64: #define __FSGSBASE__ 1 +// CHECK_ZNVER3_M64: #define __LZCNT__ 1 +// CHECK_ZNVER3_M64: #define __MMX__ 1 +// CHECK_ZNVER3_M64: #define __PCLMUL__ 1 +// CHECK_ZNVER3_M64: #define __PKU__ 1 +// CHECK_ZNVER3_M64: #define __POPCNT__ 1 +// CHECK_ZNVER3_M64: #define __PRFCHW__ 1 +// CHECK_ZNVER3_M64: #define __RDPID__ 1 +// CHECK_ZNVER3_M64: #define __RDRND__ 1 
+// CHECK_ZNVER3_M64: #define __RDSEED__ 1 +// CHECK_ZNVER3_M64: #define __SHA__ 1 +// CHECK_ZNVER3_M64: #define __SSE2_MATH__ 1 +// CHECK_ZNVER3_M64: #define __SSE2__ 1 +// CHECK_ZNVER3_M64: #define __SSE3__ 1 +// CHECK_ZNVER3_M64: #define __SSE4A__ 1 +// CHECK_ZNVER3_M64: #define __SSE4_1__ 1 +// CHECK_ZNVER3_M64: #define __SSE4_2__ 1 +// CHECK_ZNVER3_M64: #define __SSE_MATH__ 1 +// CHECK_ZNVER3_M64: #define __SSE__ 1 +// CHECK_ZNVER3_M64: #define __SSSE3__ 1 +// CHECK_ZNVER3_M64-NOT: #define __TBM__ 1 +// CHECK_ZNVER3_M64: #define __VAES__ 1 +// CHECK_ZNVER3_M64: #define __VPCLMULQDQ__ 1 +// CHECK_ZNVER3_M64: #define __WBNOINVD__ 1 +// CHECK_ZNVER3_M64-NOT: #define __XOP__ 1 +// CHECK_ZNVER3_M64: #define __XSAVEC__ 1 +// CHECK_ZNVER3_M64: #define __XSAVEOPT__ 1 +// CHECK_ZNVER3_M64: #define __XSAVES__ 1 +// CHECK_ZNVER3_M64: #define __XSAVE__ 1 +// CHECK_ZNVER3_M64: #define __amd64 1 +// CHECK_ZNVER3_M64: #define __amd64__ 1 +// CHECK_ZNVER3_M64: #define __tune_znver3__ 1 +// CHECK_ZNVER3_M64: #define __x86_64 1 +// CHECK_ZNVER3_M64: #define __x86_64__ 1 +// CHECK_ZNVER3_M64: #define __znver3 1 +// CHECK_ZNVER3_M64: #define __znver3__ 1 + // End X86/GCC/Linux tests ------------------ // Begin PPC/GCC/Linux tests ---------------- diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h index e66a074aa000..9a2599bd88b4 100644 --- a/llvm/include/llvm/Support/X86TargetParser.h +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -120,6 +120,7 @@ enum CPUKind { CK_BDVER4, CK_ZNVER1, CK_ZNVER2, + CK_ZNVER3, CK_x86_64, CK_x86_64_v2, CK_x86_64_v3, diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index 7b1e0c0bc685..0a803d0a6a3c 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -279,6 +279,9 @@ constexpr FeatureBitset FeaturesZNVER1 = FeatureXSAVEOPT | FeatureXSAVES; constexpr FeatureBitset FeaturesZNVER2 = FeaturesZNVER1 | FeatureCLWB | 
FeatureRDPID | FeatureWBNOINVD; +static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 | + FeatureINVPCID | FeaturePKU | + FeatureVAES | FeatureVPCLMULQDQ; constexpr ProcInfo Processors[] = { // Empty processor. Include X87 and CMPXCHG8 for backwards compatibility. @@ -391,6 +394,7 @@ constexpr ProcInfo Processors[] = { // Zen architecture processors. { {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1 }, { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 }, + { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3 }, // Generic 64-bit processor. { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 }, { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 }, diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 3c5dd7300552..f39fbf1fca77 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1056,6 +1056,13 @@ def ProcessorFeatures { list<SubtargetFeature> ZN2Tuning = ZNTuning; list<SubtargetFeature> ZN2Features = !listconcat(ZNFeatures, ZN2AdditionalFeatures); + list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureINVPCID, + FeaturePKU, + FeatureVAES, + FeatureVPCLMULQDQ]; + list<SubtargetFeature> ZN3Tuning = ZNTuning; + list<SubtargetFeature> ZN3Features = + !listconcat(ZN2Features, ZN3AdditionalFeatures); } //===----------------------------------------------------------------------===// @@ -1340,6 +1347,8 @@ def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures, ProcessorFeatures.ZNTuning>; def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features, ProcessorFeatures.ZN2Tuning>; +def : ProcModel<"znver3", Znver2Model, ProcessorFeatures.ZN3Features, + ProcessorFeatures.ZN3Tuning>; def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA], [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>; diff --git a/llvm/test/CodeGen/X86/cpus-amd.ll b/llvm/test/CodeGen/X86/cpus-amd.ll index c5716d68e636..ce90d6b969ac 100644 --- a/llvm/test/CodeGen/X86/cpus-amd.ll +++ b/llvm/test/CodeGen/X86/cpus-amd.ll @@ -27,6 +27,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown
-mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void