[PATCH] [ARM] ARMv8.6-a command-line + BFloat16 Asm Support

Summary:
This patch introduces command-line support for the Armv8.6-a architecture and assembly support for BFloat16. Details can be found at:
https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

in addition to the GCC patch for the 8.6-a CLI:
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-11/msg02647.html

In detail, this patch adds:

- march options for armv8.6-a
- BFloat16 assembly

This is part of a patch series, starting with command-line and BFloat16
assembly support. The subsequent patches will upstream intrinsics
support for BFloat16, followed by Matrix Multiplication and the
remaining Virtualization features of the armv8.6-a architecture.

Based on work by:
- labrinea
- MarkMurrayARM
- Luke Cheeseman
- Javed Asbar
- Mikhail Maltsev
- Luke Geeson

Reviewers: SjoerdMeijer, craig.topper, rjmccall, jfb, LukeGeeson

Reviewed By: SjoerdMeijer

Subscribers: stuij, kristof.beyls, hiraditya, dexonsmith, danielkiss, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D76062
This commit is contained in:
Ties Stuij 2020-03-26 08:17:29 +00:00
parent 6a946993d5
commit 71ae267d1f
50 changed files with 1650 additions and 16 deletions

View File

@ -151,6 +151,7 @@ void AArch64TargetInfo::fillValidCPUList(
/// Adds the preprocessor macros that are specific to Armv8.1-A.
///
/// \param Opts    Language options; not consulted by this function.
/// \param Builder Receives the macro definitions.
///
/// FIXME: Armv8.1 makes __ARM_FEATURE_CRC32 mandatory. Handle it here.
void AArch64TargetInfo::getTargetDefinesARMV81A(const LangOptions &Opts,
                                                MacroBuilder &Builder) const {
  Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
}
@ -171,17 +172,26 @@ void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts,
/// Adds the preprocessor macros for Armv8.4-A. At present this is exactly the
/// Armv8.3-A set, obtained by delegating to getTargetDefinesARMV83A.
///
/// \param Opts    Language options, forwarded unchanged.
/// \param Builder Receives the macro definitions.
///
/// FIXME: Armv8.4 makes some extensions mandatory, e.g. __ARM_FEATURE_ATOMICS
/// (defined in GCC). Add and handle them here.
void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts,
                                                MacroBuilder &Builder) const {
  getTargetDefinesARMV83A(Opts, Builder);
}
/// Adds the preprocessor macros for Armv8.5-A. At present this is exactly the
/// Armv8.4-A set, obtained by delegating to getTargetDefinesARMV84A.
///
/// \param Opts    Language options, forwarded unchanged.
/// \param Builder Receives the macro definitions.
///
/// FIXME: Armv8.5 makes some extensions mandatory. Handle them here.
void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts,
                                                MacroBuilder &Builder) const {
  getTargetDefinesARMV84A(Opts, Builder);
}
/// Adds the preprocessor macros for Armv8.6-A. At present this is exactly the
/// Armv8.5-A set, obtained by delegating to getTargetDefinesARMV85A.
///
/// \param Opts    Language options, forwarded unchanged.
/// \param Builder Receives the macro definitions.
///
/// FIXME: Armv8.6 makes the following extensions mandatory:
///   - __ARM_FEATURE_BF16
///   - __ARM_FEATURE_MATMUL_INT8
/// Handle them here.
void AArch64TargetInfo::getTargetDefinesARMV86A(const LangOptions &Opts,
                                                MacroBuilder &Builder) const {
  getTargetDefinesARMV85A(Opts, Builder);
}
void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
@ -290,6 +300,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
case llvm::AArch64::ArchKind::ARMV8_5A:
getTargetDefinesARMV85A(Opts, Builder);
break;
case llvm::AArch64::ArchKind::ARMV8_6A:
getTargetDefinesARMV86A(Opts, Builder);
break;
}
// All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
@ -344,6 +357,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
ArchKind = llvm::AArch64::ArchKind::ARMV8_4A;
if (Feature == "+v8.5a")
ArchKind = llvm::AArch64::ArchKind::ARMV8_5A;
if (Feature == "+v8.6a")
ArchKind = llvm::AArch64::ArchKind::ARMV8_6A;
if (Feature == "+fullfp16")
HasFullFP16 = true;
if (Feature == "+dotprod")

View File

@ -70,6 +70,8 @@ public:
MacroBuilder &Builder) const;
void getTargetDefinesARMV85A(const LangOptions &Opts,
MacroBuilder &Builder) const;
void getTargetDefinesARMV86A(const LangOptions &Opts,
MacroBuilder &Builder) const;
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;

View File

@ -201,6 +201,8 @@ StringRef ARMTargetInfo::getCPUAttr() const {
return "8_4A";
case llvm::ARM::ArchKind::ARMV8_5A:
return "8_5A";
case llvm::ARM::ArchKind::ARMV8_6A:
return "8_6A";
case llvm::ARM::ArchKind::ARMV8MBaseline:
return "8M_BASE";
case llvm::ARM::ArchKind::ARMV8MMainline:
@ -830,6 +832,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
case llvm::ARM::ArchKind::ARMV8_3A:
case llvm::ARM::ArchKind::ARMV8_4A:
case llvm::ARM::ArchKind::ARMV8_5A:
case llvm::ARM::ArchKind::ARMV8_6A:
getTargetDefinesARMV83A(Opts, Builder);
break;
}

View File

@ -603,6 +603,39 @@
// RUN: %clang -target aarch64 -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-FP16 %s
// GENERICV85A-FP16: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.5a" "-target-feature" "+fullfp16"
// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
// RUN: %clang -target aarch64 -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
// GENERICV86A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a"
// RUN: %clang -target aarch64_be -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
// RUN: %clang -target aarch64_be -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
// GENERICV86A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a"
// The SVE extension is an optional extension for Armv8-A.
// RUN: %clang -target aarch64 -march=armv8a+sve -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-SVE %s
// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-SVE %s
// GENERICV8A-SVE: "-target-feature" "+sve"
// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
// GENERICV8A-NOSVE-NOT: "-target-feature" "+sve"
// The BFloat16 extension is a mandatory component of the Armv8.6-A extensions, but is permitted as an
// optional feature for any implementation of Armv8.2-A to Armv8.5-A (inclusive)
// RUN: %clang -target aarch64 -march=armv8.5a+bf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16 %s
// GENERICV85A-BF16: "-target-feature" "+bf16"
// RUN: %clang -target aarch64 -march=armv8.5a+bf16+nobf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16-NO-BF16 %s
// GENERICV85A-BF16-NO-BF16: "-target-feature" "-bf16"
// RUN: %clang -target aarch64 -march=armv8.5a+bf16+sve -### -c %s 2>&1 | FileCheck -check-prefixes=GENERICV85A-BF16-SVE %s
// GENERICV85A-BF16-SVE: "-target-feature" "+bf16" "-target-feature" "+sve"
// fullfp16 is off by default for v8a, feature must not be mentioned
// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
// RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s

View File

@ -335,6 +335,23 @@
// RUN: %clang -target arm -march=armebv8.5-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V85A %s
// CHECK-BE-V85A: "-cc1"{{.*}} "-triple" "armebv8.5{{.*}}" "-target-cpu" "generic"
// RUN: %clang -target armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
// RUN: %clang -target arm -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
// RUN: %clang -target arm -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
// RUN: %clang -target armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
// RUN: %clang -target arm -mlittle-endian -march=armv8.6-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
// CHECK-V86A: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic"
// RUN: %clang -target armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
// RUN: %clang -target armv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
// RUN: %clang -target armeb -march=armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
// RUN: %clang -target armeb -march=armebv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
// RUN: %clang -target arm -march=armebv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
// RUN: %clang -target arm -march=armebv8.6-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
// CHECK-BE-V86A: "-cc1"{{.*}} "-triple" "armebv8.6{{.*}}" "-target-cpu" "generic"
// Once we have CPUs with optional v8.2-A FP16, we will need a way to turn it
// on and off. Cortex-A53 is a placeholder for now.
// RUN: %clang -target armv8a-linux-eabi -mcpu=cortex-a53+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-FP16 %s
@ -432,6 +449,9 @@
// RUN: %clang -target armv8a-linux-eabi -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V85A-FP16 %s
// CHECK-V85A-FP16: "-cc1"{{.*}} "-triple" "armv8.5{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+fullfp16"
// RUN: %clang -target armv8a-linux-eabi -march=armv8.6-a+bf16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V86A-BF16 %s
// CHECK-V86A-BF16: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+bf16"
// RUN: %clang -target arm -march=armv8.2-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
// RUN: %clang -target arm -march=armv8.2-a+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
// RUN: %clang -target arm -march=armv8.2-a+fp16+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s

View File

@ -841,5 +841,10 @@
// CHECK-V85A: #define __ARM_ARCH_8_5A__ 1
// CHECK-V85A: #define __ARM_ARCH_PROFILE 'A'
// RUN: %clang -target armv8.6a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V86A %s
// CHECK-V86A: #define __ARM_ARCH 8
// CHECK-V86A: #define __ARM_ARCH_8_6A__ 1
// CHECK-V86A: #define __ARM_ARCH_PROFILE 'A'
// RUN: %clang -target arm-none-none-eabi -march=armv7-m -mfpu=softvfp -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SOFTVFP %s
// CHECK-SOFTVFP-NOT: #define __ARM_FP 0x

View File

@ -101,6 +101,7 @@ public:
enum SubArchType {
NoSubArch,
ARMSubArch_v8_6a,
ARMSubArch_v8_5a,
ARMSubArch_v8_4a,
ARMSubArch_v8_3a,

View File

@ -44,6 +44,13 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD))
AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(AArch64::AEK_CRC | AArch64::AEK_FP |
AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
#undef AARCH64_ARCH
#ifndef AARCH64_ARCH_EXT_NAME
@ -79,6 +86,8 @@ AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte"
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16")
AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm")
AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
#undef AARCH64_ARCH_EXT_NAME

View File

@ -55,6 +55,8 @@ enum ArchExtKind : unsigned {
AEK_SVE2SHA3 = 1 << 26,
AEK_SVE2BITPERM = 1 << 27,
AEK_TME = 1 << 28,
AEK_BF16 = 1 << 29,
AEK_I8MM = 1 << 30,
};
enum class ArchKind {

View File

@ -112,6 +112,11 @@ ARM_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD))
ARM_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@ -164,6 +169,7 @@ ARM_ARCH_EXT_NAME("iwmmxt2", ARM::AEK_IWMMXT2, nullptr, nullptr)
ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr, nullptr)
ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16", "-bf16")
ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob")
ARM_ARCH_EXT_NAME("cdecp0", ARM::AEK_CDECP0, "+cdecp0", "-cdecp0")

View File

@ -46,14 +46,15 @@ enum ArchExtKind : uint64_t {
AEK_SB = 1 << 17,
AEK_FP_DP = 1 << 18,
AEK_LOB = 1 << 19,
AEK_CDECP0 = 1 << 20,
AEK_CDECP1 = 1 << 21,
AEK_CDECP2 = 1 << 22,
AEK_CDECP3 = 1 << 23,
AEK_CDECP4 = 1 << 24,
AEK_CDECP5 = 1 << 25,
AEK_CDECP6 = 1 << 26,
AEK_CDECP7 = 1 << 27,
AEK_BF16 = 1 << 20,
AEK_CDECP0 = 1 << 21,
AEK_CDECP1 = 1 << 22,
AEK_CDECP2 = 1 << 23,
AEK_CDECP3 = 1 << 24,
AEK_CDECP4 = 1 << 25,
AEK_CDECP5 = 1 << 26,
AEK_CDECP6 = 1 << 27,
AEK_CDECP7 = 1 << 28,
// Unsupported extensions.
AEK_OS = 1ULL << 59,

View File

@ -116,6 +116,8 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK,
Features.push_back("+v8.4a");
if (AK == ArchKind::ARMV8_5A)
Features.push_back("+v8.5a");
if (AK == AArch64::ArchKind::ARMV8_6A)
Features.push_back("+v8.6a");
return AK != ArchKind::INVALID;
}

View File

@ -74,6 +74,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
case ArchKind::ARMV8_3A:
case ArchKind::ARMV8_4A:
case ArchKind::ARMV8_5A:
case ArchKind::ARMV8_6A:
case ArchKind::ARMV8R:
case ArchKind::ARMV8MBaseline:
case ArchKind::ARMV8MMainline:
@ -108,6 +109,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
case ArchKind::ARMV8_3A:
case ArchKind::ARMV8_4A:
case ArchKind::ARMV8_5A:
case ArchKind::ARMV8_6A:
return ProfileKind::A;
case ArchKind::ARMV2:
case ArchKind::ARMV2A:
@ -150,6 +152,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v8.3a", "v8.3-a")
.Case("v8.4a", "v8.4-a")
.Case("v8.5a", "v8.5-a")
.Case("v8.6a", "v8.6-a")
.Case("v8r", "v8-r")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")

View File

@ -627,6 +627,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8_4a;
case ARM::ArchKind::ARMV8_5A:
return Triple::ARMSubArch_v8_5a;
case ARM::ArchKind::ARMV8_6A:
return Triple::ARMSubArch_v8_6a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:

View File

@ -365,6 +365,9 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;
// AArch64 subtarget feature "bf16": sets the subtarget field HasBF16 to
// "true". No implied features are listed for it here.
def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
"true", "Enable BFloat16 Extension" >;
//===----------------------------------------------------------------------===//
// Architectures.
//
@ -391,8 +394,11 @@ def HasV8_5aOps : SubtargetFeature<
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
[HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
FeatureBranchTargetId]
>;
FeatureBranchTargetId]>;
// AArch64 subtarget feature "v8.6a": sets HasV8_6aOps and implies both the
// v8.5a feature set (HasV8_5aOps) and FeatureBF16.
def HasV8_6aOps : SubtargetFeature<
"v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions",
[HasV8_5aOps, FeatureBF16]>;
//===----------------------------------------------------------------------===//
// Register File Description

View File

@ -7786,6 +7786,110 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
//----------------------------------------------------------------------------
// Armv8.6 BFloat16 Extension
//----------------------------------------------------------------------------
// Every BFloat16 format in this scope is declared as neither loading,
// storing, nor having other side effects. None of them carries a selection
// pattern yet (all pattern lists are empty), so they are assembler-only.
let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in {
// Three-register vector BFloat16 dot-product format.
// Overrides the inherited AsmString so the destination is printed with
// `kind1` and both sources with `kind2`.
// AccumType and InputType are accepted but not referenced in this body.
class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
string kind2, RegisterOperand RegType,
ValueType AccumType, ValueType InputType>
: BaseSIMDThreeSameVectorTied<Q, U, 0b010, 0b11111, RegType, asm, kind1, []> {
let AsmString = !strconcat(asm,
"{\t$Rd" # kind1 # ", $Rn" # kind2 #
", $Rm" # kind2 # "}");
}
// Instantiates the dot-product format twice:
//   v4f16: Q=0, V64 registers, ".2s" destination, ".4h" sources
//   v8f16: Q=1, V128 registers, ".4s" destination, ".8h" sources
multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
def v4f16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64,
v2f32, v8i8>;
def v8f16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128,
v4f32, v16i8>;
}
// Indexed (by-element) BFloat16 dot-product format.
// Passes Scalar=0, size=0b01, opc=0b1111 to BaseSIMDIndexedTied; the indexed
// operand is always a V128 register with a VectorIndexS lane index.
// AccumType and InputType are accepted but not referenced in this body.
class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
string dst_kind, string lhs_kind,
string rhs_kind,
RegisterOperand RegType,
ValueType AccumType,
ValueType InputType>
: BaseSIMDIndexedTied<Q, U, 0b0, 0b01, 0b1111,
RegType, RegType, V128, VectorIndexS,
asm, "", dst_kind, lhs_kind, rhs_kind,
[]> {
// 2-bit lane index, split across two non-adjacent encoding bits.
bits<2> idx;
let Inst{21} = idx{0}; // L
let Inst{11} = idx{1}; // H
}
// Instantiates the indexed dot-product format for V64 (Q=0) and V128 (Q=1);
// in both cases the indexed operand is printed with the ".2h" suffix.
multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> {
def v4f16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h",
".2h", V64, v2f32, v8i8>;
def v8f16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h",
".2h", V128, v4f32, v16i8>;
}
// Three-register V128 format used for the bfmlalb/bfmlalt vector forms.
// The Q parameter is forwarded to the base class and is what distinguishes
// the two instantiations made from this class. Operands are always printed
// as "$Rd.4s, $Rn.8h, $Rm.8h".
class SIMDBF16MLAL<bit Q, string asm>
: BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s",
[]> { // TODO: Add intrinsics
let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
}
// By-element variant of SIMDBF16MLAL, encoded directly on top of `I`.
// $Rd is tied to the output $dst. $Rm uses the V128_lo operand class and
// is encoded in only 4 bits.
class SIMDBF16MLALIndex<bit Q, string asm>
: I<(outs V128:$dst),
(ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm,
"{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst",
[]>, // TODO: Add intrinsics
Sched<[WriteV]> {
bits<5> Rd;
bits<5> Rn;
bits<4> Rm;
// 3-bit lane index: low two bits go to Inst{21-20}, top bit to Inst{11}.
bits<3> idx;
let Inst{31} = 0;
let Inst{30} = Q;
let Inst{29-22} = 0b00111111;
let Inst{21-20} = idx{1-0};
let Inst{19-16} = Rm;
let Inst{15-12} = 0b1111;
let Inst{11} = idx{2}; // H
let Inst{10} = 0;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}
// BFloat16 matrix-multiply format: V128 only, with the first two base-class
// arguments fixed to 1. Operands are printed as "$Rd.4s, $Rn.8h, $Rm.8h".
class SIMDThreeSameVectorBF16MatrixMul<string asm>
: BaseSIMDThreeSameVectorTied<1, 1, 0b010, 0b11101,
V128, asm, ".4s",
[]> {
let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
", $Rm", ".8h", "}");
}
// "bfcvtn": two-register V128 format printed with ".4h" and ".4s" suffixes.
class SIMD_BFCVTN
: BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128,
"bfcvtn", ".4h", ".4s",
[]>;
// "bfcvtn2": tied counterpart of SIMD_BFCVTN (first base argument 1 instead
// of 0), printed with ".8h" and ".4s" suffixes.
class SIMD_BFCVTN2
: BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128,
"bfcvtn2", ".8h", ".4s",
[]>;
// Scalar conversion format with a fully fixed opcode in Inst{31-10}.
// NOTE(review): despite the class name, the operand lists take an FPR32
// input ($Rn) and produce an FPR16 output ($Rd).
class BF16ToSinglePrecision<string asm>
: I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "", []>,
Sched<[WriteFCvt]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-10} = 0b0001111001100011010000;
let Inst{9-5} = Rn;
let Inst{4-0} = Rd;
}
} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
// ARMv8.2-A Dot Product Instructions (Indexed)
class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
string lhs_kind, string rhs_kind,

View File

@ -23,6 +23,8 @@ def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasVH : Predicate<"Subtarget->hasVH()">,
AssemblerPredicate<(all_of FeatureVH), "vh">;
@ -142,6 +144,8 @@ def HasETE : Predicate<"Subtarget->hasETE()">,
AssemblerPredicate<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
AssemblerPredicate<(all_of FeatureBF16), "bf16">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@ -746,6 +750,20 @@ defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
}
// ARMv8.6-A BFloat
// AArch64 Advanced SIMD / scalar BFloat16 instruction definitions. All of
// them are gated only on the HasBF16 predicate.
let Predicates = [HasBF16] in {
// "bfdot": vector form passes U=1, by-element form passes U=0.
defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
// "bfmlalb" uses Q=0 and "bfmlalt" uses Q=1, for both the vector and the
// indexed forms.
def BFMLALB : SIMDBF16MLAL<0, "bfmlalb">;
def BFMLALT : SIMDBF16MLAL<1, "bfmlalt">;
def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb">;
def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt">;
def BFCVTN : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}
// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;

View File

@ -1197,6 +1197,18 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;
// SVE BFloat16 instruction definitions; these require both HasBF16 and
// HasSVE.
let Predicates = [HasBF16, HasSVE] in {
def BFDOT_ZZZ : sve_bfloat_dot<"bfdot">;
def BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot">;
def BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla">;
// NOTE(review): the four records below are named BFMMLA_* but their
// mnemonics are "bfmlalb"/"bfmlalt"; the names look like a leftover and may
// deserve a rename in a follow-up.
def BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb">;
def BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt">;
def BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb">;
def BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt">;
// The convert format's single bit parameter is 1 for "bfcvt" and 0 for
// "bfcvtnt".
def BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt">;
def BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt">;
}
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;

View File

@ -76,6 +76,7 @@ protected:
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
@ -144,6 +145,9 @@ protected:
bool HasMTE = false;
bool HasTME = false;
// Armv8.6-A Extensions
bool HasBF16 = false;
// Arm SVE2 extensions
bool HasSVE2AES = false;
bool HasSVE2SM4 = false;
@ -403,6 +407,9 @@ public:
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
// Armv8.6-A Extensions
bool hasBF16() const { return HasBF16; }
bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }

View File

@ -2859,6 +2859,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.4a";
else if (FBS[AArch64::HasV8_5aOps])
Str += "ARMv8.5a";
else if (FBS[AArch64::HasV8_6aOps])
Str += "ARMv8.6a";
else {
auto ext = std::find_if(std::begin(ExtensionMap),
std::end(ExtensionMap),
@ -5094,6 +5096,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
break;
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
RequestedExtensions.push_back("sha2");
@ -5113,6 +5116,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
break;
case AArch64::ArchKind::ARMV8_4A:
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");

View File

@ -7394,6 +7394,96 @@ multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv16i8, op, nxv16i8, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
// SVE BFloat16 Group
//===----------------------------------------------------------------------===//
// Common base for the SVE BFloat16 dot-product style formats.
//   opc  - placed in Inst{15-14}
//   asm  - mnemonic
//   ops  - operand string
//   iops - input operand dag, supplied by the subclass
// The result $Zda is a ZPR32 register tied to the input $_Zda. Inst{20-16}
// is left for subclasses to fill in. No scheduling resources are attached.
class sve_bfloat_dot_base<bits<2> opc, string asm, string ops, dag iops>
: I<(outs ZPR32:$Zda), iops, asm, ops, "", []>, Sched<[]> {
bits<5> Zda;
bits<5> Zn;
let Inst{31-21} = 0b01100100011;
let Inst{15-14} = opc;
let Inst{13-10} = 0b0000;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeH;
}
// Unindexed SVE BFloat16 dot product: opc=0b10, with a full 5-bit $Zm
// register number in Inst{20-16}.
class sve_bfloat_dot<string asm>
: sve_bfloat_dot_base<0b10, asm, "\t$Zda, $Zn, $Zm",
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm)> {
bits<5> Zm;
let Inst{20-16} = Zm;
}
// Indexed SVE BFloat16 dot product: opc=0b01. Inst{20-16} is shared between
// a 2-bit lane index (Inst{20-19}) and a 3-bit $Zm register number
// (Inst{18-16}); $Zm uses the ZPR3b16 operand class.
class sve_bfloat_dot_indexed<string asm>
: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> {
bits<2> iop;
bits<3> Zm;
let Inst{20-19} = iop;
let Inst{18-16} = Zm;
}
// SVE BFloat16 matrix-multiply format. The ZPR32 result $Zda is tied to the
// input $_Zda; $Zn and $Zm are ZPR16 operands. Inst{15-10} is fixed to
// 0b111001. No scheduling resources are attached.
class sve_bfloat_matmul<string asm>
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zm;
bits<5> Zda;
bits<5> Zn;
let Inst{31-21} = 0b01100100011;
let Inst{20-16} = Zm;
let Inst{15-10} = 0b111001;
let Inst{9-5} = Zn;
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeH;
}
// Reuses the sve_bfloat_matmul operands and layout but overrides three
// encoding fields the parent had fixed: Inst{23} becomes 1, Inst{14-13}
// become 0b00, and Inst{10} takes the BT parameter.
class sve_bfloat_matmul_longvecl<bit BT, string asm>
: sve_bfloat_matmul<asm> {
let Inst{23} = 0b1;
let Inst{14-13} = 0b00;
let Inst{10} = BT;
}
// Indexed counterpart of sve_bfloat_matmul_longvecl, built on
// sve_bfloat_dot_base with opc=0b01. Overrides Inst{23}, Inst{11} and
// Inst{10}, which the base had fixed.
// The 3-bit lane index is split: iop{2-1} -> Inst{20-19}, iop{0} -> Inst{11}.
// $Zm uses the ZPR3b16 operand class and is encoded in Inst{18-16}.
class sve_bfloat_matmul_longvecl_idx<bit BT, string asm>
: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop)> {
bits<3> iop;
bits<3> Zm;
let Inst{23} = 0b1;
let Inst{20-19} = iop{2-1};
let Inst{18-16} = Zm;
let Inst{11} = iop{0};
let Inst{10} = BT;
}
// Predicated (merging, "/m") SVE BFloat16 convert format.
//   N - placed in Inst{24}
// The ZPR16 result $Zd is tied to the input $_Zd; the source $Zn is ZPR32
// and the governing predicate $Pg is a 3-bit PPR3bAny operand.
// No scheduling resources are attached.
class sve_bfloat_convert<bit N, string asm>
: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
bits<5> Zd;
bits<3> Pg;
bits<5> Zn;
let Inst{31-25} = 0b0110010;
let Inst{24} = N;
let Inst{23-13} = 0b10001010101;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
// NOTE(review): this format is marked as having side effects, unlike the
// other SVE BFloat16 formats in this group, which do not set the flag.
let hasSideEffects = 1;
let ElementSize = ElementSizeS;
}
/// Addressing modes
def am_sve_indexed_s4 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-8,7>", [], [SDNPWantRoot]>;
def am_sve_indexed_s6 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-32,31>", [], [SDNPWantRoot]>;

View File

@ -424,6 +424,10 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
"Enable v8.5a Speculation Barrier" >;
// Armv8.6-A extensions
// ARM (AArch32) subtarget feature "bf16": sets HasBF16 and implies
// FeatureNEON.
def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
"Enable support for BFloat16 instructions", [FeatureNEON]>;
// Armv8.1-M extensions
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
@ -523,6 +527,10 @@ def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
"Support ARM v8.5a instructions",
[HasV8_4aOps, FeatureSB]>;
// ARM (AArch32) subtarget feature "v8.6a": sets HasV8_6aOps and implies both
// the v8.5a feature set (HasV8_5aOps) and FeatureBF16.
def HasV8_6aOps : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true",
"Support ARM v8.6a instructions",
[HasV8_5aOps, FeatureBF16]>;
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
@ -797,6 +805,19 @@ def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
// Architecture record for "armv8.6-a". Lists HasV8_6aOps followed by the
// same trailing features as the ARMv85a record that precedes it in the file
// (… FeatureCRC, FeatureRAS, FeatureDotProd).
def ARMv86a : Architecture<"armv8.6-a", "ARMv86a", [HasV8_6aOps,
FeatureAClass,
FeatureDB,
FeatureFPARMv8,
FeatureNEON,
FeatureDSP,
FeatureTrustZone,
FeatureMP,
FeatureVirtualization,
FeatureCrypto,
FeatureCRC,
FeatureRAS,
FeatureDotProd]>;
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,

View File

@ -8926,3 +8926,93 @@ def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
(VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
(VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
// ARMv8.6a BFloat16 instructions.
// AArch32 NEON BFloat16 instructions. Everything in this scope requires both
// HasBF16 and HasNEON. All pattern lists are empty, so these are
// assembler/disassembler-only definitions, decoded in the "VFPV8" namespace.
let Predicates = [HasBF16, HasNEON] in {
// Common base for the BFloat16 VDOT encodings, built on N3Vnp.
// The assembly string is left empty here; subclasses set AsmString.
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
dag oops, dag iops>
: N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
N3RegFrm, IIC_VDOTPROD, "", "", []> {
let hasNoSchedulingInfo = 1;
let DecoderNamespace = "VFPV8";
}
// Vector-by-vector VDOT. Q selects op6; $Vd is tied to the output $dst.
// AccumTy and InputTy are accepted but not referenced in this body.
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
: BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
(ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm)> {
let Constraints = "$dst = $Vd";
let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
// Same value as already set by the BF16VDOT base class.
let DecoderNamespace = "VFPV8";
}
// Vector-by-element VDOT. $Vm is a DPR_VFP2 register with a VectorIndex32
// lane; the 1-bit lane is encoded in Inst{5}.
// AccumTy, InputTy and RHS are accepted but not referenced in this body.
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
ValueType InputTy, dag RHS> {
def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
(ins RegTy:$Vd, RegTy:$Vn,
DPR_VFP2:$Vm, VectorIndex32:$lane)> {
bit lane;
let Inst{5} = lane;
let Constraints = "$dst = $Vd";
let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
let DecoderNamespace = "VFPV8";
}
}
// D-register (Q=0) and Q-register (Q=1) instantiations of both VDOT forms.
def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v8i8>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v16i8>;
defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v8i8, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
// BFloat16 matrix multiply-accumulate format; $Vd is tied to $dst.
class BF16MM<bit Q, RegisterClass RegTy,
string opc>
: N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
(outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
N3RegFrm, IIC_VDOTPROD, "", "", []> {
let Constraints = "$dst = $Vd";
let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
let DecoderNamespace = "VFPV8";
let hasNoSchedulingInfo = 1;
}
// Only the Q-register form is defined.
def VMMLA : BF16MM<1, QPR, "vmmla">;
// "vfmat"/"vfmab" Q-register vector forms; T is forwarded to N3VCP8 and the
// mnemonic is "vfma" followed by the given suffix.
class VBF16MALQ<bit T, string suffix>
: N3VCP8<0b00, 0b11, T, 1,
(outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
[]> { // TODO: Add intrinsics
let Constraints = "$dst = $Vd";
let DecoderNamespace = "VFPV8";
let hasNoSchedulingInfo = 1;
}
def VBF16MALTQ: VBF16MALQ<1, "t">;
def VBF16MALBQ: VBF16MALQ<0, "b">;
// By-element forms of the above. $Vm is a DPR_8 register with a
// VectorIndex16 lane; the 2-bit lane index is split across Inst{5} (high
// bit) and Inst{3} (low bit).
multiclass VBF16MALQI<bit T, string suffix> {
def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
(ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
bits<2> idx;
let Inst{5} = idx{1};
let Inst{3} = idx{0};
let Constraints = "$dst = $Vd";
let DecoderNamespace = "VFPV8";
let hasNoSchedulingInfo = 1;
}
}
defm VBF16MALTQI: VBF16MALQI<1, "t">;
defm VBF16MALBQI: VBF16MALQI<0, "b">;
// "vcvt.bf16.f32": takes a QPR source and writes a DPR destination.
// Unlike the other definitions in this scope, no DecoderNamespace is set
// here.
let hasNoSchedulingInfo = 1 in {
def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
(outs DPR:$Vd), (ins QPR:$Vm),
NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
}
}
// End of BFloat16 instructions

View File

@ -1867,6 +1867,35 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
} // End of 'let Constraints = "$a = $dst" in'
// BFloat16 - Single precision, unary, predicated
// Scalar VFP format for the predicable "<opc>.bf16.f32 $Sd, $Sm" conversions.
//   opc    - mnemonic
//   op7_6  - placed in Inst{7-6}; distinguishes the instantiations
// The extra SPR input $dst is tied to the output $Sd via RegConstraint.
// Requires HasBF16; no scheduling resources are attached.
class BF16_VCVT<string opc, bits<2> op7_6>
: VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm),
VFPUnaryFrm, NoItinerary,
opc, ".bf16.f32\t$Sd, $Sm", []>,
RegConstraint<"$dst = $Sd">,
Requires<[HasBF16]>,
Sched<[]> {
bits<5> Sd;
bits<5> Sm;
// Encode instruction operands.
// Each 5-bit register number is split: the upper four bits go into a
// 4-bit field and the lowest bit into a separate single bit.
let Inst{3-0} = Sm{4-1};
let Inst{5} = Sm{0};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
let Inst{27-23} = 0b11101; // opcode1
let Inst{21-20} = 0b11; // opcode2
let Inst{19-16} = 0b0011; // opcode3
let Inst{11-8} = 0b1001;
let Inst{7-6} = op7_6;
let Inst{4} = 0;
let DecoderNamespace = "VFPV8";
}
def BF16_VCVTB : BF16_VCVT<"vcvtb", 0b01>;
def BF16_VCVTT : BF16_VCVT<"vcvtt", 0b11>;
//===----------------------------------------------------------------------===//
// FP Multiply-Accumulate Operations.
//

View File

@ -72,6 +72,8 @@ def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
// True when the subtarget reports Armv8.6-A operations; the assembler side
// requires the HasV8_6aOps feature and names it "armv8.6a".
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
AssemblerPredicate<(all_of FeatureVFP2_SP), "VFP2">;
@ -106,6 +108,8 @@ def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<(all_of FeatureFullFP16),"full half-float">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
AssemblerPredicate<(all_of FeatureFP16FML),"full half-float fml">;
// True when the subtarget reports BFloat16 support; the assembler side
// requires FeatureBF16 and names it "BFloat16 floating point extension".
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
AssemblerPredicate<(all_of FeatureBF16),"BFloat16 floating point extension">;
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
AssemblerPredicate<(all_of FeatureHWDivThumb), "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,

View File

@ -108,6 +108,7 @@ protected:
ARMv83a,
ARMv84a,
ARMv85a,
ARMv86a,
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
@ -157,6 +158,7 @@ protected:
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
@ -255,6 +257,9 @@ protected:
/// HasFP16FML - True if subtarget supports half-precision FP fml operations
bool HasFP16FML = false;
/// HasBF16 - True if subtarget supports BFloat16 floating point operations
bool HasBF16 = false;
/// HasD32 - True if subtarget has the full 32 double precision
/// FP registers for VFPv3.
bool HasD32 = false;
@ -581,6 +586,7 @@ public:
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_6aOps() const { return HasV8_6aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }

View File

@ -6322,6 +6322,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
@ -6462,6 +6463,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
Mnemonic == "vfmat" || Mnemonic == "vfmab" ||
Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "sb" || Mnemonic == "ssbb" ||
Mnemonic == "pssbb" ||
Mnemonic == "bfcsel" || Mnemonic == "wls" ||

View File

@ -856,6 +856,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
case ARM::ArchKind::ARMV8_5A:
case ARM::ArchKind::ARMV8_6A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);

View File

@ -0,0 +1,27 @@
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
bfcvt z0.s, p0/m, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfcvt z0.s, p0/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfcvt z0.h, p0/m, z1.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfcvt z0.h, p0/m, z1.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfcvt z0.h, p0/z, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bfcvt z0.h, p0/z, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfcvt z0.h, p8/m, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: bfcvt z0.h, p8/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
movprfx z0.h, p0/m, z7.h
bfcvt z0.h, p0/m, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
// CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,29 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
bfcvt z0.H, p0/m, z1.S
// CHECK-INST: bfcvt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0.S, p0/m, z2.S
// CHECK-INST: movprfx z0.s, p0/m, z2.s
// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
// CHECK-ERROR: instruction requires: sve
bfcvt z0.H, p0/m, z1.S
// CHECK-INST: bfcvt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z2
// CHECK-INST: movprfx z0, z2
// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfcvt z0.H, p0/m, z1.S
// CHECK-INST: bfcvt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
// CHECK-ERROR: instruction requires: bf16 sve

View File

@ -0,0 +1,27 @@
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
bfcvtnt z0.s, p0/m, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfcvtnt z0.s, p0/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfcvtnt z0.h, p0/m, z1.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfcvtnt z0.h, p0/z, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bfcvtnt z0.h, p0/z, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfcvtnt z0.h, p8/m, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
movprfx z0.h, p0/m, z7.h
bfcvtnt z0.h, p0/m, z1.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,29 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
bfcvtnt z0.H, p0/m, z1.S
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0.S, p0/m, z2.S
// CHECK-INST: movprfx z0.s, p0/m, z2.s
// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
// CHECK-ERROR: instruction requires: sve
bfcvtnt z0.H, p0/m, z1.S
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z2
// CHECK-INST: movprfx z0, z2
// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfcvtnt z0.H, p0/m, z1.S
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve

View File

@ -0,0 +1,53 @@
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
bfdot z0.s, z1.s, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfdot z0.s, z1.s, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfdot z0.h, z1.h, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfdot z0.h, z1.h, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfdot z0.s, z1.h, z2.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h
// CHECK-NEXT: bfdot z0.s, z1.h, z2.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
movprfx z0.s, p0/m, z7.s
bfdot z0.s, z1.h, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: bfdot z0.s, z1.h, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfdot z0.s, z1.s, z2.h[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfdot z0.s, z1.s, z2.h[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfdot z0.h, z1.h, z2.h[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfdot z0.h, z1.h, z2.h[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfdot z0.s, z1.h, z2.s[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h
// CHECK-NEXT: bfdot z0.s, z1.h, z2.s[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfdot z0.s, z1.h, z8.h[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bfdot z0.s, z1.h, z8.h[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfdot z0.s, z1.h, z2.h[4]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[4]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
movprfx z0.s, p0/m, z7.s
bfdot z0.s, z1.h, z2.h[0]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0]
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,52 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
bfdot z0.S, z1.H, z2.H
// CHECK-INST: bfdot z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0x80,0x62,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfdot z0.S, z1.H, z2.H[0]
// CHECK-INST: bfdot z0.s, z1.h, z2.h[0]
// CHECK-ENCODING: [0x20,0x40,0x62,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfdot z0.S, z1.H, z2.H[3]
// CHECK-INST: bfdot z0.s, z1.h, z2.h[3]
// CHECK-ENCODING: [0x20,0x40,0x7a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
// --------------------------------------------------------------------------//
// Test compatibility with MOVPRFX instruction.
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfdot z0.S, z1.H, z2.H
// CHECK-INST: bfdot z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0x80,0x62,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfdot z0.S, z1.H, z2.H[0]
// CHECK-INST: bfdot z0.s, z1.h, z2.h[0]
// CHECK-ENCODING: [0x20,0x40,0x62,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfdot z0.S, z1.H, z2.H[3]
// CHECK-INST: bfdot z0.s, z1.h, z2.h[3]
// CHECK-ENCODING: [0x20,0x40,0x7a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve

View File

@ -0,0 +1,42 @@
// RUN: not llvm-mc -o - -triple=aarch64 -mattr=+sve,+bf16 2>&1 %s | FileCheck %s
bfmlalb z0.S, z1.H, z7.H[8]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
// CHECK-NEXT: bfmlalb z0.S, z1.H, z7.H[8]
// CHECK-NEXT: ^
bfmlalb z0.S, z1.H, z8.H[7]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bfmlalb z0.S, z1.H, z8.H[7]
// CHECK-NEXT: ^
bfmlalt z0.S, z1.H, z7.H[8]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.H[8]
// CHECK-NEXT: ^
bfmlalt z0.S, z1.H, z8.H[7]
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// CHECK-NEXT: bfmlalt z0.S, z1.H, z8.H[7]
// CHECK-NEXT: ^
bfmlalt z0.S, z1.H, z7.2h[2]
// CHECK: error: invalid vector kind qualifier
// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.2h[2]
// CHECK-NEXT: ^
bfmlalt z0.S, z1.H, z2.s[2]
// CHECK: error: Invalid restricted vector register, expected z0.h..z7.h
// CHECK-NEXT: bfmlalt z0.S, z1.H, z2.s[2]
// CHECK-NEXT: ^
bfmlalt z0.S, z1.s, z2.h[2]
// CHECK: error: invalid element width
// CHECK-NEXT: bfmlalt z0.S, z1.s, z2.h[2]
// CHECK-NEXT: ^
movprfx z0.s, p0/m, z7.s
bfmlalt z0.s, z1.h, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx
// CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h
// CHECK-NEXT: ^

View File

@ -0,0 +1,157 @@
// RUN: llvm-mc -o - -triple=aarch64 -show-encoding -mattr=+sve,+bf16 %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -o - -triple=aarch64 -show-encoding %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
bfmlalb z0.S, z1.H, z2.H
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0x80,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalt z0.S, z1.H, z2.H
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0x84,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalb z0.S, z1.H, z2.H[0]
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0]
// CHECK-ENCODING: [0x20,0x40,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalt z0.S, z1.H, z2.H[0]
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0]
// CHECK-ENCODING: [0x20,0x44,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalb z0.S, z1.H, z2.H[7]
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7]
// CHECK-ENCODING: [0x20,0x48,0xfa,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalt z0.S, z1.H, z2.H[7]
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7]
// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalt z0.S, z1.H, z7.H[7]
// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7]
// CHECK-ENCODING: [0x20,0x4c,0xff,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalb z10.S, z21.H, z14.H
// CHECK-INST: bfmlalb z10.s, z21.h, z14.h
// CHECK-ENCODING: [0xaa,0x82,0xee,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalt z14.S, z10.H, z21.H
// CHECK-INST: bfmlalt z14.s, z10.h, z21.h
// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
bfmlalb z21.s, z14.h, z3.h[2]
// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2]
// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
// --------------------------------------------------------------------------//
// Test compatibility with MOVPRFX instruction.
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalb z0.S, z1.H, z2.H
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0x80,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalt z0.S, z1.H, z2.H
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0x84,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalb z0.S, z1.H, z2.H[0]
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0]
// CHECK-ENCODING: [0x20,0x40,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalt z0.S, z1.H, z2.H[0]
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0]
// CHECK-ENCODING: [0x20,0x44,0xe2,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalb z0.S, z1.H, z2.H[7]
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7]
// CHECK-ENCODING: [0x20,0x48,0xfa,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalt z0.S, z1.H, z2.H[7]
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7]
// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalt z0.S, z1.H, z7.H[7]
// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7]
// CHECK-ENCODING: [0x20,0x4c,0xff,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z10, z7
// CHECK-INST: movprfx z10, z7
// CHECK-ENCODING: [0xea,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalb z10.S, z21.H, z14.H
// CHECK-INST: bfmlalb z10.s, z21.h, z14.h
// CHECK-ENCODING: [0xaa,0x82,0xee,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z14, z7
// CHECK-INST: movprfx z14, z7
// CHECK-ENCODING: [0xee,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalt z14.S, z10.H, z21.H
// CHECK-INST: bfmlalt z14.s, z10.h, z21.h
// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
movprfx z21, z7
// CHECK-INST: movprfx z21, z7
// CHECK-ENCODING: [0xf5,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmlalb z21.s, z14.h, z3.h[2]
// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2]
// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64]
// CHECK-ERROR: instruction requires: bf16 sve

View File

@ -0,0 +1,22 @@
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
bfmmla z0.s, z1.s, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfmmla z0.s, z1.s, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfmmla z0.h, z1.h, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfmmla z0.h, z1.h, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
bfmmla z0.s, z1.h, z2.s
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
// CHECK-NEXT: bfmmla z0.s, z1.h, z2.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
movprfx z0.s, p0/m, z7.s
bfmmla z0.s, z1.h, z2.h
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
// CHECK-NEXT: bfmmla z0.s, z1.h, z2.h
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

View File

@ -0,0 +1,22 @@
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
bfmmla z0.S, z1.H, z2.H
// CHECK-INST: bfmmla z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0xe4,0x62,0x64]
// CHECK-ERROR: instruction requires: bf16 sve
// --------------------------------------------------------------------------//
// Test compatibility with MOVPRFX instruction.
movprfx z0, z7
// CHECK-INST: movprfx z0, z7
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
// CHECK-ERROR: instruction requires: sve
bfmmla z0.S, z1.H, z2.H
// CHECK-INST: bfmmla z0.s, z1.h, z2.h
// CHECK-ENCODING: [0x20,0xe4,0x62,0x64]
// CHECK-ERROR: instruction requires: bf16 sve

View File

@ -0,0 +1,115 @@
// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+bf16 < %s | FileCheck %s
// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a < %s | FileCheck %s
// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-bf16 < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
// RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
bfdot v2.2s, v3.4h, v4.4h
bfdot v2.4s, v3.8h, v4.8h
// CHECK: bfdot v2.2s, v3.4h, v4.4h // encoding: [0x62,0xfc,0x44,0x2e]
// CHECK: bfdot v2.4s, v3.8h, v4.8h // encoding: [0x62,0xfc,0x44,0x6e]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.4h
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.8h
bfdot v2.2s, v3.4h, v4.2h[0]
bfdot v2.2s, v3.4h, v4.2h[1]
bfdot v2.2s, v3.4h, v4.2h[2]
bfdot v2.2s, v3.4h, v4.2h[3]
// CHECK: bfdot v2.2s, v3.4h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x0f]
// CHECK: bfdot v2.2s, v3.4h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x0f]
// CHECK: bfdot v2.2s, v3.4h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x0f]
// CHECK: bfdot v2.2s, v3.4h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x0f]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[0]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[1]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[2]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[3]
bfdot v2.4s, v3.8h, v4.2h[0]
bfdot v2.4s, v3.8h, v4.2h[1]
bfdot v2.4s, v3.8h, v4.2h[2]
bfdot v2.4s, v3.8h, v4.2h[3]
// CHECK: bfdot v2.4s, v3.8h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x4f]
// CHECK: bfdot v2.4s, v3.8h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x4f]
// CHECK: bfdot v2.4s, v3.8h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x4f]
// CHECK: bfdot v2.4s, v3.8h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x4f]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[0]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[1]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[2]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[3]
bfmmla v2.4s, v3.8h, v4.8h
bfmmla v3.4s, v4.8h, v5.8h
// CHECK: bfmmla v2.4s, v3.8h, v4.8h // encoding: [0x62,0xec,0x44,0x6e]
// CHECK: bfmmla v3.4s, v4.8h, v5.8h // encoding: [0x83,0xec,0x45,0x6e]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfmmla v2.4s, v3.8h, v4.8h
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfmmla v3.4s, v4.8h, v5.8h
bfcvtn v5.4h, v5.4s
bfcvtn2 v5.8h, v5.4s
// CHECK: bfcvtn v5.4h, v5.4s // encoding: [0xa5,0x68,0xa1,0x0e]
// CHECK: bfcvtn2 v5.8h, v5.4s // encoding: [0xa5,0x68,0xa1,0x4e]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfcvtn v5.4h, v5.4s
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfcvtn2 v5.8h, v5.4s
bfcvt h5, s3
// CHECK: bfcvt h5, s3 // encoding: [0x65,0x40,0x63,0x1e]
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfcvt h5, s3
bfmlalb V10.4S, V21.8h, V14.8H
bfmlalt V21.4S, V14.8h, V10.8H
// CHECK: bfmlalb v10.4s, v21.8h, v14.8h // encoding: [0xaa,0xfe,0xce,0x2e]
// CHECK-NEXT: bfmlalt v21.4s, v14.8h, v10.8h // encoding: [0xd5,0xfd,0xca,0x6e]
// NOBF16: error: instruction requires: bf16
// NOBF16-NEXT: bfmlalb V10.4S, V21.8h, V14.8H
// NOBF16-NEXT: ^
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfmlalt V21.4S, V14.8h, V10.8H
// NOBF16-NEXT: ^
bfmlalb V14.4S, V21.8H, V10.H[1]
bfmlalb V14.4S, V21.8H, V10.H[2]
bfmlalb V14.4S, V21.8H, V10.H[7]
bfmlalt V21.4S, V10.8H, V14.H[1]
bfmlalt V21.4S, V10.8H, V14.H[2]
bfmlalt V21.4S, V10.8H, V14.H[7]
// CHECK: bfmlalb v14.4s, v21.8h, v10.h[1] // encoding: [0xae,0xf2,0xda,0x0f]
// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[2] // encoding: [0xae,0xf2,0xea,0x0f]
// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[7] // encoding: [0xae,0xfa,0xfa,0x0f]
// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[1] // encoding: [0x55,0xf1,0xde,0x4f]
// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[2] // encoding: [0x55,0xf1,0xee,0x4f]
// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[7] // encoding: [0x55,0xf9,0xfe,0x4f]
// NOBF16: error: instruction requires: bf16
// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[1]
// NOBF16-NEXT: ^
// NOBF16: error: instruction requires: bf16
// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[2]
// NOBF16-NEXT: ^
// NOBF16: error: instruction requires: bf16
// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[7]
// NOBF16-NEXT: ^
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[1]
// NOBF16-NEXT: ^
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[2]
// NOBF16-NEXT: ^
// NOBF16: instruction requires: bf16
// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[7]
// NOBF16-NEXT: ^

View File

@ -0,0 +1,57 @@
// RUN: not llvm-mc -triple arm -mattr=+bf16,-neon %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL
// RUN: not llvm-mc -triple arm -mattr=-bf16 %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NOBF16,ALL
// RUN: not llvm-mc -triple arm %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL
//
vdot.bf16 d3, d4, d5
vdot.bf16 q0, q1, q2
vdot.bf16 d3, d4, d5[1]
vdot.bf16 q0, q1, d5[1]
vmmla.bf16 q0, q1, q2
vcvt.bf16.f32 d1, q3
vcvtbeq.bf16.f32 s1, s3
vcvttne.bf16.f32 s1, s3
// NOBF16: error: instruction requires: BFloat16 floating point extension
// NOBF16-NEXT: vdot.bf16 d3, d4, d5
// NOBF16-NEXT: ^
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
// NOBF16-NEXT: vdot.bf16 q0, q1, q2
// NOBF16-NEXT: ^
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
// NOBF16-NEXT: vdot.bf16 d3, d4, d5[1]
// NOBF16-NEXT: ^
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
// NOBF16-NEXT: vdot.bf16 q0, q1, d5[1]
// NOBF16-NEXT: ^
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
// NOBF16-NEXT: vmmla.bf16 q0, q1, q2
// NOBF16-NEXT: ^
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
// NOBF16-NEXT: vcvt.bf16.f32 d1, q3
// NOBF16-NEXT: ^
// NONEON: error: instruction requires: BFloat16 floating point extension NEON
// NONEON-NEXT: vdot.bf16 d3, d4, d5
// NONEON-NEXT: ^
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
// NONEON-NEXT: vdot.bf16 q0, q1, q2
// NONEON-NEXT: ^
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
// NONEON-NEXT: vdot.bf16 d3, d4, d5[1]
// NONEON-NEXT: ^
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
// NONEON-NEXT: vdot.bf16 q0, q1, d5[1]
// NONEON-NEXT: ^
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
// NONEON-NEXT: vmmla.bf16 q0, q1, q2
// NONEON-NEXT: ^
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
// NONEON-NEXT: vcvt.bf16.f32 d1, q3
// NONEON-NEXT: ^
// ALL-NEXT: error: instruction requires: BFloat16 floating point extension
// ALL-NEXT: vcvtbeq.bf16.f32 s1, s3
// ALL-NEXT: ^
// ALL-NEXT: error: instruction requires: BFloat16 floating point extension
// ALL-NEXT: vcvttne.bf16.f32 s1, s3
// ALL-NEXT: ^

View File

@ -0,0 +1,134 @@
// RUN: not llvm-mc -o - -triple arm -mattr=+v8.6a -show-encoding %s 2>&1 | FileCheck %s
vfmat.bf16 d0, d0, d0
vfmat.bf16 d0, d0, q0
vfmat.bf16 d0, q0, d0
vfmat.bf16 q0, d0, d0
vfmat.bf16 q0, q0, d0
vfmat.bf16 q0, d0, q0
vfmat.bf16 d0, q0, q0
vfmat.bf16 q0, q0, q0[3]
vfmat.bf16 q0, q0, q0[3]
vfmat.bf16 q0, d0, d0[0]
vfmat.bf16 d0, q0, d0[0]
vfmat.bf16 q0, d0, d0[9]
vfmab.bf16 d0, d0, d0
vfmab.bf16 d0, d0, q0
vfmab.bf16 d0, q0, d0
vfmab.bf16 q0, d0, d0
vfmab.bf16 q0, q0, d0
vfmab.bf16 q0, d0, q0
vfmab.bf16 d0, q0, q0
vfmab.bf16 q0, q0, q0[3]
vfmab.bf16 q0, q0, q0[3]
vfmab.bf16 q0, d0, d0[0]
vfmab.bf16 d0, q0, d0[0]
vfmab.bf16 q0, d0, d0[9]
//CHECK:error: invalid instruction
//CHECK-NEXT:vfmat.bf16 d0, d0, d0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmat.bf16 d0, d0, q0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmat.bf16 d0, q0, d0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmat.bf16 q0, d0, d0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
//CHECK-NEXT:vfmat.bf16 q0, q0, d0
//CHECK-NEXT:^
//CHECK-NEXT:note: too few operands for instruction
//CHECK-NEXT:vfmat.bf16 q0, q0, d0
//CHECK-NEXT: ^
//CHECK-NEXT:note: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmat.bf16 q0, q0, d0
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmat.bf16 q0, d0, q0
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmat.bf16 d0, q0, q0
//CHECK-NEXT: ^
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
//CHECK-NEXT:^
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:note: too many operands for instruction
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
//CHECK-NEXT:^
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:note: too many operands for instruction
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmat.bf16 q0, d0, d0[0]
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmat.bf16 d0, q0, d0[0]
//CHECK-NEXT: ^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmat.bf16 q0, d0, d0[9]
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmab.bf16 d0, d0, d0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmab.bf16 d0, d0, q0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmab.bf16 d0, q0, d0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmab.bf16 q0, d0, d0
//CHECK-NEXT:^
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
//CHECK-NEXT:vfmab.bf16 q0, q0, d0
//CHECK-NEXT:^
//CHECK-NEXT:note: too few operands for instruction
//CHECK-NEXT:vfmab.bf16 q0, q0, d0
//CHECK-NEXT: ^
//CHECK-NEXT:note: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmab.bf16 q0, q0, d0
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmab.bf16 q0, d0, q0
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmab.bf16 d0, q0, q0
//CHECK-NEXT: ^
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
//CHECK-NEXT:^
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:note: too many operands for instruction
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
//CHECK-NEXT:^
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:note: too many operands for instruction
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmab.bf16 q0, d0, d0[0]
//CHECK-NEXT: ^
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
//CHECK-NEXT:vfmab.bf16 d0, q0, d0[0]
//CHECK-NEXT: ^
//CHECK-NEXT:error: invalid instruction
//CHECK-NEXT:vfmab.bf16 q0, d0, d0[9]

View File

@ -0,0 +1,55 @@
// RUN: llvm-mc -triple arm -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
// RUN: llvm-mc -triple arm -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK
vdot.bf16 d3, d4, d5
// CHECK: vdot.bf16 d3, d4, d5 @ encoding: [0x05,0x3d,0x04,0xfc]
vdot.bf16 q0, q1, q2
// CHECK-NEXT: vdot.bf16 q0, q1, q2 @ encoding: [0x44,0x0d,0x02,0xfc]
vdot.bf16 d3, d4, d5[1]
// CHECK-NEXT: vdot.bf16 d3, d4, d5[1] @ encoding: [0x25,0x3d,0x04,0xfe]
vdot.bf16 q0, q1, d5[1]
// CHECK-NEXT: vdot.bf16 q0, q1, d5[1] @ encoding: [0x65,0x0d,0x02,0xfe]
vmmla.bf16 q0, q1, q2
// CHECK-NEXT: vmmla.bf16 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xfc]
vcvt.bf16.f32 d1, q3
// CHECK-NEXT: vcvt.bf16.f32 d1, q3 @ encoding: [0x46,0x16,0xb6,0xf3]
vcvtbeq.bf16.f32 s1, s3
// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0x61,0x09,0xf3,0x0e]
vcvttne.bf16.f32 s1, s3
// CHECK-NEXT: vcvttne.bf16.f32 s1, s3 @ encoding: [0xe1,0x09,0xf3,0x1e]
vfmat.bf16 q0, q0, q0
//CHECK-NEXT: vfmat.bf16 q0, q0, q0 @ encoding: [0x50,0x08,0x30,0xfc]
vfmat.bf16 q0, q0, q15
//CHECK-NEXT: vfmat.bf16 q0, q0, q15 @ encoding: [0x7e,0x08,0x30,0xfc]
vfmat.bf16 q0, q15, q0
//CHECK-NEXT: vfmat.bf16 q0, q15, q0 @ encoding: [0xd0,0x08,0x3e,0xfc]
vfmat.bf16 q0, q15, q15
//CHECK-NEXT: vfmat.bf16 q0, q15, q15 @ encoding: [0xfe,0x08,0x3e,0xfc]
vfmat.bf16 q7, q0, q0
//CHECK-NEXT: vfmat.bf16 q7, q0, q0 @ encoding: [0x50,0xe8,0x30,0xfc]
vfmat.bf16 q8, q0, q0
//CHECK-NEXT: vfmat.bf16 q8, q0, q0 @ encoding: [0x50,0x08,0x70,0xfc]
vfmab.bf16 q0, q0, q0
//CHECK-NEXT: vfmab.bf16 q0, q0, q0 @ encoding: [0x10,0x08,0x30,0xfc]
vfmab.bf16 q0, q0, q15
//CHECK-NEXT: vfmab.bf16 q0, q0, q15 @ encoding: [0x3e,0x08,0x30,0xfc]
vfmab.bf16 q0, q15, q0
//CHECK-NEXT: vfmab.bf16 q0, q15, q0 @ encoding: [0x90,0x08,0x3e,0xfc]
vfmab.bf16 q0, q15, q15
//CHECK-NEXT: vfmab.bf16 q0, q15, q15 @ encoding: [0xbe,0x08,0x3e,0xfc]
vfmab.bf16 q7, q0, q0
//CHECK-NEXT: vfmab.bf16 q7, q0, q0 @ encoding: [0x10,0xe8,0x30,0xfc]
vfmab.bf16 q8, q0, q0
//CHECK-NEXT: vfmab.bf16 q8, q0, q0 @ encoding: [0x10,0x08,0x70,0xfc]
vfmat.bf16 q0, q0, d0[0]
//CHECK-NEXT: vfmat.bf16 q0, q0, d0[0] @ encoding: [0x50,0x08,0x30,0xfe]
vfmat.bf16 q0, q0, d0[3]
//CHECK-NEXT: vfmat.bf16 q0, q0, d0[3] @ encoding: [0x78,0x08,0x30,0xfe]
vfmat.bf16 q0, q0, d7[0]
//CHECK-NEXT: vfmat.bf16 q0, q0, d7[0] @ encoding: [0x57,0x08,0x30,0xfe]
vfmab.bf16 q0, q0, d0[0]
//CHECK-NEXT: vfmab.bf16 q0, q0, d0[0] @ encoding: [0x10,0x08,0x30,0xfe]
vfmab.bf16 q0, q0, d0[3]
//CHECK-NEXT: vfmab.bf16 q0, q0, d0[3] @ encoding: [0x38,0x08,0x30,0xfe]
vfmab.bf16 q0, q0, d7[0]
//CHECK-NEXT: vfmab.bf16 q0, q0, d7[0] @ encoding: [0x17,0x08,0x30,0xfe]

View File

@ -0,0 +1,32 @@
// RUN: not llvm-mc -triple thumbv8 -mattr=-bf16 < %s 2>&1 | FileCheck %s
vdot.bf16 d3, d4, d5
// CHECK: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vdot.bf16 d3, d4, d5
vdot.bf16 q0, q1, q2
// CHECK: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vdot.bf16 q0, q1, q2
vdot.bf16 d3, d4, d5[1]
// CHECK: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vdot.bf16 d3, d4, d5[1]
vdot.bf16 q0, q1, d5[1]
// CHECK: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vdot.bf16 q0, q1, d5[1]
vmmla.bf16 q0, q1, q2
// CHECK: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vmmla.bf16 q0, q1, q2
vcvt.bf16.f32 d1, q3
// CHECK: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vcvt.bf16.f32 d1, q3
vcvtbeq.bf16.f32 s1, s3
// CHECK: note: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3
vcvttne.bf16.f32 s1, s3
// CHECK: note: instruction requires: BFloat16 floating point extension
// CHECK-NEXT: vcvttne.bf16.f32 s1, s3

View File

@ -0,0 +1,15 @@
// RUN: llvm-mc -triple thumbv8 -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
// RUN: llvm-mc -triple thumbv8 -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK
vcvt.bf16.f32 d1, q3
// CHECK: vcvt.bf16.f32 d1, q3 @ encoding: [0xb6,0xff,0x46,0x16]
it eq
vcvtbeq.bf16.f32 s1, s3
// CHECK: it eq @ encoding: [0x08,0xbf]
// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0x61,0x09]
it ne
vcvttne.bf16.f32 s1, s3
// CHECK: it ne @ encoding: [0x18,0xbf]
// CHECK-NEXT: vcvttne.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0xe1,0x09]

View File

@ -0,0 +1,74 @@
# RUN: llvm-mc -triple=aarch64 -mattr=+bf16 -disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple=aarch64 -mattr=+v8.6a -disassemble < %s | FileCheck %s
# RUN: not llvm-mc -triple=aarch64 -mattr=-bf16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
# RUN: not llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
[0x62,0xfc,0x44,0x2e]
[0x62,0xfc,0x44,0x6e]
# CHECK: bfdot v2.2s, v3.4h, v4.4h
# CHECK: bfdot v2.4s, v3.8h, v4.8h
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xfc,0x44,0x2e]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xfc,0x44,0x6e]
[0x62,0xf0,0x44,0x4f]
[0x62,0xf0,0x64,0x4f]
[0x62,0xf8,0x44,0x4f]
[0x62,0xf8,0x64,0x4f]
# CHECK: bfdot v2.4s, v3.8h, v4.2h[0]
# CHECK: bfdot v2.4s, v3.8h, v4.2h[1]
# CHECK: bfdot v2.4s, v3.8h, v4.2h[2]
# CHECK: bfdot v2.4s, v3.8h, v4.2h[3]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf0,0x44,0x4f]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf0,0x64,0x4f]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf8,0x44,0x4f]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf8,0x64,0x4f]
[0x62,0xf0,0x44,0x0f]
[0x62,0xf0,0x64,0x0f]
[0x62,0xf8,0x44,0x0f]
[0x62,0xf8,0x64,0x0f]
# CHECK: bfdot v2.2s, v3.4h, v4.2h[0]
# CHECK: bfdot v2.2s, v3.4h, v4.2h[1]
# CHECK: bfdot v2.2s, v3.4h, v4.2h[2]
# CHECK: bfdot v2.2s, v3.4h, v4.2h[3]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf0,0x44,0x0f]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf0,0x64,0x0f]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf8,0x44,0x0f]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xf8,0x64,0x0f]
[0x62,0xec,0x44,0x6e]
[0x83,0xec,0x45,0x6e]
# CHECK: bfmmla v2.4s, v3.8h, v4.8h
# CHECK: bfmmla v3.4s, v4.8h, v5.8h
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x62,0xec,0x44,0x6e]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x83,0xec,0x45,0x6e]
[0xa5,0x68,0xa1,0x0e]
[0xa5,0x68,0xa1,0x4e]
# CHECK: bfcvtn v5.4h, v5.4s
# CHECK: bfcvtn2 v5.8h, v5.4s
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0xa5,0x68,0xa1,0x0e]
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0xa5,0x68,0xa1,0x4e]
[0x65, 0x40, 0x63, 0x1e]
# CHECK: bfcvt h5, s3
# NOBF16: warning: invalid instruction encoding
# NOBF16-NEXT: [0x65, 0x40, 0x63, 0x1e]

View File

@ -0,0 +1,102 @@
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
# RUN: llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
#
# Tests BFloat16 instruction decodings.
# Without BFloat16 enabled, some of these get disassembled to coprocessor instructions.
[0x25,0x3d,0x04,0xfe]
# CHECK: vdot.bf16 d3, d4, d5[1]
# NOBF16: cdp2 p13, #0, c3, c4, c5, #1
#
[0x65,0x0d,0x02,0xfe]
# CHECK-NEXT: vdot.bf16 q0, q1, d5[1]
# NOBF16-NEXT: cdp2 p13, #0, c0, c2, c5, #3
#
[0x61,0x09,0xf3,0x0e]
# CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3
# NOBF16-NEXT: cdpeq p9, #15, c0, c3, c1, #3
#
[0xe1,0x09,0xf3,0x1e]
# CHECK-NEXT: vcvttne.bf16.f32 s1, s3
# NOBF16-NEXT: cdpne p9, #15, c0, c3, c1, #7
#
[0x50,0x08,0x30,0xfc]
# CHECK-NEXT: vfmat.bf16 q0, q0, q0
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-320
#
[0x7e,0x08,0x30,0xfc]
# CHECK-NEXT: vfmat.bf16 q0, q0, q15
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-504
#
[0xd0,0x08,0x3e,0xfc]
# CHECK-NEXT: vfmat.bf16 q0, q15, q0
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-832
#
[0xfe,0x08,0x3e,0xfc]
# CHECK-NEXT: vfmat.bf16 q0, q15, q15
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-1016
#
[0xd0,0x08,0x30,0xfc]
# CHECK-NEXT: vfmat.bf16 q0, q8, q0
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-832
#
[0x50,0xe8,0x30,0xfc]
# CHECK-NEXT: vfmat.bf16 q7, q0, q0
# NOBF16-NEXT: ldc2 p8, c14, [r0], #-320
#
[0x50,0x08,0x70,0xfc]
# CHECK-NEXT: vfmat.bf16 q8, q0, q0
# NOBF16-NEXT: ldc2l p8, c0, [r0], #-320
#
[0x10,0x08,0x30,0xfc]
# CHECK-NEXT: vfmab.bf16 q0, q0, q0
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-64
#
[0x3e,0x08,0x30,0xfc]
# CHECK-NEXT: vfmab.bf16 q0, q0, q15
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-248
#
[0x90,0x08,0x3e,0xfc]
# CHECK-NEXT: vfmab.bf16 q0, q15, q0
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-576
#
[0xbe,0x08,0x3e,0xfc]
# CHECK-NEXT: vfmab.bf16 q0, q15, q15
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-760
#
[0x90,0x08,0x30,0xfc]
# CHECK-NEXT: vfmab.bf16 q0, q8, q0
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-576
#
[0x10,0xe8,0x30,0xfc]
# CHECK-NEXT: vfmab.bf16 q7, q0, q0
# NOBF16-NEXT: ldc2 p8, c14, [r0], #-64
#
[0x10,0x08,0x70,0xfc]
# CHECK-NEXT: vfmab.bf16 q8, q0, q0
# NOBF16-NEXT: ldc2l p8, c0, [r0], #-64
#
[0x50,0x08,0x30,0xfe]
# CHECK-NEXT: vfmat.bf16 q0, q0, d0[0]
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #2
#
[0x78,0x08,0x30,0xfe]
# CHECK-NEXT: vfmat.bf16 q0, q0, d0[3]
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #3
[0x57,0x08,0x30,0xfe]
#
# CHECK-NEXT: vfmat.bf16 q0, q0, d7[0]
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #2
[0x10,0x08,0x30,0xfe]
#
# CHECK-NEXT: vfmab.bf16 q0, q0, d0[0]
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #0
[0x38,0x08,0x30,0xfe]
#
# CHECK-NEXT: vfmab.bf16 q0, q0, d0[3]
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #1
#
[0x17,0x08,0x30,0xfe]
# CHECK-NEXT: vfmab.bf16 q0, q0, d7[0]
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #0

View File

@ -0,0 +1,20 @@
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16
# RUN: not llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16
[0x05,0x3d,0x04,0xfc]
# CHECK: vdot.bf16 d3, d4, d5
# CHECK-NOBF16: warning: invalid instruction encoding
[0x44,0x0d,0x02,0xfc]
# CHECK: vdot.bf16 q0, q1, q2
# CHECK-NOBF16: warning: invalid instruction encoding
[0x44,0x0c,0x02,0xfc]
# CHECK: vmmla.bf16 q0, q1, q2
# CHECK-NOBF16: warning: invalid instruction encoding
[0x46,0x16,0xb6,0xf3]
# CHECK: vcvt.bf16.f32 d1, q3
# CHECK-NOBF16: warning: invalid instruction encoding

View File

@ -0,0 +1,25 @@
# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
[0x04,0xfc,0x05,0x3d]
[0x02,0xfc,0x44,0x0d]
# CHECK: vdot.bf16 d3, d4, d5
# CHECK: vdot.bf16 q0, q1, q2
[0x04,0xfe,0x25,0x3d]
# CHECK: vdot.bf16 d3, d4, d5[1]
[0x02,0xfe,0x65,0x0d]
# CHECK: vdot.bf16 q0, q1, d5[1]
[0x02,0xfc,0x44,0x0c]
# CHECK: vmmla.bf16 q0, q1, q2
[0xb6,0xff,0x46,0x16]
# CHECK: vcvt.bf16.f32 d1, q3
[0xf3,0xee,0x61,0x09]
# CHECK: vcvtb.bf16.f32 s1, s3
[0xf3,0xee,0xe1,0x09]
# CHECK: vcvtt.bf16.f32 s1, s3

View File

@ -0,0 +1,40 @@
# RUN: not llvm-mc -triple thumbv8-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s
# RUN: not llvm-mc -triple thumbv8-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s
[0x04,0xfc,0x05,0x3d]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0x04,0xfc,0x05,0x3d]
[0x02,0xfc,0x44,0x0d]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0x02,0xfc,0x44,0x0d]
[0x04,0xfe,0x25,0x3d]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0x04,0xfe,0x25,0x3d]
[0x02,0xfe,0x65,0x0d]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0x02,0xfe,0x65,0x0d]
[0x02,0xfc,0x44,0x0c]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0x02,0xfc,0x44,0x0c]
[0xb6,0xff,0x46,0x16]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0xb6,0xff,0x46,0x16]
[0xf3,0xee,0x61,0x09]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0xf3,0xee,0x61,0x09]
[0xf3,0xee,0xe1,0x09]
# CHECK: warning: invalid instruction encoding
# CHECK-NEXT: [0xf3,0xee,0xe1,0x09]

View File

@ -26,9 +26,9 @@ const char *ARMArch[] = {
"armv7e-m", "armv7em", "armv8-a", "armv8", "armv8a",
"armv8l", "armv8.1-a", "armv8.1a", "armv8.2-a", "armv8.2a",
"armv8.3-a", "armv8.3a", "armv8.4-a", "armv8.4a", "armv8.5-a",
"armv8.5a", "armv8-r", "armv8r", "armv8-m.base", "armv8m.base",
"armv8-m.main", "armv8m.main", "iwmmxt", "iwmmxt2", "xscale",
"armv8.1-m.main",
"armv8.5a", "armv8.6-a", "armv8.6a", "armv8-r", "armv8r",
"armv8-m.base", "armv8m.base", "armv8-m.main", "armv8m.main", "iwmmxt",
"iwmmxt2", "xscale", "armv8.1-m.main",
};
bool testARMCPU(StringRef CPUName, StringRef ExpectedArch,
@ -410,6 +410,9 @@ TEST(TargetParserTest, testARMArch) {
EXPECT_TRUE(
testARMArch("armv8.5-a", "generic", "v8.5a",
ARMBuildAttrs::CPUArch::v8_A));
EXPECT_TRUE(
testARMArch("armv8.6-a", "generic", "v8.6a",
ARMBuildAttrs::CPUArch::v8_A));
EXPECT_TRUE(
testARMArch("armv8-r", "cortex-r52", "v8r",
ARMBuildAttrs::CPUArch::v8_R));
@ -678,7 +681,7 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) {
"v7", "v7a", "v7ve", "v7hl", "v7l", "v7-r", "v7r", "v7-m",
"v7m", "v7k", "v7s", "v7e-m", "v7em", "v8-a", "v8", "v8a",
"v8l", "v8.1-a", "v8.1a", "v8.2-a", "v8.2a", "v8.3-a", "v8.3a", "v8.4-a",
"v8.4a", "v8.5-a","v8.5a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main"
"v8.4a", "v8.5-a","v8.5a", "v8.6-a", "v8.6a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main"
};
for (unsigned i = 0; i < array_lengthof(Arch); i++) {
@ -743,6 +746,7 @@ TEST(TargetParserTest, ARMparseArchProfile) {
case ARM::ArchKind::ARMV8_3A:
case ARM::ArchKind::ARMV8_4A:
case ARM::ArchKind::ARMV8_5A:
case ARM::ArchKind::ARMV8_6A:
EXPECT_EQ(ARM::ProfileKind::A, ARM::parseArchProfile(ARMArch[i]));
break;
default:
@ -1008,6 +1012,8 @@ TEST(TargetParserTest, testAArch64Arch) {
ARMBuildAttrs::CPUArch::v8_A));
EXPECT_TRUE(testAArch64Arch("armv8.5-a", "generic", "v8.5a",
ARMBuildAttrs::CPUArch::v8_A));
EXPECT_TRUE(testAArch64Arch("armv8.6-a", "generic", "v8.6a",
ARMBuildAttrs::CPUArch::v8_A));
}
bool testAArch64Extension(StringRef CPUName, AArch64::ArchKind AK,