forked from OSchip/llvm-project
[PATCH] [ARM] ARMv8.6-a command-line + BFloat16 Asm Support
Summary: This patch introduces command-line support for the Armv8.6-a architecture and assembly support for BFloat16. Details can be found at https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a in addition to the GCC patch for the 8.6-a CLI: https://gcc.gnu.org/legacy-ml/gcc-patches/2019-11/msg02647.html In detail, this patch adds: - march options for armv8.6-a - BFloat16 assembly. This is part of a patch series, starting with command-line and BFloat16 assembly support. The subsequent patches will upstream intrinsics support for BFloat16, followed by Matrix Multiplication and the remaining Virtualization features of the armv8.6-a architecture. Based on work by: - labrinea - MarkMurrayARM - Luke Cheeseman - Javed Asbar - Mikhail Maltsev - Luke Geeson Reviewers: SjoerdMeijer, craig.topper, rjmccall, jfb, LukeGeeson Reviewed By: SjoerdMeijer Subscribers: stuij, kristof.beyls, hiraditya, dexonsmith, danielkiss, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D76062
This commit is contained in:
parent
6a946993d5
commit
71ae267d1f
|
@ -151,6 +151,7 @@ void AArch64TargetInfo::fillValidCPUList(
|
|||
|
||||
// Emits the Armv8.1-A specific predefined macro(s) into Builder.
// Currently this is only __ARM_FEATURE_QRDMX (defined to "1"); Opts is not
// consulted by this body.
void AArch64TargetInfo::getTargetDefinesARMV81A(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const {
|
||||
// FIXME: Armv8.1 makes __ARM_FEATURE_CRC32 mandatory. Handle it here.
|
||||
Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
|
||||
}
|
||||
|
||||
|
@ -171,17 +172,26 @@ void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts,
|
|||
// Emits the predefined macros for Armv8.4-A into Builder.
// No 8.4-specific macro is defined here yet (see FIXMEs); the body only
// forwards to the Armv8.3-A helper with the same arguments.
void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const {
|
||||
// Also include the Armv8.3 defines
|
||||
// FIXME: Armv8.4 makes some extensions mandatory. Handle them here.
|
||||
// FIXME: Armv8.4 makes __ARM_FEATURE_ATOMICS, defined in GCC, mandatory.
|
||||
// Add and handle it here.
|
||||
getTargetDefinesARMV83A(Opts, Builder);
|
||||
}
|
||||
|
||||
// Emits the predefined macros for Armv8.5-A into Builder.
// No 8.5-specific macro is defined here yet (see FIXME); the body only
// forwards to the Armv8.4-A helper with the same arguments.
void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const {
|
||||
// Also include the Armv8.4 defines
|
||||
// FIXME: Armv8.5 makes some extensions mandatory. Handle them here.
|
||||
getTargetDefinesARMV84A(Opts, Builder);
|
||||
}
|
||||
|
||||
// Emits the predefined macros for Armv8.6-A into Builder.
// The BF16 / MATMUL_INT8 feature macros listed in the FIXME are NOT defined
// by this body; it only forwards to the Armv8.5-A helper with the same
// arguments.
void AArch64TargetInfo::getTargetDefinesARMV86A(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const {
|
||||
// Also include the Armv8.5 defines
|
||||
// FIXME: Armv8.6 makes the following extensions mandatory:
|
||||
// - __ARM_FEATURE_BF16
|
||||
// - __ARM_FEATURE_MATMUL_INT8
|
||||
// Handle them here.
|
||||
getTargetDefinesARMV85A(Opts, Builder);
|
||||
}
|
||||
|
||||
void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const {
|
||||
|
@ -290,6 +300,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
case llvm::AArch64::ArchKind::ARMV8_5A:
|
||||
getTargetDefinesARMV85A(Opts, Builder);
|
||||
break;
|
||||
case llvm::AArch64::ArchKind::ARMV8_6A:
|
||||
getTargetDefinesARMV86A(Opts, Builder);
|
||||
break;
|
||||
}
|
||||
|
||||
// All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
|
||||
|
@ -344,6 +357,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
ArchKind = llvm::AArch64::ArchKind::ARMV8_4A;
|
||||
if (Feature == "+v8.5a")
|
||||
ArchKind = llvm::AArch64::ArchKind::ARMV8_5A;
|
||||
if (Feature == "+v8.6a")
|
||||
ArchKind = llvm::AArch64::ArchKind::ARMV8_6A;
|
||||
if (Feature == "+fullfp16")
|
||||
HasFullFP16 = true;
|
||||
if (Feature == "+dotprod")
|
||||
|
|
|
@ -70,6 +70,8 @@ public:
|
|||
MacroBuilder &Builder) const;
|
||||
void getTargetDefinesARMV85A(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const;
|
||||
void getTargetDefinesARMV86A(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const;
|
||||
void getTargetDefines(const LangOptions &Opts,
|
||||
MacroBuilder &Builder) const override;
|
||||
|
||||
|
|
|
@ -201,6 +201,8 @@ StringRef ARMTargetInfo::getCPUAttr() const {
|
|||
return "8_4A";
|
||||
case llvm::ARM::ArchKind::ARMV8_5A:
|
||||
return "8_5A";
|
||||
case llvm::ARM::ArchKind::ARMV8_6A:
|
||||
return "8_6A";
|
||||
case llvm::ARM::ArchKind::ARMV8MBaseline:
|
||||
return "8M_BASE";
|
||||
case llvm::ARM::ArchKind::ARMV8MMainline:
|
||||
|
@ -830,6 +832,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
case llvm::ARM::ArchKind::ARMV8_3A:
|
||||
case llvm::ARM::ArchKind::ARMV8_4A:
|
||||
case llvm::ARM::ArchKind::ARMV8_5A:
|
||||
case llvm::ARM::ArchKind::ARMV8_6A:
|
||||
getTargetDefinesARMV83A(Opts, Builder);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -603,6 +603,39 @@
|
|||
// RUN: %clang -target aarch64 -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-FP16 %s
|
||||
// GENERICV85A-FP16: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.5a" "-target-feature" "+fullfp16"
|
||||
|
||||
// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
|
||||
// RUN: %clang -target aarch64 -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
|
||||
// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
|
||||
// RUN: %clang -target aarch64 -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
|
||||
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
|
||||
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A %s
|
||||
// GENERICV86A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a"
|
||||
|
||||
// RUN: %clang -target aarch64_be -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
|
||||
// RUN: %clang -target aarch64_be -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
|
||||
// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
|
||||
// RUN: %clang -target aarch64 -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
|
||||
// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
|
||||
// RUN: %clang -target aarch64_be -mbig-endian -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV86A-BE %s
|
||||
// GENERICV86A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+neon" "-target-feature" "+v8.6a"
|
||||
|
||||
// The SVE extension is an optional extension for Armv8-A.
|
||||
// RUN: %clang -target aarch64 -march=armv8a+sve -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-SVE %s
|
||||
// RUN: %clang -target aarch64 -march=armv8.6a+sve -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-SVE %s
|
||||
// GENERICV8A-SVE: "-target-feature" "+sve"
|
||||
// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
|
||||
// RUN: %clang -target aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
|
||||
// GENERICV8A-NOSVE-NOT: "-target-feature" "+sve"
|
||||
|
||||
// The BFloat16 extension is a mandatory component of the Armv8.6-A extensions, but is permitted as an
|
||||
// optional feature for any implementation of Armv8.2-A to Armv8.5-A (inclusive)
|
||||
// RUN: %clang -target aarch64 -march=armv8.5a+bf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16 %s
|
||||
// GENERICV85A-BF16: "-target-feature" "+bf16"
|
||||
// RUN: %clang -target aarch64 -march=armv8.5a+bf16+nobf16 -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV85A-BF16-NO-BF16 %s
|
||||
// GENERICV85A-BF16-NO-BF16: "-target-feature" "-bf16"
|
||||
// RUN: %clang -target aarch64 -march=armv8.5a+bf16+sve -### -c %s 2>&1 | FileCheck -check-prefixes=GENERICV85A-BF16-SVE %s
|
||||
// GENERICV85A-BF16-SVE: "-target-feature" "+bf16" "-target-feature" "+sve"
|
||||
|
||||
// fullfp16 is off by default for v8a, feature must not be mentioned
|
||||
// RUN: %clang -target aarch64 -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
|
||||
// RUN: %clang -target aarch64 -march=armv8-a -### -c %s 2>&1 | FileCheck -check-prefix=V82ANOFP16 -check-prefix=GENERIC %s
|
||||
|
|
|
@ -335,6 +335,23 @@
|
|||
// RUN: %clang -target arm -march=armebv8.5-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V85A %s
|
||||
// CHECK-BE-V85A: "-cc1"{{.*}} "-triple" "armebv8.5{{.*}}" "-target-cpu" "generic"
|
||||
|
||||
// RUN: %clang -target armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
|
||||
// RUN: %clang -target arm -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
|
||||
// RUN: %clang -target arm -march=armv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
|
||||
// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
|
||||
// RUN: %clang -target armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
|
||||
// RUN: %clang -target arm -march=armv8.6a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
|
||||
// RUN: %clang -target arm -mlittle-endian -march=armv8.6-a -mlittle-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-V86A %s
|
||||
// CHECK-V86A: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic"
|
||||
|
||||
// RUN: %clang -target armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
|
||||
// RUN: %clang -target armv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
|
||||
// RUN: %clang -target armeb -march=armebv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
|
||||
// RUN: %clang -target armeb -march=armebv8.6-a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
|
||||
// RUN: %clang -target arm -march=armebv8.6a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
|
||||
// RUN: %clang -target arm -march=armebv8.6-a -mbig-endian -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-BE-V86A %s
|
||||
// CHECK-BE-V86A: "-cc1"{{.*}} "-triple" "armebv8.6{{.*}}" "-target-cpu" "generic"
|
||||
|
||||
// Once we have CPUs with optional v8.2-A FP16, we will need a way to turn it
|
||||
// on and off. Cortex-A53 is a placeholder for now.
|
||||
// RUN: %clang -target armv8a-linux-eabi -mcpu=cortex-a53+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-CORTEX-A53-FP16 %s
|
||||
|
@ -432,6 +449,9 @@
|
|||
// RUN: %clang -target armv8a-linux-eabi -march=armv8.5-a+fp16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V85A-FP16 %s
|
||||
// CHECK-V85A-FP16: "-cc1"{{.*}} "-triple" "armv8.5{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+fullfp16"
|
||||
|
||||
// RUN: %clang -target armv8a-linux-eabi -march=armv8.6-a+bf16 -### -c %s 2>&1 | FileCheck --check-prefix CHECK-V86A-BF16 %s
|
||||
// CHECK-V86A-BF16: "-cc1"{{.*}} "-triple" "armv8.6{{.*}}" "-target-cpu" "generic" {{.*}}"-target-feature" "+bf16"
|
||||
|
||||
// RUN: %clang -target arm -march=armv8.2-a+fp16 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
|
||||
// RUN: %clang -target arm -march=armv8.2-a+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
|
||||
// RUN: %clang -target arm -march=armv8.2-a+fp16+fp16fml -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-FULLFP16-SOFT %s
|
||||
|
|
|
@ -841,5 +841,10 @@
|
|||
// CHECK-V85A: #define __ARM_ARCH_8_5A__ 1
|
||||
// CHECK-V85A: #define __ARM_ARCH_PROFILE 'A'
|
||||
|
||||
// RUN: %clang -target armv8.6a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V86A %s
|
||||
// CHECK-V86A: #define __ARM_ARCH 8
|
||||
// CHECK-V86A: #define __ARM_ARCH_8_6A__ 1
|
||||
// CHECK-V86A: #define __ARM_ARCH_PROFILE 'A'
|
||||
|
||||
// RUN: %clang -target arm-none-none-eabi -march=armv7-m -mfpu=softvfp -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SOFTVFP %s
|
||||
// CHECK-SOFTVFP-NOT: #define __ARM_FP 0x
|
||||
|
|
|
@ -101,6 +101,7 @@ public:
|
|||
enum SubArchType {
|
||||
NoSubArch,
|
||||
|
||||
ARMSubArch_v8_6a,
|
||||
ARMSubArch_v8_5a,
|
||||
ARMSubArch_v8_4a,
|
||||
ARMSubArch_v8_3a,
|
||||
|
|
|
@ -44,6 +44,13 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
|
|||
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
|
||||
AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
|
||||
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD))
|
||||
// Table entry for the "armv8.6-a" AArch64 architecture (ArchKind ARMV8_6A,
// feature string "v8.6a"). The last argument is the default extension mask:
// it includes AEK_BF16 and AEK_I8MM, and lists the individual crypto bits
// (AEK_SM4, AEK_SHA3, AEK_SHA2, AEK_AES) rather than AEK_CRYPTO.
AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
|
||||
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
|
||||
(AArch64::AEK_CRC | AArch64::AEK_FP |
|
||||
AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
|
||||
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
|
||||
AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
|
||||
AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
|
||||
#undef AARCH64_ARCH
|
||||
|
||||
#ifndef AARCH64_ARCH_EXT_NAME
|
||||
|
@ -79,6 +86,8 @@ AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte"
|
|||
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
|
||||
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
|
||||
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
|
||||
AARCH64_ARCH_EXT_NAME("bf16", AArch64::AEK_BF16, "+bf16", "-bf16")
|
||||
AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm")
|
||||
AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme")
|
||||
#undef AARCH64_ARCH_EXT_NAME
|
||||
|
||||
|
|
|
@ -55,6 +55,8 @@ enum ArchExtKind : unsigned {
|
|||
AEK_SVE2SHA3 = 1 << 26,
|
||||
AEK_SVE2BITPERM = 1 << 27,
|
||||
AEK_TME = 1 << 28,
|
||||
AEK_BF16 = 1 << 29,
|
||||
AEK_I8MM = 1 << 30,
|
||||
};
|
||||
|
||||
enum class ArchKind {
|
||||
|
|
|
@ -112,6 +112,11 @@ ARM_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
|
|||
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
|
||||
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
|
||||
ARM::AEK_DOTPROD))
|
||||
// Table entry for the "armv8.6-a" AArch32 architecture (ArchKind ARMV8_6A,
// feature string "v8.6a"). The last argument is the default extension mask;
// it includes AEK_BF16, AEK_SHA2 and AEK_AES alongside AEK_DOTPROD, AEK_RAS,
// AEK_CRC and the other bits listed below.
ARM_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a",
|
||||
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
|
||||
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
|
||||
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
|
||||
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
|
||||
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
|
||||
FK_NEON_FP_ARMV8,
|
||||
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
|
||||
|
@ -164,6 +169,7 @@ ARM_ARCH_EXT_NAME("iwmmxt2", ARM::AEK_IWMMXT2, nullptr, nullptr)
|
|||
ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr, nullptr)
|
||||
ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
|
||||
ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
|
||||
ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16", "-bf16")
|
||||
ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
|
||||
ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob")
|
||||
ARM_ARCH_EXT_NAME("cdecp0", ARM::AEK_CDECP0, "+cdecp0", "-cdecp0")
|
||||
|
|
|
@ -46,14 +46,15 @@ enum ArchExtKind : uint64_t {
|
|||
AEK_SB = 1 << 17,
|
||||
AEK_FP_DP = 1 << 18,
|
||||
AEK_LOB = 1 << 19,
|
||||
AEK_CDECP0 = 1 << 20,
|
||||
AEK_CDECP1 = 1 << 21,
|
||||
AEK_CDECP2 = 1 << 22,
|
||||
AEK_CDECP3 = 1 << 23,
|
||||
AEK_CDECP4 = 1 << 24,
|
||||
AEK_CDECP5 = 1 << 25,
|
||||
AEK_CDECP6 = 1 << 26,
|
||||
AEK_CDECP7 = 1 << 27,
|
||||
AEK_BF16 = 1 << 20,
|
||||
AEK_CDECP0 = 1 << 21,
|
||||
AEK_CDECP1 = 1 << 22,
|
||||
AEK_CDECP2 = 1 << 23,
|
||||
AEK_CDECP3 = 1 << 24,
|
||||
AEK_CDECP4 = 1 << 25,
|
||||
AEK_CDECP5 = 1 << 26,
|
||||
AEK_CDECP6 = 1 << 27,
|
||||
AEK_CDECP7 = 1 << 28,
|
||||
|
||||
// Unsupported extensions.
|
||||
AEK_OS = 1ULL << 59,
|
||||
|
|
|
@ -116,6 +116,8 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK,
|
|||
Features.push_back("+v8.4a");
|
||||
if (AK == ArchKind::ARMV8_5A)
|
||||
Features.push_back("+v8.5a");
|
||||
if (AK == AArch64::ArchKind::ARMV8_6A)
|
||||
Features.push_back("+v8.6a");
|
||||
|
||||
return AK != ArchKind::INVALID;
|
||||
}
|
||||
|
|
|
@ -74,6 +74,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
|
|||
case ArchKind::ARMV8_3A:
|
||||
case ArchKind::ARMV8_4A:
|
||||
case ArchKind::ARMV8_5A:
|
||||
case ArchKind::ARMV8_6A:
|
||||
case ArchKind::ARMV8R:
|
||||
case ArchKind::ARMV8MBaseline:
|
||||
case ArchKind::ARMV8MMainline:
|
||||
|
@ -108,6 +109,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
|
|||
case ArchKind::ARMV8_3A:
|
||||
case ArchKind::ARMV8_4A:
|
||||
case ArchKind::ARMV8_5A:
|
||||
case ArchKind::ARMV8_6A:
|
||||
return ProfileKind::A;
|
||||
case ArchKind::ARMV2:
|
||||
case ArchKind::ARMV2A:
|
||||
|
@ -150,6 +152,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
|
|||
.Case("v8.3a", "v8.3-a")
|
||||
.Case("v8.4a", "v8.4-a")
|
||||
.Case("v8.5a", "v8.5-a")
|
||||
.Case("v8.6a", "v8.6-a")
|
||||
.Case("v8r", "v8-r")
|
||||
.Case("v8m.base", "v8-m.base")
|
||||
.Case("v8m.main", "v8-m.main")
|
||||
|
|
|
@ -627,6 +627,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
|
|||
return Triple::ARMSubArch_v8_4a;
|
||||
case ARM::ArchKind::ARMV8_5A:
|
||||
return Triple::ARMSubArch_v8_5a;
|
||||
case ARM::ArchKind::ARMV8_6A:
|
||||
return Triple::ARMSubArch_v8_6a;
|
||||
case ARM::ArchKind::ARMV8R:
|
||||
return Triple::ARMSubArch_v8r;
|
||||
case ARM::ArchKind::ARMV8MBaseline:
|
||||
|
|
|
@ -365,6 +365,9 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
|
|||
"true", "Use an instruction sequence for taking the address of a global "
|
||||
"that allows a memory tag in the upper address bits">;
|
||||
|
||||
// AArch64 subtarget feature "bf16": when enabled, sets the HasBF16 subtarget
// field to "true". It declares no implied features.
def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
|
||||
"true", "Enable BFloat16 Extension" >;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Architectures.
|
||||
//
|
||||
|
@ -391,8 +394,11 @@ def HasV8_5aOps : SubtargetFeature<
|
|||
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
|
||||
[HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
|
||||
FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
|
||||
FeatureBranchTargetId]
|
||||
>;
|
||||
FeatureBranchTargetId]>;
|
||||
|
||||
// AArch64 subtarget feature "v8.6a": sets HasV8_6aOps and implies the v8.5a
// feature set plus FeatureBF16.
def HasV8_6aOps : SubtargetFeature<
|
||||
"v8.6a", "HasV8_6aOps", "true", "Support ARM v8.6a instructions",
|
||||
[HasV8_5aOps, FeatureBF16]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File Description
|
||||
|
|
|
@ -7786,6 +7786,110 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
|
|||
let Inst{4-0} = Rd;
|
||||
}
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Armv8.6 BFloat16 Extension
|
||||
//----------------------------------------------------------------------------
|
||||
let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in {
|
||||
|
||||
// Base class for the vector (non-indexed) BFDOT form. It reuses the
// BaseSIMDThreeSameVectorTied encoding and overrides AsmString so that the
// destination is printed with arrangement kind1 and both sources with kind2.
// AccumType and InputType are accepted but not referenced in this body, and
// the selection-pattern list passed to the base class is empty.
class BaseSIMDThreeSameVectorBFDot<bit Q, bit U, string asm, string kind1,
|
||||
string kind2, RegisterOperand RegType,
|
||||
ValueType AccumType, ValueType InputType>
|
||||
: BaseSIMDThreeSameVectorTied<Q, U, 0b010, 0b11111, RegType, asm, kind1, []> {
|
||||
let AsmString = !strconcat(asm,
|
||||
"{\t$Rd" # kind1 # ", $Rn" # kind2 #
|
||||
", $Rm" # kind2 # "}");
|
||||
}
|
||||
|
||||
// Instantiates the two vector BFDOT variants:
//   v4f16: Q=0, V64 registers,  ".2s" destination / ".4h" sources
//   v8f16: Q=1, V128 registers, ".4s" destination / ".8h" sources
multiclass SIMDThreeSameVectorBFDot<bit U, string asm> {
|
||||
def v4f16 : BaseSIMDThreeSameVectorBFDot<0, U, asm, ".2s", ".4h", V64,
|
||||
v2f32, v8i8>;
|
||||
def v8f16 : BaseSIMDThreeSameVectorBFDot<1, U, asm, ".4s", ".8h", V128,
|
||||
v4f32, v16i8>;
|
||||
}
|
||||
|
||||
// Base class for the by-element (indexed) BFDOT form, built on
// BaseSIMDIndexedTied with a V128 element-source register and a
// VectorIndexS index operand. The 2-bit index is split across the encoding:
// idx{0} goes to bit 21 (L) and idx{1} to bit 11 (H).
// AccumType and InputType are not referenced in this body; the pattern list
// is empty.
class BaseSIMDThreeSameVectorBF16DotI<bit Q, bit U, string asm,
|
||||
string dst_kind, string lhs_kind,
|
||||
string rhs_kind,
|
||||
RegisterOperand RegType,
|
||||
ValueType AccumType,
|
||||
ValueType InputType>
|
||||
: BaseSIMDIndexedTied<Q, U, 0b0, 0b01, 0b1111,
|
||||
RegType, RegType, V128, VectorIndexS,
|
||||
asm, "", dst_kind, lhs_kind, rhs_kind,
|
||||
[]> {
|
||||
|
||||
bits<2> idx;
|
||||
let Inst{21} = idx{0}; // L
|
||||
let Inst{11} = idx{1}; // H
|
||||
}
|
||||
|
||||
// Instantiates the two indexed BFDOT variants; both use a ".2h" element
// operand:
//   v4f16: Q=0, V64 registers,  ".2s" destination / ".4h" first source
//   v8f16: Q=1, V128 registers, ".4s" destination / ".8h" first source
multiclass SIMDThreeSameVectorBF16DotI<bit U, string asm> {
|
||||
|
||||
def v4f16 : BaseSIMDThreeSameVectorBF16DotI<0, U, asm, ".2s", ".4h",
|
||||
".2h", V64, v2f32, v8i8>;
|
||||
def v8f16 : BaseSIMDThreeSameVectorBF16DotI<1, U, asm, ".4s", ".8h",
|
||||
".2h", V128, v4f32, v16i8>;
|
||||
}
|
||||
|
||||
// Vector (non-indexed) BF16 multiply-add-long form on V128 registers. Q is
// the only encoding parameter that varies between instantiations. The
// assembly string is fixed to a ".4s" destination and ".8h" sources.
class SIMDBF16MLAL<bit Q, string asm>
|
||||
: BaseSIMDThreeSameVectorTied<Q, 0b1, 0b110, 0b11111, V128, asm, ".4s",
|
||||
[]> { // TODO: Add intrinsics
|
||||
let AsmString = !strconcat(asm, "{\t$Rd.4s, $Rn.8h, $Rm.8h}");
|
||||
}
|
||||
|
||||
// By-element (indexed) BF16 multiply-add-long form with a hand-written
// encoding. $Rd is tied to the $dst output. Rm is a 4-bit field (operand
// class V128_lo) and the 3-bit lane index is split: idx{1-0} -> bits 21-20,
// idx{2} -> bit 11 (H). Q (bit 30) is the only instantiation parameter in
// the encoding.
class SIMDBF16MLALIndex<bit Q, string asm>
|
||||
: I<(outs V128:$dst),
|
||||
(ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm,
|
||||
"{\t$Rd.4s, $Rn.8h, $Rm.h$idx}", "$Rd = $dst",
|
||||
[]>, // TODO: Add intrinsics
|
||||
Sched<[WriteV]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
bits<4> Rm;
|
||||
bits<3> idx;
|
||||
|
||||
let Inst{31} = 0;
|
||||
let Inst{30} = Q;
|
||||
let Inst{29-22} = 0b00111111;
|
||||
let Inst{21-20} = idx{1-0};
|
||||
let Inst{19-16} = Rm;
|
||||
let Inst{15-12} = 0b1111;
|
||||
let Inst{11} = idx{2}; // H
|
||||
let Inst{10} = 0;
|
||||
let Inst{9-5} = Rn;
|
||||
let Inst{4-0} = Rd;
|
||||
}
|
||||
|
||||
// BF16 matrix-multiply form on V128 registers with fixed Q=1, U=1. The
// assembly string is fixed to a ".4s" destination and ".8h" sources; no
// selection patterns are attached.
class SIMDThreeSameVectorBF16MatrixMul<string asm>
|
||||
: BaseSIMDThreeSameVectorTied<1, 1, 0b010, 0b11101,
|
||||
V128, asm, ".4s",
|
||||
[]> {
|
||||
let AsmString = !strconcat(asm, "{\t$Rd", ".4s", ", $Rn", ".8h",
|
||||
", $Rm", ".8h", "}");
|
||||
}
|
||||
|
||||
// "bfcvtn": untied mixed two-vector form on V128 registers, printed with a
// ".4h" destination and ".4s" source arrangement. No selection patterns are
// attached.
class SIMD_BFCVTN
|
||||
: BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128,
|
||||
"bfcvtn", ".4h", ".4s",
|
||||
[]>;
|
||||
|
||||
// "bfcvtn2": tied mixed two-vector form on V128 registers, printed with a
// ".8h" destination and ".4s" source arrangement. It differs from the
// "bfcvtn" form in its first template argument (1 instead of 0) and in using
// the *Tied base class. No selection patterns are attached.
class SIMD_BFCVTN2
|
||||
: BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128,
|
||||
"bfcvtn2", ".8h", ".4s",
|
||||
[]>;
|
||||
|
||||
// Scalar BF16 conversion with a fully fixed opcode (bits 31-10); only the
// Rn and Rd register fields vary.
// NOTE(review): despite the class name, the operands are an FPR32 source
// ($Rn) and an FPR16 destination ($Rd), i.e. the data flows from the 32-bit
// register class to the 16-bit one.
class BF16ToSinglePrecision<string asm>
|
||||
: I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "", []>,
|
||||
Sched<[WriteFCvt]> {
|
||||
bits<5> Rd;
|
||||
bits<5> Rn;
|
||||
let Inst{31-10} = 0b0001111001100011010000;
|
||||
let Inst{9-5} = Rn;
|
||||
let Inst{4-0} = Rd;
|
||||
}
|
||||
} // End of let mayStore = 0, mayLoad = 0, hasSideEffects = 0
|
||||
|
||||
// ARMv8.2-A Dot Product Instructions (Indexed)
|
||||
class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
|
||||
string lhs_kind, string rhs_kind,
|
||||
|
|
|
@ -23,6 +23,8 @@ def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
|
|||
AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
|
||||
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
|
||||
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
|
||||
// Predicate that checks Subtarget->hasV8_6aOps(); on the assembler side it
// requires HasV8_6aOps and uses "armv8.6a" as its name string.
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
|
||||
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
|
||||
def HasVH : Predicate<"Subtarget->hasVH()">,
|
||||
AssemblerPredicate<(all_of FeatureVH), "vh">;
|
||||
|
||||
|
@ -142,6 +144,8 @@ def HasETE : Predicate<"Subtarget->hasETE()">,
|
|||
AssemblerPredicate<(all_of FeatureETE), "ete">;
|
||||
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
|
||||
AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
|
||||
// Predicate that checks Subtarget->hasBF16(); on the assembler side it
// requires FeatureBF16 and uses "bf16" as its name string.
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
|
||||
AssemblerPredicate<(all_of FeatureBF16), "bf16">;
|
||||
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
|
||||
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
|
||||
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
|
||||
|
@ -746,6 +750,20 @@ defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>;
|
|||
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>;
|
||||
}
|
||||
|
||||
// ARMv8.6-A BFloat
|
||||
// Instantiates the AdvSIMD/scalar BFloat16 instructions. Everything in this
// block is gated on HasBF16 only. The vector BFDOT records pass U=1 and the
// by-element ("lane") BFDOT records pass U=0. The bfmlalb/bfmlalt pairs
// differ only in the Q argument (0 for "b", 1 for "t").
let Predicates = [HasBF16] in {
|
||||
defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
|
||||
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
|
||||
def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
|
||||
def BFMLALB : SIMDBF16MLAL<0, "bfmlalb">;
|
||||
def BFMLALT : SIMDBF16MLAL<1, "bfmlalt">;
|
||||
def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb">;
|
||||
def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt">;
|
||||
def BFCVTN : SIMD_BFCVTN;
|
||||
def BFCVTN2 : SIMD_BFCVTN2;
|
||||
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
|
||||
}
|
||||
|
||||
// ARMv8.2-A FP16 Fused Multiply-Add Long
|
||||
let Predicates = [HasNEON, HasFP16FML] in {
|
||||
defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
|
||||
|
|
|
@ -1197,6 +1197,18 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
|
||||
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;
|
||||
|
||||
// Instantiates the SVE BFloat16 instructions; every record here requires both
// HasBF16 and HasSVE.
// NOTE(review): the BFMMLA_B_*/BFMMLA_T_* record names carry the
// "bfmlalb"/"bfmlalt" mnemonics, not "bfmmla".
let Predicates = [HasBF16, HasSVE] in {
|
||||
def BFDOT_ZZZ : sve_bfloat_dot<"bfdot">;
|
||||
def BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot">;
|
||||
def BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla">;
|
||||
def BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb">;
|
||||
def BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt">;
|
||||
def BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb">;
|
||||
def BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt">;
|
||||
def BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt">;
|
||||
def BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt">;
|
||||
}
|
||||
|
||||
// InstAliases
|
||||
def : InstAlias<"mov $Zd, $Zn",
|
||||
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
|
||||
|
|
|
@ -76,6 +76,7 @@ protected:
|
|||
bool HasV8_3aOps = false;
|
||||
bool HasV8_4aOps = false;
|
||||
bool HasV8_5aOps = false;
|
||||
bool HasV8_6aOps = false;
|
||||
|
||||
bool HasFPARMv8 = false;
|
||||
bool HasNEON = false;
|
||||
|
@ -144,6 +145,9 @@ protected:
|
|||
bool HasMTE = false;
|
||||
bool HasTME = false;
|
||||
|
||||
// Armv8.6-A Extensions
|
||||
bool HasBF16 = false;
|
||||
|
||||
// Arm SVE2 extensions
|
||||
bool HasSVE2AES = false;
|
||||
bool HasSVE2SM4 = false;
|
||||
|
@ -403,6 +407,9 @@ public:
|
|||
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
|
||||
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
|
||||
|
||||
// Armv8.6-A Extensions
|
||||
bool hasBF16() const { return HasBF16; }
|
||||
|
||||
bool isLittleEndian() const { return IsLittle; }
|
||||
|
||||
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
|
||||
|
|
|
@ -2859,6 +2859,8 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
|
|||
Str += "ARMv8.4a";
|
||||
else if (FBS[AArch64::HasV8_5aOps])
|
||||
Str += "ARMv8.5a";
|
||||
else if (FBS[AArch64::HasV8_6aOps])
|
||||
Str += "ARMv8.6a";
|
||||
else {
|
||||
auto ext = std::find_if(std::begin(ExtensionMap),
|
||||
std::end(ExtensionMap),
|
||||
|
@ -5094,6 +5096,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
|
|||
break;
|
||||
case AArch64::ArchKind::ARMV8_4A:
|
||||
case AArch64::ArchKind::ARMV8_5A:
|
||||
case AArch64::ArchKind::ARMV8_6A:
|
||||
RequestedExtensions.push_back("sm4");
|
||||
RequestedExtensions.push_back("sha3");
|
||||
RequestedExtensions.push_back("sha2");
|
||||
|
@ -5113,6 +5116,7 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
|
|||
break;
|
||||
case AArch64::ArchKind::ARMV8_4A:
|
||||
case AArch64::ArchKind::ARMV8_5A:
|
||||
case AArch64::ArchKind::ARMV8_6A:
|
||||
RequestedExtensions.push_back("nosm4");
|
||||
RequestedExtensions.push_back("nosha3");
|
||||
RequestedExtensions.push_back("nosha2");
|
||||
|
|
|
@ -7394,6 +7394,96 @@ multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
|
|||
def : SVE_1_Op_Pat<nxv16i8, op, nxv16i8, !cast<Instruction>(NAME)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SVE BFloat16 Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Shared encoding skeleton for the SVE BF16 dot-style instructions. It fixes
// bits 31-21 and 13-10, places opc in bits 15-14, and encodes Zn and Zda.
// Bits 20-16 are left for derived classes to fill in. The ZPR32 output $Zda
// is tied to the $_Zda input that derived classes supply through iops.
class sve_bfloat_dot_base<bits<2> opc, string asm, string ops, dag iops>
|
||||
: I<(outs ZPR32:$Zda), iops, asm, ops, "", []>, Sched<[]> {
|
||||
bits<5> Zda;
|
||||
bits<5> Zn;
|
||||
let Inst{31-21} = 0b01100100011;
|
||||
let Inst{15-14} = opc;
|
||||
let Inst{13-10} = 0b0000;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zda;
|
||||
|
||||
let Constraints = "$Zda = $_Zda";
|
||||
let DestructiveInstType = DestructiveOther;
|
||||
let ElementSize = ElementSizeH;
|
||||
}
|
||||
|
||||
// Vector (non-indexed) SVE BF16 dot form: opc = 0b10, with a full 5-bit Zm
// register encoded in bits 20-16.
class sve_bfloat_dot<string asm>
|
||||
: sve_bfloat_dot_base<0b10, asm, "\t$Zda, $Zn, $Zm",
|
||||
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm)> {
|
||||
bits<5> Zm;
|
||||
let Inst{20-16} = Zm;
|
||||
}
|
||||
|
||||
// Indexed SVE BF16 dot form: opc = 0b01. Zm is a 3-bit register field
// (operand class ZPR3b16) in bits 18-16, and the 2-bit index operand
// (VectorIndexS) occupies bits 20-19.
class sve_bfloat_dot_indexed<string asm>
|
||||
: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
|
||||
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> {
|
||||
bits<2> iop;
|
||||
bits<3> Zm;
|
||||
let Inst{20-19} = iop;
|
||||
let Inst{18-16} = Zm;
|
||||
}
|
||||
|
||||
// SVE BF16 matrix-multiply form with a fully specified encoding: fixed bits
// 31-21 and 15-10, and 5-bit Zm, Zn and Zda register fields. The ZPR32
// output $Zda is tied to the $_Zda input. This class is also used as the
// base of sve_bfloat_matmul_longvecl, which overrides some of the fixed bits.
class sve_bfloat_matmul<string asm>
|
||||
: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
|
||||
asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
|
||||
bits<5> Zm;
|
||||
bits<5> Zda;
|
||||
bits<5> Zn;
|
||||
let Inst{31-21} = 0b01100100011;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-10} = 0b111001;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zda;
|
||||
|
||||
let Constraints = "$Zda = $_Zda";
|
||||
let DestructiveInstType = DestructiveOther;
|
||||
let ElementSize = ElementSizeH;
|
||||
}
|
||||
|
||||
// Vector (non-indexed) long multiply-add form, derived from
// sve_bfloat_matmul by overriding three of its fixed encoding bits: bit 23 is
// set, bits 14-13 are cleared, and bit 10 takes the BT parameter.
class sve_bfloat_matmul_longvecl<bit BT, string asm>
|
||||
: sve_bfloat_matmul<asm> {
|
||||
let Inst{23} = 0b1;
|
||||
let Inst{14-13} = 0b00;
|
||||
let Inst{10} = BT;
|
||||
}
|
||||
|
||||
// Indexed long multiply-add form, derived from sve_bfloat_dot_base with
// opc = 0b01. Bit 23 is set and bit 10 takes the BT parameter. Zm is a 3-bit
// register field (ZPR3b16) in bits 18-16. The 3-bit index (VectorIndexH) is
// split: iop{2-1} -> bits 20-19 and iop{0} -> bit 11.
class sve_bfloat_matmul_longvecl_idx<bit BT, string asm>
|
||||
: sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
|
||||
(ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop)> {
|
||||
bits<3> iop;
|
||||
bits<3> Zm;
|
||||
let Inst{23} = 0b1;
|
||||
let Inst{20-19} = iop{2-1};
|
||||
let Inst{18-16} = Zm;
|
||||
let Inst{11} = iop{0};
|
||||
let Inst{10} = BT;
|
||||
}
|
||||
|
||||
// Predicated (merging, "$Pg/m") SVE BF16 convert form: ZPR32 source, ZPR16
// destination tied to the $_Zd input. N selects between the two
// instantiations via bit 24; Pg is a 3-bit predicate field (PPR3bAny).
// Unlike the other SVE BF16 classes in this group, this one sets
// hasSideEffects = 1 and uses ElementSizeS.
class sve_bfloat_convert<bit N, string asm>
|
||||
: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
|
||||
asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<3> Pg;
|
||||
bits<5> Zn;
|
||||
let Inst{31-25} = 0b0110010;
|
||||
let Inst{24} = N;
|
||||
let Inst{23-13} = 0b10001010101;
|
||||
let Inst{12-10} = Pg;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
let DestructiveInstType = DestructiveOther;
|
||||
let hasSideEffects = 1;
|
||||
let ElementSize = ElementSizeS;
|
||||
}
|
||||
|
||||
/// Addressing modes
|
||||
def am_sve_indexed_s4 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-8,7>", [], [SDNPWantRoot]>;
|
||||
def am_sve_indexed_s6 :ComplexPattern<i64, 2, "SelectAddrModeIndexedSVE<-32,31>", [], [SDNPWantRoot]>;
|
||||
|
|
|
@ -424,6 +424,10 @@ def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
|
|||
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
|
||||
"Enable v8.5a Speculation Barrier" >;
|
||||
|
||||
// Armv8.6-A extensions
|
||||
// ARM (AArch32) subtarget feature "bf16": sets the HasBF16 subtarget field
// to "true" and implies FeatureNEON.
def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", "true",
|
||||
"Enable support for BFloat16 instructions", [FeatureNEON]>;
|
||||
|
||||
// Armv8.1-M extensions
|
||||
|
||||
def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
|
||||
|
@ -523,6 +527,10 @@ def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
|
|||
"Support ARM v8.5a instructions",
|
||||
[HasV8_4aOps, FeatureSB]>;
|
||||
|
||||
// "v8.6a": sets HasV8_6aOps and pulls in everything from v8.5a plus BFloat16.
def HasV8_6aOps
  : SubtargetFeature<"v8.6a", "HasV8_6aOps", "true",
                     "Support ARM v8.6a instructions",
                     [HasV8_5aOps, FeatureBF16]>;
|
||||
|
||||
def HasV8_1MMainlineOps : SubtargetFeature<
|
||||
"v8.1m.main", "HasV8_1MMainlineOps", "true",
|
||||
"Support ARM v8-1M Mainline instructions",
|
||||
|
@ -797,6 +805,19 @@ def ARMv85a : Architecture<"armv8.5-a", "ARMv85a", [HasV8_5aOps,
|
|||
FeatureCRC,
|
||||
FeatureRAS,
|
||||
FeatureDotProd]>;
|
||||
// Architecture record for "armv8.6-a": the v8.6a instruction set together
// with the listed baseline features.
def ARMv86a
  : Architecture<"armv8.6-a", "ARMv86a",
                 [HasV8_6aOps, FeatureAClass, FeatureDB, FeatureFPARMv8,
                  FeatureNEON, FeatureDSP, FeatureTrustZone, FeatureMP,
                  FeatureVirtualization, FeatureCrypto, FeatureCRC,
                  FeatureRAS, FeatureDotProd]>;
|
||||
|
||||
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
|
||||
FeatureRClass,
|
||||
|
|
|
@ -8926,3 +8926,93 @@ def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
|
|||
(VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
|
||||
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
|
||||
(VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
|
||||
|
||||
// ARMv8.6a BFloat16 instructions.
|
||||
let Predicates = [HasBF16, HasNEON] in {
|
||||
// Common base for the BFloat16 dot-product instructions. Forwards the opcode
// fields and operand lists to N3Vnp (with 0b1101 and a trailing 0 fixed),
// leaves the assembly string empty for subclasses to fill in, and places the
// instruction in the "VFPV8" decoder namespace without scheduling info.
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6, dag oops, dag iops>
  : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops, N3RegFrm,
          IIC_VDOTPROD, "", "", []> {
  let DecoderNamespace    = "VFPV8";
  let hasNoSchedulingInfo = 1;
}
|
||||
|
||||
// Three-register (vector by vector) BFloat16 dot product:
//   <opc>.bf16 $Vd, $Vn, $Vm
// $Vd is both accumulator input and result (tied to $dst). Q is forwarded as
// the op6 argument of BF16VDOT. AccumTy and InputTy are not referenced in
// this body.
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                ValueType InputTy>
  : BF16VDOT<0b11000, 0b00, Q,
             (outs RegTy:$dst),
             (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm)> {
  let AsmString        = !strconcat(opc, ".bf16\t$Vd, $Vn, $Vm");
  let Constraints      = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}
|
||||
|
||||
// Indexed (vector by element) BFloat16 dot product:
//   <opc>.bf16 $Vd, $Vn, $Vm[lane]
// $Vm comes from DPR_VFP2 and the one-bit lane index is encoded in Inst{5}.
// $Vd is tied to $dst. AccumTy, InputTy and RHS are not referenced in this
// body.
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc,
                     ValueType AccumTy, ValueType InputTy, dag RHS> {
  def "" : BF16VDOT<0b11100, 0b00, Q,
                    (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm,
                         VectorIndex32:$lane)> {
    bit lane;

    let AsmString        = !strconcat(opc, ".bf16\t$Vd, $Vn, $Vm$lane");
    let Constraints      = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
    let Inst{5}          = lane;
  }
}
|
||||
|
||||
// vdot.bf16 instantiations: D-register (Q = 0) and Q-register (Q = 1) forms,
// first vector-by-vector, then vector-by-element.
def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v8i8>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v16i8>;

defm BF16VDOTI_VDOTD
  : BF16VDOTI<0, DPR, "vdot", v2f32, v8i8,
              (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ
  : BF16VDOTI<1, QPR, "vdot", v4f32, v16i8,
              (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
|
||||
|
||||
// BFloat16 matrix multiply-accumulate:
//   <opc>.bf16 $Vd, $Vn, $Vm
// Three registers of class RegTy; $Vd is tied to the result $dst. Decoded in
// the "VFPV8" namespace and carries no scheduling information.
class BF16MM<bit Q, RegisterClass RegTy, string opc>
  : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
          (outs RegTy:$dst),
          (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
          N3RegFrm, IIC_VDOTPROD, "", "", []> {
  let AsmString           = !strconcat(opc, ".bf16\t$Vd, $Vn, $Vm");
  let Constraints         = "$dst = $Vd";
  let hasNoSchedulingInfo = 1;
  let DecoderNamespace    = "VFPV8";
}

// Only the Q-register form is instantiated.
def VMMLA : BF16MM<1, QPR, "vmmla">;
|
||||
|
||||
// Q-register vfma<suffix>.bf16 $Vd, $Vn, $Vm (vector by vector).
// T is forwarded to N3VCP8 and the mnemonic is "vfma" followed by suffix.
// $Vd is tied to $dst.
class VBF16MALQ<bit T, string suffix>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst),
           (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, !strconcat("vfma", suffix), "bf16",
           "$Vd, $Vn, $Vm", "",
           []> { // TODO: Add intrinsics
  let hasNoSchedulingInfo = 1;
  let DecoderNamespace    = "VFPV8";
  let Constraints         = "$dst = $Vd";
}

// "t" uses T = 1, "b" uses T = 0.
def VBF16MALTQ : VBF16MALQ<1, "t">;
def VBF16MALBQ : VBF16MALQ<0, "b">;
|
||||
|
||||
// Q-register vfma<suffix>.bf16 $Vd, $Vn, $Vm[idx] (vector by element).
// $Vm comes from DPR_8; the 2-bit lane index is split across the encoding as
// idx{1} -> Inst{5} and idx{0} -> Inst{3}. $Vd is tied to $dst.
multiclass VBF16MALQI<bit T, string suffix> {
  def "" : N3VLaneCP8<0, 0b11, T, 1,
                      (outs QPR:$dst),
                      (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
                      IIC_VMACD, !strconcat("vfma", suffix), "bf16",
                      "$Vd, $Vn, $Vm$idx", "", []> {
    bits<2> idx;

    let hasNoSchedulingInfo = 1;
    let DecoderNamespace    = "VFPV8";
    let Constraints         = "$dst = $Vd";
    let Inst{3}             = idx{0};
    let Inst{5}             = idx{1};
  }
}

// "t" uses T = 1, "b" uses T = 0.
defm VBF16MALTQI : VBF16MALQI<1, "t">;
defm VBF16MALBQI : VBF16MALQI<0, "b">;
|
||||
|
||||
// vcvt.bf16.f32 $Vd, $Vm: takes a Q-register source and writes a D-register
// result. No scheduling information is attached.
let hasNoSchedulingInfo = 1 in
def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                    (outs DPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
|
||||
}
|
||||
// End of BFloat16 instructions
|
||||
|
|
|
@ -1867,6 +1867,35 @@ def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
|
|||
|
||||
} // End of 'let Constraints = "$a = $dst" in'
|
||||
|
||||
// BFloat16 - Single precision, unary, predicated
|
||||
// Scalar <opc>.bf16.f32 $Sd, $Sm. The destination $Sd is tied to the extra
// $dst input, and the instruction requires HasBF16. op7_6 is emitted in
// Inst{7-6} and distinguishes the instantiations below.
class BF16_VCVT<string opc, bits<2> op7_6>
  : VFPAI<(outs SPR:$Sd), (ins SPR:$dst, SPR:$Sm),
          VFPUnaryFrm, NoItinerary,
          opc, ".bf16.f32\t$Sd, $Sm", []>,
    RegConstraint<"$dst = $Sd">,
    Requires<[HasBF16]>,
    Sched<[]> {
  bits<5> Sd;
  bits<5> Sm;

  let DecoderNamespace = "VFPV8";

  // Encoding, most-significant field first. Each 5-bit register number is
  // split into its upper four bits and its low bit.
  let Inst{27-23} = 0b11101;  // opcode1
  let Inst{22}    = Sd{0};
  let Inst{21-20} = 0b11;     // opcode2
  let Inst{19-16} = 0b0011;   // opcode3
  let Inst{15-12} = Sd{4-1};
  let Inst{11-8}  = 0b1001;
  let Inst{7-6}   = op7_6;
  let Inst{5}     = Sm{0};
  let Inst{4}     = 0;
  let Inst{3-0}   = Sm{4-1};
}

def BF16_VCVTB : BF16_VCVT<"vcvtb", 0b01>;
def BF16_VCVTT : BF16_VCVT<"vcvtt", 0b11>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FP Multiply-Accumulate Operations.
|
||||
//
|
||||
|
|
|
@ -72,6 +72,8 @@ def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
|
|||
AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
|
||||
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
|
||||
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
|
||||
// True when the subtarget reports v8.6a support; the assembler checks the
// HasV8_6aOps feature and names it "armv8.6a" in diagnostics.
def HasV8_6a
  : Predicate<"Subtarget->hasV8_6aOps()">,
    AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
|
||||
def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
|
||||
def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
|
||||
AssemblerPredicate<(all_of FeatureVFP2_SP), "VFP2">;
|
||||
|
@ -106,6 +108,8 @@ def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
|
|||
AssemblerPredicate<(all_of FeatureFullFP16),"full half-float">;
|
||||
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
|
||||
AssemblerPredicate<(all_of FeatureFP16FML),"full half-float fml">;
|
||||
// True when the subtarget reports BFloat16 support; the assembler checks
// FeatureBF16.
def HasBF16
  : Predicate<"Subtarget->hasBF16()">,
    AssemblerPredicate<(all_of FeatureBF16),
                       "BFloat16 floating point extension">;
|
||||
def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
|
||||
AssemblerPredicate<(all_of FeatureHWDivThumb), "divide in THUMB">;
|
||||
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
|
||||
|
|
|
@ -108,6 +108,7 @@ protected:
|
|||
ARMv83a,
|
||||
ARMv84a,
|
||||
ARMv85a,
|
||||
ARMv86a,
|
||||
ARMv8a,
|
||||
ARMv8mBaseline,
|
||||
ARMv8mMainline,
|
||||
|
@ -157,6 +158,7 @@ protected:
|
|||
bool HasV8_3aOps = false;
|
||||
bool HasV8_4aOps = false;
|
||||
bool HasV8_5aOps = false;
|
||||
bool HasV8_6aOps = false;
|
||||
bool HasV8MBaselineOps = false;
|
||||
bool HasV8MMainlineOps = false;
|
||||
bool HasV8_1MMainlineOps = false;
|
||||
|
@ -255,6 +257,9 @@ protected:
|
|||
/// HasFP16FML - True if subtarget supports half-precision FP fml operations
|
||||
bool HasFP16FML = false;
|
||||
|
||||
/// HasBF16 - True if subtarget supports BFloat16 floating point operations
|
||||
bool HasBF16 = false;
|
||||
|
||||
/// HasD32 - True if subtarget has the full 32 double precision
|
||||
/// FP registers for VFPv3.
|
||||
bool HasD32 = false;
|
||||
|
@ -581,6 +586,7 @@ public:
|
|||
bool hasV8_3aOps() const { return HasV8_3aOps; }
|
||||
bool hasV8_4aOps() const { return HasV8_4aOps; }
|
||||
bool hasV8_5aOps() const { return HasV8_5aOps; }
|
||||
/// Returns the HasV8_6aOps flag.
bool hasV8_6aOps() const { return HasV8_6aOps; }
|
||||
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
|
||||
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
|
||||
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
|
||||
|
|
|
@ -6322,6 +6322,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
|
|||
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
|
||||
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
|
||||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
|
||||
Mnemonic == "vdot" || Mnemonic == "vmmla" ||
|
||||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
|
||||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
|
||||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
|
||||
|
@ -6462,6 +6463,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
|
|||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
|
||||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
|
||||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
|
||||
Mnemonic == "vfmat" || Mnemonic == "vfmab" ||
|
||||
Mnemonic == "vdot" || Mnemonic == "vmmla" ||
|
||||
Mnemonic == "sb" || Mnemonic == "ssbb" ||
|
||||
Mnemonic == "pssbb" ||
|
||||
Mnemonic == "bfcsel" || Mnemonic == "wls" ||
|
||||
|
|
|
@ -856,6 +856,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
|
|||
case ARM::ArchKind::ARMV8_3A:
|
||||
case ARM::ArchKind::ARMV8_4A:
|
||||
case ARM::ArchKind::ARMV8_5A:
|
||||
case ARM::ArchKind::ARMV8_6A:
|
||||
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
|
||||
setAttributeItem(ARM_ISA_use, Allowed, false);
|
||||
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
|
||||
|
||||
bfcvt z0.s, p0/m, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfcvt z0.s, p0/m, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfcvt z0.h, p0/m, z1.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfcvt z0.h, p0/m, z1.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfcvt z0.h, p0/z, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// CHECK-NEXT: bfcvt z0.h, p0/z, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfcvt z0.h, p8/m, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
|
||||
// CHECK-NEXT: bfcvt z0.h, p8/m, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
movprfx z0.h, p0/m, z7.h
|
||||
bfcvt z0.h, p0/m, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
|
||||
// CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
|
@ -0,0 +1,29 @@
|
|||
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
|
||||
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
|
||||
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
|
||||
|
||||
bfcvt z0.H, p0/m, z1.S
|
||||
// CHECK-INST: bfcvt z0.h, p0/m, z1.s
|
||||
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0.S, p0/m, z2.S
|
||||
// CHECK-INST: movprfx z0.s, p0/m, z2.s
|
||||
// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfcvt z0.H, p0/m, z1.S
|
||||
// CHECK-INST: bfcvt z0.h, p0/m, z1.s
|
||||
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z2
|
||||
// CHECK-INST: movprfx z0, z2
|
||||
// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfcvt z0.H, p0/m, z1.S
|
||||
// CHECK-INST: bfcvt z0.h, p0/m, z1.s
|
||||
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x65]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
|
@ -0,0 +1,27 @@
|
|||
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
|
||||
|
||||
bfcvtnt z0.s, p0/m, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfcvtnt z0.s, p0/m, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfcvtnt z0.h, p0/m, z1.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfcvtnt z0.h, p0/z, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// CHECK-NEXT: bfcvtnt z0.h, p0/z, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfcvtnt z0.h, p8/m, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
|
||||
// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
movprfx z0.h, p0/m, z7.h
|
||||
bfcvtnt z0.h, p0/m, z1.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
|
||||
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
|
@ -0,0 +1,29 @@
|
|||
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
|
||||
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
|
||||
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
|
||||
|
||||
bfcvtnt z0.H, p0/m, z1.S
|
||||
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
|
||||
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0.S, p0/m, z2.S
|
||||
// CHECK-INST: movprfx z0.s, p0/m, z2.s
|
||||
// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfcvtnt z0.H, p0/m, z1.S
|
||||
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
|
||||
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z2
|
||||
// CHECK-INST: movprfx z0, z2
|
||||
// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfcvtnt z0.H, p0/m, z1.S
|
||||
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
|
||||
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
|
@ -0,0 +1,53 @@
|
|||
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
|
||||
|
||||
bfdot z0.s, z1.s, z2.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfdot z0.s, z1.s, z2.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfdot z0.h, z1.h, z2.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfdot z0.h, z1.h, z2.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfdot z0.s, z1.h, z2.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h
|
||||
// CHECK-NEXT: bfdot z0.s, z1.h, z2.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
movprfx z0.s, p0/m, z7.s
|
||||
bfdot z0.s, z1.h, z2.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
|
||||
// CHECK-NEXT: bfdot z0.s, z1.h, z2.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfdot z0.s, z1.s, z2.h[0]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfdot z0.s, z1.s, z2.h[0]
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfdot z0.h, z1.h, z2.h[0]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfdot z0.h, z1.h, z2.h[0]
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfdot z0.s, z1.h, z2.s[0]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h
|
||||
// CHECK-NEXT: bfdot z0.s, z1.h, z2.s[0]
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfdot z0.s, z1.h, z8.h[0]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// CHECK-NEXT: bfdot z0.s, z1.h, z8.h[0]
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfdot z0.s, z1.h, z2.h[4]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3].
|
||||
// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[4]
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
movprfx z0.s, p0/m, z7.s
|
||||
bfdot z0.s, z1.h, z2.h[0]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
|
||||
// CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0]
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
|
@ -0,0 +1,52 @@
|
|||
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
|
||||
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
|
||||
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
|
||||
|
||||
bfdot z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfdot z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0x80,0x62,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfdot z0.S, z1.H, z2.H[0]
|
||||
// CHECK-INST: bfdot z0.s, z1.h, z2.h[0]
|
||||
// CHECK-ENCODING: [0x20,0x40,0x62,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfdot z0.S, z1.H, z2.H[3]
|
||||
// CHECK-INST: bfdot z0.s, z1.h, z2.h[3]
|
||||
// CHECK-ENCODING: [0x20,0x40,0x7a,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
// --------------------------------------------------------------------------//
|
||||
// Test compatibility with MOVPRFX instruction.
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfdot z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfdot z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0x80,0x62,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfdot z0.S, z1.H, z2.H[0]
|
||||
// CHECK-INST: bfdot z0.s, z1.h, z2.h[0]
|
||||
// CHECK-ENCODING: [0x20,0x40,0x62,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfdot z0.S, z1.H, z2.H[3]
|
||||
// CHECK-INST: bfdot z0.s, z1.h, z2.h[3]
|
||||
// CHECK-ENCODING: [0x20,0x40,0x7a,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
|
@ -0,0 +1,42 @@
|
|||
// RUN: not llvm-mc -o - -triple=aarch64 -mattr=+sve,+bf16 2>&1 %s | FileCheck %s
|
||||
|
||||
bfmlalb z0.S, z1.H, z7.H[8]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
|
||||
// CHECK-NEXT: bfmlalb z0.S, z1.H, z7.H[8]
|
||||
// CHECK-NEXT: ^
|
||||
|
||||
bfmlalb z0.S, z1.H, z8.H[7]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// CHECK-NEXT: bfmlalb z0.S, z1.H, z8.H[7]
|
||||
// CHECK-NEXT: ^
|
||||
|
||||
bfmlalt z0.S, z1.H, z7.H[8]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
|
||||
// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.H[8]
|
||||
// CHECK-NEXT: ^
|
||||
|
||||
bfmlalt z0.S, z1.H, z8.H[7]
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
|
||||
// CHECK-NEXT: bfmlalt z0.S, z1.H, z8.H[7]
|
||||
// CHECK-NEXT: ^
|
||||
|
||||
bfmlalt z0.S, z1.H, z7.2h[2]
|
||||
// CHECK: error: invalid vector kind qualifier
|
||||
// CHECK-NEXT: bfmlalt z0.S, z1.H, z7.2h[2]
|
||||
// CHECK-NEXT: ^
|
||||
|
||||
bfmlalt z0.S, z1.H, z2.s[2]
|
||||
// CHECK: error: Invalid restricted vector register, expected z0.h..z7.h
|
||||
// CHECK-NEXT: bfmlalt z0.S, z1.H, z2.s[2]
|
||||
// CHECK-NEXT: ^
|
||||
|
||||
bfmlalt z0.S, z1.s, z2.h[2]
|
||||
// CHECK: error: invalid element width
|
||||
// CHECK-NEXT: bfmlalt z0.S, z1.s, z2.h[2]
|
||||
// CHECK-NEXT: ^
|
||||
|
||||
movprfx z0.s, p0/m, z7.s
|
||||
bfmlalt z0.s, z1.h, z2.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx
|
||||
// CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h
|
||||
// CHECK-NEXT: ^
|
|
@ -0,0 +1,157 @@
|
|||
// RUN: llvm-mc -o - -triple=aarch64 -show-encoding -mattr=+sve,+bf16 %s \
|
||||
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
|
||||
// RUN: not llvm-mc -o - -triple=aarch64 -show-encoding %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
|
||||
|
||||
bfmlalb z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0x80,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0x84,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalb z0.S, z1.H, z2.H[0]
|
||||
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0]
|
||||
// CHECK-ENCODING: [0x20,0x40,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z2.H[0]
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0]
|
||||
// CHECK-ENCODING: [0x20,0x44,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalb z0.S, z1.H, z2.H[7]
|
||||
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7]
|
||||
// CHECK-ENCODING: [0x20,0x48,0xfa,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z2.H[7]
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7]
|
||||
// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z7.H[7]
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7]
|
||||
// CHECK-ENCODING: [0x20,0x4c,0xff,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalb z10.S, z21.H, z14.H
|
||||
// CHECK-INST: bfmlalb z10.s, z21.h, z14.h
|
||||
// CHECK-ENCODING: [0xaa,0x82,0xee,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalt z14.S, z10.H, z21.H
|
||||
// CHECK-INST: bfmlalt z14.s, z10.h, z21.h
|
||||
// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
bfmlalb z21.s, z14.h, z3.h[2]
|
||||
// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2]
|
||||
// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
// --------------------------------------------------------------------------//
|
||||
// Test compatibility with MOVPRFX instruction.
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalb z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0x80,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0x84,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalb z0.S, z1.H, z2.H[0]
|
||||
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[0]
|
||||
// CHECK-ENCODING: [0x20,0x40,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z2.H[0]
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[0]
|
||||
// CHECK-ENCODING: [0x20,0x44,0xe2,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalb z0.S, z1.H, z2.H[7]
|
||||
// CHECK-INST: bfmlalb z0.s, z1.h, z2.h[7]
|
||||
// CHECK-ENCODING: [0x20,0x48,0xfa,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z2.H[7]
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z2.h[7]
|
||||
// CHECK-ENCODING: [0x20,0x4c,0xfa,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalt z0.S, z1.H, z7.H[7]
|
||||
// CHECK-INST: bfmlalt z0.s, z1.h, z7.h[7]
|
||||
// CHECK-ENCODING: [0x20,0x4c,0xff,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z10, z7
|
||||
// CHECK-INST: movprfx z10, z7
|
||||
// CHECK-ENCODING: [0xea,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalb z10.S, z21.H, z14.H
|
||||
// CHECK-INST: bfmlalb z10.s, z21.h, z14.h
|
||||
// CHECK-ENCODING: [0xaa,0x82,0xee,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z14, z7
|
||||
// CHECK-INST: movprfx z14, z7
|
||||
// CHECK-ENCODING: [0xee,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalt z14.S, z10.H, z21.H
|
||||
// CHECK-INST: bfmlalt z14.s, z10.h, z21.h
|
||||
// CHECK-ENCODING: [0x4e,0x85,0xf5,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
movprfx z21, z7
|
||||
// CHECK-INST: movprfx z21, z7
|
||||
// CHECK-ENCODING: [0xf5,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmlalb z21.s, z14.h, z3.h[2]
|
||||
// CHECK-INST: bfmlalb z21.s, z14.h, z3.h[2]
|
||||
// CHECK-ENCODING: [0xd5,0x41,0xeb,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
|
@ -0,0 +1,22 @@
|
|||
// RUN: not llvm-mc -triple=aarch64 -mattr=+sve,+bf16 2>&1 < %s| FileCheck %s
|
||||
|
||||
bfmmla z0.s, z1.s, z2.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfmmla z0.s, z1.s, z2.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfmmla z0.h, z1.h, z2.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfmmla z0.h, z1.h, z2.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
bfmmla z0.s, z1.h, z2.s
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
|
||||
// CHECK-NEXT: bfmmla z0.s, z1.h, z2.s
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
||||
|
||||
movprfx z0.s, p0/m, z7.s
|
||||
bfmmla z0.s, z1.h, z2.h
|
||||
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
|
||||
// CHECK-NEXT: bfmmla z0.s, z1.h, z2.h
|
||||
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
|
|
@ -0,0 +1,22 @@
|
|||
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+bf16 < %s \
|
||||
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
|
||||
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
|
||||
|
||||
bfmmla z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfmmla z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0xe4,0x62,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
||||
|
||||
// --------------------------------------------------------------------------//
|
||||
// Test compatibility with MOVPRFX instruction.
|
||||
|
||||
movprfx z0, z7
|
||||
// CHECK-INST: movprfx z0, z7
|
||||
// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
|
||||
// CHECK-ERROR: instruction requires: sve
|
||||
|
||||
bfmmla z0.S, z1.H, z2.H
|
||||
// CHECK-INST: bfmmla z0.s, z1.h, z2.h
|
||||
// CHECK-ENCODING: [0x20,0xe4,0x62,0x64]
|
||||
// CHECK-ERROR: instruction requires: bf16 sve
|
|
@ -0,0 +1,115 @@
|
|||
// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+bf16 < %s | FileCheck %s
|
||||
// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a < %s | FileCheck %s
|
||||
// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-bf16 < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
|
||||
// RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
|
||||
|
||||
|
||||
bfdot v2.2s, v3.4h, v4.4h
|
||||
bfdot v2.4s, v3.8h, v4.8h
|
||||
// CHECK: bfdot v2.2s, v3.4h, v4.4h // encoding: [0x62,0xfc,0x44,0x2e]
|
||||
// CHECK: bfdot v2.4s, v3.8h, v4.8h // encoding: [0x62,0xfc,0x44,0x6e]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.4h
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.8h
|
||||
|
||||
bfdot v2.2s, v3.4h, v4.2h[0]
|
||||
bfdot v2.2s, v3.4h, v4.2h[1]
|
||||
bfdot v2.2s, v3.4h, v4.2h[2]
|
||||
bfdot v2.2s, v3.4h, v4.2h[3]
|
||||
// CHECK: bfdot v2.2s, v3.4h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x0f]
|
||||
// CHECK: bfdot v2.2s, v3.4h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x0f]
|
||||
// CHECK: bfdot v2.2s, v3.4h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x0f]
|
||||
// CHECK: bfdot v2.2s, v3.4h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x0f]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[0]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[1]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[2]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.2s, v3.4h, v4.2h[3]
|
||||
|
||||
|
||||
bfdot v2.4s, v3.8h, v4.2h[0]
|
||||
bfdot v2.4s, v3.8h, v4.2h[1]
|
||||
bfdot v2.4s, v3.8h, v4.2h[2]
|
||||
bfdot v2.4s, v3.8h, v4.2h[3]
|
||||
// CHECK: bfdot v2.4s, v3.8h, v4.2h[0] // encoding: [0x62,0xf0,0x44,0x4f]
|
||||
// CHECK: bfdot v2.4s, v3.8h, v4.2h[1] // encoding: [0x62,0xf0,0x64,0x4f]
|
||||
// CHECK: bfdot v2.4s, v3.8h, v4.2h[2] // encoding: [0x62,0xf8,0x44,0x4f]
|
||||
// CHECK: bfdot v2.4s, v3.8h, v4.2h[3] // encoding: [0x62,0xf8,0x64,0x4f]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[0]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[1]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[2]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfdot v2.4s, v3.8h, v4.2h[3]
|
||||
|
||||
|
||||
bfmmla v2.4s, v3.8h, v4.8h
|
||||
bfmmla v3.4s, v4.8h, v5.8h
|
||||
// CHECK: bfmmla v2.4s, v3.8h, v4.8h // encoding: [0x62,0xec,0x44,0x6e]
|
||||
// CHECK: bfmmla v3.4s, v4.8h, v5.8h // encoding: [0x83,0xec,0x45,0x6e]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmmla v2.4s, v3.8h, v4.8h
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmmla v3.4s, v4.8h, v5.8h
|
||||
|
||||
bfcvtn v5.4h, v5.4s
|
||||
bfcvtn2 v5.8h, v5.4s
|
||||
// CHECK: bfcvtn v5.4h, v5.4s // encoding: [0xa5,0x68,0xa1,0x0e]
|
||||
// CHECK: bfcvtn2 v5.8h, v5.4s // encoding: [0xa5,0x68,0xa1,0x4e]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfcvtn v5.4h, v5.4s
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfcvtn2 v5.8h, v5.4s
|
||||
|
||||
bfcvt h5, s3
|
||||
// CHECK: bfcvt h5, s3 // encoding: [0x65,0x40,0x63,0x1e]
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfcvt h5, s3
|
||||
|
||||
bfmlalb V10.4S, V21.8h, V14.8H
|
||||
bfmlalt V21.4S, V14.8h, V10.8H
|
||||
// CHECK: bfmlalb v10.4s, v21.8h, v14.8h // encoding: [0xaa,0xfe,0xce,0x2e]
|
||||
// CHECK-NEXT: bfmlalt v21.4s, v14.8h, v10.8h // encoding: [0xd5,0xfd,0xca,0x6e]
|
||||
// NOBF16: error: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalb V10.4S, V21.8h, V14.8H
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalt V21.4S, V14.8h, V10.8H
|
||||
// NOBF16-NEXT: ^
|
||||
|
||||
bfmlalb V14.4S, V21.8H, V10.H[1]
|
||||
bfmlalb V14.4S, V21.8H, V10.H[2]
|
||||
bfmlalb V14.4S, V21.8H, V10.H[7]
|
||||
bfmlalt V21.4S, V10.8H, V14.H[1]
|
||||
bfmlalt V21.4S, V10.8H, V14.H[2]
|
||||
bfmlalt V21.4S, V10.8H, V14.H[7]
|
||||
// CHECK: bfmlalb v14.4s, v21.8h, v10.h[1] // encoding: [0xae,0xf2,0xda,0x0f]
|
||||
// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[2] // encoding: [0xae,0xf2,0xea,0x0f]
|
||||
// CHECK-NEXT: bfmlalb v14.4s, v21.8h, v10.h[7] // encoding: [0xae,0xfa,0xfa,0x0f]
|
||||
// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[1] // encoding: [0x55,0xf1,0xde,0x4f]
|
||||
// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[2] // encoding: [0x55,0xf1,0xee,0x4f]
|
||||
// CHECK-NEXT: bfmlalt v21.4s, v10.8h, v14.h[7] // encoding: [0x55,0xf9,0xfe,0x4f]
|
||||
// NOBF16: error: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[1]
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16: error: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[2]
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16: error: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalb V14.4S, V21.8H, V10.H[7]
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[1]
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[2]
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16: instruction requires: bf16
|
||||
// NOBF16-NEXT: bfmlalt V21.4S, V10.8H, V14.H[7]
|
||||
// NOBF16-NEXT: ^
|
|
@ -0,0 +1,57 @@
|
|||
// RUN: not llvm-mc -triple arm -mattr=+bf16,-neon %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL
|
||||
// RUN: not llvm-mc -triple arm -mattr=-bf16 %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NOBF16,ALL
|
||||
// RUN: not llvm-mc -triple arm %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=NONEON,ALL
|
||||
//
|
||||
vdot.bf16 d3, d4, d5
|
||||
vdot.bf16 q0, q1, q2
|
||||
vdot.bf16 d3, d4, d5[1]
|
||||
vdot.bf16 q0, q1, d5[1]
|
||||
vmmla.bf16 q0, q1, q2
|
||||
vcvt.bf16.f32 d1, q3
|
||||
vcvtbeq.bf16.f32 s1, s3
|
||||
vcvttne.bf16.f32 s1, s3
|
||||
// NOBF16: error: instruction requires: BFloat16 floating point extension
|
||||
// NOBF16-NEXT: vdot.bf16 d3, d4, d5
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
|
||||
// NOBF16-NEXT: vdot.bf16 q0, q1, q2
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
|
||||
// NOBF16-NEXT: vdot.bf16 d3, d4, d5[1]
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
|
||||
// NOBF16-NEXT: vdot.bf16 q0, q1, d5[1]
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
|
||||
// NOBF16-NEXT: vmmla.bf16 q0, q1, q2
|
||||
// NOBF16-NEXT: ^
|
||||
// NOBF16-NEXT: error: instruction requires: BFloat16 floating point extension
|
||||
// NOBF16-NEXT: vcvt.bf16.f32 d1, q3
|
||||
// NOBF16-NEXT: ^
|
||||
|
||||
// NONEON: error: instruction requires: BFloat16 floating point extension NEON
|
||||
// NONEON-NEXT: vdot.bf16 d3, d4, d5
|
||||
// NONEON-NEXT: ^
|
||||
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
|
||||
// NONEON-NEXT: vdot.bf16 q0, q1, q2
|
||||
// NONEON-NEXT: ^
|
||||
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
|
||||
// NONEON-NEXT: vdot.bf16 d3, d4, d5[1]
|
||||
// NONEON-NEXT: ^
|
||||
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
|
||||
// NONEON-NEXT: vdot.bf16 q0, q1, d5[1]
|
||||
// NONEON-NEXT: ^
|
||||
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
|
||||
// NONEON-NEXT: vmmla.bf16 q0, q1, q2
|
||||
// NONEON-NEXT: ^
|
||||
// NONEON-NEXT: error: instruction requires: BFloat16 floating point extension NEON
|
||||
// NONEON-NEXT: vcvt.bf16.f32 d1, q3
|
||||
// NONEON-NEXT: ^
|
||||
|
||||
|
||||
// ALL-NEXT: error: instruction requires: BFloat16 floating point extension
|
||||
// ALL-NEXT: vcvtbeq.bf16.f32 s1, s3
|
||||
// ALL-NEXT: ^
|
||||
// ALL-NEXT: error: instruction requires: BFloat16 floating point extension
|
||||
// ALL-NEXT: vcvttne.bf16.f32 s1, s3
|
||||
// ALL-NEXT: ^
|
|
@ -0,0 +1,134 @@
|
|||
// RUN: not llvm-mc -o - -triple arm -mattr=+v8.6a -show-encoding %s 2>&1 | FileCheck %s
|
||||
vfmat.bf16 d0, d0, d0
|
||||
vfmat.bf16 d0, d0, q0
|
||||
vfmat.bf16 d0, q0, d0
|
||||
vfmat.bf16 q0, d0, d0
|
||||
vfmat.bf16 q0, q0, d0
|
||||
vfmat.bf16 q0, d0, q0
|
||||
vfmat.bf16 d0, q0, q0
|
||||
vfmat.bf16 q0, q0, q0[3]
|
||||
vfmat.bf16 q0, q0, q0[3]
|
||||
vfmat.bf16 q0, d0, d0[0]
|
||||
vfmat.bf16 d0, q0, d0[0]
|
||||
vfmat.bf16 q0, d0, d0[9]
|
||||
|
||||
vfmab.bf16 d0, d0, d0
|
||||
vfmab.bf16 d0, d0, q0
|
||||
vfmab.bf16 d0, q0, d0
|
||||
vfmab.bf16 q0, d0, d0
|
||||
vfmab.bf16 q0, q0, d0
|
||||
vfmab.bf16 q0, d0, q0
|
||||
vfmab.bf16 d0, q0, q0
|
||||
vfmab.bf16 q0, q0, q0[3]
|
||||
vfmab.bf16 q0, q0, q0[3]
|
||||
vfmab.bf16 q0, d0, d0[0]
|
||||
vfmab.bf16 d0, q0, d0[0]
|
||||
vfmab.bf16 q0, d0, d0[9]
|
||||
|
||||
//CHECK:error: invalid instruction
|
||||
//CHECK-NEXT:vfmat.bf16 d0, d0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmat.bf16 d0, d0, q0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmat.bf16 d0, q0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmat.bf16 q0, d0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:note: too few operands for instruction
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, d0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:note: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, d0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmat.bf16 q0, d0, q0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmat.bf16 d0, q0, q0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:note: too many operands for instruction
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:note: too many operands for instruction
|
||||
//CHECK-NEXT:vfmat.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmat.bf16 q0, d0, d0[0]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmat.bf16 d0, q0, d0[0]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmat.bf16 q0, d0, d0[9]
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmab.bf16 d0, d0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmab.bf16 d0, d0, q0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmab.bf16 d0, q0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmab.bf16 q0, d0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, d0
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:note: too few operands for instruction
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, d0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:note: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, d0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmab.bf16 q0, d0, q0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmab.bf16 d0, q0, q0
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:note: too many operands for instruction
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: invalid instruction, any one of the following would fix this:
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT:^
|
||||
//CHECK-NEXT:note: operand must be a register in range [d0, d7]
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:note: too many operands for instruction
|
||||
//CHECK-NEXT:vfmab.bf16 q0, q0, q0[3]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmab.bf16 q0, d0, d0[0]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: operand must be a register in range [q0, q15]
|
||||
//CHECK-NEXT:vfmab.bf16 d0, q0, d0[0]
|
||||
//CHECK-NEXT: ^
|
||||
//CHECK-NEXT:error: invalid instruction
|
||||
//CHECK-NEXT:vfmab.bf16 q0, d0, d0[9]
|
|
@ -0,0 +1,55 @@
|
|||
// RUN: llvm-mc -triple arm -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
|
||||
// RUN: llvm-mc -triple arm -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK
|
||||
|
||||
vdot.bf16 d3, d4, d5
|
||||
// CHECK: vdot.bf16 d3, d4, d5 @ encoding: [0x05,0x3d,0x04,0xfc]
|
||||
vdot.bf16 q0, q1, q2
|
||||
// CHECK-NEXT: vdot.bf16 q0, q1, q2 @ encoding: [0x44,0x0d,0x02,0xfc]
|
||||
vdot.bf16 d3, d4, d5[1]
|
||||
// CHECK-NEXT: vdot.bf16 d3, d4, d5[1] @ encoding: [0x25,0x3d,0x04,0xfe]
|
||||
vdot.bf16 q0, q1, d5[1]
|
||||
// CHECK-NEXT: vdot.bf16 q0, q1, d5[1] @ encoding: [0x65,0x0d,0x02,0xfe]
|
||||
vmmla.bf16 q0, q1, q2
|
||||
// CHECK-NEXT: vmmla.bf16 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xfc]
|
||||
vcvt.bf16.f32 d1, q3
|
||||
// CHECK-NEXT: vcvt.bf16.f32 d1, q3 @ encoding: [0x46,0x16,0xb6,0xf3]
|
||||
vcvtbeq.bf16.f32 s1, s3
|
||||
// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0x61,0x09,0xf3,0x0e]
|
||||
vcvttne.bf16.f32 s1, s3
|
||||
// CHECK-NEXT: vcvttne.bf16.f32 s1, s3 @ encoding: [0xe1,0x09,0xf3,0x1e]
|
||||
vfmat.bf16 q0, q0, q0
|
||||
//CHECK-NEXT: vfmat.bf16 q0, q0, q0 @ encoding: [0x50,0x08,0x30,0xfc]
|
||||
vfmat.bf16 q0, q0, q15
|
||||
//CHECK-NEXT: vfmat.bf16 q0, q0, q15 @ encoding: [0x7e,0x08,0x30,0xfc]
|
||||
vfmat.bf16 q0, q15, q0
|
||||
//CHECK-NEXT: vfmat.bf16 q0, q15, q0 @ encoding: [0xd0,0x08,0x3e,0xfc]
|
||||
vfmat.bf16 q0, q15, q15
|
||||
//CHECK-NEXT: vfmat.bf16 q0, q15, q15 @ encoding: [0xfe,0x08,0x3e,0xfc]
|
||||
vfmat.bf16 q7, q0, q0
|
||||
//CHECK-NEXT: vfmat.bf16 q7, q0, q0 @ encoding: [0x50,0xe8,0x30,0xfc]
|
||||
vfmat.bf16 q8, q0, q0
|
||||
//CHECK-NEXT: vfmat.bf16 q8, q0, q0 @ encoding: [0x50,0x08,0x70,0xfc]
|
||||
vfmab.bf16 q0, q0, q0
|
||||
//CHECK-NEXT: vfmab.bf16 q0, q0, q0 @ encoding: [0x10,0x08,0x30,0xfc]
|
||||
vfmab.bf16 q0, q0, q15
|
||||
//CHECK-NEXT: vfmab.bf16 q0, q0, q15 @ encoding: [0x3e,0x08,0x30,0xfc]
|
||||
vfmab.bf16 q0, q15, q0
|
||||
//CHECK-NEXT: vfmab.bf16 q0, q15, q0 @ encoding: [0x90,0x08,0x3e,0xfc]
|
||||
vfmab.bf16 q0, q15, q15
|
||||
//CHECK-NEXT: vfmab.bf16 q0, q15, q15 @ encoding: [0xbe,0x08,0x3e,0xfc]
|
||||
vfmab.bf16 q7, q0, q0
|
||||
//CHECK-NEXT: vfmab.bf16 q7, q0, q0 @ encoding: [0x10,0xe8,0x30,0xfc]
|
||||
vfmab.bf16 q8, q0, q0
|
||||
//CHECK-NEXT: vfmab.bf16 q8, q0, q0 @ encoding: [0x10,0x08,0x70,0xfc]
|
||||
vfmat.bf16 q0, q0, d0[0]
|
||||
//CHECK-NEXT: vfmat.bf16 q0, q0, d0[0] @ encoding: [0x50,0x08,0x30,0xfe]
|
||||
vfmat.bf16 q0, q0, d0[3]
|
||||
//CHECK-NEXT: vfmat.bf16 q0, q0, d0[3] @ encoding: [0x78,0x08,0x30,0xfe]
|
||||
vfmat.bf16 q0, q0, d7[0]
|
||||
//CHECK-NEXT: vfmat.bf16 q0, q0, d7[0] @ encoding: [0x57,0x08,0x30,0xfe]
|
||||
vfmab.bf16 q0, q0, d0[0]
|
||||
//CHECK-NEXT: vfmab.bf16 q0, q0, d0[0] @ encoding: [0x10,0x08,0x30,0xfe]
|
||||
vfmab.bf16 q0, q0, d0[3]
|
||||
//CHECK-NEXT: vfmab.bf16 q0, q0, d0[3] @ encoding: [0x38,0x08,0x30,0xfe]
|
||||
vfmab.bf16 q0, q0, d7[0]
|
||||
//CHECK-NEXT: vfmab.bf16 q0, q0, d7[0] @ encoding: [0x17,0x08,0x30,0xfe]
|
|
@ -0,0 +1,32 @@
|
|||
// RUN: not llvm-mc -triple thumbv8 -mattr=-bf16 < %s 2>&1 | FileCheck %s
|
||||
|
||||
vdot.bf16 d3, d4, d5
|
||||
// CHECK: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vdot.bf16 d3, d4, d5
|
||||
|
||||
vdot.bf16 q0, q1, q2
|
||||
// CHECK: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vdot.bf16 q0, q1, q2
|
||||
|
||||
vdot.bf16 d3, d4, d5[1]
|
||||
// CHECK: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vdot.bf16 d3, d4, d5[1]
|
||||
|
||||
vdot.bf16 q0, q1, d5[1]
|
||||
// CHECK: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vdot.bf16 q0, q1, d5[1]
|
||||
|
||||
vmmla.bf16 q0, q1, q2
|
||||
// CHECK: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vmmla.bf16 q0, q1, q2
|
||||
|
||||
vcvt.bf16.f32 d1, q3
|
||||
// CHECK: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vcvt.bf16.f32 d1, q3
|
||||
|
||||
vcvtbeq.bf16.f32 s1, s3
|
||||
// CHECK: note: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3
|
||||
vcvttne.bf16.f32 s1, s3
|
||||
// CHECK: note: instruction requires: BFloat16 floating point extension
|
||||
// CHECK-NEXT: vcvttne.bf16.f32 s1, s3
|
|
@ -0,0 +1,15 @@
|
|||
// RUN: llvm-mc -triple thumbv8 -mattr=+bf16,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
|
||||
// RUN: llvm-mc -triple thumbv8 -mattr=+v8.6a -show-encoding < %s | FileCheck %s --check-prefix=CHECK
|
||||
|
||||
vcvt.bf16.f32 d1, q3
|
||||
// CHECK: vcvt.bf16.f32 d1, q3 @ encoding: [0xb6,0xff,0x46,0x16]
|
||||
|
||||
it eq
|
||||
vcvtbeq.bf16.f32 s1, s3
|
||||
// CHECK: it eq @ encoding: [0x08,0xbf]
|
||||
// CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0x61,0x09]
|
||||
|
||||
it ne
|
||||
vcvttne.bf16.f32 s1, s3
|
||||
// CHECK: it ne @ encoding: [0x18,0xbf]
|
||||
// CHECK: vcvttne.bf16.f32 s1, s3 @ encoding: [0xf3,0xee,0xe1,0x09]
|
|
@ -0,0 +1,74 @@
|
|||
# RUN: llvm-mc -triple=aarch64 -mattr=+bf16 -disassemble < %s | FileCheck %s
|
||||
# RUN: llvm-mc -triple=aarch64 -mattr=+v8.6a -disassemble < %s | FileCheck %s
|
||||
# RUN: not llvm-mc -triple=aarch64 -mattr=-bf16 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
|
||||
# RUN: not llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
|
||||
|
||||
|
||||
[0x62,0xfc,0x44,0x2e]
|
||||
[0x62,0xfc,0x44,0x6e]
|
||||
# CHECK: bfdot v2.2s, v3.4h, v4.4h
|
||||
# CHECK: bfdot v2.4s, v3.8h, v4.8h
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xfc,0x44,0x2e]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xfc,0x44,0x6e]
|
||||
|
||||
[0x62,0xf0,0x44,0x4f]
|
||||
[0x62,0xf0,0x64,0x4f]
|
||||
[0x62,0xf8,0x44,0x4f]
|
||||
[0x62,0xf8,0x64,0x4f]
|
||||
# CHECK: bfdot v2.4s, v3.8h, v4.2h[0]
|
||||
# CHECK: bfdot v2.4s, v3.8h, v4.2h[1]
|
||||
# CHECK: bfdot v2.4s, v3.8h, v4.2h[2]
|
||||
# CHECK: bfdot v2.4s, v3.8h, v4.2h[3]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf0,0x44,0x4f]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf0,0x64,0x4f]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf8,0x44,0x4f]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf8,0x64,0x4f]
|
||||
|
||||
|
||||
[0x62,0xf0,0x44,0x0f]
|
||||
[0x62,0xf0,0x64,0x0f]
|
||||
[0x62,0xf8,0x44,0x0f]
|
||||
[0x62,0xf8,0x64,0x0f]
|
||||
# CHECK: bfdot v2.2s, v3.4h, v4.2h[0]
|
||||
# CHECK: bfdot v2.2s, v3.4h, v4.2h[1]
|
||||
# CHECK: bfdot v2.2s, v3.4h, v4.2h[2]
|
||||
# CHECK: bfdot v2.2s, v3.4h, v4.2h[3]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf0,0x44,0x0f]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf0,0x64,0x0f]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf8,0x44,0x0f]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xf8,0x64,0x0f]
|
||||
|
||||
|
||||
[0x62,0xec,0x44,0x6e]
|
||||
[0x83,0xec,0x45,0x6e]
|
||||
# CHECK: bfmmla v2.4s, v3.8h, v4.8h
|
||||
# CHECK: bfmmla v3.4s, v4.8h, v5.8h
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x62,0xec,0x44,0x6e]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x83,0xec,0x45,0x6e]
|
||||
|
||||
|
||||
[0xa5,0x68,0xa1,0x0e]
|
||||
[0xa5,0x68,0xa1,0x4e]
|
||||
# CHECK: bfcvtn v5.4h, v5.4s
|
||||
# CHECK: bfcvtn2 v5.8h, v5.4s
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0xa5,0x68,0xa1,0x0e]
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0xa5,0x68,0xa1,0x4e]
|
||||
|
||||
[0x65, 0x40, 0x63, 0x1e]
|
||||
# CHECK: bfcvt h5, s3
|
||||
# NOBF16: warning: invalid instruction encoding
|
||||
# NOBF16-NEXT: [0x65, 0x40, 0x63, 0x1e]
|
|
@ -0,0 +1,102 @@
|
|||
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
|
||||
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
|
||||
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
|
||||
# RUN: llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOBF16
|
||||
#
|
||||
# Tests BFloat16 instruction decodings.
|
||||
# Without BFloat16 enabled, some of these get disassembled to coprocessor instructions.
|
||||
[0x25,0x3d,0x04,0xfe]
|
||||
# CHECK: vdot.bf16 d3, d4, d5[1]
|
||||
# NOBF16: cdp2 p13, #0, c3, c4, c5, #1
|
||||
#
|
||||
[0x65,0x0d,0x02,0xfe]
|
||||
# CHECK-NEXT: vdot.bf16 q0, q1, d5[1]
|
||||
# NOBF16-NEXT: cdp2 p13, #0, c0, c2, c5, #3
|
||||
#
|
||||
[0x61,0x09,0xf3,0x0e]
|
||||
# CHECK-NEXT: vcvtbeq.bf16.f32 s1, s3
|
||||
# NOBF16-NEXT: cdpeq p9, #15, c0, c3, c1, #3
|
||||
#
|
||||
[0xe1,0x09,0xf3,0x1e]
|
||||
# CHECK-NEXT: vcvttne.bf16.f32 s1, s3
|
||||
# NOBF16-NEXT: cdpne p9, #15, c0, c3, c1, #7
|
||||
#
|
||||
[0x50,0x08,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q0, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-320
|
||||
#
|
||||
[0x7e,0x08,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q0, q15
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-504
|
||||
#
|
||||
[0xd0,0x08,0x3e,0xfc]
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q15, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-832
|
||||
#
|
||||
[0xfe,0x08,0x3e,0xfc]
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q15, q15
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-1016
|
||||
#
|
||||
[0xd0,0x08,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q8, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-832
|
||||
#
|
||||
[0x50,0xe8,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmat.bf16 q7, q0, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c14, [r0], #-320
|
||||
#
|
||||
[0x50,0x08,0x70,0xfc]
|
||||
# CHECK-NEXT: vfmat.bf16 q8, q0, q0
|
||||
# NOBF16-NEXT: ldc2l p8, c0, [r0], #-320
|
||||
#
|
||||
[0x10,0x08,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q0, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-64
|
||||
#
|
||||
[0x3e,0x08,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q0, q15
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-248
|
||||
#
|
||||
[0x90,0x08,0x3e,0xfc]
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q15, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-576
|
||||
#
|
||||
[0xbe,0x08,0x3e,0xfc]
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q15, q15
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [lr], #-760
|
||||
#
|
||||
[0x90,0x08,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q8, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c0, [r0], #-576
|
||||
#
|
||||
[0x10,0xe8,0x30,0xfc]
|
||||
# CHECK-NEXT: vfmab.bf16 q7, q0, q0
|
||||
# NOBF16-NEXT: ldc2 p8, c14, [r0], #-64
|
||||
#
|
||||
[0x10,0x08,0x70,0xfc]
|
||||
# CHECK-NEXT: vfmab.bf16 q8, q0, q0
|
||||
# NOBF16-NEXT: ldc2l p8, c0, [r0], #-64
|
||||
#
|
||||
[0x50,0x08,0x30,0xfe]
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q0, d0[0]
|
||||
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #2
|
||||
#
|
||||
[0x78,0x08,0x30,0xfe]
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q0, d0[3]
|
||||
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #3
|
||||
[0x57,0x08,0x30,0xfe]
|
||||
#
|
||||
# CHECK-NEXT: vfmat.bf16 q0, q0, d7[0]
|
||||
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #2
|
||||
[0x10,0x08,0x30,0xfe]
|
||||
#
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q0, d0[0]
|
||||
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c0, #0
|
||||
[0x38,0x08,0x30,0xfe]
|
||||
#
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q0, d0[3]
|
||||
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c8, #1
|
||||
#
|
||||
[0x17,0x08,0x30,0xfe]
|
||||
# CHECK-NEXT: vfmab.bf16 q0, q0, d7[0]
|
||||
# NOBF16-NEXT: mrc2 p8, #1, r0, c0, c7, #0
|
|
@ -0,0 +1,20 @@
|
|||
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
|
||||
# RUN: llvm-mc -triple arm-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
|
||||
# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16
|
||||
# RUN: not llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOBF16
|
||||
|
||||
[0x05,0x3d,0x04,0xfc]
|
||||
# CHECK: vdot.bf16 d3, d4, d5
|
||||
# CHECK-NOBF16: warning: invalid instruction encoding
|
||||
|
||||
[0x44,0x0d,0x02,0xfc]
|
||||
# CHECK: vdot.bf16 q0, q1, q2
|
||||
# CHECK-NOBF16: warning: invalid instruction encoding
|
||||
|
||||
[0x44,0x0c,0x02,0xfc]
|
||||
# CHECK: vmmla.bf16 q0, q1, q2
|
||||
# CHECK-NOBF16: warning: invalid instruction encoding
|
||||
|
||||
[0x46,0x16,0xb6,0xf3]
|
||||
# CHECK: vcvt.bf16.f32 d1, q3
|
||||
# CHECK-NOBF16: warning: invalid instruction encoding
|
|
@ -0,0 +1,25 @@
|
|||
# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+bf16,+neon --disassemble < %s | FileCheck %s
|
||||
# RUN: llvm-mc -triple thumbv8-none-linux-gnu -mattr=+v8.6a --disassemble < %s | FileCheck %s
|
||||
|
||||
[0x04,0xfc,0x05,0x3d]
|
||||
[0x02,0xfc,0x44,0x0d]
|
||||
# CHECK: vdot.bf16 d3, d4, d5
|
||||
# CHECK: vdot.bf16 q0, q1, q2
|
||||
|
||||
[0x04,0xfe,0x25,0x3d]
|
||||
# CHECK: vdot.bf16 d3, d4, d5[1]
|
||||
|
||||
[0x02,0xfe,0x65,0x0d]
|
||||
# CHECK: vdot.bf16 q0, q1, d5[1]
|
||||
|
||||
[0x02,0xfc,0x44,0x0c]
|
||||
# CHECK: vmmla.bf16 q0, q1, q2
|
||||
|
||||
[0xb6,0xff,0x46,0x16]
|
||||
# CHECK: vcvt.bf16.f32 d1, q3
|
||||
|
||||
[0xf3,0xee,0x61,0x09]
|
||||
# CHECK: vcvtb.bf16.f32 s1, s3
|
||||
|
||||
[0xf3,0xee,0xe1,0x09]
|
||||
# CHECK: vcvtt.bf16.f32 s1, s3
|
|
@ -0,0 +1,40 @@
|
|||
# RUN: not llvm-mc -triple thumbv8-none-linux-gnu -mattr=-bf16 --disassemble < %s 2>&1 | FileCheck %s
|
||||
# RUN: not llvm-mc -triple thumbv8-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s
|
||||
|
||||
[0x04,0xfc,0x05,0x3d]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0x04,0xfc,0x05,0x3d]
|
||||
|
||||
[0x02,0xfc,0x44,0x0d]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0x02,0xfc,0x44,0x0d]
|
||||
|
||||
|
||||
[0x04,0xfe,0x25,0x3d]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0x04,0xfe,0x25,0x3d]
|
||||
|
||||
|
||||
[0x02,0xfe,0x65,0x0d]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0x02,0xfe,0x65,0x0d]
|
||||
|
||||
|
||||
[0x02,0xfc,0x44,0x0c]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0x02,0xfc,0x44,0x0c]
|
||||
|
||||
|
||||
[0xb6,0xff,0x46,0x16]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0xb6,0xff,0x46,0x16]
|
||||
|
||||
|
||||
[0xf3,0xee,0x61,0x09]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0xf3,0xee,0x61,0x09]
|
||||
|
||||
|
||||
[0xf3,0xee,0xe1,0x09]
|
||||
# CHECK: warning: invalid instruction encoding
|
||||
# CHECK-NEXT: [0xf3,0xee,0xe1,0x09]
|
|
@ -26,9 +26,9 @@ const char *ARMArch[] = {
|
|||
"armv7e-m", "armv7em", "armv8-a", "armv8", "armv8a",
|
||||
"armv8l", "armv8.1-a", "armv8.1a", "armv8.2-a", "armv8.2a",
|
||||
"armv8.3-a", "armv8.3a", "armv8.4-a", "armv8.4a", "armv8.5-a",
|
||||
"armv8.5a", "armv8-r", "armv8r", "armv8-m.base", "armv8m.base",
|
||||
"armv8-m.main", "armv8m.main", "iwmmxt", "iwmmxt2", "xscale",
|
||||
"armv8.1-m.main",
|
||||
"armv8.5a", "armv8.6-a", "armv8.6a", "armv8-r", "armv8r",
|
||||
"armv8-m.base", "armv8m.base", "armv8-m.main", "armv8m.main", "iwmmxt",
|
||||
"iwmmxt2", "xscale", "armv8.1-m.main",
|
||||
};
|
||||
|
||||
bool testARMCPU(StringRef CPUName, StringRef ExpectedArch,
|
||||
|
@ -410,6 +410,9 @@ TEST(TargetParserTest, testARMArch) {
|
|||
EXPECT_TRUE(
|
||||
testARMArch("armv8.5-a", "generic", "v8.5a",
|
||||
ARMBuildAttrs::CPUArch::v8_A));
|
||||
EXPECT_TRUE(
|
||||
testARMArch("armv8.6-a", "generic", "v8.6a",
|
||||
ARMBuildAttrs::CPUArch::v8_A));
|
||||
EXPECT_TRUE(
|
||||
testARMArch("armv8-r", "cortex-r52", "v8r",
|
||||
ARMBuildAttrs::CPUArch::v8_R));
|
||||
|
@ -678,7 +681,7 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) {
|
|||
"v7", "v7a", "v7ve", "v7hl", "v7l", "v7-r", "v7r", "v7-m",
|
||||
"v7m", "v7k", "v7s", "v7e-m", "v7em", "v8-a", "v8", "v8a",
|
||||
"v8l", "v8.1-a", "v8.1a", "v8.2-a", "v8.2a", "v8.3-a", "v8.3a", "v8.4-a",
|
||||
"v8.4a", "v8.5-a","v8.5a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main"
|
||||
"v8.4a", "v8.5-a","v8.5a", "v8.6-a", "v8.6a", "v8-r", "v8m.base", "v8m.main", "v8.1m.main"
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < array_lengthof(Arch); i++) {
|
||||
|
@ -743,6 +746,7 @@ TEST(TargetParserTest, ARMparseArchProfile) {
|
|||
case ARM::ArchKind::ARMV8_3A:
|
||||
case ARM::ArchKind::ARMV8_4A:
|
||||
case ARM::ArchKind::ARMV8_5A:
|
||||
case ARM::ArchKind::ARMV8_6A:
|
||||
EXPECT_EQ(ARM::ProfileKind::A, ARM::parseArchProfile(ARMArch[i]));
|
||||
break;
|
||||
default:
|
||||
|
@ -1008,6 +1012,8 @@ TEST(TargetParserTest, testAArch64Arch) {
|
|||
ARMBuildAttrs::CPUArch::v8_A));
|
||||
EXPECT_TRUE(testAArch64Arch("armv8.5-a", "generic", "v8.5a",
|
||||
ARMBuildAttrs::CPUArch::v8_A));
|
||||
EXPECT_TRUE(testAArch64Arch("armv8.6-a", "generic", "v8.6a",
|
||||
ARMBuildAttrs::CPUArch::v8_A));
|
||||
}
|
||||
|
||||
bool testAArch64Extension(StringRef CPUName, AArch64::ArchKind AK,
|
||||
|
|
Loading…
Reference in New Issue