[ARM/AArch64] Support FP16 +fp16fml instructions

Add the +fp16fml feature for the new FP16 instructions (FMLAL/FMLSL on AArch64, VFMAL/VFMSL on ARM), which are a mandatory part of FP16 from v8.4-A and an optional part of FP16 from v8.2-A. It doesn't seem to be possible to model this version-dependent mandatory/optional relationship in LLVM, but the relationship between the options is handled by the related clang patch. In keeping with what I think is the usual practice, the fp16fml extension is accepted regardless of base architecture version. Builds on/replaces Sjoerd Meijer's patch to add these instructions at https://reviews.llvm.org/D49839.
Differential Revision: https://reviews.llvm.org/D50228
llvm-svn: 340013
2018-08-17 11:29:49 +00:00 · 2018-08-17 11:29:49 +00:00 · b828bb2a15
parent 6cb07d2bed
commit b828bb2a15
24 changed files with 821 additions and 5 deletions
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@ -60,6 +60,7 @@ AARCH64_ARCH_EXT_NAME("dotprod",  AArch64::AEK_DOTPROD,  "+dotprod","-dotprod")
 AARCH64_ARCH_EXT_NAME("fp",       AArch64::AEK_FP,       "+fp-armv8",  "-fp-armv8")
 AARCH64_ARCH_EXT_NAME("simd",     AArch64::AEK_SIMD,     "+neon",  "-neon")
 AARCH64_ARCH_EXT_NAME("fp16",     AArch64::AEK_FP16,     "+fullfp16",  "-fullfp16")
+AARCH64_ARCH_EXT_NAME("fp16fml",  AArch64::AEK_FP16FML,  "+fp16fml", "-fp16fml")
 AARCH64_ARCH_EXT_NAME("profile",  AArch64::AEK_PROFILE,  "+spe",  "-spe")
 AARCH64_ARCH_EXT_NAME("ras",      AArch64::AEK_RAS,      "+ras",  "-ras")
 AARCH64_ARCH_EXT_NAME("sve",      AArch64::AEK_SVE,      "+sve",  "-sve")
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@ -152,6 +152,7 @@ ARM_ARCH_EXT_NAME("iwmmxt",   ARM::AEK_IWMMXT,   nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("iwmmxt2",  ARM::AEK_IWMMXT2,  nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("xscale",   ARM::AEK_XSCALE,   nullptr,  nullptr)
+ARM_ARCH_EXT_NAME("fp16fml",  ARM::AEK_FP16FML,  "+fp16fml", "-fp16fml")
 #undef ARM_ARCH_EXT_NAME

 #ifndef ARM_HW_DIV_NAME
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@ -88,6 +88,7 @@ enum ArchExtKind : unsigned {
  AEK_DOTPROD =     1 << 14,
  AEK_SHA2    =     1 << 15,
  AEK_AES     =     1 << 16,
+  AEK_FP16FML =     1 << 17,
  // Unsupported extensions.
  AEK_OS = 0x8000000,
  AEK_IWMMXT = 0x10000000,
@ -178,6 +179,7 @@ enum ArchExtKind : unsigned {
  AEK_SHA3 =        1 << 14,
  AEK_SHA2 =        1 << 15,
  AEK_AES =         1 << 16,
+  AEK_FP16FML =     1 << 17,
 };

 StringRef getCanonicalArchName(StringRef Arch);
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@ -234,6 +234,11 @@ bool llvm::ARM::getExtensionFeatures(unsigned Extensions,
  else
    Features.push_back("-dsp");

+  if (Extensions & ARM::AEK_FP16FML)
+    Features.push_back("+fp16fml");
+  else
+    Features.push_back("-fp16fml");
+
  if (Extensions & ARM::AEK_RAS)
    Features.push_back("+ras");
  else
@ -460,6 +465,8 @@ bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
    Features.push_back("+crypto");
  if (Extensions & AArch64::AEK_DOTPROD)
    Features.push_back("+dotprod");
+  if (Extensions & AArch64::AEK_FP16FML)
+    Features.push_back("+fp16fml");
  if (Extensions & AArch64::AEK_FP16)
    Features.push_back("+fullfp16");
  if (Extensions & AArch64::AEK_PROFILE)
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@ -71,6 +71,9 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
 def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
  "Full FP16", [FeatureFPARMv8]>;

+def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+  "Enable FP16 FML instructions", [FeatureFullFP16]>; // FeatureFullFP16 is passed as the list of implied features.
+
 def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
  "Enable Statistical Profiling extension">;

--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@ -4790,6 +4790,14 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
  let Inst{4-0}   = Rd;
 }

+let Predicates = [HasNEON, HasFP16FML] in // Format class for the three-register FP16FML forms; needs both NEON and FP16FML.
+class BaseSIMDThreeSameMult<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1,
+                                 string kind2> : // kind1 is the $Rd arrangement suffix; kind2 is used for both $Rn and $Rm.
+        BaseSIMDThreeSameVector<Q, U, size, 0b11101, V128, asm, kind1, [] > { // Passes opcode 0b11101, V128 and an empty pattern list to the base class.
+  let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); // Set explicitly so the sources print with kind2 rather than kind1.
+  let Inst{13} = b13; // Encoding bit 13 comes straight from the b13 template argument.
+}
+
 class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1,
                                 string kind2, RegisterOperand RegType,
                                 ValueType AccumType, ValueType InputType,
@ -7255,6 +7263,20 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind,
  let Inst{11}    = idx{1};  // H
 }

+let Predicates = [HasNEON, HasFP16FML] in // Format class for the indexed-element FP16FML forms; needs both NEON and FP16FML.
+class BaseSIMDThreeSameMultIndex<bit Q, bit U, bits<4> opc, string asm,
+                                 string dst_kind, string lhs_kind,
+                                 string rhs_kind> :
+        BaseSIMDIndexedTied<Q, U, 0, 0b10, opc, V128, V128, V128,
+                            VectorIndexH, asm, "", dst_kind, lhs_kind,
+                            rhs_kind, []> { // All three register operands use V128; the lane operand is VectorIndexH; no selection patterns.
+  // idx is the 3-bit lane index, split across the encoding as H:L:M (H is idx{2}, M is idx{0}).
+  bits<3> idx;
+  let Inst{11} = idx{2}; // H: most significant index bit
+  let Inst{21} = idx{1}; // L: middle index bit
+  let Inst{20} = idx{0}; // M: least significant index bit
+}
+
 multiclass SIMDThreeSameVectorDotIndex<bit U, string asm,
                                       SDPatternOperator OpNode> {
  def v8i8  : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64,
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@ -49,6 +49,8 @@ def HasRDM           : Predicate<"Subtarget->hasRDM()">,
 def HasPerfMon       : Predicate<"Subtarget->hasPerfMon()">;
 def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                                 AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
+                                 AssemblerPredicate<"FeatureFP16FML", "fp16fml">; // "fp16fml" is the name printed in "instruction requires:" assembler errors.
 def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                                 AssemblerPredicate<"FeatureSPE", "spe">;
 def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
@ -3299,6 +3301,24 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
 defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                    int_aarch64_neon_sqsub>;

+// FP16FML: fmlal/fmlsl and fmlal2/fmlsl2. The first eight defs are the three-register forms (.2s/.2h and .4s/.4h); the remaining eight are the indexed-element forms, whose last operand is a single .h lane.
+def FMLAL_2S   : BaseSIMDThreeSameMult<0, 0, 1, 0b001, "fmlal", ".2s", ".2h">;
+def FMLSL_2S   : BaseSIMDThreeSameMult<0, 0, 1, 0b101, "fmlsl", ".2s", ".2h">;
+def FMLAL_4S   : BaseSIMDThreeSameMult<1, 0, 1, 0b001, "fmlal", ".4s", ".4h">;
+def FMLSL_4S   : BaseSIMDThreeSameMult<1, 0, 1, 0b101, "fmlsl", ".4s", ".4h">;
+def FMLAL2_2S  : BaseSIMDThreeSameMult<0, 1, 0, 0b001, "fmlal2", ".2s", ".2h">;
+def FMLSL2_2S  : BaseSIMDThreeSameMult<0, 1, 0, 0b101, "fmlsl2", ".2s", ".2h">;
+def FMLAL2_4S  : BaseSIMDThreeSameMult<1, 1, 0, 0b001, "fmlal2", ".4s", ".4h">;
+def FMLSL2_4S  : BaseSIMDThreeSameMult<1, 1, 0, 0b101, "fmlsl2", ".4s", ".4h">;
+def FMLALI_2s  : BaseSIMDThreeSameMultIndex<0, 0, 0b0000, "fmlal", ".2s", ".2h", ".h">;
+def FMLSLI_2s  : BaseSIMDThreeSameMultIndex<0, 0, 0b0100, "fmlsl", ".2s", ".2h", ".h">;
+def FMLALI_4s  : BaseSIMDThreeSameMultIndex<1, 0, 0b0000, "fmlal", ".4s", ".4h", ".h">;
+def FMLSLI_4s  : BaseSIMDThreeSameMultIndex<1, 0, 0b0100, "fmlsl", ".4s", ".4h", ".h">;
+def FMLALI2_2s : BaseSIMDThreeSameMultIndex<0, 1, 0b1000, "fmlal2", ".2s", ".2h", ".h">;
+def FMLSLI2_2s : BaseSIMDThreeSameMultIndex<0, 1, 0b1100, "fmlsl2", ".2s", ".2h", ".h">;
+def FMLALI2_4s : BaseSIMDThreeSameMultIndex<1, 1, 0b1000, "fmlal2", ".4s", ".4h", ".h">;
+def FMLSLI2_4s : BaseSIMDThreeSameMultIndex<1, 1, 0b1100, "fmlsl2", ".4s", ".4h", ".h">;
+
+
 defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
 defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@ -78,6 +78,7 @@ protected:
  bool HasRDM = false;
  bool HasPerfMon = false;
  bool HasFullFP16 = false;
+  bool HasFP16FML = false;
  bool HasSPE = false;

  // ARMv8.4 Crypto extensions
@ -291,6 +292,7 @@ public:

  bool hasPerfMon() const { return HasPerfMon; }
  bool hasFullFP16() const { return HasFullFP16; }
+  bool hasFP16FML() const { return HasFP16FML; }
  bool hasSPE() const { return HasSPE; }
  bool hasLSLFast() const { return HasLSLFast; }
  bool hasSVE() const { return HasSVE; }
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@ -61,6 +61,11 @@ def FeatureFullFP16       : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
                                             "floating point",
                                             [FeatureFPARMv8]>;

+def FeatureFP16FML        : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
+                                             "Enable full half-precision "
+                                             "floating point fml instructions",
+                                             [FeatureFullFP16]>; // FeatureFullFP16 is passed as the list of implied features.
+
 def FeatureVFPOnlySP      : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
                                             "Floating point unit supports "
                                             "single precision only">;
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@ -2579,6 +2579,37 @@ class N3VLaneCP8<bit op23, bits<2> op21_20, bit op6, bit op4,
  let Inst{3-0}   = Vm{3-0};
 }

+// In Armv8.2-A, some NEON instructions are added that encode Vn and Vm
+// differently:
+//    if Q == '1' then UInt(N:Vn) else UInt(Vn:N);
+//    if Q == '1' then UInt(M:Vm) else UInt(Vm:M);
+// Class N3VCP8 above describes the Q=1 case, and this class the Q=0 case.
+class N3VCP8Q0<bits<2> op24_23, bits<2> op21_20, bit op6, bit op4,
+             dag oops, dag iops, InstrItinClass itin,
+             string opc, string dt, string asm, string cstr, list<dag> pattern>
+  : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc, dt, asm, cstr, pattern> {
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+
+  let DecoderNamespace = "VFPV8";
+  // These have the same encodings in ARM and Thumb2
+  let PostEncoderMethod = "";
+
+  let Inst{31-25} = 0b1111110;
+  let Inst{24-23} = op24_23;
+  let Inst{22}    = Vd{4};
+  let Inst{21-20} = op21_20;
+  let Inst{19-16} = Vn{4-1}; // Q=0 layout UInt(Vn:N): the 4-bit field holds the upper bits of Vn...
+  let Inst{15-12} = Vd{3-0};
+  let Inst{11-8}  = 0b1000;
+  let Inst{7}     = Vn{0}; // ...and bit 7 (N) holds the lowest bit of Vn.
+  let Inst{6}     = op6;
+  let Inst{5}     = Vm{0}; // Q=0 layout UInt(Vm:M): bit 5 (M) holds the lowest bit of Vm...
+  let Inst{4}     = op4;
+  let Inst{3-0}   = Vm{4-1}; // ...and the 4-bit field holds the upper bits of Vm.
+}
+
 // Operand types for complex instructions
 class ComplexRotationOperand<int Angle, int Remainder, string Type, string Diag>
  : AsmOperandClass {
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@ -285,6 +285,8 @@ def HasFP16          : Predicate<"Subtarget->hasFP16()">,
                                 AssemblerPredicate<"FeatureFP16","half-float conversions">;
 def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                                 AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
+                                 AssemblerPredicate<"FeatureFP16FML","full half-float fml">; // "full half-float fml" is the name printed in "instruction requires:" assembler errors.
 def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
                                 AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
 def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@ -5109,6 +5109,54 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
 }

+// +fp16fml Floating Point Multiplication Variants: vfmal/vfmsl in three-register and indexed (by-lane) forms, each with a D-destination and a Q-destination version.
+let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
+
+class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
+                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary, // Third argument fixed to 1; built on N3VCP8.
+           asm, "f16", "$Vd, $Vn, $Vm", "", []>; // Data type suffix is always "f16"; no selection patterns.
+
+class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
+                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
+  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary, // Passes 0 for N3VCP8Q0's op6 parameter.
+           asm, "f16", "$Vd, $Vn, $Vm", "", []>; // Data type suffix is always "f16"; no selection patterns.
+
+class VFMQ0<string opc, bits<2> S>
+  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
+               (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx),
+               IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> { // Indexed form with a D destination and S-register sources.
+  bit idx; // Single-bit lane index.
+  let Inst{3} = idx;
+  let Inst{19-16} = Vn{4-1}; // Vn is split as Vn{4-1} in bits 19-16 plus Vn{0} in bit 7.
+  let Inst{7}     = Vn{0};
+  let Inst{5}     = Vm{0}; // Vm{0} goes to bit 5 and Vm{3-1} to bits 2-0; bit 3 is taken by idx, so Vm{4} is not placed by this class.
+  let Inst{2-0}   = Vm{3-1};
+}
+
+class VFMQ1<string opc, bits<2> S>
+  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
+               (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx),
+               IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> { // Indexed form with a Q destination and D-register sources.
+  bits<2> idx; // Two-bit lane index, split across bits 5 and 3.
+  let Inst{5} = idx{1};
+  let Inst{3} = idx{0};
+}
+
+let hasNoSchedulingInfo = 1 in {
+//                                                op1   op2   op3
+def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
+def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
+def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
+def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
+def VFMALDI : VFMQ0<"vfmal", 0b00>; // The indexed defs differ between vfmal and vfmsl only in S (0b00 vs 0b01).
+def VFMSLDI : VFMQ0<"vfmsl", 0b01>;
+def VFMALQI : VFMQ1<"vfmal", 0b00>;
+def VFMSLQI : VFMQ1<"vfmsl", 0b01>;
+}
+} // HasNEON, HasFP16FML
+
+
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@ -227,6 +227,9 @@ protected:
  /// HasFullFP16 - True if subtarget supports half-precision FP operations
  bool HasFullFP16 = false;

+  /// HasFP16FML - True if subtarget supports half-precision FP fml operations
+  bool HasFP16FML = false;
+
  /// HasD16 - True if subtarget is limited to 16 double precision
  /// FP registers for VFPv3.
  bool HasD16 = false;
@ -622,6 +625,7 @@ public:
  bool hasFP16() const { return HasFP16; }
  bool hasD16() const { return HasD16; }
  bool hasFullFP16() const { return HasFullFP16; }
+  bool hasFP16FML() const { return HasFP16FML; }

  bool hasFuseAES() const { return HasFuseAES; }
  bool hasFuseLiterals() const { return HasFuseLiterals; }
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@ -57,7 +57,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  const FeatureBitset InlineFeatureWhitelist = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
-      ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@ -5626,7 +5626,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
      Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
      Mnemonic == "bxns"  || Mnemonic == "blxns" ||
      Mnemonic == "vudot" || Mnemonic == "vsdot" ||
-      Mnemonic == "vcmla" || Mnemonic == "vcadd")
+      Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+      Mnemonic == "vfmal" || Mnemonic == "vfmsl")
    return Mnemonic;

  // First, split out any predication code. Ignore mnemonics we know aren't
@ -5716,7 +5717,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
      (FullInst.startswith("vmull") && FullInst.endswith(".p64")) ||
      Mnemonic == "vmovx" || Mnemonic == "vins" ||
      Mnemonic == "vudot" || Mnemonic == "vsdot" ||
-      Mnemonic == "vcmla" || Mnemonic == "vcadd") {
+      Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
+      Mnemonic == "vfmal" || Mnemonic == "vfmsl") {
    // These mnemonics are never predicable
    CanAcceptPredicationCode = false;
  } else if (!isThumb()) {
--- a/llvm/test/MC/AArch64/armv8a-fpmul-error.s
+++ b/llvm/test/MC/AArch64/armv8a-fpmul-error.s
@ -0,0 +1,51 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp16fml,+neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK
+
+//------------------------------------------------------------------------------
+// ARMV8.2-A Floating Point Multiplication
+//------------------------------------------------------------------------------
+
+fmlal  V0.2s, v1.2h, v2.h[8]
+fmlsl  V0.2s, v1.2h, v2.h[8]
+fmlal  V0.4s, v1.4h, v2.h[8]
+fmlsl  V0.4s, v1.4h, v2.h[8]
+
+fmlal2  V0.2s, v1.2h, v2.h[8]
+fmlsl2  V0.2s, v1.2h, v2.h[8]
+fmlal2  V0.4s, v1.4h, v2.h[8]
+fmlsl2  V0.4s, v1.4h, v2.h[8]
+
+fmlal  V0.2s, v1.2h, v2.h[-1]
+fmlsl2  V0.2s, v1.2h, v2.h[-1]
+
+//CHECK: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlal  V0.2s, v1.2h, v2.h[8]
+//CHECK-NEXT:                          ^
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlsl  V0.2s, v1.2h, v2.h[8]
+//CHECK-NEXT:                          ^
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlal  V0.4s, v1.4h, v2.h[8]
+//CHECK-NEXT:                          ^
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlsl  V0.4s, v1.4h, v2.h[8]
+//CHECK-NEXT:                          ^
+
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlal2  V0.2s, v1.2h, v2.h[8]
+//CHECK-NEXT:                           ^
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlsl2  V0.2s, v1.2h, v2.h[8]
+//CHECK-NEXT:                           ^
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlal2  V0.4s, v1.4h, v2.h[8]
+//CHECK-NEXT:                           ^
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlsl2  V0.4s, v1.4h, v2.h[8]
+//CHECK-NEXT:                           ^
+
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlal  V0.2s, v1.2h, v2.h[-1]
+//CHECK-NEXT:                          ^
+//CHECK-NEXT: error: vector lane must be an integer in range [0, 7].
+//CHECK-NEXT: fmlsl2  V0.2s, v1.2h, v2.h[-1]
+//CHECK-NEXT:                           ^
--- a/llvm/test/MC/AArch64/armv8a-fpmul.s
+++ b/llvm/test/MC/AArch64/armv8a-fpmul.s
@ -0,0 +1,147 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp16fml < %s | FileCheck %s --check-prefix=CHECK
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-fullfp16,+fp16fml < %s | FileCheck %s --check-prefix=CHECK
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,+fp16fml,-fullfp16 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOFP16FML
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-neon,+fp16fml < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-NEON
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a,-neon < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-FP16FML-NOR-NEON
+
+//------------------------------------------------------------------------------
+// ARMV8.2-A Floating Point Multiplication
+//------------------------------------------------------------------------------
+
+FMLAL  V0.2S, V1.2H, V2.2H
+FMLSL  V0.2S, V1.2H, V2.2H
+FMLAL  V0.4S, V1.4H, V2.4H
+FMLSL  V0.4S, V1.4H, V2.4H
+FMLAL2  V0.2S, V1.2H, V2.2H
+FMLSL2  V0.2S, V1.2H, V2.2H
+FMLAL2  V0.4S, V1.4H, V2.4H
+FMLSL2  V0.4S, V1.4H, V2.4H
+
+//CHECK:  fmlal v0.2s, v1.2h, v2.2h     // encoding: [0x20,0xec,0x22,0x0e]
+//CHECK:  fmlsl v0.2s, v1.2h, v2.2h     // encoding: [0x20,0xec,0xa2,0x0e]
+//CHECK:  fmlal v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xec,0x22,0x4e]
+//CHECK:  fmlsl v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xec,0xa2,0x4e]
+//CHECK:  fmlal2  v0.2s, v1.2h, v2.2h     // encoding: [0x20,0xcc,0x22,0x2e]
+//CHECK:  fmlsl2  v0.2s, v1.2h, v2.2h     // encoding: [0x20,0xcc,0xa2,0x2e]
+//CHECK:  fmlal2  v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xcc,0x22,0x6e]
+//CHECK:  fmlsl2  v0.4s, v1.4h, v2.4h     // encoding: [0x20,0xcc,0xa2,0x6e]
+
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+
+# Checks with the maximum index value 7:
+fmlal  V0.2s, v1.2h, v2.h[7]
+fmlsl  V0.2s, v1.2h, v2.h[7]
+fmlal  V0.4s, v1.4h, v2.h[7]
+fmlsl  V0.4s, v1.4h, v2.h[7]
+fmlal2  V0.2s, v1.2h, v2.h[7]
+fmlsl2  V0.2s, v1.2h, v2.h[7]
+fmlal2  V0.4s, v1.4h, v2.h[7]
+fmlsl2  V0.4s, v1.4h, v2.h[7]
+
+# Some more checks with a different index bit pattern to catch
+# incorrect permutations of the index (decimal 7 is 0b111):
+fmlal  V0.2s, v1.2h, v2.h[5]
+fmlsl  V0.2s, v1.2h, v2.h[5]
+fmlal  V0.4s, v1.4h, v2.h[5]
+fmlsl  V0.4s, v1.4h, v2.h[5]
+fmlal2  V0.2s, v1.2h, v2.h[5]
+fmlsl2  V0.2s, v1.2h, v2.h[5]
+fmlal2  V0.4s, v1.4h, v2.h[5]
+fmlsl2  V0.4s, v1.4h, v2.h[5]
+
+//CHECK: fmlal v0.2s, v1.2h, v2.h[7]   // encoding: [0x20,0x08,0xb2,0x0f]
+//CHECK: fmlsl v0.2s, v1.2h, v2.h[7]   // encoding: [0x20,0x48,0xb2,0x0f]
+//CHECK: fmlal v0.4s, v1.4h, v2.h[7]   // encoding: [0x20,0x08,0xb2,0x4f]
+//CHECK: fmlsl v0.4s, v1.4h, v2.h[7]   // encoding: [0x20,0x48,0xb2,0x4f]
+//CHECK: fmlal2  v0.2s, v1.2h, v2.h[7]   // encoding: [0x20,0x88,0xb2,0x2f]
+//CHECK: fmlsl2  v0.2s, v1.2h, v2.h[7]   // encoding: [0x20,0xc8,0xb2,0x2f]
+//CHECK: fmlal2  v0.4s, v1.4h, v2.h[7]   // encoding: [0x20,0x88,0xb2,0x6f]
+//CHECK: fmlsl2  v0.4s, v1.4h, v2.h[7]   // encoding: [0x20,0xc8,0xb2,0x6f]
+
+//CHECK:  fmlal v0.2s, v1.2h, v2.h[5]   // encoding: [0x20,0x08,0x92,0x0f]
+//CHECK:  fmlsl v0.2s, v1.2h, v2.h[5]   // encoding: [0x20,0x48,0x92,0x0f]
+//CHECK:  fmlal v0.4s, v1.4h, v2.h[5]   // encoding: [0x20,0x08,0x92,0x4f]
+//CHECK:  fmlsl v0.4s, v1.4h, v2.h[5]   // encoding: [0x20,0x48,0x92,0x4f]
+//CHECK:  fmlal2  v0.2s, v1.2h, v2.h[5]   // encoding: [0x20,0x88,0x92,0x2f]
+//CHECK:  fmlsl2  v0.2s, v1.2h, v2.h[5]   // encoding: [0x20,0xc8,0x92,0x2f]
+//CHECK:  fmlal2  v0.4s, v1.4h, v2.h[5]   // encoding: [0x20,0x88,0x92,0x6f]
+//CHECK:  fmlsl2  v0.4s, v1.4h, v2.h[5]   // encoding: [0x20,0xc8,0x92,0x6f]
+
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+//CHECK-NOFP16FML: error: instruction requires: fp16fml{{$}}
+
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+//CHECK-NO-NEON: error: instruction requires: neon{{$}}
+
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: error: instruction requires: fp16fml neon{{$}}
+
--- a/llvm/test/MC/ARM/armv8a-fpmul-error.s
+++ b/llvm/test/MC/ARM/armv8a-fpmul-error.s
@ -0,0 +1,27 @@
+// RUN: not llvm-mc -triple arm -mattr=+fp16fml,+neon -show-encoding < %s 2>&1 | FileCheck %s  --check-prefix=CHECK-ERROR
+
+VFMAL.F16 D0, S1, S2[2]
+vfmsl.f16 d0, s1, s2[2]
+vfmsl.f16 d0, s1, s2[-1]
+vfmal.f16 q0, d1, d2[4]
+VFMSL.F16 Q0, D1, D2[4]
+vfmal.f16 q0, d1, d2[-1]
+
+//CHECK-ERROR:      error: invalid operand for instruction
+//CHECK-ERROR-NEXT: VFMAL.F16 D0, S1, S2[2]
+//CHECK-ERROR-NEXT:                     ^
+//CHECK-ERROR-NEXT: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: vfmsl.f16 d0, s1, s2[2]
+//CHECK-ERROR-NEXT:                     ^
+//CHECK-ERROR-NEXT: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: vfmsl.f16 d0, s1, s2[-1]
+//CHECK-ERROR-NEXT:                     ^
+//CHECK-ERROR-NEXT: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: vfmal.f16 q0, d1, d2[4]
+//CHECK-ERROR-NEXT:                     ^
+//CHECK-ERROR-NEXT: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: VFMSL.F16 Q0, D1, D2[4]
+//CHECK-ERROR-NEXT:                     ^
+//CHECK-ERROR-NEXT: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: vfmal.f16 q0, d1, d2[-1]
+//CHECK-ERROR-NEXT:                     ^
--- a/llvm/test/MC/ARM/armv8a-fpmul.s
+++ b/llvm/test/MC/ARM/armv8a-fpmul.s
@ -0,0 +1,92 @@
+// RUN: llvm-mc -triple arm -mattr=+fp16fml,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
+// RUN: llvm-mc -triple thumb -mattr=+fp16fml,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK-T32
+// RUN: llvm-mc -triple arm -mattr=-fullfp16,+fp16fml,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK
+// RUN: llvm-mc -triple thumb -mattr=-fullfp16,+fp16fml,+neon -show-encoding < %s | FileCheck %s --check-prefix=CHECK-T32
+
+// RUN: not llvm-mc -triple arm -mattr=+v8.2a -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML-NOR-NEON < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.2a -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML-NOR-NEON < %t %s
+
+// RUN: not llvm-mc -triple arm -mattr=+v8.2a,+neon -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+
+// RUN: not llvm-mc -triple arm -mattr=+v8.2a,+neon,+fp16fml,-fp16fml -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon,+fp16fml,-fp16fml -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+
+// RUN: not llvm-mc -triple arm -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon,+fullfp16 -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+
+// RUN: not llvm-mc -triple arm -mattr=+v8.2a,+neon,+fp16fml,-fullfp16 -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon,+fp16fml,-fullfp16 -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-FP16FML < %t %s
+
+// RUN: not llvm-mc -triple arm -mattr=+v8.2a,+fp16fml -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-NEON < %t %s
+// RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+fp16fml -show-encoding < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-NEON < %t %s
+
+VFMAL.F16 D0, S1, S2
+vfmsl.f16 d0, s1, s2
+vfmal.f16 q0, d1, d2
+VFMSL.F16 Q0, D1, D2
+
+VFMAL.F16 D0, S1, S2[1]
+vfmsl.f16 d0, s1, s2[1]
+vfmal.f16 q0, d1, d2[3]
+VFMSL.F16 Q0, D1, D2[3]
+
+//CHECK: vfmal.f16 d0, s1, s2      @ encoding: [0x91,0x08,0x20,0xfc]
+//CHECK: vfmsl.f16 d0, s1, s2      @ encoding: [0x91,0x08,0xa0,0xfc]
+//CHECK: vfmal.f16 q0, d1, d2      @ encoding: [0x52,0x08,0x21,0xfc]
+//CHECK: vfmsl.f16 q0, d1, d2      @ encoding: [0x52,0x08,0xa1,0xfc]
+
+//CHECK:  vfmal.f16 d0, s1, s2[1]   @ encoding: [0x99,0x08,0x00,0xfe]
+//CHECK:  vfmsl.f16 d0, s1, s2[1]   @ encoding: [0x99,0x08,0x10,0xfe]
+//CHECK:  vfmal.f16 q0, d1, d2[3]   @ encoding: [0x7a,0x08,0x01,0xfe]
+//CHECK:  vfmsl.f16 q0, d1, d2[3]   @ encoding: [0x7a,0x08,0x11,0xfe]
+
+//CHECK-T32:  vfmal.f16 d0, s1, s2      @ encoding: [0x20,0xfc,0x91,0x08]
+//CHECK-T32:  vfmsl.f16 d0, s1, s2      @ encoding: [0xa0,0xfc,0x91,0x08]
+//CHECK-T32:  vfmal.f16 q0, d1, d2      @ encoding: [0x21,0xfc,0x52,0x08]
+//CHECK-T32:  vfmsl.f16 q0, d1, d2      @ encoding: [0xa1,0xfc,0x52,0x08]
+
+//CHECK-T32:  vfmal.f16 d0, s1, s2[1]   @ encoding: [0x00,0xfe,0x99,0x08]
+//CHECK-T32:  vfmsl.f16 d0, s1, s2[1]   @ encoding: [0x10,0xfe,0x99,0x08]
+//CHECK-T32:  vfmal.f16 q0, d1, d2[3]   @ encoding: [0x01,0xfe,0x7a,0x08]
+//CHECK-T32:  vfmsl.f16 q0, d1, d2[3]   @ encoding: [0x11,0xfe,0x7a,0x08]
+
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+//CHECK-NO-FP16FML: instruction requires: full half-float fml{{$}}
+
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+//CHECK-NO-FP16FML-NOR-NEON: instruction requires: full half-float fml NEON{{$}}
+
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+//CHECK-NO-NEON: instruction requires: NEON{{$}}
+
--- a/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul-err.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul-err.txt
@ -0,0 +1,118 @@
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a             --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ERROR,FP16-ERROR
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a,+fullfp16          --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=FP16,CHECK-ERROR
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a,+fullfp16,-fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=FP16,CHECK-ERROR
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a,-fp16fml,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=FP16,CHECK-ERROR
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a,+fp16fml,-fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ERROR,FP16-ERROR
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a,+fp16fml,-neon     --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=FP16,CHECK-ERROR
+
+[0x20,0xec,0x22,0x0e]
+[0x20,0xec,0xa2,0x0e]
+[0x20,0xec,0x22,0x4e]
+[0x20,0xec,0xa2,0x4e]
+[0x20,0xcc,0x22,0x2e]
+[0x20,0xcc,0xa2,0x2e]
+[0x20,0xcc,0x22,0x6e]
+[0x20,0xcc,0xa2,0x6e]
+
+#indexed variants:
+
+[0x20,0x08,0xb2,0x0f]
+[0x20,0x48,0xb2,0x0f]
+[0x20,0x08,0xb2,0x4f]
+[0x20,0x48,0xb2,0x4f]
+[0x20,0x88,0xb2,0x2f]
+[0x20,0xc8,0xb2,0x2f]
+[0x20,0x88,0xb2,0x6f]
+[0x20,0xc8,0xb2,0x6f]
+
+[0x20,0x08,0x92,0x0f]
+[0x20,0x48,0x92,0x0f]
+[0x20,0x08,0x92,0x4f]
+[0x20,0x48,0x92,0x4f]
+[0x20,0x88,0x92,0x2f]
+[0x20,0xc8,0x92,0x2f]
+[0x20,0x88,0x92,0x6f]
+[0x20,0xc8,0x92,0x6f]
+
+# An instruction that needs fullfp16 but not fp16fml (fmul h1, h2, h3), to test how the two features interact
+[0x41,0x08,0xe3,0x1e]
+
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xec,0x22,0x0e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xec,0xa2,0x0e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xec,0x22,0x4e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xec,0xa2,0x4e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xcc,0x22,0x2e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xcc,0xa2,0x2e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xcc,0x22,0x6e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xcc,0xa2,0x6e]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x08,0xb2,0x0f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x48,0xb2,0x0f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x08,0xb2,0x4f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x48,0xb2,0x4f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x88,0xb2,0x2f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xc8,0xb2,0x2f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x88,0xb2,0x6f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xc8,0xb2,0x6f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x08,0x92,0x0f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x48,0x92,0x0f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x08,0x92,0x4f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x48,0x92,0x4f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x88,0x92,0x2f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xc8,0x92,0x2f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0x88,0x92,0x6f]
+#CHECK-ERROR:  ^
+#CHECK-ERROR: warning: invalid instruction encoding
+#CHECK-ERROR: [0x20,0xc8,0x92,0x6f]
+#CHECK-ERROR:  ^
+
+#FP16-ERROR: warning: invalid instruction encoding
+#FP16-ERROR: [0x41,0x08,0xe3,0x1e]
+#FP16-ERROR:  ^
+
+#FP16-NOT: [0x41,0x08,0xe3,0x1e]
+
--- a/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
@ -0,0 +1,64 @@
+# RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+fp16fml           --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
+# RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=-fullfp16,+fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
+
+# An instruction that needs fullfp16 but not fp16fml (fmul h1, h2, h3), to test how the two features interact
+[0x41,0x08,0xe3,0x1e]
+
+[0x20,0xec,0x22,0x0e]
+[0x20,0xec,0xa2,0x0e]
+[0x20,0xec,0x22,0x4e]
+[0x20,0xec,0xa2,0x4e]
+[0x20,0xcc,0x22,0x2e]
+[0x20,0xcc,0xa2,0x2e]
+[0x20,0xcc,0x22,0x6e]
+[0x20,0xcc,0xa2,0x6e]
+
+#indexed variants:
+
+[0x20,0x08,0xb2,0x0f]
+[0x20,0x48,0xb2,0x0f]
+[0x20,0x08,0xb2,0x4f]
+[0x20,0x48,0xb2,0x4f]
+[0x20,0x88,0xb2,0x2f]
+[0x20,0xc8,0xb2,0x2f]
+[0x20,0x88,0xb2,0x6f]
+[0x20,0xc8,0xb2,0x6f]
+
+[0x20,0x08,0x92,0x0f]
+[0x20,0x48,0x92,0x0f]
+[0x20,0x08,0x92,0x4f]
+[0x20,0x48,0x92,0x4f]
+[0x20,0x88,0x92,0x2f]
+[0x20,0xc8,0x92,0x2f]
+[0x20,0x88,0x92,0x6f]
+[0x20,0xc8,0x92,0x6f]
+
+#FP16: fmul h1, h2, h3
+
+#CHECK:  fmlal v0.2s, v1.2h, v2.2h
+#CHECK:  fmlsl v0.2s, v1.2h, v2.2h
+#CHECK:  fmlal v0.4s, v1.4h, v2.4h
+#CHECK:  fmlsl v0.4s, v1.4h, v2.4h
+#CHECK:  fmlal2  v0.2s, v1.2h, v2.2h
+#CHECK:  fmlsl2  v0.2s, v1.2h, v2.2h
+#CHECK:  fmlal2  v0.4s, v1.4h, v2.4h
+#CHECK:  fmlsl2  v0.4s, v1.4h, v2.4h
+
+#CHECK:  fmlal v0.2s, v1.2h, v2.h[7]
+#CHECK:  fmlsl v0.2s, v1.2h, v2.h[7]
+#CHECK:  fmlal v0.4s, v1.4h, v2.h[7]
+#CHECK:  fmlsl v0.4s, v1.4h, v2.h[7]
+#CHECK:  fmlal2  v0.2s, v1.2h, v2.h[7]
+#CHECK:  fmlsl2  v0.2s, v1.2h, v2.h[7]
+#CHECK:  fmlal2  v0.4s, v1.4h, v2.h[7]
+#CHECK:  fmlsl2  v0.4s, v1.4h, v2.h[7]
+
+#CHECK:  fmlal v0.2s, v1.2h, v2.h[5]
+#CHECK:  fmlsl v0.2s, v1.2h, v2.h[5]
+#CHECK:  fmlal v0.4s, v1.4h, v2.h[5]
+#CHECK:  fmlsl v0.4s, v1.4h, v2.h[5]
+#CHECK:  fmlal2  v0.2s, v1.2h, v2.h[5]
+#CHECK:  fmlsl2  v0.2s, v1.2h, v2.h[5]
+#CHECK:  fmlal2  v0.4s, v1.4h, v2.h[5]
+#CHECK:  fmlsl2  v0.4s, v1.4h, v2.h[5]
+
--- a/llvm/test/MC/Disassembler/ARM/armv8a-fpmul-a32.txt
+++ b/llvm/test/MC/Disassembler/ARM/armv8a-fpmul-a32.txt
@ -0,0 +1,78 @@
+# RUN:     llvm-mc -triple arm-none-linux-gnu -mattr=+neon,+fp16fml --disassemble < %s | FileCheck %s
+# RUN:     llvm-mc -triple arm-none-linux-gnu -mattr=+neon,-fullfp16,+fp16fml --disassemble < %s | FileCheck %s
+# RUN:     llvm-mc -triple arm-none-linux-gnu --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-COPROC
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+fullfp16,-fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,-fp16fml,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+fullfp16,+fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+fp16fml,-fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon,+fullfp16,-fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon,-fp16fml,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.2a,+neon,+fp16fml,-fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+
+[0x91,0x08,0x20,0xfc]
+[0x91,0x08,0xa0,0xfc]
+[0x52,0x08,0x21,0xfc]
+[0x52,0x08,0xa1,0xfc]
+[0x99,0x08,0x00,0xfe]
+[0x99,0x08,0x10,0xfe]
+[0x7a,0x08,0x01,0xfe]
+[0x7a,0x08,0x11,0xfe]
+
+# An instruction that needs fullfp16 but not fp16fml (vadd.f16 s0, s1, s0), to test how the two features interact
+[0x80,0x09,0x30,0xee]
+
+#CHECK: vfmal.f16 d0, s1, s2
+#CHECK: vfmsl.f16 d0, s1, s2
+#CHECK: vfmal.f16 q0, d1, d2
+#CHECK: vfmsl.f16 q0, d1, d2
+#CHECK: vfmal.f16 d0, s1, s2[1]
+#CHECK: vfmsl.f16 d0, s1, s2[1]
+#CHECK: vfmal.f16 q0, d1, d2[3]
+#CHECK: vfmsl.f16 q0, d1, d2[3]
+#CHECK: vadd.f16  s0, s1, s0
+
+#CHECK-COPROC:  stc2  p8, c0, [r0], #-580
+#CHECK-COPROC:  stc2  p8, c0, [r0], #580
+#CHECK-COPROC:  stc2  p8, c0, [r1], #-328
+#CHECK-COPROC:  stc2  p8, c0, [r1], #328
+#CHECK-COPROC:  mcr2  p8, #0, r0, c0, c9, #4
+#CHECK-COPROC:  mrc2  p8, #0, r0, c0, c9, #4
+#CHECK-COPROC:  mcr2  p8, #0, r0, c1, c10, #3
+#CHECK-COPROC:  mrc2  p8, #0, r0, c1, c10, #3
+#CHECK-COPROC:  cdp   p9, #3, c0, c0, c0, #4
+
+#CHECK-INVALID:  warning: invalid instruction encoding
+#CHECK-INVALID: [0x91,0x08,0x20,0xfc]
+#CHECK-INVALID:  ^
+#CHECK-INVALID:  warning: invalid instruction encoding
+#CHECK-INVALID: [0x91,0x08,0xa0,0xfc]
+#CHECK-INVALID:  ^
+#CHECK-INVALID:  warning: invalid instruction encoding
+#CHECK-INVALID: [0x52,0x08,0x21,0xfc]
+#CHECK-INVALID:  ^
+#CHECK-INVALID:  warning: invalid instruction encoding
+#CHECK-INVALID: [0x52,0x08,0xa1,0xfc]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x99,0x08,0x00,0xfe]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x99,0x08,0x10,0xfe]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x7a,0x08,0x01,0xfe]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x7a,0x08,0x11,0xfe]
+#CHECK-INVALID:  ^
+
+#FP16-INVALID: warning: invalid instruction encoding
+#FP16-INVALID: [0x80,0x09,0x30,0xee]
+#FP16-INVALID: ^
+
+#FP16-NOT: [0x80,0x09,0x30,0xee]
--- a/llvm/test/MC/Disassembler/ARM/armv8a-fpmul-t32.txt
+++ b/llvm/test/MC/Disassembler/ARM/armv8a-fpmul-t32.txt
@ -0,0 +1,68 @@
+# RUN:     llvm-mc -triple thumb -mattr=+neon,+fp16fml --disassemble < %s | FileCheck %s
+# RUN:     llvm-mc -triple thumb -mattr=+neon,-fullfp16,+fp16fml --disassemble < %s | FileCheck %s
+# RUN: not llvm-mc -triple thumb --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+fullfp16,-fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,-fp16fml,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+fullfp16,+fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+fp16fml,-fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon,+fullfp16,-fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon,-fp16fml,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16
+# RUN: not llvm-mc -triple thumb -mattr=+v8.2a,+neon,+fp16fml,-fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-INVALID,FP16-INVALID
+
+[0x20,0xfc,0x91,0x08]
+[0xa0,0xfc,0x91,0x08]
+[0x21,0xfc,0x52,0x08]
+[0xa1,0xfc,0x52,0x08]
+[0x00,0xfe,0x99,0x08]
+[0x10,0xfe,0x99,0x08]
+[0x01,0xfe,0x7a,0x08]
+[0x11,0xfe,0x7a,0x08]
+
+# An instruction that needs fullfp16 but not fp16fml (vadd.f16 s0, s1, s0), to test how the two features interact
+[0x30,0xee,0x80,0x09]
+
+#CHECK: vfmal.f16 d0, s1, s2
+#CHECK: vfmsl.f16 d0, s1, s2
+#CHECK: vfmal.f16 q0, d1, d2
+#CHECK: vfmsl.f16 q0, d1, d2
+#CHECK: vfmal.f16 d0, s1, s2[1]
+#CHECK: vfmsl.f16 d0, s1, s2[1]
+#CHECK: vfmal.f16 q0, d1, d2[3]
+#CHECK: vfmsl.f16 q0, d1, d2[3]
+#CHECK: vadd.f16 s0, s1, s0
+
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x20,0xfc,0x91,0x08]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0xa0,0xfc,0x91,0x08]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x21,0xfc,0x52,0x08]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0xa1,0xfc,0x52,0x08]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x00,0xfe,0x99,0x08]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x10,0xfe,0x99,0x08]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x01,0xfe,0x7a,0x08]
+#CHECK-INVALID:  ^
+#CHECK-INVALID: warning: invalid instruction encoding
+#CHECK-INVALID: [0x11,0xfe,0x7a,0x08]
+#CHECK-INVALID:  ^
+
+#FP16-INVALID: warning: invalid instruction encoding
+#FP16-INVALID: [0x30,0xee,0x80,0x09]
+#FP16-INVALID: ^
+
+#FP16-NOT: [0x30,0xee,0x80,0x09]
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@ -448,8 +448,12 @@ TEST(TargetParserTest, testARMExtension) {
                                ARM::ArchKind::INVALID, "fp16"));
  EXPECT_TRUE(testARMExtension("cortex-a55",
                                ARM::ArchKind::INVALID, "fp16"));
+  EXPECT_FALSE(testARMExtension("cortex-a55",
+                                ARM::ArchKind::INVALID, "fp16fml"));
  EXPECT_TRUE(testARMExtension("cortex-a75",
                                ARM::ArchKind::INVALID, "fp16"));
+  EXPECT_FALSE(testARMExtension("cortex-a75",
+                                ARM::ArchKind::INVALID, "fp16fml"));
  EXPECT_FALSE(testARMExtension("cortex-r52",
                                ARM::ArchKind::INVALID, "ras"));
  EXPECT_FALSE(testARMExtension("iwmmxt", ARM::ArchKind::INVALID, "crc"));
@ -481,8 +485,11 @@ TEST(TargetParserTest, testARMExtension) {
  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_1A, "ras"));
  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_2A, "spe"));
  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_2A, "fp16"));
+  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_2A, "fp16fml"));
  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_3A, "fp16"));
+  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_3A, "fp16fml"));
  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_4A, "fp16"));
+  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8_4A, "fp16fml"));
  EXPECT_FALSE(testARMExtension("generic", ARM::ArchKind::ARMV8R, "ras"));
  EXPECT_FALSE(testARMExtension("generic",
                                ARM::ArchKind::ARMV8MBaseline, "crc"));
@ -536,7 +543,8 @@ TEST(TargetParserTest, ARMExtensionFeatures) {
  std::vector<StringRef> Features;
  unsigned Extensions = ARM::AEK_CRC | ARM::AEK_CRYPTO | ARM::AEK_DSP |
                        ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_MP |
-                        ARM::AEK_SEC | ARM::AEK_VIRT | ARM::AEK_RAS | ARM::AEK_FP16;
+                        ARM::AEK_SEC | ARM::AEK_VIRT | ARM::AEK_RAS | ARM::AEK_FP16 |
+                        ARM::AEK_FP16FML;

  for (unsigned i = 0; i <= Extensions; i++)
    EXPECT_TRUE(i == 0 ? !ARM::getExtensionFeatures(i, Features)
@ -564,6 +572,7 @@ TEST(TargetParserTest, ARMArchExtFeature) {
                              {"sec", "nosec", nullptr, nullptr},
                              {"virt", "novirt", nullptr, nullptr},
                              {"fp16", "nofp16", "+fullfp16", "-fullfp16"},
+                              {"fp16fml", "nofp16fml", "+fp16fml", "-fp16fml"},
                              {"ras", "noras", "+ras", "-ras"},
                              {"dotprod", "nodotprod", "+dotprod", "-dotprod"},
                              {"os", "noos", nullptr, nullptr},
@ -864,8 +873,12 @@ TEST(TargetParserTest, testAArch64Extension) {
                                    AArch64::ArchKind::INVALID, "fp16"));
  EXPECT_TRUE(testAArch64Extension("cortex-a55",
                                    AArch64::ArchKind::INVALID, "fp16"));
+  EXPECT_FALSE(testAArch64Extension("cortex-a55",
+                                    AArch64::ArchKind::INVALID, "fp16fml"));
  EXPECT_TRUE(testAArch64Extension("cortex-a75",
                                    AArch64::ArchKind::INVALID, "fp16"));
+  EXPECT_FALSE(testAArch64Extension("cortex-a75",
+                                    AArch64::ArchKind::INVALID, "fp16fml"));
  EXPECT_FALSE(testAArch64Extension("thunderx2t99",
                                    AArch64::ArchKind::INVALID, "ras"));
  EXPECT_FALSE(testAArch64Extension("thunderx",
@ -885,10 +898,16 @@ TEST(TargetParserTest, testAArch64Extension) {
      "generic", AArch64::ArchKind::ARMV8_2A, "spe"));
  EXPECT_FALSE(testAArch64Extension(
      "generic", AArch64::ArchKind::ARMV8_2A, "fp16"));
+  EXPECT_FALSE(testAArch64Extension(
+      "generic", AArch64::ArchKind::ARMV8_2A, "fp16fml"));
  EXPECT_FALSE(testAArch64Extension(
      "generic", AArch64::ArchKind::ARMV8_3A, "fp16"));
+  EXPECT_FALSE(testAArch64Extension(
+      "generic", AArch64::ArchKind::ARMV8_3A, "fp16fml"));
  EXPECT_FALSE(testAArch64Extension(
      "generic", AArch64::ArchKind::ARMV8_4A, "fp16"));
+  EXPECT_FALSE(testAArch64Extension(
+      "generic", AArch64::ArchKind::ARMV8_4A, "fp16fml"));
 }

 TEST(TargetParserTest, AArch64ExtensionFeatures) {
@ -898,7 +917,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
                        AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
                        AArch64::AEK_RAS | AArch64::AEK_LSE |
                        AArch64::AEK_RDM | AArch64::AEK_SVE |
-                        AArch64::AEK_DOTPROD | AArch64::AEK_RCPC;
+                        AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+                        AArch64::AEK_FP16FML;

  for (unsigned i = 0; i <= Extensions; i++)
    EXPECT_TRUE(i == 0 ? !AArch64::getExtensionFeatures(i, Features)
@ -925,6 +945,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
                              {"fp", "nofp", "+fp-armv8", "-fp-armv8"},
                              {"simd", "nosimd", "+neon", "-neon"},
                              {"fp16", "nofp16", "+fullfp16", "-fullfp16"},
+                              {"fp16fml", "nofp16fml", "+fp16fml", "-fp16fml"},
                              {"profile", "noprofile", "+spe", "-spe"},
                              {"ras", "noras", "+ras", "-ras"},
                              {"lse", "nolse", "+lse", "-lse"},