[AArch64][SVE] Asm: Support for structured LD4 (scalar+imm) load instructions.

Reviewers: fhahn, rengolin, javed.absar, huntergr, SjoerdMeijer, t.p.northover, echristo, evandro Reviewed By: rengolin Subscribers: tschuett, llvm-commits, kristof.beyls Differential Revision: https://reviews.llvm.org/D45624 llvm-svn: 330120
2018-04-16 10:46:18 +00:00 · 2018-04-16 10:46:18 +00:00 · 7a210db81e
parent 9f6d02dcf1
commit 7a210db81e
14 changed files with 440 additions and 3 deletions
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@ -843,11 +843,13 @@ let Namespace = "AArch64" in {
  def zsub0 : SubRegIndex<128, -1>;
  def zsub1 : SubRegIndex<128, -1>;
  def zsub2 : SubRegIndex<128, -1>;
+  def zsub3 : SubRegIndex<128, -1>;
 }

 // Pairs, triples, and quads of SVE vector registers.
 def ZSeqPairs   : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>;
 def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>;
+def ZSeqQuads   : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2), (rotl ZPR, 3)]>;

 def ZPR2   : RegisterClass<"AArch64", [untyped], 128, (add ZSeqPairs)>  {
  let Size = 256;
@ -855,6 +857,9 @@ def ZPR2   : RegisterClass<"AArch64", [untyped], 128, (add ZSeqPairs)>  {
 def ZPR3  : RegisterClass<"AArch64", [untyped], 128, (add ZSeqTriples)> {
  let Size = 384;
 }
+def ZPR4 : RegisterClass<"AArch64", [untyped], 128, (add ZSeqQuads)> {
+  let Size = 512;
+}

 class ZPRVectorList<int ElementWidth, int NumRegs> : AsmOperandClass {
  let Name = "SVEVectorList" # NumRegs # ElementWidth;
@ -911,3 +916,19 @@ def ZZZ_s  : RegisterOperand<ZPR3, "printTypedVectorList<0,'s'>"> {
 def ZZZ_d  : RegisterOperand<ZPR3, "printTypedVectorList<0,'d'>"> {
  let ParserMatchClass = ZPRVectorList<64, 3>;
 }
+
+def ZZZZ_b : RegisterOperand<ZPR4, "printTypedVectorList<0,'b'>"> {
+  let ParserMatchClass = ZPRVectorList<8, 4>;
+}
+
+def ZZZZ_h : RegisterOperand<ZPR4, "printTypedVectorList<0,'h'>"> {
+  let ParserMatchClass = ZPRVectorList<16, 4>;
+}
+
+def ZZZZ_s : RegisterOperand<ZPR4, "printTypedVectorList<0,'s'>"> {
+  let ParserMatchClass = ZPRVectorList<32, 4>;
+}
+
+def ZZZZ_d : RegisterOperand<ZPR4, "printTypedVectorList<0,'d'>"> {
+  let ParserMatchClass = ZPRVectorList<64, 4>;
+}
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@ -41,12 +41,16 @@ let Predicates = [HasSVE] in {
  // LD(2|3|4) structured loads with reg+immediate
  defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b,   "ld2b", simm4Scale2MulVl>;
  defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b,  "ld3b", simm4Scale3MulVl>;
+  defm LD4B_IMM : sve_mem_eld_si<0b00, 0b11, ZZZZ_b, "ld4b", simm4Scale4MulVl>;
  defm LD2H_IMM : sve_mem_eld_si<0b01, 0b01, ZZ_h,   "ld2h", simm4Scale2MulVl>;
  defm LD3H_IMM : sve_mem_eld_si<0b01, 0b10, ZZZ_h,  "ld3h", simm4Scale3MulVl>;
+  defm LD4H_IMM : sve_mem_eld_si<0b01, 0b11, ZZZZ_h, "ld4h", simm4Scale4MulVl>;
  defm LD2W_IMM : sve_mem_eld_si<0b10, 0b01, ZZ_s,   "ld2w", simm4Scale2MulVl>;
  defm LD3W_IMM : sve_mem_eld_si<0b10, 0b10, ZZZ_s,  "ld3w", simm4Scale3MulVl>;
+  defm LD4W_IMM : sve_mem_eld_si<0b10, 0b11, ZZZZ_s, "ld4w", simm4Scale4MulVl>;
  defm LD2D_IMM : sve_mem_eld_si<0b11, 0b01, ZZ_d,   "ld2d", simm4Scale2MulVl>;
  defm LD3D_IMM : sve_mem_eld_si<0b11, 0b10, ZZZ_d,  "ld3d", simm4Scale3MulVl>;
+  defm LD4D_IMM : sve_mem_eld_si<0b11, 0b11, ZZZZ_d, "ld4d", simm4Scale4MulVl>;

  // continuous store with immediates
  defm ST1B_IMM   : sve_mem_cst_si<0b00, 0b00, "st1b", Z_b, ZPR8>;
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@ -1155,11 +1155,11 @@ public:
                   AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 },
      /* ZReg */ { AArch64::Z0,
                   AArch64::Z0,       AArch64::Z0_Z1,
-                   AArch64::Z0_Z1_Z2 }
+                   AArch64::Z0_Z1_Z2, AArch64::Z0_Z1_Z2_Z3 }
    };

-    assert((RegTy != VecListIdx_ZReg || NumRegs <= 3) &&
-           " NumRegs must be <= 3 for ZRegs");
+    assert((RegTy != VecListIdx_ZReg || NumRegs <= 4) &&
+           " NumRegs must be <= 4 for ZRegs");

    unsigned FirstReg = FirstRegs[(unsigned)RegTy][NumRegs];
    Inst.addOperand(MCOperand::createReg(FirstReg + getVectorListStart() -
@ -3652,6 +3652,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
    return Error(Loc, "index must be a multiple of 2 in range [-16, 14].");
  case Match_InvalidMemoryIndexed3SImm4:
    return Error(Loc, "index must be a multiple of 3 in range [-24, 21].");
+  case Match_InvalidMemoryIndexed4SImm4:
+    return Error(Loc, "index must be a multiple of 3 in range [-32, 28].");
  case Match_InvalidMemoryIndexedSImm9:
    return Error(Loc, "index must be an integer in range [-256, 255].");
  case Match_InvalidMemoryIndexedSImm10:
@ -4169,6 +4171,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
  case Match_InvalidMemoryIndexed1SImm4:
  case Match_InvalidMemoryIndexed2SImm4:
  case Match_InvalidMemoryIndexed3SImm4:
+  case Match_InvalidMemoryIndexed4SImm4:
  case Match_InvalidMemoryIndexed4SImm7:
  case Match_InvalidMemoryIndexed8SImm7:
  case Match_InvalidMemoryIndexed16SImm7:
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@ -94,6 +94,9 @@ static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo,
 static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
                                            uint64_t Address,
                                            const void *Decode);
+static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
+                                            uint64_t Address,
+                                            const void *Decode);
 static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                           uint64_t Address,
                                           const void *Decode);
@ -522,6 +525,30 @@ static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
  return Success;
 }

+static const unsigned ZZZZDecoderTable[] = {
+  AArch64::Z0_Z1_Z2_Z3,     AArch64::Z1_Z2_Z3_Z4,     AArch64::Z2_Z3_Z4_Z5,
+  AArch64::Z3_Z4_Z5_Z6,     AArch64::Z4_Z5_Z6_Z7,     AArch64::Z5_Z6_Z7_Z8,
+  AArch64::Z6_Z7_Z8_Z9,     AArch64::Z7_Z8_Z9_Z10,    AArch64::Z8_Z9_Z10_Z11,
+  AArch64::Z9_Z10_Z11_Z12,  AArch64::Z10_Z11_Z12_Z13, AArch64::Z11_Z12_Z13_Z14,
+  AArch64::Z12_Z13_Z14_Z15, AArch64::Z13_Z14_Z15_Z16, AArch64::Z14_Z15_Z16_Z17,
+  AArch64::Z15_Z16_Z17_Z18, AArch64::Z16_Z17_Z18_Z19, AArch64::Z17_Z18_Z19_Z20,
+  AArch64::Z18_Z19_Z20_Z21, AArch64::Z19_Z20_Z21_Z22, AArch64::Z20_Z21_Z22_Z23,
+  AArch64::Z21_Z22_Z23_Z24, AArch64::Z22_Z23_Z24_Z25, AArch64::Z23_Z24_Z25_Z26,
+  AArch64::Z24_Z25_Z26_Z27, AArch64::Z25_Z26_Z27_Z28, AArch64::Z26_Z27_Z28_Z29,
+  AArch64::Z27_Z28_Z29_Z30, AArch64::Z28_Z29_Z30_Z31, AArch64::Z29_Z30_Z31_Z0,
+  AArch64::Z30_Z31_Z0_Z1,   AArch64::Z31_Z0_Z1_Z2
+};
+
+static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
+                                            uint64_t Address,
+                                            const void* Decoder) {
+  if (RegNo > 31)
+    return Fail;
+  unsigned Register = ZZZZDecoderTable[RegNo];
+  Inst.addOperand(MCOperand::createReg(Register));
+  return Success;
+}
+
 static const unsigned PPRDecoderTable[] = {
  AArch64::P0,  AArch64::P1,  AArch64::P2,  AArch64::P3,
  AArch64::P4,  AArch64::P5,  AArch64::P6,  AArch64::P7,
--- a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@ -1188,6 +1188,7 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
           MRI.getRegClass(AArch64::QQQRegClassID).contains(Reg))
    NumRegs = 3;
  else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) ||
+           MRI.getRegClass(AArch64::ZPR4RegClassID).contains(Reg) ||
           MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg))
    NumRegs = 4;

--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@ -38,6 +38,7 @@ class SImmMulVlOperand<int Bits, int Scale> : AsmOperandClass {
 def SImm4MulVlOperand : SImmMulVlOperand<4,1>;
 def SImm4Scale2MulVlOperand : SImmMulVlOperand<4,2>;
 def SImm4Scale3MulVlOperand : SImmMulVlOperand<4,3>;
+def SImm4Scale4MulVlOperand : SImmMulVlOperand<4,4>;

 def simm4MulVl : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -8 && Imm < 8; }]> {
  let DecoderMethod = "DecodeSImm<4>";
@ -60,6 +61,14 @@ def simm4Scale3MulVl : Operand<i64>, ImmLeaf<i64, [{
  let ParserMatchClass = SImm4Scale3MulVlOperand;
 }

+def simm4Scale4MulVl : Operand<i64>, ImmLeaf<i64, [{
+    return (Imm >= -32 && Imm <= 28) && ((Imm % 4) == 0x0);
+  }]> {
+  let DecoderMethod = "DecodeSImm<4>";
+  let PrintMethod = "printImmScale<4>";
+  let ParserMatchClass = SImm4Scale4MulVlOperand;
+}
+
 class SVELogicalImmOperand<int Width> : AsmOperandClass {
  let Name = "SVELogicalImm" # Width;
  let DiagnosticType = "LogicalSecondSource";
--- a/llvm/test/MC/AArch64/SVE/ld4b-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/ld4b-diagnostics.s
@ -0,0 +1,67 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Immediate out of lower bound [-32, 28].
+
+ld4b {z12.b, z13.b, z14.b, z15.b}, p4/z, [x12, #-36, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4b {z12.b, z13.b, z14.b, z15.b}, p4/z, [x12, #-36, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4b {z7.b, z8.b, z9.b, z10.b}, p3/z, [x1, #32, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4b {z7.b, z8.b, z9.b, z10.b}, p3/z, [x1, #32, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Immediate not a multiple of four.
+
+ld4b {z12.b, z13.b, z14.b, z15.b}, p4/z, [x12, #-7, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4b {z12.b, z13.b, z14.b, z15.b}, p4/z, [x12, #-7, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4b {z7.b, z8.b, z9.b, z10.b}, p3/z, [x1, #5, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4b {z7.b, z8.b, z9.b, z10.b}, p3/z, [x1, #5, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// error: restricted predicate has range [0, 7].
+
+ld4b {z2.b, z3.b, z4.b, z5.b}, p8/z, [x15, #10, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ld4b {z2.b, z3.b, z4.b, z5.b}, p8/z, [x15, #10, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ld4b { }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ld4b { }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4b { z0.b, z1.b, z2.b, z3.b, z4.b }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b, z4.b }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4b { z0.b, z1.b, z2.b, z3.h }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+// CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.h }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4b { z0.b, z1.b, z3.b, z5.b }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential
+// CHECK-NEXT: ld4b { z0.b, z1.b, z3.b, z5.b }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4b { v0.16b, v1.16b, v2.16b }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: ld4b { v0.16b, v1.16b, v2.16b }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- a/llvm/test/MC/AArch64/SVE/ld4b.s
+++ b/llvm/test/MC/AArch64/SVE/ld4b.s
@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ld4b    { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
+// CHECK-INST: ld4b    { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
+// CHECK-ENCODING: [0x00,0xe0,0x60,0xa4]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 e0 60 a4 <unknown>
+
+ld4b    { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4b    { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
+// CHECK-ENCODING: [0xb7,0xed,0x68,0xa4]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: b7 ed 68 a4 <unknown>
+
+ld4b    { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4b    { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x55,0xf5,0x65,0xa4]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 55 f5 65 a4 <unknown>
--- a/llvm/test/MC/AArch64/SVE/ld4d-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/ld4d-diagnostics.s
@ -0,0 +1,67 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Immediate out of lower bound [-32, 28].
+
+ld4d {z12.d, z13.d, z14.d, z15.d}, p4/z, [x12, #-36, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4d {z12.d, z13.d, z14.d, z15.d}, p4/z, [x12, #-36, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4d {z7.d, z8.d, z9.d, z10.d}, p3/z, [x1, #32, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4d {z7.d, z8.d, z9.d, z10.d}, p3/z, [x1, #32, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Immediate not a multiple of four.
+
+ld4d {z12.d, z13.d, z14.d, z15.d}, p4/z, [x12, #-7, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4d {z12.d, z13.d, z14.d, z15.d}, p4/z, [x12, #-7, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4d {z7.d, z8.d, z9.d, z10.d}, p3/z, [x1, #5, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4d {z7.d, z8.d, z9.d, z10.d}, p3/z, [x1, #5, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// error: restricted predicate has range [0, 7].
+
+ld4d {z2.d, z3.d, z4.d, z5.d}, p8/z, [x15, #10, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ld4d {z2.d, z3.d, z4.d, z5.d}, p8/z, [x15, #10, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ld4d { }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ld4d { }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4d { z0.d, z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4d { z0.d, z1.d, z2.d, z3.b }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+// CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.b }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4d { z0.d, z1.d, z3.d, z5.d }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential
+// CHECK-NEXT: ld4d { z0.d, z1.d, z3.d, z5.d }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4d { v0.2d, v1.2d, v2.2d }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: ld4d { v0.2d, v1.2d, v2.2d }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- a/llvm/test/MC/AArch64/SVE/ld4d.s
+++ b/llvm/test/MC/AArch64/SVE/ld4d.s
@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ld4d    { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+// CHECK-INST: ld4d    { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+// CHECK-ENCODING: [0x00,0xe0,0xe0,0xa5]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 e0 e0 a5 <unknown>
+
+ld4d    { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4d    { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
+// CHECK-ENCODING: [0xb7,0xed,0xe8,0xa5]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: b7 ed e8 a5 <unknown>
+
+ld4d    { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4d    { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x55,0xf5,0xe5,0xa5]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 55 f5 e5 a5 <unknown>
--- a/llvm/test/MC/AArch64/SVE/ld4h-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/ld4h-diagnostics.s
@ -0,0 +1,67 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Immediate out of lower bound [-32, 28].
+
+ld4h {z12.h, z13.h, z14.h, z15.h}, p4/z, [x12, #-36, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4h {z12.h, z13.h, z14.h, z15.h}, p4/z, [x12, #-36, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4h {z7.h, z8.h, z9.h, z10.h}, p3/z, [x1, #32, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4h {z7.h, z8.h, z9.h, z10.h}, p3/z, [x1, #32, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Immediate not a multiple of four.
+
+ld4h {z12.h, z13.h, z14.h, z15.h}, p4/z, [x12, #-7, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4h {z12.h, z13.h, z14.h, z15.h}, p4/z, [x12, #-7, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4h {z7.h, z8.h, z9.h, z10.h}, p3/z, [x1, #5, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4h {z7.h, z8.h, z9.h, z10.h}, p3/z, [x1, #5, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// error: restricted predicate has range [0, 7].
+
+ld4h {z2.h, z3.h, z4.h, z5.h}, p8/z, [x15, #10, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ld4h {z2.h, z3.h, z4.h, z5.h}, p8/z, [x15, #10, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ld4h { }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ld4h { }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4h { z0.h, z1.h, z2.h, z3.h, z4.h }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h, z4.h }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4h { z0.h, z1.h, z2.h, z3.s }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+// CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.s }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4h { z0.h, z1.h, z3.h, z5.h }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential
+// CHECK-NEXT: ld4h { z0.h, z1.h, z3.h, z5.h }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4h { v0.8h, v1.8h, v2.8h }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: ld4h { v0.8h, v1.8h, v2.8h }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- a/llvm/test/MC/AArch64/SVE/ld4h.s
+++ b/llvm/test/MC/AArch64/SVE/ld4h.s
@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ld4h    { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+// CHECK-INST: ld4h    { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+// CHECK-ENCODING: [0x00,0xe0,0xe0,0xa4]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 e0 e0 a4 <unknown>
+
+ld4h    { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4h    { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
+// CHECK-ENCODING: [0xb7,0xed,0xe8,0xa4]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: b7 ed e8 a4 <unknown>
+
+ld4h    { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4h    { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x55,0xf5,0xe5,0xa4]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 55 f5 e5 a4 <unknown>
--- a/llvm/test/MC/AArch64/SVE/ld4w-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/ld4w-diagnostics.s
@ -0,0 +1,67 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Immediate out of lower bound [-32, 28].
+
+ld4w {z12.s, z13.s, z14.s, z15.s}, p4/z, [x12, #-36, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4w {z12.s, z13.s, z14.s, z15.s}, p4/z, [x12, #-36, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4w {z7.s, z8.s, z9.s, z10.s}, p3/z, [x1, #32, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4w {z7.s, z8.s, z9.s, z10.s}, p3/z, [x1, #32, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Immediate not a multiple of four.
+
+ld4w {z12.s, z13.s, z14.s, z15.s}, p4/z, [x12, #-7, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4w {z12.s, z13.s, z14.s, z15.s}, p4/z, [x12, #-7, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4w {z7.s, z8.s, z9.s, z10.s}, p3/z, [x1, #5, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 3 in range [-32, 28].
+// CHECK-NEXT: ld4w {z7.s, z8.s, z9.s, z10.s}, p3/z, [x1, #5, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// error: restricted predicate has range [0, 7].
+
+ld4w {z2.s, z3.s, z4.s, z5.s}, p8/z, [x15, #10, MUL VL]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ld4w {z2.s, z3.s, z4.s, z5.s}, p8/z, [x15, #10, MUL VL]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ld4w { }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ld4w { }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4w { z0.s, z1.s, z2.s, z3.s, z4.s }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s, z4.s }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4w { z0.s, z1.s, z2.s, z3.d }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+// CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.d }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4w { z0.s, z1.s, z3.s, z5.s }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential
+// CHECK-NEXT: ld4w { z0.s, z1.s, z3.s, z5.s }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ld4w { v0.4s, v1.4s, v2.4s }, p0/z, [x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand
+// CHECK-NEXT: ld4w { v0.4s, v1.4s, v2.4s }, p0/z, [x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
--- a/llvm/test/MC/AArch64/SVE/ld4w.s
+++ b/llvm/test/MC/AArch64/SVE/ld4w.s
@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ld4w    { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+// CHECK-INST: ld4w    { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+// CHECK-ENCODING: [0x00,0xe0,0x60,0xa5]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 00 e0 60 a5 <unknown>
+
+ld4w    { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
+// CHECK-INST: ld4w    { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
+// CHECK-ENCODING: [0xb7,0xed,0x68,0xa5]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: b7 ed 68 a5 <unknown>
+
+ld4w    { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK-INST: ld4w    { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+// CHECK-ENCODING: [0x55,0xf5,0x65,0xa5]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 55 f5 65 a5 <unknown>