[AArch64] Model Cortex-A55 Q register NEON instructions

Cortex-A55 has two 64-bit NEON vector units, meaning a 128-bit instruction requires taking both units (and can only be issued as the first instruction in a dual-issue pair). This patch models that by splitting the WriteV SchedWrite into two: WriteVd, which reads/writes only 64-bit operands, and WriteVq, which reads/writes 128-bit registers. The A55 schedule then uses this distinction to model WriteVq as taking both resource units and starting a schedule group, and WriteVd as taking a single unit as before. I believe this is more correct, even if it does not lead to much better performance. Differential Revision: https://reviews.llvm.org/D108766
2021-09-29 16:55:31 +01:00 · 2021-09-29 16:55:31 +01:00 · e9adcbde31
parent 9b10e2b1cf
commit e9adcbde31
18 changed files with 1053 additions and 1024 deletions
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@ -5310,7 +5310,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
  : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
      "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -5333,7 +5333,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
  : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
      "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -5352,7 +5352,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class BaseSIMDThreeSameVectorPseudo<RegisterOperand regtype, list<dag> pattern>
  : Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>,
-    Sched<[WriteV]>;
+    Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]>;
 multiclass SIMDLogicalThreeVectorPseudo<SDPatternOperator OpNode> {
  def v8i8  : BaseSIMDThreeSameVectorPseudo<V64,
@ -5705,7 +5705,7 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
  : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
      "{\t$Rd" # dstkind # ", $Rn" # srckind #
      "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -5730,7 +5730,7 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
  : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
      "{\t$Rd" # dstkind # ", $Rn" # srckind #
      "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -5776,7 +5776,7 @@ class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
  : I<(outs V128:$Rd), (ins regtype:$Rn), asm,
      "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
      "|" # dstkind # "\t$Rd, $Rn, #" #  amount # "}", "", []>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -6041,7 +6041,7 @@ class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
  : I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
      "{\t$Rd" # outkind # ", $Rn" # inkind #
      "|" # outkind # "\t$Rd, $Rn}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -6064,7 +6064,7 @@ class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
  : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
      "{\t$Rd" # outkind # ", $Rn" # inkind #
      "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -6116,7 +6116,7 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
      "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
      "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
      [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -6214,7 +6214,7 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
                             list<dag> pattern>
  : I<(outs outtype:$Rd), (ins intype:$Rn), asm,
      !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -6236,7 +6236,7 @@ class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
                             list<dag> pattern>
  : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
      !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -6299,7 +6299,7 @@ class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
  : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
      "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
      "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -6325,7 +6325,7 @@ class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
  : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
      "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
      "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -6665,7 +6665,7 @@ class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
      "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
      [(set (vty regtype:$Rd),
            (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
-    Sched<[WriteV]> {
+    Sched<[!if(size, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -6699,7 +6699,7 @@ class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
      "|" # kind # "\t$Rd, $Rn, $Rm}", "",
      [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
-    Sched<[WriteV]> {
+    Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -6755,7 +6755,7 @@ class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
                        list<dag> pattern>
  : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
      "\t$Rd, $Rn, $Rm", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -6775,7 +6775,7 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
                        dag oops, dag iops, string asm,
            list<dag> pattern>
  : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -6868,7 +6868,7 @@ class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
              dag oops, dag iops, string asm, string cstr, list<dag> pat>
  : I<oops, iops, asm,
      "\t$Rd, $Rn, $Rm", cstr, pat>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -6921,7 +6921,7 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
                        string asm, list<dag> pat>
  : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
      "\t$Rd, $Rn", "", pat>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31-30} = 0b01;
@ -6943,7 +6943,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
                        string asm, list<dag> pat>
  : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
      "\t$Rd, $Rn", "$Rd = $dst", pat>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31-30} = 0b01;
@ -6963,7 +6963,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
                        RegisterClass regtype, string asm, string zero>
  : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
      "\t$Rd, $Rn, #" # zero, "", []>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31-30} = 0b01;
@ -6982,7 +6982,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
 class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
  : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
     [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31-17} = 0b011111100110000;
@ -7104,7 +7104,7 @@ class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
                        string asm, string kind>
  : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
      "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31-30} = 0b01;
@ -7144,7 +7144,7 @@ class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
                          string asm, string kind, list<dag> pattern>
  : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
      "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -7210,7 +7210,7 @@ multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
 class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
                     string operands, string constraints, list<dag> pattern>
  : I<outs, ins, asm, operands, constraints, pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31} = 0;
@ -7522,7 +7522,7 @@ class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
                          RegisterOperand listtype, string asm, string kind>
  : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
       "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Vd;
  bits<5> Vn;
  bits<5> Vm;
@ -7543,7 +7543,7 @@ class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectyp
                          RegisterOperand listtype, string asm, string kind>
  : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
       "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Vd;
  bits<5> Vn;
  bits<5> Vm;
@ -7662,7 +7662,7 @@ class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
  : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm,
       "{\t$dst, $src" # kind # "$idx" #
       "|\t$dst, $src$idx}", "", []>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> dst;
  bits<5> src;
  let Inst{31-21} = 0b01011110000;
@ -7727,7 +7727,7 @@ class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
                          string asm, string op_string,
                          string cstr, list<dag> pattern>
  : I<oops, iops, asm, op_string, cstr, pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<8> imm8;
  let Inst{31}    = 0;
@ -7897,7 +7897,7 @@ class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
      asm,
      "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
      "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -7927,7 +7927,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
      (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
      "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
      "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -8020,7 +8020,7 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
                               (v8bf16
                                  (AArch64duplane16 (v8bf16 V128_lo:$Rm),
                                      VectorIndexH:$idx)))))]>,
-    Sched<[WriteV]> {
+    Sched<[WriteVq]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<4> Rm;
@ -8941,7 +8941,7 @@ class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
                     Operand immtype, string asm, list<dag> pattern>
  : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
      asm, "\t$Rd, $Rn, $imm", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<7> imm;
@ -8961,7 +8961,7 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
                     Operand immtype, string asm, list<dag> pattern>
  : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
      asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[WriteVd]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<7> imm;
@ -9125,7 +9125,7 @@ class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
  : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
      asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
           "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -9148,7 +9148,7 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
  : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
      asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
           "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31}    = 0;
@ -10695,7 +10695,7 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
  : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
      "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -10769,7 +10769,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
      (ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
      "|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -10845,7 +10845,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
      "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
      "$idx, $rot" # "|" # apple_kind #
      "\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
+    Sched<[!if(Q, WriteVq, WriteVd)]> {
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -10910,7 +10910,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
              list<dag> pat>
  : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
-    Sched<[WriteV]>{
+    Sched<[WriteVq]>{
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31-16} = 0b0100111000101000;
@ -10936,7 +10936,7 @@ class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
  : I<oops, iops, asm,
      "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
      "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
-    Sched<[WriteV]>{
+    Sched<[WriteVq]>{
  bits<5> Rd;
  bits<5> Rn;
  bits<5> Rm;
@ -10976,7 +10976,7 @@ class SHA2OpInst<bits<4> opc, string asm, string kind,
                 list<dag> pat>
  : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
                       "|" # kind # "\t$Rd, $Rn}", cstr, pat>,
-    Sched<[WriteV]>{
+    Sched<[WriteVq]>{
  bits<5> Rd;
  bits<5> Rn;
  let Inst{31-16} = 0b0101111000101000;
@ -10999,7 +10999,7 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
 // Armv8.2-A Crypto extensions
 class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst,
                    list<dag> pattern>
-  : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> {
+  : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteVq]> {
  bits<5> Vd;
  bits<5> Vn;
  let Inst{31-25} = 0b1100111;
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@ -7006,9 +7006,9 @@ def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
 // for AES fusion on some CPUs.
 let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
 def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
-                        Sched<[WriteV]>;
+                        Sched<[WriteVq]>;
 def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
-                         Sched<[WriteV]>;
+                         Sched<[WriteVq]>;
 }
 // Only use constrained versions of AES(I)MC instructions if they are paired with
--- a/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td
@ -127,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
 // FP Mul, Div, Sqrt
 def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@ -149,9 +149,11 @@ def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
 def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
 def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
 def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
-def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
 def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
 // FP ALU specific new schedwrite definitions
 def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
 def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
 def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
 def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
@ -331,6 +333,8 @@ def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16
 //---
 // Floating Point Conversions, MAC, DIV, SQRT
 //---
 def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
 def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
 def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
 def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
--- a/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td
@ -96,7 +96,8 @@ def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
 def : SchedAlias<WriteFImm,  A57Write_3cyc_1V>;
 def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
 def : SchedAlias<WriteFDiv,  A57Write_17cyc_1W>;
-def : SchedAlias<WriteV,     A57Write_3cyc_1V>;
+def : SchedAlias<WriteVd,    A57Write_3cyc_1V>;
 def : SchedAlias<WriteVq,    A57Write_3cyc_1V>;
 def : SchedAlias<WriteVLD,   A57Write_5cyc_1L>;
 def : SchedAlias<WriteVST,   A57Write_1cyc_1S>;
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@ -1627,7 +1627,11 @@ def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
 // ASIMD shift by register, basic, Q-form
 // ASIMD shift by register, complex, D-form
 // ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [A64FXGI03]> {
+def : WriteRes<WriteVd, [A64FXGI03]> {
  let Latency = 4;
  let ResourceCycles = [1];
 }
 def : WriteRes<WriteVq, [A64FXGI03]> {
  let Latency = 4;
  let ResourceCycles = [1];
 }
--- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@ -304,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
 // 7.9 Vector Unit Instructions
 // Simple vector operations take 2 cycles.
-def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
+def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
 def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}
 // Define some longer latency vector op types for Cyclone.
 def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
@ -335,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
 // COPY is handled above in the WriteMov Variant.
 def WriteVMov    : SchedWriteVariant<[
                     SchedVar<WriteVMovPred, [WriteX]>,
-                     SchedVar<NoSchedPred,   [WriteV]>]>;
+                     SchedVar<NoSchedPred,   [WriteVq]>]>;
 def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
 // FMOVSr,FMOVDr are WriteF.
@ -355,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
 def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
 // INS V[x],R
-def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
+def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
 def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
 // SMOV,UMOV R,V[x]
@ -571,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
 //---
 // FCVT lengthen f16/s32
-def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
+def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
 // FCVT,FCVTN,FCVTXN
 // SCVTF,UCVTF V,V
@ -681,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
 def : InstRW<[WriteVLDShuffle, WriteAdr],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
             (instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
             (instregex "LD2Twov(8b|4h|2s)_POST$")>;
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD2Twov(16b|8h|4s|2d)$")>;
 def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
             (instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
             (instregex "LD2i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
             (instregex "LD2i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
             (instregex "LD2i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
             (instregex "LD2i64_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
             (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
             (instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
             (instregex "LD3Threev(8b|4h|2s)_POST")>;
 def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD3Threev(16b|8h|4s|2d)$")>;
 def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
             (instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
             (instregex "LD3i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
             (instregex "LD3i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
             (instregex "LD3i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
             (instregex "LD3i64_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
             (instrs LD3Rv1d,LD3Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
             (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
             (instregex "LD4Fourv(8b|4h|2s)_POST")>;
 def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
              WriteVLDPairShuffle, WriteVLDPairShuffle],
@ -744,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
              WriteVLDPairShuffle, WriteVLDPairShuffle],
             (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
             (instregex "LD4i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
             (instregex "LD4i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4i64)>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
             (instrs LD4i64_POST)>;
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4Rv1d,LD4Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
             (instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
 //---
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@ -254,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
                                        let NumMicroOps = 1; }
 // ASIMD FP instructions.
-def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
+def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
 def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }
 // Other miscellaneous instructions.
 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@ -558,7 +558,8 @@ def : SchedAlias<WriteVLD, M4WriteL5>;
 def : SchedAlias<WriteVST, M4WriteVST1>;
 // ASIMD FP instructions.
-def : SchedAlias<WriteV, M4WriteNALU1>;
+def : SchedAlias<WriteVd, M4WriteNALU1>;
 def : SchedAlias<WriteVq, M4WriteNALU1>;
 // Other miscellaneous instructions.
 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@ -594,7 +594,8 @@ def : SchedAlias<WriteVLD, M5WriteL6>;
 def : SchedAlias<WriteVST, M5WriteVST1>;
 // ASIMD FP instructions.
-def : SchedAlias<WriteV, M5WriteNALU1>;
+def : SchedAlias<WriteVd, M5WriteNALU1>;
 def : SchedAlias<WriteVq, M5WriteNALU1>;
 // Other miscellaneous instructions.
 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
--- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []>   { let Unsupported = 1; }
 def : WriteRes<WriteFImm, []>    { let Unsupported = 1; }
 def : WriteRes<WriteFMul, []>    { let Unsupported = 1; }
 def : WriteRes<WriteFDiv, []>    { let Unsupported = 1; }
-def : WriteRes<WriteV, []>       { let Unsupported = 1; }
+def : WriteRes<WriteVd, []>      { let Unsupported = 1; }
 def : WriteRes<WriteVq, []>      { let Unsupported = 1; }
 def : WriteRes<WriteVLD, []>     { let Unsupported = 1; }
 def : WriteRes<WriteVST, []>     { let Unsupported = 1; }
 def : WriteRes<WriteSys, []>     { let Unsupported = 1; }
--- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
@ -95,7 +95,8 @@ def : WriteRes<WriteFMul,  [KryoUnitX, KryoUnitX]>
      { let Latency = 6; let NumMicroOps = 2; }
 def : WriteRes<WriteFDiv,  [KryoUnitXA, KryoUnitY]>
      { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
-def : WriteRes<WriteV,     [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVd,    [KryoUnitXY]> { let Latency = 6; }
 def : WriteRes<WriteVq,    [KryoUnitXY]> { let Latency = 6; }
 def : WriteRes<WriteVLD,   [KryoUnitLS]> { let Latency = 4; }
 def : WriteRes<WriteVST,   [KryoUnitLS]> { let Latency = 4; }
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@ -90,7 +90,8 @@ def : WriteRes<WriteFMul,  [TSV110UnitF]> { let Latency = 5; }
 // FP Div, Sqrt
 def : WriteRes<WriteFDiv,  [TSV110UnitFSU1]> { let Latency = 18; } 
-def : WriteRes<WriteV,     [TSV110UnitF]>     { let Latency = 4; }
+def : WriteRes<WriteVd,    [TSV110UnitF]>     { let Latency = 4; }
 def : WriteRes<WriteVq,    [TSV110UnitF]>     { let Latency = 4; }
 def : WriteRes<WriteVLD,   [TSV110UnitFLdSt]> { let Latency = 5; }
 def : WriteRes<WriteVST,   [TSV110UnitF]>     { let Latency = 1; }
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
 def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
 // FP Mul, Div, Sqrt
 def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@ -1250,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
 // ASIMD shift by register, basic, Q-form
 // ASIMD shift by register, complex, D-form
 // ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX2T99F01]> {
+def : WriteRes<WriteVd, [THX2T99F01]> {
  let Latency = 7;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
 }
 def : WriteRes<WriteVq, [THX2T99F01]> {
  let Latency = 7;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
@ -1357,7 +1357,12 @@ def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
 // ASIMD shift by register, basic, Q-form
 // ASIMD shift by register, complex, D-form
 // ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX3T110FP0123]> {
+def : WriteRes<WriteVd, [THX3T110FP0123]> {
  let Latency = 5;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
 }
 def : WriteRes<WriteVq, [THX3T110FP0123]> {
  let Latency = 5;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
--- a/llvm/lib/Target/AArch64/AArch64Schedule.td
+++ b/llvm/lib/Target/AArch64/AArch64Schedule.td
@ -77,7 +77,8 @@ def WriteFImm    : SchedWrite; // Floating-point immediate.
 def WriteFMul    : SchedWrite; // Floating-point multiply.
 def WriteFDiv    : SchedWrite; // Floating-point division.
-def WriteV   : SchedWrite; // Vector ops.
+def WriteVd  : SchedWrite; // 64bit Vector D ops.
 def WriteVq  : SchedWrite; // 128bit Vector Q ops.
 def WriteVLD : SchedWrite; // Vector loads.
 def WriteVST : SchedWrite; // Vector stores.
@ -87,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
 def ReadVLD : SchedRead;
 // Sequential vector load and shuffle.
-def WriteVLDShuffle     : WriteSequence<[WriteVLD, WriteV]>;
+def WriteVLDShuffle     : WriteSequence<[WriteVLD, WriteVq]>;
-def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;
 // Store a shuffled vector.
-def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
+def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
-def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s