From 46a75436f811d0e6a2c76c669140a7e9471cd2a3 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Tue, 28 Apr 2020 13:54:14 -0700
Subject: [PATCH] [AMDGPU] Define special SGPR subregs

These are used in SReg_32 and when we start to use SGPR_LO16
there will be compaints that not all registers in RC support
all subreg indexes. For now it is NFC.

Unused regunits are reserved so that verifier does not complain
about missing phys reg live-ins.

Differential Revision: https://reviews.llvm.org/D78591
---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |   2 +-
 llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td |   2 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  19 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  10 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      | 175 ++++++++++--------
 llvm/test/CodeGen/AMDGPU/ipra-regmask.ll      |   4 +-
 ...ssert-dead-def-subreg-use-other-subreg.mir |   2 +-
 ...dleMoveUp-subreg-def-across-subreg-def.mir |   8 +-
 ...ubreg-undef-def-with-other-subreg-defs.mir |  12 +-
 9 files changed, 132 insertions(+), 102 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 4e1a7842a1ee..395085d9b459 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -765,7 +765,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         }
 
         if (AMDGPU::SReg_32RegClass.contains(Reg) ||
-            AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
+            AMDGPU::SReg_LO16RegClass.contains(Reg) ||
             AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
           assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
                  "trap handler registers should not be used");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index dfb13767bfe2..167ca393650b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 def SGPRRegBank : RegisterBank<"SGPR",
-  [SGPR_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
+  [SReg_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
 >;
 
 def VGPRRegBank : RegisterBank<"VGPR",
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 60569dfbbe1d..efeb5ab4bd3e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -678,21 +678,20 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass ||
-      RC == &AMDGPU::SGPR_LO16RegClass) {
+  if (RI.getRegSizeInBits(*RC) == 16) {
     assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
            AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
-           AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
+           AMDGPU::SReg_LO16RegClass.contains(SrcReg));
 
-    bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg);
-    bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
-    bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass ||
-                   RC == &AMDGPU::SGPR_LO16RegClass);
+    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
+    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
+    bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
+                  AMDGPU::SReg_LO16RegClass.contains(DestReg);
     bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
-                  AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
-    const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
+                  AMDGPU::SReg_LO16RegClass.contains(SrcReg);
+    const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SReg_32RegClass
                                                  : &AMDGPU::VGPR_32RegClass;
-    const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
+    const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SReg_32RegClass
                                                  : &AMDGPU::VGPR_32RegClass;
     MCRegister NewDestReg =
       RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 6f668b3b720a..a021a9132a41 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -263,6 +263,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     reserveRegisterTuples(Reserved, Reg);
   }
 
+  for (auto Reg : AMDGPU::SReg_32RegClass) {
+    Reserved.set(getSubReg(Reg, AMDGPU::hi16));
+    Register Low = getSubReg(Reg, AMDGPU::lo16);
+    // This is to prevent BB vcc liveness errors.
+    if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
+      Reserved.set(Low);
+  }
+
   // Reserve all the rest AGPRs if there are no instructions to use it.
   if (!ST.hasMAIInsts()) {
     for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
@@ -1365,7 +1373,7 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
   static const TargetRegisterClass *const BaseClasses[] = {
     &AMDGPU::VGPR_LO16RegClass,
     &AMDGPU::VGPR_HI16RegClass,
-    &AMDGPU::SGPR_LO16RegClass,
+    &AMDGPU::SReg_LO16RegClass,
     &AMDGPU::VGPR_32RegClass,
     &AMDGPU::SReg_32RegClass,
     &AMDGPU::AGPR_32RegClass,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f493f937430c..69018ac67b29 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -123,25 +123,41 @@ class SIRegisterTuples<list<SubRegIndex> Indices, RegisterClass RC,
 class SIReg <string n, bits<16> regIdx = 0> :
   Register<n> {
   let Namespace = "AMDGPU";
-
-  // This is the not yet the complete register encoding. An additional
-  // bit is set for VGPRs.
   let HWEncoding = regIdx;
 }
 
-class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx = 0> :
+class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx> :
   RegisterWithSubRegs<n, subregs> {
-  let Namespace = "AMDGPU";
+}
 
-  // This is the not yet the complete register encoding. An additional
-  // bit is set for VGPRs.
-  let HWEncoding = regIdx;
-  let CoveredBySubRegs = 1;
+multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
+                        bit HWEncodingHigh = 0> {
+  // There is no special encoding for 16 bit subregs, these are not real
+  // registers but rather operands for instructions preserving other 16 bits
+  // of the result or reading just 16 bits of a 32 bit VGPR.
+  // It is encoded as a corresponding 32 bit register.
+  // Non-VGPR register classes use it as we need to have matching subregisters
+  // to move instructions and data between ALUs.
+  def _LO16 : SIReg<n#".l", regIdx> {
+    let HWEncoding{8} = HWEncodingHigh;
+  }
+  def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx> {
+    let isArtificial = ArtificialHigh;
+    let HWEncoding{8} = HWEncodingHigh;
+  }
+  def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
+                                   !cast<Register>(NAME#"_HI16")]> {
+    let Namespace = "AMDGPU";
+    let SubRegIndices = [lo16, hi16];
+    let CoveredBySubRegs = !if(ArtificialHigh,0,1);
+    let HWEncoding = regIdx;
+    let HWEncoding{8} = HWEncodingHigh;
+  }
 }
 
 // Special Registers
-def VCC_LO : SIReg<"vcc_lo", 106>;
-def VCC_HI : SIReg<"vcc_hi", 107>;
+defm VCC_LO : SIRegLoHi16<"vcc_lo", 106>;
+defm VCC_HI : SIRegLoHi16<"vcc_hi", 107>;
 
 // Pseudo-registers: Used as placeholders during isel and immediately
 // replaced, never seeing the verifier.
@@ -164,8 +180,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
   let HWEncoding = 106;
 }
 
-def EXEC_LO : SIReg<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
-def EXEC_HI : SIReg<"exec_hi", 127>;
+defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
+defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
 
 def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
   let Namespace = "AMDGPU";
@@ -175,22 +191,22 @@ def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]>
 
 // 32-bit real registers, for MC only.
 // May be used with both 32-bit and 64-bit operands.
-def SRC_VCCZ : SIReg<"src_vccz", 251>;
-def SRC_EXECZ : SIReg<"src_execz", 252>;
-def SRC_SCC : SIReg<"src_scc", 253>;
+defm SRC_VCCZ : SIRegLoHi16<"src_vccz", 251>;
+defm SRC_EXECZ : SIRegLoHi16<"src_execz", 252>;
+defm SRC_SCC : SIRegLoHi16<"src_scc", 253>;
 
 // 1-bit pseudo register, for codegen only.
 // Should never be emitted.
 def SCC : SIReg<"scc">;
 
-def M0 : SIReg <"m0", 124>;
-def SGPR_NULL : SIReg<"null", 125>;
+defm M0 : SIRegLoHi16 <"m0", 124>;
+defm SGPR_NULL : SIRegLoHi16 <"null", 125>;
 
-def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
-def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
-def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
-def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
-def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>;
+defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>;
+defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>;
+defm SRC_PRIVATE_BASE : SIRegLoHi16<"src_private_base", 237>;
+defm SRC_PRIVATE_LIMIT : SIRegLoHi16<"src_private_limit", 238>;
+defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>;
 
 def LDS_DIRECT : SIReg <"src_lds_direct", 254> {
   // There is no physical register corresponding to this. This is an
@@ -199,8 +215,8 @@ def LDS_DIRECT : SIReg <"src_lds_direct", 254> {
   let isArtificial = 1;
 }
 
-def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
-def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
+defm XNACK_MASK_LO : SIRegLoHi16<"xnack_mask_lo", 104>;
+defm XNACK_MASK_HI : SIRegLoHi16<"xnack_mask_hi", 105>;
 
 def XNACK_MASK :
     RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> {
@@ -210,8 +226,8 @@ def XNACK_MASK :
 }
 
 // Trap handler registers
-def TBA_LO : SIReg<"tba_lo", 108>;
-def TBA_HI : SIReg<"tba_hi", 109>;
+defm TBA_LO : SIRegLoHi16<"tba_lo", 108>;
+defm TBA_HI : SIRegLoHi16<"tba_hi", 109>;
 
 def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> {
   let Namespace = "AMDGPU";
@@ -219,8 +235,8 @@ def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> {
   let HWEncoding = 108;
 }
 
-def TMA_LO : SIReg<"tma_lo", 110>;
-def TMA_HI : SIReg<"tma_hi", 111>;
+defm TMA_LO : SIRegLoHi16<"tma_lo", 110>;
+defm TMA_HI : SIRegLoHi16<"tma_hi", 111>;
 
 def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
   let Namespace = "AMDGPU";
@@ -229,15 +245,15 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
 }
 
 foreach Index = 0-15 in {
-  def TTMP#Index#_vi         : SIReg<"ttmp"#Index, !add(112, Index)>;
-  def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
-  def TTMP#Index             : SIReg<"ttmp"#Index, 0>;
+  defm TTMP#Index#_vi         : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>;
+  defm TTMP#Index#_gfx9_gfx10 : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>;
+  defm TTMP#Index             : SIRegLoHi16<"ttmp"#Index, 0>;
 }
 
 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
-  def _ci : SIReg<n, ci_e>;
-  def _vi : SIReg<n, vi_e>;
-  def "" : SIReg<n, 0>;
+  defm _ci : SIRegLoHi16<n, ci_e>;
+  defm _vi : SIRegLoHi16<n, vi_e>;
+  defm "" : SIRegLoHi16<n, 0>;
 }
 
 class FlatReg <Register lo, Register hi, bits<16> encoding> :
@@ -256,50 +272,17 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
 
 // SGPR registers
 foreach Index = 0-105 in {
-  def SGPR#Index#_LO16 : SIReg <"s"#Index#".l", Index>,
-    DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
-                 !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
-
-  // This is a placeholder to fill high lane in mask.
-  def SGPR#Index#_HI16 : SIReg <"", Index> {
-    let isArtificial = 1;
-  }
-
-  def SGPR#Index :
-    SIRegWithSubRegs <"s"#Index, [!cast<Register>("SGPR"#Index#"_LO16"),
-                                  !cast<Register>("SGPR"#Index#"_HI16")],
-                      Index>,
-    DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
-                 !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]> {
-    let SubRegIndices = [lo16, hi16];
-  }
+  defm SGPR#Index :
+     SIRegLoHi16 <"s"#Index, Index>,
+     DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
+                  !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
 }
 
 // VGPR registers
 foreach Index = 0-255 in {
-  // There is no special encoding for low 16 bit subreg, this not a real
-  // register but rather an operand for instructions preserving high 16 bits
-  // of the result or reading just low 16 bits of a 32 bit VGPR.
-  // It is encoded as a corresponding 32 bit register.
-  def VGPR#Index#_LO16 : SIReg <"v"#Index#".l", Index>,
-    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
-    let HWEncoding{8} = 1;
-  }
-  // There is no special encoding for low 16 bit subreg, this not a real
-  // register but rather an operand for instructions preserving low 16 bits
-  // of the result or reading just high 16 bits of a 32 bit VGPR.
-  // It is encoded as a corresponding 32 bit register.
-  def VGPR#Index#_HI16 : SIReg <"v"#Index#".h", Index>,
-    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
-    let HWEncoding{8} = 1;
-  }
-  def VGPR#Index : SIRegWithSubRegs <"v"#Index,
-    [!cast<Register>("VGPR"#Index#"_LO16"), !cast<Register>("VGPR"#Index#"_HI16")],
-    Index>,
-    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
-    let HWEncoding{8} = 1;
-    let SubRegIndices = [lo16, hi16];
-  }
+  defm VGPR#Index :
+    SIRegLoHi16 <"v"#Index, Index, 0, 1>,
+    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>;
 }
 
 // AccVGPR registers
@@ -325,11 +308,17 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
   let isAllocatable = 0;
 }
 
+def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
+  let CopyCost = 1;
+  let Size = 16;
+  let isAllocatable = 0;
+}
+
 // TODO: Do we need to set DwarfRegAlias on register tuples?
 
 def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
                               (add (sequence "SGPR%u_LO16", 0, 105))> {
-  let AllocationPriority = 1;
+  let AllocationPriority = 9;
   let Size = 16;
   let GeneratePressureSet = 0;
 }
@@ -380,6 +369,12 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
   let isAllocatable = 0;
 }
 
+def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+                              (add (sequence "TTMP%u_LO16", 0, 15))> {
+  let Size = 16;
+  let isAllocatable = 0;
+}
+
 // Trap handler TMP 64-bit registers
 def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
 
@@ -588,15 +583,43 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
   let AllocationPriority = 10;
 }
 
+def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16,
+  (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
+   XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16,
+   TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
+   SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16,
+   SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> {
+  let Size = 16;
+  let AllocationPriority = 10;
+}
+
 def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
   (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
   let AllocationPriority = 10;
 }
 
+def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16,
+  (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> {
+  let Size = 16;
+  let AllocationPriority = 10;
+}
+
 def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
   (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
   let AllocationPriority = 10;
 }
+
+def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16,
+  (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> {
+  let Size = 16;
+  let AllocationPriority = 10;
+}
+
+def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+  (add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> {
+  let Size = 16;
+  let AllocationPriority = 10;
+}
 } // End GeneratePressureSet = 0
 
 // Register class for all scalar registers (SGPRs + Special Registers)
diff --git a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
index d4084e40fc8f..cc3264af1046 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra-regmask.ll
@@ -24,13 +24,13 @@ define void @nothing() #0 {
   ret void
 }
 
-; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}}
+; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 $m0_hi16 $m0_lo16 {{$}}
 define void @special_regs() #0 {
   call void asm sideeffect "", "~{m0},~{scc}"() #0
   ret void
 }
 
-; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}}
+; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo $vcc_hi_hi16 $vcc_hi_lo16 $vcc_lo_hi16 $vcc_lo_lo16 {{$}}
 define void @vcc() #0 {
   call void asm sideeffect "", "~{vcc}"() #0
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
index 443999bdea5f..39c446cc219e 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir
@@ -33,7 +33,7 @@ body:             |
   ; CHECK:   dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
   ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
   ; CHECK:   undef %11.sub1:vreg_512 = COPY [[COPY]].sub1
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:SGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead [[COPY1]], 851978 /* regdef:SReg_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1
   ; CHECK:   %11.sub0:vreg_512 = COPY [[COPY]].sub0
   ; CHECK:   %11.sub3:vreg_512 = COPY [[COPY]].sub3
   ; CHECK:   dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
index 3398e0f10b36..8514f56b8316 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
@@ -36,18 +36,18 @@ body:             |
   ; CHECK:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK: bb.1:
   ; CHECK:   successors: %bb.1(0x80000000)
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead %11
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead %11
   ; CHECK:   GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
   ; CHECK:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
   ; CHECK:   [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
   ; CHECK:   [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
-  ; CHECK:   INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %15, 851978 /* regdef:SGPR_LO16 */, def %16
+  ; CHECK:   INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %15, 851978 /* regdef:SReg_LO16 */, def %16
   ; CHECK:   [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
   ; CHECK:   [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
   ; CHECK:   [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
-  ; CHECK:   INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %21, 851978 /* regdef:SGPR_LO16 */, def %22
+  ; CHECK:   INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %21, 851978 /* regdef:SReg_LO16 */, def %22
   ; CHECK:   [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SGPR_LO16 */, %15, 851977 /* reguse:SGPR_LO16 */, %16, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_2]]
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SReg_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SReg_LO16 */, %15, 851977 /* reguse:SReg_LO16 */, %16, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_2]]
   ; CHECK:   %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
   ; CHECK:   DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
   ; CHECK:   DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir
index 488bbfeefa10..e8c699faac50 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir
@@ -25,9 +25,9 @@ body:             |
   ; CHECK: bb.1:
   ; CHECK:   successors: %bb.1(0x80000000)
   ; CHECK:   [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]]
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]]
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def undef %0.sub0, 851978 /* regdef:SReg_LO16 */, def undef %0.sub1
   ; CHECK:   S_NOP 0, implicit %0.sub1
   ; CHECK:   $sgpr10 = S_MOV_B32 -1
   ; CHECK:   S_BRANCH %bb.1
@@ -63,9 +63,9 @@ body:             |
   ; CHECK: bb.1:
   ; CHECK:   successors: %bb.1(0x80000000)
   ; CHECK:   [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]]
-  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]]
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def undef %0.sub1, 851978 /* regdef:SReg_LO16 */, def undef %0.sub0
   ; CHECK:   S_NOP 0, implicit %0.sub1
   ; CHECK:   $sgpr10 = S_MOV_B32 -1
   ; CHECK:   S_BRANCH %bb.1