diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aa165d4ce21e..afdeacc42910 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -555,48 +555,6 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
   return false;
 }
 
-// FIXME: TableGen should generate something to make this manageable for all
-// register classes. At a minimum we could use the opposite of
-// composeSubRegIndices and go up from the base 32-bit subreg.
-static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI,
-                                          unsigned Size, unsigned Offset) {
-  switch (Size) {
-  case 32:
-    return TRI.getSubRegFromChannel(Offset / 32);
-  case 64: {
-    switch (Offset) {
-    case 0:
-      return AMDGPU::sub0_sub1;
-    case 32:
-      return AMDGPU::sub1_sub2;
-    case 64:
-      return AMDGPU::sub2_sub3;
-    case 96:
-      return AMDGPU::sub4_sub5;
-    case 128:
-      return AMDGPU::sub5_sub6;
-    case 160:
-      return AMDGPU::sub7_sub8;
-      // FIXME: Missing cases up to 1024 bits
-    default:
-      return AMDGPU::NoSubRegister;
-    }
-  }
-  case 96: {
-    switch (Offset) {
-    case 0:
-      return AMDGPU::sub0_sub1_sub2;
-    case 32:
-      return AMDGPU::sub1_sub2_sub3;
-    case 64:
-      return AMDGPU::sub2_sub3_sub4;
-    }
-  }
-  default:
-    return AMDGPU::NoSubRegister;
-  }
-}
-
 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
 
@@ -612,7 +570,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
   if (Offset % 32 != 0)
     return false;
 
-  unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset);
+  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
   if (SubReg == AMDGPU::NoSubRegister)
     return false;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 7cffdf1a4dcf..9806e6b0714f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -26,19 +26,59 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
 // they are not supported at this time.
 //===----------------------------------------------------------------------===//
 
-unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) {
-  static const unsigned SubRegs[] = {
-    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
-    AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
-    AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
-    AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
-    AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24,
-    AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29,
-    AMDGPU::sub30, AMDGPU::sub31
-  };
+// Table of NumRegs sized pieces at every 32-bit offset.
+static const uint16_t SubRegFromChannelTable[][32] = {
+  { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+    AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+    AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
+    AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
+    AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31
+  },
+  {
+    AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4,
+    AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8,
+    AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
+    AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16,
+    AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,
+    AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
+    AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28,
+    AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister
+  },
+  {
+    AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
+    AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
+    AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
+    AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
+    AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
+    AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
+    AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
+    AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+  },
+  {
+    AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
+    AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
+    AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
+    AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
+    AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
+    AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
+    AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
+    AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+  }
+};
 
-  assert(Channel < array_lengthof(SubRegs));
-  return SubRegs[Channel];
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) {
+  const unsigned NumRegIndex = NumRegs - 1;
+
+  assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
+         "Not implemented");
+  assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
+  return SubRegFromChannelTable[NumRegIndex][Channel];
 }
 
 void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 3453a8c1b0b3..9e713ca804a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -28,7 +28,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
 
   /// \returns the sub reg enum value for the given \p Channel
   /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
-  static unsigned getSubRegFromChannel(unsigned Channel);
+  static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
 
   void reserveRegisterTuples(BitVector &, unsigned Reg) const;
 };
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
index 3cd1b463b579..c120c9617412 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
@@ -303,41 +303,46 @@ body: |
 
 ---
 
-name:            insert_s_s256_s_s64_96
+name:            insert_s_v256_v_s64_96
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9
+    ; CHECK-LABEL: name: insert_s_v256_v_s64_96
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s64) = COPY $vgpr8_vgpr9
+    %2:vgpr(s256) = G_INSERT %0, %1, 96
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s256_s_s64_128
 legalized:       true
 regBankSelected: true
 
 body: |
   bb.0:
     liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
-    ; CHECK-LABEL: name: insert_s_s256_s_s64_96
+    ; CHECK-LABEL: name: insert_s_s256_s_s64_128
     ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
     ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5
     ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
     %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    %1:sgpr(s64) = COPY $sgpr8_sgpr9
-    %2:sgpr(s256) = G_INSERT %0, %1, 96
+    %1:sgpr(s64) = COPY $sgpr4_sgpr5
+    %2:sgpr(s256) = G_INSERT %0, %1, 128
     S_ENDPGM 0, implicit %2
 ...
 
 # ---
 
-# name:            insert_s_s256_s_s64_128
-# legalized:       true
-# regBankSelected: true
-
-# body: |
-#   bb.0:
-#     liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
-#     %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-#     %1:sgpr(s64) = COPY $sgpr4_sgpr5
-#     %2:sgpr(s256) = G_INSERT %0, %1, 128
-#     S_ENDPGM 0, implicit %2
-# ...
-
-# ---
-
 # name:            insert_s_s256_s_s64_160
 # legalized:       true
 # regBankSelected: true
@@ -450,3 +455,108 @@ body: |
     %2:sgpr(s160) = G_INSERT %0, %1, 64
     S_ENDPGM 0, implicit %2
 ...
+
+---
+
+name:            insert_s_s256_s_s128_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11
+
+    ; CHECK-LABEL: name: insert_s_s256_s_s128_0
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11
+    %2:sgpr(s256) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_32
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3_sub4
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_64
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_64
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4_sub5
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 64
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_96
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_96
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4_sub5_sub6
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 96
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_128
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_128
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5_sub6_sub7
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 128
+    S_ENDPGM 0, implicit %2
+...