AMDGPU: Don't look for constant in insert/extract_vector_elt regbankselect

The constantness shouldn't change the register bank choice. We also
don't need to restrict this to only indexing VGPRs, since it's
possible to index SGPRs (but SelectionDAG made using this
difficult). Allow directly indexing SGPRs when appropriate.

llvm-svn: 356611
This commit is contained in:
Matt Arsenault 2019-03-20 20:41:34 +00:00
parent f6f4f84378
commit 2065206a9d
3 changed files with 165 additions and 131 deletions

View File

@ -52,24 +52,6 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
}
static bool isConstant(const MachineOperand &MO, int64_t &C) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
if (!Def)
return false;
if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
C = Def->getOperand(1).getCImm()->getSExtValue();
return true;
}
if (Def->getOpcode() == AMDGPU::COPY)
return isConstant(Def->getOperand(1), C);
return false;
}
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
unsigned Size) const {
@ -816,42 +798,35 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_EXTRACT_VECTOR_ELT: {
unsigned IdxOp = 2;
int64_t Imm;
// XXX - Do we really need to fully handle these? The constant case should
// be legalized away before RegBankSelect?
unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
unsigned OutputBankID = isSALUMapping(MI) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
// The index can be either if the source vector is VGPR.
OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
break;
}
case AMDGPU::G_INSERT_VECTOR_ELT: {
// XXX - Do we really need to fully handle these? The constant case should
// be legalized away before RegBankSelect?
unsigned OutputBankID = isSALUMapping(MI) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
int64_t Imm;
unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3;
unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
// TODO: Can do SGPR indexing, which would obviate the need for the
// isConstant check.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
}
unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
// The index can be either if the source vector is VGPR.
OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
break;
}
case AMDGPU::G_UNMERGE_VALUES: {

View File

@ -1,39 +1,76 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
---
name: extract_vector_elt_0_v2i32_s
name: extract_vector_elt_v16i32_ss
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: extract_vector_elt_0_v2i32_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
; CHECK-LABEL: name: extract_vector_elt_v16i32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<2 x s32>) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CONSTANT i32 0
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = COPY $sgpr16
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2
...
---
name: extract_vector_elt_0_v4i32_s
name: extract_vector_elt_v16i32_sv
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-LABEL: name: extract_vector_elt_0_v4i32_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32)
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; CHECK-LABEL: name: extract_vector_elt_v16i32_sv
; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>)
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2
...
---
name: extract_vector_elt_v16i32_vs
legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
; CHECK-LABEL: name: extract_vector_elt_v16i32_vs
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2
...
---
name: extract_vector_elt_v16i32_vv
legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; CHECK-LABEL: name: extract_vector_elt_v16i32_vv
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $vgpr16
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2
...

View File

@ -1,111 +1,111 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
---
name: insert_vector_elt_v4i32_s_s_k
name: insert_vector_elt_v4i32_s_s_s
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_k
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr5
%2:_(s32) = G_CONSTANT i32 0
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $sgpr5
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
---
name: insert_vector_elt_v4i32_v_s_k
name: insert_vector_elt_v4i32_v_s_s
legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5
; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_k
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0, $sgpr1
; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_s
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $sgpr5
%2:_(s32) = G_CONSTANT i32 0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $sgpr1
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
---
name: insert_vector_elt_v4i32_s_v_k
name: insert_vector_elt_v4i32_s_v_s
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr5
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_k
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY2]], [[COPY1]](s32), [[COPY3]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr2
%2:_(s32) = G_CONSTANT i32 0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $sgpr4
---
name: insert_vector_elt_var_v4i32_s_s_s
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $sgpr6
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_s
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY5]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr5
%2:_(s32) = COPY $sgpr6
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $sgpr4
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
---
name: insert_vector_elt_var_v4i32_s_s_v
name: insert_vector_elt_v4i32_s_s_v
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $vgpr6
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_v
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_v
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY2]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr5
%2:_(s32) = COPY $vgpr6
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $vgpr0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
---
name: insert_vector_elt_v4i32_s_v_v
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_v
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
@ -116,17 +116,38 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5, $vgpr6
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr0
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY3]](s32), [[COPY2]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $sgpr5
%2:_(s32) = COPY $vgpr6
%1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $vgpr0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
---
name: insert_vector_elt_var_v4i32_v_v_s
legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_s
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $sgpr0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
@ -137,16 +158,17 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr5, $vgpr6
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr5
%2:_(s32) = COPY $vgpr6
%1:_(s32) = COPY $vgpr4
%2:_(s32) = COPY $vgpr5
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...