AMDGPU: Don't look for constant in insert/extract_vector_elt regbankselect

Whether the index is constant shouldn't change the register bank
choice. We also don't need to restrict this to indexing only VGPRs,
since it is possible to index SGPRs (although SelectionDAG made that
difficult to use). Allow directly indexing SGPRs when appropriate.

llvm-svn: 356611
This commit is contained in:
Matt Arsenault 2019-03-20 20:41:34 +00:00
parent f6f4f84378
commit 2065206a9d
3 changed files with 165 additions and 131 deletions

View File

@ -52,24 +52,6 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
} }
static bool isConstant(const MachineOperand &MO, int64_t &C) {
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
if (!Def)
return false;
if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
C = Def->getOperand(1).getCImm()->getSExtValue();
return true;
}
if (Def->getOpcode() == AMDGPU::COPY)
return isConstant(Def->getOperand(1), C);
return false;
}
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src, const RegisterBank &Src,
unsigned Size) const { unsigned Size) const {
@ -816,42 +798,35 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_EXTRACT_VECTOR_ELT: { case AMDGPU::G_EXTRACT_VECTOR_ELT: {
unsigned IdxOp = 2; unsigned OutputBankID = isSALUMapping(MI) ?
int64_t Imm;
// XXX - Do we really need to fully handle these? The constant case should
// be legalized away before RegBankSelect?
unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits()); OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
// The index can be either if the source vector is VGPR. // The index can be either if the source vector is VGPR.
OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits()); OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
break; break;
} }
case AMDGPU::G_INSERT_VECTOR_ELT: { case AMDGPU::G_INSERT_VECTOR_ELT: {
// XXX - Do we really need to fully handle these? The constant case should unsigned OutputBankID = isSALUMapping(MI) ?
// be legalized away before RegBankSelect?
int64_t Imm;
unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3;
unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
// TODO: Can do SGPR indexing, which would obviate the need for the // The index can be either if the source vector is VGPR.
// isConstant check. OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
}
break; break;
} }
case AMDGPU::G_UNMERGE_VALUES: { case AMDGPU::G_UNMERGE_VALUES: {

View File

@ -1,39 +1,76 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
--- ---
name: extract_vector_elt_0_v2i32_s name: extract_vector_elt_v16i32_ss
legalized: true legalized: true
body: | body: |
bb.0: bb.0:
liveins: $sgpr0_sgpr1 liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
; CHECK-LABEL: name: extract_vector_elt_0_v2i32_s ; CHECK-LABEL: name: extract_vector_elt_v16i32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32) ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32) ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<2 x s32>) = COPY $sgpr0_sgpr1 %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = G_CONSTANT i32 0 %1:_(s32) = COPY $sgpr16
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2 $vgpr0 = COPY %2
... ...
--- ---
name: extract_vector_elt_0_v4i32_s name: extract_vector_elt_v16i32_sv
legalized: true legalized: true
body: | body: |
bb.0: bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3 liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
; CHECK-LABEL: name: extract_vector_elt_0_v4i32_s ; CHECK-LABEL: name: extract_vector_elt_v16i32_sv
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32) ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>)
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32) ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%1:_(s32) = G_CONSTANT i32 0 %1:_(s32) = COPY $vgpr0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2
...
---
name: extract_vector_elt_v16i32_vs
legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
; CHECK-LABEL: name: extract_vector_elt_v16i32_vs
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2
...
---
name: extract_vector_elt_v16i32_vv
legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
; CHECK-LABEL: name: extract_vector_elt_v16i32_vv
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
%1:_(s32) = COPY $vgpr16
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0 = COPY %2 $vgpr0 = COPY %2
... ...

View File

@ -1,111 +1,111 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
--- ---
name: insert_vector_elt_v4i32_s_s_k name: insert_vector_elt_v4i32_s_s_s
legalized: true legalized: true
body: | body: |
bb.0: bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_k
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32) ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr5 %1:_(s32) = COPY $sgpr4
%2:_(s32) = G_CONSTANT i32 0 %2:_(s32) = COPY $sgpr5
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
... ...
--- ---
name: insert_vector_elt_v4i32_v_s_k name: insert_vector_elt_v4i32_v_s_s
legalized: true legalized: true
body: | body: |
bb.0: bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0, $sgpr1
; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_k
; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_s
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY2]](s32), [[COPY3]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $sgpr5 %1:_(s32) = COPY $sgpr0
%2:_(s32) = G_CONSTANT i32 0 %2:_(s32) = COPY $sgpr1
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
... ...
--- ---
name: insert_vector_elt_v4i32_s_v_k name: insert_vector_elt_v4i32_s_v_s
legalized: true legalized: true
body: | body: |
bb.0: bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr5 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $sgpr4
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_k
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY2]], [[COPY1]](s32), [[COPY3]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr2
%2:_(s32) = G_CONSTANT i32 0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
--- ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_s
name: insert_vector_elt_var_v4i32_s_s_s
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $sgpr6
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY5]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr5 %1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $sgpr6 %2:_(s32) = COPY $sgpr4
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
... ...
--- ---
name: insert_vector_elt_var_v4i32_s_s_v name: insert_vector_elt_v4i32_s_s_v
legalized: true legalized: true
body: | body: |
bb.0: bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $vgpr6 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_v
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_v
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $sgpr5 %1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $vgpr6 %2:_(s32) = COPY $vgpr0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
---
name: insert_vector_elt_v4i32_s_v_v
legalized: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1
; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_v
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
... ...
@ -116,17 +116,38 @@ legalized: true
body: | body: |
bb.0: bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5, $vgpr6 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr0
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY3]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $sgpr5 %1:_(s32) = COPY $sgpr4
%2:_(s32) = COPY $vgpr6 %2:_(s32) = COPY $vgpr0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
...
---
name: insert_vector_elt_var_v4i32_v_v_s
legalized: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_s
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $sgpr0
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
... ...
@ -137,16 +158,17 @@ legalized: true
body: | body: |
bb.0: bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr5, $vgpr6 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = COPY $vgpr5 %1:_(s32) = COPY $vgpr4
%2:_(s32) = COPY $vgpr6 %2:_(s32) = COPY $vgpr5
%3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
... ...