forked from OSchip/llvm-project
[AMDGPU] Do not use undef on indirect source
We are using undef on the indirect move source subreg and then using an implicit super-reg. This creates a problem in RA when Greedy decides to split the register. It reassigns the implicit super-reg but does not bother to change the undef source because it really does not matter. The fix is to stop lying to RA and drop the undef flag. This has also hit a problem in SIFoldOperands, as it can now fold an immediate into an indirect move since there is no undef flag anymore. That results in multiple test failures, so a check for this case was added. Differential Revision: https://reviews.llvm.org/D84899
This commit is contained in:
parent
41909e9682
commit
5b32518f96
|
@ -2542,7 +2542,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
|
|||
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
|
||||
.addReg(IdxReg);
|
||||
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
|
||||
.addReg(SrcReg, RegState::Undef, SubReg)
|
||||
.addReg(SrcReg, 0, SubReg)
|
||||
.addReg(SrcReg, RegState::Implicit);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
|
@ -2552,7 +2552,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
|
|||
.addReg(IdxReg)
|
||||
.addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
|
||||
BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg)
|
||||
.addReg(SrcReg, RegState::Undef, SubReg)
|
||||
.addReg(SrcReg, 0, SubReg)
|
||||
.addReg(SrcReg, RegState::Implicit)
|
||||
.addReg(AMDGPU::M0, RegState::Implicit);
|
||||
BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
|
||||
|
|
|
@ -463,7 +463,18 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
|
|||
static bool isUseSafeToFold(const SIInstrInfo *TII,
|
||||
const MachineInstr &MI,
|
||||
const MachineOperand &UseMO) {
|
||||
return !UseMO.isUndef() && !TII->isSDWA(MI);
|
||||
if (UseMO.isUndef() || TII->isSDWA(MI))
|
||||
return false;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::V_MOV_B32_e32:
|
||||
case AMDGPU::V_MOV_B32_e64:
|
||||
case AMDGPU::V_MOV_B64_PSEUDO:
|
||||
// Do not fold into an indirect mov.
|
||||
return !MI.hasRegisterImplicitUseOperand(AMDGPU::M0);
|
||||
}
|
||||
|
||||
return true;
|
||||
//return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
|
||||
}
|
||||
|
||||
|
|
|
@ -3658,13 +3658,13 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
|
|||
// to avoid interfering with other uses, so probably requires a new
|
||||
// optimization pass.
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
|
||||
.addReg(SrcReg, RegState::Undef, SubReg)
|
||||
.addReg(SrcReg, 0, SubReg)
|
||||
.addReg(SrcReg, RegState::Implicit)
|
||||
.addReg(AMDGPU::M0, RegState::Implicit);
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
|
||||
} else {
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
|
||||
.addReg(SrcReg, RegState::Undef, SubReg)
|
||||
.addReg(SrcReg, 0, SubReg)
|
||||
.addReg(SrcReg, RegState::Implicit);
|
||||
}
|
||||
|
||||
|
@ -3687,13 +3687,13 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
|
|||
|
||||
if (UseGPRIdxMode) {
|
||||
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
|
||||
.addReg(SrcReg, RegState::Undef, SubReg)
|
||||
.addReg(SrcReg, 0, SubReg)
|
||||
.addReg(SrcReg, RegState::Implicit)
|
||||
.addReg(AMDGPU::M0, RegState::Implicit);
|
||||
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
|
||||
} else {
|
||||
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
|
||||
.addReg(SrcReg, RegState::Undef, SubReg)
|
||||
.addReg(SrcReg, 0, SubReg)
|
||||
.addReg(SrcReg, RegState::Implicit);
|
||||
}
|
||||
|
||||
|
|
|
@ -502,13 +502,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v2s32
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
|
@ -530,13 +530,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v3s32
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
|
@ -558,13 +558,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
|
@ -586,13 +586,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
@ -614,13 +614,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v16s32
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
|
@ -642,13 +642,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v32s32
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
|
@ -670,13 +670,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
@ -702,7 +702,7 @@ body: |
|
|||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
@ -710,7 +710,7 @@ body: |
|
|||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
|
||||
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
@ -734,13 +734,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; MOVREL: $m0 = COPY [[COPY1]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
@ -766,7 +766,7 @@ body: |
|
|||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||
; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; MOVREL: $m0 = COPY [[S_ADD_I32_]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
@ -774,7 +774,7 @@ body: |
|
|||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
||||
; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
@ -825,13 +825,13 @@ body: |
|
|||
; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; MOVREL: $m0 = COPY [[S_MOV_B32_]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
|
||||
; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
|
||||
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx
|
||||
; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
|
||||
; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
|
||||
%0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
|
|
@ -88,7 +88,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
|
|||
; GCN: renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit-def undef $mode, implicit $m0, implicit $mode
|
||||
; GCN: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5)
|
||||
; GCN: renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
|
||||
; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
|
||||
; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
|
||||
; GCN: renamable $vgpr19 = COPY renamable $vgpr18
|
||||
; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5
|
||||
|
|
Loading…
Reference in New Issue