forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix scc->vcc copy handling
This was querying the register size using the previously computed size value as if it were the register number, and that value happens to be the register number of exec. Also stop assuming VCC is 64-bit, which fixes wave32. Also remove some untested handling for physical registers; copies from a physical source register now skip the special scc->vcc handling. As a result, this doesn't insert the V_CNDMASK_B32 if SCC is the physical copy source. I'm not sure if this should be trying to handle this special case instead of dealing with this in copyPhysReg. llvm-svn: 364761
This commit is contained in:
parent
5dafcb9b11
commit
9f992c238a
llvm
lib/Target/AMDGPU
test/CodeGen/AMDGPU/GlobalISel
|
@ -60,11 +60,7 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
|
|||
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
|
||||
|
||||
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
|
||||
if (Reg == AMDGPU::SCC)
|
||||
return true;
|
||||
|
||||
if (TargetRegisterInfo::isPhysicalRegister(Reg))
|
||||
return false;
|
||||
assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
|
||||
|
||||
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
|
||||
const TargetRegisterClass *RC =
|
||||
|
@ -77,6 +73,22 @@ static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
|
|||
return RB->getID() == AMDGPU::SCCRegBankID;
|
||||
}
|
||||
|
||||
static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI,
|
||||
const SIRegisterInfo &TRI) {
|
||||
assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
|
||||
|
||||
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
|
||||
const TargetRegisterClass *RC =
|
||||
RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
|
||||
if (RC) {
|
||||
return RC == TRI.getWaveMaskRegClass() &&
|
||||
MRI.getType(Reg).getSizeInBits() == 1;
|
||||
}
|
||||
|
||||
const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
|
||||
return RB->getID() == AMDGPU::VCCRegBankID;
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
MachineFunction *MF = BB->getParent();
|
||||
|
@ -88,14 +100,12 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
|
|||
const MachineOperand &Src = I.getOperand(1);
|
||||
unsigned SrcReg = Src.getReg();
|
||||
if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
|
||||
unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI);
|
||||
unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
|
||||
unsigned DstReg = I.getOperand(0).getReg();
|
||||
|
||||
// We have a copy from a 32-bit to 64-bit register. This happens
|
||||
// when we are selecting scc->vcc copies.
|
||||
if (DstSize == 64) {
|
||||
// Specially handle scc->vcc copies.
|
||||
if (isVCC(DstReg, MRI, TRI)) {
|
||||
const DebugLoc &DL = I.getDebugLoc();
|
||||
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg())
|
||||
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
|
||||
.addImm(0)
|
||||
.addReg(SrcReg);
|
||||
if (!MRI.getRegClassOrNull(SrcReg))
|
||||
|
|
|
@ -1690,8 +1690,8 @@ SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
|
|||
case AMDGPU::VGPRRegBankID:
|
||||
return &AMDGPU::VGPR_32RegClass;
|
||||
case AMDGPU::VCCRegBankID:
|
||||
// TODO: Check wavesize
|
||||
return &AMDGPU::SReg_64_XEXECRegClass;
|
||||
return isWave32 ?
|
||||
&AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
|
||||
case AMDGPU::SGPRRegBankID:
|
||||
return &AMDGPU::SReg_32_XM0RegClass;
|
||||
case AMDGPU::SCCRegBankID:
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
|
||||
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE64 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE32 %s
|
||||
|
||||
---
|
||||
|
||||
|
@ -11,11 +12,18 @@ regBankSelected: true
|
|||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr2_sgpr3
|
||||
; GCN-LABEL: name: copy
|
||||
; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
|
||||
; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
|
||||
; WAVE64-LABEL: name: copy
|
||||
; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
|
||||
; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; WAVE32-LABEL: name: copy
|
||||
; WAVE32: $vcc_hi = IMPLICIT_DEF
|
||||
; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
|
||||
; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; WAVE32: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%0:sgpr(p1) = COPY $sgpr2_sgpr3
|
||||
%1:vgpr(p1) = COPY %0
|
||||
%2:vgpr(s32) = G_IMPLICIT_DEF
|
||||
|
@ -23,21 +31,31 @@ body: |
|
|||
...
|
||||
---
|
||||
|
||||
name: copy_vcc_scc
|
||||
name: copy_vcc_bank_scc_bank
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
|
||||
; GCN-LABEL: name: copy_vcc_scc
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
|
||||
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
|
||||
; WAVE64-LABEL: name: copy_vcc_bank_scc_bank
|
||||
; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
|
||||
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; WAVE32-LABEL: name: copy_vcc_bank_scc_bank
|
||||
; WAVE32: $vcc_hi = IMPLICIT_DEF
|
||||
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
|
||||
; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = COPY $vgpr3
|
||||
|
@ -48,23 +66,35 @@ body: |
|
|||
...
|
||||
---
|
||||
|
||||
name: copy_vcc_scc_2_uses
|
||||
name: copy_vcc_bank_scc_bank_2_uses
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
|
||||
; GCN-LABEL: name: copy_vcc_scc_2_uses
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
|
||||
; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
|
||||
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
|
||||
; WAVE64-LABEL: name: copy_vcc_bank_scc_bank_2_uses
|
||||
; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
|
||||
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
|
||||
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; WAVE32-LABEL: name: copy_vcc_bank_scc_bank_2_uses
|
||||
; WAVE32: $vcc_hi = IMPLICIT_DEF
|
||||
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
|
||||
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
|
||||
; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
|
||||
; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
|
||||
; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = COPY $vgpr3
|
||||
|
@ -75,4 +105,36 @@ body: |
|
|||
%7:vgpr(s32) = G_SELECT %6, %1, %5
|
||||
G_STORE %7, %0 :: (store 4, addrspace 1)
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: copy_vcc_bank_scc_physreg
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
|
||||
|
||||
; WAVE64-LABEL: name: copy_vcc_bank_scc_physreg
|
||||
; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc
|
||||
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
|
||||
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg
|
||||
; WAVE32: $vcc_hi = IMPLICIT_DEF
|
||||
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
|
||||
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
|
||||
; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s32) = COPY $vgpr3
|
||||
%3:vcc(s1) = COPY $scc
|
||||
%5:vgpr(s32) = G_SELECT %3, %1, %2
|
||||
G_STORE %5, %0 :: (store 4, addrspace 1)
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue