[AMDGPU] copyPhysReg() for 16 bit SGPR subregs

Differential Revision: https://reviews.llvm.org/D78255
This commit is contained in:
Stanislav Mekhanoshin 2020-04-15 16:16:13 -07:00
parent 4623c2ffa4
commit 992fbce4e9
4 changed files with 116 additions and 17 deletions

View File

@ -510,11 +510,10 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) {
MCRegister SrcReg, bool KillSrc,
const char *Msg = "illegal SGPR to VGPR copy") {
MachineFunction *MF = MBB.getParent();
DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
"illegal SGPR to VGPR copy",
DL, DS_Error);
DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
LLVMContext &C = MF->getFunction().getContext();
C.diagnose(IllegalCopy);
@ -679,29 +678,61 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) {
if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass ||
RC == &AMDGPU::SGPR_LO16RegClass) {
assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
AMDGPU::VGPR_HI16RegClass.contains(SrcReg));
AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass;
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
DestReg = RI.getMatchingSuperReg(DestReg,
DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
&AMDGPU::VGPR_32RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg,
SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
&AMDGPU::VGPR_32RegClass);
bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg);
bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass ||
RC == &AMDGPU::SGPR_LO16RegClass);
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
: &AMDGPU::VGPR_32RegClass;
const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
: &AMDGPU::VGPR_32RegClass;
MCRegister NewDestReg =
RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
DstRC);
MCRegister NewSrcReg =
RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
SrcRC);
auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg)
if (IsSGPRDst) {
if (!IsSGPRSrc) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
.addReg(NewSrcReg, getKillRegState(KillSrc));
return;
}
if (IsSGPRSrc && !ST.hasSDWAScalar()) {
if (!DstLow || !SrcLow) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
"Cannot use hi16 subreg on VI!");
}
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg)
.addReg(NewSrcReg, getKillRegState(KillSrc));
return;
}
auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg)
.addImm(0) // src0_modifiers
.addReg(SrcReg)
.addReg(NewSrcReg)
.addImm(0) // clamp
.addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
: AMDGPU::SDWA::SdwaSel::WORD_1)
.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
.addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
: AMDGPU::SDWA::SdwaSel::WORD_1)
.addReg(DestReg, RegState::Implicit | RegState::Undef);
.addReg(NewDestReg, RegState::Implicit | RegState::Undef);
// First implicit operand is $exec.
MIB->tieOperands(0, MIB->getNumOperands() - 1);
return;

View File

@ -0,0 +1,31 @@
# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=ERR,GFX8-ERR %s
# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=GCN %s
# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=ERR %s
# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s
# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
# GCN-LABEL: {{^}}lo_to_lo_illegal_vgpr_to_sgpr:
# GCN: ; illegal copy v0.l to s1.l
# ERR: error: <unknown>:0:0: in function lo_to_lo_illegal_vgpr_to_sgpr void (): illegal SGPR to VGPR copy
name: lo_to_lo_illegal_vgpr_to_sgpr
tracksRegLiveness: true
body: |
bb.0:
$vgpr0 = IMPLICIT_DEF
$sgpr1_lo16 = COPY $vgpr0_lo16
S_ENDPGM 0
...
# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
# GFX8: ; illegal copy s0.l to v1.h
# GFX9: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
# GFX8-ERR: error: <unknown>:0:0: in function lo_to_hi_sgpr_to_vgpr void (): Cannot use hi16 subreg on VI!
name: lo_to_hi_sgpr_to_vgpr
tracksRegLiveness: true
body: |
bb.0:
$sgpr0 = IMPLICIT_DEF
$vgpr1_hi16 = COPY killed $sgpr0_lo16
S_ENDPGM 0
...

View File

@ -193,3 +193,14 @@ body: |
$vgpr2 = COPY killed $vgpr1
S_ENDPGM 0
...
# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr:
# GCN: s_mov_b32 s1, s0
name: lo_to_lo_sgpr_to_sgpr
tracksRegLiveness: true
body: |
bb.0:
$sgpr0 = IMPLICIT_DEF
$sgpr1_lo16 = COPY $sgpr0_lo16
S_ENDPGM 0
...

View File

@ -0,0 +1,26 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr:
# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
name: lo_to_lo_sgpr_to_vgpr
tracksRegLiveness: true
body: |
bb.0:
$sgpr0 = IMPLICIT_DEF
$vgpr1_lo16 = COPY $sgpr0_lo16
S_ENDPGM 0
...
# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
name: lo_to_hi_sgpr_to_vgpr
tracksRegLiveness: true
body: |
bb.0:
$sgpr0 = IMPLICIT_DEF
$vgpr1_hi16 = COPY killed $sgpr0_lo16
S_ENDPGM 0
...