forked from OSchip/llvm-project
[AMDGPU] copyPhysReg() for 16 bit SGPR subregs
Differential Revision: https://reviews.llvm.org/D78255
This commit is contained in:
parent
4623c2ffa4
commit
992fbce4e9
|
@ -510,11 +510,10 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
|
|||
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
const DebugLoc &DL, MCRegister DestReg,
|
||||
MCRegister SrcReg, bool KillSrc) {
|
||||
MCRegister SrcReg, bool KillSrc,
|
||||
const char *Msg = "illegal SGPR to VGPR copy") {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(),
|
||||
"illegal SGPR to VGPR copy",
|
||||
DL, DS_Error);
|
||||
DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
|
||||
LLVMContext &C = MF->getFunction().getContext();
|
||||
C.diagnose(IllegalCopy);
|
||||
|
||||
|
@ -679,29 +678,61 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
return;
|
||||
}
|
||||
|
||||
if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) {
|
||||
if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass ||
|
||||
RC == &AMDGPU::SGPR_LO16RegClass) {
|
||||
assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
|
||||
AMDGPU::VGPR_HI16RegClass.contains(SrcReg));
|
||||
AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
|
||||
AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
|
||||
|
||||
bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass;
|
||||
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg);
|
||||
DestReg = RI.getMatchingSuperReg(DestReg,
|
||||
DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
SrcReg = RI.getMatchingSuperReg(SrcReg,
|
||||
SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
|
||||
&AMDGPU::VGPR_32RegClass);
|
||||
bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg);
|
||||
bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
|
||||
bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass ||
|
||||
RC == &AMDGPU::SGPR_LO16RegClass);
|
||||
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
|
||||
AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
|
||||
const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
|
||||
: &AMDGPU::VGPR_32RegClass;
|
||||
const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
|
||||
: &AMDGPU::VGPR_32RegClass;
|
||||
MCRegister NewDestReg =
|
||||
RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
|
||||
DstRC);
|
||||
MCRegister NewSrcReg =
|
||||
RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
|
||||
SrcRC);
|
||||
|
||||
auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg)
|
||||
if (IsSGPRDst) {
|
||||
if (!IsSGPRSrc) {
|
||||
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
|
||||
return;
|
||||
}
|
||||
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
|
||||
.addReg(NewSrcReg, getKillRegState(KillSrc));
|
||||
return;
|
||||
}
|
||||
|
||||
if (IsSGPRSrc && !ST.hasSDWAScalar()) {
|
||||
if (!DstLow || !SrcLow) {
|
||||
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
|
||||
"Cannot use hi16 subreg on VI!");
|
||||
}
|
||||
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg)
|
||||
.addReg(NewSrcReg, getKillRegState(KillSrc));
|
||||
return;
|
||||
}
|
||||
|
||||
auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg)
|
||||
.addImm(0) // src0_modifiers
|
||||
.addReg(SrcReg)
|
||||
.addReg(NewSrcReg)
|
||||
.addImm(0) // clamp
|
||||
.addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
|
||||
: AMDGPU::SDWA::SdwaSel::WORD_1)
|
||||
.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
|
||||
.addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
|
||||
: AMDGPU::SDWA::SdwaSel::WORD_1)
|
||||
.addReg(DestReg, RegState::Implicit | RegState::Undef);
|
||||
.addReg(NewDestReg, RegState::Implicit | RegState::Undef);
|
||||
// First implicit operand is $exec.
|
||||
MIB->tieOperands(0, MIB->getNumOperands() - 1);
|
||||
return;
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=ERR,GFX8-ERR %s
|
||||
# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=GCN %s
|
||||
# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=ERR %s
|
||||
# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
|
||||
|
||||
# GCN-LABEL: {{^}}lo_to_lo_illegal_vgpr_to_sgpr:
|
||||
# GCN: ; illegal copy v0.l to s1.l
|
||||
# ERR: error: <unknown>:0:0: in function lo_to_lo_illegal_vgpr_to_sgpr void (): illegal SGPR to VGPR copy
|
||||
name: lo_to_lo_illegal_vgpr_to_sgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$sgpr1_lo16 = COPY $vgpr0_lo16
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
|
||||
# GFX8: ; illegal copy s0.l to v1.h
|
||||
# GFX9: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
|
||||
# GFX8-ERR: error: <unknown>:0:0: in function lo_to_hi_sgpr_to_vgpr void (): Cannot use hi16 subreg on VI!
|
||||
name: lo_to_hi_sgpr_to_vgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$vgpr1_hi16 = COPY killed $sgpr0_lo16
|
||||
S_ENDPGM 0
|
||||
...
|
|
@ -193,3 +193,14 @@ body: |
|
|||
$vgpr2 = COPY killed $vgpr1
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr:
|
||||
# GCN: s_mov_b32 s1, s0
|
||||
name: lo_to_lo_sgpr_to_sgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1_lo16 = COPY $sgpr0_lo16
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# Note: GFX8 did not allow SDWA SGPR sources. Therefore no HI16 subregs can be used there.
|
||||
|
||||
# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr:
|
||||
# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
|
||||
name: lo_to_lo_sgpr_to_vgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$vgpr1_lo16 = COPY $sgpr0_lo16
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr:
|
||||
# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
|
||||
name: lo_to_hi_sgpr_to_vgpr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$vgpr1_hi16 = COPY killed $sgpr0_lo16
|
||||
S_ENDPGM 0
|
||||
...
|
Loading…
Reference in New Issue