AMDGPU: Re-justify workaround and fix worked around problem

When buffer resource descriptors were built, the upper two components
of the descriptor were first composed into a 64-bit register because
legalizeOperands assumed all operands had the same register class.
Fix that problem, but keep the workaround. I'm not sure anything
actually is actually emitting such a REG_SEQUENCE now.

If multiple resource descriptors are set up with different base
pointers, this is copied with a single s_mov_b64. We probably
should fix this better by recognizing a pair of s_mov_b32 later,
but for now delete the dead code.

llvm-svn: 248585
This commit is contained in:
Matt Arsenault 2015-09-25 17:08:42 +00:00
parent 3ad55ec946
commit 2d6fdb8495
2 changed files with 64 additions and 55 deletions

View File

@ -2188,47 +2188,32 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
#if 1
// XXX - Workaround for moveToVALU not handling different register class
// inserts for REG_SEQUENCE.
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
// Build the half of the subregister with the constants.
const SDValue Ops0[] = {
DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
buildSMovImm32(DAG, DL, 0),
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
// Build the half of the subregister with the constants before building the
// full 128-bit register. If we are building multiple resource descriptors,
// this will allow CSEing of the 2-component register.
const SDValue Ops0[] = {
DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
buildSMovImm32(DAG, DL, 0),
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, Ops0), 0);
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, Ops0), 0);
// Combine the constants and the pointer.
const SDValue Ops1[] = {
DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
Ptr,
DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
SubRegHi,
DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
};
// Combine the constants and the pointer.
const SDValue Ops1[] = {
DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
Ptr,
DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
SubRegHi,
DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
#else
const SDValue Ops[] = {
DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
Ptr,
DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
buildSMovImm32(DAG, DL, 0),
DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32),
DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
#endif
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
}
/// \brief Return a resource descriptor with the 'Add TID' bit enabled

View File

@ -1737,8 +1737,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
// Legalize REG_SEQUENCE and PHI
// The register class of the operands much be the same type as the register
// class of the output.
if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
MI->getOpcode() == AMDGPU::PHI) {
if (MI->getOpcode() == AMDGPU::PHI) {
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
if (!MI->getOperand(i).isReg() ||
@ -1767,25 +1766,50 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
// Update all the operands so they have the same type.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
if (!MI->getOperand(i).isReg() ||
!TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI->getOperand(I);
if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
continue;
unsigned DstReg = MRI.createVirtualRegister(RC);
MachineBasicBlock *InsertBB;
MachineBasicBlock::iterator Insert;
if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
InsertBB = MI->getParent();
Insert = MI;
} else {
// MI is a PHI instruction.
InsertBB = MI->getOperand(i + 1).getMBB();
Insert = InsertBB->getFirstTerminator();
// MI is a PHI instruction.
MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
.addOperand(Op);
Op.setReg(DstReg);
}
}
// REG_SEQUENCE doesn't really require operand legalization, but if one has a
// VGPR dest type and SGPR sources, insert copies so all operands are
// VGPRs. This seems to help operand folding / the register coalescer.
if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
MachineBasicBlock *MBB = MI->getParent();
const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
if (RI.hasVGPRs(DstRC)) {
// Update all the operands so they are VGPR register classes. These may
// not be the same register class because REG_SEQUENCE supports mixing
// subregister index types e.g. sub0_sub1 + sub2 + sub3
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
MachineOperand &Op = MI->getOperand(I);
if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
continue;
const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
if (VRC == OpRC)
continue;
unsigned DstReg = MRI.createVirtualRegister(VRC);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
.addOperand(Op);
Op.setReg(DstReg);
Op.setIsKill();
}
BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
get(AMDGPU::COPY), DstReg)
.addOperand(MI->getOperand(i));
MI->getOperand(i).setReg(DstReg);
}
return;