forked from OSchip/llvm-project
AMDGPU: Re-justify workaround and fix worked around problem
When buffer resource descriptors were built, the upper two components of the descriptor were first composed into a 64-bit register because legalizeOperands assumed all operands had the same register class. Fix that problem, but keep the workaround. I'm not sure anything is actually emitting such a REG_SEQUENCE now. If multiple resource descriptors are set up with different base pointers, this is copied with a single s_mov_b64. We should probably fix this better by recognizing a pair of s_mov_b32 later, but for now delete the dead code. llvm-svn: 248585
This commit is contained in:
parent
3ad55ec946
commit
2d6fdb8495
|
@ -2188,47 +2188,32 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
|
|||
SDLoc DL,
|
||||
SDValue Ptr) const {
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
|
||||
#if 1
|
||||
// XXX - Workaround for moveToVALU not handling different register class
|
||||
// inserts for REG_SEQUENCE.
|
||||
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
|
||||
|
||||
// Build the half of the subregister with the constants.
|
||||
const SDValue Ops0[] = {
|
||||
DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
|
||||
buildSMovImm32(DAG, DL, 0),
|
||||
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
|
||||
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
|
||||
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
|
||||
};
|
||||
// Build the half of the subregister with the constants before building the
|
||||
// full 128-bit register. If we are building multiple resource descriptors,
|
||||
// this will allow CSEing of the 2-component register.
|
||||
const SDValue Ops0[] = {
|
||||
DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
|
||||
buildSMovImm32(DAG, DL, 0),
|
||||
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
|
||||
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
|
||||
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
|
||||
};
|
||||
|
||||
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
|
||||
MVT::v2i32, Ops0), 0);
|
||||
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
|
||||
MVT::v2i32, Ops0), 0);
|
||||
|
||||
// Combine the constants and the pointer.
|
||||
const SDValue Ops1[] = {
|
||||
DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
|
||||
Ptr,
|
||||
DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
|
||||
SubRegHi,
|
||||
DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
|
||||
};
|
||||
// Combine the constants and the pointer.
|
||||
const SDValue Ops1[] = {
|
||||
DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
|
||||
Ptr,
|
||||
DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
|
||||
SubRegHi,
|
||||
DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
|
||||
};
|
||||
|
||||
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
|
||||
#else
|
||||
const SDValue Ops[] = {
|
||||
DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
|
||||
Ptr,
|
||||
DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
|
||||
buildSMovImm32(DAG, DL, 0),
|
||||
DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
|
||||
buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32),
|
||||
DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
|
||||
};
|
||||
|
||||
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
|
||||
|
||||
#endif
|
||||
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
|
||||
}
|
||||
|
||||
/// \brief Return a resource descriptor with the 'Add TID' bit enabled
|
||||
|
|
|
@ -1737,8 +1737,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
|
|||
// Legalize REG_SEQUENCE and PHI
|
||||
// The register class of the operands much be the same type as the register
|
||||
// class of the output.
|
||||
if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
|
||||
MI->getOpcode() == AMDGPU::PHI) {
|
||||
if (MI->getOpcode() == AMDGPU::PHI) {
|
||||
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
|
||||
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
|
||||
if (!MI->getOperand(i).isReg() ||
|
||||
|
@ -1767,25 +1766,50 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
|
|||
}
|
||||
|
||||
// Update all the operands so they have the same type.
|
||||
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
|
||||
if (!MI->getOperand(i).isReg() ||
|
||||
!TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
|
||||
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
|
||||
MachineOperand &Op = MI->getOperand(I);
|
||||
if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
|
||||
continue;
|
||||
unsigned DstReg = MRI.createVirtualRegister(RC);
|
||||
MachineBasicBlock *InsertBB;
|
||||
MachineBasicBlock::iterator Insert;
|
||||
if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
|
||||
InsertBB = MI->getParent();
|
||||
Insert = MI;
|
||||
} else {
|
||||
// MI is a PHI instruction.
|
||||
InsertBB = MI->getOperand(i + 1).getMBB();
|
||||
Insert = InsertBB->getFirstTerminator();
|
||||
|
||||
// MI is a PHI instruction.
|
||||
MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
|
||||
MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
|
||||
|
||||
BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
|
||||
.addOperand(Op);
|
||||
Op.setReg(DstReg);
|
||||
}
|
||||
}
|
||||
|
||||
// REG_SEQUENCE doesn't really require operand legalization, but if one has a
|
||||
// VGPR dest type and SGPR sources, insert copies so all operands are
|
||||
// VGPRs. This seems to help operand folding / the register coalescer.
|
||||
if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
|
||||
if (RI.hasVGPRs(DstRC)) {
|
||||
// Update all the operands so they are VGPR register classes. These may
|
||||
// not be the same register class because REG_SEQUENCE supports mixing
|
||||
// subregister index types e.g. sub0_sub1 + sub2 + sub3
|
||||
for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
|
||||
MachineOperand &Op = MI->getOperand(I);
|
||||
if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
|
||||
continue;
|
||||
|
||||
const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
|
||||
const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
|
||||
if (VRC == OpRC)
|
||||
continue;
|
||||
|
||||
unsigned DstReg = MRI.createVirtualRegister(VRC);
|
||||
|
||||
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
|
||||
.addOperand(Op);
|
||||
|
||||
Op.setReg(DstReg);
|
||||
Op.setIsKill();
|
||||
}
|
||||
BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
|
||||
get(AMDGPU::COPY), DstReg)
|
||||
.addOperand(MI->getOperand(i));
|
||||
MI->getOperand(i).setReg(DstReg);
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
Loading…
Reference in New Issue