AMDGPU: Don't create intermediate SALU instructions

When splitting 64-bit operations, create the correct
VALU instructions immediately.

This was splitting instructions like s_or_b64 into two
s_or_b32s and then pushing those new SALU instructions
onto the worklist to be converted later. There's no reason
to do this intermediate step.

llvm-svn: 246077
Matt Arsenault 2015-08-26 20:47:50 +00:00
parent b85b4079f1
commit f003c38e1e
2 changed files with 44 additions and 27 deletions
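For context (an illustration, not part of the commit): a 64-bit bitwise operation decomposes into two independent 32-bit operations on the low and high halves, which is why s_or_b64 and friends can be rewritten directly as the 32-bit VALU opcodes on each half. A minimal standalone C++ sketch of that equivalence, with made-up names (split_u64, or64_via_halves) used only for illustration:

    #include <cassert>
    #include <cstdint>

    // Hypothetical helpers for illustration; not LLVM code.
    struct Halves { uint32_t lo, hi; };

    static Halves split_u64(uint64_t v) {
      return { static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32) };
    }

    // A 64-bit OR is an OR of the low halves plus an OR of the high halves,
    // mirroring how the split emits one 32-bit op per subregister (sub0/sub1)
    // and then recombines them with a REG_SEQUENCE.
    static uint64_t or64_via_halves(uint64_t a, uint64_t b) {
      Halves ha = split_u64(a), hb = split_u64(b);
      uint32_t lo = ha.lo | hb.lo;  // the sub0 half
      uint32_t hi = ha.hi | hb.hi;  // the sub1 half
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }

    int main() {
      uint64_t a = 0x0123456789abcdefULL, b = 0xfedcba9876543210ULL;
      assert(or64_via_halves(a, b) == (a | b));
      return 0;
    }

The same halfwise decomposition holds for AND, XOR, and NOT, which is why the moveToVALU switch below can send each 64-bit SALU opcode straight to its 32-bit VALU counterpart.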

lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -2195,22 +2195,22 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       continue;
     }
     case AMDGPU::S_AND_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
       Inst->eraseFromParent();
       continue;

     case AMDGPU::S_OR_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
       Inst->eraseFromParent();
       continue;

     case AMDGPU::S_XOR_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
       Inst->eraseFromParent();
       continue;

     case AMDGPU::S_NOT_B64:
-      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
       Inst->eraseFromParent();
       continue;
@@ -2347,13 +2347,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
     // Legalize the operands
     legalizeOperands(Inst);

-    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
-           E = MRI.use_end(); I != E; ++I) {
-      MachineInstr &UseMI = *I->getParent();
-      if (!canReadVGPR(UseMI, I.getOperandNo())) {
-        Worklist.push_back(&UseMI);
-      }
-    }
+    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
   }
 }
@@ -2395,20 +2389,21 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
                                                        AMDGPU::sub0, Src0SubRC);

   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
-  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

-  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
-  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+  BuildMI(MBB, MII, DL, InstDesc, DestSub0)
     .addOperand(SrcReg0Sub0);

   MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                        AMDGPU::sub1, Src0SubRC);

-  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
-  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+  BuildMI(MBB, MII, DL, InstDesc, DestSub1)
     .addOperand(SrcReg0Sub1);

-  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
     .addImm(AMDGPU::sub0)
@@ -2417,10 +2412,11 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
   MRI.replaceRegWith(Dest.getReg(), FullDestReg);

-  // Try to legalize the operands in case we need to swap the order to keep it
-  // valid.
-  Worklist.push_back(LoHalf);
-  Worklist.push_back(HiHalf);
+  // We don't need to legalizeOperands here because for a single operand, src0
+  // will support any kind of input.
+
+  // Move all users of this moved value.
+  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
 }

 void SIInstrInfo::splitScalar64BitBinaryOp(
@@ -2455,9 +2451,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
                                                        AMDGPU::sub0, Src1SubRC);

   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
-  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);

-  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
   MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
     .addOperand(SrcReg0Sub0)
     .addOperand(SrcReg1Sub0);
@@ -2467,12 +2464,12 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
   MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                        AMDGPU::sub1, Src1SubRC);

-  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
   MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
     .addOperand(SrcReg0Sub1)
     .addOperand(SrcReg1Sub1);

-  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
     .addImm(AMDGPU::sub0)
@@ -2483,8 +2480,11 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
   // Try to legalize the operands in case we need to swap the order to keep it
   // valid.
-  Worklist.push_back(LoHalf);
-  Worklist.push_back(HiHalf);
+  legalizeOperands(LoHalf);
+  legalizeOperands(HiHalf);
+
+  // Move all users of this moved value.
+  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
 }

 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2588,6 +2588,19 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
   MRI.replaceRegWith(Dest.getReg(), ResultReg);
 }

+void SIInstrInfo::addUsersToMoveToVALUWorklist(
+  unsigned DstReg,
+  MachineRegisterInfo &MRI,
+  SmallVectorImpl<MachineInstr *> &Worklist) const {
+  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
+         E = MRI.use_end(); I != E; ++I) {
+    MachineInstr &UseMI = *I->getParent();
+    if (!canReadVGPR(UseMI, I.getOperandNo())) {
+      Worklist.push_back(&UseMI);
+    }
+  }
+}
+
 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
                                    int OpIndices[3]) const {
   const MCInstrDesc &Desc = get(MI->getOpcode());

lib/Target/AMDGPU/SIInstrInfo.h

@@ -58,6 +58,10 @@ private:
   void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
                            MachineInstr *Inst) const;

+  void addUsersToMoveToVALUWorklist(
+    unsigned Reg, MachineRegisterInfo &MRI,
+    SmallVectorImpl<MachineInstr *> &Worklist) const;
+
   bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                     MachineInstr *MIb) const;