AMDGPU: Don't create intermediate SALU instructions
When splitting 64-bit operations, create the correct VALU instructions immediately. This was splitting things like s_or_b64 into two s_or_b32 halves and then pushing the new instructions onto the worklist; there is no reason to take that intermediate step.

llvm-svn: 246077
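As a rough illustration only (not part of the commit; the registers below are hypothetical), a 64-bit scalar logical op that has to be moved to the VALU, such as

    s_or_b64 s[0:1], s[2:3], s[4:5]

was previously split into two s_or_b32 halves that were pushed back onto the worklist and only converted to VALU form on a later pass. With this change the split emits the 32-bit VALU opcode directly, roughly

    v_or_b32_e64 v0, s2, s4
    v_or_b32_e64 v1, s3, s5
    ; 64-bit result rebuilt from v0/v1 with a REG_SEQUENCE

and legalizeOperands() is then run on the two halves to fix up any operands the VALU encoding cannot accept (for example, too many SGPR sources), so no intermediate SALU instructions are ever created.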
parent b85b4079f1
commit f003c38e1e
@@ -2195,22 +2195,22 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       continue;
     }
     case AMDGPU::S_AND_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_OR_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_XOR_B64:
-      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
       Inst->eraseFromParent();
       continue;
 
     case AMDGPU::S_NOT_B64:
-      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
       Inst->eraseFromParent();
       continue;
 
@@ -2347,13 +2347,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
     // Legalize the operands
     legalizeOperands(Inst);
 
-    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
-           E = MRI.use_end(); I != E; ++I) {
-      MachineInstr &UseMI = *I->getParent();
-      if (!canReadVGPR(UseMI, I.getOperandNo())) {
-        Worklist.push_back(&UseMI);
-      }
-    }
+    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
   }
 }
 
@@ -2395,20 +2389,21 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
                                                        AMDGPU::sub0, Src0SubRC);
 
   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
-  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
 
-  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
-  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+  BuildMI(MBB, MII, DL, InstDesc, DestSub0)
     .addOperand(SrcReg0Sub0);
 
   MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                        AMDGPU::sub1, Src0SubRC);
 
-  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
-  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+  BuildMI(MBB, MII, DL, InstDesc, DestSub1)
     .addOperand(SrcReg0Sub1);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
     .addImm(AMDGPU::sub0)
@@ -2417,10 +2412,11 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
 
   MRI.replaceRegWith(Dest.getReg(), FullDestReg);
 
-  // Try to legalize the operands in case we need to swap the order to keep it
-  // valid.
-  Worklist.push_back(LoHalf);
-  Worklist.push_back(HiHalf);
+  // We don't need to legalizeOperands here because for a single operand, src0
+  // will support any kind of input.
+
+  // Move all users of this moved value.
+  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
 }
 
 void SIInstrInfo::splitScalar64BitBinaryOp(
@@ -2455,9 +2451,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
                                                        AMDGPU::sub0, Src1SubRC);
 
   const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
-  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+  const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
 
-  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+  unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
   MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
     .addOperand(SrcReg0Sub0)
     .addOperand(SrcReg1Sub0);
@@ -2467,12 +2464,12 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
   MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                        AMDGPU::sub1, Src1SubRC);
 
-  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+  unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
   MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
     .addOperand(SrcReg0Sub1)
     .addOperand(SrcReg1Sub1);
 
-  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
   BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
     .addReg(DestSub0)
     .addImm(AMDGPU::sub0)
@@ -2483,8 +2480,11 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
 
   // Try to legalize the operands in case we need to swap the order to keep it
   // valid.
-  Worklist.push_back(LoHalf);
-  Worklist.push_back(HiHalf);
+  legalizeOperands(LoHalf);
+  legalizeOperands(HiHalf);
+
+  // Move all users of this moved value.
+  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
 }
 
 void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2588,6 +2588,19 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
   MRI.replaceRegWith(Dest.getReg(), ResultReg);
 }
 
+void SIInstrInfo::addUsersToMoveToVALUWorklist(
+  unsigned DstReg,
+  MachineRegisterInfo &MRI,
+  SmallVectorImpl<MachineInstr *> &Worklist) const {
+  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
+         E = MRI.use_end(); I != E; ++I) {
+    MachineInstr &UseMI = *I->getParent();
+    if (!canReadVGPR(UseMI, I.getOperandNo())) {
+      Worklist.push_back(&UseMI);
+    }
+  }
+}
+
 unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
                                    int OpIndices[3]) const {
   const MCInstrDesc &Desc = get(MI->getOpcode());
@@ -58,6 +58,10 @@ private:
   void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
                            MachineInstr *Inst) const;
 
+  void addUsersToMoveToVALUWorklist(
+    unsigned Reg, MachineRegisterInfo &MRI,
+    SmallVectorImpl<MachineInstr *> &Worklist) const;
+
   bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
                                     MachineInstr *MIb) const;
 