forked from OSchip/llvm-project
AMDGPU: Cleanup immediate folding code
Move code down to where it is used, and reorder it to avoid hard-to-follow immediate folding logic. llvm-svn: 287818
This commit is contained in:
parent
391c3ea9bc
commit
a24d84beb9
|
@ -249,63 +249,16 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FoldingImm = OpToFold.isImm();
|
|
||||||
APInt Imm;
|
|
||||||
|
|
||||||
if (FoldingImm) {
|
|
||||||
unsigned UseReg = UseOp.getReg();
|
|
||||||
const TargetRegisterClass *UseRC
|
|
||||||
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
|
|
||||||
MRI.getRegClass(UseReg) :
|
|
||||||
TRI.getPhysRegClass(UseReg);
|
|
||||||
|
|
||||||
Imm = APInt(64, OpToFold.getImm());
|
|
||||||
|
|
||||||
const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
|
|
||||||
const TargetRegisterClass *FoldRC =
|
|
||||||
TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
|
|
||||||
|
|
||||||
// Split 64-bit constants into 32-bits for folding.
|
|
||||||
if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
|
|
||||||
if (UseRC->getSize() != 8)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (UseOp.getSubReg() == AMDGPU::sub0) {
|
|
||||||
Imm = Imm.getLoBits(32);
|
|
||||||
} else {
|
|
||||||
assert(UseOp.getSubReg() == AMDGPU::sub1);
|
|
||||||
Imm = Imm.getHiBits(32);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// In order to fold immediates into copies, we need to change the
|
|
||||||
// copy to a MOV.
|
|
||||||
if (UseMI->getOpcode() == AMDGPU::COPY) {
|
|
||||||
unsigned DestReg = UseMI->getOperand(0).getReg();
|
|
||||||
const TargetRegisterClass *DestRC
|
|
||||||
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
|
|
||||||
MRI.getRegClass(DestReg) :
|
|
||||||
TRI.getPhysRegClass(DestReg);
|
|
||||||
|
|
||||||
unsigned MovOp = TII->getMovOpcode(DestRC);
|
|
||||||
if (MovOp == AMDGPU::COPY)
|
|
||||||
return;
|
|
||||||
|
|
||||||
UseMI->setDesc(TII->get(MovOp));
|
|
||||||
CopiesToReplace.push_back(UseMI);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Special case for REG_SEQUENCE: We can't fold literals into
|
// Special case for REG_SEQUENCE: We can't fold literals into
|
||||||
// REG_SEQUENCE instructions, so we have to fold them into the
|
// REG_SEQUENCE instructions, so we have to fold them into the
|
||||||
// uses of REG_SEQUENCE.
|
// uses of REG_SEQUENCE.
|
||||||
if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
|
if (UseMI->isRegSequence()) {
|
||||||
unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
|
unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
|
||||||
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
|
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
|
||||||
|
|
||||||
for (MachineRegisterInfo::use_iterator
|
for (MachineRegisterInfo::use_iterator
|
||||||
RSUse = MRI.use_begin(RegSeqDstReg),
|
RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
|
||||||
RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
|
RSUse != RSE; ++RSUse) {
|
||||||
|
|
||||||
MachineInstr *RSUseMI = RSUse->getParent();
|
MachineInstr *RSUseMI = RSUse->getParent();
|
||||||
if (RSUse->getSubReg() != RegSeqDstSubReg)
|
if (RSUse->getSubReg() != RegSeqDstSubReg)
|
||||||
|
@ -314,29 +267,74 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
|
||||||
foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
|
foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
|
||||||
CopiesToReplace, TII, TRI, MRI);
|
CopiesToReplace, TII, TRI, MRI);
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const MCInstrDesc &UseDesc = UseMI->getDesc();
|
|
||||||
|
|
||||||
// Don't fold into target independent nodes. Target independent opcodes
|
bool FoldingImm = OpToFold.isImm();
|
||||||
// don't have defined register classes.
|
|
||||||
if (UseDesc.isVariadic() ||
|
|
||||||
UseDesc.OpInfo[UseOpIdx].RegClass == -1)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (FoldingImm) {
|
// In order to fold immediates into copies, we need to change the
|
||||||
MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
|
// copy to a MOV.
|
||||||
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
|
if (FoldingImm && UseMI->isCopy()) {
|
||||||
|
unsigned DestReg = UseMI->getOperand(0).getReg();
|
||||||
|
const TargetRegisterClass *DestRC
|
||||||
|
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
|
||||||
|
MRI.getRegClass(DestReg) :
|
||||||
|
TRI.getPhysRegClass(DestReg);
|
||||||
|
|
||||||
|
unsigned MovOp = TII->getMovOpcode(DestRC);
|
||||||
|
if (MovOp == AMDGPU::COPY)
|
||||||
|
return;
|
||||||
|
|
||||||
|
UseMI->setDesc(TII->get(MovOp));
|
||||||
|
CopiesToReplace.push_back(UseMI);
|
||||||
|
} else {
|
||||||
|
const MCInstrDesc &UseDesc = UseMI->getDesc();
|
||||||
|
|
||||||
|
// Don't fold into target independent nodes. Target independent opcodes
|
||||||
|
// don't have defined register classes.
|
||||||
|
if (UseDesc.isVariadic() ||
|
||||||
|
UseDesc.OpInfo[UseOpIdx].RegClass == -1)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!FoldingImm) {
|
||||||
|
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
|
||||||
|
|
||||||
|
// FIXME: We could try to change the instruction from 64-bit to 32-bit
|
||||||
|
// to enable more folding opportunities. The shrink operands pass
|
||||||
|
// already does this.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
|
APInt Imm(64, OpToFold.getImm());
|
||||||
|
|
||||||
// FIXME: We could try to change the instruction from 64-bit to 32-bit
|
const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
|
||||||
// to enable more folding opportunities. The shrink operands pass
|
const TargetRegisterClass *FoldRC =
|
||||||
// already does this.
|
TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
|
||||||
return;
|
|
||||||
|
// Split 64-bit constants into 32-bits for folding.
|
||||||
|
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
|
||||||
|
unsigned UseReg = UseOp.getReg();
|
||||||
|
const TargetRegisterClass *UseRC
|
||||||
|
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
|
||||||
|
MRI.getRegClass(UseReg) :
|
||||||
|
TRI.getPhysRegClass(UseReg);
|
||||||
|
|
||||||
|
if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (UseOp.getSubReg() == AMDGPU::sub0) {
|
||||||
|
Imm = Imm.getLoBits(32);
|
||||||
|
} else {
|
||||||
|
assert(UseOp.getSubReg() == AMDGPU::sub1);
|
||||||
|
Imm = Imm.getHiBits(32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
|
||||||
|
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
|
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
|
||||||
|
|
Loading…
Reference in New Issue