diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a54b628520cc..e03913bff5ab 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -249,63 +249,16 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
     return;
   }
 
-  bool FoldingImm = OpToFold.isImm();
-  APInt Imm;
-
-  if (FoldingImm) {
-    unsigned UseReg = UseOp.getReg();
-    const TargetRegisterClass *UseRC
-      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
-      MRI.getRegClass(UseReg) :
-      TRI.getPhysRegClass(UseReg);
-
-    Imm = APInt(64, OpToFold.getImm());
-
-    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
-    const TargetRegisterClass *FoldRC =
-      TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
-
-    // Split 64-bit constants into 32-bits for folding.
-    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
-      if (UseRC->getSize() != 8)
-        return;
-
-      if (UseOp.getSubReg() == AMDGPU::sub0) {
-        Imm = Imm.getLoBits(32);
-      } else {
-        assert(UseOp.getSubReg() == AMDGPU::sub1);
-        Imm = Imm.getHiBits(32);
-      }
-    }
-
-    // In order to fold immediates into copies, we need to change the
-    // copy to a MOV.
-    if (UseMI->getOpcode() == AMDGPU::COPY) {
-      unsigned DestReg = UseMI->getOperand(0).getReg();
-      const TargetRegisterClass *DestRC
-        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
-        MRI.getRegClass(DestReg) :
-        TRI.getPhysRegClass(DestReg);
-
-      unsigned MovOp = TII->getMovOpcode(DestRC);
-      if (MovOp == AMDGPU::COPY)
-        return;
-
-      UseMI->setDesc(TII->get(MovOp));
-      CopiesToReplace.push_back(UseMI);
-    }
-  }
-
   // Special case for REG_SEQUENCE: We can't fold literals into
   // REG_SEQUENCE instructions, so we have to fold them into the
   // uses of REG_SEQUENCE.
-  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+  if (UseMI->isRegSequence()) {
     unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
     unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
 
     for (MachineRegisterInfo::use_iterator
-         RSUse = MRI.use_begin(RegSeqDstReg),
-         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+           RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
+         RSUse != RSE; ++RSUse) {
 
       MachineInstr *RSUseMI = RSUse->getParent();
       if (RSUse->getSubReg() != RegSeqDstSubReg)
@@ -314,29 +267,74 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
       foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                   CopiesToReplace, TII, TRI, MRI);
     }
+
     return;
   }
 
-  const MCInstrDesc &UseDesc = UseMI->getDesc();
-
-  // Don't fold into target independent nodes. Target independent opcodes
-  // don't have defined register classes.
-  if (UseDesc.isVariadic() ||
-      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
-    return;
+  bool FoldingImm = OpToFold.isImm();
 
-  if (FoldingImm) {
-    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
-    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+  // In order to fold immediates into copies, we need to change the
+  // copy to a MOV.
+  if (FoldingImm && UseMI->isCopy()) {
+    unsigned DestReg = UseMI->getOperand(0).getReg();
+    const TargetRegisterClass *DestRC
+      = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+      MRI.getRegClass(DestReg) :
+      TRI.getPhysRegClass(DestReg);
+
+    unsigned MovOp = TII->getMovOpcode(DestRC);
+    if (MovOp == AMDGPU::COPY)
+      return;
+
+    UseMI->setDesc(TII->get(MovOp));
+    CopiesToReplace.push_back(UseMI);
+  } else {
+    const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+    // Don't fold into target independent nodes. Target independent opcodes
+    // don't have defined register classes.
+    if (UseDesc.isVariadic() ||
+        UseDesc.OpInfo[UseOpIdx].RegClass == -1)
+      return;
+  }
+
+  if (!FoldingImm) {
+    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+
+    // FIXME: We could try to change the instruction from 64-bit to 32-bit
+    // to enable more folding opportunites. The shrink operands pass
+    // already does this.
     return;
   }
 
-  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+  APInt Imm(64, OpToFold.getImm());
 
-  // FIXME: We could try to change the instruction from 64-bit to 32-bit
-  // to enable more folding opportunites. The shrink operands pass
-  // already does this.
-  return;
+  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
+  const TargetRegisterClass *FoldRC =
+    TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+
+  // Split 64-bit constants into 32-bits for folding.
+  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
+    unsigned UseReg = UseOp.getReg();
+    const TargetRegisterClass *UseRC
+      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+      MRI.getRegClass(UseReg) :
+      TRI.getPhysRegClass(UseReg);
+
+    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
+      return;
+
+    if (UseOp.getSubReg() == AMDGPU::sub0) {
+      Imm = Imm.getLoBits(32);
+    } else {
+      assert(UseOp.getSubReg() == AMDGPU::sub1);
+      Imm = Imm.getHiBits(32);
+    }
+  }
+
+  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
 }
 
 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
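
Note on the 64-bit immediate split that this patch relocates below the REG_SEQUENCE handling: a use of a 64-bit constant through sub0 folds the low 32 bits and a use through sub1 folds the high 32 bits, which the pass computes with APInt::getLoBits(32)/getHiBits(32). The standalone C++ sketch below illustrates the same split with plain integer arithmetic; the function name splitImm64 and the sample constant are hypothetical and not part of SIFoldOperands.cpp.

// Illustrative sketch, not part of the patch: pick the 32-bit half of a
// 64-bit immediate that corresponds to a sub0 (low) or sub1 (high) use.
#include <cstdint>
#include <cstdio>

static uint64_t splitImm64(uint64_t Imm, bool IsSub1) {
  return IsSub1 ? (Imm >> 32)            // mirrors APInt::getHiBits(32)
                : (Imm & 0xffffffffULL); // mirrors APInt::getLoBits(32)
}

int main() {
  const uint64_t Imm = 0x123456789abcdef0ULL; // hypothetical 64-bit constant
  std::printf("sub0 (low) half:  0x%08llx\n",
              (unsigned long long)splitImm64(Imm, /*IsSub1=*/false));
  std::printf("sub1 (high) half: 0x%08llx\n",
              (unsigned long long)splitImm64(Imm, /*IsSub1=*/true));
  return 0;
}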