llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp

287 lines
9.8 KiB
C++

//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "AMDGPUUtil.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
setOperationAction(ISD::MUL, MVT::i64, Expand);
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
computeRegisterProperties();
setOperationAction(ISD::FSUB, MVT::f32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setSchedulingPreference(Sched::VLIW);
}
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const
{
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::TGID_X:
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
break;
case AMDGPU::TGID_Y:
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
break;
case AMDGPU::TGID_Z:
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
break;
case AMDGPU::TIDIG_X:
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
break;
case AMDGPU::TIDIG_Y:
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
break;
case AMDGPU::TIDIG_Z:
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
break;
case AMDGPU::NGROUPS_X:
lowerImplicitParameter(MI, *BB, MRI, 0);
break;
case AMDGPU::NGROUPS_Y:
lowerImplicitParameter(MI, *BB, MRI, 1);
break;
case AMDGPU::NGROUPS_Z:
lowerImplicitParameter(MI, *BB, MRI, 2);
break;
case AMDGPU::GLOBAL_SIZE_X:
lowerImplicitParameter(MI, *BB, MRI, 3);
break;
case AMDGPU::GLOBAL_SIZE_Y:
lowerImplicitParameter(MI, *BB, MRI, 4);
break;
case AMDGPU::GLOBAL_SIZE_Z:
lowerImplicitParameter(MI, *BB, MRI, 5);
break;
case AMDGPU::LOCAL_SIZE_X:
lowerImplicitParameter(MI, *BB, MRI, 6);
break;
case AMDGPU::LOCAL_SIZE_Y:
lowerImplicitParameter(MI, *BB, MRI, 7);
break;
case AMDGPU::LOCAL_SIZE_Z:
lowerImplicitParameter(MI, *BB, MRI, 8);
break;
case AMDGPU::CLAMP_R600:
MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1));
break;
case AMDGPU::FABS_R600:
MI->getOperand(1).addTargetFlag(MO_FLAG_ABS);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1));
break;
case AMDGPU::FNEG_R600:
MI->getOperand(1).addTargetFlag(MO_FLAG_NEG);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1));
break;
case AMDGPU::R600_LOAD_CONST:
{
int64_t RegIndex = MI->getOperand(1).getImm();
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
.addOperand(MI->getOperand(0))
.addReg(ConstantReg);
break;
}
case AMDGPU::LOAD_INPUT:
{
int64_t RegIndex = MI->getOperand(1).getImm();
addLiveIn(MI, MF, MRI, TII,
AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
break;
}
case AMDGPU::MASK_WRITE:
{
unsigned maskedRegister = MI->getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
def->addTargetFlag(MO_FLAG_MASK);
// Return early so the instruction is not erased
return BB;
}
case AMDGPU::RAT_WRITE_CACHELESS_eg:
{
// Convert to DWORD address
unsigned NewAddr = MRI.createVirtualRegister(
&AMDGPU::R600_TReg32_XRegClass);
unsigned ShiftValue = MRI.createVirtualRegister(
&AMDGPU::R600_TReg32RegClass);
// XXX In theory, we should be able to pass ShiftValue directly to
// the LSHR_eg instruction as an inline literal, but I tried doing it
// this way and it didn't produce the correct results.
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue)
.addReg(AMDGPU::ALU_LITERAL_X)
.addImm(2);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
.addOperand(MI->getOperand(1))
.addReg(ShiftValue);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
.addReg(NewAddr);
break;
}
case AMDGPU::STORE_OUTPUT:
{
int64_t OutputIndex = MI->getOperand(1).getImm();
unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg)
.addOperand(MI->getOperand(0));
if (!MRI.isLiveOut(OutputReg)) {
MRI.addLiveOut(OutputReg);
}
break;
}
case AMDGPU::RESERVE_REG:
{
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
int64_t ReservedIndex = MI->getOperand(0).getImm();
unsigned ReservedReg =
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
MFI->ReservedRegs.push_back(ReservedReg);
break;
}
case AMDGPU::TXD:
{
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addReg(t0, RegState::Implicit)
.addReg(t1, RegState::Implicit);
break;
}
case AMDGPU::TXD_SHADOW:
{
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addReg(t0, RegState::Implicit)
.addReg(t1, RegState::Implicit);
break;
}
}
MI->eraseFromParent();
return BB;
}
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const
{
MachineBasicBlock::iterator I = *MI;
unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg)
.addReg(AMDGPU::ALU_LITERAL_X)
.addImm(dword_offset * 4);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
.addOperand(MI->getOperand(0))
.addReg(PtrReg)
.addImm(0);
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
}
}
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
Op.getOperand(0),
Op.getOperand(0),
DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(32, MVT::i32),
Op.getOperand(1)));
}