forked from OSchip/llvm-project
R600: Support for indirect addressing v4
Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices. At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array. v2: - Add support for i8 zext load from stack. - Coding style fixes v3: - Don't reserve registers for indirect addressing when it isn't being used. - Fix bug caused by LLVM limiting the number of SubRegIndex declarations. v4: - Fix 64-bit defines llvm-svn: 174525
This commit is contained in:
parent
5ce9c5657c
commit
f3b2a1e8b3
|
@ -36,6 +36,7 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
|
|||
// Passes common to R600 and SI
|
||||
Pass *createAMDGPUStructurizeCFGPass();
|
||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||
FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface to describe the layout of a stack frame on an AMDGPU target machine
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDGPUFrameLowering.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
|
||||
using namespace llvm;
|
||||
/// Constructs the frame lowering object by forwarding every argument,
/// unchanged, to the TargetFrameLowering base class.
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
                                         int LAO, unsigned TransAl)
    : TargetFrameLowering(D, StackAl, LAO, TransAl) {
}
|
||||
|
||||
/// Destructor; the body is empty.
AMDGPUFrameLowering::~AMDGPUFrameLowering() {
}
|
||||
|
||||
/// \returns The stack width for \p MF. The value is currently a constant 1
/// and \p MF is not inspected.
unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
  // XXX: The width is hard-coded to 1 for now.
  //
  // The StackWidth should probably be stored as metadata attached to the
  // MachineFunction. That metadata could either be supplied by a frontend or
  // be computed by an R600 specific LLVM IR pass.
  //
  // The StackWidth controls how stack objects are laid out. Taking a vector
  // stack variable such as 'int4 stack[2]' as the example, the elements are
  // placed as follows:
  //
  //   Element     | StackWidth = 1 | StackWidth = 2 | StackWidth = 4
  //   ------------+----------------+----------------+---------------
  //   stack[0].x  | T0.X           | T0.X           | T0.X
  //   stack[0].y  | T1.X           | T0.Y           | T0.Y
  //   stack[0].z  | T2.X           | T1.X           | T0.Z
  //   stack[0].w  | T3.X           | T1.Y           | T0.W
  //   stack[1].x  | T4.X           | T2.X           | T1.X
  //   stack[1].y  | T5.X           | T2.Y           | T1.Y
  //   stack[1].z  | T6.X           | T3.X           | T1.Z
  //   stack[1].w  | T7.X           | T3.Y           | T1.W
  const unsigned StackWidth = 1;
  return StackWidth;
}
|
||||
|
||||
/// \returns The number of registers allocated for \p FI.
|
||||
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
|
||||
int FI) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
unsigned Offset = 0;
|
||||
int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
|
||||
|
||||
for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
|
||||
const AllocaInst *Alloca = MFI->getObjectAllocation(i);
|
||||
unsigned ArrayElements;
|
||||
const Type *AllocaType = Alloca->getAllocatedType();
|
||||
const Type *ElementType;
|
||||
|
||||
if (AllocaType->isArrayTy()) {
|
||||
ArrayElements = AllocaType->getArrayNumElements();
|
||||
ElementType = AllocaType->getArrayElementType();
|
||||
} else {
|
||||
ArrayElements = 1;
|
||||
ElementType = AllocaType;
|
||||
}
|
||||
|
||||
unsigned VectorElements;
|
||||
if (ElementType->isVectorTy()) {
|
||||
VectorElements = ElementType->getVectorNumElements();
|
||||
} else {
|
||||
VectorElements = 1;
|
||||
}
|
||||
|
||||
Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
|
||||
}
|
||||
return Offset;
|
||||
}
|
||||
|
||||
/// Reports that there are no callee-saved spill slots: \p NumEntries is set
/// to zero and a null table pointer is returned.
const TargetFrameLowering::SpillSlot *
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  const TargetFrameLowering::SpillSlot *NoSlots = 0;
  NumEntries = 0;
  return NoSlots;
}
|
||||
void
|
||||
AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
}
|
||||
void
|
||||
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
return false;
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
|
||||
//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -30,6 +30,10 @@ public:
|
|||
AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
|
||||
unsigned TransAl = 1);
|
||||
virtual ~AMDGPUFrameLowering();
|
||||
|
||||
/// \returns The number of 32-bit sub-registers that are used when storing
|
||||
/// values to the stack.
|
||||
virtual unsigned getStackWidth(const MachineFunction &MF) const;
|
||||
virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
|
||||
virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
|
||||
virtual void emitPrologue(MachineFunction &MF) const;
|
|
@ -412,5 +412,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(URECIP)
|
||||
NODE_NAME_CASE(EXPORT)
|
||||
NODE_NAME_CASE(CONST_ADDRESS)
|
||||
NODE_NAME_CASE(REGISTER_LOAD)
|
||||
NODE_NAME_CASE(REGISTER_STORE)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,6 +122,8 @@ enum {
|
|||
URECIP,
|
||||
EXPORT,
|
||||
CONST_ADDRESS,
|
||||
REGISTER_LOAD,
|
||||
REGISTER_STORE,
|
||||
LAST_AMDGPU_ISD_NUMBER
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,326 @@
|
|||
//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Instructions can use indirect addressing to index the register file as if it
|
||||
/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
|
||||
/// to either a COPY or a MOV that uses indirect addressing.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
const AMDGPUInstrInfo *TII;
|
||||
|
||||
bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
|
||||
|
||||
public:
|
||||
AMDGPUIndirectAddressingPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID),
|
||||
TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
|
||||
{ }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const { return "R600 Handle indirect addressing"; }
|
||||
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char AMDGPUIndirectAddressingPass::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
|
||||
return new AMDGPUIndirectAddressingPass(tm);
|
||||
}
|
||||
|
||||
/// Lowers every RegisterStore and RegisterLoad instruction in \p MF.
///
/// The work is done in two sweeps over the function:
///  1. Each RegisterStore is replaced by a COPY (direct access) or by the
///     target's indirect write, and the registers that hold each indirect
///     address are recorded and added as live-ins of the successor blocks.
///  2. Live-ins that refer to the same indirect address are merged with PHI
///     instructions, and each RegisterLoad is replaced by a COPY (direct
///     access) or by a REG_SEQUENCE plus the target's indirect read.
///
/// \returns false, both when indirect addressing is unused and after lowering.
bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  int IndirectBegin = TII->getIndirectIndexBegin(MF);
  int IndirectEnd = TII->getIndirectIndexEnd(MF);

  if (IndirectBegin == -1) {
    // No indirect addressing, we can skip this pass
    assert(IndirectEnd == -1);
    return false;
  }

  // The map keeps track of the indirect address that is represented by
  // each virtual register. The key is the register and the value is the
  // indirect address it uses.
  std::map<unsigned, unsigned> RegisterAddressMap;

  // First pass - Lower all of the RegisterStore instructions and track which
  // registers are live.
  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    // This map keeps track of the current live indirect registers.
    // The key is the address and the value is the register
    std::map<unsigned, unsigned> LiveAddressRegisterMap;
    MachineBasicBlock &MBB = *BB;

    // Next is only advanced inside the body, so an empty block never
    // increments its end iterator.
    for (MachineBasicBlock::iterator I = MBB.begin(), Next = I;
         I != MBB.end(); I = Next) {
      Next = llvm::next(I);
      MachineInstr &MI = *I;

      if (!TII->isRegisterStore(MI)) {
        continue;
      }

      // Lower RegisterStore

      unsigned RegIndex = MI.getOperand(2).getImm();
      unsigned Channel = MI.getOperand(3).getImm();
      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
      const TargetRegisterClass *IndirectStoreRegClass =
          TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());

      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
        // Direct register access.
        unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);

        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
            .addOperand(MI.getOperand(0));

        RegisterAddressMap[DstReg] = Address;
        LiveAddressRegisterMap[Address] = DstReg;
      } else {
        // Indirect register access.
        MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
                                       MI.getOperand(0).getReg(), // Value
                                       Address,
                                       MI.getOperand(1).getReg()); // Offset
        // The write may hit any register in the indirect range, so give the
        // instruction an implicit def for each one.
        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
          unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
          MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
          RegisterAddressMap[DstReg] = Addr;
          LiveAddressRegisterMap[Addr] = DstReg;
        }
      }
      MI.eraseFromParent();
    }

    // Update the live-ins of the successor blocks
    for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
                                          SuccEnd = MBB.succ_end();
                                          SuccEnd != Succ; ++Succ) {
      std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
      for (Key = LiveAddressRegisterMap.begin(),
           KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
        (*Succ)->addLiveIn(Key->second);
      }
    }
  }

  // Second pass - Lower the RegisterLoad instructions
  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    // Key is the address and the value is the register
    std::map<unsigned, unsigned> LiveAddressRegisterMap;
    MachineBasicBlock &MBB = *BB;

    MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
    while (LI != MBB.livein_end()) {
      std::vector<unsigned> PhiRegisters;

      // Make sure this live in is used for indirect addressing
      if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
        ++LI;
        continue;
      }

      unsigned Address = RegisterAddressMap[*LI];
      LiveAddressRegisterMap[Address] = *LI;
      PhiRegisters.push_back(*LI);

      // Check if there are other live in registers which map to the same
      // indirect address.
      for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
                                              LE = MBB.livein_end();
                                              LJ != LE; ++LJ) {
        unsigned Reg = *LJ;
        if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
          continue;
        }

        if (RegisterAddressMap[Reg] == Address) {
          if (!regHasExplicitDef(MRI, Reg)) {
            continue;
          }
          PhiRegisters.push_back(Reg);
        }
      }

      if (PhiRegisters.size() == 1) {
        // We don't need to insert a Phi instruction, so we can just add the
        // registers to the live list for the block.
        LiveAddressRegisterMap[Address] = *LI;
        MBB.removeLiveIn(*LI);
      } else {
        // We need to insert a PHI, because we have the same address being
        // written in multiple predecessor blocks.
        const TargetRegisterClass *PhiDstClass =
            TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
        unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
        MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
                                          MBB.findDebugLoc(MBB.begin()),
                                          TII->get(AMDGPU::PHI), PhiDstReg);

        for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
                                                   RE = PhiRegisters.end();
                                                   RI != RE; ++RI) {
          unsigned Reg = *RI;
          MachineInstr *DefInst = MRI.getVRegDef(Reg);
          assert(DefInst);
          MachineBasicBlock *RegBlock = DefInst->getParent();
          Phi.addReg(Reg);
          Phi.addMBB(RegBlock);
          MBB.removeLiveIn(Reg);
        }
        RegisterAddressMap[PhiDstReg] = Address;
        LiveAddressRegisterMap[Address] = PhiDstReg;
      }
      // Live-ins were removed above, so restart from the beginning of the
      // list.
      LI = MBB.livein_begin();
    }

    // Next is only advanced inside the body, so an empty block never
    // increments its end iterator.
    for (MachineBasicBlock::iterator I = MBB.begin(), Next = I;
         I != MBB.end(); I = Next) {
      Next = llvm::next(I);
      MachineInstr &MI = *I;

      if (!TII->isRegisterLoad(MI)) {
        if (MI.getOpcode() == AMDGPU::PHI) {
          continue;
        }
        // Check for indirect register defs
        for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
                                 OpIdx < NumOperands; ++OpIdx) {
          MachineOperand &MO = MI.getOperand(OpIdx);
          if (MO.isReg() && MO.isDef() &&
              RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
            unsigned Reg = MO.getReg();
            unsigned LiveAddress = RegisterAddressMap[Reg];
            // Chain the live-ins. The lookup result must be compared with
            // the end() of the same map it was obtained from.
            std::map<unsigned, unsigned>::const_iterator Live =
                LiveAddressRegisterMap.find(LiveAddress);
            if (Live != LiveAddressRegisterMap.end()) {
              MI.addOperand(MachineOperand::CreateReg(
                                  Live->second,
                                  false, // isDef
                                  true,  // isImp
                                  true));  // isKill
            }
            LiveAddressRegisterMap[LiveAddress] = Reg;
          }
        }
        continue;
      }

      const TargetRegisterClass *SuperIndirectRegClass =
          TII->getSuperIndirectRegClass();
      const TargetRegisterClass *IndirectLoadRegClass =
          TII->getIndirectAddrLoadRegClass();
      unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);

      unsigned RegIndex = MI.getOperand(2).getImm();
      unsigned Channel = MI.getOperand(3).getImm();
      unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);

      if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
        // Direct register access
        unsigned Reg = LiveAddressRegisterMap[Address];
        unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);

        if (regHasExplicitDef(MRI, Reg)) {
          // If the register we are reading from has an explicit def, then that
          // means it was written via a direct register access (i.e. COPY
          // or other instruction that doesn't use indirect addressing). In
          // this case we know where the value has been stored, so we can just
          // issue a copy.
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
                  MI.getOperand(0).getReg())
              .addReg(Reg);
        } else {
          // If the register we are reading has an implicit def, then that
          // means it was written by an indirect register access (i.e. an
          // instruction that uses indirect addressing).
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
                  MI.getOperand(0).getReg())
              .addReg(AddrReg);
        }
      } else {
        // Indirect register access

        // Note on REG_SEQUENCE instructions: You can't actually use the
        // register it defines unless you have an instruction that takes the
        // defined register class as an operand.

        MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
                                               TII->get(AMDGPU::REG_SEQUENCE),
                                               IndirectReg);
        for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
          unsigned Addr = TII->calculateIndirectAddress(i, Channel);
          if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
            continue;
          }
          unsigned Reg = LiveAddressRegisterMap[Addr];

          // We only need to use REG_SEQUENCE for explicit defs, since the
          // register coalescer won't do anything with the implicit defs.
          // regHasExplicitDef also copes with a register that has no
          // defining instruction.
          if (!regHasExplicitDef(MRI, Reg)) {
            continue;
          }

          // Insert a REG_SEQUENCE instruction to force the register allocator
          // to allocate the virtual register to the correct physical register.
          Sequence.addReg(Reg);
          Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
        }
        MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
                                       MI.getOperand(0).getReg(), // Value
                                       Address,
                                       MI.getOperand(1).getReg()); // Offset

        Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
      }
      MI.eraseFromParent();
    }
  }
  return false;
}
|
||||
|
||||
/// \returns true when \p Reg has a defining instruction whose first operand
/// is a register operand naming \p Reg; false otherwise, including when no
/// defining instruction is found.
bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
                                                     unsigned Reg) const {
  MachineInstr *Def = MRI.getVRegDef(Reg);
  if (!Def) {
    return false;
  }

  const MachineOperand &FirstOp = Def->getOperand(0);
  if (!FirstOp.isReg()) {
    return false;
  }
  return FirstOp.getReg() == Reg;
}
|
|
@ -235,6 +235,15 @@ AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
|
||||
return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
|
||||
return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
|
||||
}
|
||||
|
||||
|
||||
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||
DebugLoc DL) const {
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
|
|
@ -40,9 +40,10 @@ class MachineInstrBuilder;
|
|||
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
|
||||
private:
|
||||
const AMDGPURegisterInfo RI;
|
||||
TargetMachine &TM;
|
||||
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
||||
MachineBasicBlock &MBB) const;
|
||||
protected:
|
||||
TargetMachine &TM;
|
||||
public:
|
||||
explicit AMDGPUInstrInfo(TargetMachine &tm);
|
||||
|
||||
|
@ -130,12 +131,66 @@ public:
|
|||
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isStoreInst(llvm::MachineInstr *MI) const;
|
||||
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
|
||||
bool isRegisterStore(const MachineInstr &MI) const;
|
||||
bool isRegisterLoad(const MachineInstr &MI) const;
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Pure virtual functions to be implemented by sub-classes.
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const = 0;
|
||||
virtual unsigned getIEQOpcode() const = 0;
|
||||
virtual bool isMov(unsigned opcode) const = 0;
|
||||
|
||||
/// \returns the smallest register index that will be accessed by an indirect
|
||||
/// read or write or -1 if indirect addressing is not used by this program.
|
||||
virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
|
||||
|
||||
/// \returns the largest register index that will be accessed by an indirect
|
||||
/// read or write or -1 if indirect addressing is not used by this program.
|
||||
virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
|
||||
|
||||
/// \brief Calculate the "Indirect Address" for the given \p RegIndex and
|
||||
/// \p Channel
|
||||
///
|
||||
/// We model indirect addressing using a virtual address space that can be
|
||||
/// accessed with loads and stores. The "Indirect Address" is the memory
|
||||
/// address in this virtual address space that maps to the given \p RegIndex
|
||||
/// and \p Channel.
|
||||
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
|
||||
unsigned Channel) const = 0;
|
||||
|
||||
/// \returns The register class to be used for storing values to an
|
||||
/// "Indirect Address" .
|
||||
virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
|
||||
unsigned SourceReg) const = 0;
|
||||
|
||||
/// \returns The register class to be used for loading values from
|
||||
/// an "Indirect Address" .
|
||||
virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
|
||||
|
||||
/// \brief Build instruction(s) for an indirect register write.
|
||||
///
|
||||
/// \returns The instruction that performs the indirect register write
|
||||
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg, unsigned Address,
|
||||
unsigned OffsetReg) const = 0;
|
||||
|
||||
/// \brief Build instruction(s) for an indirect register read.
|
||||
///
|
||||
/// \returns The instruction that performs the indirect register read
|
||||
virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg, unsigned Address,
|
||||
unsigned OffsetReg) const = 0;
|
||||
|
||||
/// \returns the register class whose sub registers are the set of all
|
||||
/// possible registers that can be used for indirect addressing.
|
||||
virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
|
||||
|
||||
|
||||
/// \brief Convert the AMDIL MachineInstr to a supported ISA
|
||||
/// MachineInstr
|
||||
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||
|
@ -145,4 +200,7 @@ public:
|
|||
|
||||
} // End llvm namespace
|
||||
|
||||
#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
|
||||
#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
|
||||
|
||||
#endif // AMDGPUINSTRINFO_H
|
||||
|
|
|
@ -72,3 +72,11 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
|
|||
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
|
||||
|
||||
def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
|
||||
|
||||
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
|
||||
SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
|
||||
def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
|
||||
SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
|
|
|
@ -13,8 +13,8 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
|
||||
field bits<16> AMDILOp = 0;
|
||||
field bits<3> Gen = 0;
|
||||
field bit isRegisterLoad = 0;
|
||||
field bit isRegisterStore = 0;
|
||||
|
||||
let Namespace = "AMDGPU";
|
||||
let OutOperandList = outs;
|
||||
|
@ -22,8 +22,9 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
|
|||
let AsmString = asm;
|
||||
let Pattern = pattern;
|
||||
let Itinerary = NullALU;
|
||||
let TSFlags{42-40} = Gen;
|
||||
let TSFlags{63-48} = AMDILOp;
|
||||
|
||||
let TSFlags{63} = isRegisterLoad;
|
||||
let TSFlags{62} = isRegisterStore;
|
||||
}
|
||||
|
||||
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
|
||||
|
@ -101,7 +102,9 @@ def FP_ONE : PatLeaf <
|
|||
[{return N->isExactlyValue(1.0);}]
|
||||
>;
|
||||
|
||||
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
|
||||
let isCodeGenOnly = 1, isPseudo = 1 in {
|
||||
|
||||
let usesCustomInserter = 1 in {
|
||||
|
||||
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
|
||||
(outs rc:$dst),
|
||||
|
@ -131,7 +134,31 @@ def SHADER_TYPE : AMDGPUShaderInst <
|
|||
[(int_AMDGPU_shader_type imm:$type)]
|
||||
>;
|
||||
|
||||
} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
|
||||
} // usesCustomInserter = 1
|
||||
|
||||
// Defines a RegisterLoad / RegisterStore instruction pair for one
// combination of value register class (dstClass), address operand
// (addrClass) and address-matching complex pattern (addrPat).
// Both instructions take an extra i32 immediate, $chan, which is matched as
// a target immediate (timm) and does not appear in the asm string.
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
|
||||
ComplexPattern addrPat> {
|
||||
// Selected from AMDGPUregister_load; tagged with isRegisterLoad.
def RegisterLoad : AMDGPUShaderInst <
|
||||
(outs dstClass:$dst),
|
||||
(ins addrClass:$addr, i32imm:$chan),
|
||||
"RegisterLoad $dst, $addr",
|
||||
[(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
|
||||
(i32 timm:$chan)))]
|
||||
> {
|
||||
let isRegisterLoad = 1;
|
||||
}
|
||||

||||
// Selected from AMDGPUregister_store; it has no outputs and is tagged with
// isRegisterStore.
def RegisterStore : AMDGPUShaderInst <
|
||||
(outs),
|
||||
(ins dstClass:$val, addrClass:$addr, i32imm:$chan),
|
||||
"RegisterStore $val, $addr",
|
||||
[(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
|
||||
> {
|
||||
let isRegisterStore = 1;
|
||||
}
|
||||
}
|
||||
|
||||
} // End isCodeGenOnly = 1, isPseudo = 1
|
||||
|
||||
/* Generic helper patterns for intrinsics */
|
||||
/* -------------------------------------- */
|
||||
|
|
|
@ -48,5 +48,28 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
|
|||
return 0;
|
||||
}
|
||||
|
||||
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
|
||||
|
||||
switch(IndirectIndex) {
|
||||
case 0: return AMDGPU::indirect_0;
|
||||
case 1: return AMDGPU::indirect_1;
|
||||
case 2: return AMDGPU::indirect_2;
|
||||
case 3: return AMDGPU::indirect_3;
|
||||
case 4: return AMDGPU::indirect_4;
|
||||
case 5: return AMDGPU::indirect_5;
|
||||
case 6: return AMDGPU::indirect_6;
|
||||
case 7: return AMDGPU::indirect_7;
|
||||
case 8: return AMDGPU::indirect_8;
|
||||
case 9: return AMDGPU::indirect_9;
|
||||
case 10: return AMDGPU::indirect_10;
|
||||
case 11: return AMDGPU::indirect_11;
|
||||
case 12: return AMDGPU::indirect_12;
|
||||
case 13: return AMDGPU::indirect_13;
|
||||
case 14: return AMDGPU::indirect_14;
|
||||
case 15: return AMDGPU::indirect_15;
|
||||
default: llvm_unreachable("indirect index out of range");
|
||||
}
|
||||
}
|
||||
|
||||
#define GET_REGINFO_TARGET_DESC
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
|
|
@ -57,6 +57,8 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
|
|||
RegScavenger *RS) const;
|
||||
unsigned getFrameRegister(const MachineFunction &MF) const;
|
||||
|
||||
unsigned getIndirectSubReg(unsigned IndirectIndex) const;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
|
|
@ -16,6 +16,14 @@ let Namespace = "AMDGPU" in {
|
|||
def sel_y : SubRegIndex;
|
||||
def sel_z : SubRegIndex;
|
||||
def sel_w : SubRegIndex;
|
||||
|
||||
|
||||
foreach Index = 0-15 in {
|
||||
def indirect_#Index : SubRegIndex;
|
||||
}
|
||||
|
||||
def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
|
||||
|
||||
}
|
||||
|
||||
include "R600RegisterInfo.td"
|
||||
|
|
|
@ -102,6 +102,12 @@ AMDGPUPassConfig::addPreISel() {
|
|||
bool AMDGPUPassConfig::addInstSelector() {
|
||||
addPass(createAMDGPUPeepholeOpt(*TM));
|
||||
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
|
||||
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
// This callbacks this pass uses are not implemented yet on SI.
|
||||
addPass(createAMDGPUIndirectAddressingPass(*TM));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -15,9 +15,9 @@
|
|||
#ifndef AMDGPU_TARGET_MACHINE_H
|
||||
#define AMDGPU_TARGET_MACHINE_H
|
||||
|
||||
#include "AMDGPUFrameLowering.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILFrameLowering.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "R600ISelLowering.h"
|
||||
#include "llvm/ADT/OwningPtr.h"
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
/// \brief Interface to describe a layout of a stack frame on a AMDGPU target
|
||||
/// machine.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDILFrameLowering.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
|
||||
int LAO, unsigned TransAl)
|
||||
: TargetFrameLowering(D, StackAl, LAO, TransAl) {
|
||||
}
|
||||
|
||||
AMDGPUFrameLowering::~AMDGPUFrameLowering() {
|
||||
}
|
||||
|
||||
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
|
||||
int FI) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
return MFI->getObjectOffset(FI);
|
||||
}
|
||||
|
||||
const TargetFrameLowering::SpillSlot *
|
||||
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
|
||||
NumEntries = 0;
|
||||
return 0;
|
||||
}
|
||||
void
|
||||
AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
}
|
||||
void
|
||||
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
|
||||
}
|
||||
bool
|
||||
AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
return false;
|
||||
}
|
|
@ -75,6 +75,7 @@ private:
|
|||
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
|
||||
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
|
||||
|
||||
// Include the pieces autogenerated from the target description.
|
||||
#include "AMDGPUGenDAGISel.inc"
|
||||
|
@ -161,16 +162,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||
}
|
||||
switch (Opc) {
|
||||
default: break;
|
||||
case ISD::FrameIndex: {
|
||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
|
||||
unsigned int FI = FIN->getIndex();
|
||||
EVT OpVT = N->getValueType(0);
|
||||
unsigned int NewOpc = AMDGPU::COPY;
|
||||
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
|
||||
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::ConstantFP:
|
||||
case ISD::Constant: {
|
||||
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
|
@ -613,3 +604,22 @@ bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Complex-pattern selector that splits \p Addr into a (\p Base, \p Offset)
/// pair for indirect addressing. Always succeeds.
///
///  - A constant address becomes INDIRECT_BASE_ADDR + constant.
///  - A base-plus-constant expression becomes operand 0 + constant.
///  - Anything else is used as the base with a zero offset.
///
/// \param Addr   the address expression to decompose.
/// \param Base   receives the base register / value.
/// \param Offset receives the i32 target-constant offset.
/// \returns true in all cases.
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
    return true;
  }

  // isBaseWithConstantOffset() accepts ADD with a constant right-hand side,
  // and OR only when the constant's bits are known to be zero in the other
  // operand. An OR with overlapping bits is not an addition and must not be
  // folded into base + offset; it falls through to the generic case below.
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    ConstantSDNode *C = cast<ConstantSDNode>(Addr.getOperand(1));
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
    return true;
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
|
||||
|
|
|
@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
|
|||
AMDILDevice.cpp
|
||||
AMDILDeviceInfo.cpp
|
||||
AMDILEvergreenDevice.cpp
|
||||
AMDILFrameLowering.cpp
|
||||
AMDILIntrinsicInfo.cpp
|
||||
AMDILISelDAGToDAG.cpp
|
||||
AMDILISelLowering.cpp
|
||||
|
@ -25,6 +24,8 @@ add_llvm_target(R600CodeGen
|
|||
AMDILPeepholeOptimizer.cpp
|
||||
AMDILSIDevice.cpp
|
||||
AMDGPUAsmPrinter.cpp
|
||||
AMDGPUFrameLowering.cpp
|
||||
AMDGPUIndirectAddressing.cpp
|
||||
AMDGPUMCInstLower.cpp
|
||||
AMDGPUSubtarget.cpp
|
||||
AMDGPUStructurizeCFG.cpp
|
||||
|
|
|
@ -105,10 +105,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
|
|||
|
||||
void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
const MCOperand &Op = MI->getOperand(OpNo);
|
||||
if (Op.getImm() != 0) {
|
||||
O << " + " << Op.getImm();
|
||||
}
|
||||
printIfSet(MI, OpNo, O, "+");
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
|
||||
|
|
|
@ -49,6 +49,9 @@ namespace R600_InstFlag {
|
|||
#define HW_REG_MASK 0x1ff
|
||||
#define HW_CHAN_SHIFT 9
|
||||
|
||||
#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
|
||||
#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
|
||||
|
||||
namespace R600Operands {
|
||||
enum Ops {
|
||||
DST,
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "R600Defines.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
|
@ -71,11 +72,23 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
|||
setOperationAction(ISD::SELECT, MVT::i32, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::f32, Custom);
|
||||
|
||||
// Legalize loads and stores to the private address space.
|
||||
setOperationAction(ISD::LOAD, MVT::i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i8, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
|
||||
|
||||
setTargetDAGCombine(ISD::FP_ROUND);
|
||||
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
|
||||
|
||||
|
@ -350,6 +363,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
|||
case ISD::STORE: return LowerSTORE(Op, DAG);
|
||||
case ISD::LOAD: return LowerLOAD(Op, DAG);
|
||||
case ISD::FPOW: return LowerFPOW(Op, DAG);
|
||||
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
|
||||
case ISD::INTRINSIC_VOID: {
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
unsigned IntrinsicID =
|
||||
|
@ -485,6 +499,10 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
|
||||
return;
|
||||
}
|
||||
case ISD::STORE:
|
||||
SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
|
||||
Results.push_back(SDValue(Node, 0));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -552,6 +570,20 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
|
|||
false, false, false, 0);
|
||||
}
|
||||
|
||||
/// Lowers an ISD::FrameIndex node to an i32 constant.
///
/// The constant is the frame lowering's offset for the frame index multiplied
/// by 4 * stack width. That factor matches the right shift applied later by
/// stackPtrToRegIndex() (2, 3 or 4 bits for stack widths 1, 2 or 4).
///
/// \param Op  the FrameIndex node to lower; must be a FrameIndexSDNode.
/// \param DAG the DAG in which the replacement constant is created.
/// \returns the i32 constant node.
SDValue R600TargetLowering::LowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(
          getTargetMachine().getFrameLowering());

  // cast<> carries the type assertion itself, so no dyn_cast + assert pair is
  // needed (same idiom as the StoreSDNode cast in LowerSTORE).
  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}
|
||||
|
||||
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
@ -766,6 +798,61 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
return Cond;
|
||||
}
|
||||
|
||||
/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
|
||||
/// convert these pointers to a register index. Each register holds
|
||||
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
|
||||
/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
|
||||
/// for indirect addressing.
|
||||
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
|
||||
unsigned StackWidth,
|
||||
SelectionDAG &DAG) const {
|
||||
unsigned SRLPad;
|
||||
switch(StackWidth) {
|
||||
case 1:
|
||||
SRLPad = 2;
|
||||
break;
|
||||
case 2:
|
||||
SRLPad = 3;
|
||||
break;
|
||||
case 4:
|
||||
SRLPad = 4;
|
||||
break;
|
||||
default: llvm_unreachable("Invalid stack width");
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
|
||||
DAG.getConstant(SRLPad, MVT::i32));
|
||||
}
|
||||
|
||||
void R600TargetLowering::getStackAddress(unsigned StackWidth,
|
||||
unsigned ElemIdx,
|
||||
unsigned &Channel,
|
||||
unsigned &PtrIncr) const {
|
||||
switch (StackWidth) {
|
||||
default:
|
||||
case 1:
|
||||
Channel = 0;
|
||||
if (ElemIdx > 0) {
|
||||
PtrIncr = 1;
|
||||
} else {
|
||||
PtrIncr = 0;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
Channel = ElemIdx % 2;
|
||||
if (ElemIdx == 2) {
|
||||
PtrIncr = 1;
|
||||
} else {
|
||||
PtrIncr = 0;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
Channel = ElemIdx;
|
||||
PtrIncr = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
|
||||
|
@ -787,7 +874,52 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
|||
}
|
||||
return Chain;
|
||||
}
|
||||
return SDValue();
|
||||
|
||||
EVT ValueVT = Value.getValueType();
|
||||
|
||||
if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Lowering for indirect addressing
|
||||
|
||||
const MachineFunction &MF = DAG.getMachineFunction();
|
||||
const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
|
||||
getTargetMachine().getFrameLowering());
|
||||
unsigned StackWidth = TFL->getStackWidth(MF);
|
||||
|
||||
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
|
||||
|
||||
if (ValueVT.isVector()) {
|
||||
unsigned NumElemVT = ValueVT.getVectorNumElements();
|
||||
EVT ElemVT = ValueVT.getVectorElementType();
|
||||
SDValue Stores[4];
|
||||
|
||||
assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
|
||||
"vector width in load");
|
||||
|
||||
for (unsigned i = 0; i < NumElemVT; ++i) {
|
||||
unsigned Channel, PtrIncr;
|
||||
getStackAddress(StackWidth, i, Channel, PtrIncr);
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
|
||||
DAG.getConstant(PtrIncr, MVT::i32));
|
||||
SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
|
||||
Value, DAG.getConstant(i, MVT::i32));
|
||||
|
||||
Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
|
||||
Chain, Elem, Ptr,
|
||||
DAG.getTargetConstant(Channel, MVT::i32));
|
||||
}
|
||||
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
|
||||
} else {
|
||||
if (ValueVT == MVT::i8) {
|
||||
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
|
||||
}
|
||||
Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
|
||||
DAG.getTargetConstant(0, MVT::i32)); // Channel
|
||||
}
|
||||
|
||||
return Chain;
|
||||
}
|
||||
|
||||
// return (512 + (kc_bank << 12)
|
||||
|
@ -876,7 +1008,53 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
|
|||
return DAG.getMergeValues(MergedValues, 2, DL);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Lowering for indirect addressing
|
||||
const MachineFunction &MF = DAG.getMachineFunction();
|
||||
const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
|
||||
getTargetMachine().getFrameLowering());
|
||||
unsigned StackWidth = TFL->getStackWidth(MF);
|
||||
|
||||
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
|
||||
|
||||
if (VT.isVector()) {
|
||||
unsigned NumElemVT = VT.getVectorNumElements();
|
||||
EVT ElemVT = VT.getVectorElementType();
|
||||
SDValue Loads[4];
|
||||
|
||||
assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
|
||||
"vector width in load");
|
||||
|
||||
for (unsigned i = 0; i < NumElemVT; ++i) {
|
||||
unsigned Channel, PtrIncr;
|
||||
getStackAddress(StackWidth, i, Channel, PtrIncr);
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
|
||||
DAG.getConstant(PtrIncr, MVT::i32));
|
||||
Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
|
||||
Chain, Ptr,
|
||||
DAG.getTargetConstant(Channel, MVT::i32),
|
||||
Op.getOperand(2));
|
||||
}
|
||||
for (unsigned i = NumElemVT; i < 4; ++i) {
|
||||
Loads[i] = DAG.getUNDEF(ElemVT);
|
||||
}
|
||||
EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
|
||||
LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
|
||||
} else {
|
||||
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
|
||||
Chain, Ptr,
|
||||
DAG.getTargetConstant(0, MVT::i32), // Channel
|
||||
Op.getOperand(2));
|
||||
}
|
||||
|
||||
SDValue Ops[2];
|
||||
Ops[0] = LoweredLoad;
|
||||
Ops[1] = Chain;
|
||||
|
||||
return DAG.getMergeValues(Ops, 2, DL);
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
|
||||
|
|
|
@ -64,7 +64,12 @@ private:
|
|||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
|
||||
SelectionDAG &DAG) const;
|
||||
void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
|
||||
unsigned &Channel, unsigned &PtrIncr) const;
|
||||
bool isZero(SDValue Op) const;
|
||||
};
|
||||
|
||||
|
|
|
@ -16,8 +16,11 @@
|
|||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "R600Defines.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
#define GET_INSTRINFO_CTOR
|
||||
#include "AMDGPUGenDFAPacketizer.inc"
|
||||
|
@ -465,6 +468,124 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
|||
return 2;
|
||||
}
|
||||
|
||||
int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
int Offset = 0;
|
||||
|
||||
if (MFI->getNumObjects() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (MRI.livein_empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
|
||||
LE = MRI.livein_end();
|
||||
LI != LE; ++LI) {
|
||||
Offset = std::max(Offset,
|
||||
GET_REG_INDEX(RI.getEncodingValue(LI->first)));
|
||||
}
|
||||
|
||||
return Offset + 1;
|
||||
}
|
||||
|
||||
int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
|
||||
int Offset = 0;
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
|
||||
// Variable sized objects are not supported
|
||||
assert(!MFI->hasVarSizedObjects());
|
||||
|
||||
if (MFI->getNumObjects() == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
|
||||
|
||||
return getIndirectIndexBegin(MF) + Offset;
|
||||
}
|
||||
|
||||
std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
|
||||
const MachineFunction &MF) const {
|
||||
const AMDGPUFrameLowering *TFL =
|
||||
static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
|
||||
std::vector<unsigned> Regs;
|
||||
|
||||
unsigned StackWidth = TFL->getStackWidth(MF);
|
||||
int End = getIndirectIndexEnd(MF);
|
||||
|
||||
if (End == -1) {
|
||||
return Regs;
|
||||
}
|
||||
|
||||
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
|
||||
unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
|
||||
Regs.push_back(SuperReg);
|
||||
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
|
||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
|
||||
Regs.push_back(Reg);
|
||||
}
|
||||
}
|
||||
return Regs;
|
||||
}
|
||||
|
||||
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
|
||||
unsigned Channel) const {
|
||||
// XXX: Remove when we support a stack width > 2
|
||||
assert(Channel == 0);
|
||||
return RegIndex;
|
||||
}
|
||||
|
||||
/// \returns the register class used for indirect stores. \p SourceReg is
/// ignored; the result is always R600_TReg32.
const TargetRegisterClass *
R600InstrInfo::getIndirectAddrStoreRegClass(unsigned SourceReg) const {
  return &AMDGPU::R600_TReg32RegClass;
}
|
||||
|
||||
/// \returns the register class used for indirect loads, which is always
/// TRegMem.
const TargetRegisterClass *
R600InstrInfo::getIndirectAddrLoadRegClass() const {
  return &AMDGPU::TRegMemRegClass;
}
|
||||
|
||||
/// Emits the two-instruction sequence for an indirect register write before
/// \p I in \p MBB:
///   1. MOVA_INT_eg copying \p OffsetReg into AR_X, with its WRITE operand
///      set to 0.
///   2. MOV of \p ValueReg into the R600_Addr register numbered \p Address,
///      with DST_REL set to 1 and an implicit use of AR_X.
///
/// \returns the builder for the MOV instruction.
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  const unsigned DstAddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);

  // Load the address register with the dynamic offset.
  MachineInstr *LoadAR = buildDefaultInstruction(
      *MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg);
  setImmOperand(LoadAR, R600Operands::WRITE, 0);

  // Relative-destination move of the value.
  MachineInstrBuilder RelMov =
      buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstAddrReg, ValueReg);
  RelMov.addReg(AMDGPU::AR_X, RegState::Implicit);
  setImmOperand(RelMov, R600Operands::DST_REL, 1);

  return RelMov;
}
|
||||
|
||||
/// Emits the two-instruction sequence for an indirect register read before
/// \p I in \p MBB:
///   1. MOVA_INT_eg copying \p OffsetReg into AR_X, with its WRITE operand
///      set to 0.
///   2. MOV into \p ValueReg from the R600_Addr register numbered
///      \p Address, with SRC0_REL set to 1 and an implicit use of AR_X.
///
/// \returns the builder for the MOV instruction.
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  const unsigned SrcAddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);

  // Load the address register with the dynamic offset.
  MachineInstr *LoadAR = buildDefaultInstruction(
      *MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg);
  setImmOperand(LoadAR, R600Operands::WRITE, 0);

  // Relative-source move into the destination value register.
  MachineInstrBuilder RelMov =
      buildDefaultInstruction(*MBB, I, AMDGPU::MOV, ValueReg, SrcAddrReg);
  RelMov.addReg(AMDGPU::AR_X, RegState::Implicit);
  setImmOperand(RelMov, R600Operands::SRC0_REL, 1);

  return RelMov;
}
|
||||
|
||||
/// \returns the IndirectReg register class, i.e. the class holding the super
/// register that covers the indirectly addressable registers.
const TargetRegisterClass *
R600InstrInfo::getSuperIndirectRegClass() const {
  return &AMDGPU::IndirectRegRegClass;
}
|
||||
|
||||
|
||||
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned Opcode,
|
||||
|
|
|
@ -113,6 +113,38 @@ namespace llvm {
|
|||
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
||||
SDNode *Node) const { return 1;}
|
||||
|
||||
/// \returns a list of all the registers that may be accessed using indirect
|
||||
/// addressing.
|
||||
std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
|
||||
|
||||
virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
|
||||
|
||||
virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
|
||||
|
||||
|
||||
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
|
||||
unsigned Channel) const;
|
||||
|
||||
virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
|
||||
unsigned SourceReg) const;
|
||||
|
||||
virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
|
||||
|
||||
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg, unsigned Address,
|
||||
unsigned OffsetReg) const;
|
||||
|
||||
virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg, unsigned Address,
|
||||
unsigned OffsetReg) const;
|
||||
|
||||
virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
|
||||
|
||||
|
||||
///buildDefaultInstruction - This function returns a MachineInstr with
|
||||
/// all the instruction modifiers initialized to their default values.
|
||||
/// You can use this function to avoid manually specifying each instruction
|
||||
/// modifier operand when building a new instruction.
|
||||
///
|
||||
|
|
|
@ -91,11 +91,16 @@ def UP : InstFlag <"printUpdatePred">;
|
|||
// default to 0.
|
||||
def LAST : InstFlag<"printLast", 1>;
|
||||
|
||||
def FRAMEri : Operand<iPTR> {
|
||||
let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
|
||||
}
|
||||
|
||||
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
|
||||
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
|
||||
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
|
||||
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
|
||||
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
|
||||
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
|
||||
|
||||
class R600ALU_Word0 {
|
||||
field bits<32> Word0;
|
||||
|
@ -1220,6 +1225,10 @@ let Predicates = [isEGorCayman] in {
|
|||
defm DOT4_eg : DOT4_Common<0xBE>;
|
||||
defm CUBE_eg : CUBE_Common<0xC0>;
|
||||
|
||||
let hasSideEffects = 1 in {
|
||||
def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
|
||||
}
|
||||
|
||||
def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
|
||||
|
||||
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
|
||||
|
@ -1470,6 +1479,12 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
|
|||
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register loads and stores - for indirect addressing
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
|
||||
|
||||
let Predicates = [isCayman] in {
|
||||
|
||||
let isVector = 1 in {
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#ifndef R600MACHINEFUNCTIONINFO_H
|
||||
#define R600MACHINEFUNCTIONINFO_H
|
||||
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include <vector>
|
||||
|
@ -24,6 +25,7 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
|
|||
public:
|
||||
R600MachineFunctionInfo(const MachineFunction &MF);
|
||||
SmallVector<unsigned, 4> LiveOuts;
|
||||
std::vector<unsigned> IndirectRegs;
|
||||
SDNode *Outputs[16];
|
||||
};
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "R600RegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "R600Defines.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -43,6 +44,18 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
|||
Reserved.set(AMDGPU::PRED_SEL_ZERO);
|
||||
Reserved.set(AMDGPU::PRED_SEL_ONE);
|
||||
|
||||
for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
|
||||
E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
|
||||
Reserved.set(*I);
|
||||
}
|
||||
|
||||
const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
|
||||
std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
|
||||
for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
|
||||
E = IndirectRegs.end();
|
||||
I != E; ++I) {
|
||||
Reserved.set(*I);
|
||||
}
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
|
@ -77,3 +90,4 @@ unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
|
|||
case 3: return AMDGPU::sel_w;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,6 +27,12 @@ foreach Index = 0-127 in {
|
|||
foreach Chan = [ "X", "Y", "Z", "W" ] in {
|
||||
// 32-bit Temporary Registers
|
||||
def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
|
||||
|
||||
// Indirect addressing offset registers
|
||||
def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
|
||||
Index, Chan>;
|
||||
def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
|
||||
Chan>;
|
||||
}
|
||||
// 128-bit Temporary Registers
|
||||
def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
|
||||
|
@ -57,6 +63,7 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
|
|||
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
|
||||
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
|
||||
def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
|
||||
def AR_X : R600Reg<"AR.x", 0>;
|
||||
|
||||
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "ArrayBase%u", 448, 464))>;
|
||||
|
@ -66,6 +73,13 @@ def ALU_CONST : R600Reg<"CBuf", 0>;
|
|||
// interpolation param reference, SRCx_SEL contains index
|
||||
def ALU_PARAM : R600Reg<"Param", 0>;
|
||||
|
||||
let isAllocatable = 0 in {

// XXX: Only use the X channel, until we support wider stack widths
// The third RegisterClass operand is the alignment in bits. These are 32-bit
// registers, so use 32 like the other 32-bit classes in this file.
def R600_Addr : RegisterClass <"AMDGPU", [i32], 32,
                               (add (sequence "Addr%u_X", 0, 127))>;

} // End isAllocatable = 0
|
||||
|
||||
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "T%u_X", 0, 127))>;
|
||||
|
||||
|
@ -85,6 +99,7 @@ def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
|
|||
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
R600_TReg32,
|
||||
R600_ArrayBase,
|
||||
R600_Addr,
|
||||
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
|
||||
ALU_CONST, ALU_PARAM
|
||||
)>;
|
||||
|
@ -99,3 +114,34 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
|
|||
(add (sequence "T%u_XYZW", 0, 127))> {
|
||||
let CopyCost = -1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes for indirect addressing
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Super register for all the Indirect Registers. This register class is used
|
||||
// by the REG_SEQUENCE instruction to specify the registers to use for direct
|
||||
// reads / writes which may be written / read by an indirect address.
|
||||
class IndirectSuper<string n, list<Register> subregs> :
|
||||
RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices =
|
||||
[indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
|
||||
indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
|
||||
indirect_13,indirect_14,indirect_15];
|
||||
}
|
||||
|
||||
def IndirectSuperReg : IndirectSuper<"Indirect",
|
||||
[TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
|
||||
TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
|
||||
TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
|
||||
>;
|
||||
|
||||
def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
|
||||
|
||||
// This register class defines the registers that are the storage units for
|
||||
// the "Indirect Addressing" pseudo memory space.
|
||||
// XXX: Only use the X channel, until we support wider stack widths
|
||||
def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "TRegMem%u_X", 0, 16))
|
||||
>;
|
||||
|
|
|
@ -87,3 +87,51 @@ bool
|
|||
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
||||
return RC != &AMDGPU::EXECRegRegClass;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Indirect addressing callbacks
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
|
||||
unsigned Channel) const {
|
||||
assert(Channel == 0);
|
||||
return RegIndex;
|
||||
}
|
||||
|
||||
|
||||
/// Not implemented for SI; reaching this is a fatal error.
int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  llvm_unreachable("Unimplemented");
}
|
||||
|
||||
/// Not implemented for SI; reaching this is a fatal error.
int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  llvm_unreachable("Unimplemented");
}
|
||||
|
||||
/// Not implemented for SI; reaching this is a fatal error.
const TargetRegisterClass *
SIInstrInfo::getIndirectAddrStoreRegClass(unsigned SourceReg) const {
  llvm_unreachable("Unimplemented");
}
|
||||
|
||||
/// Not implemented for SI; reaching this is a fatal error.
const TargetRegisterClass *
SIInstrInfo::getIndirectAddrLoadRegClass() const {
  llvm_unreachable("Unimplemented");
}
|
||||
|
||||
/// Not implemented for SI; reaching this is a fatal error.
MachineInstrBuilder
SIInstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                MachineBasicBlock::iterator I,
                                unsigned ValueReg, unsigned Address,
                                unsigned OffsetReg) const {
  llvm_unreachable("Unimplemented");
}
|
||||
|
||||
/// Not implemented for SI; reaching this is a fatal error.
MachineInstrBuilder
SIInstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator I,
                               unsigned ValueReg, unsigned Address,
                               unsigned OffsetReg) const {
  llvm_unreachable("Unimplemented");
}
|
||||
|
||||
/// Not implemented for SI; reaching this is a fatal error.
const TargetRegisterClass *
SIInstrInfo::getSuperIndirectRegClass() const {
  llvm_unreachable("Unimplemented");
}
|
||||
|
|
|
@ -48,6 +48,32 @@ public:
|
|||
virtual bool isMov(unsigned Opcode) const;
|
||||
|
||||
virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
|
||||
|
||||
virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
|
||||
|
||||
virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
|
||||
|
||||
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
|
||||
unsigned Channel) const;
|
||||
|
||||
virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
|
||||
unsigned SourceReg) const;
|
||||
|
||||
virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
|
||||
|
||||
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg,
|
||||
unsigned Address,
|
||||
unsigned OffsetReg) const;
|
||||
|
||||
virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned ValueReg,
|
||||
unsigned Address,
|
||||
unsigned OffsetReg) const;
|
||||
|
||||
virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
|
Loading…
Reference in New Issue