forked from OSchip/llvm-project
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning. Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP. Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris Differential Revision: https://reviews.llvm.org/D26730 llvm-svn: 293551
This commit is contained in:
parent
2bf8c9d381
commit
ca16621b2a
|
@ -562,5 +562,6 @@ include "Processors.td"
|
||||||
include "AMDGPUInstrInfo.td"
|
include "AMDGPUInstrInfo.td"
|
||||||
include "AMDGPUIntrinsics.td"
|
include "AMDGPUIntrinsics.td"
|
||||||
include "AMDGPURegisterInfo.td"
|
include "AMDGPURegisterInfo.td"
|
||||||
|
include "AMDGPURegisterBanks.td"
|
||||||
include "AMDGPUInstructions.td"
|
include "AMDGPUInstructions.td"
|
||||||
include "AMDGPUCallingConv.td"
|
include "AMDGPUCallingConv.td"
|
||||||
|
|
|
@ -14,8 +14,13 @@
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "AMDGPUCallLowering.h"
|
#include "AMDGPUCallLowering.h"
|
||||||
|
#include "AMDGPU.h"
|
||||||
#include "AMDGPUISelLowering.h"
|
#include "AMDGPUISelLowering.h"
|
||||||
|
#include "AMDGPUSubtarget.h"
|
||||||
|
#include "SIISelLowering.h"
|
||||||
|
#include "SIRegisterInfo.h"
|
||||||
|
#include "SIMachineFunctionInfo.h"
|
||||||
|
#include "llvm/CodeGen/CallingConvLower.h"
|
||||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
|
||||||
|
@ -30,13 +35,135 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
|
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
|
||||||
const Value *Val, unsigned VReg) const {
|
const Value *Val, unsigned VReg) const {
|
||||||
|
MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build the address of a kernel argument.
///
/// Emits a 64-bit constant holding \p Offset and a GEP that adds it to the
/// virtual register mapped to the preloaded KERNARG_SEGMENT_PTR.
///
/// \param MIRBuilder Builder used to emit the constant and the GEP.
/// \param ParamTy    Pointee type used to form the CONSTANT_ADDRESS pointer
///                   type of the result register.
/// \param Offset     Value added to the kernarg segment pointer.
/// \returns the newly created generic virtual register holding the address.
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               unsigned Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const DataLayout &Layout = MF.getFunction()->getParent()->getDataLayout();

  // The result register is created first so register numbering stays stable.
  LLT ResultTy(*PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS), Layout);
  unsigned ResultReg = MRI.createGenericVirtualRegister(ResultTy);

  // Look up the virtual register that was bound to the kernarg segment
  // pointer live-in.
  const SIRegisterInfo *RegInfo =
      MF.getSubtarget<SISubtarget>().getRegisterInfo();
  unsigned KernArgBaseReg = MRI.getLiveInVirtReg(
      RegInfo->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR));

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);
  MIRBuilder.buildGEP(ResultReg, KernArgBaseReg, OffsetReg);

  return ResultReg;
}
|
||||||
|
|
||||||
|
/// Load one kernel argument into \p DstReg.
///
/// The address is produced by lowerParameterPtr(); the load is tagged with a
/// memory operand whose pointer value is an undef CONSTANT_ADDRESS pointer and
/// whose flags are MOLoad | MONonTemporal | MOInvariant.
///
/// \param MIRBuilder Builder used to emit the address computation and load.
/// \param ParamTy    IR type of the argument; determines the memory operand's
///                   size (store size) and alignment (ABI alignment).
/// \param Offset     Offset of the argument, forwarded to lowerParameterPtr().
/// \param DstReg     Register that receives the loaded value.
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, unsigned Offset,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &Layout = MF.getFunction()->getParent()->getDataLayout();

  MachinePointerInfo KernArgInfo(
      UndefValue::get(PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS)));
  unsigned StoreSize = Layout.getTypeStoreSize(ParamTy);
  unsigned ABIAlign = Layout.getABITypeAlignment(ParamTy);

  unsigned AddrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  auto LoadFlags = MachineMemOperand::MOLoad |
                   MachineMemOperand::MONonTemporal |
                   MachineMemOperand::MOInvariant;
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(KernArgInfo, LoadFlags, StoreSize, ABIAlign);

  MIRBuilder.buildLoad(DstReg, AddrReg, *MMO);
}
|
||||||
|
|
||||||
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
|
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
|
||||||
const Function &F,
|
const Function &F,
|
||||||
ArrayRef<unsigned> VRegs) const {
|
ArrayRef<unsigned> VRegs) const {
|
||||||
// TODO: Implement once there are generic loads/stores.
|
|
||||||
|
MachineFunction &MF = MIRBuilder.getMF();
|
||||||
|
const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
|
||||||
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||||
|
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
||||||
|
const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
|
||||||
|
const DataLayout &DL = F.getParent()->getDataLayout();
|
||||||
|
|
||||||
|
SmallVector<CCValAssign, 16> ArgLocs;
|
||||||
|
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
|
||||||
|
|
||||||
|
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
|
||||||
|
if (Info->hasPrivateSegmentBuffer()) {
|
||||||
|
unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
|
||||||
|
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
|
||||||
|
CCInfo.AllocateReg(PrivateSegmentBufferReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Info->hasDispatchPtr()) {
|
||||||
|
unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
|
||||||
|
// FIXME: Need to add reg as live-in
|
||||||
|
CCInfo.AllocateReg(DispatchPtrReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Info->hasQueuePtr()) {
|
||||||
|
unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
|
||||||
|
// FIXME: Need to add reg as live-in
|
||||||
|
CCInfo.AllocateReg(QueuePtrReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Info->hasKernargSegmentPtr()) {
|
||||||
|
unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
|
||||||
|
const LLT P2 = LLT::pointer(2, 64);
|
||||||
|
unsigned VReg = MRI.createGenericVirtualRegister(P2);
|
||||||
|
MRI.addLiveIn(InputPtrReg, VReg);
|
||||||
|
MIRBuilder.getMBB().addLiveIn(InputPtrReg);
|
||||||
|
MIRBuilder.buildCopy(VReg, InputPtrReg);
|
||||||
|
CCInfo.AllocateReg(InputPtrReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Info->hasDispatchID()) {
|
||||||
|
unsigned DispatchIDReg = Info->addDispatchID(*TRI);
|
||||||
|
// FIXME: Need to add reg as live-in
|
||||||
|
CCInfo.AllocateReg(DispatchIDReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Info->hasFlatScratchInit()) {
|
||||||
|
unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
|
||||||
|
// FIXME: Need to add reg as live-in
|
||||||
|
CCInfo.AllocateReg(FlatScratchInitReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned NumArgs = F.arg_size();
|
||||||
|
Function::const_arg_iterator CurOrigArg = F.arg_begin();
|
||||||
|
const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
|
||||||
|
for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
|
||||||
|
CurOrigArg->getType()->dump();
|
||||||
|
MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT();
|
||||||
|
ISD::ArgFlagsTy Flags;
|
||||||
|
Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
|
||||||
|
CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
|
||||||
|
/*IsVarArg=*/false);
|
||||||
|
bool Res =
|
||||||
|
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo);
|
||||||
|
assert(!Res && "Call operand has unhandled type");
|
||||||
|
(void)Res;
|
||||||
|
}
|
||||||
|
|
||||||
|
Function::const_arg_iterator Arg = F.arg_begin();
|
||||||
|
for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
|
||||||
|
// FIXME: We should be getting DebugInfo from the arguments some how.
|
||||||
|
CCValAssign &VA = ArgLocs[i];
|
||||||
|
lowerParameter(MIRBuilder, Arg->getType(),
|
||||||
|
VA.getLocMemOffset() +
|
||||||
|
Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,13 @@ namespace llvm {
|
||||||
class AMDGPUTargetLowering;
|
class AMDGPUTargetLowering;
|
||||||
|
|
||||||
class AMDGPUCallLowering: public CallLowering {
|
class AMDGPUCallLowering: public CallLowering {
|
||||||
|
|
||||||
|
unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
|
||||||
|
unsigned Offset) const;
|
||||||
|
|
||||||
|
void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
|
||||||
|
unsigned Offset, unsigned DstReg) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
|
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
|
||||||
|
|
||||||
|
@ -29,6 +36,7 @@ class AMDGPUCallLowering: public CallLowering {
|
||||||
unsigned VReg) const override;
|
unsigned VReg) const override;
|
||||||
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
|
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
|
||||||
ArrayRef<unsigned> VRegs) const override;
|
ArrayRef<unsigned> VRegs) const override;
|
||||||
|
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
|
||||||
};
|
};
|
||||||
} // End of namespace llvm;
|
} // End of namespace llvm;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This file defines all the static objects used by AMDGPURegisterBankInfo.
|
||||||
|
/// \todo This should be generated by TableGen.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_BUILD_GLOBAL_ISEL
|
||||||
|
#error "You shouldn't build this"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
namespace AMDGPU {
|
||||||
|
|
||||||
|
enum PartialMappingIdx {
|
||||||
|
None = - 1,
|
||||||
|
PM_SGPR32 = 0,
|
||||||
|
PM_SGPR64 = 1,
|
||||||
|
PM_VGPR32 = 2,
|
||||||
|
PM_VGPR64 = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
const RegisterBankInfo::PartialMapping PartMappings[] {
|
||||||
|
// StartIdx, Length, RegBank
|
||||||
|
{0, 32, SGPRRegBank},
|
||||||
|
{0, 64, SGPRRegBank},
|
||||||
|
{0, 32, VGPRRegBank},
|
||||||
|
{0, 64, VGPRRegBank}
|
||||||
|
};
|
||||||
|
|
||||||
|
const RegisterBankInfo::ValueMapping ValMappings[] {
|
||||||
|
// SGPR 32-bit
|
||||||
|
{&PartMappings[0], 1},
|
||||||
|
// SGPR 64-bit
|
||||||
|
{&PartMappings[1], 1},
|
||||||
|
// VGPR 32-bit
|
||||||
|
{&PartMappings[2], 1},
|
||||||
|
// VGPR 64-bit
|
||||||
|
{&PartMappings[3], 1}
|
||||||
|
};
|
||||||
|
|
||||||
|
enum ValueMappingIdx {
|
||||||
|
SGPRStartIdx = 0,
|
||||||
|
VGPRStartIdx = 2
|
||||||
|
};
|
||||||
|
|
||||||
|
const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
|
||||||
|
unsigned Size) {
|
||||||
|
assert(Size % 32 == 0);
|
||||||
|
unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
|
||||||
|
Idx += (Size / 32) - 1;
|
||||||
|
return &ValMappings[Idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
} // End AMDGPU namespace.
|
||||||
|
} // End llvm namespace.
|
|
@ -15,6 +15,7 @@
|
||||||
|
|
||||||
#include "AMDGPUISelLowering.h"
|
#include "AMDGPUISelLowering.h"
|
||||||
#include "AMDGPU.h"
|
#include "AMDGPU.h"
|
||||||
|
#include "AMDGPUCallLowering.h"
|
||||||
#include "AMDGPUFrameLowering.h"
|
#include "AMDGPUFrameLowering.h"
|
||||||
#include "AMDGPUIntrinsicInfo.h"
|
#include "AMDGPUIntrinsicInfo.h"
|
||||||
#include "AMDGPURegisterInfo.h"
|
#include "AMDGPURegisterInfo.h"
|
||||||
|
@ -670,6 +671,11 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
|
||||||
// TargetLowering Callbacks
|
// TargetLowering Callbacks
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
/// Return the calling-convention assignment function to use for a call.
///
/// \p CC and \p IsVarArg are currently ignored: CC_AMDGPU is returned
/// unconditionally.
CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  return CC_AMDGPU;
}
|
||||||
|
|
||||||
/// The SelectionDAGBuilder will automatically promote function arguments
|
/// The SelectionDAGBuilder will automatically promote function arguments
|
||||||
/// with illegal types. However, this does not work for the AMDGPU targets
|
/// with illegal types. However, this does not work for the AMDGPU targets
|
||||||
/// since the function arguments are stored in memory as these illegal types.
|
/// since the function arguments are stored in memory as these illegal types.
|
||||||
|
|
|
@ -0,0 +1,418 @@
|
||||||
|
//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This file implements the targeting of the InstructionSelector class for
|
||||||
|
/// AMDGPU.
|
||||||
|
/// \todo This should be generated by TableGen.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPUInstructionSelector.h"
|
||||||
|
#include "AMDGPUInstrInfo.h"
|
||||||
|
#include "AMDGPURegisterBankInfo.h"
|
||||||
|
#include "AMDGPURegisterInfo.h"
|
||||||
|
#include "AMDGPUSubtarget.h"
|
||||||
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstr.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/IR/Type.h"
|
||||||
|
#include "llvm/Support/Debug.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "amdgpu-isel"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Cache the instruction info and register info of \p STI together with the
/// register bank info \p RBI; all three are held by reference.
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI) {}
|
||||||
|
|
||||||
|
/// Produce a 32-bit operand for one half of the 64-bit operand \p MO.
///
/// For a register operand, a COPY of the requested sub-register (composed
/// with any sub-register index already on \p MO) into a fresh SGPR_32 virtual
/// register is inserted immediately before the instruction that owns \p MO,
/// and a register operand for that new register is returned with \p MO's
/// flags carried over. For an immediate operand, the low (sub0) or high
/// (sub1) 32 bits are returned as an immediate and nothing is inserted.
///
/// \param MO     Register or immediate operand to split.
/// \param SubIdx AMDGPU::sub0 or AMDGPU::sub1 for immediates; any
///               sub-register index valid for the register otherwise.
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  if (MO.isReg()) {
    MachineInstr *MI = MO.getParent();
    MachineBasicBlock *BB = MI->getParent();
    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

    // Only the register path needs a destination register; creating it here
    // avoids leaving an unused virtual register behind for immediates.
    unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}
|
||||||
|
|
||||||
|
/// Select a 64-bit scalar add as S_ADD_U32 / S_ADDC_U32 on the two halves,
/// recombined with a REG_SEQUENCE into the original destination.
///
/// \param I The generic add (also used for G_GEP) to replace.
/// \returns false, leaving \p I and the function untouched, when the
///          destination is not 64 bits wide; true after \p I was replaced.
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);

  // Bail out before creating any registers so a failed selection leaves no
  // orphan virtual registers behind.
  if (Size != 64)
    return false;

  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  DebugLoc DL = I.getDebugLoc();

  // getSubOperand64() inserts its COPYs directly in front of I, and so does
  // every BuildMI below. Materialize all four halves first so that each COPY
  // is guaranteed to precede the add that reads it, independent of how the
  // compiler orders the evaluation of chained-call arguments.
  MachineOperand Lo1 = getSubOperand64(I.getOperand(1), AMDGPU::sub0);
  MachineOperand Lo2 = getSubOperand64(I.getOperand(2), AMDGPU::sub0);
  MachineOperand Hi1 = getSubOperand64(I.getOperand(1), AMDGPU::sub1);
  MachineOperand Hi2 = getSubOperand64(I.getOperand(2), AMDGPU::sub1);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  // The original virtual registers are still referenced by the new
  // instructions; pin each of them to the 64-bit SGPR class.
  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}
|
||||||
|
|
||||||
|
/// Select a G_GEP by handing it to selectG_ADD() unchanged; the result is
/// whatever selectG_ADD() reports.
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  const bool Selected = selectG_ADD(I);
  return Selected;
}
|
||||||
|
|
||||||
|
/// Replace the generic store \p I with a FLAT_STORE_DWORD.
///
/// The new instruction takes \p I's operand 1 followed by its operand 0 and
/// three zero immediates. \p I is erased whether or not constraining the new
/// instruction's register operands succeeds.
///
/// \returns the result of constraining the new instruction's operands.
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock &MBB = *I.getParent();
  DebugLoc Loc = I.getDebugLoc();

  // FIXME: Select store instruction based on address space
  MachineInstr *FlatStore =
      BuildMI(MBB, &I, Loc, TII.get(AMDGPU::FLAT_STORE_DWORD))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)
          .addImm(0)
          .addImm(0);

  // With the opcode chosen, narrow the register operands to classes the
  // instruction accepts.
  const bool Constrained =
      constrainSelectedInstRegOperands(*FlatStore, TII, TRI, RBI);

  I.eraseFromParent();
  return Constrained;
}
|
||||||
|
|
||||||
|
/// Select a generic constant.
///
/// A 32-bit constant is rewritten in place to S_MOV_B32. Any other size is
/// asserted to be 64 bits and is split into two S_MOV_B32 (low half from
/// trunc(32), high half from ashr(32)) joined by a REG_SEQUENCE, after which
/// \p I is erased and the destination is constrained to SReg_64.
///
/// \returns whether constraining the result succeeded.
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (Size != 32) {
    assert(Size == 64);

    DebugLoc Loc = I.getDebugLoc();
    unsigned LowHalf = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    unsigned HighHalf = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    const APInt &Value = I.getOperand(1).getCImm()->getValue();

    BuildMI(MBB, &I, Loc, TII.get(AMDGPU::S_MOV_B32), LowHalf)
        .addImm(Value.trunc(32).getZExtValue());

    BuildMI(MBB, &I, Loc, TII.get(AMDGPU::S_MOV_B32), HighHalf)
        .addImm(Value.ashr(32).getZExtValue());

    BuildMI(MBB, &I, Loc, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(LowHalf)
        .addImm(AMDGPU::sub0)
        .addReg(HighHalf)
        .addImm(AMDGPU::sub1);

    // constrainSelectedInstRegOperands cannot be used for the REG_SEQUENCE
    // because it does not work for target independent opcodes.
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
  }

  // 32-bit case: only the opcode has to change.
  I.setDesc(TII.get(AMDGPU::S_MOV_B32));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
|
||||||
|
|
||||||
|
static bool isConstant(const MachineInstr &MI) {
|
||||||
|
return MI.getOpcode() == TargetOpcode::G_CONSTANT;
|
||||||
|
}
|
||||||
|
|
||||||
|
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
|
||||||
|
const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
|
||||||
|
|
||||||
|
const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
|
||||||
|
|
||||||
|
assert(PtrMI);
|
||||||
|
|
||||||
|
if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
|
||||||
|
return;
|
||||||
|
|
||||||
|
GEPInfo GEPInfo(*PtrMI);
|
||||||
|
|
||||||
|
for (unsigned i = 1, e = 3; i < e; ++i) {
|
||||||
|
const MachineOperand &GEPOp = PtrMI->getOperand(i);
|
||||||
|
const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
|
||||||
|
assert(OpDef);
|
||||||
|
if (isConstant(*OpDef)) {
|
||||||
|
// FIXME: Is it possible to have multiple Imm parts? Maybe if we
|
||||||
|
// are lacking other optimizations.
|
||||||
|
assert(GEPInfo.Imm == 0);
|
||||||
|
GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
|
||||||
|
if (OpBank->getID() == AMDGPU::SGPRRegBankID)
|
||||||
|
GEPInfo.SgprParts.push_back(GEPOp.getReg());
|
||||||
|
else
|
||||||
|
GEPInfo.VgprParts.push_back(GEPOp.getReg());
|
||||||
|
}
|
||||||
|
|
||||||
|
AddrInfo.push_back(GEPInfo);
|
||||||
|
getAddrModeInfo(*PtrMI, MRI, AddrInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isInstrUniform(const MachineInstr &MI) {
|
||||||
|
if (!MI.hasOneMemOperand())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const MachineMemOperand *MMO = *MI.memoperands_begin();
|
||||||
|
const Value *Ptr = MMO->getValue();
|
||||||
|
|
||||||
|
// UndefValue means this is a load of a kernel input. These are uniform.
|
||||||
|
// Sometimes LDS instructions have constant pointers.
|
||||||
|
// If Ptr is null, then that means this mem operand contains a
|
||||||
|
// PseudoSourceValue like GOT.
|
||||||
|
if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
|
||||||
|
isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
const Instruction *I = dyn_cast<Instruction>(Ptr);
|
||||||
|
return I && I->getMetadata("amdgpu.uniform");
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
|
||||||
|
|
||||||
|
if (LoadSize == 32)
|
||||||
|
return BaseOpcode;
|
||||||
|
|
||||||
|
switch (BaseOpcode) {
|
||||||
|
case AMDGPU::S_LOAD_DWORD_IMM:
|
||||||
|
switch (LoadSize) {
|
||||||
|
case 64:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX2_IMM;
|
||||||
|
case 128:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX4_IMM;
|
||||||
|
case 256:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX8_IMM;
|
||||||
|
case 512:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX16_IMM;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case AMDGPU::S_LOAD_DWORD_IMM_ci:
|
||||||
|
switch (LoadSize) {
|
||||||
|
case 64:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
|
||||||
|
case 128:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
|
||||||
|
case 256:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
|
||||||
|
case 512:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case AMDGPU::S_LOAD_DWORD_SGPR:
|
||||||
|
switch (LoadSize) {
|
||||||
|
case 64:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX2_SGPR;
|
||||||
|
case 128:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX4_SGPR;
|
||||||
|
case 256:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX8_SGPR;
|
||||||
|
case 512:
|
||||||
|
return AMDGPU::S_LOAD_DWORDX16_SGPR;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
llvm_unreachable("Invalid base smrd opcode or size");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \returns true iff at least one entry of \p AddrInfo recorded a register
/// in its VgprParts list.
bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (size_t Idx = 0, End = AddrInfo.size(); Idx != End; ++Idx)
    if (!AddrInfo[Idx].VgprParts.empty())
      return true;
  return false;
}
|
||||||
|
|
||||||
|
/// Try to select the load \p I as a scalar memory read.
///
/// Nothing is emitted unless \p I has exactly one memory operand in
/// CONSTANT_ADDRESS, isInstrUniform() holds, and no entry of \p AddrInfo has
/// VGPR parts. When the nearest GEP (AddrInfo[0]) has exactly one SGPR part,
/// its immediate is folded, trying in order: the _IMM form if the offset is a
/// legal SMRD immediate, the _IMM_ci form on SEA_ISLANDS if the encoded offset
/// fits in 32 unsigned bits, and the _SGPR form (offset moved into a register)
/// if the raw offset fits in 32 unsigned bits. Otherwise the load's own
/// pointer register is used with a zero immediate.
///
/// \p I itself is never erased here.
///
/// \returns false if the preconditions fail; otherwise the result of
///          constraining the emitted instruction's register operands.
bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand() ||
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return false;

  if (!isInstrUniform(I) || hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  const SISubtarget &Subtarget = MF.getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &Loc = I.getDebugLoc();
  const unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
    const GEPInfo &Info = AddrInfo[0];
    const unsigned BaseReg = Info.SgprParts[0];
    const int64_t EncodedImm =
        AMDGPU::getSMRDEncodedOffset(Subtarget, Info.Imm);

    // Both immediate forms take (base, encoded offset, glc); only the opcode
    // family differs, so choose the family first and emit once.
    bool UseImmForm = false;
    unsigned ImmFamily = AMDGPU::S_LOAD_DWORD_IMM;
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, Info.Imm)) {
      UseImmForm = true;
    } else if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
               isUInt<32>(EncodedImm)) {
      UseImmForm = true;
      ImmFamily = AMDGPU::S_LOAD_DWORD_IMM_ci;
    }

    if (UseImmForm) {
      const unsigned ImmOpc = getSmrdOpcode(ImmFamily, LoadSize);
      MachineInstr *SMRD = BuildMI(MBB, &I, Loc, TII.get(ImmOpc), DstReg)
                               .addReg(BaseReg)
                               .addImm(EncodedImm)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(Info.Imm)) {
      // Offset does not fit an immediate field: put it in a scalar register.
      const unsigned SgprOpc =
          getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, &I, Loc, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(Info.Imm);

      MachineInstr *SMRD = BuildMI(MBB, &I, Loc, TII.get(SgprOpc), DstReg)
                               .addReg(BaseReg)
                               .addReg(OffsetReg)
                               .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  // Nothing could be folded: load straight from the pointer operand.
  const unsigned PtrReg = I.getOperand(1).getReg();
  const unsigned PlainOpc = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(MBB, &I, Loc, TII.get(PlainOpc), DstReg)
                           .addReg(PtrReg)
                           .addImm(0)
                           .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Select a generic load.
///
/// The scalar path (selectSMRD) is tried first with the address information
/// gathered by getAddrModeInfo(); if it succeeds \p I is erased and true is
/// returned. Otherwise a FLAT_LOAD_DWORD (32-bit) or FLAT_LOAD_DWORDX2
/// (64-bit) is emitted from \p I's destination operand, its pointer register
/// and three zero immediates; other sizes are unreachable. \p I is erased on
/// this path too.
///
/// \returns true after a successful scalar selection, otherwise the result
///          of constraining the FLAT instruction's register operands.
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  DebugLoc Loc = I.getDebugLoc();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned PtrReg = I.getOperand(1).getReg();
  const unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  unsigned FlatOpc;
  if (LoadSize == 32)
    FlatOpc = AMDGPU::FLAT_LOAD_DWORD;
  else if (LoadSize == 64)
    FlatOpc = AMDGPU::FLAT_LOAD_DWORDX2;
  else
    llvm_unreachable("Load size not supported\n");

  MachineInstr *FlatLoad = BuildMI(MBB, &I, Loc, TII.get(FlatOpc))
                               .add(I.getOperand(0))
                               .addReg(PtrReg)
                               .addImm(0)
                               .addImm(0)
                               .addImm(0);

  const bool Constrained =
      constrainSelectedInstRegOperands(*FlatLoad, TII, TRI, RBI);
  I.eraseFromParent();
  return Constrained;
}
|
||||||
|
|
||||||
|
/// Entry point of the selector.
///
/// Instructions that are not pre-isel generic opcodes are reported as
/// selected without being touched. G_ADD, G_CONSTANT, G_GEP, G_LOAD and
/// G_STORE are forwarded to their select helpers; every other generic opcode
/// is reported as not selected.
bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
  const unsigned Opc = I.getOpcode();

  if (!isPreISelGenericOpcode(Opc))
    return true;

  switch (Opc) {
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_CONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  default:
    return false;
  }
}
|
|
@ -0,0 +1,65 @@
|
||||||
|
//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This file declares the targeting of the InstructionSelector class for
|
||||||
|
/// AMDGPU.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
|
||||||
|
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
|
||||||
|
#include "llvm/ADT/ArrayRef.h"
|
||||||
|
#include "llvm/ADT/SmallVector.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class AMDGPUInstrInfo;
|
||||||
|
class AMDGPURegisterBankInfo;
|
||||||
|
class MachineInstr;
|
||||||
|
class MachineOperand;
|
||||||
|
class MachineRegisterInfo;
|
||||||
|
class SIInstrInfo;
|
||||||
|
class SIRegisterInfo;
|
||||||
|
class SISubtarget;
|
||||||
|
|
||||||
|
class AMDGPUInstructionSelector : public InstructionSelector {
|
||||||
|
public:
|
||||||
|
AMDGPUInstructionSelector(const SISubtarget &STI,
|
||||||
|
const AMDGPURegisterBankInfo &RBI);
|
||||||
|
|
||||||
|
bool select(MachineInstr &I) const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct GEPInfo {
|
||||||
|
const MachineInstr &GEP;
|
||||||
|
SmallVector<unsigned, 2> SgprParts;
|
||||||
|
SmallVector<unsigned, 2> VgprParts;
|
||||||
|
int64_t Imm;
|
||||||
|
GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
|
||||||
|
};
|
||||||
|
|
||||||
|
MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
|
||||||
|
bool selectG_CONSTANT(MachineInstr &I) const;
|
||||||
|
bool selectG_ADD(MachineInstr &I) const;
|
||||||
|
bool selectG_GEP(MachineInstr &I) const;
|
||||||
|
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
|
||||||
|
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
|
||||||
|
SmallVectorImpl<GEPInfo> &AddrInfo) const;
|
||||||
|
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
|
||||||
|
bool selectG_LOAD(MachineInstr &I) const;
|
||||||
|
bool selectG_STORE(MachineInstr &I) const;
|
||||||
|
|
||||||
|
const SIInstrInfo &TII;
|
||||||
|
const SIRegisterInfo &TRI;
|
||||||
|
const AMDGPURegisterBankInfo &RBI;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // End llvm namespace.
|
||||||
|
#endif
|
|
@ -0,0 +1,62 @@
|
||||||
|
//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This file implements the targeting of the LegalizerInfo class for
|
||||||
|
/// AMDGPU.
|
||||||
|
/// \todo This should be generated by TableGen.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPULegalizerInfo.h"
|
||||||
|
#include "llvm/CodeGen/ValueTypes.h"
|
||||||
|
#include "llvm/IR/Type.h"
|
||||||
|
#include "llvm/IR/DerivedTypes.h"
|
||||||
|
#include "llvm/Target/TargetOpcodes.h"
|
||||||
|
#include "llvm/Support/Debug.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
#ifndef LLVM_BUILD_GLOBAL_ISEL
|
||||||
|
#error "You shouldn't build this"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Registers every (opcode, type-index, type) combination that is currently
/// marked Legal for AMDGPU and then builds the lookup tables.
AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
  using namespace TargetOpcode;

  const LLT Scalar32 = LLT::scalar(32);
  const LLT Scalar64 = LLT::scalar(64);
  // 64-bit pointers in address spaces 1 and 2.
  const LLT PtrAS1 = LLT::pointer(1, 64);
  const LLT PtrAS2 = LLT::pointer(2, 64);

  setAction({G_CONSTANT, Scalar64}, Legal);

  // G_GEP: pointer result in either address space, 64-bit scalar offset.
  for (const LLT &PtrTy : {PtrAS1, PtrAS2})
    setAction({G_GEP, PtrTy}, Legal);
  setAction({G_GEP, 1, Scalar64}, Legal);

  // G_LOAD: pointer or 32-bit scalar result, loaded through either pointer
  // type.
  for (const LLT &ResultTy : {PtrAS1, PtrAS2, Scalar32})
    setAction({G_LOAD, ResultTy}, Legal);
  for (const LLT &AddrTy : {PtrAS1, PtrAS2})
    setAction({G_LOAD, 1, AddrTy}, Legal);

  // G_STORE: 32-bit scalar value through an address-space-1 pointer.
  setAction({G_STORE, Scalar32}, Legal);
  setAction({G_STORE, 1, PtrAS1}, Legal);

  // FIXME: When RegBankSelect inserts copies, it will only create new
  // registers with scalar types.  This means we can end up with
  // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer
  // operands.  In assert builds, the instruction selector will assert
  // if it sees a generic instruction which isn't legal, so we need to
  // tell it that scalar types are legal for pointer operands
  setAction({G_GEP, Scalar64}, Legal);
  setAction({G_LOAD, 1, Scalar64}, Legal);
  setAction({G_STORE, 1, Scalar64}, Legal);

  computeTables();
}
|
|
@ -0,0 +1,30 @@
|
||||||
|
//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This file declares the targeting of the LegalizerInfo class for
|
||||||
|
/// AMDGPU.
|
||||||
|
/// \todo This should be generated by TableGen.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
|
||||||
|
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class LLVMContext;
|
||||||
|
|
||||||
|
/// This class provides the information for the target register banks.
|
||||||
|
class AMDGPULegalizerInfo : public LegalizerInfo {
|
||||||
|
public:
|
||||||
|
AMDGPULegalizerInfo();
|
||||||
|
};
|
||||||
|
} // End llvm namespace.
|
||||||
|
#endif
|
|
@ -0,0 +1,228 @@
|
||||||
|
//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This file implements the targeting of the RegisterBankInfo class for
|
||||||
|
/// AMDGPU.
|
||||||
|
/// \todo This should be generated by TableGen.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "AMDGPURegisterBankInfo.h"
|
||||||
|
#include "AMDGPUInstrInfo.h"
|
||||||
|
#include "SIRegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
|
||||||
|
#include "llvm/IR/Constants.h"
|
||||||
|
#include "llvm/Target/TargetRegisterInfo.h"
|
||||||
|
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||||
|
|
||||||
|
#define GET_TARGET_REGBANK_IMPL
|
||||||
|
#include "AMDGPUGenRegisterBank.inc"
|
||||||
|
|
||||||
|
// This file will be TableGen'ed at some point.
|
||||||
|
#include "AMDGPUGenRegisterBankInfo.def"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
#ifndef LLVM_BUILD_GLOBAL_ISEL
|
||||||
|
#error "You shouldn't build this"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Builds the AMDGPU register bank info.
///
/// \param TRI register info of the subtarget; it is stored as a
/// SIRegisterInfo pointer via static_cast, so callers must pass an object of
/// that dynamic type.
///
/// The one-time section below only sanity-checks that the bank IDs resolve to
/// the TableGen-generated bank objects.
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
    : AMDGPUGenRegisterBankInfo(),
      TRI(static_cast<const SIRegisterInfo*>(&TRI)) {

  // HACK: Until this is fully tablegen'd
  static bool AlreadyInit = false;
  if (AlreadyInit)
    return;

  AlreadyInit = true;

  // The references below are only consumed by assert(); the (void) casts keep
  // builds with assertions disabled free of unused-variable warnings.
  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
  (void)RBSGPR;
  assert(&RBSGPR == &AMDGPU::SGPRRegBank);

  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
  (void)RBVGPR;
  assert(&RBVGPR == &AMDGPU::VGPRRegBank);
}
|
||||||
|
|
||||||
|
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A,
|
||||||
|
const RegisterBank &B,
|
||||||
|
unsigned Size) const {
|
||||||
|
return RegisterBankInfo::copyCost(A, B, Size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps a register class onto one of the two banks: classes that
/// SIRegisterInfo::isSGPRClass accepts go to the SGPR bank, every other
/// class goes to the VGPR bank.
const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
    const TargetRegisterClass &RC) const {
  const unsigned BankID = TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBankID
                                                : AMDGPU::VGPRRegBankID;
  return getRegBank(BankID);
}
|
||||||
|
|
||||||
|
/// Returns the alternative register-bank mappings for \p MI.
///
/// Only G_LOAD has target-specific alternatives: (SGPR value, SGPR pointer),
/// (VGPR value, VGPR pointer) and (VGPR value, SGPR pointer).  Every other
/// opcode is forwarded to the generic RegisterBankInfo implementation.
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {

  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    // Operand 0 is only inspected once we know this is a G_LOAD; querying it
    // up front would call getReg() on whatever operand 0 happens to be for
    // unrelated opcodes.
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    InstructionMappings AltMappings;

    // FIXME: Should we be hard coding the size for these mappings?
    InstructionMapping SSMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.emplace_back(std::move(SSMapping));

    InstructionMapping VVMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.emplace_back(std::move(VVMapping));

    // FIXME: Should this be the pointer-size (64-bits) or the size of the
    // register that will hold the buffer resource (128-bits).
    InstructionMapping VSMapping(3, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
      2); // Num Operands
    AltMappings.emplace_back(std::move(VSMapping));

    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
|
||||||
|
|
||||||
|
void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||||
|
const OperandsMapper &OpdMapper) const {
|
||||||
|
return applyDefaultMapping(OpdMapper);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isInstrUniform(const MachineInstr &MI) {
|
||||||
|
if (!MI.hasOneMemOperand())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const MachineMemOperand *MMO = *MI.memoperands_begin();
|
||||||
|
return AMDGPU::isUniformMMO(MMO);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the default mapping for a load.
///
/// Operand 0 (the loaded value) and operand 1 (the pointer) are both placed
/// in the SGPR bank when isInstrUniform(MI) holds, and both in the VGPR bank
/// otherwise.  Each operand is mapped at its own register size.
RegisterBankInfo::InstructionMapping
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();

  const unsigned NumOps = MI.getNumOperands();
  RegisterBankInfo::InstructionMapping Result =
      InstructionMapping{1, 1, nullptr, NumOps};
  SmallVector<const ValueMapping*, 8> PerOperand(NumOps);

  const unsigned ValBits = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  const unsigned PtrBits = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);

  // A uniform instruction should use an SMRD load, hence the SGPR bank.
  // FIXME: What would happen if we used SGPRRegBankID for the pointer of a
  // non-uniform load?
  const unsigned BankID = isInstrUniform(MI) ? AMDGPU::SGPRRegBankID
                                             : AMDGPU::VGPRRegBankID;

  PerOperand[0] = AMDGPU::getValueMapping(BankID, ValBits);
  PerOperand[1] = AMDGPU::getValueMapping(BankID, PtrBits);
  Result.setOperandsMapping(getOperandsMapping(PerOperand));
  return Result;

  // FIXME: Do we want to add a mapping for FLAT load, or should we just
  // handle that during instruction selection?
}
|
||||||
|
|
||||||
|
/// Returns the default register-bank mapping for \p MI.
///
/// The generic getInstrMappingImpl() answer is used when it is valid.
/// Otherwise G_CONSTANT and G_GEP map every register operand to the SGPR
/// bank, G_STORE maps value and pointer to the VGPR bank, G_LOAD is delegated
/// to getInstrMappingForLoad(), and any other opcode falls back to mapping
/// every operand to the SGPR bank.
RegisterBankInfo::InstructionMapping
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);

  if (Mapping.isValid())
    return Mapping;

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
  // One slot per operand, value-initialized to nullptr; entries are filled in
  // by index below.
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  switch (MI.getOpcode()) {
  default: break;
  case AMDGPU::G_CONSTANT: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
    return Mapping;
  }
  case AMDGPU::G_GEP: {
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (!MI.getOperand(i).isReg())
        continue;

      unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
      OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    }
    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
    return Mapping;
  }
  case AMDGPU::G_STORE: {
    assert(MI.getOperand(0).isReg());
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // FIXME: We need to specify a different reg bank once scalar stores
    // are supported.
    const ValueMapping *ValMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    // FIXME: Depending on the type of store, the pointer could be in
    // the SGPR Reg bank.
    // FIXME: Pointer size should be based on the address space.
    const ValueMapping *PtrMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);

    OpdsMapping[0] = ValMapping;
    OpdsMapping[1] = PtrMapping;
    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
    return Mapping;
  }

  case AMDGPU::G_LOAD:
    return getInstrMappingForLoad(MI);
  }

  // Fallback: put every operand in the SGPR bank.
  unsigned BankID = AMDGPU::SGPRRegBankID;

  unsigned Size = 0;
  for (unsigned Idx = 0, E = MI.getNumOperands(); Idx != E; ++Idx) {
    // If the operand is not a register default to the size of the previous
    // operand.
    // FIXME: Can't we pull the types from the MachineInstr rather than the
    // operands.
    if (MI.getOperand(Idx).isReg())
      Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
    // OpdsMapping already holds one slot per operand, so assign in place.
    // Appending here would leave the first NumOperands entries null and push
    // the real mappings past the range the InstructionMapping reads.
    OpdsMapping[Idx] = AMDGPU::getValueMapping(BankID, Size);
  }
  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));

  return Mapping;
}
|
|
@ -0,0 +1,65 @@
|
||||||
|
//===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
/// \file
|
||||||
|
/// This file declares the targeting of the RegisterBankInfo class for AMDGPU.
|
||||||
|
/// \todo This should be generated by TableGen.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
|
||||||
|
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
|
||||||
|
|
||||||
|
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class SIRegisterInfo;
|
||||||
|
class TargetRegisterInfo;
|
||||||
|
|
||||||
|
namespace AMDGPU {
/// Numeric identifiers of the AMDGPU register banks.
enum {
  SGPRRegBankID = 0,
  VGPRRegBankID = 1,
  NumRegisterBanks // Count of the bank IDs listed above.
};
} // End AMDGPU namespace.
|
||||||
|
|
||||||
|
/// Base class whose protected section is filled in from the
/// TableGen-generated AMDGPUGenRegisterBank.inc (GET_TARGET_REGBANK_CLASS
/// part).
class AMDGPUGenRegisterBankInfo : public RegisterBankInfo {
protected:
#define GET_TARGET_REGBANK_CLASS
#include "AMDGPUGenRegisterBank.inc"
};
|
||||||
|
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
||||||
|
const SIRegisterInfo *TRI;
|
||||||
|
|
||||||
|
/// See RegisterBankInfo::applyMapping.
|
||||||
|
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
|
||||||
|
|
||||||
|
RegisterBankInfo::InstructionMapping
|
||||||
|
getInstrMappingForLoad(const MachineInstr &MI) const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
|
||||||
|
|
||||||
|
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
|
||||||
|
unsigned Size) const override;
|
||||||
|
|
||||||
|
const RegisterBank &
|
||||||
|
getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
|
||||||
|
|
||||||
|
InstructionMappings
|
||||||
|
getInstrAlternativeMappings(const MachineInstr &MI) const override;
|
||||||
|
|
||||||
|
InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
|
||||||
|
};
|
||||||
|
} // End llvm namespace.
|
||||||
|
#endif
|
|
@ -0,0 +1,16 @@
|
||||||
|
//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Register bank "SGPR": covers the SReg_* register classes listed below.
def SGPRRegBank : RegisterBank<"SGPR",
  [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
>;
|
||||||
|
|
||||||
|
// Register bank "VGPR": covers VGPR_32 and the VReg_* register classes
// listed below.
def VGPRRegBank : RegisterBank<"VGPR",
  [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
>;
|
|
@ -517,6 +517,21 @@ public:
|
||||||
return GISel->getCallLowering();
|
return GISel->getCallLowering();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const InstructionSelector *getInstructionSelector() const override {
|
||||||
|
assert(GISel && "Access to GlobalISel APIs not set");
|
||||||
|
return GISel->getInstructionSelector();
|
||||||
|
}
|
||||||
|
|
||||||
|
const LegalizerInfo *getLegalizerInfo() const override {
|
||||||
|
assert(GISel && "Access to GlobalISel APIs not set");
|
||||||
|
return GISel->getLegalizerInfo();
|
||||||
|
}
|
||||||
|
|
||||||
|
const RegisterBankInfo *getRegBankInfo() const override {
|
||||||
|
assert(GISel && "Access to GlobalISel APIs not set");
|
||||||
|
return GISel->getRegBankInfo();
|
||||||
|
}
|
||||||
|
|
||||||
const SIRegisterInfo *getRegisterInfo() const override {
|
const SIRegisterInfo *getRegisterInfo() const override {
|
||||||
return &InstrInfo.getRegisterInfo();
|
return &InstrInfo.getRegisterInfo();
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,18 +16,20 @@
|
||||||
#include "AMDGPUTargetMachine.h"
|
#include "AMDGPUTargetMachine.h"
|
||||||
#include "AMDGPU.h"
|
#include "AMDGPU.h"
|
||||||
#include "AMDGPUCallLowering.h"
|
#include "AMDGPUCallLowering.h"
|
||||||
|
#include "AMDGPUInstructionSelector.h"
|
||||||
|
#include "AMDGPULegalizerInfo.h"
|
||||||
|
#ifdef LLVM_BUILD_GLOBAL_ISEL
|
||||||
|
#include "AMDGPURegisterBankInfo.h"
|
||||||
|
#endif
|
||||||
#include "AMDGPUTargetObjectFile.h"
|
#include "AMDGPUTargetObjectFile.h"
|
||||||
#include "AMDGPUTargetTransformInfo.h"
|
#include "AMDGPUTargetTransformInfo.h"
|
||||||
#include "GCNSchedStrategy.h"
|
#include "GCNSchedStrategy.h"
|
||||||
#include "R600MachineScheduler.h"
|
#include "R600MachineScheduler.h"
|
||||||
#include "SIMachineScheduler.h"
|
#include "SIMachineScheduler.h"
|
||||||
#include "llvm/ADT/SmallString.h"
|
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
|
||||||
#include "llvm/ADT/StringRef.h"
|
|
||||||
#include "llvm/ADT/Triple.h"
|
|
||||||
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
|
|
||||||
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
|
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
|
||||||
#include "llvm/CodeGen/MachineScheduler.h"
|
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
|
||||||
|
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
|
||||||
#include "llvm/CodeGen/Passes.h"
|
#include "llvm/CodeGen/Passes.h"
|
||||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
@ -287,9 +289,21 @@ namespace {
|
||||||
|
|
||||||
struct SIGISelActualAccessor : public GISelAccessor {
|
struct SIGISelActualAccessor : public GISelAccessor {
|
||||||
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
|
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
|
||||||
|
std::unique_ptr<InstructionSelector> InstSelector;
|
||||||
|
std::unique_ptr<LegalizerInfo> Legalizer;
|
||||||
|
std::unique_ptr<RegisterBankInfo> RegBankInfo;
|
||||||
const AMDGPUCallLowering *getCallLowering() const override {
|
const AMDGPUCallLowering *getCallLowering() const override {
|
||||||
return CallLoweringInfo.get();
|
return CallLoweringInfo.get();
|
||||||
}
|
}
|
||||||
|
const InstructionSelector *getInstructionSelector() const override {
|
||||||
|
return InstSelector.get();
|
||||||
|
}
|
||||||
|
const LegalizerInfo *getLegalizerInfo() const override {
|
||||||
|
return Legalizer.get();
|
||||||
|
}
|
||||||
|
const RegisterBankInfo *getRegBankInfo() const override {
|
||||||
|
return RegBankInfo.get();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
@ -323,6 +337,11 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
|
||||||
SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
|
SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
|
||||||
GISel->CallLoweringInfo.reset(
|
GISel->CallLoweringInfo.reset(
|
||||||
new AMDGPUCallLowering(*I->getTargetLowering()));
|
new AMDGPUCallLowering(*I->getTargetLowering()));
|
||||||
|
GISel->Legalizer.reset(new AMDGPULegalizerInfo());
|
||||||
|
|
||||||
|
GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
|
||||||
|
GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
|
||||||
|
*static_cast<AMDGPURegisterBankInfo*>(GISel->RegBankInfo.get())));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
I->setGISelAccessor(*GISel);
|
I->setGISelAccessor(*GISel);
|
||||||
|
@ -623,16 +642,20 @@ bool GCNPassConfig::addIRTranslator() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GCNPassConfig::addLegalizeMachineIR() {
|
bool GCNPassConfig::addLegalizeMachineIR() {
|
||||||
|
addPass(new Legalizer());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GCNPassConfig::addRegBankSelect() {
|
bool GCNPassConfig::addRegBankSelect() {
|
||||||
|
addPass(new RegBankSelect());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GCNPassConfig::addGlobalInstructionSelect() {
|
bool GCNPassConfig::addGlobalInstructionSelect() {
|
||||||
|
addPass(new InstructionSelect());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GCNPassConfig::addPreRegAlloc() {
|
void GCNPassConfig::addPreRegAlloc() {
|
||||||
|
|
|
@ -12,11 +12,17 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
|
||||||
tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
|
tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
|
||||||
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
|
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
|
||||||
tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
|
tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
|
||||||
|
if(LLVM_BUILD_GLOBAL_ISEL)
|
||||||
|
tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank)
|
||||||
|
endif()
|
||||||
add_public_tablegen_target(AMDGPUCommonTableGen)
|
add_public_tablegen_target(AMDGPUCommonTableGen)
|
||||||
|
|
||||||
# List of all GlobalISel files.
|
# List of all GlobalISel files.
|
||||||
set(GLOBAL_ISEL_FILES
|
set(GLOBAL_ISEL_FILES
|
||||||
AMDGPUCallLowering.cpp
|
AMDGPUCallLowering.cpp
|
||||||
|
AMDGPUInstructionSelector.cpp
|
||||||
|
AMDGPULegalizerInfo.cpp
|
||||||
|
AMDGPURegisterBankInfo.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add GlobalISel files to the dependencies if the user wants to build it.
|
# Add GlobalISel files to the dependencies if the user wants to build it.
|
||||||
|
|
|
@ -10,10 +10,10 @@
|
||||||
#include "AMDGPU.h"
|
#include "AMDGPU.h"
|
||||||
#include "SIDefines.h"
|
#include "SIDefines.h"
|
||||||
#include "llvm/CodeGen/MachineMemOperand.h"
|
#include "llvm/CodeGen/MachineMemOperand.h"
|
||||||
#include "llvm/IR/LLVMContext.h"
|
|
||||||
#include "llvm/IR/Constants.h"
|
#include "llvm/IR/Constants.h"
|
||||||
#include "llvm/IR/Function.h"
|
#include "llvm/IR/Function.h"
|
||||||
#include "llvm/IR/GlobalValue.h"
|
#include "llvm/IR/GlobalValue.h"
|
||||||
|
#include "llvm/IR/LLVMContext.h"
|
||||||
#include "llvm/MC/MCContext.h"
|
#include "llvm/MC/MCContext.h"
|
||||||
#include "llvm/MC/MCInstrInfo.h"
|
#include "llvm/MC/MCInstrInfo.h"
|
||||||
#include "llvm/MC/MCRegisterInfo.h"
|
#include "llvm/MC/MCRegisterInfo.h"
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
|
||||||
|
|
||||||
|
# REQUIRES: global-isel
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
|
||||||
|
...
|
||||||
|
---
|
||||||
|
|
||||||
|
name: global_addrspace
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
|
||||||
|
# GCN: global_addrspace
|
||||||
|
# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
|
||||||
|
# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: %vgpr0_vgpr1
|
||||||
|
|
||||||
|
%0:vgpr(p1) = COPY %vgpr0_vgpr1
|
||||||
|
%1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0)
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
|
@ -0,0 +1,143 @@
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
|
||||||
|
|
||||||
|
# REQUIRES: global-isel
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
|
||||||
|
...
|
||||||
|
---
|
||||||
|
|
||||||
|
name: smrd_imm
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
|
||||||
|
# GCN: body:
|
||||||
|
# GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1
|
||||||
|
|
||||||
|
# Immediate offset:
|
||||||
|
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
|
||||||
|
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
|
||||||
|
|
||||||
|
# Max immediate offset for SI
|
||||||
|
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
|
||||||
|
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
|
||||||
|
|
||||||
|
# Immediate overflow for SI
|
||||||
|
# FIXME: The immediate gets selected twice, once into the
|
||||||
|
# S_LOAD_DWORD instruction and once just as a normal constant.
|
||||||
|
# SI: S_MOV_B32 1024
|
||||||
|
# SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024
|
||||||
|
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
|
||||||
|
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
|
||||||
|
# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0
|
||||||
|
|
||||||
|
# Max immediate offset for VI
|
||||||
|
# SI: S_MOV_B32 1048572
|
||||||
|
# SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572
|
||||||
|
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
|
||||||
|
# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572
|
||||||
|
|
||||||
|
#
|
||||||
|
# Immediate overflow for VI
|
||||||
|
# FIXME: The immediate gets selected twice, once into the
|
||||||
|
# S_LOAD_DWORD instruction and once just as a normal constant.
|
||||||
|
# SIVI: S_MOV_B32 1048576
|
||||||
|
# SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576
|
||||||
|
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
|
||||||
|
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
|
||||||
|
|
||||||
|
# Max immediate for CI
|
||||||
|
# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292
|
||||||
|
# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3
|
||||||
|
# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
|
||||||
|
# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
|
||||||
|
# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
|
||||||
|
# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
|
||||||
|
# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
|
||||||
|
# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
|
||||||
|
# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
|
||||||
|
# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
|
||||||
|
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
|
||||||
|
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
|
||||||
|
|
||||||
|
# Immediate overflow for CI
|
||||||
|
# GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0
|
||||||
|
# GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4
|
||||||
|
# GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
|
||||||
|
# GCN: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
|
||||||
|
# GCN: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
|
||||||
|
# GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
|
||||||
|
# GCN: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
|
||||||
|
# GCN: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
|
||||||
|
# GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
|
||||||
|
# GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
|
||||||
|
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
|
||||||
|
|
||||||
|
# Max 32-bit byte offset
|
||||||
|
# FIXME: The immediate gets selected twice, once into the
|
||||||
|
# S_LOAD_DWORD instruction and once just as a normal constant.
|
||||||
|
# SIVI: S_MOV_B32 4294967292
|
||||||
|
# SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292
|
||||||
|
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
|
||||||
|
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
|
||||||
|
|
||||||
|
# Overflow 32-bit byte offset
|
||||||
|
# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0
|
||||||
|
# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1
|
||||||
|
# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
|
||||||
|
# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
|
||||||
|
# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
|
||||||
|
# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
|
||||||
|
# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
|
||||||
|
# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
|
||||||
|
# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
|
||||||
|
# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
|
||||||
|
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
|
||||||
|
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: %sgpr0_sgpr1
|
||||||
|
|
||||||
|
%0:sgpr(p2) = COPY %sgpr0_sgpr1
|
||||||
|
|
||||||
|
%1:sgpr(s64) = G_CONSTANT i64 4
|
||||||
|
%2:sgpr(p2) = G_GEP %0, %1
|
||||||
|
%3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%4:sgpr(s64) = G_CONSTANT i64 1020
|
||||||
|
%5:sgpr(p2) = G_GEP %0, %4
|
||||||
|
%6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%7:sgpr(s64) = G_CONSTANT i64 1024
|
||||||
|
%8:sgpr(p2) = G_GEP %0, %7
|
||||||
|
%9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%10:sgpr(s64) = G_CONSTANT i64 1048572
|
||||||
|
%11:sgpr(p2) = G_GEP %0, %10
|
||||||
|
%12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%13:sgpr(s64) = G_CONSTANT i64 1048576
|
||||||
|
%14:sgpr(p2) = G_GEP %0, %13
|
||||||
|
%15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%16:sgpr(s64) = G_CONSTANT i64 17179869180
|
||||||
|
%17:sgpr(p2) = G_GEP %0, %16
|
||||||
|
%18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%19:sgpr(s64) = G_CONSTANT i64 17179869184
|
||||||
|
%20:sgpr(p2) = G_GEP %0, %19
|
||||||
|
%21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%22:sgpr(s64) = G_CONSTANT i64 4294967292
|
||||||
|
%23:sgpr(p2) = G_GEP %0, %22
|
||||||
|
%24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
%25:sgpr(s64) = G_CONSTANT i64 4294967296
|
||||||
|
%26:sgpr(p2) = G_GEP %0, %25
|
||||||
|
%27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0)
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
|
@ -0,0 +1,29 @@
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
|
||||||
|
|
||||||
|
# REQUIRES: global-isel
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
|
||||||
|
...
|
||||||
|
---
|
||||||
|
|
||||||
|
name: global_addrspace
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
|
||||||
|
# GCN: global_addrspace
|
||||||
|
# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
|
||||||
|
# GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
|
||||||
|
# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: %vgpr0_vgpr1, %vgpr2
|
||||||
|
|
||||||
|
%0:vgpr(p1) = COPY %vgpr0_vgpr1
|
||||||
|
%1:vgpr(s32) = COPY %vgpr2
|
||||||
|
G_STORE %1, %0 :: (store 4 into %ir.global0)
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
|
@ -0,0 +1,69 @@
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - | FileCheck %s
|
||||||
|
|
||||||
|
# REQUIRES: global-isel
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define void @load_constant(i32 addrspace(2)* %ptr0) { ret void }
|
||||||
|
define void @load_global_uniform(i32 addrspace(1)* %ptr1) {
|
||||||
|
%tmp0 = load i32, i32 addrspace(1)* %ptr1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @load_global_non_uniform(i32 addrspace(1)* %ptr2) {
|
||||||
|
%tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||||
|
%tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0
|
||||||
|
%tmp2 = load i32, i32 addrspace(1)* %tmp1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: load_constant
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
# CHECK-LABEL: name: load_constant
|
||||||
|
# CHECK: registers:
|
||||||
|
# CHECK: - { id: 0, class: sgpr }
|
||||||
|
# CHECK: - { id: 1, class: sgpr }
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: %sgpr0_sgpr1
|
||||||
|
%0:_(p2) = COPY %sgpr0_sgpr1
|
||||||
|
%1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0)
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: load_global_uniform
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
# CHECK-LABEL: name: load_global_uniform
|
||||||
|
# CHECK: registers:
|
||||||
|
# CHECK: - { id: 0, class: sgpr }
|
||||||
|
# CHECK: - { id: 1, class: sgpr }
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: %sgpr0_sgpr1
|
||||||
|
%0:_(p1) = COPY %sgpr0_sgpr1
|
||||||
|
%1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1)
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: load_global_non_uniform
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
# CHECK-LABEL: name: load_global_non_uniform
|
||||||
|
# CHECK: registers:
|
||||||
|
# CHECK: - { id: 0, class: sgpr }
|
||||||
|
# CHECK: - { id: 1, class: vgpr }
|
||||||
|
# CHECK: - { id: 2, class: vgpr }
|
||||||
|
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: %sgpr0_sgpr1
|
||||||
|
%0:_(p1) = COPY %sgpr0_sgpr1
|
||||||
|
%1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1)
|
||||||
|
...
|
|
@ -0,0 +1,11 @@
|
||||||
|
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s
|
||||||
|
|
||||||
|
; REQUIRES: global-isel
|
||||||
|
|
||||||
|
; GCN-LABEL: vs_epilog
|
||||||
|
; GCN: s_endpgm
|
||||||
|
|
||||||
|
define amdgpu_vs void @vs_epilog() {
|
||||||
|
main_body:
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -0,0 +1,89 @@
|
||||||
|
; FIXME: Need to add support for mubuf stores to enable this on SI.
|
||||||
|
; XUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=SICI --check-prefix=GCN --check-prefix=SIVI %s
|
||||||
|
; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=SICI --check-prefix=GCN %s
|
||||||
|
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
|
||||||
|
|
||||||
|
; REQUIRES: global-isel
|
||||||
|
|
||||||
|
; SMRD load with an immediate offset.
|
||||||
|
; GCN-LABEL: {{^}}smrd0:
|
||||||
|
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
|
||||||
|
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
|
||||||
|
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||||
|
entry:
|
||||||
|
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
|
||||||
|
%1 = load i32, i32 addrspace(2)* %0
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SMRD load with the largest possible immediate offset.
|
||||||
|
; GCN-LABEL: {{^}}smrd1:
|
||||||
|
; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
|
||||||
|
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
|
||||||
|
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||||
|
entry:
|
||||||
|
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
|
||||||
|
%1 = load i32, i32 addrspace(2)* %0
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SMRD load with an offset greater than the largest possible immediate.
|
||||||
|
; GCN-LABEL: {{^}}smrd2:
|
||||||
|
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
|
||||||
|
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
|
||||||
|
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
|
||||||
|
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
|
||||||
|
; GCN: s_endpgm
|
||||||
|
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||||
|
entry:
|
||||||
|
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
|
||||||
|
%1 = load i32, i32 addrspace(2)* %0
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SMRD load with a 64-bit offset
|
||||||
|
; GCN-LABEL: {{^}}smrd3:
|
||||||
|
; FIXME: There are too many copies here because we don't fold immediates
|
||||||
|
; through REG_SEQUENCE
|
||||||
|
; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
|
||||||
|
; TODO: Add VI checks
|
||||||
|
; XGCN: s_endpgm
|
||||||
|
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||||
|
entry:
|
||||||
|
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
|
||||||
|
%1 = load i32, i32 addrspace(2)* %0
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SMRD load with the largest possible immediate offset on VI
|
||||||
|
; GCN-LABEL: {{^}}smrd4:
|
||||||
|
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
|
||||||
|
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||||
|
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
|
||||||
|
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
|
||||||
|
define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||||
|
entry:
|
||||||
|
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
|
||||||
|
%1 = load i32, i32 addrspace(2)* %0
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SMRD load with an offset greater than the largest possible immediate on VI
|
||||||
|
; GCN-LABEL: {{^}}smrd5:
|
||||||
|
; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
|
||||||
|
; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
|
||||||
|
; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
|
||||||
|
; GCN: s_endpgm
|
||||||
|
define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
|
||||||
|
entry:
|
||||||
|
%0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
|
||||||
|
%1 = load i32, i32 addrspace(2)* %0
|
||||||
|
store i32 %1, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue