AMDGPU: Separate R600 and GCN TableGen files

Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc.  This should help reduce compile time,
since each sub-target only has to consider the information that is
specific to itself.  It should also keep the R600 sub-target from
slowing down new GCN features, such as disassembler support,
GlobalISel, etc.

Reviewers: arsenm, nhaehnle, jvesely

Reviewed By: arsenm

Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D46365

llvm-svn: 335942
Tom Stellard 2018-06-28 23:47:12 +00:00
parent 3702f91287
commit c5a154db48
63 changed files with 1854 additions and 1508 deletions
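
The practical effect on shared passes is that code which must work for both
sub-targets now goes through the new AMDGPUCommonSubtarget base class instead
of AMDGPUSubtarget. Below is a rough, illustrative sketch only: the helper
name, pass body, and include paths are assumed, while
AMDGPUCommonSubtarget::get() and isPromoteAllocaEnabled() are the interfaces
introduced in the diff further down.

// Sketch, not part of the diff: a sub-target-independent IR pass queries the
// common subtarget interface, which dispatches to either the GCN
// AMDGPUSubtarget or the R600Subtarget based on the target triple.
#include "AMDGPUSubtarget.h"          // assumed header for the new class
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

static bool runOnFunctionSketch(const TargetMachine &TM, Function &F) {
  // Works for both amdgcn and r600 triples.
  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(TM, F);

  // Only features common to both sub-targets are exposed through this class.
  if (!ST.isPromoteAllocaEnabled())
    return false;

  // ... sub-target-independent rewriting would go here ...
  return true;
}

This is the same shape as the AMDGPUPromoteAlloca and AMDGPULowerIntrinsics
changes in the diff below.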

View File

@ -9,23 +9,12 @@
include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"
//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//
def FeatureFP64 : SubtargetFeature<"fp64",
"FP64",
"true",
"Enable double precision operations"
>;
def FeatureFMA : SubtargetFeature<"fmaf",
"FMA",
"true",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
"FastFMAF32",
"true",
@ -44,30 +33,6 @@ def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
"Most fp64 instructions are half rate instead of quarter"
>;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"R600ALUInst",
"false",
"Older version of ALU instructions encoding"
>;
def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
"HasVertexCache",
"true",
"Specify use of dedicated vertex cache"
>;
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
"CaymanISA",
"true",
"Use Cayman ISA"
>;
def FeatureCFALUBug : SubtargetFeature<"cfalubug",
"CFALUBug",
"true",
"GPU has CF_ALU bug"
>;
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"FlatAddressSpace",
"true",
@ -153,27 +118,6 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
Value,
"Limit the maximum number of fetches in a clause to "#Value
>;
def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
"wavefrontsize"#Value,
"WavefrontSize",
!cast<string>(Value),
"The number of threads per wavefront"
>;
def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
@ -184,19 +128,6 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
!cast<string>(Value),
"The size of local memory in bytes"
>;
def FeatureGCN : SubtargetFeature<"gcn",
"IsGCN",
"true",
"GCN or newer GPU"
>;
def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
"GCN3Encoding",
"true",
@ -369,12 +300,6 @@ def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
[FeatureFP64FP16Denormals]
>;
def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
"DX10Clamp",
"true",
"clamp modifier clamps NaNs to 0.0"
>;
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
@ -417,12 +342,6 @@ def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
"Dump MachineInstrs in the CodeEmitter"
>;
def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
"EnablePromoteAlloca",
"true",
"Enable promote alloca pass"
>;
// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
"EnableLoadStoreOpt",
@ -486,45 +405,29 @@ def FeatureDisable : SubtargetFeature<"",
"Dummy feature to disable assembler instructions"
>;
class SubtargetFeatureGeneration <string Value,
def FeatureGCN : SubtargetFeature<"gcn",
"IsGCN",
"true",
"GCN or newer GPU"
>;
class AMDGPUSubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
Value#" GPU generation", Implies>;
SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;
def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
def FeatureR600 : SubtargetFeatureGeneration<"R600",
[FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
>;
def FeatureR700 : SubtargetFeatureGeneration<"R700",
[FeatureFetchLimit16, FeatureLocalMemorySize0]
>;
def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
[FeatureFetchLimit16, FeatureLocalMemorySize32768]
>;
def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
[FeatureFetchLimit16, FeatureWavefrontSize64,
FeatureLocalMemorySize32768]
>;
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN,
FeatureLDSBankCount32, FeatureMovrel]
>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel]
>;
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
@ -535,7 +438,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
]
>;
def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
@ -738,8 +641,6 @@ def NullALU : InstrItinClass;
// Predicate helper class
//===----------------------------------------------------------------------===//
def TruePredicate : Predicate<"true">;
def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@ -831,36 +732,15 @@ def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
// Exists to help track down where SubtargetPredicate isn't set rather
// than letting tablegen crash with an unhelpful error.
def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
class PredicateControl {
Predicate SubtargetPredicate = InvalidPred;
Predicate SIAssemblerPredicate = isSICI;
Predicate VIAssemblerPredicate = isVI;
list<Predicate> AssemblerPredicates = [];
Predicate AssemblerPredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
list<Predicate> Predicates = !listconcat([SubtargetPredicate,
AssemblerPredicate],
AssemblerPredicates,
OtherPredicates);
}
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
PredicateControl;
// Include AMDGPU TD files
include "R600Schedule.td"
include "R600Processors.td"
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
include "SIInstrInfo.td"
include "AMDGPUCallingConv.td"
include "AMDGPUSearchableTables.td"

View File

@ -85,17 +85,6 @@ def RetCC_SI_Shader : CallingConv<[
]>>
]>;
// Calling convention for R600
def CC_R600 : CallingConv<[
CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
T30_XYZW, T31_XYZW, T32_XYZW
]>>>
]>;
// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
CCCustom<"allocateKernArg">
@ -165,9 +154,5 @@ def CC_AMDGPU : CallingConv<[
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
CCDelegateTo<CC_AMDGPU_Func>>,
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() < "
"AMDGPUSubtarget::SOUTHERN_ISLANDS",
CCDelegateTo<CC_R600>>
CCDelegateTo<CC_AMDGPU_Func>>
]>;

View File

@ -0,0 +1,60 @@
//===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
def FeatureFP64 : SubtargetFeature<"fp64",
"FP64",
"true",
"Enable double precision operations"
>;
def FeatureFMA : SubtargetFeature<"fmaf",
"FMA",
"true",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
!cast<string>(Value),
"The size of local memory in bytes"
>;
def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
"wavefrontsize"#Value,
"WavefrontSize",
!cast<string>(Value),
"The number of threads per wavefront"
>;
def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
class SubtargetFeatureGeneration <string Value, string Subtarget,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", Subtarget#"::"#Value,
Value#" GPU generation", Implies>;
def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
"DX10Clamp",
"true",
"clamp modifier clamps NaNs to 0.0"
>;
def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
"EnablePromoteAlloca",
"true",
"Enable promote alloca pass"
>;

View File

@ -104,15 +104,11 @@ private:
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool isUniformBr(const SDNode *N) const;
SDNode *glueCopyToM0(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
SDValue& Offset);
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
@ -227,9 +223,18 @@ protected:
};
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
const R600Subtarget *Subtarget;
AMDGPUAS AMDGPUASI;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
SDValue& Offset);
public:
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
AMDGPUDAGToDAGISel(TM, OptLevel) {}
AMDGPUDAGToDAGISel(TM, OptLevel) {
AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
}
void Select(SDNode *N) override;
@ -237,6 +242,11 @@ public:
SDValue &Offset) override;
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) override;
bool runOnMachineFunction(MachineFunction &MF) override;
protected:
// Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};
} // end anonymous namespace
@ -280,8 +290,7 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
}
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
const SIInstrInfo *TII
= static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
return TII->isInlineConstant(C->getAPIntValue());
@ -637,16 +646,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectCode(N);
}
bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
const Instruction *Term = BB->getTerminator();
@ -662,26 +661,6 @@ StringRef AMDGPUDAGToDAGISel::getPassName() const {
// Complex Patterns
//===----------------------------------------------------------------------===//
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
true);
return true;
}
return false;
}
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
return true;
}
return false;
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) {
return false;
@ -693,11 +672,11 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
@ -2160,6 +2139,41 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
} while (IsModified);
}
bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<R600Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
true);
return true;
}
return false;
}
bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
return true;
}
return false;
}
void R600DAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@ -2180,12 +2194,12 @@ void R600DAGToDAGISel::Select(SDNode *N) {
// pass. We want to avoid 128-bit copies as much as possible because they
// can't be bundled by our scheduler.
switch(NumVectorElts) {
case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
case 2: RegClassID = R600::R600_Reg64RegClassID; break;
case 4:
if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
RegClassID = R600::R600_Reg128VerticalRegClassID;
else
RegClassID = AMDGPU::R600_Reg128RegClassID;
RegClassID = R600::R600_Reg128RegClassID;
break;
default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
@ -2203,11 +2217,11 @@ bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
@ -2238,7 +2252,7 @@ bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(CurDAG->getEntryNode()),
AMDGPU::ZERO, MVT::i32);
R600::ZERO, MVT::i32);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
MVT::i32);
return true;

View File

@ -155,7 +155,7 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
const AMDGPUCommonSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
@ -330,10 +330,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG, MVT::f32, Custom);
setOperationAction(ISD::FLOG10, MVT::f32, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::FLOG, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
@ -341,10 +337,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, MVT::f32, Custom);
setOperationAction(ISD::FREM, MVT::f64, Custom);
// v_mad_f32 does not support denormals according to some sources.
if (!Subtarget->hasFP32Denormals())
setOperationAction(ISD::FMAD, MVT::f32, Legal);
// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
@ -359,19 +351,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FCEIL, MVT::f64, Custom);
setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
setOperationAction(ISD::FRINT, MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
}
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
}
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
@ -403,12 +382,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, VT, Legal);
}
if (!Subtarget->hasBCNT(32))
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
// The hardware supports 32-bit ROTR, but not ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@ -428,28 +401,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMAX, MVT::i32, Legal);
setOperationAction(ISD::UMAX, MVT::i32, Legal);
if (Subtarget->hasFFBH())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
if (Subtarget->hasFFBL())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i64, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
// On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
// effort to match them now. We want this to be false for i64 cases when the
// extraction isn't restricted to the upper or lower half. Ideally we would
// have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
// span the midpoint are probably relatively rare, so don't worry about them
// for now.
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
};
@ -554,11 +510,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// vector compares until that is fixed.
setHasMultipleConditionRegisters(true);
// SI at least has hardware support for floating point exceptions, but no way
// of using or handling them is implemented. They are also optional in OpenCL
// (Section 7.3)
setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
PredictableSelectIsExpensive = false;
// We want to find all load dependencies for long chains of stores to enable
@ -781,7 +732,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
{
const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
if (L->getMemOperand()->getAddrSpace()
== Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT)
== AMDGPUASI.CONSTANT_ADDRESS_32BIT)
return true;
return false;
}
@ -4290,9 +4241,11 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
switch (IID) {
case Intrinsic::amdgcn_mbcnt_lo:
case Intrinsic::amdgcn_mbcnt_hi: {
const SISubtarget &ST =
DAG.getMachineFunction().getSubtarget<SISubtarget>();
// These return at most the wavefront size - 1.
unsigned Size = Op.getValueType().getSizeInBits();
Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2());
Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
break;
}
default:

View File

@ -23,11 +23,13 @@
namespace llvm {
class AMDGPUMachineFunction;
class AMDGPUSubtarget;
class AMDGPUCommonSubtarget;
struct ArgDescriptor;
class AMDGPUTargetLowering : public TargetLowering {
private:
const AMDGPUCommonSubtarget *Subtarget;
/// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
/// legalized from a smaller type VT. Need to match pre-legalized type because
/// the generic legalization inserts the add/sub between the select and
@ -39,7 +41,6 @@ public:
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
const AMDGPUSubtarget *Subtarget;
AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
@ -124,7 +125,7 @@ protected:
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);
bool mayIgnoreSignedZero(SDValue Op) const {
if (getTargetMachine().Options.NoSignedZerosFPMath)

View File

@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
/// Implementation of the TargetInstrInfo class that is common to all
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
@ -23,107 +23,11 @@
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"
namespace llvm {
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}
// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
//void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
ST(ST),
AMDGPUASI(ST.getAMDGPUAS()) {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { }
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two 16-store batches.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with saying different
// address space loads should be clustered, and ones which might cause bank
// conflicts.
//
// This might be deprecated so it might not be worth that much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
int64_t Offset0, int64_t Offset1,
unsigned NumLoads) const {
assert(Offset1 > Offset0 &&
"Second offset should be larger than first offset!");
// If we have less than 16 loads in a row, and the offsets are within 64
// bytes, then schedule together.
// A cacheline is 64 bytes (for global memory).
return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
VI = 1,
SDWA = 2,
SDWA9 = 3,
GFX80 = 4,
GFX9 = 5
};
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
switch (ST.getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
case AMDGPUSubtarget::SEA_ISLANDS:
return SIEncodingFamily::SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
case AMDGPUSubtarget::GFX9:
return SIEncodingFamily::VI;
// FIXME: This should never be called for r600 GPUs.
case AMDGPUSubtarget::R600:
case AMDGPUSubtarget::R700:
case AMDGPUSubtarget::EVERGREEN:
case AMDGPUSubtarget::NORTHERN_ISLANDS:
return SIEncodingFamily::SI;
}
llvm_unreachable("Unknown subtarget generation!");
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
ST.getGeneration() >= AMDGPUSubtarget::GFX9)
Gen = SIEncodingFamily::GFX9;
if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
: SIEncodingFamily::SDWA;
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
// subtarget has UnpackedD16VMem feature.
// TODO: remove this when we discard GFX80 encoding.
if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
Gen = SIEncodingFamily::GFX80;
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
return Opcode;
// (uint16_t)-1 means that Opcode is a pseudo instruction that has
// no encoding in the given subtarget generation.
if (MCOp == (uint16_t)-1)
return -1;
return MCOp;
}
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {

View File

@ -20,10 +20,6 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_HEADER
namespace llvm {
class AMDGPUSubtarget;
@ -31,26 +27,10 @@ class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
const AMDGPUSubtarget &ST;
virtual void anchor();
protected:
AMDGPUAS AMDGPUASI;
class AMDGPUInstrInfo {
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
/// Return a target-specific opcode if Opcode is a pseudo instruction.
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
static bool isUniformMMO(const MachineMemOperand *MMO);
};

View File

@ -42,6 +42,47 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
field bits<32> Inst = 0xffffffff;
}
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
let Namespace = "AMDGPU";
dag OutOperandList = outs;
dag InOperandList = ins;
let Pattern = pattern;
let AsmString = !strconcat(asmstr, "\n");
let isPseudo = 1;
let Itinerary = NullALU;
bit hasIEEEFlag = 0;
bit hasZeroOpFlag = 0;
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let isCodeGenOnly = 1;
}
def TruePredicate : Predicate<"true">;
// Exists to help track down where SubtargetPredicate isn't set rather
// than letting tablegen crash with an unhelpful error.
def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
class PredicateControl {
Predicate SubtargetPredicate = InvalidPred;
list<Predicate> AssemblerPredicates = [];
Predicate AssemblerPredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
list<Predicate> Predicates = !listconcat([SubtargetPredicate,
AssemblerPredicate],
AssemblerPredicates,
OtherPredicates);
}
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
PredicateControl;
def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
@ -94,12 +135,6 @@ def brtarget : Operand<OtherVT>;
// Misc. PatFrags
//===----------------------------------------------------------------------===//
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
(ops node:$src0),
(op $src0),
[{ return N->hasOneUse(); }]
>;
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1),
(op $src0, $src1),
@ -112,8 +147,6 @@ class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
[{ return N->hasOneUse(); }]
>;
def trunc_oneuse : HasOneUseUnaryOp<trunc>;
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
@ -240,6 +273,37 @@ def COND_NULL : PatLeaf <
[{(void)N; return false;}]
>;
//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//
def TEX_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 9 || TType == 10 || TType == 16;
}]
>;
def TEX_RECT : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 5;
}]
>;
def TEX_SHADOW : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return (TType >= 6 && TType <= 8) || TType == 13;
}]
>;
def TEX_SHADOW_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 11 || TType == 12 || TType == 17;
}]
>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
@ -769,11 +833,3 @@ class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"
include "SIInstrInfo.td"

View File

@ -14,5 +14,3 @@
let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
}
include "SIIntrinsics.td"

View File

@ -117,7 +117,6 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
return false;
const TargetMachine &TM = TPC->getTM<TargetMachine>();
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(F);
bool Changed = false;
for (auto *U : F.users()) {
@ -125,7 +124,7 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
if (!CI)
continue;
Changed |= ST.makeLIDRangeMetadata(CI);
Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
}
return Changed;
}

View File

@ -152,7 +152,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
IsAMDGCN = TT.getArch() == Triple::amdgcn;
IsAMDHSA = TT.getOS() == Triple::AMDHSA;
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
if (!ST.isPromoteAllocaEnabled())
return false;
@ -174,8 +174,8 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
*Builder.GetInsertBlock()->getParent());
const Function &F = *Builder.GetInsertBlock()->getParent();
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
if (!IsAMDHSA) {
Function *LocalSizeYFn
@ -261,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
}
Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
*Builder.GetInsertBlock()->getParent());
const AMDGPUCommonSubtarget &ST =
AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
switch (N) {
@ -602,7 +602,7 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
FunctionType *FTy = F.getFunctionType();
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
// If the function has any arguments in the local address space, then it's
// possible these arguments require the entire local memory space, so
@ -729,8 +729,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
if (!SufficientLDS)
return false;
const AMDGPUSubtarget &ST =
TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
const DataLayout &DL = Mod->getDataLayout();

View File

@ -19,5 +19,4 @@ foreach Index = 0-15 in {
}
include "R600RegisterInfo.td"
include "SIRegisterInfo.td"

View File

@ -23,6 +23,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>
@ -34,9 +35,32 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "R600GenSubtargetInfo.inc"
AMDGPUSubtarget::~AMDGPUSubtarget() = default;
R600Subtarget &
R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
// FIXME: I don't think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
FP32Denormals = false;
}
HasMulU24 = getGeneration() >= EVERGREEN;
HasMulI24 = hasCaymanISA();
return *this;
}
AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
@ -93,26 +117,44 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
HasMovrel = true;
}
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
return *this;
}
AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
const FeatureBitset &FeatureBits) :
TargetTriple(TT),
SubtargetFeatureBits(FeatureBits),
Has16BitInsts(false),
HasMadMixInsts(false),
FP32Denormals(false),
FPExceptions(false),
HasSDWA(false),
HasVOP3PInsts(false),
HasMulI24(true),
HasMulU24(true),
HasFminFmaxLegacy(true),
EnablePromoteAlloca(false),
LocalMemorySize(0),
WavefrontSize(0)
{ }
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM)
: AMDGPUGenSubtargetInfo(TT, GPU, FS),
const TargetMachine &TM) :
AMDGPUGenSubtargetInfo(TT, GPU, FS),
AMDGPUCommonSubtarget(TT, getFeatureBits()),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
TargetTriple(TT),
Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
WavefrontSize(0),
LocalMemorySize(0),
LDSBankCount(0),
MaxPrivateElementSize(0),
FastFMAF32(false),
HalfRate64Ops(false),
FP32Denormals(false),
FP64FP16Denormals(false),
FPExceptions(false),
DX10Clamp(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
@ -128,7 +170,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
EnableHugePrivateBuffer(false),
EnableVGPRSpilling(false),
EnablePromoteAlloca(false),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
@ -136,25 +177,18 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DumpCode(false),
FP64(false),
FMA(false),
MIMG_R128(false),
IsGCN(false),
GCN3Encoding(false),
CIInsts(false),
GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
HasIntClamp(false),
HasVOP3PInsts(false),
HasMadMixInsts(false),
HasFmaMixInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),
HasScalarAtomics(false),
HasInv2PiInlineImm(false),
HasSDWA(false),
HasSDWAOmod(false),
HasSDWAScalar(false),
HasSDWASdst(false),
@ -170,20 +204,14 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
R600ALUInst(false),
CaymanISA(false),
CFALUBug(false),
HasVertexCache(false),
TexVTXClauseSize(0),
ScalarizeGlobal(false),
FeatureDisable(false),
InstrItins(getInstrItineraryForCPU(GPU)) {
FeatureDisable(false) {
AS = AMDGPU::getAMDGPUAS(TT);
initializeSubtargetDependencies(TT, GPU, FS);
}
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
const Function &F) const {
if (NWaves == 1)
return getLocalMemorySize();
@ -193,7 +221,7 @@ unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
const Function &F) const {
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
@ -206,13 +234,13 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
}
unsigned
AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}
std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
switch (CC) {
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_KERNEL:
@ -230,7 +258,7 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
}
}
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
const Function &F) const {
// FIXME: 1024 if function.
// Default minimum/maximum flat work group sizes.
@ -260,7 +288,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
return Requested;
}
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
const Function &F) const {
// Default minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
@ -308,7 +336,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
return Requested;
}
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
Function *Kernel = I->getParent()->getParent();
unsigned MinSize = 0;
unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
@ -372,10 +400,22 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
AMDGPUSubtarget(TT, GPU, FS, TM),
R600GenSubtargetInfo(TT, GPU, FS),
AMDGPUCommonSubtarget(TT, getFeatureBits()),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
TLInfo(TM, *this) {}
FMA(false),
CaymanISA(false),
CFALUBug(false),
DX10Clamp(false),
HasVertexCache(false),
R600ALUInst(false),
FP64(false),
TexVTXClauseSize(0),
Gen(R600),
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
InstrItins(getInstrItineraryForCPU(GPU)),
AS (AMDGPU::getAMDGPUAS(TT)) { }
SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM)
@ -619,3 +659,17 @@ void SISubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}
const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>());
else
return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>());
}
const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) {
if (TM.getTargetTriple().getArch() == Triple::amdgcn)
return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F));
else
return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
}

View File

@ -39,22 +39,181 @@
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
#define GET_SUBTARGETINFO_HEADER
#include "R600GenSubtargetInfo.inc"
namespace llvm {
class StringRef;
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
class AMDGPUCommonSubtarget {
private:
Triple TargetTriple;
protected:
const FeatureBitset &SubtargetFeatureBits;
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
bool FPExceptions;
bool HasSDWA;
bool HasVOP3PInsts;
bool HasMulI24;
bool HasMulU24;
bool HasFminFmaxLegacy;
bool EnablePromoteAlloca;
int LocalMemorySize;
unsigned WavefrontSize;
public:
AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
const Function &F);
/// \returns Default range flat work group size for a calling convention.
std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
/// for function \p F, or minimum/maximum flat work group sizes explicitly
/// requested using "amdgpu-flat-work-group-size" attribute attached to
/// function \p F.
///
/// \returns Subtarget's default values if explicitly requested values cannot
/// be converted to integer, or violate subtarget's specifications.
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
/// \returns Subtarget's default pair of minimum/maximum number of waves per
/// execution unit for function \p F, or minimum/maximum number of waves per
/// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
/// attached to function \p F.
///
/// \returns Subtarget's default values if explicitly requested values cannot
/// be converted to integer, violate subtarget's specifications, or are not
/// compatible with minimum/maximum number of waves limited by flat work group
/// size, register usage, and/or lds usage.
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
/// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
/// the given LDS memory size is the only constraint.
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
bool isAmdPalOS() const {
return TargetTriple.getOS() == Triple::AMDPAL;
}
bool has16BitInsts() const {
return Has16BitInsts;
}
bool hasMadMixInsts() const {
return HasMadMixInsts;
}
bool hasFP32Denormals() const {
return FP32Denormals;
}
bool hasFPExceptions() const {
return FPExceptions;
}
bool hasSDWA() const {
return HasSDWA;
}
bool hasVOP3PInsts() const {
return HasVOP3PInsts;
}
bool hasMulI24() const {
return HasMulI24;
}
bool hasMulU24() const {
return HasMulU24;
}
bool hasFminFmaxLegacy() const {
return HasFminFmaxLegacy;
}
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
unsigned getWavefrontSize() const {
return WavefrontSize;
}
int getLocalMemorySize() const {
return LocalMemorySize;
}
unsigned getAlignmentForImplicitArgPtr() const {
return isAmdHsaOS() ? 8 : 4;
}
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
FlatWorkGroupSize);
}
/// \returns Minimum flat work group size supported by the subtarget.
unsigned getMinFlatWorkGroupSize() const {
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
}
/// \returns Maximum flat work group size supported by the subtarget.
unsigned getMaxFlatWorkGroupSize() const {
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
}
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
unsigned getMinWavesPerEU() const {
return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
}
unsigned getMaxWavesPerEU() const { return 10; }
/// Creates value range metadata on a workitemid.* intrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
virtual ~AMDGPUCommonSubtarget() {}
};
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
public AMDGPUCommonSubtarget {
public:
enum Generation {
R600 = 0,
R700,
EVERGREEN,
NORTHERN_ISLANDS,
SOUTHERN_ISLANDS,
SEA_ISLANDS,
VOLCANIC_ISLANDS,
GFX9,
// Gap for R600 generations, so we can do comparisons between
// AMDGPUSubtarget and R600Subtarget.
SOUTHERN_ISLANDS = 4,
SEA_ISLANDS = 5,
VOLCANIC_ISLANDS = 6,
GFX9 = 7,
};
enum {
@ -96,13 +255,20 @@ public:
LLVMTrapHandlerRegValue = 1
};
private:
SIFrameLowering FrameLowering;
/// GlobalISel related APIs.
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
protected:
// Basic subtarget description.
Triple TargetTriple;
Generation Gen;
unsigned Gen;
unsigned IsaVersion;
unsigned WavefrontSize;
int LocalMemorySize;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@ -111,9 +277,7 @@ protected:
bool HalfRate64Ops;
// Dynamially set bits that enable features.
bool FP32Denormals;
bool FP64FP16Denormals;
bool FPExceptions;
bool DX10Clamp;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
@ -129,7 +293,6 @@ protected:
// Used as options.
bool EnableHugePrivateBuffer;
bool EnableVGPRSpilling;
bool EnablePromoteAlloca;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
@ -146,17 +309,13 @@ protected:
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
bool HasIntClamp;
bool HasVOP3PInsts;
bool HasMadMixInsts;
bool HasFmaMixInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
bool HasInv2PiInlineImm;
bool HasSDWA;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
@ -181,7 +340,6 @@ protected:
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
AMDGPUAS AS;
@ -193,13 +351,30 @@ public:
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
const AMDGPUInstrInfo *getInstrInfo() const override = 0;
const AMDGPUFrameLowering *getFrameLowering() const override = 0;
const AMDGPUTargetLowering *getTargetLowering() const override = 0;
const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
virtual const SIInstrInfo *getInstrInfo() const override = 0;
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
const SIFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
virtual const SITargetLowering *getTargetLowering() const override = 0;
virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
const InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
const LegalizerInfo *getLegalizerInfo() const override {
return Legalizer.get();
}
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
// Nothing implemented, just prevent crashes on use.
@ -209,34 +384,18 @@ public:
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
bool isMesa3DOS() const {
return TargetTriple.getOS() == Triple::Mesa3D;
}
bool isAmdPalOS() const {
return TargetTriple.getOS() == Triple::AMDPAL;
}
Generation getGeneration() const {
return Gen;
}
unsigned getWavefrontSize() const {
return WavefrontSize;
return (Generation)Gen;
}
unsigned getWavefrontSizeLog2() const {
return Log2_32(WavefrontSize);
}
int getLocalMemorySize() const {
return LocalMemorySize;
}
int getLDSBankCount() const {
return LDSBankCount;
}
@ -249,18 +408,10 @@ public:
return AS;
}
bool has16BitInsts() const {
return Has16BitInsts;
}
bool hasIntClamp() const {
return HasIntClamp;
}
bool hasVOP3PInsts() const {
return HasVOP3PInsts;
}
bool hasFP64() const {
return FP64;
}
@ -269,6 +420,10 @@ public:
return MIMG_R128;
}
bool hasHWFP64() const {
return FP64;
}
bool hasFastFMAF32() const {
return FastFMAF32;
}
@ -278,15 +433,15 @@ public:
}
bool hasAddr64() const {
return (getGeneration() < VOLCANIC_ISLANDS);
return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
}
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
return true;
}
bool hasBFI() const {
return (getGeneration() >= EVERGREEN);
return true;
}
bool hasBFM() const {
@ -294,42 +449,23 @@ public:
}
bool hasBCNT(unsigned Size) const {
if (Size == 32)
return (getGeneration() >= EVERGREEN);
if (Size == 64)
return (getGeneration() >= SOUTHERN_ISLANDS);
return false;
}
bool hasMulU24() const {
return (getGeneration() >= EVERGREEN);
}
bool hasMulI24() const {
return (getGeneration() >= SOUTHERN_ISLANDS ||
hasCaymanISA());
return true;
}
bool hasFFBL() const {
return (getGeneration() >= EVERGREEN);
return true;
}
bool hasFFBH() const {
return (getGeneration() >= EVERGREEN);
return true;
}
bool hasMed3_16() const {
return getGeneration() >= GFX9;
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasMin3Max3_16() const {
return getGeneration() >= GFX9;
}
bool hasMadMixInsts() const {
return HasMadMixInsts;
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasFmaMixInsts() const {
@ -337,15 +473,7 @@ public:
}
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
bool hasBORROW() const {
return (getGeneration() >= EVERGREEN);
}
bool hasCaymanISA() const {
return CaymanISA;
return true;
}
bool hasFMA() const {
@ -360,10 +488,6 @@ public:
return EnableHugePrivateBuffer;
}
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
@ -377,20 +501,10 @@ public:
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
/// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
/// the given LDS memory size is the only constraint.
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
bool hasFP16Denormals() const {
return FP64FP16Denormals;
}
bool hasFP32Denormals() const {
return FP32Denormals;
}
bool hasFP64Denormals() const {
return FP64FP16Denormals;
}
@ -399,10 +513,6 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasFPExceptions() const {
return FPExceptions;
}
bool enableDX10Clamp() const {
return DX10Clamp;
}
@ -444,7 +554,7 @@ public:
}
bool hasApertureRegs() const {
return HasApertureRegs;
return HasApertureRegs;
}
bool isTrapHandlerEnabled() const {
@ -510,14 +620,6 @@ public:
return getGeneration() >= SEA_ISLANDS;
}
bool hasFminFmaxLegacy() const {
return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
bool hasSDWA() const {
return HasSDWA;
}
bool hasSDWAOmod() const {
return HasSDWAOmod;
}
@ -556,10 +658,6 @@ public:
return isAmdCodeObjectV2(F) ? 0 : 36;
}
unsigned getAlignmentForImplicitArgPtr() const {
return isAmdHsaOS() ? 8 : 4;
}
/// \returns Number of bytes of arguments that are passed to a shader or
/// kernel in addition to the explicit ones declared for the function.
unsigned getImplicitArgNumBytes(const Function &F) const {
@ -588,134 +686,39 @@ public:
return true;
}
void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
}
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
FlatWorkGroupSize);
return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
unsigned getMinWavesPerEU() const {
return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
FlatWorkGroupSize);
}
/// \returns Minimum flat work group size supported by the subtarget.
unsigned getMinFlatWorkGroupSize() const {
return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
}
/// \returns Maximum flat work group size supported by the subtarget.
unsigned getMaxFlatWorkGroupSize() const {
return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
return AMDGPU::IsaInfo::getMaxWavesPerEU();
}
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
FlatWorkGroupSize);
}
/// \returns Default pair of minimum/maximum flat work group sizes for a calling convention.
std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
/// for function \p F, or minimum/maximum flat work group sizes explicitly
/// requested using "amdgpu-flat-work-group-size" attribute attached to
/// function \p F.
///
/// \returns Subtarget's default values if explicitly requested values cannot
/// be converted to integer, or violate subtarget's specifications.
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
/// \returns Subtarget's default pair of minimum/maximum number of waves per
/// execution unit for function \p F, or minimum/maximum number of waves per
/// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
/// attached to function \p F.
///
/// \returns Subtarget's default values if explicitly requested values cannot
/// be converted to integer, violate subtarget's specifications, or are not
/// compatible with minimum/maximum number of waves limited by flat work group
/// size, register usage, and/or lds usage.
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
/// Creates value range metadata on a workitemid.* intrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
};
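// Illustrative sketch (not part of the patch): how a client might consume the
// min/max pairs declared above. The concrete subtarget type is left as a
// template parameter because only the two accessors shown in this header are
// relied on; errs() is assumed to be available via raw_ostream.h.
template <typename SubtargetT>
static void printOccupancyBounds(const SubtargetT &ST, const Function &F) {
  std::pair<unsigned, unsigned> FlatWG = ST.getFlatWorkGroupSizes(F);
  std::pair<unsigned, unsigned> Waves = ST.getWavesPerEU(F);
  // Both pairs are (minimum, maximum); attribute values that violate the
  // subtarget's limits fall back to the defaults, as documented above.
  errs() << "flat-work-group-size: [" << FlatWG.first << ", " << FlatWG.second
         << "]  waves-per-eu: [" << Waves.first << ", " << Waves.second
         << "]\n";
}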
class R600Subtarget final : public AMDGPUSubtarget {
private:
R600InstrInfo InstrInfo;
R600FrameLowering FrameLowering;
R600TargetLowering TLInfo;
public:
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const TargetMachine &TM);
const R600InstrInfo *getInstrInfo() const override {
return &InstrInfo;
}
const R600FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const R600TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
const R600RegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
bool hasCFAluBug() const {
return CFALUBug;
}
bool hasVertexCache() const {
return HasVertexCache;
}
short getTexVTXClauseSize() const {
return TexVTXClauseSize;
return AMDGPU::IsaInfo::getWavesPerWorkGroup(
MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
}
};
@ -766,6 +769,8 @@ public:
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
// static wrappers
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
// XXX - Why is this here if it isn't in the default pass set?
bool enableEarlyIfConversion() const override {
@ -775,7 +780,7 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
bool isVGPRSpillingEnabled(const Function& F) const;
bool isVGPRSpillingEnabled(const Function &F) const;
unsigned getMaxNumUserSGPRs() const {
return 16;
@ -860,16 +865,18 @@ public:
unsigned getKernArgSegmentSize(const Function &F,
unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
/// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
/// Return the maximum number of waves per SIMD for kernels using \p VGPRs
/// VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
return getGeneration() >= GFX9;
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
/// \returns true if the machine has merged shaders in which s0-s7 are
@ -880,35 +887,39 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
return AMDGPU::IsaInfo::getSGPRAllocGranule(
MCSubtargetInfo::getFeatureBits());
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
return AMDGPU::IsaInfo::getSGPREncodingGranule(
MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
return AMDGPU::IsaInfo::getAddressableNumSGPRs(
MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
Addressable);
return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@ -926,34 +937,39 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
return AMDGPU::IsaInfo::getVGPRAllocGranule(
MCSubtargetInfo::getFeatureBits());
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
return AMDGPU::IsaInfo::getVGPREncodingGranule(
MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
return AMDGPU::IsaInfo::getAddressableNumVGPRs(
MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@ -971,6 +987,127 @@ public:
const override;
};
class R600Subtarget final : public R600GenSubtargetInfo,
public AMDGPUCommonSubtarget {
public:
enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
private:
R600InstrInfo InstrInfo;
R600FrameLowering FrameLowering;
bool FMA;
bool CaymanISA;
bool CFALUBug;
bool DX10Clamp;
bool HasVertexCache;
bool R600ALUInst;
bool FP64;
short TexVTXClauseSize;
Generation Gen;
R600TargetLowering TLInfo;
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
AMDGPUAS AS;
public:
R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const TargetMachine &TM);
const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const R600FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const R600TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
const R600RegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
// Nothing implemented, just prevent crashes on use.
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
Generation getGeneration() const {
return Gen;
}
unsigned getStackAlignment() const {
return 4;
}
R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
bool hasBFI() const {
return (getGeneration() >= EVERGREEN);
}
bool hasBCNT(unsigned Size) const {
if (Size == 32)
return (getGeneration() >= EVERGREEN);
return false;
}
bool hasBORROW() const {
return (getGeneration() >= EVERGREEN);
}
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
bool hasCaymanISA() const {
return CaymanISA;
}
bool hasFFBL() const {
return (getGeneration() >= EVERGREEN);
}
bool hasFFBH() const {
return (getGeneration() >= EVERGREEN);
}
bool hasFMA() const { return FMA; }
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
return 36;
}
bool hasCFAluBug() const { return CFALUBug; }
bool hasVertexCache() const { return HasVertexCache; }
short getTexVTXClauseSize() const { return TexVTXClauseSize; }
AMDGPUAS getAMDGPUAS() const { return AS; }
bool enableMachineScheduler() const override {
return true;
}
bool enableSubRegLiveness() const override {
return true;
}
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
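// Illustrative sketch (not part of the patch): with R600Subtarget no longer
// deriving from the GCN-side AMDGPUSubtarget, code that holds a generic
// target machine has to pick the concrete subtarget type itself, mirroring
// what the TTI constructors in this change do. AMDGPUTargetMachine, Triple,
// and the subtarget headers are assumed to be visible to the caller.
static unsigned getSubtargetGeneration(const AMDGPUTargetMachine &TM,
                                       const Function &F) {
  const TargetSubtargetInfo *STI = TM.getSubtargetImpl(F);
  if (TM.getTargetTriple().getArch() == Triple::r600)
    return static_cast<const R600Subtarget *>(STI)->getGeneration();
  return static_cast<const AMDGPUSubtarget *>(STI)->getGeneration();
}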

View File

@ -34,7 +34,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUIntrinsicInfo IntrinsicInfo;
AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
@ -49,12 +48,8 @@ public:
CodeGenOpt::Level OL);
~AMDGPUTargetMachine() override;
const AMDGPUSubtarget *getSubtargetImpl() const;
const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0;
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
const TargetSubtargetInfo *getSubtargetImpl() const;
const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override = 0;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
@ -103,6 +98,7 @@ public:
class GCNTargetMachine final : public AMDGPUTargetMachine {
private:
AMDGPUIntrinsicInfo IntrinsicInfo;
mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
public:
@ -117,6 +113,10 @@ public:
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
bool useIPRA() const override {
return true;
}

View File

@ -102,7 +102,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned ThresholdPrivate = UnrollThresholdPrivate;
unsigned ThresholdLocal = UnrollThresholdLocal;
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
AMDGPUAS ASST = ST->getAMDGPUAS();
const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;

View File

@ -45,17 +45,12 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
friend BaseT;
const AMDGPUSubtarget *ST;
const AMDGPUTargetLowering *TLI;
Triple TargetTriple;
public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
const AMDGPUSubtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
TargetTriple(TM->getTargetTriple()) {}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
@ -123,7 +118,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
public:
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(TM->getSubtargetImpl(F)),
ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
@ -211,18 +206,18 @@ class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
friend BaseT;
const AMDGPUSubtarget *ST;
const R600Subtarget *ST;
const AMDGPUTargetLowering *TLI;
AMDGPUTTIImpl CommonTTI;
public:
explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(TM->getSubtargetImpl(F)),
ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()),
CommonTTI(TM, F) {}
const AMDGPUSubtarget *getST() const { return ST; }
const R600Subtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

View File

@ -432,19 +432,19 @@ void AMDGPUCFGStructurizer::reversePredicateSetter(
for (;; --I) {
if (I == MBB.end())
continue;
if (I->getOpcode() == AMDGPU::PRED_X) {
if (I->getOpcode() == R600::PRED_X) {
switch (I->getOperand(2).getImm()) {
case AMDGPU::PRED_SETE_INT:
I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
case R600::PRED_SETE_INT:
I->getOperand(2).setImm(R600::PRED_SETNE_INT);
return;
case AMDGPU::PRED_SETNE_INT:
I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
case R600::PRED_SETNE_INT:
I->getOperand(2).setImm(R600::PRED_SETE_INT);
return;
case AMDGPU::PRED_SETE:
I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
case R600::PRED_SETE:
I->getOperand(2).setImm(R600::PRED_SETNE);
return;
case AMDGPU::PRED_SETNE:
I->getOperand(2).setImm(AMDGPU::PRED_SETE);
case R600::PRED_SETNE:
I->getOperand(2).setImm(R600::PRED_SETE);
return;
default:
llvm_unreachable("PRED_X Opcode invalid!");
@ -513,10 +513,10 @@ void AMDGPUCFGStructurizer::insertCondBranchBefore(
int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
case R600::JUMP_COND:
case R600::JUMP: return R600::IF_PREDICATE_SET;
case R600::BRANCH_COND_i32:
case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
default: llvm_unreachable("internal error");
}
return -1;
@ -524,10 +524,10 @@ int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
case R600::JUMP_COND:
case R600::JUMP: return R600::IF_PREDICATE_SET;
case R600::BRANCH_COND_i32:
case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
default: llvm_unreachable("internal error");
}
return -1;
@ -535,8 +535,8 @@ int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
case R600::JUMP_COND:
case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
default: llvm_unreachable("internal error");
}
return -1;
@ -544,8 +544,8 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
switch(OldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
case R600::JUMP_COND:
case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
default: llvm_unreachable("internal error");
}
return -1;
@ -573,9 +573,9 @@ AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
switch (MI->getOpcode()) {
case AMDGPU::JUMP_COND:
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32: return true;
case R600::JUMP_COND:
case R600::BRANCH_COND_i32:
case R600::BRANCH_COND_f32: return true;
default:
return false;
}
@ -584,8 +584,8 @@ bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
switch (MI->getOpcode()) {
case AMDGPU::JUMP:
case AMDGPU::BRANCH:
case R600::JUMP:
case R600::BRANCH:
return true;
default:
return false;
@ -634,7 +634,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
MachineBasicBlock::reverse_iterator It = MBB->rbegin();
if (It != MBB->rend()) {
MachineInstr *instr = &(*It);
if (instr->getOpcode() == AMDGPU::RETURN)
if (instr->getOpcode() == R600::RETURN)
return instr;
}
return nullptr;
@ -687,8 +687,8 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator E = MBB->end();
MachineBasicBlock::iterator It = Pre;
while (It != E) {
if (Pre->getOpcode() == AMDGPU::CONTINUE
&& It->getOpcode() == AMDGPU::ENDLOOP)
if (Pre->getOpcode() == R600::CONTINUE
&& It->getOpcode() == R600::ENDLOOP)
ContInstr.push_back(&*Pre);
Pre = It;
++It;
@ -1303,15 +1303,15 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
//insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
//insert R600::ENDIF to avoid special case "input landBlk == NULL"
MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
if (LandBlkHasOtherPred) {
report_fatal_error("Extra register needed to handle CFG");
unsigned CmpResReg =
HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
report_fatal_error("Extra compare instruction needed to handle CFG");
insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
CmpResReg, DebugLoc());
}
@ -1319,7 +1319,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// cause an assertion failure in the PostRA scheduling pass.
unsigned InitReg =
HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
DebugLoc());
if (MigrateTrue) {
@ -1329,7 +1329,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// (initVal != 1).
report_fatal_error("Extra register needed to handle CFG");
}
insertInstrBefore(I, AMDGPU::ELSE);
insertInstrBefore(I, R600::ELSE);
if (MigrateFalse) {
migrateInstruction(FalseMBB, LandBlk, I);
@ -1341,7 +1341,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
if (LandBlkHasOtherPred) {
// add endif
insertInstrBefore(I, AMDGPU::ENDIF);
insertInstrBefore(I, R600::ENDIF);
// put initReg = 2 to other predecessors of landBlk
for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
@ -1414,7 +1414,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
}
if (FalseMBB) {
insertInstrBefore(I, AMDGPU::ELSE);
insertInstrBefore(I, R600::ELSE);
MBB->splice(I, FalseMBB, FalseMBB->begin(),
FalseMBB->end());
MBB->removeSuccessor(FalseMBB, true);
@ -1423,7 +1423,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
retireBlock(FalseMBB);
MLI->removeBlock(FalseMBB);
}
insertInstrBefore(I, AMDGPU::ENDIF);
insertInstrBefore(I, R600::ENDIF);
BranchMI->eraseFromParent();
@ -1436,8 +1436,8 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
<< " land = BB" << LandMBB->getNumber() << "\n";);
insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
@ -1453,9 +1453,9 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
MachineBasicBlock::iterator I = BranchMI;
if (TrueBranch != LandMBB)
reversePredicateSetter(I, *I->getParent());
insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
insertInstrBefore(I, AMDGPU::BREAK);
insertInstrBefore(I, AMDGPU::ENDIF);
insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
insertInstrBefore(I, R600::BREAK);
insertInstrBefore(I, R600::ENDIF);
//now branchInst can be erased safely
BranchMI->eraseFromParent();
//now take care of successors, retire blocks
@ -1484,8 +1484,8 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
getBranchZeroOpcode(OldOpcode);
insertCondBranchBefore(I, BranchOpcode, DL);
// insertEnd to ensure phi-moves, if they exist, go before the continue-instr.
insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
insertInstrEnd(ContingMBB, R600::ENDIF, DL);
} else {
int BranchOpcode =
TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
@ -1500,7 +1500,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
// location we've just inserted that reference here so it should be
// representative insertEnd to ensure phi-moves, if they exist, go before the
// continue-instr.
insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
insertInstrEnd(ContingMBB, R600::CONTINUE,
getLastDebugLocInBB(ContingMBB));
}
}
@ -1627,7 +1627,7 @@ void AMDGPUCFGStructurizer::addDummyExitBlock(
SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
FuncRep->push_back(DummyExitBlk); //insert to function
insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
insertInstrEnd(DummyExitBlk, R600::RETURN);
for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
E = RetMBB.end(); It != E; ++It) {

View File

@ -4,7 +4,6 @@ tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AMDGPUGenIntrinsicEnums.inc -gen-tgt-intrinsic-enums)
@ -19,6 +18,16 @@ tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)
set(LLVM_TARGET_DEFINITIONS R600.td)
tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM R600GenCallingConv.inc -gen-callingconv)
tablegen(LLVM R600GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(AMDGPUCommonTableGen)
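# Illustrative pattern (not part of the patch): any further R600-only generated
# file follows the recipe above -- keep LLVM_TARGET_DEFINITIONS pointed at
# R600.td, invoke the tablegen backend, and the output is collected by the
# existing AMDGPUCommonTableGen target. The backend named here is only an
# example of the shape, not something this change adds:
#
#   tablegen(LLVM R600GenSearchableTables.inc -gen-searchable-tables)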
add_llvm_target(AMDGPUCodeGen

View File

@ -20,6 +20,7 @@
#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"

View File

@ -14,14 +14,13 @@
//===----------------------------------------------------------------------===//
def isEG : Predicate<
"Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
"Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && "
"Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "
"!Subtarget->hasCaymanISA()"
>;
def isEGorCayman : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
"Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||"
"Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"
>;
class EGPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {

View File

@ -510,11 +510,6 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
static_cast<R600InstPrinter*>(this)->printOperand(MI, OpNo, O);
return;
}
if (OpNo >= MI->getNumOperands()) {
O << "/*Missing OP" << OpNo << "*/";
return;
@ -965,11 +960,6 @@ void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
static_cast<R600InstPrinter*>(this)->printMemOperand(MI, OpNo, O);
return;
}
printOperand(MI, OpNo, STI, O);
O << ", ";
printOperand(MI, OpNo + 1, STI, O);
@ -995,16 +985,6 @@ void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
O << Asm;
}
void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printAbs(MI, OpNo, O);
}
void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printClamp(MI, OpNo, O);
}
void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@ -1031,70 +1011,6 @@ void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
O << " div:2";
}
void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printLiteral(MI, OpNo, O);
}
void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printLast(MI, OpNo, O);
}
void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printNeg(MI, OpNo, O);
}
void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printOMOD(MI, OpNo, O);
}
void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printRel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printUpdateExecMask(MI, OpNo, O);
}
void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printUpdatePred(MI, OpNo, O);
}
void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printWrite(MI, OpNo, O);
}
void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printBankSwizzle(MI, OpNo, O);
}
void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printRSel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printCT(MI, OpNo, O);
}
void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
static_cast<R600InstPrinter*>(this)->printKCache(MI, OpNo, O);
}
void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@ -1299,6 +1215,13 @@ void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
#include "AMDGPUGenAsmWriter.inc"
void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot, const MCSubtargetInfo &STI) {
O.flush();
printInstruction(MI, O);
printAnnotation(O, Annot);
}
void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
@ -1417,7 +1340,7 @@ void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
switch (Op.getReg()) {
// This is the default predicate state, so we don't need to print it.
case AMDGPU::PRED_SEL_OFF:
case R600::PRED_SEL_OFF:
break;
default:
@ -1493,3 +1416,5 @@ void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
O << " (MASKED)";
}
}
#include "R600GenAsmWriter.inc"

View File

@ -218,13 +218,16 @@ protected:
raw_ostream &O);
};
// FIXME: R600 specific parts of AMDGPUInstrPrinter should be moved here, and
// MCTargetDesc should be using R600InstPrinter for the R600 target.
class R600InstPrinter : public AMDGPUInstPrinter {
class R600InstPrinter : public MCInstPrinter {
public:
R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
: AMDGPUInstPrinter(MAI, MII, MRI) {}
: MCInstPrinter(MAI, MII, MRI) {}
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);

View File

@ -38,9 +38,17 @@ using namespace llvm;
#define GET_SUBTARGETINFO_MC_DESC
#include "AMDGPUGenSubtargetInfo.inc"
#define NoSchedModel NoSchedModelR600
#define GET_SUBTARGETINFO_MC_DESC
#include "R600GenSubtargetInfo.inc"
#undef NoSchedModelR600
#define GET_REGINFO_MC_DESC
#include "AMDGPUGenRegisterInfo.inc"
#define GET_REGINFO_MC_DESC
#include "R600GenRegisterInfo.inc"
static MCInstrInfo *createAMDGPUMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitAMDGPUMCInstrInfo(X);
@ -49,12 +57,17 @@ static MCInstrInfo *createAMDGPUMCInstrInfo() {
static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitAMDGPUMCRegisterInfo(X, 0);
if (TT.getArch() == Triple::r600)
InitR600MCRegisterInfo(X, 0);
else
InitAMDGPUMCRegisterInfo(X, 0);
return X;
}
static MCSubtargetInfo *
createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
if (TT.getArch() == Triple::r600)
return createR600MCSubtargetInfoImpl(TT, CPU, FS);
return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS);
}
@ -63,8 +76,10 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI) {
return T.getArch() == Triple::r600 ? new R600InstPrinter(MAI, MII, MRI) :
new AMDGPUInstPrinter(MAI, MII, MRI);
if (T.getArch() == Triple::r600)
return new R600InstPrinter(MAI, MII, MRI);
else
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S,
@ -90,10 +105,12 @@ static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
}
extern "C" void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo);
for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);

View File

@ -40,6 +40,7 @@ Target &getTheGCNTarget();
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCInstrInfo *createR600MCInstrInfo();
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@ -59,6 +60,10 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM
#define GET_REGINFO_ENUM
#include "R600GenRegisterInfo.inc"
#undef GET_REGINFO_ENUM
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
@ -67,9 +72,20 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
#undef GET_INSTRINFO_OPERAND_ENUM
#undef GET_INSTRINFO_ENUM
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
#include "R600GenInstrInfo.inc"
#undef GET_INSTRINFO_SCHED_ENUM
#undef GET_INSTRINFO_OPERAND_ENUM
#undef GET_INSTRINFO_ENUM
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_ENUM
#include "R600GenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
#endif
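// Illustrative sketch (not part of the patch): a consumer that needs only the
// R600 register enums can pull in just the R600 table, mirroring the includes
// above; R600::ALU_LITERAL_X below is one of the names the renamed passes use.
//
//   #define GET_REGINFO_ENUM
//   #include "R600GenRegisterInfo.inc"
//   #undef GET_REGINFO_ENUM
//
//   static bool isR600LiteralReg(unsigned Reg) {
//     return Reg == R600::ALU_LITERAL_X;
//   }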

View File

@ -8,5 +8,6 @@ add_llvm_library(LLVMAMDGPUDesc
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
R600MCCodeEmitter.cpp
R600MCTargetDesc.cpp
SIMCCodeEmitter.cpp
)

View File

@ -15,7 +15,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
@ -36,30 +35,40 @@ using namespace llvm;
namespace {
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
class R600MCCodeEmitter : public MCCodeEmitter {
const MCRegisterInfo &MRI;
const MCInstrInfo &MCII;
public:
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
: AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
: MRI(mri), MCII(mcii) {}
R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete;
/// Encode the instruction and write it to the OS.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
const MCSubtargetInfo &STI) const;
/// \returns the encoding for an MCOperand.
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
const MCSubtargetInfo &STI) const;
private:
void Emit(uint32_t value, raw_ostream &OS) const;
void Emit(uint64_t value, raw_ostream &OS) const;
unsigned getHWReg(unsigned regNo) const;
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
} // end anonymous namespace
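// Note (not part of the patch): getBinaryCodeForInstr, computeAvailableFeatures,
// and verifyInstructionPredicates declared above are defined by the
// R600GenMCCodeEmitter.inc included at the bottom of this file (generated from
// R600.td by -gen-emitter); declaring them locally is what allows the class to
// drop its AMDGPUMCCodeEmitter base.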
@ -94,16 +103,16 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
computeAvailableFeatures(STI.getFeatureBits()));
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) {
if (MI.getOpcode() == R600::RETURN ||
MI.getOpcode() == R600::FETCH_CLAUSE ||
MI.getOpcode() == R600::ALU_CLAUSE ||
MI.getOpcode() == R600::BUNDLE ||
MI.getOpcode() == R600::KILL) {
return;
} else if (IS_VTX(Desc)) {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {
InstWord2 |= 1 << 19; // Mega-Fetch bit
}
@ -136,7 +145,7 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
Emit((uint32_t) 0, OS);
} else {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) &&
((Desc.TSFlags & R600_InstFlag::OP1) ||
Desc.TSFlags & R600_InstFlag::OP2)) {
uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
@ -186,4 +195,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
}
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AMDGPUGenMCCodeEmitter.inc"
#include "R600GenMCCodeEmitter.inc"

View File

@ -0,0 +1,27 @@
//===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This file provides R600 specific target descriptions.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCInstrInfo.h"
using namespace llvm;
#define GET_INSTRINFO_MC_DESC
#include "R600GenInstrInfo.inc"
MCInstrInfo *llvm::createR600MCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitR600MCInstrInfo(X);
return X;
}
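// Illustrative usage sketch (not part of this file): AMDGPUMCTargetDesc.cpp
// registers this factory for the r600 triple, roughly as below, so MC clients
// of the r600 target see only the R600 instruction tables. TargetRegistry.h
// would have to be included for this helper to compile here.
static void registerR600MCInstrInfoExample() {
  TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(),
                                      createR600MCInstrInfo);
}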

View File

@ -438,3 +438,6 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
llvm_unreachable("Encoding of this operand type is not supported yet.");
return 0;
}
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AMDGPUGenMCCodeEmitter.inc"

View File

@ -0,0 +1,59 @@
//===-- R600.td - R600 Tablegen files ----------------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
def R600InstrInfo : InstrInfo {
let guessInstructionProperties = 1;
let noNamedPositionallyEncodedOperands = 1;
}
def R600 : Target {
let InstructionSet = R600InstrInfo;
let AllowRegisterRenaming = 1;
}
let Namespace = "R600" in {
foreach Index = 0-15 in {
def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
}
include "R600RegisterInfo.td"
}
def NullALU : InstrItinClass;
def ALU_NULL : FuncUnit;
include "AMDGPUFeatures.td"
include "R600Schedule.td"
include "R600Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUInstructions.td"
include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"
// Calling convention for R600
def CC_R600 : CallingConv<[
CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
T30_XYZW, T31_XYZW, T32_XYZW
]>>>
]>;
// Calling convention for compute kernels
def CC_R600_Kernel : CallingConv<[
CCCustom<"allocateKernArg">
]>;
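// Illustrative sketch (not part of the patch): the processor definitions come
// from R600Processors.td, included above, and follow the standard TableGen
// shape; the feature list below is only an example, not a real definition:
//
//   def : Processor<"cypress", R600_VLIW5_Itin,
//                   [FeatureEvergreen, FeatureFetchLimit16]>;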

View File

@ -51,7 +51,7 @@ void R600AsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
if (MI.getOpcode() == AMDGPU::KILLGT)
if (MI.getOpcode() == R600::KILLGT)
killPixel = true;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {

View File

@ -34,8 +34,8 @@ namespace {
static bool isCFAlu(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::CF_ALU:
case AMDGPU::CF_ALU_PUSH_BEFORE:
case R600::CF_ALU:
case R600::CF_ALU_PUSH_BEFORE:
return true;
default:
return false;
@ -85,20 +85,20 @@ char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
assert(isCFAlu(MI));
return MI
.getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
.getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
.getImm();
}
bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
assert(isCFAlu(MI));
return MI
.getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
.getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
.getImm();
}
void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
MachineInstr &CFAlu) const {
int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
I++;
do {
@ -117,7 +117,7 @@ void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
const MachineInstr &LatrCFAlu) const {
assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
unsigned RootInstCount = getCFAluSize(RootCFAlu),
LaterInstCount = getCFAluSize(LatrCFAlu);
unsigned CumuledInsts = RootInstCount + LaterInstCount;
@ -125,15 +125,15 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
LLVM_DEBUG(dbgs() << "Excess inst counts\n");
return false;
}
if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
return false;
// Is KCache Bank 0 compatible ?
int Mode0Idx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
int KBank0Idx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
int KBank0LineIdx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
RootCFAlu.getOperand(Mode0Idx).getImm() &&
(LatrCFAlu.getOperand(KBank0Idx).getImm() !=
@ -145,11 +145,11 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
}
// Is KCache Bank 1 compatible ?
int Mode1Idx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
int KBank1Idx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
int KBank1LineIdx =
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
RootCFAlu.getOperand(Mode1Idx).getImm() &&
(LatrCFAlu.getOperand(KBank1Idx).getImm() !=

View File

@ -94,7 +94,7 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {
}
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
getLoopDepth() > 1)
return true;
@ -103,10 +103,10 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
switch(Opcode) {
default: return false;
case AMDGPU::CF_ALU_PUSH_BEFORE:
case AMDGPU::CF_ALU_ELSE_AFTER:
case AMDGPU::CF_ALU_BREAK:
case AMDGPU::CF_ALU_CONTINUE:
case R600::CF_ALU_PUSH_BEFORE:
case R600::CF_ALU_ELSE_AFTER:
case R600::CF_ALU_BREAK:
case R600::CF_ALU_CONTINUE:
if (CurrentSubEntries == 0)
return false;
if (ST->getWavefrontSize() == 64) {
@ -168,8 +168,8 @@ void CFStack::updateMaxStackSize() {
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
CFStack::StackItem Item = CFStack::ENTRY;
switch(Opcode) {
case AMDGPU::CF_PUSH_EG:
case AMDGPU::CF_ALU_PUSH_BEFORE:
case R600::CF_PUSH_EG:
case R600::CF_ALU_PUSH_BEFORE:
if (!isWQM) {
if (!ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
@ -240,8 +240,8 @@ private:
bool IsTrivialInst(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
case R600::KILL:
case R600::RETURN:
return true;
default:
return false;
@ -253,41 +253,41 @@ private:
bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
break;
case CF_VC:
Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
break;
case CF_CALL_FS:
Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
break;
case CF_WHILE_LOOP:
Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
break;
case CF_END_LOOP:
Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
break;
case CF_LOOP_BREAK:
Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
break;
case CF_LOOP_CONTINUE:
Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
break;
case CF_JUMP:
Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
break;
case CF_ELSE:
Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
break;
case CF_POP:
Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
Opcode = isEg ? R600::POP_EG : R600::POP_R600;
break;
case CF_END:
if (ST->hasCaymanISA()) {
Opcode = AMDGPU::CF_END_CM;
Opcode = R600::CF_END_CM;
break;
}
Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
break;
}
assert (Opcode && "No opcode selected");
@ -305,21 +305,21 @@ private:
continue;
if (MO.isDef()) {
unsigned Reg = MO.getReg();
if (AMDGPU::R600_Reg128RegClass.contains(Reg))
if (R600::R600_Reg128RegClass.contains(Reg))
DstMI = Reg;
else
DstMI = TRI->getMatchingSuperReg(Reg,
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&AMDGPU::R600_Reg128RegClass);
&R600::R600_Reg128RegClass);
}
if (MO.isUse()) {
unsigned Reg = MO.getReg();
if (AMDGPU::R600_Reg128RegClass.contains(Reg))
if (R600::R600_Reg128RegClass.contains(Reg))
SrcMI = Reg;
else
SrcMI = TRI->getMatchingSuperReg(Reg,
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
&AMDGPU::R600_Reg128RegClass);
&R600::R600_Reg128RegClass);
}
}
if ((DstRegs.find(SrcMI) == DstRegs.end())) {
@ -359,15 +359,15 @@ private:
void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
static const unsigned LiteralRegs[] = {
AMDGPU::ALU_LITERAL_X,
AMDGPU::ALU_LITERAL_Y,
AMDGPU::ALU_LITERAL_Z,
AMDGPU::ALU_LITERAL_W
R600::ALU_LITERAL_X,
R600::ALU_LITERAL_Y,
R600::ALU_LITERAL_Z,
R600::ALU_LITERAL_W
};
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
TII->getSrcs(MI);
for (const auto &Src:Srcs) {
if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
if (Src.first->getReg() != R600::ALU_LITERAL_X)
continue;
int64_t Imm = Src.second;
std::vector<MachineOperand *>::iterator It =
@ -377,7 +377,7 @@ private:
// Get corresponding Operand
MachineOperand &Operand = MI.getOperand(
TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
if (It != Lits.end()) {
// Reuse existing literal reg
@ -400,7 +400,7 @@ private:
unsigned LiteralPair0 = Literals[i];
unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
TII->get(AMDGPU::LITERALS))
TII->get(R600::LITERALS))
.addImm(LiteralPair0)
.addImm(LiteralPair1);
}
@ -442,7 +442,7 @@ private:
}
for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
TII->get(AMDGPU::LITERALS));
TII->get(R600::LITERALS));
if (Literals[i]->isImm()) {
MILit.addImm(Literals[i]->getImm());
} else {
@ -471,7 +471,7 @@ private:
unsigned &CfCount) {
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount);
BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
@ -483,7 +483,7 @@ private:
Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount);
BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
@ -540,34 +540,34 @@ public:
}
MachineBasicBlock::iterator MI = I;
if (MI->getOpcode() != AMDGPU::ENDIF)
if (MI->getOpcode() != R600::ENDIF)
LastAlu.back() = nullptr;
if (MI->getOpcode() == AMDGPU::CF_ALU)
if (MI->getOpcode() == R600::CF_ALU)
LastAlu.back() = &*MI;
I++;
bool RequiresWorkAround =
CFStack.requiresWorkAroundForInst(MI->getOpcode());
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
case R600::CF_ALU_PUSH_BEFORE:
if (RequiresWorkAround) {
LLVM_DEBUG(dbgs()
<< "Applying bug work-around for ALU_PUSH_BEFORE\n");
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
.addImm(CfCount + 1)
.addImm(1);
MI->setDesc(TII->get(AMDGPU::CF_ALU));
MI->setDesc(TII->get(R600::CF_ALU));
CfCount++;
CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
CFStack.pushBranch(R600::CF_PUSH_EG);
} else
CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
LLVM_FALLTHROUGH;
case AMDGPU::CF_ALU:
case R600::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
case AMDGPU::WHILELOOP: {
case R600::WHILELOOP: {
CFStack.pushLoop();
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
@ -580,7 +580,7 @@ public:
CfCount++;
break;
}
case AMDGPU::ENDLOOP: {
case R600::ENDLOOP: {
CFStack.popLoop();
std::pair<unsigned, std::set<MachineInstr *>> Pair =
std::move(LoopStack.back());
@ -592,7 +592,7 @@ public:
CfCount++;
break;
}
case AMDGPU::IF_PREDICATE_SET: {
case R600::IF_PREDICATE_SET: {
LastAlu.push_back(nullptr);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
@ -604,7 +604,7 @@ public:
CfCount++;
break;
}
case AMDGPU::ELSE: {
case R600::ELSE: {
MachineInstr * JumpInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
CounterPropagateAddr(*JumpInst, CfCount);
@ -618,7 +618,7 @@ public:
CfCount++;
break;
}
case AMDGPU::ENDIF: {
case R600::ENDIF: {
CFStack.popBranch();
if (LastAlu.back()) {
ToPopAfter.push_back(LastAlu.back());
@ -640,7 +640,7 @@ public:
MI->eraseFromParent();
break;
}
case AMDGPU::BREAK: {
case R600::BREAK: {
CfCount ++;
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_BREAK))
@ -649,7 +649,7 @@ public:
MI->eraseFromParent();
break;
}
case AMDGPU::CONTINUE: {
case R600::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0);
@ -658,12 +658,12 @@ public:
CfCount++;
break;
}
case AMDGPU::RETURN: {
case R600::RETURN: {
DebugLoc DL = MBB.findDebugLoc(MI);
BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
CfCount++;
if (CfCount % 2) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD));
BuildMI(MBB, I, DL, TII->get(R600::PAD));
CfCount++;
}
MI->eraseFromParent();
@ -684,7 +684,7 @@ public:
for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
MachineInstr *Alu = ToPopAfter[i];
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
TII->get(AMDGPU::CF_ALU_POP_AFTER))
TII->get(R600::CF_ALU_POP_AFTER))
.addImm(Alu->getOperand(0).getImm())
.addImm(Alu->getOperand(1).getImm())
.addImm(Alu->getOperand(2).getImm())

View File

@ -52,12 +52,12 @@ private:
unsigned OccupiedDwords(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::DOT_4:
case R600::INTERP_PAIR_XY:
case R600::INTERP_PAIR_ZW:
case R600::INTERP_VEC_LOAD:
case R600::DOT_4:
return 4;
case AMDGPU::KILL:
case R600::KILL:
return 0;
default:
break;
@ -77,7 +77,7 @@ private:
E = MI.operands_end();
It != E; ++It) {
MachineOperand &MO = *It;
if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
++NumLiteral;
}
return 1 + NumLiteral;
@ -89,12 +89,12 @@ private:
if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
return true;
switch (MI.getOpcode()) {
case AMDGPU::PRED_X:
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::COPY:
case AMDGPU::DOT_4:
case R600::PRED_X:
case R600::INTERP_PAIR_XY:
case R600::INTERP_PAIR_ZW:
case R600::INTERP_VEC_LOAD:
case R600::COPY:
case R600::DOT_4:
return true;
default:
return false;
@ -103,9 +103,9 @@ private:
bool IsTrivialInst(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
case AMDGPU::IMPLICIT_DEF:
case R600::KILL:
case R600::RETURN:
case R600::IMPLICIT_DEF:
return true;
default:
return false;
@ -132,16 +132,16 @@ private:
bool UpdateInstr = true) const {
std::vector<std::pair<unsigned, unsigned>> UsedKCache;
if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
return true;
const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
TII->getSrcs(MI);
assert(
(TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) &&
(TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
"Can't assign Const");
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
if (Consts[i].first->getReg() != R600::ALU_CONST)
continue;
unsigned Sel = Consts[i].second;
unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
@ -172,16 +172,16 @@ private:
return true;
for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
if (Consts[i].first->getReg() != R600::ALU_CONST)
continue;
switch(UsedKCache[j].first) {
case 0:
Consts[i].first->setReg(
AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
break;
case 1:
Consts[i].first->setReg(
AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
break;
default:
llvm_unreachable("Wrong Cache Line");
@ -253,7 +253,7 @@ private:
break;
if (AluInstCount > TII->getMaxAlusPerClause())
break;
if (I->getOpcode() == AMDGPU::PRED_X) {
if (I->getOpcode() == R600::PRED_X) {
// We put PRED_X in its own clause to ensure that ifcvt won't create
// clauses with more than 128 insts.
// IfCvt is indeed checking that "then" and "else" branches of an if
@ -289,7 +289,7 @@ private:
AluInstCount += OccupiedDwords(*I);
}
unsigned Opcode = PushBeforeModifier ?
AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
// We don't use the ADDR field until R600ControlFlowFinalizer pass, where
// it is safe to assume it is 0. However if we always put 0 here, the ifcvt
@ -322,7 +322,7 @@ public:
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
MachineBasicBlock::iterator I = MBB.begin();
if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
continue; // BB was already parsed
for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
if (isALU(*I)) {

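In the K-cache substitution above, an ALU_CONST selector is split into a
channel and a constant-buffer line with Chan = Sel & 3 and
Index = ((Sel >> 2) - 512) & 31. A small self-contained sketch of that
decoding (illustrative only; the selector values below are made up):

#include <cstdio>

// Decode a constant selector the same way the clause-marker code above does:
// the low two bits select the channel, the remaining bits (biased by 512)
// select the line, masked into a 32-entry window.
static void decodeConstSel(unsigned Sel) {
  unsigned Chan = Sel & 3;
  unsigned Index = ((Sel >> 2) - 512) & 31;
  std::printf("sel %u -> line %u, chan %u\n", Sel, Index, Chan);
}

int main() {
  decodeConstSel(2048); // line 0, chan 0
  decodeConstSel(2051); // line 0, chan 3
  decodeConstSel(2056); // line 2, chan 0
  return 0;
}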

@ -96,16 +96,16 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
// Expand LDS_*_RET instructions
if (TII->isLDSRetInstr(MI.getOpcode())) {
int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
assert(DstIdx != -1);
MachineOperand &DstOp = MI.getOperand(DstIdx);
MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
DstOp.getReg(), AMDGPU::OQAP);
DstOp.setReg(AMDGPU::OQAP);
DstOp.getReg(), R600::OQAP);
DstOp.setReg(R600::OQAP);
int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
AMDGPU::OpName::pred_sel);
R600::OpName::pred_sel);
int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
AMDGPU::OpName::pred_sel);
R600::OpName::pred_sel);
// Copy the pred_sel bit
Mov->getOperand(MovPredSelIdx).setReg(
MI.getOperand(LDSPredSelIdx).getReg());
@ -114,7 +114,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default: break;
// Expand PRED_X to one of the PRED_SET instructions.
case AMDGPU::PRED_X: {
case R600::PRED_X: {
uint64_t Flags = MI.getOperand(3).getImm();
// The native opcode used by PRED_X is stored as an immediate in the
// third operand.
@ -122,17 +122,18 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MI.getOperand(2).getImm(), // opcode
MI.getOperand(0).getReg(), // dst
MI.getOperand(1).getReg(), // src0
AMDGPU::ZERO); // src1
R600::ZERO); // src1
TII->addFlag(*PredSet, 0, MO_FLAG_MASK);
if (Flags & MO_FLAG_PUSH) {
TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1);
TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1);
} else {
TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1);
TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1);
}
MI.eraseFromParent();
continue;
}
case AMDGPU::DOT_4: {
case R600::DOT_4: {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
unsigned DstReg = MI.getOperand(0).getReg();
@ -141,7 +142,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
for (unsigned Chan = 0; Chan < 4; ++Chan) {
bool Mask = (Chan != TRI.getHWRegChan(DstReg));
unsigned SubDstReg =
AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
MachineInstr *BMI =
TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
if (Chan > 0) {
@ -156,10 +157,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
// While not strictly necessary from hw point of view, we force
// all src operands of a dot4 inst to belong to the same slot.
unsigned Src0 = BMI->getOperand(
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0))
TII->getOperandIdx(Opcode, R600::OpName::src0))
.getReg();
unsigned Src1 = BMI->getOperand(
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1))
TII->getOperandIdx(Opcode, R600::OpName::src1))
.getReg();
(void) Src0;
(void) Src1;
@ -206,14 +207,14 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
// T0_W = CUBE T1_Y, T1_Z
for (unsigned Chan = 0; Chan < 4; Chan++) {
unsigned DstReg = MI.getOperand(
TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg();
TII->getOperandIdx(MI, R600::OpName::dst)).getReg();
unsigned Src0 = MI.getOperand(
TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg();
TII->getOperandIdx(MI, R600::OpName::src0)).getReg();
unsigned Src1 = 0;
// Determine the correct source registers
if (!IsCube) {
int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1);
int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1);
if (Src1Idx != -1) {
Src1 = MI.getOperand(Src1Idx).getReg();
}
@ -241,7 +242,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
// the current Channel.
Mask = (Chan != TRI.getHWRegChan(DstReg));
unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
}
// Set the IsLast bit
@ -250,11 +251,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
// Add the new instruction
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
case AMDGPU::CUBE_r600_pseudo:
Opcode = AMDGPU::CUBE_r600_real;
case R600::CUBE_r600_pseudo:
Opcode = R600::CUBE_r600_real;
break;
case AMDGPU::CUBE_eg_pseudo:
Opcode = AMDGPU::CUBE_eg_real;
case R600::CUBE_eg_pseudo:
Opcode = R600::CUBE_eg_real;
break;
default:
break;
@ -271,12 +272,12 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
if (NotLast) {
TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);
}
SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp);
SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal);
SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs);
SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs);
SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg);
SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg);
SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp);
SetFlagInNewMI(NewMI, &MI, R600::OpName::literal);
SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs);
SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs);
SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg);
SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg);
}
MI.eraseFromParent();
}

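The DOT_4/CUBE expansion above rewrites one vector instruction into four
per-channel slots: slot Chan of a vector register with encoding DstBase
becomes scalar register DstBase * 4 + Chan, and every channel other than the
real destination channel is write-masked. A toy model of that bookkeeping
(the register numbers are hypothetical):

#include <cstdio>

// Toy model of the per-channel expansion: compute the flattened scalar
// register index and whether the slot is write-masked.
struct Slot {
  unsigned ScalarReg;
  bool Masked;
};

static Slot expandChannel(unsigned DstBase, unsigned DstChan, unsigned Chan) {
  return {DstBase * 4 + Chan, Chan != DstChan};
}

int main() {
  // e.g. a vector op whose real destination is channel 2 of register base 5
  for (unsigned Chan = 0; Chan < 4; ++Chan) {
    Slot S = expandChannel(5, 2, Chan);
    std::printf("chan %u -> reg %u%s\n", Chan, S.ScalarReg,
                S.Masked ? " (masked)" : "");
  }
  return 0;
}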

@ -14,7 +14,6 @@
#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
@ -51,17 +50,31 @@
using namespace llvm;
static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
MachineFunction &MF = State.getMachineFunction();
AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
ArgFlags.getOrigAlign());
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
#include "R600GenCallingConv.inc"
R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
const R600Subtarget &STI)
: AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
: AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
computeRegisterProperties(STI.getRegisterInfo());
computeRegisterProperties(Subtarget->getRegisterInfo());
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
@ -148,6 +161,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f32, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Custom);
setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
setOperationAction(ISD::FRINT, MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@ -216,6 +234,34 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
}
// FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
// need it for R600.
if (!Subtarget->hasFP32Denormals())
setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
}
if (!Subtarget->hasBCNT(32))
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
if (Subtarget->hasFFBH())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
if (Subtarget->hasFFBL())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
// FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
// need it for R600.
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@ -246,14 +292,10 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::LOAD);
}
const R600Subtarget *R600TargetLowering::getSubtarget() const {
return static_cast<const R600Subtarget *>(Subtarget);
}
static inline bool isEOP(MachineBasicBlock::iterator I) {
if (std::next(I) == I->getParent()->end())
return false;
return std::next(I)->getOpcode() == AMDGPU::RETURN;
return std::next(I)->getOpcode() == R600::RETURN;
}
MachineBasicBlock *
@ -262,24 +304,24 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = MI;
const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
const R600InstrInfo *TII = Subtarget->getInstrInfo();
switch (MI.getOpcode()) {
default:
// Replace LDS_*_RET instruction that don't have any uses with the
// equivalent LDS_*_NORET instruction.
if (TII->isLDSRetInstr(MI.getOpcode())) {
int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
assert(DstIdx != -1);
MachineInstrBuilder NewMI;
// FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
// LDS_1A2D support and remove this special case.
if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
MI.getOpcode() == R600::LDS_CMPST_RET)
return BB;
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
NewMI.add(MI.getOperand(i));
}
@ -288,23 +330,23 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
break;
case AMDGPU::FABS_R600: {
case R600::FABS_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
*BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
*BB, I, R600::MOV, MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
break;
}
case AMDGPU::FNEG_R600: {
case R600::FNEG_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
*BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
*BB, I, R600::MOV, MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
break;
}
case AMDGPU::MASK_WRITE: {
case R600::MASK_WRITE: {
unsigned maskedRegister = MI.getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
@ -312,7 +354,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
break;
}
case AMDGPU::MOV_IMM_F32:
case R600::MOV_IMM_F32:
TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
.getFPImm()
->getValueAPF()
@ -320,39 +362,39 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.getZExtValue());
break;
case AMDGPU::MOV_IMM_I32:
case R600::MOV_IMM_I32:
TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
MI.getOperand(1).getImm());
break;
case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
case R600::MOV_IMM_GLOBAL_ADDR: {
//TODO: Perhaps combine this instruction with the next if possible
auto MIB = TII->buildDefaultInstruction(
*BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
*BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
//TODO: Ugh this is rather ugly
MIB->getOperand(Idx) = MI.getOperand(1);
break;
}
case AMDGPU::CONST_COPY: {
case R600::CONST_COPY: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
*BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
*BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
MI.getOperand(1).getImm());
break;
}
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
case R600::RAT_WRITE_CACHELESS_32_eg:
case R600::RAT_WRITE_CACHELESS_64_eg:
case R600::RAT_WRITE_CACHELESS_128_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.addImm(isEOP(I)); // Set End of program bit
break;
case AMDGPU::RAT_STORE_TYPED_eg:
case R600::RAT_STORE_TYPED_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
@ -360,49 +402,49 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addImm(isEOP(I)); // Set End of program bit
break;
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
case R600::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
.add(MI.getOperand(0));
break;
case AMDGPU::BRANCH_COND_f32: {
case R600::BRANCH_COND_f32: {
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
R600::PREDICATE_BIT)
.add(MI.getOperand(1))
.addImm(AMDGPU::PRED_SETNE)
.addImm(R600::PRED_SETNE)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
.add(MI.getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
.addReg(R600::PREDICATE_BIT, RegState::Kill);
break;
}
case AMDGPU::BRANCH_COND_i32: {
case R600::BRANCH_COND_i32: {
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
R600::PREDICATE_BIT)
.add(MI.getOperand(1))
.addImm(AMDGPU::PRED_SETNE_INT)
.addImm(R600::PRED_SETNE_INT)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
.add(MI.getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
.addReg(R600::PREDICATE_BIT, RegState::Kill);
break;
}
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: {
case R600::EG_ExportSwz:
case R600::R600_ExportSwz: {
// Instruction is left unmodified if its not the last one of its type
bool isLastInstructionOfItsType = true;
unsigned InstExportType = MI.getOperand(1).getImm();
for (MachineBasicBlock::iterator NextExportInst = std::next(I),
EndBlock = BB->end(); NextExportInst != EndBlock;
NextExportInst = std::next(NextExportInst)) {
if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
NextExportInst->getOpcode() == R600::R600_ExportSwz) {
unsigned CurrentInstExportType = NextExportInst->getOperand(1)
.getImm();
if (CurrentInstExportType == InstExportType) {
@ -414,7 +456,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
bool EOP = isEOP(I);
if (!EOP && !isLastInstructionOfItsType)
return BB;
unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
@ -427,7 +469,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addImm(EOP);
break;
}
case AMDGPU::RETURN: {
case R600::RETURN: {
return BB;
}
}
@ -583,23 +625,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return LowerImplicitParameter(DAG, VT, DL, 8);
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_X, VT);
case Intrinsic::r600_read_tgid_y:
return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Y, VT);
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Y, VT);
case Intrinsic::r600_read_tgid_z:
return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Z, VT);
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T1_Z, VT);
case Intrinsic::r600_read_tidig_x:
return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_X, VT);
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_X, VT);
case Intrinsic::r600_read_tidig_y:
return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Y, VT);
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Y, VT);
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
R600::T0_Z, VT);
case Intrinsic::r600_recipsqrt_ieee:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
@ -1521,7 +1563,7 @@ SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
const R600FrameLowering *TFL = Subtarget->getFrameLowering();
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
@ -1533,6 +1575,28 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
Op.getValueType());
}
CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
return CC_R600_Kernel;
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
return CC_R600;
default:
report_fatal_error("Unsupported calling convention.");
}
}
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
@ -1563,7 +1627,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
if (AMDGPU::isShader(CallConv)) {
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
InVals.push_back(Register);
continue;
@ -1594,7 +1658,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) +
unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
@ -1981,26 +2045,26 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
SDValue &Src, SDValue &Neg, SDValue &Abs,
SDValue &Sel, SDValue &Imm,
SelectionDAG &DAG) const {
const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
const R600InstrInfo *TII = Subtarget->getInstrInfo();
if (!Src.isMachineOpcode())
return false;
switch (Src.getMachineOpcode()) {
case AMDGPU::FNEG_R600:
case R600::FNEG_R600:
if (!Neg.getNode())
return false;
Src = Src.getOperand(0);
Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
case AMDGPU::FABS_R600:
case R600::FABS_R600:
if (!Abs.getNode())
return false;
Src = Src.getOperand(0);
Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
case AMDGPU::CONST_COPY: {
case R600::CONST_COPY: {
unsigned Opcode = ParentNode->getMachineOpcode();
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
if (!Sel.getNode())
return false;
@ -2011,17 +2075,17 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
// Gather constants values
int SrcIndices[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
TII->getOperandIdx(Opcode, R600::OpName::src0),
TII->getOperandIdx(Opcode, R600::OpName::src1),
TII->getOperandIdx(Opcode, R600::OpName::src2),
TII->getOperandIdx(Opcode, R600::OpName::src0_X),
TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
TII->getOperandIdx(Opcode, R600::OpName::src0_W),
TII->getOperandIdx(Opcode, R600::OpName::src1_X),
TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
TII->getOperandIdx(Opcode, R600::OpName::src1_W)
};
std::vector<unsigned> Consts;
for (int OtherSrcIdx : SrcIndices) {
@ -2034,7 +2098,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
}
if (RegisterSDNode *Reg =
dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
if (Reg->getReg() == AMDGPU::ALU_CONST) {
if (Reg->getReg() == R600::ALU_CONST) {
ConstantSDNode *Cst
= cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Consts.push_back(Cst->getZExtValue());
@ -2049,30 +2113,30 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
}
Sel = CstOffset;
Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
return true;
}
case AMDGPU::MOV_IMM_GLOBAL_ADDR:
case R600::MOV_IMM_GLOBAL_ADDR:
// Check if the Imm slot is used. Taken from below.
if (cast<ConstantSDNode>(Imm)->getZExtValue())
return false;
Imm = Src.getOperand(0);
Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
return true;
case AMDGPU::MOV_IMM_I32:
case AMDGPU::MOV_IMM_F32: {
unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
case R600::MOV_IMM_I32:
case R600::MOV_IMM_F32: {
unsigned ImmReg = R600::ALU_LITERAL_X;
uint64_t ImmValue = 0;
if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
float FloatValue = FPC->getValueAPF().convertToFloat();
if (FloatValue == 0.0) {
ImmReg = AMDGPU::ZERO;
ImmReg = R600::ZERO;
} else if (FloatValue == 0.5) {
ImmReg = AMDGPU::HALF;
ImmReg = R600::HALF;
} else if (FloatValue == 1.0) {
ImmReg = AMDGPU::ONE;
ImmReg = R600::ONE;
} else {
ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
}
@ -2080,9 +2144,9 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
uint64_t Value = C->getZExtValue();
if (Value == 0) {
ImmReg = AMDGPU::ZERO;
ImmReg = R600::ZERO;
} else if (Value == 1) {
ImmReg = AMDGPU::ONE_INT;
ImmReg = R600::ONE_INT;
} else {
ImmValue = Value;
}
@ -2091,7 +2155,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
// Check that we aren't already using an immediate.
// XXX: It's possible for an instruction to have more than one
// immediate operand, but this is not supported yet.
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
if (ImmReg == R600::ALU_LITERAL_X) {
if (!Imm.getNode())
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
@ -2111,7 +2175,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
/// Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
const R600InstrInfo *TII = Subtarget->getInstrInfo();
if (!Node->isMachineOpcode())
return Node;
@ -2120,36 +2184,36 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
if (Opcode == AMDGPU::DOT_4) {
if (Opcode == R600::DOT_4) {
int OperandIdx[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
TII->getOperandIdx(Opcode, R600::OpName::src0_X),
TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
TII->getOperandIdx(Opcode, R600::OpName::src0_W),
TII->getOperandIdx(Opcode, R600::OpName::src1_X),
TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
TII->getOperandIdx(Opcode, R600::OpName::src1_W)
};
int NegIdx[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
};
int AbsIdx[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
};
for (unsigned i = 0; i < 8; i++) {
if (OperandIdx[i] < 0)
@ -2157,7 +2221,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SDValue &Src = Ops[OperandIdx[i] - 1];
SDValue &Neg = Ops[NegIdx[i] - 1];
SDValue &Abs = Ops[AbsIdx[i] - 1];
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
if (HasDst)
SelIdx--;
@ -2165,7 +2229,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
} else if (Opcode == AMDGPU::REG_SEQUENCE) {
} else if (Opcode == R600::REG_SEQUENCE) {
for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
SDValue &Src = Ops[i];
if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
@ -2175,18 +2239,18 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
if (!TII->hasInstrModifiers(Opcode))
return Node;
int OperandIdx[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
TII->getOperandIdx(Opcode, R600::OpName::src0),
TII->getOperandIdx(Opcode, R600::OpName::src1),
TII->getOperandIdx(Opcode, R600::OpName::src2)
};
int NegIdx[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
};
int AbsIdx[] = {
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
-1
};
for (unsigned i = 0; i < 3; i++) {
@ -2196,9 +2260,9 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SDValue &Neg = Ops[NegIdx[i] - 1];
SDValue FakeAbs;
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
if (HasDst) {
SelIdx--;
ImmIdx--;

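FoldOperand above maps a few special constants onto dedicated inline registers
(0.0 -> ZERO, 0.5 -> HALF, 1.0 -> ONE for floats, 0 -> ZERO and 1 -> ONE_INT
for integers) and routes everything else through ALU_LITERAL_X. A stripped-down
sketch of that decision, with plain strings standing in for register ids
(illustrative only):

#include <cstdio>
#include <initializer_list>

// Reduced model of the immediate folding above: a few special values get an
// inline register, anything else needs the literal slot.
static const char *foldFPImm(float V) {
  if (V == 0.0f) return "ZERO";
  if (V == 0.5f) return "HALF";
  if (V == 1.0f) return "ONE";
  return "ALU_LITERAL_X"; // raw bits go in the literal operand
}

int main() {
  for (float V : {0.0f, 0.5f, 1.0f, 2.5f})
    std::printf("%g -> %s\n", V, foldFPImm(V));
  return 0;
}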

@ -23,6 +23,8 @@ class R600InstrInfo;
class R600Subtarget;
class R600TargetLowering final : public AMDGPUTargetLowering {
const R600Subtarget *Subtarget;
public:
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI);
@ -36,6 +38,7 @@ public:
void ReplaceNodeResults(SDNode * N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,


@ -41,7 +41,7 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
bit LDS_1A2D = 0;
let SubtargetPredicate = isR600toCayman;
let Namespace = "AMDGPU";
let Namespace = "R600";
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asm;

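The Namespace change above (from "AMDGPU" to "R600") is what drives most of
this diff: TableGen emits the generated opcode and register enumerations into
that C++ namespace, so hand-written code now spells R600::MOV rather than
AMDGPU::MOV. A heavily simplified sketch of the resulting shape (the
enumerators and their values below are made up; the real R600GenInstrInfo.inc
is generated and far larger):

#include <cstdio>

// Simplified stand-in for the generated instruction enum: what matters here
// is only that it lives in namespace R600 instead of AMDGPU.
namespace R600 {
enum Opcode { MOV, DOT_4, CF_ALU /* ... */ };
}

int main() {
  std::printf("R600::DOT_4 = %d\n", static_cast<int>(R600::DOT_4));
  return 0;
}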

@ -45,10 +45,15 @@
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
#include "R600GenDFAPacketizer.inc"
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_NAMED_OPS
#include "R600GenInstrInfo.inc"
R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
: AMDGPUInstrInfo(ST), RI(), ST(ST) {}
: R600GenInstrInfo(-1, -1), RI(), ST(ST) {}
bool R600InstrInfo::isVector(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
@ -59,31 +64,31 @@ void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
unsigned VectorComponents = 0;
if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
(AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
if ((R600::R600_Reg128RegClass.contains(DestReg) ||
R600::R600_Reg128VerticalRegClass.contains(DestReg)) &&
(R600::R600_Reg128RegClass.contains(SrcReg) ||
R600::R600_Reg128VerticalRegClass.contains(SrcReg))) {
VectorComponents = 4;
} else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
(AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
} else if((R600::R600_Reg64RegClass.contains(DestReg) ||
R600::R600_Reg64VerticalRegClass.contains(DestReg)) &&
(R600::R600_Reg64RegClass.contains(SrcReg) ||
R600::R600_Reg64VerticalRegClass.contains(SrcReg))) {
VectorComponents = 2;
}
if (VectorComponents > 0) {
for (unsigned I = 0; I < VectorComponents; I++) {
unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I);
buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
buildDefaultInstruction(MBB, MI, R600::MOV,
RI.getSubReg(DestReg, SubRegIndex),
RI.getSubReg(SrcReg, SubRegIndex))
.addReg(DestReg,
RegState::Define | RegState::Implicit);
}
} else {
MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, R600::MOV,
DestReg, SrcReg);
NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
NewMI->getOperand(getOperandIdx(*NewMI, R600::OpName::src0))
.setIsKill(KillSrc);
}
}
@ -104,9 +109,9 @@ bool R600InstrInfo::isMov(unsigned Opcode) const {
switch(Opcode) {
default:
return false;
case AMDGPU::MOV:
case AMDGPU::MOV_IMM_F32:
case AMDGPU::MOV_IMM_I32:
case R600::MOV:
case R600::MOV_IMM_F32:
case R600::MOV_IMM_I32:
return true;
}
}
@ -118,10 +123,10 @@ bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
switch(Opcode) {
default: return false;
case AMDGPU::CUBE_r600_pseudo:
case AMDGPU::CUBE_r600_real:
case AMDGPU::CUBE_eg_pseudo:
case AMDGPU::CUBE_eg_real:
case R600::CUBE_r600_pseudo:
case R600::CUBE_r600_real:
case R600::CUBE_eg_pseudo:
case R600::CUBE_eg_real:
return true;
}
}
@ -149,7 +154,7 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
}
bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
return isLDSInstr(Opcode) && getOperandIdx(Opcode, R600::OpName::dst) != -1;
}
bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
@ -158,12 +163,12 @@ bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
if (isVector(MI) || isCubeOp(MI.getOpcode()))
return true;
switch (MI.getOpcode()) {
case AMDGPU::PRED_X:
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::COPY:
case AMDGPU::DOT_4:
case R600::PRED_X:
case R600::INTERP_PAIR_XY:
case R600::INTERP_PAIR_ZW:
case R600::INTERP_VEC_LOAD:
case R600::COPY:
case R600::DOT_4:
return true;
default:
return false;
@ -173,7 +178,7 @@ bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
if (ST.hasCaymanISA())
return false;
return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
return (get(Opcode).getSchedClass() == R600::Sched::TransALU);
}
bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
@ -181,7 +186,7 @@ bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
}
bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
return (get(Opcode).getSchedClass() == R600::Sched::VecALU);
}
bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
@ -215,8 +220,8 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
switch (Opcode) {
case AMDGPU::KILLGT:
case AMDGPU::GROUP_BARRIER:
case R600::KILLGT:
case R600::GROUP_BARRIER:
return true;
default:
return false;
@ -224,11 +229,11 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
}
bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
return MI.findRegisterUseOperandIdx(R600::AR_X) != -1;
}
bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
return MI.findRegisterDefOperandIdx(R600::AR_X) != -1;
}
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
@ -242,7 +247,7 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
TargetRegisterInfo::isVirtualRegister(I->getReg()))
continue;
if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
return true;
}
return false;
@ -250,17 +255,17 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
static const unsigned SrcSelTable[][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
{AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
{AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
{AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
{AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
{AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
{AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
{AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
{R600::OpName::src0, R600::OpName::src0_sel},
{R600::OpName::src1, R600::OpName::src1_sel},
{R600::OpName::src2, R600::OpName::src2_sel},
{R600::OpName::src0_X, R600::OpName::src0_sel_X},
{R600::OpName::src0_Y, R600::OpName::src0_sel_Y},
{R600::OpName::src0_Z, R600::OpName::src0_sel_Z},
{R600::OpName::src0_W, R600::OpName::src0_sel_W},
{R600::OpName::src1_X, R600::OpName::src1_sel_X},
{R600::OpName::src1_Y, R600::OpName::src1_sel_Y},
{R600::OpName::src1_Z, R600::OpName::src1_sel_Z},
{R600::OpName::src1_W, R600::OpName::src1_sel_W}
};
for (const auto &Row : SrcSelTable) {
@ -275,23 +280,23 @@ SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr &MI) const {
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
if (MI.getOpcode() == AMDGPU::DOT_4) {
if (MI.getOpcode() == R600::DOT_4) {
static const unsigned OpTable[8][2] = {
{AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
{AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
{AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
{AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
{AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
{AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
{AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
{R600::OpName::src0_X, R600::OpName::src0_sel_X},
{R600::OpName::src0_Y, R600::OpName::src0_sel_Y},
{R600::OpName::src0_Z, R600::OpName::src0_sel_Z},
{R600::OpName::src0_W, R600::OpName::src0_sel_W},
{R600::OpName::src1_X, R600::OpName::src1_sel_X},
{R600::OpName::src1_Y, R600::OpName::src1_sel_Y},
{R600::OpName::src1_Z, R600::OpName::src1_sel_Z},
{R600::OpName::src1_W, R600::OpName::src1_sel_W},
};
for (unsigned j = 0; j < 8; j++) {
MachineOperand &MO =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
unsigned Reg = MO.getReg();
if (Reg == AMDGPU::ALU_CONST) {
if (Reg == R600::ALU_CONST) {
MachineOperand &Sel =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
Result.push_back(std::make_pair(&MO, Sel.getImm()));
@ -303,9 +308,9 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
}
static const unsigned OpTable[3][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
{R600::OpName::src0, R600::OpName::src0_sel},
{R600::OpName::src1, R600::OpName::src1_sel},
{R600::OpName::src2, R600::OpName::src2_sel},
};
for (unsigned j = 0; j < 3; j++) {
@ -314,15 +319,15 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
break;
MachineOperand &MO = MI.getOperand(SrcIdx);
unsigned Reg = MO.getReg();
if (Reg == AMDGPU::ALU_CONST) {
if (Reg == R600::ALU_CONST) {
MachineOperand &Sel =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
Result.push_back(std::make_pair(&MO, Sel.getImm()));
continue;
}
if (Reg == AMDGPU::ALU_LITERAL_X) {
if (Reg == R600::ALU_LITERAL_X) {
MachineOperand &Operand =
MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
MI.getOperand(getOperandIdx(MI.getOpcode(), R600::OpName::literal));
if (Operand.isImm()) {
Result.push_back(std::make_pair(&MO, Operand.getImm()));
continue;
@ -346,7 +351,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
++i;
unsigned Reg = Src.first->getReg();
int Index = RI.getEncodingValue(Reg) & 0xff;
if (Reg == AMDGPU::OQAP) {
if (Reg == R600::OQAP) {
Result.push_back(std::make_pair(Index, 0U));
}
if (PV.find(Reg) != PV.end()) {
@ -436,7 +441,7 @@ unsigned R600InstrInfo::isLegalUpTo(
const std::pair<int, unsigned> &Src = Srcs[j];
if (Src.first < 0 || Src.first == 255)
continue;
if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
if (Src.first == GET_REG_INDEX(RI.getEncodingValue(R600::OQAP))) {
if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
// The value from output queue A (denoted by register OQAP) can
@ -542,7 +547,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
unsigned Op = getOperandIdx(IG[i]->getOpcode(),
AMDGPU::OpName::bank_swizzle);
R600::OpName::bank_swizzle);
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
@ -611,14 +616,14 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
continue;
for (const auto &Src : getSrcs(MI)) {
if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
if (Src.first->getReg() == R600::ALU_LITERAL_X)
Literals.insert(Src.second);
if (Literals.size() > 4)
return false;
if (Src.first->getReg() == AMDGPU::ALU_CONST)
if (Src.first->getReg() == R600::ALU_CONST)
Consts.push_back(Src.second);
if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
if (R600::R600_KC0RegClass.contains(Src.first->getReg()) ||
R600::R600_KC1RegClass.contains(Src.first->getReg())) {
unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
unsigned Chan = RI.getHWRegChan(Src.first->getReg());
Consts.push_back((Index << 2) | Chan);
@ -637,7 +642,7 @@ R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
static bool
isPredicateSetter(unsigned Opcode) {
switch (Opcode) {
case AMDGPU::PRED_X:
case R600::PRED_X:
return true;
default:
return false;
@ -659,12 +664,12 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
static
bool isJump(unsigned Opcode) {
return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
return Opcode == R600::JUMP || Opcode == R600::JUMP_COND;
}
static bool isBranch(unsigned Opcode) {
return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
Opcode == AMDGPU::BRANCH_COND_f32;
return Opcode == R600::BRANCH || Opcode == R600::BRANCH_COND_i32 ||
Opcode == R600::BRANCH_COND_f32;
}
bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
@ -679,7 +684,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (I == MBB.end())
return false;
// AMDGPU::BRANCH* instructions are only available after isel and are not
// R600::BRANCH* instructions are only available after isel and are not
// handled
if (isBranch(I->getOpcode()))
return true;
@ -688,7 +693,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// Remove successive JUMP
while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
while (I != MBB.begin() && std::prev(I)->getOpcode() == R600::JUMP) {
MachineBasicBlock::iterator PriorI = std::prev(I);
if (AllowModify)
I->removeFromParent();
@ -699,10 +704,10 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst.getOpcode();
if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
if (LastOpc == AMDGPU::JUMP) {
if (LastOpc == R600::JUMP) {
TBB = LastInst.getOperand(0).getMBB();
return false;
} else if (LastOpc == AMDGPU::JUMP_COND) {
} else if (LastOpc == R600::JUMP_COND) {
auto predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
@ -710,7 +715,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
TBB = LastInst.getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));
return false;
}
return true; // Can't handle indirect branch.
@ -721,7 +726,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
unsigned SecondLastOpc = SecondLastInst.getOpcode();
// If the block ends with a B and a Bcc, handle it.
if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
if (SecondLastOpc == R600::JUMP_COND && LastOpc == R600::JUMP) {
auto predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
@ -730,7 +735,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
FBB = LastInst.getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));
return false;
}
@ -742,8 +747,8 @@ static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
It != E; ++It) {
if (It->getOpcode() == AMDGPU::CF_ALU ||
It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
if (It->getOpcode() == R600::CF_ALU ||
It->getOpcode() == R600::CF_ALU_PUSH_BEFORE)
return It.getReverse();
}
return MBB.end();
@ -760,7 +765,7 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
if (!FBB) {
if (Cond.empty()) {
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(TBB);
return 1;
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
@ -768,14 +773,14 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
addFlag(*PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
BuildMI(&MBB, DL, get(R600::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
.addReg(R600::PREDICATE_BIT, RegState::Kill);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 1;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
assert (CfAlu->getOpcode() == R600::CF_ALU);
CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));
return 1;
}
} else {
@ -783,15 +788,15 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
assert(PredSet && "No previous predicate !");
addFlag(*PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
BuildMI(&MBB, DL, get(R600::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
.addReg(R600::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(FBB);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 2;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
assert (CfAlu->getOpcode() == R600::CF_ALU);
CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));
return 2;
}
}
@ -812,18 +817,18 @@ unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
switch (I->getOpcode()) {
default:
return 0;
case AMDGPU::JUMP_COND: {
case R600::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(*predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(AMDGPU::CF_ALU));
assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(R600::CF_ALU));
break;
}
case AMDGPU::JUMP:
case R600::JUMP:
I->eraseFromParent();
break;
}
@ -837,18 +842,18 @@ unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
// FIXME: only one case??
default:
return 1;
case AMDGPU::JUMP_COND: {
case R600::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(*predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(AMDGPU::CF_ALU));
assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE);
CfAlu->setDesc(get(R600::CF_ALU));
break;
}
case AMDGPU::JUMP:
case R600::JUMP:
I->eraseFromParent();
break;
}
@ -863,9 +868,9 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
unsigned Reg = MI.getOperand(idx).getReg();
switch (Reg) {
default: return false;
case AMDGPU::PRED_SEL_ONE:
case AMDGPU::PRED_SEL_ZERO:
case AMDGPU::PREDICATE_BIT:
case R600::PRED_SEL_ONE:
case R600::PRED_SEL_ZERO:
case R600::PREDICATE_BIT:
return true;
}
}
@ -876,9 +881,9 @@ bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
// be predicated. Until we have proper support for instruction clauses in the
// backend, we will mark KILL* instructions as unpredicable.
if (MI.getOpcode() == AMDGPU::KILLGT) {
if (MI.getOpcode() == R600::KILLGT) {
return false;
} else if (MI.getOpcode() == AMDGPU::CF_ALU) {
} else if (MI.getOpcode() == R600::CF_ALU) {
// If the clause start in the middle of MBB then the MBB has more
// than a single clause, unable to predicate several clauses.
if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
@ -888,7 +893,7 @@ bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
} else if (isVector(MI)) {
return false;
} else {
return AMDGPUInstrInfo::isPredicable(MI);
return TargetInstrInfo::isPredicable(MI);
}
}
@ -929,17 +934,17 @@ bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
switch (MO.getImm()) {
case AMDGPU::PRED_SETE_INT:
MO.setImm(AMDGPU::PRED_SETNE_INT);
case R600::PRED_SETE_INT:
MO.setImm(R600::PRED_SETNE_INT);
break;
case AMDGPU::PRED_SETNE_INT:
MO.setImm(AMDGPU::PRED_SETE_INT);
case R600::PRED_SETNE_INT:
MO.setImm(R600::PRED_SETE_INT);
break;
case AMDGPU::PRED_SETE:
MO.setImm(AMDGPU::PRED_SETNE);
case R600::PRED_SETE:
MO.setImm(R600::PRED_SETNE);
break;
case AMDGPU::PRED_SETNE:
MO.setImm(AMDGPU::PRED_SETE);
case R600::PRED_SETNE:
MO.setImm(R600::PRED_SETE);
break;
default:
return true;
@ -947,11 +952,11 @@ R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) con
MachineOperand &MO2 = Cond[2];
switch (MO2.getReg()) {
case AMDGPU::PRED_SEL_ZERO:
MO2.setReg(AMDGPU::PRED_SEL_ONE);
case R600::PRED_SEL_ZERO:
MO2.setReg(R600::PRED_SEL_ONE);
break;
case AMDGPU::PRED_SEL_ONE:
MO2.setReg(AMDGPU::PRED_SEL_ZERO);
case R600::PRED_SEL_ONE:
MO2.setReg(R600::PRED_SEL_ZERO);
break;
default:
return true;
@ -968,22 +973,22 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
int PIdx = MI.findFirstPredOperandIdx();
if (MI.getOpcode() == AMDGPU::CF_ALU) {
if (MI.getOpcode() == R600::CF_ALU) {
MI.getOperand(8).setImm(0);
return true;
}
if (MI.getOpcode() == AMDGPU::DOT_4) {
MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
if (MI.getOpcode() == R600::DOT_4) {
MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_X))
.setReg(Pred[2].getReg());
MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Y))
.setReg(Pred[2].getReg());
MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Z))
.setReg(Pred[2].getReg());
MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_W))
.setReg(Pred[2].getReg());
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);
return true;
}
@ -991,7 +996,7 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
MachineOperand &PMO = MI.getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);
return true;
}
@ -1021,20 +1026,20 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
default: {
MachineBasicBlock *MBB = MI.getParent();
int OffsetOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::addr);
// addr is a custom operand with multiple MI operands, and only the
// first MI operand is given a name.
int RegOpIdx = OffsetOpIdx + 1;
int ChanOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::chan);
if (isRegisterLoad(MI)) {
int DstOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::dst);
unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
getIndirectAddrRegClass()->getRegister(Address));
} else {
@ -1043,12 +1048,12 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
} else if (isRegisterStore(MI)) {
int ValOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::val);
unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
MI.getOperand(ValOpIdx).getReg());
} else {
@ -1063,15 +1068,15 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB->erase(MI);
return true;
}
case AMDGPU::R600_EXTRACT_ELT_V2:
case AMDGPU::R600_EXTRACT_ELT_V4:
case R600::R600_EXTRACT_ELT_V2:
case R600::R600_EXTRACT_ELT_V4:
buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
MI.getOperand(2).getReg(),
RI.getHWRegChan(MI.getOperand(1).getReg()));
break;
case AMDGPU::R600_INSERT_ELT_V2:
case AMDGPU::R600_INSERT_ELT_V4:
case R600::R600_INSERT_ELT_V2:
case R600::R600_INSERT_ELT_V4:
buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
MI.getOperand(3).getReg(), // Offset
@ -1096,14 +1101,14 @@ void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
unsigned Reg = R600::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
TRI.reserveRegisterTuples(Reserved, Reg);
}
}
}
const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::R600_TReg32_XRegClass;
return &R600::R600_TReg32_XRegClass;
}
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@ -1121,20 +1126,20 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
unsigned AddrReg;
switch (AddrChan) {
default: llvm_unreachable("Invalid Channel");
case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break;
case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break;
case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break;
case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;
}
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X, OffsetReg);
setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg,
R600::AR_X, OffsetReg);
setImmOperand(*MOVA, R600::OpName::write, 0);
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,
AddrReg, ValueReg)
.addReg(AMDGPU::AR_X,
.addReg(R600::AR_X,
RegState::Implicit | RegState::Kill);
setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
setImmOperand(*Mov, R600::OpName::dst_rel, 1);
return Mov;
}
@ -1153,21 +1158,21 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
unsigned AddrReg;
switch (AddrChan) {
default: llvm_unreachable("Invalid Channel");
case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break;
case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break;
case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break;
case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;
}
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X,
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg,
R600::AR_X,
OffsetReg);
setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
setImmOperand(*MOVA, R600::OpName::write, 0);
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,
ValueReg,
AddrReg)
.addReg(AMDGPU::AR_X,
.addReg(R600::AR_X,
RegState::Implicit | RegState::Kill);
setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);
setImmOperand(*Mov, R600::OpName::src0_rel, 1);
return Mov;
}
@ -1265,7 +1270,7 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
//XXX: The r600g finalizer expects this to be 1; once we've moved the
//scheduling to the backend, we can change the default to 0.
MIB.addImm(1) // $last
.addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
.addReg(R600::PRED_SEL_OFF) // $pred_sel
.addImm(0) // $literal
.addImm(0); // $bank_swizzle
@ -1286,23 +1291,23 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
switch (Op) {
OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
OPERAND_CASE(AMDGPU::OpName::update_pred)
OPERAND_CASE(AMDGPU::OpName::write)
OPERAND_CASE(AMDGPU::OpName::omod)
OPERAND_CASE(AMDGPU::OpName::dst_rel)
OPERAND_CASE(AMDGPU::OpName::clamp)
OPERAND_CASE(AMDGPU::OpName::src0)
OPERAND_CASE(AMDGPU::OpName::src0_neg)
OPERAND_CASE(AMDGPU::OpName::src0_rel)
OPERAND_CASE(AMDGPU::OpName::src0_abs)
OPERAND_CASE(AMDGPU::OpName::src0_sel)
OPERAND_CASE(AMDGPU::OpName::src1)
OPERAND_CASE(AMDGPU::OpName::src1_neg)
OPERAND_CASE(AMDGPU::OpName::src1_rel)
OPERAND_CASE(AMDGPU::OpName::src1_abs)
OPERAND_CASE(AMDGPU::OpName::src1_sel)
OPERAND_CASE(AMDGPU::OpName::pred_sel)
OPERAND_CASE(R600::OpName::update_exec_mask)
OPERAND_CASE(R600::OpName::update_pred)
OPERAND_CASE(R600::OpName::write)
OPERAND_CASE(R600::OpName::omod)
OPERAND_CASE(R600::OpName::dst_rel)
OPERAND_CASE(R600::OpName::clamp)
OPERAND_CASE(R600::OpName::src0)
OPERAND_CASE(R600::OpName::src0_neg)
OPERAND_CASE(R600::OpName::src0_rel)
OPERAND_CASE(R600::OpName::src0_abs)
OPERAND_CASE(R600::OpName::src0_sel)
OPERAND_CASE(R600::OpName::src1)
OPERAND_CASE(R600::OpName::src1_neg)
OPERAND_CASE(R600::OpName::src1_rel)
OPERAND_CASE(R600::OpName::src1_abs)
OPERAND_CASE(R600::OpName::src1_sel)
OPERAND_CASE(R600::OpName::pred_sel)
default:
llvm_unreachable("Wrong Operand");
}
@ -1313,39 +1318,39 @@ static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
const {
assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented");
unsigned Opcode;
if (ST.getGeneration() <= R600Subtarget::R700)
Opcode = AMDGPU::DOT4_r600;
Opcode = R600::DOT4_r600;
else
Opcode = AMDGPU::DOT4_eg;
Opcode = R600::DOT4_eg;
MachineBasicBlock::iterator I = MI;
MachineOperand &Src0 = MI->getOperand(
getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src0, Slot)));
MachineOperand &Src1 = MI->getOperand(
getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src1, Slot)));
MachineInstr *MIB = buildDefaultInstruction(
MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
static const unsigned Operands[14] = {
AMDGPU::OpName::update_exec_mask,
AMDGPU::OpName::update_pred,
AMDGPU::OpName::write,
AMDGPU::OpName::omod,
AMDGPU::OpName::dst_rel,
AMDGPU::OpName::clamp,
AMDGPU::OpName::src0_neg,
AMDGPU::OpName::src0_rel,
AMDGPU::OpName::src0_abs,
AMDGPU::OpName::src0_sel,
AMDGPU::OpName::src1_neg,
AMDGPU::OpName::src1_rel,
AMDGPU::OpName::src1_abs,
AMDGPU::OpName::src1_sel,
R600::OpName::update_exec_mask,
R600::OpName::update_pred,
R600::OpName::write,
R600::OpName::omod,
R600::OpName::dst_rel,
R600::OpName::clamp,
R600::OpName::src0_neg,
R600::OpName::src0_rel,
R600::OpName::src0_abs,
R600::OpName::src0_sel,
R600::OpName::src1_neg,
R600::OpName::src1_rel,
R600::OpName::src1_abs,
R600::OpName::src1_sel,
};
MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
getSlotedOps(R600::OpName::pred_sel, Slot)));
MIB->getOperand(getOperandIdx(Opcode, R600::OpName::pred_sel))
.setReg(MO.getReg());
for (unsigned i = 0; i < 14; i++) {
@ -1362,16 +1367,16 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
unsigned DstReg,
uint64_t Imm) const {
MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
AMDGPU::ALU_LITERAL_X);
setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
MachineInstr *MovImm = buildDefaultInstruction(BB, I, R600::MOV, DstReg,
R600::ALU_LITERAL_X);
setImmOperand(*MovImm, R600::OpName::literal, Imm);
return MovImm;
}
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg) const {
return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
return buildDefaultInstruction(*MBB, I, R600::MOV, DstReg, SrcReg);
}
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
@ -1379,7 +1384,7 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
}
int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
return AMDGPU::getNamedOperandIdx(Opcode, Op);
return R600::getNamedOperandIdx(Opcode, Op);
}
void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
@ -1406,25 +1411,25 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
switch (Flag) {
case MO_FLAG_CLAMP:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
FlagIndex = getOperandIdx(MI, R600::OpName::clamp);
break;
case MO_FLAG_MASK:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
FlagIndex = getOperandIdx(MI, R600::OpName::write);
break;
case MO_FLAG_NOT_LAST:
case MO_FLAG_LAST:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
FlagIndex = getOperandIdx(MI, R600::OpName::last);
break;
case MO_FLAG_NEG:
switch (SrcIdx) {
case 0:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
FlagIndex = getOperandIdx(MI, R600::OpName::src0_neg);
break;
case 1:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
FlagIndex = getOperandIdx(MI, R600::OpName::src1_neg);
break;
case 2:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
FlagIndex = getOperandIdx(MI, R600::OpName::src2_neg);
break;
}
break;
@ -1435,10 +1440,10 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
(void)IsOP3;
switch (SrcIdx) {
case 0:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
FlagIndex = getOperandIdx(MI, R600::OpName::src0_abs);
break;
case 1:
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
FlagIndex = getOperandIdx(MI, R600::OpName::src1_abs);
break;
}
break;
@ -1499,15 +1504,15 @@ unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
switch (Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
return AMDGPUASI.PRIVATE_ADDRESS;
return ST.getAMDGPUAS().PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
return AMDGPUASI.CONSTANT_ADDRESS;
return ST.getAMDGPUAS().CONSTANT_ADDRESS;
}
llvm_unreachable("Invalid pseudo source kind");
return AMDGPUASI.PRIVATE_ADDRESS;
return ST.getAMDGPUAS().PRIVATE_ADDRESS;
}

@ -15,8 +15,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H
#include "AMDGPUInstrInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "R600GenInstrInfo.inc"
namespace llvm {
@ -34,7 +37,7 @@ class MachineInstr;
class MachineInstrBuilder;
class R600Subtarget;
class R600InstrInfo final : public AMDGPUInstrInfo {
class R600InstrInfo final : public R600GenInstrInfo {
private:
const R600RegisterInfo RI;
const R600Subtarget &ST;
@ -324,7 +327,7 @@ public:
PseudoSourceValue::PSVKind Kind) const override;
};
namespace AMDGPU {
namespace R600 {
int getLDSNoRetOp(uint16_t Opcode);

@ -18,13 +18,13 @@ include "R600InstrFormats.td"
class R600WrapperInst <dag outs, dag ins, string asm = "", list<dag> pattern = []> :
AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
let SubtargetPredicate = isR600toCayman;
let Namespace = "R600";
}
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern = []> :
InstR600 <outs, ins, asm, pattern, NullALU> {
let Namespace = "AMDGPU";
}
def MEMxi : Operand<iPTR> {
@ -86,6 +86,12 @@ def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
(ops PRED_SEL_OFF)>;
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
usesCustomInserter = 1, Namespace = "R600" in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(AMDGPUendpgm)]
>;
}
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
@ -219,34 +225,6 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
def TEX_SHADOW : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return (TType >= 6 && TType <= 8) || TType == 13;
}]
>;
def TEX_RECT : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 5;
}]
>;
def TEX_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 9 || TType == 10 || TType == 16;
}]
>;
def TEX_SHADOW_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
return TType == 11 || TType == 12 || TType == 17;
}]
>;
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
dag outs, dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>,
@ -357,6 +335,8 @@ def vtx_id2_load : LoadVtxId2 <load>;
// R600 SDNodes
//===----------------------------------------------------------------------===//
let Namespace = "R600" in {
def INTERP_PAIR_XY : AMDGPUShaderInst <
(outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
(ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
@ -369,6 +349,8 @@ def INTERP_PAIR_ZW : AMDGPUShaderInst <
"INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
[]>;
}
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
[SDNPVariadic]
@ -416,11 +398,15 @@ def : R600Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
// Interpolation Instructions
//===----------------------------------------------------------------------===//
let Namespace = "R600" in {
def INTERP_VEC_LOAD : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins i32imm:$src0),
"INTERP_LOAD $src0 : $dst">;
}
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
}
@ -660,7 +646,7 @@ def PAD : R600WrapperInst <(outs), (ins), "PAD", [] > {
let isCodeGenOnly = 1, isPseudo = 1 in {
let usesCustomInserter = 1 in {
let Namespace = "R600", usesCustomInserter = 1 in {
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
@ -792,7 +778,9 @@ class MOV_IMM <ValueType vt, Operand immType> : R600WrapperInst <
(ins immType:$imm),
"",
[]
>;
> {
let Namespace = "R600";
}
} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
@ -1007,7 +995,7 @@ class CNDGE_Common <bits<5> inst> : R600_3OP <
}
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in {
class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
// Slot X
UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
@ -1326,7 +1314,9 @@ let Predicates = [isR600] in {
// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//
let Namespace = "R600" in {
defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
}
// Hardcode channel to 0
// NOTE: LSHR is not available here. LSHR is per family instruction
@ -1378,11 +1368,12 @@ let usesCustomInserter = 1 in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
def MASK_WRITE : AMDGPUShaderInst <
def MASK_WRITE : InstR600 <
(outs),
(ins R600_Reg32:$src),
"MASK_WRITE $src",
[]
[],
NullALU
>;
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
@ -1413,7 +1404,7 @@ def TXD_SHADOW: InstR600 <
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//
let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in {
def CONST_COPY : Instruction {
let OutOperandList = (outs R600_Reg32:$dst);
let InOperandList = (ins i32imm:$src);
@ -1536,23 +1527,6 @@ let Inst{63-32} = Word1;
//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
let Namespace = "AMDGPU";
dag OutOperandList = outs;
dag InOperandList = ins;
let Pattern = pattern;
let AsmString = !strconcat(asmstr, "\n");
let isPseudo = 1;
let Itinerary = NullALU;
bit hasIEEEFlag = 0;
bit hasZeroOpFlag = 0;
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let isCodeGenOnly = 1;
}
multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
def _i32 : ILFormat<(outs),
@ -1584,23 +1558,14 @@ multiclass BranchInstr2<string name> {
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1,
Namespace = "R600" in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
}
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
usesCustomInserter = 1 in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(AMDGPUendpgm)]
>;
}
//===----------------------------------------------------------------------===//
// Branch Instructions
//===----------------------------------------------------------------------===//
@ -1731,7 +1696,7 @@ def : R600Pat <
// KIL Patterns
def KIL : R600Pat <
(int_AMDGPU_kill f32:$src0),
(int_r600_kill f32:$src0),
(MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;

@ -162,7 +162,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
E = SU->getInstr()->operands_end(); It != E; ++It) {
MachineOperand &MO = *It;
if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
++CurEmitted;
}
}
@ -181,7 +181,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
static bool
isPhysicalRegCopy(MachineInstr *MI) {
if (MI->getOpcode() != AMDGPU::COPY)
if (MI->getOpcode() != R600::COPY)
return false;
return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
@ -224,14 +224,14 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
return AluTrans;
switch (MI->getOpcode()) {
case AMDGPU::PRED_X:
case R600::PRED_X:
return AluPredX;
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::DOT_4:
case R600::INTERP_PAIR_XY:
case R600::INTERP_PAIR_ZW:
case R600::INTERP_VEC_LOAD:
case R600::DOT_4:
return AluT_XYZW;
case AMDGPU::COPY:
case R600::COPY:
if (MI->getOperand(1).isUndef()) {
// MI will become a KILL, don't consider it in scheduling
return AluDiscarded;
@ -246,7 +246,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
if(TII->isVector(*MI) ||
TII->isCubeOp(MI->getOpcode()) ||
TII->isReductionOp(MI->getOpcode()) ||
MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
MI->getOpcode() == R600::GROUP_BARRIER) {
return AluT_XYZW;
}
@ -257,13 +257,13 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
// Is the result already assigned to a channel ?
unsigned DestSubReg = MI->getOperand(0).getSubReg();
switch (DestSubReg) {
case AMDGPU::sub0:
case R600::sub0:
return AluT_X;
case AMDGPU::sub1:
case R600::sub1:
return AluT_Y;
case AMDGPU::sub2:
case R600::sub2:
return AluT_Z;
case AMDGPU::sub3:
case R600::sub3:
return AluT_W;
default:
break;
@ -271,16 +271,16 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
// Is the result already a member of an X/Y/Z/W class?
unsigned DestReg = MI->getOperand(0).getReg();
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
return AluT_X;
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
return AluT_Y;
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
return AluT_Z;
if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
return AluT_W;
if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
return AluT_XYZW;
// LDS src registers cannot be used in the Trans slot.
@ -301,13 +301,13 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
}
switch (Opcode) {
case AMDGPU::PRED_X:
case AMDGPU::COPY:
case AMDGPU::CONST_COPY:
case AMDGPU::INTERP_PAIR_XY:
case AMDGPU::INTERP_PAIR_ZW:
case AMDGPU::INTERP_VEC_LOAD:
case AMDGPU::DOT_4:
case R600::PRED_X:
case R600::COPY:
case R600::CONST_COPY:
case R600::INTERP_PAIR_XY:
case R600::INTERP_PAIR_ZW:
case R600::INTERP_VEC_LOAD:
case R600::DOT_4:
return IDAlu;
default:
return IDOther;
@ -353,7 +353,7 @@ void R600SchedStrategy::PrepareNextSlot() {
}
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
if (DstIndex == -1) {
return;
}
@ -370,16 +370,16 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
// Constrains the regclass of DestReg to assign it to Slot
switch (Slot) {
case 0:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
break;
case 1:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
break;
case 2:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
break;
case 3:
MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
break;
}
}

@ -79,7 +79,7 @@ public:
std::vector<unsigned> UndefReg;
RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE);
assert(MI->getOpcode() == R600::REG_SEQUENCE);
for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
MachineOperand &MO = Instr->getOperand(i);
unsigned Chan = Instr->getOperand(i + 1).getImm();
@ -159,8 +159,8 @@ bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
return true;
switch (MI.getOpcode()) {
case AMDGPU::R600_ExportSwz:
case AMDGPU::EG_ExportSwz:
case R600::R600_ExportSwz:
case R600::EG_ExportSwz:
return true;
default:
return false;
@ -213,12 +213,12 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
E = RSI->RegToChan.end(); It != E; ++It) {
unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
unsigned SubReg = (*It).first;
unsigned Swizzle = (*It).second;
unsigned Chan = getReassignedChan(RemapChan, Swizzle);
MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG),
MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
DstReg)
.addReg(SrcVec)
.addReg(SubReg)
@ -234,7 +234,7 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
SrcVec = DstReg;
}
MachineInstr *NewMI =
BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec);
BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
LLVM_DEBUG(dbgs() << " ->"; NewMI->dump(););
LLVM_DEBUG(dbgs() << " Updating Swizzle:\n");
@ -354,7 +354,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
MII != MIIE; ++MII) {
MachineInstr &MI = *MII;
if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) {
if (MI.getOpcode() != R600::REG_SEQUENCE) {
if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
unsigned Reg = MI.getOperand(1).getReg();
for (MachineRegisterInfo::def_instr_iterator

@ -84,39 +84,39 @@ private:
LastDstChan = BISlot;
if (TII->isPredicated(*BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
continue;
int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
if (DstIdx == -1) {
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
if (isTrans || TII->isTransOnly(*BI)) {
Result[Dst] = AMDGPU::PS;
Result[Dst] = R600::PS;
continue;
}
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
BI->getOpcode() == AMDGPU::DOT4_eg) {
Result[Dst] = AMDGPU::PV_X;
if (BI->getOpcode() == R600::DOT4_r600 ||
BI->getOpcode() == R600::DOT4_eg) {
Result[Dst] = R600::PV_X;
continue;
}
if (Dst == AMDGPU::OQAP) {
if (Dst == R600::OQAP) {
continue;
}
unsigned PVReg = 0;
switch (TRI.getHWRegChan(Dst)) {
case 0:
PVReg = AMDGPU::PV_X;
PVReg = R600::PV_X;
break;
case 1:
PVReg = AMDGPU::PV_Y;
PVReg = R600::PV_Y;
break;
case 2:
PVReg = AMDGPU::PV_Z;
PVReg = R600::PV_Z;
break;
case 3:
PVReg = AMDGPU::PV_W;
PVReg = R600::PV_W;
break;
default:
llvm_unreachable("Invalid Chan");
@ -129,9 +129,9 @@ private:
void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
const {
unsigned Ops[] = {
AMDGPU::OpName::src0,
AMDGPU::OpName::src1,
AMDGPU::OpName::src2
R600::OpName::src0,
R600::OpName::src1,
R600::OpName::src2
};
for (unsigned i = 0; i < 3; i++) {
int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
@ -171,7 +171,7 @@ public:
return true;
if (!TII->isALUInstr(MI.getOpcode()))
return true;
if (MI.getOpcode() == AMDGPU::GROUP_BARRIER)
if (MI.getOpcode() == R600::GROUP_BARRIER)
return true;
// XXX: This can be removed once the packetizer properly handles all the
// LDS instruction group restrictions.
@ -185,8 +185,8 @@ public:
if (getSlot(*MII) == getSlot(*MIJ))
ConsideredInstUsesAlreadyWrittenVectorElement = true;
// Does MII and MIJ share the same pred_sel ?
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
if (PredI != PredJ)
@ -220,7 +220,7 @@ public:
}
void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
MI->getOperand(LastOp).setImm(Bit);
}
@ -301,11 +301,11 @@ public:
for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
MachineInstr *MI = CurrentPacketMIs[i];
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
AMDGPU::OpName::bank_swizzle);
R600::OpName::bank_swizzle);
MI->getOperand(Op).setImm(BS[i]);
}
unsigned Op =
TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::bank_swizzle);
TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
MI.getOperand(Op).setImm(BS.back());
if (!CurrentPacketMIs.empty())
setIsLastBit(CurrentPacketMIs.back(), 0);
@ -334,6 +334,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
assert(Packetizer.getResourceTracker()->getInstrItins());
if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
return false;
@ -353,8 +354,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
(MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
(MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
MBB->erase(DeleteMI);

@ -7,6 +7,62 @@
//
//===----------------------------------------------------------------------===//
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
Value,
"Limit the maximum number of fetches in a clause to "#Value
>;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"R600ALUInst",
"false",
"Older version of ALU instructions encoding"
>;
def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
"HasVertexCache",
"true",
"Specify use of dedicated vertex cache"
>;
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
"CaymanISA",
"true",
"Use Cayman ISA"
>;
def FeatureCFALUBug : SubtargetFeature<"cfalubug",
"CFALUBug",
"true",
"GPU has CF_ALU bug"
>;
class R600SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeatureGeneration <Value, "R600Subtarget", Implies>;
def FeatureR600 : R600SubtargetFeatureGeneration<"R600",
[FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
>;
def FeatureR700 : R600SubtargetFeatureGeneration<"R700",
[FeatureFetchLimit16, FeatureLocalMemorySize0]
>;
def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN",
[FeatureFetchLimit16, FeatureLocalMemorySize32768]
>;
def FeatureNorthernIslands : R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
[FeatureFetchLimit16, FeatureWavefrontSize64,
FeatureLocalMemorySize32768]
>;
//===----------------------------------------------------------------------===//
// Radeon HD 2000/3000 Series (R600).
//===----------------------------------------------------------------------===//

@ -21,34 +21,37 @@
using namespace llvm;
R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) {
RCW.RegWeight = 0;
RCW.WeightLimit = 0;
}
#define GET_REGINFO_TARGET_DESC
#include "R600GenRegisterInfo.inc"
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
const R600InstrInfo *TII = ST.getInstrInfo();
reserveRegisterTuples(Reserved, AMDGPU::ZERO);
reserveRegisterTuples(Reserved, AMDGPU::HALF);
reserveRegisterTuples(Reserved, AMDGPU::ONE);
reserveRegisterTuples(Reserved, AMDGPU::ONE_INT);
reserveRegisterTuples(Reserved, AMDGPU::NEG_HALF);
reserveRegisterTuples(Reserved, AMDGPU::NEG_ONE);
reserveRegisterTuples(Reserved, AMDGPU::PV_X);
reserveRegisterTuples(Reserved, AMDGPU::ALU_LITERAL_X);
reserveRegisterTuples(Reserved, AMDGPU::ALU_CONST);
reserveRegisterTuples(Reserved, AMDGPU::PREDICATE_BIT);
reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_OFF);
reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ZERO);
reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ONE);
reserveRegisterTuples(Reserved, AMDGPU::INDIRECT_BASE_ADDR);
reserveRegisterTuples(Reserved, R600::ZERO);
reserveRegisterTuples(Reserved, R600::HALF);
reserveRegisterTuples(Reserved, R600::ONE);
reserveRegisterTuples(Reserved, R600::ONE_INT);
reserveRegisterTuples(Reserved, R600::NEG_HALF);
reserveRegisterTuples(Reserved, R600::NEG_ONE);
reserveRegisterTuples(Reserved, R600::PV_X);
reserveRegisterTuples(Reserved, R600::ALU_LITERAL_X);
reserveRegisterTuples(Reserved, R600::ALU_CONST);
reserveRegisterTuples(Reserved, R600::PREDICATE_BIT);
reserveRegisterTuples(Reserved, R600::PRED_SEL_OFF);
reserveRegisterTuples(Reserved, R600::PRED_SEL_ZERO);
reserveRegisterTuples(Reserved, R600::PRED_SEL_ONE);
reserveRegisterTuples(Reserved, R600::INDIRECT_BASE_ADDR);
for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
for (TargetRegisterClass::iterator I = R600::R600_AddrRegClass.begin(),
E = R600::R600_AddrRegClass.end(); I != E; ++I) {
reserveRegisterTuples(Reserved, *I);
}
@ -58,7 +61,7 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
// Dummy to not crash RegisterClassInfo.
static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
static const MCPhysReg CalleeSavedReg = R600::NoRegister;
const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
const MachineFunction *) const {
@ -66,7 +69,7 @@ const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
}
unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return AMDGPU::NoRegister;
return R600::NoRegister;
}
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
@ -81,7 +84,7 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
default:
case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
case MVT::i32: return &R600::R600_TReg32RegClass;
}
}
@ -94,9 +97,9 @@ bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
switch (Reg) {
case AMDGPU::OQAP:
case AMDGPU::OQBP:
case AMDGPU::AR_X:
case R600::OQAP:
case R600::OQBP:
case R600::AR_X:
return false;
default:
return true;
@ -109,3 +112,10 @@ void R600RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
RegScavenger *RS) const {
llvm_unreachable("Subroutines not supported yet");
}
void R600RegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
MCRegAliasIterator R(Reg, this, true);
for (; R.isValid(); ++R)
Reserved.set(*R);
}
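A toy model of what this new helper does (an illustration only, using a hand-written alias table in place of MCRegAliasIterator): reserving one register also reserves every register that overlaps it, so no aliased sub- or super-register stays allocatable.

#include <bitset>
#include <cstdio>
#include <vector>

int main() {
  // Toy alias table: register 0 overlaps registers 1 and 2 (think of a 32-bit
  // register and the tuples containing it); register 3 stands alone.
  std::vector<std::vector<unsigned>> Aliases = {{0, 1, 2}, {1, 0}, {2, 0}, {3}};
  std::bitset<4> Reserved;
  unsigned Reg = 0;
  // Stands in for MCRegAliasIterator(Reg, TRI, /*IncludeSelf=*/true).
  for (unsigned A : Aliases[Reg])
    Reserved.set(A);
  std::printf("%s\n", Reserved.to_string().c_str()); // "0111": 0, 1 and 2 reserved
  return 0;
}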

@ -15,13 +15,14 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H
#include "AMDGPURegisterInfo.h"
#define GET_REGINFO_HEADER
#include "R600GenRegisterInfo.inc"
namespace llvm {
class AMDGPUSubtarget;
struct R600RegisterInfo final : public AMDGPURegisterInfo {
struct R600RegisterInfo final : public R600GenRegisterInfo {
RegClassWeight RCW;
R600RegisterInfo();
@ -49,6 +50,8 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
void reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const;
};
} // End namespace llvm

@ -245,7 +245,7 @@ def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
(add V0123_W, V0123_Z, V0123_Y, V0123_X)
>;
def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32, i64, f64], 64,
(add (sequence "T%u_XY", 0, 63))>;
def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,

@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">;
let Predicates = [isR700] in {
def SIN_r700 : SIN_Common<0x6E>;

@ -76,7 +76,7 @@ public:
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const SISubtarget *ST;
const AMDGPUSubtarget *ST;
void foldOperand(MachineOperand &OpToFold,
MachineInstr *UseMI,
@ -972,7 +972,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
return false;
MRI = &MF.getRegInfo();
ST = &MF.getSubtarget<SISubtarget>();
ST = &MF.getSubtarget<AMDGPUSubtarget>();
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();

@ -113,7 +113,8 @@ static unsigned findFirstFreeSGPR(CCState &CCInfo) {
SITargetLowering::SITargetLowering(const TargetMachine &TM,
const SISubtarget &STI)
: AMDGPUTargetLowering(TM, STI) {
: AMDGPUTargetLowering(TM, STI),
Subtarget(&STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
@ -147,7 +148,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
}
computeRegisterProperties(STI.getRegisterInfo());
computeRegisterProperties(Subtarget->getRegisterInfo());
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
@ -323,7 +324,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);
if (getSubtarget()->hasFlatAddressSpace()) {
if (Subtarget->hasFlatAddressSpace()) {
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
}
@ -336,6 +337,44 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Custom);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::FLOG, MVT::f16, Custom);
setOperationAction(ISD::FLOG10, MVT::f16, Custom);
}
// v_mad_f32 does not support denormals according to some sources.
if (!Subtarget->hasFP32Denormals())
setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI()) {
// fcopysign can be done in a single instruction with BFI.
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
}
if (!Subtarget->hasBCNT(32))
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
if (Subtarget->hasFFBH())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
if (Subtarget->hasFFBL())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
// On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
// effort to match them now. We want this to be false for i64 cases when the
// extraction isn't restricted to the upper or lower half. Ideally we would
// have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
// span the midpoint are probably relatively rare, so don't worry about them
// for now.
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@ -343,6 +382,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
} else {
setOperationAction(ISD::FCEIL, MVT::f64, Custom);
setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
setOperationAction(ISD::FRINT, MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
}
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
@ -616,10 +660,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
setSchedulingPreference(Sched::RegPressure);
// SI at least has hardware support for floating point exceptions, but no way
// of using or handling them is implemented. They are also optional in OpenCL
// (Section 7.3)
setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
}
const SISubtarget *SITargetLowering::getSubtarget() const {
return static_cast<const SISubtarget *>(Subtarget);
return Subtarget;
}
//===----------------------------------------------------------------------===//
@ -2012,8 +2061,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// FIXME: Does sret work properly?
if (!Info->isEntryFunction()) {
const SIRegisterInfo *TRI
= static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
@ -2115,8 +2163,7 @@ void SITargetLowering::passSpecialInputs(
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
const SISubtarget *ST = getSubtarget();
const SIRegisterInfo *TRI = ST->getRegisterInfo();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
@ -2561,7 +2608,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@ -8179,8 +8226,7 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (Info->isEntryFunction()) {
// Callable functions have fixed registers used for stack access.

@ -22,6 +22,9 @@
namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
private:
const SISubtarget *Subtarget;
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;

@ -934,8 +934,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// All waits must be resolved at call return.
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
@ -1131,7 +1130,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
// TODO: Remove this work-around, enable the assert for Bug 457939
// after fixing the scheduler. Also, the Shader Compiler code is
// independent of target.
if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
if (ScoreBrackets->getScoreLB(LGKM_CNT) <
ScoreBrackets->getScoreUB(LGKM_CNT) &&
ScoreBrackets->hasPendingSMEM()) {
@ -1716,7 +1715,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
if (ScoreBrackets->getScoreLB(LGKM_CNT) <
ScoreBrackets->getScoreUB(LGKM_CNT) &&
ScoreBrackets->hasPendingSMEM()) {
if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS)
if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
VCCZBugWorkAround = true;
}
}

@ -21,7 +21,7 @@ def isSI : Predicate<"Subtarget->getGeneration() "
class InstSI <dag outs, dag ins, string asm = "",
list<dag> pattern = []> :
AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
AMDGPUInst<outs, ins, asm, pattern>, GCNPredicateControl {
let SubtargetPredicate = isGCN;
// Low bits - basic encoding information.

@ -14,6 +14,7 @@
#include "SIInstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
@ -63,6 +64,19 @@
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"
namespace llvm {
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}
// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests for
// long branches.
@ -71,7 +85,8 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
cl::desc("Restrict range of branch instructions (DEBUG)"));
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
: AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
: AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
RI(ST), ST(ST) {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@ -438,6 +453,28 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
}
// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into two batches of 16 stores.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with saying different
// address space loads should be clustered, and ones which might cause bank
// conflicts.
//
// This might be deprecated so it might not be worth that much effort to fix.
bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
int64_t Offset0, int64_t Offset1,
unsigned NumLoads) const {
assert(Offset1 > Offset0 &&
"Second offset should be larger than first offset!");
// If we have at most 16 loads in a row, and the offsets are within 64
// bytes, then schedule together.
// A cacheline is 64 bytes (for global memory).
return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
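The heuristic above can be read in isolation. A standalone sketch (not part of the patch; it simply restates the 16-load / 64-byte rule from the code and comments) shows which pairs of offsets end up scheduled together:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirrors the check above: cluster only while there are at most 16 loads and
// both offsets land within one 64-byte (global-memory) cacheline.
static bool shouldScheduleLoadsNear(int64_t Offset0, int64_t Offset1,
                                    unsigned NumLoads) {
  assert(Offset1 > Offset0 && "Second offset should be larger than first offset!");
  return NumLoads <= 16 && (Offset1 - Offset0) < 64;
}

int main() {
  std::printf("%d\n", shouldScheduleLoadsNear(0, 32, 4));   // 1: same cacheline
  std::printf("%d\n", shouldScheduleLoadsNear(0, 128, 4));  // 0: spans cachelines
  std::printf("%d\n", shouldScheduleLoadsNear(0, 32, 32));  // 0: too many loads in a row
  return 0;
}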
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
@ -998,7 +1035,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
unsigned FrameOffset, unsigned Size) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
@ -1134,7 +1171,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI.getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
default: return TargetInstrInfo::expandPostRAPseudo(MI);
case AMDGPU::S_MOV_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@ -1900,16 +1937,16 @@ unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
switch(Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
return AMDGPUASI.PRIVATE_ADDRESS;
return ST.getAMDGPUAS().PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
return AMDGPUASI.CONSTANT_ADDRESS;
return ST.getAMDGPUAS().CONSTANT_ADDRESS;
}
return AMDGPUASI.FLAT_ADDRESS;
return ST.getAMDGPUAS().FLAT_ADDRESS;
}
static void removeModOperands(MachineInstr &MI) {
@ -4649,7 +4686,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
(*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
(*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@ -4768,7 +4805,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)
return true;
}
return false;
@ -4948,3 +4985,55 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
return RCID == AMDGPU::SReg_128RegClassID;
}
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
VI = 1,
SDWA = 2,
SDWA9 = 3,
GFX80 = 4,
GFX9 = 5
};
static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) {
switch (ST.getGeneration()) {
case SISubtarget::SOUTHERN_ISLANDS:
case SISubtarget::SEA_ISLANDS:
return SIEncodingFamily::SI;
case SISubtarget::VOLCANIC_ISLANDS:
case SISubtarget::GFX9:
return SIEncodingFamily::VI;
}
llvm_unreachable("Unknown subtarget generation!");
}
int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
ST.getGeneration() >= SISubtarget::GFX9)
Gen = SIEncodingFamily::GFX9;
if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
Gen = ST.getGeneration() == SISubtarget::GFX9 ? SIEncodingFamily::SDWA9
: SIEncodingFamily::SDWA;
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
// subtarget has the UnpackedD16VMem feature.
// TODO: remove this when we discard GFX80 encoding.
if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
Gen = SIEncodingFamily::GFX80;
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
return Opcode;
// (uint16_t)-1 means that Opcode is a pseudo instruction that has
// no encoding in the given subtarget generation.
if (MCOp == (uint16_t)-1)
return -1;
return MCOp;
}
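A minimal standalone model of the sentinel handling above (an illustration only, with the generated AMDGPU::getMCOpcode table lookup replaced by an explicit TableValue argument) behaves like this:

#include <cstdint>
#include <cstdio>

// TableValue stands in for the value the generated mapping table would return
// for (Opcode, Gen); the sentinel handling matches the function above.
static int pseudoToMCOpcode(int Opcode, int TableValue) {
  int MCOp = TableValue;
  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;
  // (uint16_t)-1 means a pseudo instruction with no encoding in this generation.
  if (MCOp == (uint16_t)-1)
    return -1;
  return MCOp;
}

int main() {
  std::printf("%d\n", pseudoToMCOpcode(100, -1));     // 100: already native
  std::printf("%d\n", pseudoToMCOpcode(100, 0xffff)); // -1: no encoding here
  std::printf("%d\n", pseudoToMCOpcode(100, 42));     // 42: mapped MC opcode
  return 0;
}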

@ -31,6 +31,9 @@
#include <cassert>
#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
namespace llvm {
class APInt;
@ -39,7 +42,7 @@ class RegScavenger;
class SISubtarget;
class TargetRegisterClass;
class SIInstrInfo final : public AMDGPUInstrInfo {
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
const SIRegisterInfo RI;
const SISubtarget &ST;
@ -163,7 +166,10 @@ public:
bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
MachineInstr &SecondLdSt, unsigned BaseReg2,
unsigned NumLoads) const final;
unsigned NumLoads) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
int64_t Offset1, unsigned NumLoads) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
@ -871,6 +877,12 @@ public:
static bool isLegalMUBUFImmOffset(unsigned Imm) {
return isUInt<12>(Imm);
}
/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
};
namespace AMDGPU {

@ -17,6 +17,11 @@ def isVIOnly : Predicate<"Subtarget->getGeneration() =="
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
class GCNPredicateControl : PredicateControl {
Predicate SIAssemblerPredicate = isSICI;
Predicate VIAssemblerPredicate = isVI;
}
// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp
def SIEncodingFamily {

@ -11,11 +11,10 @@
// that are not yet supported remain commented out.
//===----------------------------------------------------------------------===//
class GCNPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateControl {
let SubtargetPredicate = isGCN;
}
include "VOPInstructions.td"
include "SOPInstructions.td"
include "SMInstructions.td"

@ -1232,8 +1232,6 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass,
&AMDGPU::SCC_CLASSRegClass,
&AMDGPU::R600_Reg32RegClass,
&AMDGPU::R600_PredicateRegClass,
&AMDGPU::Pseudo_SReg_32RegClass,
&AMDGPU::Pseudo_SReg_128RegClass,
};

@ -21,6 +21,7 @@
namespace llvm {
class AMDGPUSubtarget;
class LiveIntervals;
class MachineRegisterInfo;
class SISubtarget;

@ -181,7 +181,7 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
if (Features.test(FeatureGFX9))
return {9, 0, 0};
if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
if (Features.test(FeatureSouthernIslands))
return {0, 0, 0};
return {7, 0, 0};
}
@ -243,7 +243,7 @@ unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
return getMaxWavesPerEU() * getEUsPerCU(Features);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
@ -255,9 +255,7 @@ unsigned getMinWavesPerEU(const FeatureBitset &Features) {
return 1;
}
unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
if (!Features.test(FeatureGCN))
return 8;
unsigned getMaxWavesPerEU() {
// FIXME: Need to take scratch memory into account.
return 10;
}
@ -313,7 +311,7 @@ unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU(Features))
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
@ -390,7 +388,7 @@ unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU(Features))
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
@ -735,6 +733,8 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
if (STI.getTargetTriple().getArch() == Triple::r600)
return Reg;
MAP_REG2REG
}

@ -100,7 +100,7 @@ unsigned getMinWavesPerEU(const FeatureBitset &Features);
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// Features without any kind of limitation.
unsigned getMaxWavesPerEU(const FeatureBitset &Features);
unsigned getMaxWavesPerEU();
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// Features and limited by given \p FlatWorkGroupSize.