AMDGPU: Separate R600 and GCN TableGen files

Summary:
We now have two sets of generated TableGen files, one for R600 and one for
GCN, so each sub-target now has its own tables of instructions, registers,
ISel patterns, etc. This should help reduce compile time since each
sub-target now only has to consider information that is specific to itself.
This will also help prevent the R600 sub-target from slowing down new
features for GCN, like disassembler support, GlobalISel, etc.

Reviewers: arsenm, nhaehnle, jvesely

Reviewed By: arsenm

Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D46365

llvm-svn: 335942
parent 3702f91287
commit c5a154db48
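The mechanical core of the change, visible in the AMDGPUSubtarget hunks further down, is a shared base class (AMDGPUCommonSubtarget) whose static get() picks the concrete subtarget from the target triple, so target-independent passes no longer care which set of generated tables they are talking to. Below is a minimal, self-contained C++ sketch of that dispatch shape; the class names and local-memory sizes are illustrative stand-ins, not the actual LLVM types.

#include <iostream>
#include <string>

// Stand-ins for the two generated subtargets; after this commit the real
// ones are built from separate R600 and GCN TableGen files.
struct CommonSubtarget {
  virtual ~CommonSubtarget() = default;
  virtual int getLocalMemorySize() const = 0;
};

struct GCNSubtarget : CommonSubtarget {
  int getLocalMemorySize() const override { return 65536; }
};

struct R600Subtarget : CommonSubtarget {
  int getLocalMemorySize() const override { return 32768; }
};

// Mirrors AMDGPUCommonSubtarget::get(): dispatch on the architecture part
// of the triple and hand back the common interface.
const CommonSubtarget &getSubtarget(const std::string &Arch) {
  static GCNSubtarget GCN;
  static R600Subtarget R600;
  if (Arch == "amdgcn")
    return GCN;
  return R600;
}

int main() {
  // Target-independent code only sees the shared interface.
  std::cout << getSubtarget("amdgcn").getLocalMemorySize() << "\n"; // 65536
  std::cout << getSubtarget("r600").getLocalMemorySize() << "\n";   // 32768
}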
@@ -9,23 +9,12 @@
include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
include "AMDGPUFeatures.td"

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

def FeatureFP64 : SubtargetFeature<"fp64",
  "FP64",
  "true",
  "Enable double precision operations"
>;

def FeatureFMA : SubtargetFeature<"fmaf",
  "FMA",
  "true",
  "Enable single precision FMA (not as fast as mul+add, but fused)"
>;

def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  "FastFMAF32",
  "true",

@@ -44,30 +33,6 @@ def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
  "Most fp64 instructions are half rate instead of quarter"
>;

def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
  "R600ALUInst",
  "false",
  "Older version of ALU instructions encoding"
>;

def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
  "HasVertexCache",
  "true",
  "Specify use of dedicated vertex cache"
>;

def FeatureCaymanISA : SubtargetFeature<"caymanISA",
  "CaymanISA",
  "true",
  "Use Cayman ISA"
>;

def FeatureCFALUBug : SubtargetFeature<"cfalubug",
  "CFALUBug",
  "true",
  "GPU has CF_ALU bug"
>;

def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
  "FlatAddressSpace",
  "true",

@@ -153,27 +118,6 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;

class SubtargetFeatureFetchLimit <string Value> :
  SubtargetFeature <"fetch"#Value,
  "TexVTXClauseSize",
  Value,
  "Limit the maximum number of fetches in a clause to "#Value
>;

def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;

class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
  "wavefrontsize"#Value,
  "WavefrontSize",
  !cast<string>(Value),
  "The number of threads per wavefront"
>;

def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;

class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
  "ldsbankcount"#Value,
  "LDSBankCount",

@@ -184,19 +128,6 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
  "localmemorysize"#Value,
  "LocalMemorySize",
  !cast<string>(Value),
  "The size of local memory in bytes"
>;

def FeatureGCN : SubtargetFeature<"gcn",
  "IsGCN",
  "true",
  "GCN or newer GPU"
>;

def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
  "GCN3Encoding",
  "true",

@@ -369,12 +300,6 @@ def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
  [FeatureFP64FP16Denormals]
>;

def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
  "DX10Clamp",
  "true",
  "clamp modifier clamps NaNs to 0.0"
>;

def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
  "FPExceptions",
  "true",

@@ -417,12 +342,6 @@ def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
  "Dump MachineInstrs in the CodeEmitter"
>;

def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
  "EnablePromoteAlloca",
  "true",
  "Enable promote alloca pass"
>;

// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
  "EnableLoadStoreOpt",

@@ -486,45 +405,29 @@ def FeatureDisable : SubtargetFeature<"",
  "Dummy feature to disable assembler instructions"
>;

class SubtargetFeatureGeneration <string Value,
def FeatureGCN : SubtargetFeature<"gcn",
  "IsGCN",
  "true",
  "GCN or newer GPU"
>;

class AMDGPUSubtargetFeatureGeneration <string Value,
  list<SubtargetFeature> Implies> :
  SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
  Value#" GPU generation", Implies>;
  SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;

def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;

def FeatureR600 : SubtargetFeatureGeneration<"R600",
  [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
>;

def FeatureR700 : SubtargetFeatureGeneration<"R700",
  [FeatureFetchLimit16, FeatureLocalMemorySize0]
>;

def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
  [FeatureFetchLimit16, FeatureLocalMemorySize32768]
>;

def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
  [FeatureFetchLimit16, FeatureWavefrontSize64,
   FeatureLocalMemorySize32768]
>;

def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureGCN,
   FeatureLDSBankCount32, FeatureMovrel]
>;

def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
   FeatureCIInsts, FeatureMovrel]
>;

def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
  [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,

@@ -535,7 +438,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
  ]
>;

def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",
  [FeatureFP64, FeatureLocalMemorySize65536,
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,

@@ -738,8 +641,6 @@ def NullALU : InstrItinClass;
// Predicate helper class
//===----------------------------------------------------------------------===//

def TruePredicate : Predicate<"true">;

def isSICI : Predicate<
  "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
  "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"

@@ -831,36 +732,15 @@ def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
def EnableLateCFGStructurize : Predicate<
  "EnableLateStructurizeCFG">;

// Exists to help track down where SubtargetPredicate isn't set rather
// than letting tablegen crash with an unhelpful error.
def InvalidPred : Predicate<"predicate not set on instruction or pattern">;

class PredicateControl {
  Predicate SubtargetPredicate = InvalidPred;
  Predicate SIAssemblerPredicate = isSICI;
  Predicate VIAssemblerPredicate = isVI;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
                                            AssemblerPredicate],
                                           AssemblerPredicates,
                                           OtherPredicates);
}

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
  PredicateControl;


// Include AMDGPU TD files
include "R600Schedule.td"
include "R600Processors.td"
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
include "SIInstrInfo.td"
include "AMDGPUCallingConv.td"
include "AMDGPUSearchableTables.td"
@@ -85,17 +85,6 @@ def RetCC_SI_Shader : CallingConv<[
  ]>>
]>;

// Calling convention for R600
def CC_R600 : CallingConv<[
  CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
    T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
    T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
    T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
    T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
    T30_XYZW, T31_XYZW, T32_XYZW
  ]>>>
]>;

// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
  CCCustom<"allocateKernArg">

@@ -165,9 +154,5 @@ def CC_AMDGPU : CallingConv<[
  CCIf<"static_cast<const AMDGPUSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
       CCDelegateTo<CC_AMDGPU_Func>>,
  CCIf<"static_cast<const AMDGPUSubtarget&>"
       "(State.getMachineFunction().getSubtarget()).getGeneration() < "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS",
       CCDelegateTo<CC_R600>>
       CCDelegateTo<CC_AMDGPU_Func>>
]>;
@@ -0,0 +1,60 @@
//===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

def FeatureFP64 : SubtargetFeature<"fp64",
  "FP64",
  "true",
  "Enable double precision operations"
>;

def FeatureFMA : SubtargetFeature<"fmaf",
  "FMA",
  "true",
  "Enable single precision FMA (not as fast as mul+add, but fused)"
>;

class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
  "localmemorysize"#Value,
  "LocalMemorySize",
  !cast<string>(Value),
  "The size of local memory in bytes"
>;

def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;

class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
  "wavefrontsize"#Value,
  "WavefrontSize",
  !cast<string>(Value),
  "The number of threads per wavefront"
>;

def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;

class SubtargetFeatureGeneration <string Value, string Subtarget,
  list<SubtargetFeature> Implies> :
  SubtargetFeature <Value, "Gen", Subtarget#"::"#Value,
  Value#" GPU generation", Implies>;

def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
  "DX10Clamp",
  "true",
  "clamp modifier clamps NaNs to 0.0"
>;

def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
  "EnablePromoteAlloca",
  "true",
  "Enable promote alloca pass"
>;
@@ -104,15 +104,11 @@ private:
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,

@@ -227,9 +223,18 @@ protected:
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}
      AMDGPUDAGToDAGISel(TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
  }

  void Select(SDNode *N) override;

@@ -237,6 +242,11 @@ public:
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;
protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

@@ -280,8 +290,7 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
      = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

@@ -637,16 +646,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();

@@ -662,26 +661,6 @@ StringRef AMDGPUDAGToDAGISel::getPassName() const {
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;

@@ -693,11 +672,11 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

@@ -2160,6 +2139,41 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  } while (IsModified);
}

bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<R600Subtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                       SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {

@@ -2180,12 +2194,12 @@ void R600DAGToDAGISel::Select(SDNode *N) {
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        RegClassID = R600::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
        RegClassID = R600::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }

@@ -2203,11 +2217,11 @@ bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

@@ -2238,7 +2252,7 @@ bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
      && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
                                  R600::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
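The instruction-selection hunks above introduce R600DAGToDAGISel as a subclass that carries the R600-only complex patterns and pulls in its own generated matcher table (R600GenDAGISel.inc). A hedged C++ sketch of that "one generated table per selector subclass" structure; the printable strings stand in for the generated matcher blobs, so this is only a shape, not the real selector:

#include <cstdio>

// Base selector with shared logic; each subclass supplies its own table.
struct BaseISel {
  virtual ~BaseISel() = default;
  virtual const char *tableName() const = 0; // stands in for SelectCode()
  void select() { std::printf("selecting with %s\n", tableName()); }
};

struct GCNISel : BaseISel {
  // In the real class: #include "AMDGPUGenDAGISel.inc"
  const char *tableName() const override { return "AMDGPUGenDAGISel.inc"; }
};

struct R600ISel : BaseISel {
  // In the real class: #include "R600GenDAGISel.inc"
  const char *tableName() const override { return "R600GenDAGISel.inc"; }
};

int main() {
  GCNISel GCN;
  R600ISel R600;
  GCN.select();  // selecting with AMDGPUGenDAGISel.inc
  R600.select(); // selecting with R600GenDAGISel.inc
}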
@@ -155,7 +155,7 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
}

AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
                                           const AMDGPUSubtarget &STI)
                                           const AMDGPUCommonSubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
  // Lower floating point store/load to integer store/load to reduce the number

@@ -330,10 +330,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::FLOG, MVT::f32, Custom);
  setOperationAction(ISD::FLOG10, MVT::f32, Custom);

  if (Subtarget->has16BitInsts()) {
    setOperationAction(ISD::FLOG, MVT::f16, Custom);
    setOperationAction(ISD::FLOG10, MVT::f16, Custom);
  }

  setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
  setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);

@@ -341,10 +337,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::FREM, MVT::f32, Custom);
  setOperationAction(ISD::FREM, MVT::f64, Custom);

  // v_mad_f32 does not support denormals according to some sources.
  if (!Subtarget->hasFP32Denormals())
    setOperationAction(ISD::FMAD, MVT::f32, Legal);

  // Expand to fneg + fadd.
  setOperationAction(ISD::FSUB, MVT::f64, Expand);

@@ -359,19 +351,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);

  if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
    setOperationAction(ISD::FCEIL, MVT::f64, Custom);
    setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
    setOperationAction(ISD::FRINT, MVT::f64, Custom);
    setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
  }

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  }

  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);

@@ -403,12 +382,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  // The hardware supports 32-bit ROTR, but not ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::ROTL, MVT::i64, Expand);

@@ -428,28 +401,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::SMAX, MVT::i32, Legal);
  setOperationAction(ISD::UMAX, MVT::i32, Legal);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);

  // We only really have 32-bit BFE instructions (and 16-bit on VI).
  //
  // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
  // effort to match them now. We want this to be false for i64 cases when the
  // extraction isn't restricted to the upper or lower half. Ideally we would
  // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
  // span the midpoint are probably relatively rare, so don't worry about them
  // for now.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  static const MVT::SimpleValueType VectorIntTypes[] = {
    MVT::v2i32, MVT::v4i32
  };

@@ -554,11 +510,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  // vector compares until that is fixed.
  setHasMultipleConditionRegisters(true);

  // SI at least has hardware support for floating point exceptions, but no way
  // of using or handling them is implemented. They are also optional in OpenCL
  // (Section 7.3)
  setHasFloatingPointExceptions(Subtarget->hasFPExceptions());

  PredictableSelectIsExpensive = false;

  // We want to find all load dependencies for long chains of stores to enable

@@ -781,7 +732,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
    {
      const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
      if (L->getMemOperand()->getAddrSpace()
          == Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT)
          == AMDGPUASI.CONSTANT_ADDRESS_32BIT)
        return true;
      return false;
    }

@@ -4290,9 +4241,11 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
  switch (IID) {
  case Intrinsic::amdgcn_mbcnt_lo:
  case Intrinsic::amdgcn_mbcnt_hi: {
    const SISubtarget &ST =
        DAG.getMachineFunction().getSubtarget<SISubtarget>();
    // These return at most the wavefront size - 1.
    unsigned Size = Op.getValueType().getSizeInBits();
    Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2());
    Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
    break;
  }
  default:
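The computeKnownBitsForTargetNode hunk above relies on a small bit-twiddling fact: amdgcn_mbcnt_* results are at most WavefrontSize - 1, so for a 64-lane wave only the low log2(64) = 6 bits of a 32-bit result can ever be set, and the remaining high bits are known zero. A self-contained check of that reasoning; the mask construction below is my plain-integer equivalent, not LLVM's APInt API:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Size = 32;             // width of the mbcnt result
  const unsigned WavefrontSizeLog2 = 6; // log2(64)
  // Equivalent of Known.Zero.setHighBits(Size - WavefrontSizeLog2):
  // bits [6, 31] are claimed to be zero.
  uint32_t KnownZero = ~uint32_t(0) << WavefrontSizeLog2;
  // Every value the intrinsic can produce (0..63) honors that claim.
  for (uint32_t V = 0; V < 64; ++V)
    assert((V & KnownZero) == 0);
  (void)Size;
  return 0;
}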
@@ -23,11 +23,13 @@
namespace llvm {

class AMDGPUMachineFunction;
class AMDGPUSubtarget;
class AMDGPUCommonSubtarget;
struct ArgDescriptor;

class AMDGPUTargetLowering : public TargetLowering {
private:
  const AMDGPUCommonSubtarget *Subtarget;

  /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
  /// legalized from a smaller type VT. Need to match pre-legalized type because
  /// the generic legalization inserts the add/sub between the select and

@@ -39,7 +41,6 @@ public:
  static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);

protected:
  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;

@@ -124,7 +125,7 @@ protected:
  void analyzeFormalArgumentsCompute(CCState &State,
                                     const SmallVectorImpl<ISD::InputArg> &Ins) const;
public:
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);

  bool mayIgnoreSignedZero(SDValue Op) const {
    if (getTargetMachine().Options.NoSignedZerosFPMath)
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
/// Implementation of the TargetInstrInfo class that is common to all
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

@@ -23,107 +23,11 @@

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
//void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    ST(ST),
    AMDGPUASI(ST.getAMDGPUAS()) {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { }

// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into 2 16 store batches.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to deal with saying different
// address space loads should be clustered, and ones which might cause bank
// conflicts.
//
// This might be deprecated so it might not be worth that much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have less than 16 loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5
};

static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  SIEncodingFamily Gen = subtargetEncodingFamily(ST);

  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
    Gen = SIEncodingFamily::GFX9;

  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                      : SIEncodingFamily::SDWA;
  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
  // subtarget has UnpackedD16VMem feature.
  // TODO: remove this when we discard GFX80 encoding.
  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
    Gen = SIEncodingFamily::GFX80;

  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
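pseudoToMCOpcode above distinguishes two sentinels that are easy to conflate: the generated getMCOpcode() answers -1 when an opcode has no mapping row at all (it is already native), while a pseudo that has no encoding in the chosen family comes back as 0xffff, which only compares equal to (uint16_t)-1 after the table's 16-bit entry is widened to int. A small sketch of that arithmetic; the two-entry table here is hypothetical, standing in for the generated one:

#include <cassert>
#include <cstdint>

// Hypothetical 16-bit opcode map in the spirit of the generated
// AMDGPU::getMCOpcode(): 0xffff marks "pseudo with no encoding in the
// selected encoding family".
static const uint16_t MCOpcodeTable[] = {100, 0xffff};

// Widened lookup: returns -1 when Opcode has no table row at all,
// i.e. it is already a native instruction.
static int getMCOpcode(int Opcode) {
  if (Opcode < 0 || Opcode >= int(sizeof(MCOpcodeTable) / sizeof(uint16_t)))
    return -1;
  return MCOpcodeTable[Opcode]; // 0xffff widens to 65535, not to -1
}

static int pseudoToMCOpcode(int Opcode) {
  int MCOp = getMCOpcode(Opcode);
  if (MCOp == -1)           // no mapping: already native, pass it through
    return Opcode;
  if (MCOp == (uint16_t)-1) // 0xffff: pseudo without an encoding here
    return -1;
  return MCOp;              // real MC opcode
}

int main() {
  assert(pseudoToMCOpcode(0) == 100); // mapped to a concrete encoding
  assert(pseudoToMCOpcode(1) == -1);  // pseudo, no encoding in this family
  assert(pseudoToMCOpcode(7) == 7);   // native instruction, unchanged
}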
@@ -20,10 +20,6 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_HEADER

namespace llvm {

class AMDGPUSubtarget;

@@ -31,26 +27,10 @@ class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;

class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
  const AMDGPUSubtarget &ST;

  virtual void anchor();
protected:
  AMDGPUAS AMDGPUASI;

class AMDGPUInstrInfo {
public:
  explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);

  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                               int64_t Offset1, int64_t Offset2,
                               unsigned NumLoads) const override;

  /// Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  static bool isUniformMMO(const MachineMemOperand *MMO);
};
@@ -42,6 +42,47 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

def TruePredicate : Predicate<"true">;

// Exists to help track down where SubtargetPredicate isn't set rather
// than letting tablegen crash with an unhelpful error.
def InvalidPred : Predicate<"predicate not set on instruction or pattern">;

class PredicateControl {
  Predicate SubtargetPredicate = InvalidPred;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = !listconcat([SubtargetPredicate,
                                            AssemblerPredicate],
                                           AssemblerPredicates,
                                           OtherPredicates);
}
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
  PredicateControl;

def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;

@@ -94,12 +135,6 @@ def brtarget : Operand<OtherVT>;
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]
>;

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),

@@ -112,8 +147,6 @@ class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  [{ return N->hasOneUse(); }]
>;

def trunc_oneuse : HasOneUseUnaryOp<trunc>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;

@@ -240,6 +273,37 @@ def COND_NULL : PatLeaf <
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments

@@ -769,11 +833,3 @@ class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;

include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"

include "SIInstrInfo.td"
@@ -14,5 +14,3 @@
let TargetPrefix = "AMDGPU", isTarget = 1 in {
  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
}

include "SIIntrinsics.td"
@@ -117,7 +117,6 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
    return false;

  const TargetMachine &TM = TPC->getTM<TargetMachine>();
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(F);
  bool Changed = false;

  for (auto *U : F.users()) {

@@ -125,7 +124,7 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
    if (!CI)
      continue;

    Changed |= ST.makeLIDRangeMetadata(CI);
    Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
  }
  return Changed;
}
@@ -152,7 +152,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  IsAMDGCN = TT.getArch() == Triple::amdgcn;
  IsAMDHSA = TT.getOS() == Triple::AMDHSA;

  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

@@ -174,8 +174,8 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {

std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
      *Builder.GetInsertBlock()->getParent());
  const Function &F = *Builder.GetInsertBlock()->getParent();
  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);

  if (!IsAMDHSA) {
    Function *LocalSizeYFn

@@ -261,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
}

Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
      *Builder.GetInsertBlock()->getParent());
  const AMDGPUCommonSubtarget &ST =
      AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
  Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;

  switch (N) {

@@ -602,7 +602,7 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {

  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so

@@ -729,8 +729,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
      TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();
@@ -19,5 +19,4 @@ foreach Index = 0-15 in {

}

include "R600RegisterInfo.td"
include "SIRegisterInfo.td"
@@ -23,6 +23,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>

@@ -34,9 +35,32 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "R600GenSubtargetInfo.inc"

AMDGPUSubtarget::~AMDGPUSubtarget() = default;

R600Subtarget &
R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
                                               StringRef GPU, StringRef FS) {
  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
  FullFS += FS;
  ParseSubtargetFeatures(GPU, FullFS);

  // FIXME: I don't think Evergreen has any useful support for
  // denormals, but should be checked. Should we issue a warning somewhere
  // if someone tries to enable these?
  if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
    FP32Denormals = false;
  }

  HasMulU24 = getGeneration() >= EVERGREEN;
  HasMulI24 = hasCaymanISA();

  return *this;
}

AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS) {

@@ -93,26 +117,44 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
    HasMovrel = true;
  }

  HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;

  return *this;
}

AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
                                             const FeatureBitset &FeatureBits) :
  TargetTriple(TT),
  SubtargetFeatureBits(FeatureBits),
  Has16BitInsts(false),
  HasMadMixInsts(false),
  FP32Denormals(false),
  FPExceptions(false),
  HasSDWA(false),
  HasVOP3PInsts(false),
  HasMulI24(true),
  HasMulU24(true),
  HasFminFmaxLegacy(true),
  EnablePromoteAlloca(false),
  LocalMemorySize(0),
  WavefrontSize(0)
  { }

AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                                 const TargetMachine &TM)
  : AMDGPUGenSubtargetInfo(TT, GPU, FS),
                                 const TargetMachine &TM) :
  AMDGPUGenSubtargetInfo(TT, GPU, FS),
  AMDGPUCommonSubtarget(TT, getFeatureBits()),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TargetTriple(TT),
  Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
  Gen(SOUTHERN_ISLANDS),
  IsaVersion(ISAVersion0_0_0),
  WavefrontSize(0),
  LocalMemorySize(0),
  LDSBankCount(0),
  MaxPrivateElementSize(0),

  FastFMAF32(false),
  HalfRate64Ops(false),

  FP32Denormals(false),
  FP64FP16Denormals(false),
  FPExceptions(false),
  DX10Clamp(false),
  FlatForGlobal(false),
  AutoWaitcntBeforeBarrier(false),

@@ -128,7 +170,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,

  EnableHugePrivateBuffer(false),
  EnableVGPRSpilling(false),
  EnablePromoteAlloca(false),
  EnableLoadStoreOpt(false),
  EnableUnsafeDSOffsetFolding(false),
  EnableSIScheduler(false),

@@ -136,25 +177,18 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
  DumpCode(false),

  FP64(false),
  FMA(false),
  MIMG_R128(false),
  IsGCN(false),
  GCN3Encoding(false),
  CIInsts(false),
  GFX9Insts(false),
  SGPRInitBug(false),
  HasSMemRealTime(false),
  Has16BitInsts(false),
  HasIntClamp(false),
  HasVOP3PInsts(false),
  HasMadMixInsts(false),
  HasFmaMixInsts(false),
  HasMovrel(false),
  HasVGPRIndexMode(false),
  HasScalarStores(false),
  HasScalarAtomics(false),
  HasInv2PiInlineImm(false),
  HasSDWA(false),
  HasSDWAOmod(false),
  HasSDWAScalar(false),
  HasSDWASdst(false),

@@ -170,20 +204,14 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
  AddNoCarryInsts(false),
  HasUnpackedD16VMem(false),

  R600ALUInst(false),
  CaymanISA(false),
  CFALUBug(false),
  HasVertexCache(false),
  TexVTXClauseSize(0),
  ScalarizeGlobal(false),

  FeatureDisable(false),
  InstrItins(getInstrItineraryForCPU(GPU)) {
  FeatureDisable(false) {
  AS = AMDGPU::getAMDGPUAS(TT);
  initializeSubtargetDependencies(TT, GPU, FS);
}

unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  const Function &F) const {
  if (NWaves == 1)
    return getLocalMemorySize();

@@ -193,7 +221,7 @@ unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
  return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
  const Function &F) const {
  unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
  unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);

@@ -206,13 +234,13 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
}

unsigned
AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}

std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_KERNEL:

@@ -230,7 +258,7 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  }
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
  const Function &F) const {
  // FIXME: 1024 if function.
  // Default minimum/maximum flat work group sizes.

@@ -260,7 +288,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
  return Requested;
}

std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
  const Function &F) const {
  // Default minimum/maximum number of waves per execution unit.
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

@@ -308,7 +336,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
  return Requested;
}

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;

@@ -372,10 +400,22 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {

R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                             const TargetMachine &TM) :
  AMDGPUSubtarget(TT, GPU, FS, TM),
  R600GenSubtargetInfo(TT, GPU, FS),
  AMDGPUCommonSubtarget(TT, getFeatureBits()),
  InstrInfo(*this),
  FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
  TLInfo(TM, *this) {}
  FMA(false),
  CaymanISA(false),
  CFALUBug(false),
  DX10Clamp(false),
  HasVertexCache(false),
  R600ALUInst(false),
  FP64(false),
  TexVTXClauseSize(0),
  Gen(R600),
  TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
  InstrItins(getInstrItineraryForCPU(GPU)),
  AS (AMDGPU::getAMDGPUAS(TT)) { }

SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                         const GCNTargetMachine &TM)

@@ -619,3 +659,17 @@ void SISubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}

const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
  if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>());
  else
    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>());
}

const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) {
  if (TM.getTargetTriple().getArch() == Triple::amdgcn)
    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F));
  else
    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
}
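R600Subtarget::initializeSubtargetDependencies above builds FullFS by prepending default features ("+promote-alloca,+dx10-clamp,") to the user feature string before calling ParseSubtargetFeatures. Because later entries in a feature string override earlier ones, an explicit "-promote-alloca" supplied by the user wins over the default. A toy parser demonstrating that ordering rule; the last-occurrence-wins behavior mirrors LLVM's SubtargetFeatures as I understand it, but this parser is a simplification:

#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Parse "+feat"/"-feat" tokens; a later token for the same feature
// overwrites the earlier one, so defaults placed first can be overridden.
std::map<std::string, bool> parseFeatures(const std::string &FS) {
  std::map<std::string, bool> Features;
  std::stringstream SS(FS);
  std::string Tok;
  while (std::getline(SS, Tok, ','))
    if (Tok.size() > 1 && (Tok[0] == '+' || Tok[0] == '-'))
      Features[Tok.substr(1)] = (Tok[0] == '+'); // last occurrence wins
  return Features;
}

int main() {
  std::string FullFS = "+promote-alloca,+dx10-clamp,"; // subtarget defaults
  FullFS += "-promote-alloca";                         // user override
  auto F = parseFeatures(FullFS);
  std::cout << "promote-alloca=" << F["promote-alloca"] << "\n"; // 0
  std::cout << "dx10-clamp=" << F["dx10-clamp"] << "\n";         // 1
}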
@ -39,22 +39,181 @@
|
|||
|
||||
#define GET_SUBTARGETINFO_HEADER
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
#define GET_SUBTARGETINFO_HEADER
|
||||
#include "R600GenSubtargetInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class StringRef;
|
||||
|
||||
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
|
||||
class AMDGPUCommonSubtarget {
|
||||
private:
|
||||
Triple TargetTriple;
|
||||
|
||||
protected:
|
||||
const FeatureBitset &SubtargetFeatureBits;
|
||||
bool Has16BitInsts;
|
||||
bool HasMadMixInsts;
|
||||
bool FP32Denormals;
|
||||
bool FPExceptions;
|
||||
bool HasSDWA;
|
||||
bool HasVOP3PInsts;
|
||||
bool HasMulI24;
|
||||
bool HasMulU24;
|
||||
bool HasFminFmaxLegacy;
|
||||
bool EnablePromoteAlloca;
|
||||
int LocalMemorySize;
|
||||
unsigned WavefrontSize;
|
||||
|
||||
public:
|
||||
AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
|
||||
|
||||
static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
|
||||
static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
|
||||
const Function &F);
|
||||
|
||||
/// \returns Default range flat work group size for a calling convention.
|
||||
std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
|
||||
|
||||
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
|
||||
/// for function \p F, or minimum/maximum flat work group sizes explicitly
|
||||
/// requested using "amdgpu-flat-work-group-size" attribute attached to
|
||||
/// function \p F.
|
||||
///
|
||||
/// \returns Subtarget's default values if explicitly requested values cannot
|
||||
/// be converted to integer, or violate subtarget's specifications.
|
||||
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
|
||||
|
||||
/// \returns Subtarget's default pair of minimum/maximum number of waves per
|
||||
/// execution unit for function \p F, or minimum/maximum number of waves per
|
||||
/// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
|
||||
/// attached to function \p F.
|
||||
///
|
||||
/// \returns Subtarget's default values if explicitly requested values cannot
|
||||
/// be converted to integer, violate subtarget's specifications, or are not
|
||||
/// compatible with minimum/maximum number of waves limited by flat work group
|
||||
/// size, register usage, and/or lds usage.
|
||||
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;

  /// Return the amount of LDS that can be used that will not restrict the
  /// occupancy lower than WaveCount.
  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                           const Function &) const;

  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
  /// if the given LDS memory size is the only constraint.
  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;

  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;

  bool isAmdHsaOS() const {
    return TargetTriple.getOS() == Triple::AMDHSA;
  }

  bool isAmdPalOS() const {
    return TargetTriple.getOS() == Triple::AMDPAL;
  }

  bool has16BitInsts() const {
    return Has16BitInsts;
  }

  bool hasMadMixInsts() const {
    return HasMadMixInsts;
  }

  bool hasFP32Denormals() const {
    return FP32Denormals;
  }

  bool hasFPExceptions() const {
    return FPExceptions;
  }

  bool hasSDWA() const {
    return HasSDWA;
  }

  bool hasVOP3PInsts() const {
    return HasVOP3PInsts;
  }

  bool hasMulI24() const {
    return HasMulI24;
  }

  bool hasMulU24() const {
    return HasMulU24;
  }

  bool hasFminFmaxLegacy() const {
    return HasFminFmaxLegacy;
  }

  bool isPromoteAllocaEnabled() const {
    return EnablePromoteAlloca;
  }

  unsigned getWavefrontSize() const {
    return WavefrontSize;
  }

  int getLocalMemorySize() const {
    return LocalMemorySize;
  }

  unsigned getAlignmentForImplicitArgPtr() const {
    return isAmdHsaOS() ? 8 : 4;
  }

  /// \returns Maximum number of work groups per compute unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
                                                  FlatWorkGroupSize);
  }

  /// \returns Minimum flat work group size supported by the subtarget.
  unsigned getMinFlatWorkGroupSize() const {
    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
  }

  /// \returns Maximum flat work group size supported by the subtarget.
  unsigned getMaxFlatWorkGroupSize() const {
    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
  }

  /// \returns Maximum number of waves per execution unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
    return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
                                             FlatWorkGroupSize);
  }

  /// \returns Minimum number of waves per execution unit supported by the
  /// subtarget.
  unsigned getMinWavesPerEU() const {
    return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
  }

  unsigned getMaxWavesPerEU() const { return 10; }

  /// Creates value range metadata on a workitemid.* intrinsic call or load.
  bool makeLIDRangeMetadata(Instruction *I) const;
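
  // Illustrative sketch, not part of the original patch: for a kernel whose
  // group size is known to be at most 256 in X, this attaches metadata such
  // as !range !{i32 0, i32 256} to the corresponding workitem id call.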

  virtual ~AMDGPUCommonSubtarget() {}
};
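
// Usage sketch (illustrative, not from the original patch): passes can query
// the common properties without knowing which sub-target they run on, e.g.
//   const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(MF);
//   unsigned WaveSize = ST.getWavefrontSize();
//   int LDSBytes = ST.getLocalMemorySize();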

class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
                        public AMDGPUCommonSubtarget {
public:
  enum Generation {
    R600 = 0,
    R700,
    EVERGREEN,
    NORTHERN_ISLANDS,
    SOUTHERN_ISLANDS,
    SEA_ISLANDS,
    VOLCANIC_ISLANDS,
    GFX9,
    // Gap for R600 generations, so we can do comparisons between
    // AMDGPUSubtarget and r600Subtarget.
    SOUTHERN_ISLANDS = 4,
    SEA_ISLANDS = 5,
    VOLCANIC_ISLANDS = 6,
    GFX9 = 7,
  };

  enum {
@@ -96,13 +255,20 @@ public:
    LLVMTrapHandlerRegValue = 1
  };

private:
  SIFrameLowering FrameLowering;

  /// GlobalISel related APIs.
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

protected:
  // Basic subtarget description.
  Triple TargetTriple;
  Generation Gen;
  unsigned Gen;
  unsigned IsaVersion;
  unsigned WavefrontSize;
  int LocalMemorySize;
  int LDSBankCount;
  unsigned MaxPrivateElementSize;

@@ -111,9 +277,7 @@ protected:
  bool HalfRate64Ops;

  // Dynamically set bits that enable features.
  bool FP32Denormals;
  bool FP64FP16Denormals;
  bool FPExceptions;
  bool DX10Clamp;
  bool FlatForGlobal;
  bool AutoWaitcntBeforeBarrier;

@@ -129,7 +293,6 @@ protected:
  // Used as options.
  bool EnableHugePrivateBuffer;
  bool EnableVGPRSpilling;
  bool EnablePromoteAlloca;
  bool EnableLoadStoreOpt;
  bool EnableUnsafeDSOffsetFolding;
  bool EnableSIScheduler;

@@ -146,17 +309,13 @@ protected:
  bool GFX9Insts;
  bool SGPRInitBug;
  bool HasSMemRealTime;
  bool Has16BitInsts;
  bool HasIntClamp;
  bool HasVOP3PInsts;
  bool HasMadMixInsts;
  bool HasFmaMixInsts;
  bool HasMovrel;
  bool HasVGPRIndexMode;
  bool HasScalarStores;
  bool HasScalarAtomics;
  bool HasInv2PiInlineImm;
  bool HasSDWA;
  bool HasSDWAOmod;
  bool HasSDWAScalar;
  bool HasSDWASdst;

@@ -181,7 +340,6 @@ protected:
  // Dummy feature to use for assembler in tablegen.
  bool FeatureDisable;

  InstrItineraryData InstrItins;
  SelectionDAGTargetInfo TSInfo;
  AMDGPUAS AS;

@@ -193,13 +351,30 @@ public:
  AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
                                                   StringRef GPU, StringRef FS);

  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
  virtual const SIInstrInfo *getInstrInfo() const override = 0;

  const InstrItineraryData *getInstrItineraryData() const override {
    return &InstrItins;
  const SIFrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  virtual const SITargetLowering *getTargetLowering() const override = 0;

  virtual const SIRegisterInfo *getRegisterInfo() const override = 0;

  const CallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }

  const InstructionSelector *getInstructionSelector() const override {
    return InstSelector.get();
  }

  const LegalizerInfo *getLegalizerInfo() const override {
    return Legalizer.get();
  }

  const RegisterBankInfo *getRegBankInfo() const override {
    return RegBankInfo.get();
  }

  // Nothing implemented, just prevent crashes on use.
@@ -209,34 +384,18 @@ public:

  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

  bool isAmdHsaOS() const {
    return TargetTriple.getOS() == Triple::AMDHSA;
  }

  bool isMesa3DOS() const {
    return TargetTriple.getOS() == Triple::Mesa3D;
  }

  bool isAmdPalOS() const {
    return TargetTriple.getOS() == Triple::AMDPAL;
  }

  Generation getGeneration() const {
    return Gen;
  }

  unsigned getWavefrontSize() const {
    return WavefrontSize;
    return (Generation)Gen;
  }

  unsigned getWavefrontSizeLog2() const {
    return Log2_32(WavefrontSize);
  }

  int getLocalMemorySize() const {
    return LocalMemorySize;
  }

  int getLDSBankCount() const {
    return LDSBankCount;
  }

@@ -249,18 +408,10 @@ public:
    return AS;
  }

  bool has16BitInsts() const {
    return Has16BitInsts;
  }

  bool hasIntClamp() const {
    return HasIntClamp;
  }

  bool hasVOP3PInsts() const {
    return HasVOP3PInsts;
  }

  bool hasFP64() const {
    return FP64;
  }

@@ -269,6 +420,10 @@ public:
    return MIMG_R128;
  }

  bool hasHWFP64() const {
    return FP64;
  }

  bool hasFastFMAF32() const {
    return FastFMAF32;
  }

@@ -278,15 +433,15 @@ public:
  }

  bool hasAddr64() const {
    return (getGeneration() < VOLCANIC_ISLANDS);
    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
  }

  bool hasBFE() const {
    return (getGeneration() >= EVERGREEN);
    return true;
  }

  bool hasBFI() const {
    return (getGeneration() >= EVERGREEN);
    return true;
  }

  bool hasBFM() const {

@@ -294,42 +449,23 @@ public:
  }

  bool hasBCNT(unsigned Size) const {
    if (Size == 32)
      return (getGeneration() >= EVERGREEN);

    if (Size == 64)
      return (getGeneration() >= SOUTHERN_ISLANDS);

    return false;
  }

  bool hasMulU24() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasMulI24() const {
    return (getGeneration() >= SOUTHERN_ISLANDS ||
            hasCaymanISA());
    return true;
  }

  bool hasFFBL() const {
    return (getGeneration() >= EVERGREEN);
    return true;
  }

  bool hasFFBH() const {
    return (getGeneration() >= EVERGREEN);
    return true;
  }

  bool hasMed3_16() const {
    return getGeneration() >= GFX9;
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  bool hasMin3Max3_16() const {
    return getGeneration() >= GFX9;
  }

  bool hasMadMixInsts() const {
    return HasMadMixInsts;
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  bool hasFmaMixInsts() const {

@@ -337,15 +473,7 @@ public:
  }

  bool hasCARRY() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasBORROW() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasCaymanISA() const {
    return CaymanISA;
    return true;
  }

  bool hasFMA() const {
@@ -360,10 +488,6 @@ public:
    return EnableHugePrivateBuffer;
  }

  bool isPromoteAllocaEnabled() const {
    return EnablePromoteAlloca;
  }

  bool unsafeDSOffsetFoldingEnabled() const {
    return EnableUnsafeDSOffsetFolding;
  }

@@ -377,20 +501,10 @@ public:
  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                           const Function &) const;

  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
  /// if the given LDS memory size is the only constraint.
  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;

  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;

  bool hasFP16Denormals() const {
    return FP64FP16Denormals;
  }

  bool hasFP32Denormals() const {
    return FP32Denormals;
  }

  bool hasFP64Denormals() const {
    return FP64FP16Denormals;
  }

@@ -399,10 +513,6 @@ public:
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  bool hasFPExceptions() const {
    return FPExceptions;
  }

  bool enableDX10Clamp() const {
    return DX10Clamp;
  }

@@ -444,7 +554,7 @@ public:
  }

  bool hasApertureRegs() const {
    return HasApertureRegs;
  }

  bool isTrapHandlerEnabled() const {
@@ -510,14 +620,6 @@ public:
    return getGeneration() >= SEA_ISLANDS;
  }

  bool hasFminFmaxLegacy() const {
    return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
  }

  bool hasSDWA() const {
    return HasSDWA;
  }

  bool hasSDWAOmod() const {
    return HasSDWAOmod;
  }

@@ -556,10 +658,6 @@ public:
    return isAmdCodeObjectV2(F) ? 0 : 36;
  }

  unsigned getAlignmentForImplicitArgPtr() const {
    return isAmdHsaOS() ? 8 : 4;
  }

  /// \returns Number of bytes of arguments that are passed to a shader or
  /// kernel in addition to the explicit ones declared for the function.
  unsigned getImplicitArgNumBytes(const Function &F) const {
@@ -588,134 +686,39 @@ public:
    return true;
  }

  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }

  /// \returns Number of execution units per compute unit supported by the
  /// subtarget.
  unsigned getEUsPerCU() const {
    return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
  }

  /// \returns Maximum number of work groups per compute unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
                                                  FlatWorkGroupSize);
    return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Maximum number of waves per compute unit supported by the
  /// subtarget without any kind of limitation.
  unsigned getMaxWavesPerCU() const {
    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Maximum number of waves per compute unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
                                             FlatWorkGroupSize);
  }

  /// \returns Minimum number of waves per execution unit supported by the
  /// subtarget.
  unsigned getMinWavesPerEU() const {
    return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
  }

  /// \returns Maximum number of waves per execution unit supported by the
  /// subtarget without any kind of limitation.
  unsigned getMaxWavesPerEU() const {
    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
  }

  /// \returns Maximum number of waves per execution unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
                                             FlatWorkGroupSize);
  }

  /// \returns Minimum flat work group size supported by the subtarget.
  unsigned getMinFlatWorkGroupSize() const {
    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
  }

  /// \returns Maximum flat work group size supported by the subtarget.
  unsigned getMaxFlatWorkGroupSize() const {
    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
    return AMDGPU::IsaInfo::getMaxWavesPerEU();
  }

  /// \returns Number of waves per work group supported by the subtarget and
  /// limited by given \p FlatWorkGroupSize.
  unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
    return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
                                                 FlatWorkGroupSize);
  }

  /// \returns Default range flat work group size for a calling convention.
  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;

  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
  /// for function \p F, or minimum/maximum flat work group sizes explicitly
  /// requested using "amdgpu-flat-work-group-size" attribute attached to
  /// function \p F.
  ///
  /// \returns Subtarget's default values if explicitly requested values cannot
  /// be converted to integer, or violate subtarget's specifications.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;

  /// \returns Subtarget's default pair of minimum/maximum number of waves per
  /// execution unit for function \p F, or minimum/maximum number of waves per
  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
  /// attached to function \p F.
  ///
  /// \returns Subtarget's default values if explicitly requested values cannot
  /// be converted to integer, violate subtarget's specifications, or are not
  /// compatible with minimum/maximum number of waves limited by flat work group
  /// size, register usage, and/or lds usage.
  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;

  /// Creates value range metadata on a workitemid.* intrinsic call or load.
  bool makeLIDRangeMetadata(Instruction *I) const;
};

class R600Subtarget final : public AMDGPUSubtarget {
private:
  R600InstrInfo InstrInfo;
  R600FrameLowering FrameLowering;
  R600TargetLowering TLInfo;

public:
  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                const TargetMachine &TM);

  const R600InstrInfo *getInstrInfo() const override {
    return &InstrInfo;
  }

  const R600FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  const R600TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const R600RegisterInfo *getRegisterInfo() const override {
    return &InstrInfo.getRegisterInfo();
  }

  bool hasCFAluBug() const {
    return CFALUBug;
  }

  bool hasVertexCache() const {
    return HasVertexCache;
  }

  short getTexVTXClauseSize() const {
    return TexVTXClauseSize;
    return AMDGPU::IsaInfo::getWavesPerWorkGroup(
        MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
  }
};

@@ -766,6 +769,8 @@ public:
  const SIRegisterInfo *getRegisterInfo() const override {
    return &InstrInfo.getRegisterInfo();
  }
  // static wrappers
  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);

  // XXX - Why is this here if it isn't in the default pass set?
  bool enableEarlyIfConversion() const override {

@@ -775,7 +780,7 @@ public:
  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           unsigned NumRegionInstrs) const override;

  bool isVGPRSpillingEnabled(const Function& F) const;
  bool isVGPRSpillingEnabled(const Function &F) const;

  unsigned getMaxNumUserSGPRs() const {
    return 16;

@@ -860,16 +865,18 @@ public:
  unsigned getKernArgSegmentSize(const Function &F,
                                 unsigned ExplictArgBytes) const;

  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
  /// SGPRs
  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
  /// VGPRs
  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;

  /// \returns true if the flat_scratch register should be initialized with the
  /// pointer to the wave's scratch memory rather than a size and offset.
  bool flatScratchIsPointer() const {
    return getGeneration() >= GFX9;
    return getGeneration() >= AMDGPUSubtarget::GFX9;
  }

  /// \returns true if the machine has merged shaders in which s0-s7 are
@@ -880,35 +887,39 @@ public:

  /// \returns SGPR allocation granularity supported by the subtarget.
  unsigned getSGPRAllocGranule() const {
    return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
    return AMDGPU::IsaInfo::getSGPRAllocGranule(
        MCSubtargetInfo::getFeatureBits());
  }

  /// \returns SGPR encoding granularity supported by the subtarget.
  unsigned getSGPREncodingGranule() const {
    return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
    return AMDGPU::IsaInfo::getSGPREncodingGranule(
        MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Total number of SGPRs supported by the subtarget.
  unsigned getTotalNumSGPRs() const {
    return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
    return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Addressable number of SGPRs supported by the subtarget.
  unsigned getAddressableNumSGPRs() const {
    return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
    return AMDGPU::IsaInfo::getAddressableNumSGPRs(
        MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Minimum number of SGPRs that meets the given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
    return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
    return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
                                           WavesPerEU);
  }

  /// \returns Maximum number of SGPRs that meets the given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
    return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
                                           Addressable);
    return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
                                           WavesPerEU, Addressable);
  }

  /// \returns Reserved number of SGPRs for given function \p MF.

@@ -926,34 +937,39 @@ public:

  /// \returns VGPR allocation granularity supported by the subtarget.
  unsigned getVGPRAllocGranule() const {
    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
    return AMDGPU::IsaInfo::getVGPRAllocGranule(
        MCSubtargetInfo::getFeatureBits());
  }

  /// \returns VGPR encoding granularity supported by the subtarget.
  unsigned getVGPREncodingGranule() const {
    return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
    return AMDGPU::IsaInfo::getVGPREncodingGranule(
        MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Total number of VGPRs supported by the subtarget.
  unsigned getTotalNumVGPRs() const {
    return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
    return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Addressable number of VGPRs supported by the subtarget.
  unsigned getAddressableNumVGPRs() const {
    return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
    return AMDGPU::IsaInfo::getAddressableNumVGPRs(
        MCSubtargetInfo::getFeatureBits());
  }

  /// \returns Minimum number of VGPRs that meets given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
    return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
    return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
                                           WavesPerEU);
  }

  /// \returns Maximum number of VGPRs that meets given number of waves per
  /// execution unit requirement supported by the subtarget.
  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
    return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
    return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
                                           WavesPerEU);
  }

  /// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -971,6 +987,127 @@ public:
                                           const override;
};

class R600Subtarget final : public R600GenSubtargetInfo,
                            public AMDGPUCommonSubtarget {
public:
  enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };

private:
  R600InstrInfo InstrInfo;
  R600FrameLowering FrameLowering;
  bool FMA;
  bool CaymanISA;
  bool CFALUBug;
  bool DX10Clamp;
  bool HasVertexCache;
  bool R600ALUInst;
  bool FP64;
  short TexVTXClauseSize;
  Generation Gen;
  R600TargetLowering TLInfo;
  InstrItineraryData InstrItins;
  SelectionDAGTargetInfo TSInfo;
  AMDGPUAS AS;

public:
  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                const TargetMachine &TM);

  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }

  const R600FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  const R600TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const R600RegisterInfo *getRegisterInfo() const override {
    return &InstrInfo.getRegisterInfo();
  }

  const InstrItineraryData *getInstrItineraryData() const override {
    return &InstrItins;
  }

  // Nothing implemented, just prevent crashes on use.
  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }

  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);

  Generation getGeneration() const {
    return Gen;
  }

  unsigned getStackAlignment() const {
    return 4;
  }

  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
                                                 StringRef GPU, StringRef FS);

  bool hasBFE() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasBFI() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasBCNT(unsigned Size) const {
    if (Size == 32)
      return (getGeneration() >= EVERGREEN);

    return false;
  }

  bool hasBORROW() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasCARRY() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasCaymanISA() const {
    return CaymanISA;
  }

  bool hasFFBL() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasFFBH() const {
    return (getGeneration() >= EVERGREEN);
  }

  bool hasFMA() const { return FMA; }

  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
    return 36;
  }

  bool hasCFAluBug() const { return CFALUBug; }

  bool hasVertexCache() const { return HasVertexCache; }

  short getTexVTXClauseSize() const { return TexVTXClauseSize; }

  AMDGPUAS getAMDGPUAS() const { return AS; }

  bool enableMachineScheduler() const override {
    return true;
  }

  bool enableSubRegLiveness() const override {
    return true;
  }
};
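
// Usage sketch (illustrative, not from the original patch): R600-only code
// can now fetch its own subtarget directly, e.g.
//   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
//   if (ST.hasCaymanISA()) { /* Cayman-specific lowering */ }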

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H

@@ -34,7 +34,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
  std::unique_ptr<TargetLoweringObjectFile> TLOF;
  AMDGPUIntrinsicInfo IntrinsicInfo;
  AMDGPUAS AS;

  StringRef getGPUName(const Function &F) const;

@@ -49,12 +48,8 @@ public:
                      CodeGenOpt::Level OL);
  ~AMDGPUTargetMachine() override;

  const AMDGPUSubtarget *getSubtargetImpl() const;
  const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0;

  const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
    return &IntrinsicInfo;
  }
  const TargetSubtargetInfo *getSubtargetImpl() const;
  const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override = 0;

  TargetLoweringObjectFile *getObjFileLowering() const override {
    return TLOF.get();

@@ -103,6 +98,7 @@ public:

class GCNTargetMachine final : public AMDGPUTargetMachine {
private:
  AMDGPUIntrinsicInfo IntrinsicInfo;
  mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;

public:

@@ -117,6 +113,10 @@ public:

  TargetTransformInfo getTargetTransformInfo(const Function &F) override;

  const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
    return &IntrinsicInfo;
  }

  bool useIPRA() const override {
    return true;
  }
@@ -102,7 +102,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  unsigned ThresholdPrivate = UnrollThresholdPrivate;
  unsigned ThresholdLocal = UnrollThresholdLocal;
  unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
  AMDGPUAS ASST = ST->getAMDGPUAS();
  const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
  for (const BasicBlock *BB : L->getBlocks()) {
    const DataLayout &DL = BB->getModule()->getDataLayout();
    unsigned LocalGEPsSeen = 0;

@@ -45,17 +45,12 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {

  friend BaseT;

  const AMDGPUSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(TM->getSubtargetImpl(F)),
      TLI(ST->getTargetLowering()) {}

  const AMDGPUSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

@@ -123,7 +118,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(TM->getSubtargetImpl(F)),
      ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

@@ -211,18 +206,18 @@ class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {

  friend BaseT;

  const AMDGPUSubtarget *ST;
  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(TM->getSubtargetImpl(F)),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const AMDGPUSubtarget *getST() const { return ST; }
  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
@@ -432,19 +432,19 @@ void AMDGPUCFGStructurizer::reversePredicateSetter(
  for (;; --I) {
    if (I == MBB.end())
      continue;
    if (I->getOpcode() == AMDGPU::PRED_X) {
    if (I->getOpcode() == R600::PRED_X) {
      switch (I->getOperand(2).getImm()) {
      case AMDGPU::PRED_SETE_INT:
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
      case R600::PRED_SETE_INT:
        I->getOperand(2).setImm(R600::PRED_SETNE_INT);
        return;
      case AMDGPU::PRED_SETNE_INT:
        I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
      case R600::PRED_SETNE_INT:
        I->getOperand(2).setImm(R600::PRED_SETE_INT);
        return;
      case AMDGPU::PRED_SETE:
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
      case R600::PRED_SETE:
        I->getOperand(2).setImm(R600::PRED_SETNE);
        return;
      case AMDGPU::PRED_SETNE:
        I->getOperand(2).setImm(AMDGPU::PRED_SETE);
      case R600::PRED_SETNE:
        I->getOperand(2).setImm(R600::PRED_SETE);
        return;
      default:
        llvm_unreachable("PRED_X Opcode invalid!");

@@ -513,10 +513,10 @@ void AMDGPUCFGStructurizer::insertCondBranchBefore(

int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
  case AMDGPU::BRANCH_COND_i32:
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
  case R600::JUMP_COND:
  case R600::JUMP: return R600::IF_PREDICATE_SET;
  case R600::BRANCH_COND_i32:
  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;

@@ -524,10 +524,10 @@ int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {

int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
  case AMDGPU::BRANCH_COND_i32:
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
  case R600::JUMP_COND:
  case R600::JUMP: return R600::IF_PREDICATE_SET;
  case R600::BRANCH_COND_i32:
  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;

@@ -535,8 +535,8 @@ int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {

int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
  case R600::JUMP_COND:
  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
  default: llvm_unreachable("internal error");
  }
  return -1;

@@ -544,8 +544,8 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {

int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
  case R600::JUMP_COND:
  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
  default: llvm_unreachable("internal error");
  }
  return -1;

@@ -573,9 +573,9 @@ AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,

bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::BRANCH_COND_i32:
  case AMDGPU::BRANCH_COND_f32: return true;
  case R600::JUMP_COND:
  case R600::BRANCH_COND_i32:
  case R600::BRANCH_COND_f32: return true;
  default:
    return false;
  }

@@ -584,8 +584,8 @@ bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {

bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case AMDGPU::JUMP:
  case AMDGPU::BRANCH:
  case R600::JUMP:
  case R600::BRANCH:
    return true;
  default:
    return false;

@@ -634,7 +634,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
  if (It != MBB->rend()) {
    MachineInstr *instr = &(*It);
    if (instr->getOpcode() == AMDGPU::RETURN)
    if (instr->getOpcode() == R600::RETURN)
      return instr;
  }
  return nullptr;

@@ -687,8 +687,8 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator E = MBB->end();
  MachineBasicBlock::iterator It = Pre;
  while (It != E) {
    if (Pre->getOpcode() == AMDGPU::CONTINUE
        && It->getOpcode() == AMDGPU::ENDLOOP)
    if (Pre->getOpcode() == R600::CONTINUE
        && It->getOpcode() == R600::ENDLOOP)
      ContInstr.push_back(&*Pre);
    Pre = It;
    ++It;

@@ -1303,15 +1303,15 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,

  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);

  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
  //insert R600::ENDIF to avoid special case "input landBlk == NULL"
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);

  if (LandBlkHasOtherPred) {
    report_fatal_error("Extra register needed to handle CFG");
    unsigned CmpResReg =
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
    report_fatal_error("Extra compare instruction needed to handle CFG");
    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
    insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
                           CmpResReg, DebugLoc());
  }

@@ -1319,7 +1319,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
  // cause an assertion failure in the PostRA scheduling pass.
  unsigned InitReg =
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
                         DebugLoc());

  if (MigrateTrue) {

@@ -1329,7 +1329,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    // (initVal != 1).
    report_fatal_error("Extra register needed to handle CFG");
  }
  insertInstrBefore(I, AMDGPU::ELSE);
  insertInstrBefore(I, R600::ELSE);

  if (MigrateFalse) {
    migrateInstruction(FalseMBB, LandBlk, I);

@@ -1341,7 +1341,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,

  if (LandBlkHasOtherPred) {
    // add endif
    insertInstrBefore(I, AMDGPU::ENDIF);
    insertInstrBefore(I, R600::ENDIF);

    // put initReg = 2 to other predecessors of landBlk
    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),

@@ -1414,7 +1414,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
  }

  if (FalseMBB) {
    insertInstrBefore(I, AMDGPU::ELSE);
    insertInstrBefore(I, R600::ELSE);
    MBB->splice(I, FalseMBB, FalseMBB->begin(),
                FalseMBB->end());
    MBB->removeSuccessor(FalseMBB, true);

@@ -1423,7 +1423,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
    retireBlock(FalseMBB);
    MLI->removeBlock(FalseMBB);
  }
  insertInstrBefore(I, AMDGPU::ENDIF);
  insertInstrBefore(I, R600::ENDIF);

  BranchMI->eraseFromParent();

@@ -1436,8 +1436,8 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
                    << " land = BB" << LandMBB->getNumber() << "\n";);

  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
  DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
@@ -1453,9 +1453,9 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
  MachineBasicBlock::iterator I = BranchMI;
  if (TrueBranch != LandMBB)
    reversePredicateSetter(I, *I->getParent());
  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
  insertInstrBefore(I, AMDGPU::BREAK);
  insertInstrBefore(I, AMDGPU::ENDIF);
  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
  insertInstrBefore(I, R600::BREAK);
  insertInstrBefore(I, R600::ENDIF);
  // now branchInst can be erased safely
  BranchMI->eraseFromParent();
  // now take care of successors, retire blocks
@@ -1484,8 +1484,8 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
        getBranchZeroOpcode(OldOpcode);
    insertCondBranchBefore(I, BranchOpcode, DL);
    // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
    insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
    insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
    insertInstrEnd(ContingMBB, R600::ENDIF, DL);
  } else {
    int BranchOpcode =
        TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :

@@ -1500,7 +1500,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
    // location we've just inserted that reference here so it should be
    // representative insertEnd to ensure phi-moves, if exist, go before the
    // continue-instr.
    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
    insertInstrEnd(ContingMBB, R600::CONTINUE,
                   getLastDebugLocInBB(ContingMBB));
  }
}

@@ -1627,7 +1627,7 @@ void AMDGPUCFGStructurizer::addDummyExitBlock(
    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
  FuncRep->push_back(DummyExitBlk); //insert to function
  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
  insertInstrEnd(DummyExitBlk, R600::RETURN);

  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
      E = RetMBB.end(); It != E; ++It) {
@@ -4,7 +4,6 @@ tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AMDGPUGenIntrinsicEnums.inc -gen-tgt-intrinsic-enums)

@@ -19,6 +18,16 @@ tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)

set(LLVM_TARGET_DEFINITIONS R600.td)
tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM R600GenCallingConv.inc -gen-callingconv)
tablegen(LLVM R600GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget)

add_public_tablegen_target(AMDGPUCommonTableGen)
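
# Illustrative note, not part of the original patch: each generated header is
# consumed from C++ with the usual guard-macro pattern, e.g.
#   #define GET_SUBTARGETINFO_ENUM
#   #include "R600GenSubtargetInfo.inc"
#   #undef GET_SUBTARGETINFO_ENUM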

add_llvm_target(AMDGPUCodeGen

@@ -20,6 +20,7 @@
#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -14,14 +14,13 @@
//===----------------------------------------------------------------------===//

def isEG : Predicate<
  "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
  "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && "
  "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "
  "!Subtarget->hasCaymanISA()"
>;

def isEGorCayman : Predicate<
  "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
  "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
  "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||"
  "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"
>;

class EGPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
@@ -510,11 +510,6 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
    static_cast<R600InstPrinter*>(this)->printOperand(MI, OpNo, O);
    return;
  }

  if (OpNo >= MI->getNumOperands()) {
    O << "/*Missing OP" << OpNo << "*/";
    return;

@@ -965,11 +960,6 @@ void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
                                        const MCSubtargetInfo &STI,
                                        raw_ostream &O) {
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
    static_cast<R600InstPrinter*>(this)->printMemOperand(MI, OpNo, O);
    return;
  }

  printOperand(MI, OpNo, STI, O);
  O << ", ";
  printOperand(MI, OpNo + 1, STI, O);

@@ -995,16 +985,6 @@ void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
  O << Asm;
}

void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printAbs(MI, OpNo, O);
}

void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
                                   const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printClamp(MI, OpNo, O);
}

void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI,
                                  raw_ostream &O) {

@@ -1031,70 +1011,6 @@ void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
    O << " div:2";
}

void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printLiteral(MI, OpNo, O);
}

void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printLast(MI, OpNo, O);
}

void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printNeg(MI, OpNo, O);
}

void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printOMOD(MI, OpNo, O);
}

void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printRel(MI, OpNo, O);
}

void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
                                            const MCSubtargetInfo &STI,
                                            raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printUpdateExecMask(MI, OpNo, O);
}

void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
                                        const MCSubtargetInfo &STI,
                                        raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printUpdatePred(MI, OpNo, O);
}

void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
                                   const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printWrite(MI, OpNo, O);
}

void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printBankSwizzle(MI, OpNo, O);
}

void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printRSel(MI, OpNo, O);
}

void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
                                const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printCT(MI, OpNo, O);
}

void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
                                    const MCSubtargetInfo &STI, raw_ostream &O) {
  static_cast<R600InstPrinter*>(this)->printKCache(MI, OpNo, O);
}

void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {
@@ -1299,6 +1215,13 @@ void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,

#include "AMDGPUGenAsmWriter.inc"

void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
                                StringRef Annot, const MCSubtargetInfo &STI) {
  O.flush();
  printInstruction(MI, O);
  printAnnotation(O, Annot);
}

void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
                               raw_ostream &O) {
  AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');

@@ -1417,7 +1340,7 @@ void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
  if (Op.isReg()) {
    switch (Op.getReg()) {
    // This is the default predicate state, so we don't need to print it.
    case AMDGPU::PRED_SEL_OFF:
    case R600::PRED_SEL_OFF:
      break;

    default:

@@ -1493,3 +1416,5 @@ void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
    O << " (MASKED)";
  }
}

#include "R600GenAsmWriter.inc"

@@ -218,13 +218,16 @@ protected:
                  raw_ostream &O);
};

// FIXME: R600 specific parts of AMDGPUInstrPrinter should be moved here, and
// MCTargetDesc should be using R600InstPrinter for the R600 target.
class R600InstPrinter : public AMDGPUInstPrinter {
class R600InstPrinter : public MCInstPrinter {
public:
  R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                  const MCRegisterInfo &MRI)
    : AMDGPUInstPrinter(MAI, MII, MRI) {}
    : MCInstPrinter(MAI, MII, MRI) {}

  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
                 const MCSubtargetInfo &STI) override;
  void printInstruction(const MCInst *MI, raw_ostream &O);
  static const char *getRegisterName(unsigned RegNo);

  void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -38,9 +38,17 @@ using namespace llvm;
#define GET_SUBTARGETINFO_MC_DESC
#include "AMDGPUGenSubtargetInfo.inc"

#define NoSchedModel NoSchedModelR600
#define GET_SUBTARGETINFO_MC_DESC
#include "R600GenSubtargetInfo.inc"
#undef NoSchedModelR600
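
// Editorial note, not part of the original patch: both generated
// SubtargetInfo tables define a NoSchedModel record, so the rename above
// keeps the R600 copy from colliding with the AMDGPU one in this file.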

#define GET_REGINFO_MC_DESC
#include "AMDGPUGenRegisterInfo.inc"

#define GET_REGINFO_MC_DESC
#include "R600GenRegisterInfo.inc"

static MCInstrInfo *createAMDGPUMCInstrInfo() {
  MCInstrInfo *X = new MCInstrInfo();
  InitAMDGPUMCInstrInfo(X);

@@ -49,12 +57,17 @@ static MCInstrInfo *createAMDGPUMCInstrInfo() {

static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {
  MCRegisterInfo *X = new MCRegisterInfo();
  InitAMDGPUMCRegisterInfo(X, 0);
  if (TT.getArch() == Triple::r600)
    InitR600MCRegisterInfo(X, 0);
  else
    InitAMDGPUMCRegisterInfo(X, 0);
  return X;
}

static MCSubtargetInfo *
createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
  if (TT.getArch() == Triple::r600)
    return createR600MCSubtargetInfoImpl(TT, CPU, FS);
  return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS);
}

@@ -63,8 +76,10 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
                                                const MCAsmInfo &MAI,
                                                const MCInstrInfo &MII,
                                                const MCRegisterInfo &MRI) {
  return T.getArch() == Triple::r600 ? new R600InstPrinter(MAI, MII, MRI) :
                                       new AMDGPUInstPrinter(MAI, MII, MRI);
  if (T.getArch() == Triple::r600)
    return new R600InstPrinter(MAI, MII, MRI);
  else
    return new AMDGPUInstPrinter(MAI, MII, MRI);
}

static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S,

@@ -90,10 +105,12 @@ static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
}

extern "C" void LLVMInitializeAMDGPUTargetMC() {

  TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
  TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo);
  for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) {
    RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);

    TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
    TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
    TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
    TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
|
@ -40,6 +40,7 @@ Target &getTheGCNTarget();
|
|||
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &MRI,
|
||||
MCContext &Ctx);
|
||||
MCInstrInfo *createR600MCInstrInfo();
|
||||
|
||||
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &MRI,
|
||||
|
@ -59,6 +60,10 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
|
|||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
#undef GET_REGINFO_ENUM
|
||||
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "R600GenRegisterInfo.inc"
|
||||
#undef GET_REGINFO_ENUM
|
||||
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#define GET_INSTRINFO_OPERAND_ENUM
|
||||
#define GET_INSTRINFO_SCHED_ENUM
|
||||
|
@ -67,9 +72,20 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
|
|||
#undef GET_INSTRINFO_OPERAND_ENUM
|
||||
#undef GET_INSTRINFO_ENUM
|
||||
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#define GET_INSTRINFO_OPERAND_ENUM
|
||||
#define GET_INSTRINFO_SCHED_ENUM
|
||||
#include "R600GenInstrInfo.inc"
|
||||
#undef GET_INSTRINFO_SCHED_ENUM
|
||||
#undef GET_INSTRINFO_OPERAND_ENUM
|
||||
#undef GET_INSTRINFO_ENUM
|
||||
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
#undef GET_SUBTARGETINFO_ENUM
|
||||
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#include "R600GenSubtargetInfo.inc"
|
||||
#undef GET_SUBTARGETINFO_ENUM
|
||||
|
||||
#endif
|
||||
|
|
|
@@ -8,5 +8,6 @@ add_llvm_library(LLVMAMDGPUDesc
  AMDGPUMCTargetDesc.cpp
  AMDGPUTargetStreamer.cpp
  R600MCCodeEmitter.cpp
  R600MCTargetDesc.cpp
  SIMCCodeEmitter.cpp
)
@@ -15,7 +15,6 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -36,30 +35,40 @@ using namespace llvm;

namespace {

class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
class R600MCCodeEmitter : public MCCodeEmitter {
  const MCRegisterInfo &MRI;
  const MCInstrInfo &MCII;

public:
  R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
    : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
    : MRI(mri), MCII(mcii) {}
  R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
  R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete;

  /// Encode the instruction and write it to the OS.
  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups,
                         const MCSubtargetInfo &STI) const override;
                         const MCSubtargetInfo &STI) const;

  /// \returns the encoding for an MCOperand.
  uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override;
                             const MCSubtargetInfo &STI) const;

private:

  void Emit(uint32_t value, raw_ostream &OS) const;
  void Emit(uint64_t value, raw_ostream &OS) const;

  unsigned getHWReg(unsigned regNo) const;

  uint64_t getBinaryCodeForInstr(const MCInst &MI,
                                 SmallVectorImpl<MCFixup> &Fixups,
                                 const MCSubtargetInfo &STI) const;
  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
  void verifyInstructionPredicates(const MCInst &MI,
                                   uint64_t AvailableFeatures) const;

};

} // end anonymous namespace
@ -94,16 +103,16 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|||
computeAvailableFeatures(STI.getFeatureBits()));
|
||||
|
||||
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
|
||||
if (MI.getOpcode() == AMDGPU::RETURN ||
|
||||
MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
|
||||
MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
|
||||
MI.getOpcode() == AMDGPU::BUNDLE ||
|
||||
MI.getOpcode() == AMDGPU::KILL) {
|
||||
if (MI.getOpcode() == R600::RETURN ||
|
||||
MI.getOpcode() == R600::FETCH_CLAUSE ||
|
||||
MI.getOpcode() == R600::ALU_CLAUSE ||
|
||||
MI.getOpcode() == R600::BUNDLE ||
|
||||
MI.getOpcode() == R600::KILL) {
|
||||
return;
|
||||
} else if (IS_VTX(Desc)) {
|
||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
|
||||
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
|
||||
if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
|
||||
if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {
|
||||
InstWord2 |= 1 << 19; // Mega-Fetch bit
|
||||
}
|
||||
|
||||
|
@ -136,7 +145,7 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|||
Emit((uint32_t) 0, OS);
|
||||
} else {
|
||||
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
|
||||
if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
|
||||
if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) &&
|
||||
((Desc.TSFlags & R600_InstFlag::OP1) ||
|
||||
Desc.TSFlags & R600_InstFlag::OP2)) {
|
||||
uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
|
||||
|
@ -186,4 +195,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
|
|||
}
|
||||
|
||||
#define ENABLE_INSTR_PREDICATE_VERIFIER
|
||||
#include "AMDGPUGenMCCodeEmitter.inc"
|
||||
#include "R600GenMCCodeEmitter.inc"
|
||||
|
|
|
@@ -0,0 +1,27 @@
+//===-- R600MCTargetDesc.cpp - R600 Target Descriptions ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This file provides R600 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "R600GenInstrInfo.inc"
+
+MCInstrInfo *llvm::createR600MCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitR600MCInstrInfo(X);
+  return X;
+}
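For readers unfamiliar with the `GET_INSTRINFO_MC_DESC` idiom used in this new file: TableGen emits the instruction descriptor tables plus an `InitR600MCInstrInfo()` helper into R600GenInstrInfo.inc. Roughly, the generated region has this shape (a simplified sketch; the array names here are assumptions, not the actual emitted identifiers):

// Illustrative shape of the GET_INSTRINFO_MC_DESC region of
// R600GenInstrInfo.inc (assumed names, simplified).
extern const llvm::MCInstrDesc R600Insts[];    // one descriptor per opcode
extern const unsigned R600InstrNameIndices[];  // offsets into the name blob
extern const char R600InstrNameData[];         // packed instruction names

static inline void InitR600MCInstrInfo(llvm::MCInstrInfo *II) {
  // Hand the R600-only tables to the generic MCInstrInfo container.
  II->InitMCInstrInfo(R600Insts, R600InstrNameIndices, R600InstrNameData,
                      R600::INSTRUCTION_LIST_END);
}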
@@ -438,3 +438,6 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
   llvm_unreachable("Encoding of this operand type is not supported yet.");
   return 0;
 }
+
+#define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "AMDGPUGenMCCodeEmitter.inc"
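Defining `ENABLE_INSTR_PREDICATE_VERIFIER` before including the generated emitter makes TableGen's `computeAvailableFeatures` / `verifyInstructionPredicates` pair available, and each emitter calls it at the top of `encodeInstruction`. A sketch of the call pattern, consistent with the R600 hunk above but not a verbatim excerpt:

// Sketch: how the emitters use the generated predicate verifier.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
                       SmallVectorImpl<MCFixup> &Fixups,
                       const MCSubtargetInfo &STI) const {
  // Reject instructions whose required subtarget features are missing
  // from the current subtarget's feature bits.
  verifyInstructionPredicates(
      MI, computeAvailableFeatures(STI.getFeatureBits()));
  // ... actual encoding follows ...
}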
@@ -0,0 +1,59 @@
+//===-- R600.td - R600 Tablegen files ----------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+def R600InstrInfo : InstrInfo {
+  let guessInstructionProperties = 1;
+  let noNamedPositionallyEncodedOperands = 1;
+}
+
+def R600 : Target {
+  let InstructionSet = R600InstrInfo;
+  let AllowRegisterRenaming = 1;
+}
+
+let Namespace = "R600" in {
+
+foreach Index = 0-15 in {
+  def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
+}
+
+include "R600RegisterInfo.td"
+
+}
+
+def NullALU : InstrItinClass;
+def ALU_NULL : FuncUnit;
+
+include "AMDGPUFeatures.td"
+include "R600Schedule.td"
+include "R600Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUInstructions.td"
+include "R600Instructions.td"
+include "R700Instructions.td"
+include "EvergreenInstructions.td"
+include "CaymanInstructions.td"
+
+// Calling convention for R600
+def CC_R600 : CallingConv<[
+  CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
+    T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
+    T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
+    T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
+    T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
+    T30_XYZW, T31_XYZW, T32_XYZW
+  ]>>>
+]>;
+
+// Calling convention for compute kernels
+def CC_R600_Kernel : CallingConv<[
+  CCCustom<"allocateKernArg">
+]>;
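Because this new top-level R600.td wraps its register and instruction definitions in `let Namespace = "R600"`, every enum TableGen emits for this target lands in the `R600::` C++ namespace, which is why all the passes below switch from `AMDGPU::` to `R600::`. A minimal sketch of a consumer, using opcodes that appear in the hunks below:

// Sketch: opcodes now come from the R600-specific generated enum.
#define GET_INSTRINFO_ENUM
#include "R600GenInstrInfo.inc"
#undef GET_INSTRINFO_ENUM

static bool isCFALUOpcode(unsigned Opcode) {
  // Both opcodes are members of the generated R600:: namespace.
  return Opcode == R600::CF_ALU || Opcode == R600::CF_ALU_PUSH_BEFORE;
}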
@@ -51,7 +51,7 @@ void R600AsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {

   for (const MachineBasicBlock &MBB : MF) {
     for (const MachineInstr &MI : MBB) {
-      if (MI.getOpcode() == AMDGPU::KILLGT)
+      if (MI.getOpcode() == R600::KILLGT)
         killPixel = true;
       unsigned numOperands = MI.getNumOperands();
       for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
@@ -34,8 +34,8 @@ namespace {

 static bool isCFAlu(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
-  case AMDGPU::CF_ALU:
-  case AMDGPU::CF_ALU_PUSH_BEFORE:
+  case R600::CF_ALU:
+  case R600::CF_ALU_PUSH_BEFORE:
     return true;
   default:
     return false;
@@ -85,20 +85,20 @@ char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
   assert(isCFAlu(MI));
   return MI
-      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
+      .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
       .getImm();
 }

 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
   assert(isCFAlu(MI));
   return MI
-      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
+      .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
       .getImm();
 }

 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
     MachineInstr &CFAlu) const {
-  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+  int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
   MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
   I++;
   do {
@@ -117,7 +117,7 @@ void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
                                           const MachineInstr &LatrCFAlu) const {
   assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
-  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+  int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
   unsigned RootInstCount = getCFAluSize(RootCFAlu),
       LaterInstCount = getCFAluSize(LatrCFAlu);
   unsigned CumuledInsts = RootInstCount + LaterInstCount;
@@ -125,15 +125,15 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
     LLVM_DEBUG(dbgs() << "Excess inst counts\n");
     return false;
   }
-  if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+  if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
     return false;
   // Is KCache Bank 0 compatible ?
   int Mode0Idx =
-      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
+      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
   int KBank0Idx =
-      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
+      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
   int KBank0LineIdx =
-      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
+      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
   if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
       RootCFAlu.getOperand(Mode0Idx).getImm() &&
       (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
@@ -145,11 +145,11 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
   }
   // Is KCache Bank 1 compatible ?
   int Mode1Idx =
-      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
+      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
   int KBank1Idx =
-      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
+      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
   int KBank1LineIdx =
-      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
+      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
   if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
       RootCFAlu.getOperand(Mode1Idx).getImm() &&
      (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
@@ -94,7 +94,7 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {
 }

 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
-  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
+  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
       getLoopDepth() > 1)
     return true;

@@ -103,10 +103,10 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {

   switch(Opcode) {
   default: return false;
-  case AMDGPU::CF_ALU_PUSH_BEFORE:
-  case AMDGPU::CF_ALU_ELSE_AFTER:
-  case AMDGPU::CF_ALU_BREAK:
-  case AMDGPU::CF_ALU_CONTINUE:
+  case R600::CF_ALU_PUSH_BEFORE:
+  case R600::CF_ALU_ELSE_AFTER:
+  case R600::CF_ALU_BREAK:
+  case R600::CF_ALU_CONTINUE:
     if (CurrentSubEntries == 0)
       return false;
     if (ST->getWavefrontSize() == 64) {
@@ -168,8 +168,8 @@ void CFStack::updateMaxStackSize() {
 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
   CFStack::StackItem Item = CFStack::ENTRY;
   switch(Opcode) {
-  case AMDGPU::CF_PUSH_EG:
-  case AMDGPU::CF_ALU_PUSH_BEFORE:
+  case R600::CF_PUSH_EG:
+  case R600::CF_ALU_PUSH_BEFORE:
     if (!isWQM) {
       if (!ST->hasCaymanISA() &&
           !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
@@ -240,8 +240,8 @@ private:

   bool IsTrivialInst(MachineInstr &MI) const {
     switch (MI.getOpcode()) {
-    case AMDGPU::KILL:
-    case AMDGPU::RETURN:
+    case R600::KILL:
+    case R600::RETURN:
       return true;
     default:
       return false;
@@ -253,41 +253,41 @@ private:
     bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
     switch (CFI) {
     case CF_TC:
-      Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
+      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
       break;
     case CF_VC:
-      Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
+      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
       break;
     case CF_CALL_FS:
-      Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
+      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
       break;
     case CF_WHILE_LOOP:
-      Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
+      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
       break;
     case CF_END_LOOP:
-      Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
+      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
       break;
     case CF_LOOP_BREAK:
-      Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
+      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
       break;
     case CF_LOOP_CONTINUE:
-      Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
+      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
       break;
     case CF_JUMP:
-      Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
+      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
       break;
     case CF_ELSE:
-      Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
+      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
       break;
     case CF_POP:
-      Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
+      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
       break;
     case CF_END:
       if (ST->hasCaymanISA()) {
-        Opcode = AMDGPU::CF_END_CM;
+        Opcode = R600::CF_END_CM;
         break;
       }
-      Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
+      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
       break;
     }
     assert (Opcode && "No opcode selected");
@@ -305,21 +305,21 @@ private:
         continue;
       if (MO.isDef()) {
         unsigned Reg = MO.getReg();
-        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+        if (R600::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
-              &AMDGPU::R600_Reg128RegClass);
+              &R600::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        unsigned Reg = MO.getReg();
-        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+        if (R600::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
-              &AMDGPU::R600_Reg128RegClass);
+              &R600::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
@@ -359,15 +359,15 @@ private:

   void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
     static const unsigned LiteralRegs[] = {
-      AMDGPU::ALU_LITERAL_X,
-      AMDGPU::ALU_LITERAL_Y,
-      AMDGPU::ALU_LITERAL_Z,
-      AMDGPU::ALU_LITERAL_W
+      R600::ALU_LITERAL_X,
+      R600::ALU_LITERAL_Y,
+      R600::ALU_LITERAL_Z,
+      R600::ALU_LITERAL_W
     };
     const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
         TII->getSrcs(MI);
     for (const auto &Src:Srcs) {
-      if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
+      if (Src.first->getReg() != R600::ALU_LITERAL_X)
         continue;
       int64_t Imm = Src.second;
       std::vector<MachineOperand *>::iterator It =
@@ -377,7 +377,7 @@ private:

       // Get corresponding Operand
       MachineOperand &Operand = MI.getOperand(
-          TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
+          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

       if (It != Lits.end()) {
         // Reuse existing literal reg
@@ -400,7 +400,7 @@ private:
       unsigned LiteralPair0 = Literals[i];
       unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
       InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
-                          TII->get(AMDGPU::LITERALS))
+                          TII->get(R600::LITERALS))
           .addImm(LiteralPair0)
           .addImm(LiteralPair1);
     }
@@ -442,7 +442,7 @@ private:
     }
     for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
       MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
-          TII->get(AMDGPU::LITERALS));
+          TII->get(R600::LITERALS));
       if (Literals[i]->isImm()) {
         MILit.addImm(Literals[i]->getImm());
       } else {
@@ -471,7 +471,7 @@ private:
                           unsigned &CfCount) {
     CounterPropagateAddr(*Clause.first, CfCount);
     MachineBasicBlock *BB = Clause.first->getParent();
-    BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount);
+    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
       BB->splice(InsertPos, BB, Clause.second[i]);
     }
@@ -483,7 +483,7 @@ private:
     Clause.first->getOperand(0).setImm(0);
     CounterPropagateAddr(*Clause.first, CfCount);
     MachineBasicBlock *BB = Clause.first->getParent();
-    BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount);
+    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
       BB->splice(InsertPos, BB, Clause.second[i]);
     }
@@ -540,34 +540,34 @@ public:
        }

        MachineBasicBlock::iterator MI = I;
-        if (MI->getOpcode() != AMDGPU::ENDIF)
+        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
-        if (MI->getOpcode() == AMDGPU::CF_ALU)
+        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
-        case AMDGPU::CF_ALU_PUSH_BEFORE:
+        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
-            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
+            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
-            MI->setDesc(TII->get(AMDGPU::CF_ALU));
+            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
-            CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
+            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
-            CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
+            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
-        case AMDGPU::CF_ALU:
+        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
-        case AMDGPU::WHILELOOP: {
+        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
@@ -580,7 +580,7 @@ public:
          CfCount++;
          break;
        }
-        case AMDGPU::ENDLOOP: {
+        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
@@ -592,7 +592,7 @@ public:
          CfCount++;
          break;
        }
-        case AMDGPU::IF_PREDICATE_SET: {
+        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
@@ -604,7 +604,7 @@ public:
          CfCount++;
          break;
        }
-        case AMDGPU::ELSE: {
+        case R600::ELSE: {
          MachineInstr * JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
@@ -618,7 +618,7 @@ public:
          CfCount++;
          break;
        }
-        case AMDGPU::ENDIF: {
+        case R600::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
@@ -640,7 +640,7 @@ public:
          MI->eraseFromParent();
          break;
        }
-        case AMDGPU::BREAK: {
+        case R600::BREAK: {
          CfCount ++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_BREAK))
@@ -649,7 +649,7 @@ public:
          MI->eraseFromParent();
          break;
        }
-        case AMDGPU::CONTINUE: {
+        case R600::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
@@ -658,12 +658,12 @@ public:
          CfCount++;
          break;
        }
-        case AMDGPU::RETURN: {
+        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          if (CfCount % 2) {
-            BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD));
+            BuildMI(MBB, I, DL, TII->get(R600::PAD));
            CfCount++;
          }
          MI->eraseFromParent();
@@ -684,7 +684,7 @@ public:
     for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
       MachineInstr *Alu = ToPopAfter[i];
       BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
-          TII->get(AMDGPU::CF_ALU_POP_AFTER))
+          TII->get(R600::CF_ALU_POP_AFTER))
           .addImm(Alu->getOperand(0).getImm())
           .addImm(Alu->getOperand(1).getImm())
           .addImm(Alu->getOperand(2).getImm())
@@ -52,12 +52,12 @@

   unsigned OccupiedDwords(MachineInstr &MI) const {
     switch (MI.getOpcode()) {
-    case AMDGPU::INTERP_PAIR_XY:
-    case AMDGPU::INTERP_PAIR_ZW:
-    case AMDGPU::INTERP_VEC_LOAD:
-    case AMDGPU::DOT_4:
+    case R600::INTERP_PAIR_XY:
+    case R600::INTERP_PAIR_ZW:
+    case R600::INTERP_VEC_LOAD:
+    case R600::DOT_4:
       return 4;
-    case AMDGPU::KILL:
+    case R600::KILL:
       return 0;
     default:
       break;
@@ -77,7 +77,7 @@ private:
                                     E = MI.operands_end();
          It != E; ++It) {
       MachineOperand &MO = *It;
-      if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+      if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
         ++NumLiteral;
     }
     return 1 + NumLiteral;
@@ -89,12 +89,12 @@ private:
     if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
       return true;
     switch (MI.getOpcode()) {
-    case AMDGPU::PRED_X:
-    case AMDGPU::INTERP_PAIR_XY:
-    case AMDGPU::INTERP_PAIR_ZW:
-    case AMDGPU::INTERP_VEC_LOAD:
-    case AMDGPU::COPY:
-    case AMDGPU::DOT_4:
+    case R600::PRED_X:
+    case R600::INTERP_PAIR_XY:
+    case R600::INTERP_PAIR_ZW:
+    case R600::INTERP_VEC_LOAD:
+    case R600::COPY:
+    case R600::DOT_4:
       return true;
     default:
       return false;
@@ -103,9 +103,9 @@ private:

   bool IsTrivialInst(MachineInstr &MI) const {
     switch (MI.getOpcode()) {
-    case AMDGPU::KILL:
-    case AMDGPU::RETURN:
-    case AMDGPU::IMPLICIT_DEF:
+    case R600::KILL:
+    case R600::RETURN:
+    case R600::IMPLICIT_DEF:
       return true;
     default:
       return false;
@@ -132,16 +132,16 @@ private:
                          bool UpdateInstr = true) const {
     std::vector<std::pair<unsigned, unsigned>> UsedKCache;

-    if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
+    if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
       return true;

     const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
         TII->getSrcs(MI);
     assert(
-        (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) &&
+        (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
         "Can't assign Const");
     for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
-      if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
+      if (Consts[i].first->getReg() != R600::ALU_CONST)
         continue;
       unsigned Sel = Consts[i].second;
       unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
@@ -172,16 +172,16 @@ private:
       return true;

     for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
-      if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
+      if (Consts[i].first->getReg() != R600::ALU_CONST)
         continue;
       switch(UsedKCache[j].first) {
       case 0:
         Consts[i].first->setReg(
-            AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
+            R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
         break;
       case 1:
         Consts[i].first->setReg(
-            AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
+            R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
         break;
       default:
         llvm_unreachable("Wrong Cache Line");
@@ -253,7 +253,7 @@ private:
         break;
       if (AluInstCount > TII->getMaxAlusPerClause())
         break;
-      if (I->getOpcode() == AMDGPU::PRED_X) {
+      if (I->getOpcode() == R600::PRED_X) {
         // We put PRED_X in its own clause to ensure that ifcvt won't create
         // clauses with more than 128 insts.
        // IfCvt is indeed checking that "then" and "else" branches of an if
@@ -289,7 +289,7 @@ private:
       AluInstCount += OccupiedDwords(*I);
     }
     unsigned Opcode = PushBeforeModifier ?
-        AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+        R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
     // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
     // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
@@ -322,7 +322,7 @@ public:
          BB != BB_E; ++BB) {
       MachineBasicBlock &MBB = *BB;
       MachineBasicBlock::iterator I = MBB.begin();
-      if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
+      if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
         continue; // BB was already parsed
       for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
         if (isALU(*I)) {
@@ -96,16 +96,16 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {

       // Expand LDS_*_RET instructions
       if (TII->isLDSRetInstr(MI.getOpcode())) {
-        int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+        int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
         assert(DstIdx != -1);
         MachineOperand &DstOp = MI.getOperand(DstIdx);
         MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
-                                               DstOp.getReg(), AMDGPU::OQAP);
-        DstOp.setReg(AMDGPU::OQAP);
+                                               DstOp.getReg(), R600::OQAP);
+        DstOp.setReg(R600::OQAP);
         int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
-                                           AMDGPU::OpName::pred_sel);
+                                           R600::OpName::pred_sel);
         int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
-                                           AMDGPU::OpName::pred_sel);
+                                           R600::OpName::pred_sel);
         // Copy the pred_sel bit
         Mov->getOperand(MovPredSelIdx).setReg(
             MI.getOperand(LDSPredSelIdx).getReg());
@@ -114,7 +114,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
       switch (MI.getOpcode()) {
       default: break;
       // Expand PRED_X to one of the PRED_SET instructions.
-      case AMDGPU::PRED_X: {
+      case R600::PRED_X: {
         uint64_t Flags = MI.getOperand(3).getImm();
         // The native opcode used by PRED_X is stored as an immediate in the
         // third operand.
@@ -122,17 +122,18 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
                                MI.getOperand(2).getImm(), // opcode
                                MI.getOperand(0).getReg(), // dst
                                MI.getOperand(1).getReg(), // src0
-                               AMDGPU::ZERO);             // src1
+                               R600::ZERO);               // src1
         TII->addFlag(*PredSet, 0, MO_FLAG_MASK);
         if (Flags & MO_FLAG_PUSH) {
-          TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1);
+          TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1);
         } else {
-          TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1);
+          TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1);
         }
         MI.eraseFromParent();
         continue;
         }
-      case AMDGPU::DOT_4: {
+      case R600::DOT_4: {
+
         const R600RegisterInfo &TRI = TII->getRegisterInfo();

         unsigned DstReg = MI.getOperand(0).getReg();
@@ -141,7 +142,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
       for (unsigned Chan = 0; Chan < 4; ++Chan) {
         bool Mask = (Chan != TRI.getHWRegChan(DstReg));
         unsigned SubDstReg =
-            AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+            R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
         MachineInstr *BMI =
             TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
         if (Chan > 0) {
@@ -156,10 +157,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
         // While not strictly necessary from hw point of view, we force
         // all src operands of a dot4 inst to belong to the same slot.
         unsigned Src0 = BMI->getOperand(
-            TII->getOperandIdx(Opcode, AMDGPU::OpName::src0))
+            TII->getOperandIdx(Opcode, R600::OpName::src0))
             .getReg();
         unsigned Src1 = BMI->getOperand(
-            TII->getOperandIdx(Opcode, AMDGPU::OpName::src1))
+            TII->getOperandIdx(Opcode, R600::OpName::src1))
             .getReg();
         (void) Src0;
         (void) Src1;
@@ -206,14 +207,14 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
       // T0_W = CUBE T1_Y, T1_Z
       for (unsigned Chan = 0; Chan < 4; Chan++) {
         unsigned DstReg = MI.getOperand(
-            TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg();
+            TII->getOperandIdx(MI, R600::OpName::dst)).getReg();
         unsigned Src0 = MI.getOperand(
-            TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg();
+            TII->getOperandIdx(MI, R600::OpName::src0)).getReg();
         unsigned Src1 = 0;

         // Determine the correct source registers
         if (!IsCube) {
-          int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1);
+          int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1);
           if (Src1Idx != -1) {
             Src1 = MI.getOperand(Src1Idx).getReg();
           }
@@ -241,7 +242,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
           // the current Channel.
           Mask = (Chan != TRI.getHWRegChan(DstReg));
           unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
-          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+          DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
         }

         // Set the IsLast bit
@@ -250,11 +251,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
         // Add the new instruction
         unsigned Opcode = MI.getOpcode();
         switch (Opcode) {
-        case AMDGPU::CUBE_r600_pseudo:
-          Opcode = AMDGPU::CUBE_r600_real;
+        case R600::CUBE_r600_pseudo:
+          Opcode = R600::CUBE_r600_real;
           break;
-        case AMDGPU::CUBE_eg_pseudo:
-          Opcode = AMDGPU::CUBE_eg_real;
+        case R600::CUBE_eg_pseudo:
+          Opcode = R600::CUBE_eg_real;
           break;
         default:
           break;
@@ -271,12 +272,12 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
         if (NotLast) {
           TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);
         }
-        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp);
-        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal);
-        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs);
-        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs);
-        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg);
-        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg);
+        SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp);
+        SetFlagInNewMI(NewMI, &MI, R600::OpName::literal);
+        SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs);
+        SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs);
+        SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg);
+        SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg);
       }
       MI.eraseFromParent();
     }
@@ -14,7 +14,6 @@

 #include "R600ISelLowering.h"
 #include "AMDGPUFrameLowering.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600FrameLowering.h"
@@ -51,17 +50,31 @@

 using namespace llvm;

+static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                            CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  MachineFunction &MF = State.getMachineFunction();
+  AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+
+  uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
+                                         ArgFlags.getOrigAlign());
+  State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
+#include "R600GenCallingConv.inc"
+
 R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                        const R600Subtarget &STI)
-    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
-  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
-  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
-  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
-  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
-  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
-  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
+  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
+  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
+  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
+  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
+  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
+  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

-  computeRegisterProperties(STI.getRegisterInfo());
+  computeRegisterProperties(Subtarget->getRegisterInfo());

   // Legalize loads and stores to the private address space.
   setOperationAction(ISD::LOAD, MVT::i32, Custom);
@@ -148,6 +161,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,

   setOperationAction(ISD::FSUB, MVT::f32, Expand);

+  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+  setOperationAction(ISD::FRINT, MVT::f64, Custom);
+  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

@@ -216,6 +234,34 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FMA, MVT::f32, Expand);
     setOperationAction(ISD::FMA, MVT::f64, Expand);
   }
+
+  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
+  // need it for R600.
+  if (!Subtarget->hasFP32Denormals())
+    setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+  if (!Subtarget->hasBFI()) {
+    // fcopysign can be done in a single instruction with BFI.
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  }
+
+  if (!Subtarget->hasBCNT(32))
+    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+  if (!Subtarget->hasBCNT(64))
+    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+  if (Subtarget->hasFFBH())
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+
+  if (Subtarget->hasFFBL())
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+
+  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
+  // need it for R600.
+  if (Subtarget->hasBFE())
+    setHasExtractBitsInsn(true);
+
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

@@ -246,14 +292,10 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::LOAD);
 }

-const R600Subtarget *R600TargetLowering::getSubtarget() const {
-  return static_cast<const R600Subtarget *>(Subtarget);
-}
-
 static inline bool isEOP(MachineBasicBlock::iterator I) {
   if (std::next(I) == I->getParent()->end())
     return false;
-  return std::next(I)->getOpcode() == AMDGPU::RETURN;
+  return std::next(I)->getOpcode() == R600::RETURN;
 }

 MachineBasicBlock *
@@ -262,24 +304,24 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   MachineBasicBlock::iterator I = MI;
-  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+  const R600InstrInfo *TII = Subtarget->getInstrInfo();

   switch (MI.getOpcode()) {
   default:
     // Replace LDS_*_RET instruction that don't have any uses with the
     // equivalent LDS_*_NORET instruction.
     if (TII->isLDSRetInstr(MI.getOpcode())) {
-      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
       assert(DstIdx != -1);
       MachineInstrBuilder NewMI;
       // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
       //        LDS_1A2D support and remove this special case.
       if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
-          MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
+          MI.getOpcode() == R600::LDS_CMPST_RET)
         return BB;

       NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
-                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
+                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
       for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
         NewMI.add(MI.getOperand(i));
       }
@@ -288,23 +330,23 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     }
     break;

-  case AMDGPU::FABS_R600: {
+  case R600::FABS_R600: {
     MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
+        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
         MI.getOperand(1).getReg());
     TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
     break;
   }

-  case AMDGPU::FNEG_R600: {
+  case R600::FNEG_R600: {
     MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
+        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
         MI.getOperand(1).getReg());
     TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
     break;
   }

-  case AMDGPU::MASK_WRITE: {
+  case R600::MASK_WRITE: {
     unsigned maskedRegister = MI.getOperand(0).getReg();
     assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
     MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
@@ -312,7 +354,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     break;
   }

-  case AMDGPU::MOV_IMM_F32:
+  case R600::MOV_IMM_F32:
     TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                             .getFPImm()
                                                             ->getValueAPF()
@@ -320,39 +362,39 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                             .getZExtValue());
     break;

-  case AMDGPU::MOV_IMM_I32:
+  case R600::MOV_IMM_I32:
     TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                      MI.getOperand(1).getImm());
     break;

-  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
+  case R600::MOV_IMM_GLOBAL_ADDR: {
     //TODO: Perhaps combine this instruction with the next if possible
     auto MIB = TII->buildDefaultInstruction(
-        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
-    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
+        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
+    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
     //TODO: Ugh this is rather ugly
     MIB->getOperand(Idx) = MI.getOperand(1);
     break;
   }

-  case AMDGPU::CONST_COPY: {
+  case R600::CONST_COPY: {
     MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
-    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
+        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
+    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                        MI.getOperand(1).getImm());
     break;
   }

-  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
-  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
-  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
+  case R600::RAT_WRITE_CACHELESS_32_eg:
+  case R600::RAT_WRITE_CACHELESS_64_eg:
+  case R600::RAT_WRITE_CACHELESS_128_eg:
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
         .add(MI.getOperand(0))
         .add(MI.getOperand(1))
         .addImm(isEOP(I)); // Set End of program bit
     break;

-  case AMDGPU::RAT_STORE_TYPED_eg:
+  case R600::RAT_STORE_TYPED_eg:
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
         .add(MI.getOperand(0))
         .add(MI.getOperand(1))
@@ -360,49 +402,49 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         .addImm(isEOP(I)); // Set End of program bit
     break;

-  case AMDGPU::BRANCH:
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+  case R600::BRANCH:
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
     break;

-  case AMDGPU::BRANCH_COND_f32: {
+  case R600::BRANCH_COND_f32: {
     MachineInstr *NewMI =
-        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
-                AMDGPU::PREDICATE_BIT)
+        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
+                R600::PREDICATE_BIT)
             .add(MI.getOperand(1))
-            .addImm(AMDGPU::PRED_SETNE)
+            .addImm(R600::PRED_SETNE)
             .addImm(0); // Flags
     TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
         .add(MI.getOperand(0))
-        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+        .addReg(R600::PREDICATE_BIT, RegState::Kill);
     break;
   }

-  case AMDGPU::BRANCH_COND_i32: {
+  case R600::BRANCH_COND_i32: {
     MachineInstr *NewMI =
-        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
-                AMDGPU::PREDICATE_BIT)
+        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
+                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
-            .addImm(AMDGPU::PRED_SETNE_INT)
+            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
     TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
-    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
-        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+        .addReg(R600::PREDICATE_BIT, RegState::Kill);
     break;
   }

-  case AMDGPU::EG_ExportSwz:
-  case AMDGPU::R600_ExportSwz: {
+  case R600::EG_ExportSwz:
+  case R600::R600_ExportSwz: {
     // Instruction is left unmodified if its not the last one of its type
     bool isLastInstructionOfItsType = true;
     unsigned InstExportType = MI.getOperand(1).getImm();
     for (MachineBasicBlock::iterator NextExportInst = std::next(I),
          EndBlock = BB->end(); NextExportInst != EndBlock;
          NextExportInst = std::next(NextExportInst)) {
-      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
-          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
+          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
         unsigned CurrentInstExportType = NextExportInst->getOperand(1)
             .getImm();
         if (CurrentInstExportType == InstExportType) {
@@ -414,7 +456,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     bool EOP = isEOP(I);
     if (!EOP && !isLastInstructionOfItsType)
       return BB;
-    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
+    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
         .add(MI.getOperand(0))
         .add(MI.getOperand(1))
@@ -427,7 +469,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         .addImm(EOP);
     break;
   }
-  case AMDGPU::RETURN: {
+  case R600::RETURN: {
     return BB;
   }
   }
@@ -583,23 +625,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
     return LowerImplicitParameter(DAG, VT, DL, 8);

   case Intrinsic::r600_read_tgid_x:
-    return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                   AMDGPU::T1_X, VT);
+    return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                   R600::T1_X, VT);
   case Intrinsic::r600_read_tgid_y:
-    return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                   AMDGPU::T1_Y, VT);
+    return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                   R600::T1_Y, VT);
   case Intrinsic::r600_read_tgid_z:
-    return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                   AMDGPU::T1_Z, VT);
+    return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                   R600::T1_Z, VT);
   case Intrinsic::r600_read_tidig_x:
-    return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                   AMDGPU::T0_X, VT);
+    return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                   R600::T0_X, VT);
   case Intrinsic::r600_read_tidig_y:
-    return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                   AMDGPU::T0_Y, VT);
+    return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                   R600::T0_Y, VT);
   case Intrinsic::r600_read_tidig_z:
-    return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
-                                   AMDGPU::T0_Z, VT);
+    return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+                                   R600::T0_Z, VT);

   case Intrinsic::r600_recipsqrt_ieee:
     return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
@@ -1521,7 +1563,7 @@ SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                             SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
-  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
+  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

   FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

@@ -1533,6 +1575,28 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                          Op.getValueType());
 }

+CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+                                                  bool IsVarArg) const {
+  switch (CC) {
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  case CallingConv::Cold:
+    return CC_R600_Kernel;
+  case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_GS:
+  case CallingConv::AMDGPU_PS:
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_ES:
+  case CallingConv::AMDGPU_LS:
+    return CC_R600;
+  default:
+    report_fatal_error("Unsupported calling convention.");
+  }
+}
+
 /// XXX Only kernel functions are supported, so we can assume for now that
 /// every function is a kernel function, but in the future we should use
 /// separate calling conventions for kernel and non-kernel functions.
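The new CCAssignFnForCall plugs the TableGen-generated CC_R600 / CC_R600_Kernel functions from R600GenCallingConv.inc into the standard calling-convention analysis. Typical use when lowering formal arguments looks like this (a sketch of the usual LLVM pattern, not a line from this commit):

// Sketch: feeding the selected assign function to CCState.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
               *DAG.getContext());
// CC_R600_Kernel routes every argument through allocateKernArg above;
// CC_R600 assigns shader inputs to the T*_XYZW registers.
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));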
@@ -1563,7 +1627,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
   }

   if (AMDGPU::isShader(CallConv)) {
-    unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
+    unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
     SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
     InVals.push_back(Register);
     continue;
@@ -1594,7 +1658,7 @@ SDValue R600TargetLowering::LowerFormalArguments(

     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();
-    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) +
+    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
                       VA.getLocMemOffset();

     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
@@ -1981,26 +2045,26 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
                                      SDValue &Src, SDValue &Neg, SDValue &Abs,
                                      SDValue &Sel, SDValue &Imm,
                                      SelectionDAG &DAG) const {
-  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+  const R600InstrInfo *TII = Subtarget->getInstrInfo();
   if (!Src.isMachineOpcode())
     return false;

   switch (Src.getMachineOpcode()) {
-  case AMDGPU::FNEG_R600:
+  case R600::FNEG_R600:
     if (!Neg.getNode())
       return false;
     Src = Src.getOperand(0);
     Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
     return true;
-  case AMDGPU::FABS_R600:
+  case R600::FABS_R600:
     if (!Abs.getNode())
       return false;
     Src = Src.getOperand(0);
     Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
     return true;
-  case AMDGPU::CONST_COPY: {
+  case R600::CONST_COPY: {
     unsigned Opcode = ParentNode->getMachineOpcode();
-    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+    bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;

     if (!Sel.getNode())
       return false;
@@ -2011,17 +2075,17 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,

     // Gather constants values
     int SrcIndices[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0),
+      TII->getOperandIdx(Opcode, R600::OpName::src1),
+      TII->getOperandIdx(Opcode, R600::OpName::src2),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
     };
     std::vector<unsigned> Consts;
     for (int OtherSrcIdx : SrcIndices) {
@@ -2034,7 +2098,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
       }
       if (RegisterSDNode *Reg =
           dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
-        if (Reg->getReg() == AMDGPU::ALU_CONST) {
+        if (Reg->getReg() == R600::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
@@ -2049,30 +2113,30 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
     }

     Sel = CstOffset;
-    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
+    Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
     return true;
   }
-  case AMDGPU::MOV_IMM_GLOBAL_ADDR:
+  case R600::MOV_IMM_GLOBAL_ADDR:
     // Check if the Imm slot is used. Taken from below.
     if (cast<ConstantSDNode>(Imm)->getZExtValue())
       return false;
     Imm = Src.getOperand(0);
-    Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
+    Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
     return true;
-  case AMDGPU::MOV_IMM_I32:
-  case AMDGPU::MOV_IMM_F32: {
-    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+  case R600::MOV_IMM_I32:
+  case R600::MOV_IMM_F32: {
+    unsigned ImmReg = R600::ALU_LITERAL_X;
     uint64_t ImmValue = 0;

-    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
+    if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
-        ImmReg = AMDGPU::ZERO;
+        ImmReg = R600::ZERO;
      } else if (FloatValue == 0.5) {
-        ImmReg = AMDGPU::HALF;
+        ImmReg = R600::HALF;
      } else if (FloatValue == 1.0) {
-        ImmReg = AMDGPU::ONE;
+        ImmReg = R600::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
@@ -2080,9 +2144,9 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
-        ImmReg = AMDGPU::ZERO;
+        ImmReg = R600::ZERO;
      } else if (Value == 1) {
-        ImmReg = AMDGPU::ONE_INT;
+        ImmReg = R600::ONE_INT;
      } else {
        ImmValue = Value;
      }
@@ -2091,7 +2155,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
     // Check that we aren't already using an immediate.
     // XXX: It's possible for an instruction to have more than one
     // immediate operand, but this is not supported yet.
-    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+    if (ImmReg == R600::ALU_LITERAL_X) {
       if (!Imm.getNode())
         return false;
       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
@@ -2111,7 +2175,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
 /// Fold the instructions after selecting them
 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                             SelectionDAG &DAG) const {
-  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+  const R600InstrInfo *TII = Subtarget->getInstrInfo();
   if (!Node->isMachineOpcode())
     return Node;

@@ -2120,36 +2184,36 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,

   std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

-  if (Opcode == AMDGPU::DOT_4) {
+  if (Opcode == R600::DOT_4) {
     int OperandIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
     };
     int NegIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
     };
     int AbsIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
+      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
     };
     for (unsigned i = 0; i < 8; i++) {
       if (OperandIdx[i] < 0)
@@ -2157,7 +2221,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
       SDValue &Src = Ops[OperandIdx[i] - 1];
       SDValue &Neg = Ops[NegIdx[i] - 1];
       SDValue &Abs = Ops[AbsIdx[i] - 1];
-      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
       int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
       if (HasDst)
         SelIdx--;
@@ -2165,7 +2229,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
       if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
     }
-  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
+  } else if (Opcode == R600::REG_SEQUENCE) {
     for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
       SDValue &Src = Ops[i];
       if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
@@ -2175,18 +2239,18 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
     if (!TII->hasInstrModifiers(Opcode))
       return Node;
     int OperandIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
+      TII->getOperandIdx(Opcode, R600::OpName::src0),
+      TII->getOperandIdx(Opcode, R600::OpName::src1),
+      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
-      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
|
||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
|
||||
TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
|
||||
TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
|
||||
TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
|
||||
};
|
||||
int AbsIdx[] = {
|
||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
|
||||
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
|
||||
TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
|
||||
TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
|
||||
-1
|
||||
};
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
|
@ -2196,9 +2260,9 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
|
|||
SDValue &Neg = Ops[NegIdx[i] - 1];
|
||||
SDValue FakeAbs;
|
||||
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
|
||||
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
|
||||
bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
|
||||
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
|
||||
int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
|
||||
int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
|
||||
if (HasDst) {
|
||||
SelIdx--;
|
||||
ImmIdx--;
|
||||
|
|
|
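Aside: the FoldOperand hunks above implement a small immediate-folding rule — the floats 0.0, 0.5 and 1.0 fold into dedicated inline registers (ZERO, HALF, ONE), and everything else falls back to the ALU literal slot. A minimal standalone sketch of that classification, with hypothetical names that are not part of this patch:

#include <cstdint>
#include <cstring>

// Stand-ins for R600::ZERO / R600::HALF / R600::ONE / R600::ALU_LITERAL_X.
enum class ImmReg { Zero, Half, One, Literal };

// Pick the register a float immediate can be folded into; for the literal
// case, also return the raw 32-bit encoding of the value.
ImmReg classifyFloatImm(float F, uint32_t &LiteralBits) {
  if (F == 0.0f) return ImmReg::Zero;
  if (F == 0.5f) return ImmReg::Half;
  if (F == 1.0f) return ImmReg::One;
  std::memcpy(&LiteralBits, &F, sizeof(LiteralBits)); // bitcast, as in the hunk
  return ImmReg::Literal;
}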
===== R600ISelLowering.h =====

@ -23,6 +23,8 @@ class R600InstrInfo;
 class R600Subtarget;

 class R600TargetLowering final : public AMDGPUTargetLowering {
+
+const R600Subtarget *Subtarget;
 public:
 R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI);

@ -36,6 +38,7 @@ public:
 void ReplaceNodeResults(SDNode * N,
 SmallVectorImpl<SDValue> &Results,
 SelectionDAG &DAG) const override;
+CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
 bool isVarArg,
 const SmallVectorImpl<ISD::InputArg> &Ins,
===== R600InstrFormats.td =====

@ -41,7 +41,7 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
 bit LDS_1A2D = 0;

 let SubtargetPredicate = isR600toCayman;
-let Namespace = "AMDGPU";
+let Namespace = "R600";
 let OutOperandList = outs;
 let InOperandList = ins;
 let AsmString = asm;
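Aside: the one-line Namespace change above is what relocates every TableGen-generated R600 instruction enum out of the shared AMDGPU namespace, and is why the C++ hunks elsewhere in this commit can write R600::MOV instead of AMDGPU::MOV. An abridged, illustrative sketch of the shape llvm-tblgen then emits into R600GenInstrInfo.inc (assumed, not copied from the patch):

namespace llvm {
namespace R600 { // chosen by the instruction class's Namespace field
  enum {
    PHI = 0,
    // ...
    MOV,         // previously emitted into namespace llvm::AMDGPU
    MOV_IMM_F32,
    MOV_IMM_I32,
    // ...
    INSTRUCTION_LIST_END
  };
} // end namespace R600
} // end namespace llvm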
===== R600InstrInfo.cpp =====

@ -45,10 +45,15 @@
 using namespace llvm;

-#define GET_INSTRINFO_CTOR_DTOR
-#include "AMDGPUGenDFAPacketizer.inc"
+#include "R600GenDFAPacketizer.inc"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRMAP_INFO
+#define GET_INSTRINFO_NAMED_OPS
+#include "R600GenInstrInfo.inc"

 R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
-: AMDGPUInstrInfo(ST), RI(), ST(ST) {}
+: R600GenInstrInfo(-1, -1), RI(), ST(ST) {}

 bool R600InstrInfo::isVector(const MachineInstr &MI) const {
 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;

@ -59,31 +64,31 @@ void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 const DebugLoc &DL, unsigned DestReg,
 unsigned SrcReg, bool KillSrc) const {
 unsigned VectorComponents = 0;
-if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
-AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
-(AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
-AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
+if ((R600::R600_Reg128RegClass.contains(DestReg) ||
+R600::R600_Reg128VerticalRegClass.contains(DestReg)) &&
+(R600::R600_Reg128RegClass.contains(SrcReg) ||
+R600::R600_Reg128VerticalRegClass.contains(SrcReg))) {
 VectorComponents = 4;
-} else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
-AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
-(AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
-AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
+} else if((R600::R600_Reg64RegClass.contains(DestReg) ||
+R600::R600_Reg64VerticalRegClass.contains(DestReg)) &&
+(R600::R600_Reg64RegClass.contains(SrcReg) ||
+R600::R600_Reg64VerticalRegClass.contains(SrcReg))) {
 VectorComponents = 2;
 }

 if (VectorComponents > 0) {
 for (unsigned I = 0; I < VectorComponents; I++) {
 unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I);
-buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+buildDefaultInstruction(MBB, MI, R600::MOV,
 RI.getSubReg(DestReg, SubRegIndex),
 RI.getSubReg(SrcReg, SubRegIndex))
 .addReg(DestReg,
 RegState::Define | RegState::Implicit);
 }
 } else {
-MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, R600::MOV,
 DestReg, SrcReg);
-NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
+NewMI->getOperand(getOperandIdx(*NewMI, R600::OpName::src0))
 .setIsKill(KillSrc);
 }
 }

@ -104,9 +109,9 @@ bool R600InstrInfo::isMov(unsigned Opcode) const {
 switch(Opcode) {
 default:
 return false;
-case AMDGPU::MOV:
-case AMDGPU::MOV_IMM_F32:
-case AMDGPU::MOV_IMM_I32:
+case R600::MOV:
+case R600::MOV_IMM_F32:
+case R600::MOV_IMM_I32:
 return true;
 }
 }

@ -118,10 +123,10 @@ bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
 bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
 switch(Opcode) {
 default: return false;
-case AMDGPU::CUBE_r600_pseudo:
-case AMDGPU::CUBE_r600_real:
-case AMDGPU::CUBE_eg_pseudo:
-case AMDGPU::CUBE_eg_real:
+case R600::CUBE_r600_pseudo:
+case R600::CUBE_r600_real:
+case R600::CUBE_eg_pseudo:
+case R600::CUBE_eg_real:
 return true;
 }
 }

@ -149,7 +154,7 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
 }

 bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
-return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+return isLDSInstr(Opcode) && getOperandIdx(Opcode, R600::OpName::dst) != -1;
 }

 bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {

@ -158,12 +163,12 @@ bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
 if (isVector(MI) || isCubeOp(MI.getOpcode()))
 return true;
 switch (MI.getOpcode()) {
-case AMDGPU::PRED_X:
-case AMDGPU::INTERP_PAIR_XY:
-case AMDGPU::INTERP_PAIR_ZW:
-case AMDGPU::INTERP_VEC_LOAD:
-case AMDGPU::COPY:
-case AMDGPU::DOT_4:
+case R600::PRED_X:
+case R600::INTERP_PAIR_XY:
+case R600::INTERP_PAIR_ZW:
+case R600::INTERP_VEC_LOAD:
+case R600::COPY:
+case R600::DOT_4:
 return true;
 default:
 return false;

@ -173,7 +178,7 @@ bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
 bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
 if (ST.hasCaymanISA())
 return false;
-return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
+return (get(Opcode).getSchedClass() == R600::Sched::TransALU);
 }

 bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {

@ -181,7 +186,7 @@ bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
 }

 bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
-return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+return (get(Opcode).getSchedClass() == R600::Sched::VecALU);
 }

 bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {

@ -215,8 +220,8 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {

 bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
 switch (Opcode) {
-case AMDGPU::KILLGT:
-case AMDGPU::GROUP_BARRIER:
+case R600::KILLGT:
+case R600::GROUP_BARRIER:
 return true;
 default:
 return false;

@ -224,11 +229,11 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
 }

 bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
-return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+return MI.findRegisterUseOperandIdx(R600::AR_X) != -1;
 }

 bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
-return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+return MI.findRegisterDefOperandIdx(R600::AR_X) != -1;
 }

 bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {

@ -242,7 +247,7 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
 TargetRegisterInfo::isVirtualRegister(I->getReg()))
 continue;

-if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
 return true;
 }
 return false;

@ -250,17 +255,17 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {

 int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
 static const unsigned SrcSelTable[][2] = {
-{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
-{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
-{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
-{AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
-{AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
-{AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
-{AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
-{AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
-{AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
-{AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
-{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+{R600::OpName::src0, R600::OpName::src0_sel},
+{R600::OpName::src1, R600::OpName::src1_sel},
+{R600::OpName::src2, R600::OpName::src2_sel},
+{R600::OpName::src0_X, R600::OpName::src0_sel_X},
+{R600::OpName::src0_Y, R600::OpName::src0_sel_Y},
+{R600::OpName::src0_Z, R600::OpName::src0_sel_Z},
+{R600::OpName::src0_W, R600::OpName::src0_sel_W},
+{R600::OpName::src1_X, R600::OpName::src1_sel_X},
+{R600::OpName::src1_Y, R600::OpName::src1_sel_Y},
+{R600::OpName::src1_Z, R600::OpName::src1_sel_Z},
+{R600::OpName::src1_W, R600::OpName::src1_sel_W}
 };

 for (const auto &Row : SrcSelTable) {

@ -275,23 +280,23 @@ SmallVector<std::pair<MachineOperand *, int64_t>, 3>
 R600InstrInfo::getSrcs(MachineInstr &MI) const {
 SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

-if (MI.getOpcode() == AMDGPU::DOT_4) {
+if (MI.getOpcode() == R600::DOT_4) {
 static const unsigned OpTable[8][2] = {
-{AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
-{AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
-{AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
-{AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
-{AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
-{AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
-{AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
-{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
+{R600::OpName::src0_X, R600::OpName::src0_sel_X},
+{R600::OpName::src0_Y, R600::OpName::src0_sel_Y},
+{R600::OpName::src0_Z, R600::OpName::src0_sel_Z},
+{R600::OpName::src0_W, R600::OpName::src0_sel_W},
+{R600::OpName::src1_X, R600::OpName::src1_sel_X},
+{R600::OpName::src1_Y, R600::OpName::src1_sel_Y},
+{R600::OpName::src1_Z, R600::OpName::src1_sel_Z},
+{R600::OpName::src1_W, R600::OpName::src1_sel_W},
 };

 for (unsigned j = 0; j < 8; j++) {
 MachineOperand &MO =
 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
 unsigned Reg = MO.getReg();
-if (Reg == AMDGPU::ALU_CONST) {
+if (Reg == R600::ALU_CONST) {
 MachineOperand &Sel =
 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
 Result.push_back(std::make_pair(&MO, Sel.getImm()));

@ -303,9 +308,9 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
 }

 static const unsigned OpTable[3][2] = {
-{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
-{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
-{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+{R600::OpName::src0, R600::OpName::src0_sel},
+{R600::OpName::src1, R600::OpName::src1_sel},
+{R600::OpName::src2, R600::OpName::src2_sel},
 };

 for (unsigned j = 0; j < 3; j++) {

@ -314,15 +319,15 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {
 break;
 MachineOperand &MO = MI.getOperand(SrcIdx);
 unsigned Reg = MO.getReg();
-if (Reg == AMDGPU::ALU_CONST) {
+if (Reg == R600::ALU_CONST) {
 MachineOperand &Sel =
 MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
 Result.push_back(std::make_pair(&MO, Sel.getImm()));
 continue;
 }
-if (Reg == AMDGPU::ALU_LITERAL_X) {
+if (Reg == R600::ALU_LITERAL_X) {
 MachineOperand &Operand =
-MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
+MI.getOperand(getOperandIdx(MI.getOpcode(), R600::OpName::literal));
 if (Operand.isImm()) {
 Result.push_back(std::make_pair(&MO, Operand.getImm()));
 continue;

@ -346,7 +351,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
 ++i;
 unsigned Reg = Src.first->getReg();
 int Index = RI.getEncodingValue(Reg) & 0xff;
-if (Reg == AMDGPU::OQAP) {
+if (Reg == R600::OQAP) {
 Result.push_back(std::make_pair(Index, 0U));
 }
 if (PV.find(Reg) != PV.end()) {

@ -436,7 +441,7 @@ unsigned R600InstrInfo::isLegalUpTo(
 const std::pair<int, unsigned> &Src = Srcs[j];
 if (Src.first < 0 || Src.first == 255)
 continue;
-if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
+if (Src.first == GET_REG_INDEX(RI.getEncodingValue(R600::OQAP))) {
 if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
 Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
 // The value from output queue A (denoted by register OQAP) can

@ -542,7 +547,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
 for (unsigned i = 0, e = IG.size(); i < e; ++i) {
 IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
 unsigned Op = getOperandIdx(IG[i]->getOpcode(),
-AMDGPU::OpName::bank_swizzle);
+R600::OpName::bank_swizzle);
 ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
 IG[i]->getOperand(Op).getImm());
 }

@ -611,14 +616,14 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
 continue;

 for (const auto &Src : getSrcs(MI)) {
-if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
+if (Src.first->getReg() == R600::ALU_LITERAL_X)
 Literals.insert(Src.second);
 if (Literals.size() > 4)
 return false;
-if (Src.first->getReg() == AMDGPU::ALU_CONST)
+if (Src.first->getReg() == R600::ALU_CONST)
 Consts.push_back(Src.second);
-if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
-AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+if (R600::R600_KC0RegClass.contains(Src.first->getReg()) ||
+R600::R600_KC1RegClass.contains(Src.first->getReg())) {
 unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
 unsigned Chan = RI.getHWRegChan(Src.first->getReg());
 Consts.push_back((Index << 2) | Chan);

@ -637,7 +642,7 @@ R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
 static bool
 isPredicateSetter(unsigned Opcode) {
 switch (Opcode) {
-case AMDGPU::PRED_X:
+case R600::PRED_X:
 return true;
 default:
 return false;

@ -659,12 +664,12 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB,

 static
 bool isJump(unsigned Opcode) {
-return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+return Opcode == R600::JUMP || Opcode == R600::JUMP_COND;
 }

 static bool isBranch(unsigned Opcode) {
-return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
-Opcode == AMDGPU::BRANCH_COND_f32;
+return Opcode == R600::BRANCH || Opcode == R600::BRANCH_COND_i32 ||
+Opcode == R600::BRANCH_COND_f32;
 }

 bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,

@ -679,7 +684,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 if (I == MBB.end())
 return false;

-// AMDGPU::BRANCH* instructions are only available after isel and are not
+// R600::BRANCH* instructions are only available after isel and are not
 // handled
 if (isBranch(I->getOpcode()))
 return true;

@ -688,7 +693,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 }

 // Remove successive JUMP
-while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
+while (I != MBB.begin() && std::prev(I)->getOpcode() == R600::JUMP) {
 MachineBasicBlock::iterator PriorI = std::prev(I);
 if (AllowModify)
 I->removeFromParent();

@ -699,10 +704,10 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 // If there is only one terminator instruction, process it.
 unsigned LastOpc = LastInst.getOpcode();
 if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
-if (LastOpc == AMDGPU::JUMP) {
+if (LastOpc == R600::JUMP) {
 TBB = LastInst.getOperand(0).getMBB();
 return false;
-} else if (LastOpc == AMDGPU::JUMP_COND) {
+} else if (LastOpc == R600::JUMP_COND) {
 auto predSet = I;
 while (!isPredicateSetter(predSet->getOpcode())) {
 predSet = --I;

@ -710,7 +715,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 TBB = LastInst.getOperand(0).getMBB();
 Cond.push_back(predSet->getOperand(1));
 Cond.push_back(predSet->getOperand(2));
-Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));
 return false;
 }
 return true; // Can't handle indirect branch.

@ -721,7 +726,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 unsigned SecondLastOpc = SecondLastInst.getOpcode();

 // If the block ends with a B and a Bcc, handle it.
-if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
+if (SecondLastOpc == R600::JUMP_COND && LastOpc == R600::JUMP) {
 auto predSet = --I;
 while (!isPredicateSetter(predSet->getOpcode())) {
 predSet = --I;

@ -730,7 +735,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 FBB = LastInst.getOperand(0).getMBB();
 Cond.push_back(predSet->getOperand(1));
 Cond.push_back(predSet->getOperand(2));
-Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));
 return false;
 }

@ -742,8 +747,8 @@ static
 MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
 for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
 It != E; ++It) {
-if (It->getOpcode() == AMDGPU::CF_ALU ||
-It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+if (It->getOpcode() == R600::CF_ALU ||
+It->getOpcode() == R600::CF_ALU_PUSH_BEFORE)
 return It.getReverse();
 }
 return MBB.end();

@ -760,7 +765,7 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,

 if (!FBB) {
 if (Cond.empty()) {
-BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
+BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(TBB);
 return 1;
 } else {
 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());

@ -768,14 +773,14 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
 addFlag(*PredSet, 0, MO_FLAG_PUSH);
 PredSet->getOperand(2).setImm(Cond[1].getImm());

-BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+BuildMI(&MBB, DL, get(R600::JUMP_COND))
 .addMBB(TBB)
-.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+.addReg(R600::PREDICATE_BIT, RegState::Kill);
 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
 if (CfAlu == MBB.end())
 return 1;
-assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
-CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
+assert (CfAlu->getOpcode() == R600::CF_ALU);
+CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));
 return 1;
 }
 } else {

@ -783,15 +788,15 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
 assert(PredSet && "No previous predicate !");
 addFlag(*PredSet, 0, MO_FLAG_PUSH);
 PredSet->getOperand(2).setImm(Cond[1].getImm());
-BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+BuildMI(&MBB, DL, get(R600::JUMP_COND))
 .addMBB(TBB)
-.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
-BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+.addReg(R600::PREDICATE_BIT, RegState::Kill);
+BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(FBB);
 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
 if (CfAlu == MBB.end())
 return 2;
-assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
-CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
+assert (CfAlu->getOpcode() == R600::CF_ALU);
+CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));
 return 2;
 }
 }

@ -812,18 +817,18 @@ unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
 switch (I->getOpcode()) {
 default:
 return 0;
-case AMDGPU::JUMP_COND: {
+case R600::JUMP_COND: {
 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
 clearFlag(*predSet, 0, MO_FLAG_PUSH);
 I->eraseFromParent();
 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
 if (CfAlu == MBB.end())
 break;
-assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
-CfAlu->setDesc(get(AMDGPU::CF_ALU));
+assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE);
+CfAlu->setDesc(get(R600::CF_ALU));
 break;
 }
-case AMDGPU::JUMP:
+case R600::JUMP:
 I->eraseFromParent();
 break;
 }

@ -837,18 +842,18 @@ unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
 // FIXME: only one case??
 default:
 return 1;
-case AMDGPU::JUMP_COND: {
+case R600::JUMP_COND: {
 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
 clearFlag(*predSet, 0, MO_FLAG_PUSH);
 I->eraseFromParent();
 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
 if (CfAlu == MBB.end())
 break;
-assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
-CfAlu->setDesc(get(AMDGPU::CF_ALU));
+assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE);
+CfAlu->setDesc(get(R600::CF_ALU));
 break;
 }
-case AMDGPU::JUMP:
+case R600::JUMP:
 I->eraseFromParent();
 break;
 }

@ -863,9 +868,9 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
 unsigned Reg = MI.getOperand(idx).getReg();
 switch (Reg) {
 default: return false;
-case AMDGPU::PRED_SEL_ONE:
-case AMDGPU::PRED_SEL_ZERO:
-case AMDGPU::PREDICATE_BIT:
+case R600::PRED_SEL_ONE:
+case R600::PRED_SEL_ZERO:
+case R600::PREDICATE_BIT:
 return true;
 }
 }

@ -876,9 +881,9 @@ bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
 // be predicated. Until we have proper support for instruction clauses in the
 // backend, we will mark KILL* instructions as unpredicable.

-if (MI.getOpcode() == AMDGPU::KILLGT) {
+if (MI.getOpcode() == R600::KILLGT) {
 return false;
-} else if (MI.getOpcode() == AMDGPU::CF_ALU) {
+} else if (MI.getOpcode() == R600::CF_ALU) {
 // If the clause start in the middle of MBB then the MBB has more
 // than a single clause, unable to predicate several clauses.
 if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))

@ -888,7 +893,7 @@ bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
 } else if (isVector(MI)) {
 return false;
 } else {
-return AMDGPUInstrInfo::isPredicable(MI);
+return TargetInstrInfo::isPredicable(MI);
 }
 }

@ -929,17 +934,17 @@ bool
 R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
 MachineOperand &MO = Cond[1];
 switch (MO.getImm()) {
-case AMDGPU::PRED_SETE_INT:
-MO.setImm(AMDGPU::PRED_SETNE_INT);
+case R600::PRED_SETE_INT:
+MO.setImm(R600::PRED_SETNE_INT);
 break;
-case AMDGPU::PRED_SETNE_INT:
-MO.setImm(AMDGPU::PRED_SETE_INT);
+case R600::PRED_SETNE_INT:
+MO.setImm(R600::PRED_SETE_INT);
 break;
-case AMDGPU::PRED_SETE:
-MO.setImm(AMDGPU::PRED_SETNE);
+case R600::PRED_SETE:
+MO.setImm(R600::PRED_SETNE);
 break;
-case AMDGPU::PRED_SETNE:
-MO.setImm(AMDGPU::PRED_SETE);
+case R600::PRED_SETNE:
+MO.setImm(R600::PRED_SETE);
 break;
 default:
 return true;

@ -947,11 +952,11 @@ R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) con

 MachineOperand &MO2 = Cond[2];
 switch (MO2.getReg()) {
-case AMDGPU::PRED_SEL_ZERO:
-MO2.setReg(AMDGPU::PRED_SEL_ONE);
+case R600::PRED_SEL_ZERO:
+MO2.setReg(R600::PRED_SEL_ONE);
 break;
-case AMDGPU::PRED_SEL_ONE:
-MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+case R600::PRED_SEL_ONE:
+MO2.setReg(R600::PRED_SEL_ZERO);
 break;
 default:
 return true;

@ -968,22 +973,22 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
 ArrayRef<MachineOperand> Pred) const {
 int PIdx = MI.findFirstPredOperandIdx();

-if (MI.getOpcode() == AMDGPU::CF_ALU) {
+if (MI.getOpcode() == R600::CF_ALU) {
 MI.getOperand(8).setImm(0);
 return true;
 }

-if (MI.getOpcode() == AMDGPU::DOT_4) {
-MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
+if (MI.getOpcode() == R600::DOT_4) {
+MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_X))
 .setReg(Pred[2].getReg());
-MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
+MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Y))
 .setReg(Pred[2].getReg());
-MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
+MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Z))
 .setReg(Pred[2].getReg());
-MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
+MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_W))
 .setReg(Pred[2].getReg());
 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
-MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);
 return true;
 }

@ -991,7 +996,7 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
 MachineOperand &PMO = MI.getOperand(PIdx);
 PMO.setReg(Pred[2].getReg());
 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
-MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);
 return true;
 }

@ -1021,20 +1026,20 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 default: {
 MachineBasicBlock *MBB = MI.getParent();
 int OffsetOpIdx =
-AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
+R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::addr);
 // addr is a custom operand with multiple MI operands, and only the
 // first MI operand is given a name.
 int RegOpIdx = OffsetOpIdx + 1;
 int ChanOpIdx =
-AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
+R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::chan);
 if (isRegisterLoad(MI)) {
 int DstOpIdx =
-AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::dst);
 unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
 unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
 unsigned Address = calculateIndirectAddress(RegIndex, Channel);
 unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
-if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
 buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
 getIndirectAddrRegClass()->getRegister(Address));
 } else {

@ -1043,12 +1048,12 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 }
 } else if (isRegisterStore(MI)) {
 int ValOpIdx =
-AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
+R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::val);
 unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
 unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
 unsigned Address = calculateIndirectAddress(RegIndex, Channel);
 unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
-if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
 buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
 MI.getOperand(ValOpIdx).getReg());
 } else {

@ -1063,15 +1068,15 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
 MBB->erase(MI);
 return true;
 }
-case AMDGPU::R600_EXTRACT_ELT_V2:
-case AMDGPU::R600_EXTRACT_ELT_V4:
+case R600::R600_EXTRACT_ELT_V2:
+case R600::R600_EXTRACT_ELT_V4:
 buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
 RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
 MI.getOperand(2).getReg(),
 RI.getHWRegChan(MI.getOperand(1).getReg()));
 break;
-case AMDGPU::R600_INSERT_ELT_V2:
-case AMDGPU::R600_INSERT_ELT_V4:
+case R600::R600_INSERT_ELT_V2:
+case R600::R600_INSERT_ELT_V4:
 buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
 RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
 MI.getOperand(3).getReg(), // Offset

@ -1096,14 +1101,14 @@ void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,

 for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
 for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
-unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+unsigned Reg = R600::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
 TRI.reserveRegisterTuples(Reserved, Reg);
 }
 }
 }

 const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
-return &AMDGPU::R600_TReg32_XRegClass;
+return &R600::R600_TReg32_XRegClass;
 }

 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,

@ -1121,20 +1126,20 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
 unsigned AddrReg;
 switch (AddrChan) {
 default: llvm_unreachable("Invalid Channel");
-case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
-case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
-case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
-case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break;
+case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break;
+case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break;
+case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;
 }
-MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
-AMDGPU::AR_X, OffsetReg);
-setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
+MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg,
+R600::AR_X, OffsetReg);
+setImmOperand(*MOVA, R600::OpName::write, 0);

-MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,
 AddrReg, ValueReg)
-.addReg(AMDGPU::AR_X,
+.addReg(R600::AR_X,
 RegState::Implicit | RegState::Kill);
-setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
+setImmOperand(*Mov, R600::OpName::dst_rel, 1);
 return Mov;
 }

@ -1153,21 +1158,21 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
 unsigned AddrReg;
 switch (AddrChan) {
 default: llvm_unreachable("Invalid Channel");
-case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
-case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
-case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
-case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break;
+case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break;
+case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break;
+case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;
 }
-MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
-AMDGPU::AR_X,
+MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg,
+R600::AR_X,
 OffsetReg);
-setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
-MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+setImmOperand(*MOVA, R600::OpName::write, 0);
+MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,
 ValueReg,
 AddrReg)
-.addReg(AMDGPU::AR_X,
+.addReg(R600::AR_X,
 RegState::Implicit | RegState::Kill);
-setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);
+setImmOperand(*Mov, R600::OpName::src0_rel, 1);

 return Mov;
 }

@ -1265,7 +1270,7 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
 //XXX: The r600g finalizer expects this to be 1, once we've moved the
 //scheduling to the backend, we can change the default to 0.
 MIB.addImm(1) // $last
-.addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
+.addReg(R600::PRED_SEL_OFF) // $pred_sel
 .addImm(0) // $literal
 .addImm(0); // $bank_swizzle

@ -1286,23 +1291,23 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB

 static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
 switch (Op) {
-OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
-OPERAND_CASE(AMDGPU::OpName::update_pred)
-OPERAND_CASE(AMDGPU::OpName::write)
-OPERAND_CASE(AMDGPU::OpName::omod)
-OPERAND_CASE(AMDGPU::OpName::dst_rel)
-OPERAND_CASE(AMDGPU::OpName::clamp)
-OPERAND_CASE(AMDGPU::OpName::src0)
-OPERAND_CASE(AMDGPU::OpName::src0_neg)
-OPERAND_CASE(AMDGPU::OpName::src0_rel)
-OPERAND_CASE(AMDGPU::OpName::src0_abs)
-OPERAND_CASE(AMDGPU::OpName::src0_sel)
-OPERAND_CASE(AMDGPU::OpName::src1)
-OPERAND_CASE(AMDGPU::OpName::src1_neg)
-OPERAND_CASE(AMDGPU::OpName::src1_rel)
-OPERAND_CASE(AMDGPU::OpName::src1_abs)
-OPERAND_CASE(AMDGPU::OpName::src1_sel)
-OPERAND_CASE(AMDGPU::OpName::pred_sel)
+OPERAND_CASE(R600::OpName::update_exec_mask)
+OPERAND_CASE(R600::OpName::update_pred)
+OPERAND_CASE(R600::OpName::write)
+OPERAND_CASE(R600::OpName::omod)
+OPERAND_CASE(R600::OpName::dst_rel)
+OPERAND_CASE(R600::OpName::clamp)
+OPERAND_CASE(R600::OpName::src0)
+OPERAND_CASE(R600::OpName::src0_neg)
+OPERAND_CASE(R600::OpName::src0_rel)
+OPERAND_CASE(R600::OpName::src0_abs)
+OPERAND_CASE(R600::OpName::src0_sel)
+OPERAND_CASE(R600::OpName::src1)
+OPERAND_CASE(R600::OpName::src1_neg)
+OPERAND_CASE(R600::OpName::src1_rel)
+OPERAND_CASE(R600::OpName::src1_abs)
+OPERAND_CASE(R600::OpName::src1_sel)
+OPERAND_CASE(R600::OpName::pred_sel)
 default:
 llvm_unreachable("Wrong Operand");
 }

@ -1313,39 +1318,39 @@ static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
 MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
 MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
 const {
-assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
+assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented");
 unsigned Opcode;
 if (ST.getGeneration() <= R600Subtarget::R700)
-Opcode = AMDGPU::DOT4_r600;
+Opcode = R600::DOT4_r600;
 else
-Opcode = AMDGPU::DOT4_eg;
+Opcode = R600::DOT4_eg;
 MachineBasicBlock::iterator I = MI;
 MachineOperand &Src0 = MI->getOperand(
-getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
+getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src0, Slot)));
 MachineOperand &Src1 = MI->getOperand(
-getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
+getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src1, Slot)));
 MachineInstr *MIB = buildDefaultInstruction(
 MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
 static const unsigned Operands[14] = {
-AMDGPU::OpName::update_exec_mask,
-AMDGPU::OpName::update_pred,
-AMDGPU::OpName::write,
-AMDGPU::OpName::omod,
-AMDGPU::OpName::dst_rel,
-AMDGPU::OpName::clamp,
-AMDGPU::OpName::src0_neg,
-AMDGPU::OpName::src0_rel,
-AMDGPU::OpName::src0_abs,
-AMDGPU::OpName::src0_sel,
-AMDGPU::OpName::src1_neg,
-AMDGPU::OpName::src1_rel,
-AMDGPU::OpName::src1_abs,
-AMDGPU::OpName::src1_sel,
+R600::OpName::update_exec_mask,
+R600::OpName::update_pred,
+R600::OpName::write,
+R600::OpName::omod,
+R600::OpName::dst_rel,
+R600::OpName::clamp,
+R600::OpName::src0_neg,
+R600::OpName::src0_rel,
+R600::OpName::src0_abs,
+R600::OpName::src0_sel,
+R600::OpName::src1_neg,
+R600::OpName::src1_rel,
+R600::OpName::src1_abs,
+R600::OpName::src1_sel,
 };

 MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
-getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
-MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
+getSlotedOps(R600::OpName::pred_sel, Slot)));
+MIB->getOperand(getOperandIdx(Opcode, R600::OpName::pred_sel))
 .setReg(MO.getReg());

 for (unsigned i = 0; i < 14; i++) {

@ -1362,16 +1367,16 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
 MachineBasicBlock::iterator I,
 unsigned DstReg,
 uint64_t Imm) const {
-MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
-AMDGPU::ALU_LITERAL_X);
-setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
+MachineInstr *MovImm = buildDefaultInstruction(BB, I, R600::MOV, DstReg,
+R600::ALU_LITERAL_X);
+setImmOperand(*MovImm, R600::OpName::literal, Imm);
 return MovImm;
 }

 MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
 MachineBasicBlock::iterator I,
 unsigned DstReg, unsigned SrcReg) const {
-return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+return buildDefaultInstruction(*MBB, I, R600::MOV, DstReg, SrcReg);
 }

 int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {

@ -1379,7 +1384,7 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
 }

 int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
-return AMDGPU::getNamedOperandIdx(Opcode, Op);
+return R600::getNamedOperandIdx(Opcode, Op);
 }

 void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,

@ -1406,25 +1411,25 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
 bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
 switch (Flag) {
 case MO_FLAG_CLAMP:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
+FlagIndex = getOperandIdx(MI, R600::OpName::clamp);
 break;
 case MO_FLAG_MASK:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
+FlagIndex = getOperandIdx(MI, R600::OpName::write);
 break;
 case MO_FLAG_NOT_LAST:
 case MO_FLAG_LAST:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
+FlagIndex = getOperandIdx(MI, R600::OpName::last);
 break;
 case MO_FLAG_NEG:
 switch (SrcIdx) {
 case 0:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
+FlagIndex = getOperandIdx(MI, R600::OpName::src0_neg);
 break;
 case 1:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
+FlagIndex = getOperandIdx(MI, R600::OpName::src1_neg);
 break;
 case 2:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
+FlagIndex = getOperandIdx(MI, R600::OpName::src2_neg);
 break;
 }
 break;

@ -1435,10 +1440,10 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
 (void)IsOP3;
 switch (SrcIdx) {
 case 0:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
+FlagIndex = getOperandIdx(MI, R600::OpName::src0_abs);
 break;
 case 1:
-FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
+FlagIndex = getOperandIdx(MI, R600::OpName::src1_abs);
 break;
 }
 break;

@ -1499,15 +1504,15 @@ unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
 switch (Kind) {
 case PseudoSourceValue::Stack:
 case PseudoSourceValue::FixedStack:
-return AMDGPUASI.PRIVATE_ADDRESS;
+return ST.getAMDGPUAS().PRIVATE_ADDRESS;
 case PseudoSourceValue::ConstantPool:
 case PseudoSourceValue::GOT:
 case PseudoSourceValue::JumpTable:
 case PseudoSourceValue::GlobalValueCallEntry:
 case PseudoSourceValue::ExternalSymbolCallEntry:
 case PseudoSourceValue::TargetCustom:
-return AMDGPUASI.CONSTANT_ADDRESS;
+return ST.getAMDGPUAS().CONSTANT_ADDRESS;
 }
 llvm_unreachable("Invalid pseudo source kind");
-return AMDGPUASI.PRIVATE_ADDRESS;
+return ST.getAMDGPUAS().PRIVATE_ADDRESS;
 }
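Aside: most of the churn above swaps in the named-operand helpers generated under GET_INSTRINFO_NAMED_OPS: R600::getNamedOperandIdx(Opcode, R600::OpName::foo) returns the flat operand index of that named operand for the given opcode, or -1 when the instruction has no such operand (hence the `!= -1` and `> -1` guards throughout). A self-contained sketch of the lookup shape, with stand-in names — the real tables are emitted by llvm-tblgen:

#include <cstdint>

namespace sketch {
enum OpName : uint16_t { dst, literal, OPERAND_NAME_COUNT }; // stand-ins

// One row per opcode, one column per operand name; -1 means "absent".
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx) {
  static const int16_t Table[1][OPERAND_NAME_COUNT] = {
      {/*dst=*/0, /*literal=*/3}, // hypothetical opcode 0
  };
  return Opcode == 0 ? Table[Opcode][NamedIdx] : -1;
}
} // namespace sketch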
===== R600InstrInfo.h =====

@ -15,8 +15,11 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H

-#include "AMDGPUInstrInfo.h"
 #include "R600RegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "R600GenInstrInfo.inc"

 namespace llvm {

@ -34,7 +37,7 @@ class MachineInstr;
 class MachineInstrBuilder;
 class R600Subtarget;

-class R600InstrInfo final : public AMDGPUInstrInfo {
+class R600InstrInfo final : public R600GenInstrInfo {
 private:
 const R600RegisterInfo RI;
 const R600Subtarget &ST;

@ -324,7 +327,7 @@ public:
 PseudoSourceValue::PSVKind Kind) const override;
 };

-namespace AMDGPU {
+namespace R600 {

 int getLDSNoRetOp(uint16_t Opcode);
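Aside: with this hunk R600InstrInfo derives from the generated R600GenInstrInfo rather than the shared AMDGPUInstrInfo, which is also why the .cpp constructor above passes (-1, -1): those are TargetInstrInfo's call-frame setup/destroy opcodes, and R600 defines neither. An illustrative sketch of the inheritance shape (assumed, not taken from the patch):

class TargetInstrInfoSketch {
public:
  // -1 is the "no such opcode" sentinel for the call-frame pseudos.
  explicit TargetInstrInfoSketch(int CFSetupOp = -1, int CFDestroyOp = -1)
      : CFSetupOp(CFSetupOp), CFDestroyOp(CFDestroyOp) {}
private:
  int CFSetupOp, CFDestroyOp;
};

// llvm-tblgen emits the real R600GenInstrInfo under GET_INSTRINFO_HEADER.
class R600GenInstrInfoSketch : public TargetInstrInfoSketch {
public:
  R600GenInstrInfoSketch(int CFSetupOp, int CFDestroyOp)
      : TargetInstrInfoSketch(CFSetupOp, CFDestroyOp) {}
};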
===== R600Instructions.td =====

@ -18,13 +18,13 @@ include "R600InstrFormats.td"
 class R600WrapperInst <dag outs, dag ins, string asm = "", list<dag> pattern = []> :
 AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
 let SubtargetPredicate = isR600toCayman;
+let Namespace = "R600";
 }

 class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern = []> :
 InstR600 <outs, ins, asm, pattern, NullALU> {
-let Namespace = "AMDGPU";
 }

 def MEMxi : Operand<iPTR> {

@ -86,6 +86,12 @@ def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
 def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
 (ops PRED_SEL_OFF)>;

+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+usesCustomInserter = 1, Namespace = "R600" in {
+def RETURN : ILFormat<(outs), (ins variable_ops),
+"RETURN", [(AMDGPUendpgm)]
+>;
+}

 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {

@ -219,34 +225,6 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,

 } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0

-def TEX_SHADOW : PatLeaf<
-(imm),
-[{uint32_t TType = (uint32_t)N->getZExtValue();
-return (TType >= 6 && TType <= 8) || TType == 13;
-}]
->;
-
-def TEX_RECT : PatLeaf<
-(imm),
-[{uint32_t TType = (uint32_t)N->getZExtValue();
-return TType == 5;
-}]
->;
-
-def TEX_ARRAY : PatLeaf<
-(imm),
-[{uint32_t TType = (uint32_t)N->getZExtValue();
-return TType == 9 || TType == 10 || TType == 16;
-}]
->;
-
-def TEX_SHADOW_ARRAY : PatLeaf<
-(imm),
-[{uint32_t TType = (uint32_t)N->getZExtValue();
-return TType == 11 || TType == 12 || TType == 17;
-}]
->;
-
 class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
 dag outs, dag ins, string asm, list<dag> pattern> :
 InstR600ISA <outs, ins, asm, pattern>,

@ -357,6 +335,8 @@ def vtx_id2_load : LoadVtxId2 <load>;
 // R600 SDNodes
 //===----------------------------------------------------------------------===//

+let Namespace = "R600" in {
+
 def INTERP_PAIR_XY : AMDGPUShaderInst <
 (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
 (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),

@ -369,6 +349,8 @@ def INTERP_PAIR_ZW : AMDGPUShaderInst <
 "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
 []>;

+}
+
 def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
 SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
 [SDNPVariadic]

@ -416,11 +398,15 @@ def : R600Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
 // Interpolation Instructions
 //===----------------------------------------------------------------------===//

+let Namespace = "R600" in {
+
 def INTERP_VEC_LOAD : AMDGPUShaderInst <
 (outs R600_Reg128:$dst),
 (ins i32imm:$src0),
 "INTERP_LOAD $src0 : $dst">;

+}
+
 def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
 let bank_swizzle = 5;
 }

@ -660,7 +646,7 @@ def PAD : R600WrapperInst <(outs), (ins), "PAD", [] > {

 let isCodeGenOnly = 1, isPseudo = 1 in {

-let usesCustomInserter = 1 in {
+let Namespace = "R600", usesCustomInserter = 1 in {

 class FABS <RegisterClass rc> : AMDGPUShaderInst <
 (outs rc:$dst),

@ -792,7 +778,9 @@ class MOV_IMM <ValueType vt, Operand immType> : R600WrapperInst <
 (ins immType:$imm),
 "",
 []
->;
+> {
+let Namespace = "R600";
+}

 } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1

@ -1007,7 +995,7 @@ class CNDGE_Common <bits<5> inst> : R600_3OP <
 }

-let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in {
 class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
 // Slot X
 UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,

@ -1326,7 +1314,9 @@ let Predicates = [isR600] in {
 // Regist loads and stores - for indirect addressing
 //===----------------------------------------------------------------------===//

+let Namespace = "R600" in {
 defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+}

 // Hardcode channel to 0
 // NOTE: LSHR is not available here. LSHR is per family instruction

@ -1378,11 +1368,12 @@ let usesCustomInserter = 1 in {

 let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {

-def MASK_WRITE : AMDGPUShaderInst <
+def MASK_WRITE : InstR600 <
 (outs),
 (ins R600_Reg32:$src),
 "MASK_WRITE $src",
-[]
+[],
+NullALU
 >;

 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1

@ -1413,7 +1404,7 @@ def TXD_SHADOW: InstR600 <
 // Constant Buffer Addressing Support
 //===----------------------------------------------------------------------===//

-let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in {
 def CONST_COPY : Instruction {
 let OutOperandList = (outs R600_Reg32:$dst);
 let InOperandList = (ins i32imm:$src);

@ -1536,23 +1527,6 @@ let Inst{63-32} = Word1;
 //===---------------------------------------------------------------------===//
 // Flow and Program control Instructions
 //===---------------------------------------------------------------------===//
-class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-: Instruction {
-
-let Namespace = "AMDGPU";
-dag OutOperandList = outs;
-dag InOperandList = ins;
-let Pattern = pattern;
-let AsmString = !strconcat(asmstr, "\n");
-let isPseudo = 1;
-let Itinerary = NullALU;
-bit hasIEEEFlag = 0;
-bit hasZeroOpFlag = 0;
-let mayLoad = 0;
-let mayStore = 0;
-let hasSideEffects = 0;
-let isCodeGenOnly = 1;
-}

 multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
 def _i32 : ILFormat<(outs),

@ -1584,23 +1558,14 @@ multiclass BranchInstr2<string name> {
 // Custom Inserter for Branches and returns, this eventually will be a
 // separate pass
 //===---------------------------------------------------------------------===//
-let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1,
+Namespace = "R600" in {
 def BRANCH : ILFormat<(outs), (ins brtarget:$target),
 "; Pseudo unconditional branch instruction",
 [(br bb:$target)]>;
 defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
 }

-//===---------------------------------------------------------------------===//
-// Return instruction
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
-usesCustomInserter = 1 in {
-def RETURN : ILFormat<(outs), (ins variable_ops),
-"RETURN", [(AMDGPUendpgm)]
->;
-}
-
 //===----------------------------------------------------------------------===//
 // Branch Instructions
 //===----------------------------------------------------------------------===//

@ -1731,7 +1696,7 @@ def : R600Pat <

 // KIL Patterns
 def KIL : R600Pat <
-(int_AMDGPU_kill f32:$src0),
+(int_r600_kill f32:$src0),
 (MASK_WRITE (KILLGT (f32 ZERO), $src0))
 >;
@@ -162,7 +162,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
     for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
       MachineOperand &MO = *It;
-      if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+      if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
         ++CurEmitted;
     }
   }

@@ -181,7 +181,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {

 static bool
 isPhysicalRegCopy(MachineInstr *MI) {
-  if (MI->getOpcode() != AMDGPU::COPY)
+  if (MI->getOpcode() != R600::COPY)
     return false;

   return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());

@@ -224,14 +224,14 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
     return AluTrans;

   switch (MI->getOpcode()) {
-  case AMDGPU::PRED_X:
+  case R600::PRED_X:
     return AluPredX;
-  case AMDGPU::INTERP_PAIR_XY:
-  case AMDGPU::INTERP_PAIR_ZW:
-  case AMDGPU::INTERP_VEC_LOAD:
-  case AMDGPU::DOT_4:
+  case R600::INTERP_PAIR_XY:
+  case R600::INTERP_PAIR_ZW:
+  case R600::INTERP_VEC_LOAD:
+  case R600::DOT_4:
     return AluT_XYZW;
-  case AMDGPU::COPY:
+  case R600::COPY:
     if (MI->getOperand(1).isUndef()) {
       // MI will become a KILL, don't considers it in scheduling
       return AluDiscarded;

@@ -246,7 +246,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
   if(TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()) ||
-     MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
+     MI->getOpcode() == R600::GROUP_BARRIER) {
     return AluT_XYZW;
   }

@@ -257,13 +257,13 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
   // Is the result already assigned to a channel ?
   unsigned DestSubReg = MI->getOperand(0).getSubReg();
   switch (DestSubReg) {
-  case AMDGPU::sub0:
+  case R600::sub0:
     return AluT_X;
-  case AMDGPU::sub1:
+  case R600::sub1:
     return AluT_Y;
-  case AMDGPU::sub2:
+  case R600::sub2:
     return AluT_Z;
-  case AMDGPU::sub3:
+  case R600::sub3:
     return AluT_W;
   default:
     break;

@@ -271,16 +271,16 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {

   // Is the result already member of a X/Y/Z/W class ?
   unsigned DestReg = MI->getOperand(0).getReg();
-  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
-      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
+  if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
+      regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
     return AluT_X;
-  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
+  if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
     return AluT_Y;
-  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
+  if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
     return AluT_Z;
-  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
+  if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
     return AluT_W;
-  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
+  if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
     return AluT_XYZW;

   // LDS src registers cannot be used in the Trans slot.

@@ -301,13 +301,13 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
   }

   switch (Opcode) {
-  case AMDGPU::PRED_X:
-  case AMDGPU::COPY:
-  case AMDGPU::CONST_COPY:
-  case AMDGPU::INTERP_PAIR_XY:
-  case AMDGPU::INTERP_PAIR_ZW:
-  case AMDGPU::INTERP_VEC_LOAD:
-  case AMDGPU::DOT_4:
+  case R600::PRED_X:
+  case R600::COPY:
+  case R600::CONST_COPY:
+  case R600::INTERP_PAIR_XY:
+  case R600::INTERP_PAIR_ZW:
+  case R600::INTERP_VEC_LOAD:
+  case R600::DOT_4:
     return IDAlu;
   default:
     return IDOther;

@@ -353,7 +353,7 @@ void R600SchedStrategy::PrepareNextSlot() {
 }

 void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
-  int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+  int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
   if (DstIndex == -1) {
     return;
   }

@@ -370,16 +370,16 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
   // Constrains the regclass of DestReg to assign it to Slot
   switch (Slot) {
   case 0:
-    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
     break;
   case 1:
-    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
     break;
   case 2:
-    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
     break;
   case 3:
-    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
     break;
   }
 }

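The scheduler hunks above are a mechanical rename: once R600 has its own generated tables, the opcode, register, and operand-name enums an R600-only pass uses come from R600GenInstrInfo.inc / R600GenRegisterInfo.inc in the R600 namespace rather than the shared AMDGPU one. A minimal standalone C++ sketch of the effect (the enum values below are made up for illustration, not the generated ones):

#include <cstdio>

namespace R600   { enum Opcode { COPY, PRED_X, GROUP_BARRIER }; } // stands in for R600GenInstrInfo.inc
namespace AMDGPU { enum Opcode { COPY, S_MOV_B32 }; }             // stands in for AMDGPUGenInstrInfo.inc

// An R600-only pass now compares against R600::* values exclusively.
static bool isR600Copy(R600::Opcode Opc) { return Opc == R600::COPY; }

int main() { std::printf("%d\n", isR600Copy(R600::PRED_X)); } // prints 0

Keeping the two enums in separate namespaces is what lets each sub-target's tables shrink: neither generated file has to enumerate the other's instructions.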
@@ -79,7 +79,7 @@ public:
   std::vector<unsigned> UndefReg;

   RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
-    assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE);
+    assert(MI->getOpcode() == R600::REG_SEQUENCE);
     for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
       MachineOperand &MO = Instr->getOperand(i);
       unsigned Chan = Instr->getOperand(i + 1).getImm();

@@ -159,8 +159,8 @@ bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
     return true;
   switch (MI.getOpcode()) {
-  case AMDGPU::R600_ExportSwz:
-  case AMDGPU::EG_ExportSwz:
+  case R600::R600_ExportSwz:
+  case R600::EG_ExportSwz:
     return true;
   default:
     return false;

@@ -213,12 +213,12 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
   std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
   for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
        E = RSI->RegToChan.end(); It != E; ++It) {
-    unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+    unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
     unsigned SubReg = (*It).first;
     unsigned Swizzle = (*It).second;
     unsigned Chan = getReassignedChan(RemapChan, Swizzle);

-    MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG),
+    MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
             DstReg)
         .addReg(SrcVec)
         .addReg(SubReg)

@@ -234,7 +234,7 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
     SrcVec = DstReg;
   }
   MachineInstr *NewMI =
-      BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec);
+      BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
   LLVM_DEBUG(dbgs() << " ->"; NewMI->dump(););

   LLVM_DEBUG(dbgs() << " Updating Swizzle:\n");

@@ -354,7 +354,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
     for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
          MII != MIIE; ++MII) {
       MachineInstr &MI = *MII;
-      if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) {
+      if (MI.getOpcode() != R600::REG_SEQUENCE) {
         if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
           unsigned Reg = MI.getOperand(1).getReg();
           for (MachineRegisterInfo::def_instr_iterator

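As a reading aid for RebuildVector above, here is a hedged toy model of its dataflow: the pass rebuilds a 128-bit vector value one channel at a time (one R600::INSERT_SUBREG per remapped channel) and finally copies the result into the original destination. The real code creates fresh virtual registers and MachineInstrs rather than mutating an array; this is only the shape of the loop.

#include <array>
#include <utility>

using Vec4 = std::array<unsigned, 4>; // one slot per channel X/Y/Z/W

static Vec4 rebuildVector(Vec4 SrcVec,
                          const std::array<std::pair<unsigned, unsigned>, 4> &RegToChan) {
  for (const auto &P : RegToChan) // (sub-register, channel): one INSERT_SUBREG each
    SrcVec[P.second] = P.first;
  return SrcVec;                  // final R600::COPY moves the result into Reg
}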
@@ -84,39 +84,39 @@ private:
       LastDstChan = BISlot;
       if (TII->isPredicated(*BI))
         continue;
-      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
+      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
         continue;
-      int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
+      int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
       if (DstIdx == -1) {
         continue;
       }
       unsigned Dst = BI->getOperand(DstIdx).getReg();
       if (isTrans || TII->isTransOnly(*BI)) {
-        Result[Dst] = AMDGPU::PS;
+        Result[Dst] = R600::PS;
         continue;
       }
-      if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
-          BI->getOpcode() == AMDGPU::DOT4_eg) {
-        Result[Dst] = AMDGPU::PV_X;
+      if (BI->getOpcode() == R600::DOT4_r600 ||
+          BI->getOpcode() == R600::DOT4_eg) {
+        Result[Dst] = R600::PV_X;
         continue;
       }
-      if (Dst == AMDGPU::OQAP) {
+      if (Dst == R600::OQAP) {
         continue;
       }
       unsigned PVReg = 0;
       switch (TRI.getHWRegChan(Dst)) {
       case 0:
-        PVReg = AMDGPU::PV_X;
+        PVReg = R600::PV_X;
         break;
       case 1:
-        PVReg = AMDGPU::PV_Y;
+        PVReg = R600::PV_Y;
         break;
       case 2:
-        PVReg = AMDGPU::PV_Z;
+        PVReg = R600::PV_Z;
         break;
       case 3:
-        PVReg = AMDGPU::PV_W;
+        PVReg = R600::PV_W;
         break;
       default:
         llvm_unreachable("Invalid Chan");

@@ -129,9 +129,9 @@ private:
   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
       const {
     unsigned Ops[] = {
-      AMDGPU::OpName::src0,
-      AMDGPU::OpName::src1,
-      AMDGPU::OpName::src2
+      R600::OpName::src0,
+      R600::OpName::src1,
+      R600::OpName::src2
     };
     for (unsigned i = 0; i < 3; i++) {
       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);

@@ -171,7 +171,7 @@ public:
       return true;
     if (!TII->isALUInstr(MI.getOpcode()))
       return true;
-    if (MI.getOpcode() == AMDGPU::GROUP_BARRIER)
+    if (MI.getOpcode() == R600::GROUP_BARRIER)
       return true;
     // XXX: This can be removed once the packetizer properly handles all the
     // LDS instruction group restrictions.

@@ -185,8 +185,8 @@ public:
       if (getSlot(*MII) == getSlot(*MIJ))
         ConsideredInstUsesAlreadyWrittenVectorElement = true;
     // Does MII and MIJ share the same pred_sel ?
-    int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
-        OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
+    int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
+        OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
     unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
         PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
     if (PredI != PredJ)

@@ -220,7 +220,7 @@ public:
   }

   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
-    unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
+    unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
     MI->getOperand(LastOp).setImm(Bit);
   }

@@ -301,11 +301,11 @@ public:
     for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
       MachineInstr *MI = CurrentPacketMIs[i];
       unsigned Op = TII->getOperandIdx(MI->getOpcode(),
-          AMDGPU::OpName::bank_swizzle);
+          R600::OpName::bank_swizzle);
       MI->getOperand(Op).setImm(BS[i]);
     }
     unsigned Op =
-        TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::bank_swizzle);
+        TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
     MI.getOperand(Op).setImm(BS.back());
     if (!CurrentPacketMIs.empty())
       setIsLastBit(CurrentPacketMIs.back(), 0);

@@ -334,6 +334,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {

   // DFA state table should not be empty.
   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+  assert(Packetizer.getResourceTracker()->getInstrItins());

   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
     return false;

@@ -353,8 +354,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
     MachineBasicBlock::iterator End = MBB->end();
     MachineBasicBlock::iterator MI = MBB->begin();
     while (MI != End) {
-      if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
-          (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
+      if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
+          (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
         MachineBasicBlock::iterator DeleteMI = MI;
         ++MI;
         MBB->erase(DeleteMI);

@@ -7,6 +7,62 @@
 //
 //===----------------------------------------------------------------------===//

+class SubtargetFeatureFetchLimit <string Value> :
+                 SubtargetFeature <"fetch"#Value,
+  "TexVTXClauseSize",
+  Value,
+  "Limit the maximum number of fetches in a clause to "#Value
+>;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+  "R600ALUInst",
+  "false",
+  "Older version of ALU instructions encoding"
+>;
+
+def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
+def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
+
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+  "HasVertexCache",
+  "true",
+  "Specify use of dedicated vertex cache"
+>;
+
+def FeatureCaymanISA : SubtargetFeature<"caymanISA",
+  "CaymanISA",
+  "true",
+  "Use Cayman ISA"
+>;
+
+def FeatureCFALUBug : SubtargetFeature<"cfalubug",
+  "CFALUBug",
+  "true",
+  "GPU has CF_ALU bug"
+>;
+
+class R600SubtargetFeatureGeneration <string Value,
+                                      list<SubtargetFeature> Implies> :
+  SubtargetFeatureGeneration <Value, "R600Subtarget", Implies>;
+
+def FeatureR600 : R600SubtargetFeatureGeneration<"R600",
+  [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
+>;
+
+def FeatureR700 : R600SubtargetFeatureGeneration<"R700",
+  [FeatureFetchLimit16, FeatureLocalMemorySize0]
+>;
+
+def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN",
+  [FeatureFetchLimit16, FeatureLocalMemorySize32768]
+>;
+
+def FeatureNorthernIslands : R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
+  [FeatureFetchLimit16, FeatureWavefrontSize64,
+   FeatureLocalMemorySize32768]
+>;
+

 //===----------------------------------------------------------------------===//
 // Radeon HD 2000/3000 Series (R600).
 //===----------------------------------------------------------------------===//

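These are the R600-only feature definitions that leave the shared AMDGPU TableGen file and land in an R600-specific one, next to the R600 processor definitions. One TableGen detail worth spelling out: the # operator concatenates strings, so SubtargetFeatureFetchLimit<"8"> and <"16"> instantiate features named "fetch8" and "fetch16" that both write the TexVTXClauseSize field. A rough C++ analogy of that expansion (stand-in struct, not LLVM code):

#include <string>

struct Feature { std::string Name, Field, Value; };

static Feature makeFetchLimit(const std::string &V) {
  return { "fetch" + V,        // feature name: "fetch8" or "fetch16"
           "TexVTXClauseSize", // R600Subtarget field the feature sets
           V };                // value written into that field
}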
@@ -21,34 +21,37 @@

 using namespace llvm;

-R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) {
   RCW.RegWeight = 0;
   RCW.WeightLimit = 0;
 }

+#define GET_REGINFO_TARGET_DESC
+#include "R600GenRegisterInfo.inc"
+
 BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());

   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
   const R600InstrInfo *TII = ST.getInstrInfo();

-  reserveRegisterTuples(Reserved, AMDGPU::ZERO);
-  reserveRegisterTuples(Reserved, AMDGPU::HALF);
-  reserveRegisterTuples(Reserved, AMDGPU::ONE);
-  reserveRegisterTuples(Reserved, AMDGPU::ONE_INT);
-  reserveRegisterTuples(Reserved, AMDGPU::NEG_HALF);
-  reserveRegisterTuples(Reserved, AMDGPU::NEG_ONE);
-  reserveRegisterTuples(Reserved, AMDGPU::PV_X);
-  reserveRegisterTuples(Reserved, AMDGPU::ALU_LITERAL_X);
-  reserveRegisterTuples(Reserved, AMDGPU::ALU_CONST);
-  reserveRegisterTuples(Reserved, AMDGPU::PREDICATE_BIT);
-  reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_OFF);
-  reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ZERO);
-  reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ONE);
-  reserveRegisterTuples(Reserved, AMDGPU::INDIRECT_BASE_ADDR);
+  reserveRegisterTuples(Reserved, R600::ZERO);
+  reserveRegisterTuples(Reserved, R600::HALF);
+  reserveRegisterTuples(Reserved, R600::ONE);
+  reserveRegisterTuples(Reserved, R600::ONE_INT);
+  reserveRegisterTuples(Reserved, R600::NEG_HALF);
+  reserveRegisterTuples(Reserved, R600::NEG_ONE);
+  reserveRegisterTuples(Reserved, R600::PV_X);
+  reserveRegisterTuples(Reserved, R600::ALU_LITERAL_X);
+  reserveRegisterTuples(Reserved, R600::ALU_CONST);
+  reserveRegisterTuples(Reserved, R600::PREDICATE_BIT);
+  reserveRegisterTuples(Reserved, R600::PRED_SEL_OFF);
+  reserveRegisterTuples(Reserved, R600::PRED_SEL_ZERO);
+  reserveRegisterTuples(Reserved, R600::PRED_SEL_ONE);
+  reserveRegisterTuples(Reserved, R600::INDIRECT_BASE_ADDR);

-  for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
-       E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+  for (TargetRegisterClass::iterator I = R600::R600_AddrRegClass.begin(),
+       E = R600::R600_AddrRegClass.end(); I != E; ++I) {
     reserveRegisterTuples(Reserved, *I);
   }

@@ -58,7 +61,7 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 }

 // Dummy to not crash RegisterClassInfo.
-static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
+static const MCPhysReg CalleeSavedReg = R600::NoRegister;

 const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
     const MachineFunction *) const {

@@ -66,7 +69,7 @@ const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
 }

 unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  return AMDGPU::NoRegister;
+  return R600::NoRegister;
 }

 unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {

@@ -81,7 +84,7 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
                                                                    MVT VT) const {
   switch(VT.SimpleTy) {
   default:
-  case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+  case MVT::i32: return &R600::R600_TReg32RegClass;
   }
 }

@@ -94,9 +97,9 @@ bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
   assert(!TargetRegisterInfo::isVirtualRegister(Reg));

   switch (Reg) {
-  case AMDGPU::OQAP:
-  case AMDGPU::OQBP:
-  case AMDGPU::AR_X:
+  case R600::OQAP:
+  case R600::OQBP:
+  case R600::AR_X:
     return false;
   default:
     return true;

@@ -109,3 +112,10 @@ void R600RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                            RegScavenger *RS) const {
   llvm_unreachable("Subroutines not supported yet");
 }
+
+void R600RegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
+  MCRegAliasIterator R(Reg, this, true);
+
+  for (; R.isValid(); ++R)
+    Reserved.set(*R);
+}

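reserveRegisterTuples, previously inherited from the common AMDGPURegisterInfo base, is now defined locally: reserving one register must also reserve everything that aliases it, such as the lanes of a wider register tuple. A self-contained sketch of that behavior, with an explicit alias table standing in for what MCRegAliasIterator enumerates:

#include <bitset>
#include <vector>

// Aliases[R] lists every physical register overlapping R, including R itself
// (what MCRegAliasIterator yields when constructed with IncludeSelf = true).
static void reserveWithAliases(std::bitset<512> &Reserved, unsigned Reg,
                               const std::vector<std::vector<unsigned>> &Aliases) {
  for (unsigned A : Aliases[Reg])
    Reserved.set(A);
}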
@@ -15,13 +15,14 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H

-#include "AMDGPURegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "R600GenRegisterInfo.inc"

 namespace llvm {

 class AMDGPUSubtarget;

-struct R600RegisterInfo final : public AMDGPURegisterInfo {
+struct R600RegisterInfo final : public R600GenRegisterInfo {
   RegClassWeight RCW;

   R600RegisterInfo();

@@ -49,6 +50,8 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo {
   void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
+
+  void reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const;
 };

 } // End namespace llvm

@@ -245,7 +245,7 @@ def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
   (add V0123_W, V0123_Z, V0123_Y, V0123_X)
 >;

-def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32, i64, f64], 64,
                                 (add (sequence "T%u_XY", 0, 63))>;

 def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,

@@ -13,7 +13,7 @@
 //
 //===----------------------------------------------------------------------===//

-def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
+def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">;

 let Predicates = [isR700] in {
 def SIN_r700 : SIN_Common<0x6E>;

@@ -76,7 +76,7 @@ public:
   MachineRegisterInfo *MRI;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
-  const SISubtarget *ST;
+  const AMDGPUSubtarget *ST;

   void foldOperand(MachineOperand &OpToFold,
                    MachineInstr *UseMI,

@@ -972,7 +972,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
     return false;

   MRI = &MF.getRegInfo();
-  ST = &MF.getSubtarget<SISubtarget>();
+  ST = &MF.getSubtarget<AMDGPUSubtarget>();
   TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();

@@ -113,7 +113,8 @@ static unsigned findFirstFreeSGPR(CCState &CCInfo) {

 SITargetLowering::SITargetLowering(const TargetMachine &TM,
                                    const SISubtarget &STI)
-    : AMDGPUTargetLowering(TM, STI) {
+    : AMDGPUTargetLowering(TM, STI),
+      Subtarget(&STI) {
   addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
   addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);

@@ -147,7 +148,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
   }

-  computeRegisterProperties(STI.getRegisterInfo());
+  computeRegisterProperties(Subtarget->getRegisterInfo());

   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);

@@ -323,7 +324,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);

-  if (getSubtarget()->hasFlatAddressSpace()) {
+  if (Subtarget->hasFlatAddressSpace()) {
     setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
     setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
   }

@@ -336,6 +337,44 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::TRAP, MVT::Other, Custom);
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom);

+  if (Subtarget->has16BitInsts()) {
+    setOperationAction(ISD::FLOG, MVT::f16, Custom);
+    setOperationAction(ISD::FLOG10, MVT::f16, Custom);
+  }
+
+  // v_mad_f32 does not support denormals according to some sources.
+  if (!Subtarget->hasFP32Denormals())
+    setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+  if (!Subtarget->hasBFI()) {
+    // fcopysign can be done in a single instruction with BFI.
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  }
+
+  if (!Subtarget->hasBCNT(32))
+    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+  if (!Subtarget->hasBCNT(64))
+    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+  if (Subtarget->hasFFBH())
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+
+  if (Subtarget->hasFFBL())
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+
+  // We only really have 32-bit BFE instructions (and 16-bit on VI).
+  //
+  // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
+  // effort to match them now. We want this to be false for i64 cases when the
+  // extraction isn't restricted to the upper or lower half. Ideally we would
+  // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
+  // span the midpoint are probably relatively rare, so don't worry about them
+  // for now.
+  if (Subtarget->hasBFE())
+    setHasExtractBitsInsn(true);
+
   setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
   setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);

@@ -343,6 +382,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
     setOperationAction(ISD::FCEIL, MVT::f64, Legal);
     setOperationAction(ISD::FRINT, MVT::f64, Legal);
+  } else {
+    setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+    setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+    setOperationAction(ISD::FRINT, MVT::f64, Custom);
+    setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
   }

   setOperationAction(ISD::FFLOOR, MVT::f64, Legal);

@@ -616,10 +660,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);

   setSchedulingPreference(Sched::RegPressure);
+
+  // SI at least has hardware support for floating point exceptions, but no way
+  // of using or handling them is implemented. They are also optional in OpenCL
+  // (Section 7.3)
+  setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
 }

 const SISubtarget *SITargetLowering::getSubtarget() const {
-  return static_cast<const SISubtarget *>(Subtarget);
+  return Subtarget;
 }

 //===----------------------------------------------------------------------===//

@@ -2012,8 +2061,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,

   // FIXME: Does sret work properly?
   if (!Info->isEntryFunction()) {
-    const SIRegisterInfo *TRI
-      = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
+    const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
     const MCPhysReg *I =
       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
     if (I) {

@@ -2115,8 +2163,7 @@ void SITargetLowering::passSpecialInputs(
   SelectionDAG &DAG = CLI.DAG;
   const SDLoc &DL = CLI.DL;

-  const SISubtarget *ST = getSubtarget();
-  const SIRegisterInfo *TRI = ST->getRegisterInfo();
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

   auto &ArgUsageInfo =
     DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();

@@ -2561,7 +2608,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,

   // Add a register mask operand representing the call-preserved registers.

-  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
+  auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));

@@ -8179,8 +8226,7 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
-  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

   if (Info->isEntryFunction()) {
     // Callable functions have fixed registers used for stack access.

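The SITargetLowering hunks share one refactor: instead of static_cast-ing the base class's subtarget pointer at each use, the class now caches the derived pointer in its own Subtarget member at construction time, and every later query goes through it. A simplified stand-in of the shape of that change (stub types, not the LLVM classes):

struct AMDGPUSubtargetStub {};
struct SISubtargetStub : AMDGPUSubtargetStub {
  bool hasFlatAddressSpace() const { return true; }
};

struct SITargetLoweringStub {
  const SISubtargetStub *Subtarget; // cached derived-type pointer (the new member)

  explicit SITargetLoweringStub(const SISubtargetStub &STI) : Subtarget(&STI) {
    if (Subtarget->hasFlatAddressSpace()) {
      // mark ADDRSPACECAST as Custom, etc.; no static_cast needed any more
    }
  }
};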
@@ -22,6 +22,9 @@
 namespace llvm {

 class SITargetLowering final : public AMDGPUTargetLowering {
+private:
+  const SISubtarget *Subtarget;
+
   SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
                                    SDValue Chain, uint64_t Offset) const;
   SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;

@@ -934,8 +934,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
     // All waits must be resolved at call return.
     // NOTE: this could be improved with knowledge of all call sites or
     //       with knowledge of the called routines.
-    if (MI.getOpcode() == AMDGPU::RETURN ||
-        MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+    if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
         MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
       for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
            T = (enum InstCounterType)(T + 1)) {

@@ -1131,7 +1130,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
   // TODO: Remove this work-around, enable the assert for Bug 457939
   //       after fixing the scheduler. Also, the Shader Compiler code is
   //       independent of target.
-  if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
+  if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
     if (ScoreBrackets->getScoreLB(LGKM_CNT) <
         ScoreBrackets->getScoreUB(LGKM_CNT) &&
         ScoreBrackets->hasPendingSMEM()) {

@@ -1716,7 +1715,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
       if (ScoreBrackets->getScoreLB(LGKM_CNT) <
           ScoreBrackets->getScoreUB(LGKM_CNT) &&
           ScoreBrackets->hasPendingSMEM()) {
-        if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS)
+        if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
           VCCZBugWorkAround = true;
       }
     }

@@ -21,7 +21,7 @@ def isSI : Predicate<"Subtarget->getGeneration() "

 class InstSI <dag outs, dag ins, string asm = "",
               list<dag> pattern = []> :
-  AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
+  AMDGPUInst<outs, ins, asm, pattern>, GCNPredicateControl {
   let SubtargetPredicate = isGCN;

   // Low bits - basic encoding information.

@@ -14,6 +14,7 @@

 #include "SIInstrInfo.h"
 #include "AMDGPU.h"
+#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "GCNHazardRecognizer.h"
 #include "SIDefines.h"

@@ -63,6 +64,19 @@

 using namespace llvm;

+#define GET_INSTRINFO_CTOR_DTOR
+#include "AMDGPUGenInstrInfo.inc"
+
+namespace llvm {
+namespace AMDGPU {
+#define GET_D16ImageDimIntrinsics_IMPL
+#define GET_ImageDimIntrinsicTable_IMPL
+#define GET_RsrcIntrinsics_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+}
+}
+
+
 // Must be at least 4 to be able to branch over minimum unconditional branch
 // code. This is only for making it possible to write reasonably small tests for
 // long branches.

@@ -71,7 +85,8 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                  cl::desc("Restrict range of branch instructions (DEBUG)"));

 SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
-  : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
+  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
+    RI(ST), ST(ST) {}

 //===----------------------------------------------------------------------===//
 // TargetInstrInfo callbacks

@@ -438,6 +453,28 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
   return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
 }

+// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
+// the first 16 loads will be interleaved with the stores, and the next 16 will
+// be clustered as expected. It should really split into 2 16 store batches.
+//
+// Loads are clustered until this returns false, rather than trying to schedule
+// groups of stores. This also means we have to deal with saying different
+// address space loads should be clustered, and ones which might cause bank
+// conflicts.
+//
+// This might be deprecated so it might not be worth that much effort to fix.
+bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
+                                          int64_t Offset0, int64_t Offset1,
+                                          unsigned NumLoads) const {
+  assert(Offset1 > Offset0 &&
+         "Second offset should be larger than first offset!");
+  // If we have less than 16 loads in a row, and the offsets are within 64
+  // bytes, then schedule together.
+
+  // A cacheline is 64 bytes (for global memory).
+  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
+}
+
 static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, unsigned DestReg,

@@ -998,7 +1035,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
     unsigned FrameOffset, unsigned Size) const {
   MachineFunction *MF = MBB.getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
   DebugLoc DL = MBB.findDebugLoc(MI);
   unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
   unsigned WavefrontSize = ST.getWavefrontSize();

@@ -1134,7 +1171,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc DL = MBB.findDebugLoc(MI);
   switch (MI.getOpcode()) {
-  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+  default: return TargetInstrInfo::expandPostRAPseudo(MI);
   case AMDGPU::S_MOV_B64_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.

@@ -1900,16 +1937,16 @@ unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(
   switch(Kind) {
   case PseudoSourceValue::Stack:
   case PseudoSourceValue::FixedStack:
-    return AMDGPUASI.PRIVATE_ADDRESS;
+    return ST.getAMDGPUAS().PRIVATE_ADDRESS;
   case PseudoSourceValue::ConstantPool:
   case PseudoSourceValue::GOT:
   case PseudoSourceValue::JumpTable:
   case PseudoSourceValue::GlobalValueCallEntry:
   case PseudoSourceValue::ExternalSymbolCallEntry:
   case PseudoSourceValue::TargetCustom:
-    return AMDGPUASI.CONSTANT_ADDRESS;
+    return ST.getAMDGPUAS().CONSTANT_ADDRESS;
   }
-  return AMDGPUASI.FLAT_ADDRESS;
+  return ST.getAMDGPUAS().FLAT_ADDRESS;
 }

 static void removeModOperands(MachineInstr &MI) {

@@ -4649,7 +4686,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
     return AMDGPU::NoRegister;

   assert(!MI.memoperands_empty() &&
-         (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
+         (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);

   FrameIndex = Addr->getIndex();
   return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();

@@ -4768,7 +4805,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
     return true;

   for (const MachineMemOperand *MMO : MI.memoperands()) {
-    if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
+    if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)
       return true;
   }
   return false;

@@ -4948,3 +4985,55 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
   const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
   return RCID == AMDGPU::SReg_128RegClassID;
 }
+
+// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
+enum SIEncodingFamily {
+  SI = 0,
+  VI = 1,
+  SDWA = 2,
+  SDWA9 = 3,
+  GFX80 = 4,
+  GFX9 = 5
+};
+
+static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) {
+  switch (ST.getGeneration()) {
+  case SISubtarget::SOUTHERN_ISLANDS:
+  case SISubtarget::SEA_ISLANDS:
+    return SIEncodingFamily::SI;
+  case SISubtarget::VOLCANIC_ISLANDS:
+  case SISubtarget::GFX9:
+    return SIEncodingFamily::VI;
+  }
+  llvm_unreachable("Unknown subtarget generation!");
+}
+
+int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
+  SIEncodingFamily Gen = subtargetEncodingFamily(ST);
+
+  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
+      ST.getGeneration() >= SISubtarget::GFX9)
+    Gen = SIEncodingFamily::GFX9;
+
+  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
+    Gen = ST.getGeneration() == SISubtarget::GFX9 ? SIEncodingFamily::SDWA9
+                                                  : SIEncodingFamily::SDWA;
+  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
+  // subtarget has UnpackedD16VMem feature.
+  // TODO: remove this when we discard GFX80 encoding.
+  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
+    Gen = SIEncodingFamily::GFX80;
+
+  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
+
+  // -1 means that Opcode is already a native instruction.
+  if (MCOp == -1)
+    return Opcode;
+
+  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
+  // no encoding in the given subtarget generation.
+  if (MCOp == (uint16_t)-1)
+    return -1;
+
+  return MCOp;
+}

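The new pseudoToMCOpcode centralizes the pseudo-to-real opcode mapping for encoding. Its contract has three cases, which the standalone sketch below mirrors with a made-up one-row table standing in for the TableGen-generated AMDGPU::getMCOpcode:

#include <cstdint>

enum SIEncodingFamilySketch { SI, VI, SDWA, SDWA9, GFX80, GFX9 };

// Stand-in for the generated table: one row per pseudo opcode, one column per
// encoding family; 0xffff marks "no encoding for this family".
static int getMCOpcodeSketch(int Opcode, SIEncodingFamilySketch Gen) {
  static const int Table[1][6] = {{100, 200, 0xffff, 0xffff, 0xffff, 300}};
  return Opcode == 0 ? Table[0][Gen] : -1; // -1: already a native instruction
}

static int pseudoToMCOpcodeSketch(int Opcode, SIEncodingFamilySketch Gen) {
  int MCOp = getMCOpcodeSketch(Opcode, Gen);
  if (MCOp == -1)           // native instruction: identity mapping
    return Opcode;
  if (MCOp == (uint16_t)-1) // pseudo with no encoding on this generation
    return -1;              // caller must treat the instruction as unencodable
  return MCOp;
}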
@@ -31,6 +31,9 @@
 #include <cassert>
 #include <cstdint>

+#define GET_INSTRINFO_HEADER
+#include "AMDGPUGenInstrInfo.inc"
+
 namespace llvm {

 class APInt;

@@ -39,7 +42,7 @@ class RegScavenger;
 class SISubtarget;
 class TargetRegisterClass;

-class SIInstrInfo final : public AMDGPUInstrInfo {
+class SIInstrInfo final : public AMDGPUGenInstrInfo {
 private:
   const SIRegisterInfo RI;
   const SISubtarget &ST;

@@ -163,7 +166,10 @@ public:

   bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
                            MachineInstr &SecondLdSt, unsigned BaseReg2,
-                           unsigned NumLoads) const final;
+                           unsigned NumLoads) const override;

+  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
+                               int64_t Offset1, unsigned NumLoads) const override;
+
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                    const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,

@@ -871,6 +877,12 @@ public:
   static bool isLegalMUBUFImmOffset(unsigned Imm) {
     return isUInt<12>(Imm);
   }
+
+  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
+  /// Return -1 if the target-specific opcode for the pseudo instruction does
+  /// not exist. If Opcode is not a pseudo instruction, this is identity.
+  int pseudoToMCOpcode(int Opcode) const;
+
 };

 namespace AMDGPU {

@@ -17,6 +17,11 @@ def isVIOnly : Predicate<"Subtarget->getGeneration() =="

 def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;

+class GCNPredicateControl : PredicateControl {
+  Predicate SIAssemblerPredicate = isSICI;
+  Predicate VIAssemblerPredicate = isVI;
+}
+
 // Execpt for the NONE field, this must be kept in sync with the
 // SIEncodingFamily enum in AMDGPUInstrInfo.cpp
 def SIEncodingFamily {

@@ -11,11 +11,10 @@
 // that are not yet supported remain commented out.
 //===----------------------------------------------------------------------===//

-class GCNPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
+class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateControl {
   let SubtargetPredicate = isGCN;
 }

-
 include "VOPInstructions.td"
 include "SOPInstructions.td"
 include "SMInstructions.td"

@@ -1232,8 +1232,6 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
     &AMDGPU::VReg_512RegClass,
     &AMDGPU::SReg_512RegClass,
     &AMDGPU::SCC_CLASSRegClass,
-    &AMDGPU::R600_Reg32RegClass,
-    &AMDGPU::R600_PredicateRegClass,
    &AMDGPU::Pseudo_SReg_32RegClass,
    &AMDGPU::Pseudo_SReg_128RegClass,
  };

@@ -21,6 +21,7 @@

 namespace llvm {

+class AMDGPUSubtarget;
 class LiveIntervals;
 class MachineRegisterInfo;
 class SISubtarget;

@@ -181,7 +181,7 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
   if (Features.test(FeatureGFX9))
     return {9, 0, 0};

-  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+  if (Features.test(FeatureSouthernIslands))
     return {0, 0, 0};
   return {7, 0, 0};
 }

@@ -243,7 +243,7 @@ unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
 }

 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
-  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+  return getMaxWavesPerEU() * getEUsPerCU(Features);
 }

 unsigned getMaxWavesPerCU(const FeatureBitset &Features,

@@ -255,9 +255,7 @@ unsigned getMinWavesPerEU(const FeatureBitset &Features) {
   return 1;
 }

-unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
-  if (!Features.test(FeatureGCN))
-    return 8;
+unsigned getMaxWavesPerEU() {
   // FIXME: Need to take scratch memory into account.
   return 10;
 }

@@ -313,7 +311,7 @@ unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
   assert(WavesPerEU != 0);

-  if (WavesPerEU >= getMaxWavesPerEU(Features))
+  if (WavesPerEU >= getMaxWavesPerEU())
     return 0;

   unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);

@@ -390,7 +388,7 @@ unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
   assert(WavesPerEU != 0);

-  if (WavesPerEU >= getMaxWavesPerEU(Features))
+  if (WavesPerEU >= getMaxWavesPerEU())
     return 0;
   unsigned MinNumVGPRs =
       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),

@@ -735,6 +733,8 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
   case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+  if (STI.getTargetTriple().getArch() == Triple::r600)
+    return Reg;
   MAP_REG2REG
 }

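Dropping the FeatureBitset argument makes getMaxWavesPerEU a GCN-only constant: the pre-GCN (R600) value of 8 no longer flows through this path, and GCN parts allow 10 waves per execution unit. The per-CU arithmetic then follows directly; assuming the usual 4 EUs (SIMDs) per compute unit that getEUsPerCU encodes for GCN:

// Worked example (EUsPerCU = 4 is an assumption, the common GCN configuration).
constexpr unsigned MaxWavesPerEU = 10; // was 8 on pre-GCN (R600) parts
constexpr unsigned EUsPerCU = 4;
static_assert(MaxWavesPerEU * EUsPerCU == 40,
              "getMaxWavesPerCU: at most 40 waves resident per CU");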
@@ -100,7 +100,7 @@ unsigned getMinWavesPerEU(const FeatureBitset &Features);

 /// \returns Maximum number of waves per execution unit for given subtarget \p
 /// Features without any kind of limitation.
-unsigned getMaxWavesPerEU(const FeatureBitset &Features);
+unsigned getMaxWavesPerEU();

 /// \returns Maximum number of waves per execution unit for given subtarget \p
 /// Features and limited by given \p FlatWorkGroupSize.