forked from OSchip/llvm-project
R600: Add FetchInst bit to instruction defs to denote vertex/tex instructions
v2 [Vincent Lejeune]: Split FetchInst into usesTextureCache/usesVertexCache
llvm-svn: 180755
This commit is contained in:
parent
db6c6ea21c
commit
c299164284
|
@ -33,6 +33,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
|||
DefaultSize[0] = 64;
|
||||
DefaultSize[1] = 1;
|
||||
DefaultSize[2] = 1;
|
||||
HasVertexCache = false;
|
||||
ParseSubtargetFeatures(GPU, FS);
|
||||
DevName = GPU;
|
||||
Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
|
||||
|
@ -53,6 +54,10 @@ AMDGPUSubtarget::is64bit() const {
|
|||
return Is64bit;
|
||||
}
|
||||
/// \returns the HasVertexCache flag of this subtarget. The constructor
/// initializes the flag to false before ParseSubtargetFeatures() runs; the
/// "HasVertexCache" subtarget feature is described as "Specify use of
/// dedicated vertex cache."
bool
|
||||
AMDGPUSubtarget::hasVertexCache() const {
|
||||
return HasVertexCache;
|
||||
}
|
||||
bool
|
||||
AMDGPUSubtarget::isTargetELF() const {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ private:
|
|||
bool Is32on64bit;
|
||||
bool DumpCode;
|
||||
bool R600ALUInst;
|
||||
bool HasVertexCache;
|
||||
|
||||
InstrItineraryData InstrItins;
|
||||
|
||||
|
@ -48,6 +49,7 @@ public:
|
|||
|
||||
bool isOverride(AMDGPUDeviceInfo::Caps) const;
|
||||
bool is64bit() const;
|
||||
bool hasVertexCache() const;
|
||||
|
||||
// Helper functions to simplify if statements
|
||||
bool isTargetELF() const;
|
||||
|
|
|
@ -74,6 +74,10 @@ def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
|
|||
"false",
|
||||
"Older version of ALU instructions encoding.">;
|
||||
|
||||
// Subtarget feature "HasVertexCache": when listed for a processor it sets the
// subtarget's HasVertexCache attribute to "true". Processors that omit it keep
// the constructor default (false).
def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
|
||||
"HasVertexCache",
|
||||
"true",
|
||||
"Specify use of dedicated vertex cache.">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File, Calling Conv, Instruction Descriptions
|
||||
|
|
|
@ -13,23 +13,38 @@
|
|||
|
||||
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
|
||||
: Processor<Name, itin, Features>;
|
||||
def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>;
|
||||
def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
|
||||
def : Proc<"rs880", R600_EG_Itin, [FeatureR600ALUInst]>;
|
||||
def : Proc<"rv670", R600_EG_Itin, [FeatureR600ALUInst, FeatureFP64]>;
|
||||
def : Proc<"rv710", R600_EG_Itin, []>;
|
||||
def : Proc<"rv730", R600_EG_Itin, []>;
|
||||
def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
|
||||
def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"sumo", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||
def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||
def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"", R600_EG_Itin,
|
||||
[FeatureR600ALUInst, FeatureVertexCache]>;
|
||||
def : Proc<"r600", R600_EG_Itin,
|
||||
[FeatureR600ALUInst , FeatureVertexCache]>;
|
||||
def : Proc<"rs880", R600_EG_Itin,
|
||||
[FeatureR600ALUInst]>;
|
||||
def : Proc<"rv670", R600_EG_Itin,
|
||||
[FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>;
|
||||
def : Proc<"rv710", R600_EG_Itin,
|
||||
[FeatureVertexCache]>;
|
||||
def : Proc<"rv730", R600_EG_Itin,
|
||||
[FeatureVertexCache]>;
|
||||
def : Proc<"rv770", R600_EG_Itin,
|
||||
[FeatureFP64, FeatureVertexCache]>;
|
||||
def : Proc<"cedar", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
|
||||
def : Proc<"redwood", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
|
||||
def : Proc<"sumo", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"juniper", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
|
||||
def : Proc<"cypress", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>;
|
||||
def : Proc<"barts", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
|
||||
def : Proc<"turks", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
|
||||
def : Proc<"caicos", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"cayman", R600_EG_Itin,
|
||||
[FeatureByteAddress, FeatureImages, FeatureFP64]>;
def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
|
||||
|
|
|
@ -32,6 +32,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
|
|||
private:
|
||||
enum ControlFlowInstruction {
|
||||
CF_TC,
|
||||
CF_VC,
|
||||
CF_CALL_FS,
|
||||
CF_WHILE_LOOP,
|
||||
CF_END_LOOP,
|
||||
|
@ -48,39 +49,6 @@ private:
|
|||
unsigned MaxFetchInst;
|
||||
const AMDGPUSubtarget &ST;
|
||||
|
||||
bool isFetch(const MachineInstr *MI) const {
|
||||
switch (MI->getOpcode()) {
|
||||
case AMDGPU::TEX_VTX_CONSTBUF:
|
||||
case AMDGPU::TEX_VTX_TEXBUF:
|
||||
case AMDGPU::TEX_LD:
|
||||
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
|
||||
case AMDGPU::TEX_GET_GRADIENTS_H:
|
||||
case AMDGPU::TEX_GET_GRADIENTS_V:
|
||||
case AMDGPU::TEX_SET_GRADIENTS_H:
|
||||
case AMDGPU::TEX_SET_GRADIENTS_V:
|
||||
case AMDGPU::TEX_SAMPLE:
|
||||
case AMDGPU::TEX_SAMPLE_C:
|
||||
case AMDGPU::TEX_SAMPLE_L:
|
||||
case AMDGPU::TEX_SAMPLE_C_L:
|
||||
case AMDGPU::TEX_SAMPLE_LB:
|
||||
case AMDGPU::TEX_SAMPLE_C_LB:
|
||||
case AMDGPU::TEX_SAMPLE_G:
|
||||
case AMDGPU::TEX_SAMPLE_C_G:
|
||||
case AMDGPU::TXD:
|
||||
case AMDGPU::TXD_SHADOW:
|
||||
case AMDGPU::VTX_READ_GLOBAL_8_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_128_eg:
|
||||
case AMDGPU::VTX_READ_PARAM_8_eg:
|
||||
case AMDGPU::VTX_READ_PARAM_16_eg:
|
||||
case AMDGPU::VTX_READ_PARAM_32_eg:
|
||||
case AMDGPU::VTX_READ_PARAM_128_eg:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsTrivialInst(MachineInstr *MI) const {
|
||||
switch (MI->getOpcode()) {
|
||||
case AMDGPU::KILL:
|
||||
|
@ -98,6 +66,9 @@ private:
|
|||
case CF_TC:
|
||||
Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
|
||||
break;
|
||||
case CF_VC:
|
||||
Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
|
||||
break;
|
||||
case CF_CALL_FS:
|
||||
Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
|
||||
break;
|
||||
|
@ -139,17 +110,19 @@ private:
|
|||
unsigned CfAddress) const {
|
||||
MachineBasicBlock::iterator ClauseHead = I;
|
||||
unsigned AluInstCount = 0;
|
||||
bool IsTex = TII->usesTextureCache(ClauseHead);
|
||||
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
|
||||
if (IsTrivialInst(I))
|
||||
continue;
|
||||
if (!isFetch(I))
|
||||
if ((IsTex && !TII->usesTextureCache(I)) ||
|
||||
(!IsTex && !TII->usesVertexCache(I)))
|
||||
break;
|
||||
AluInstCount ++;
|
||||
if (AluInstCount > MaxFetchInst)
|
||||
break;
|
||||
}
|
||||
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
|
||||
getHWInstrDesc(CF_TC))
|
||||
getHWInstrDesc(IsTex?CF_TC:CF_VC))
|
||||
.addImm(CfAddress) // ADDR
|
||||
.addImm(AluInstCount); // COUNT
|
||||
return I;
|
||||
|
@ -211,7 +184,7 @@ public:
|
|||
}
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E;) {
|
||||
if (isFetch(I)) {
|
||||
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
|
||||
DEBUG(dbgs() << CfCount << ":"; I->dump(););
|
||||
I = MakeFetchClause(MBB, I, 0);
|
||||
CfCount++;
|
||||
|
|
|
@ -39,7 +39,9 @@ namespace R600_InstFlag {
|
|||
//FlagOperand bits 7, 8
|
||||
NATIVE_OPERANDS = (1 << 9),
|
||||
OP1 = (1 << 10),
|
||||
OP2 = (1 << 11)
|
||||
OP2 = (1 << 11),
|
||||
VTX_INST = (1 << 12),
|
||||
TEX_INST = (1 << 13)
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,8 @@ using namespace llvm;
|
|||
|
||||
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
|
||||
: AMDGPUInstrInfo(tm),
|
||||
RI(tm, *this)
|
||||
RI(tm, *this),
|
||||
ST(tm.getSubtarget<AMDGPUSubtarget>())
|
||||
{ }
|
||||
|
||||
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
|
||||
|
@ -139,6 +140,23 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
|
|||
(TargetFlags & R600_InstFlag::OP3));
|
||||
}
|
||||
|
||||
/// \returns true if \p Opcode has the VTX_INST bit set in its TSFlags and the
/// subtarget reports a vertex cache (ST.hasVertexCache()).
///
/// Note: `&` binds tighter than `&&`, so the TSFlags mask test is evaluated
/// as a unit before being combined with the subtarget check.
bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
|
||||
return ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST;
|
||||
}
|
||||
|
||||
/// Convenience overload: forwards the opcode of \p MI to
/// usesVertexCache(unsigned).
bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
|
||||
return usesVertexCache(MI->getOpcode());
|
||||
}
|
||||
|
||||
/// \returns true if \p Opcode has the TEX_INST bit set in its TSFlags, or if
/// it has the VTX_INST bit set while the subtarget reports no vertex cache
/// (!ST.hasVertexCache()).
///
/// Together with usesVertexCache(), a VTX_INST opcode is therefore reported
/// by exactly one of the two predicates, depending on the subtarget.
bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
|
||||
return (!ST.hasVertexCache() && get(Opcode).TSFlags & R600_InstFlag::VTX_INST) ||
|
||||
(get(Opcode).TSFlags & R600_InstFlag::TEX_INST);
|
||||
}
|
||||
|
||||
/// Convenience overload: forwards the opcode of \p MI to
/// usesTextureCache(unsigned).
bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
|
||||
return usesTextureCache(MI->getOpcode());
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
|
||||
const {
|
||||
|
|
|
@ -33,6 +33,7 @@ namespace llvm {
|
|||
class R600InstrInfo : public AMDGPUInstrInfo {
|
||||
private:
|
||||
const R600RegisterInfo RI;
|
||||
const AMDGPUSubtarget &ST;
|
||||
|
||||
int getBranchInstr(const MachineOperand &op) const;
|
||||
|
||||
|
@ -53,6 +54,11 @@ namespace llvm {
|
|||
/// \returns true if this \p Opcode represents an ALU instruction.
|
||||
bool isALUInstr(unsigned Opcode) const;
|
||||
|
||||
bool usesVertexCache(unsigned Opcode) const;
|
||||
bool usesVertexCache(const MachineInstr *MI) const;
|
||||
bool usesTextureCache(unsigned Opcode) const;
|
||||
bool usesTextureCache(const MachineInstr *MI) const;
|
||||
|
||||
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
|
||||
bool canBundle(const std::vector<MachineInstr *> &) const;
|
||||
|
||||
|
|
|
@ -25,6 +25,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
|
|||
bit Op1 = 0;
|
||||
bit Op2 = 0;
|
||||
bit HasNativeOperands = 0;
|
||||
bit VTXInst = 0;
|
||||
bit TEXInst = 0;
|
||||
|
||||
let Namespace = "AMDGPU";
|
||||
let OutOperandList = outs;
|
||||
|
@ -43,6 +45,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
|
|||
let TSFlags{9} = HasNativeOperands;
|
||||
let TSFlags{10} = Op1;
|
||||
let TSFlags{11} = Op2;
|
||||
let TSFlags{12} = VTXInst;
|
||||
let TSFlags{13} = TEXInst;
|
||||
}
|
||||
|
||||
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
|
@ -478,6 +482,8 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
|
|||
let COORD_TYPE_Y = 0;
|
||||
let COORD_TYPE_Z = 0;
|
||||
let COORD_TYPE_W = 0;
|
||||
|
||||
let TEXInst = 1;
|
||||
}
|
||||
|
||||
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
|
||||
|
@ -1784,6 +1790,8 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
|
|||
// VTX_WORD3 (Padding)
|
||||
//
|
||||
// Inst{127-96} = 0;
|
||||
|
||||
let VTXInst = 1;
|
||||
}
|
||||
|
||||
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
|
||||
|
@ -2012,15 +2020,17 @@ def TXD: InstR600 <
|
|||
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
|
||||
"TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
|
||||
[(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))], NullALU> {
|
||||
>;
|
||||
let TEXInst = 1;
|
||||
}
|
||||
|
||||
def TXD_SHADOW: InstR600 <
|
||||
(outs R600_Reg128:$dst),
|
||||
(ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
|
||||
"TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
|
||||
[(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], NullALU
|
||||
>;
|
||||
|
||||
> {
|
||||
let TEXInst = 1;
|
||||
}
|
||||
} // End isPseudo = 1
|
||||
} // End usesCustomInserter = 1
|
||||
|
||||
|
@ -2106,6 +2116,7 @@ def TEX_VTX_CONSTBUF :
|
|||
// VTX_WORD3 (Padding)
|
||||
//
|
||||
// Inst{127-96} = 0;
|
||||
let VTXInst = 1;
|
||||
}
|
||||
|
||||
def TEX_VTX_TEXBUF:
|
||||
|
@ -2159,6 +2170,7 @@ let Inst{63-32} = Word1;
|
|||
// VTX_WORD3 (Padding)
|
||||
//
|
||||
// Inst{127-96} = 0;
|
||||
let VTXInst = 1;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
;CHECK: TEX
|
||||
;CHECK: VTX
|
||||
;CHECK: ALU_PUSH
|
||||
;CHECK: JUMP @4
|
||||
;CHECK: ELSE @16
|
||||
;CHECK: TEX
|
||||
;CHECK: VTX
|
||||
;CHECK: LOOP_START_DX10 @15
|
||||
;CHECK: LOOP_BREAK @14
|
||||
;CHECK: POP @16
|
||||
|
|
Loading…
Reference in New Issue