forked from OSchip/llvm-project
[AMDGPU] Add subtarget features for SDWA/DPP
Reviewers: vpykhtin, artem.tamazov, tstellarAMD
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye
Differential Revision: https://reviews.llvm.org/D28900
llvm-svn: 292596
This commit is contained in:
parent
def8f90e67
commit
07dbde214b
|
@ -190,6 +190,18 @@ def FeatureScalarStores : SubtargetFeature<"scalar-stores",
|
||||||
"Has store scalar memory instructions"
|
"Has store scalar memory instructions"
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
def FeatureSDWA : SubtargetFeature<"sdwa",
|
||||||
|
"HasSDWA",
|
||||||
|
"true",
|
||||||
|
"Support SDWA (Sub-DWORD Addressing) extension"
|
||||||
|
>;
|
||||||
|
|
||||||
|
def FeatureDPP : SubtargetFeature<"dpp",
|
||||||
|
"HasDPP",
|
||||||
|
"true",
|
||||||
|
"Support DPP (Data Parallel Primitives) extension"
|
||||||
|
>;
|
||||||
|
|
||||||
//===------------------------------------------------------------===//
|
//===------------------------------------------------------------===//
|
||||||
// Subtarget Features (options and debugging)
|
// Subtarget Features (options and debugging)
|
||||||
//===------------------------------------------------------------===//
|
//===------------------------------------------------------------===//
|
||||||
|
@ -337,7 +349,8 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||||
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
|
||||||
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
|
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
|
||||||
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
|
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
|
||||||
FeatureScalarStores, FeatureInv2PiInlineImm
|
FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
|
||||||
|
FeatureDPP
|
||||||
]
|
]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
@ -507,6 +520,12 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
|
||||||
|
|
||||||
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
|
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
|
||||||
|
|
||||||
|
def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
|
||||||
|
AssemblerPredicate<"FeatureSDWA">;
|
||||||
|
|
||||||
|
def HasDPP : Predicate<"Subtarget->hasDPP()">,
|
||||||
|
AssemblerPredicate<"FeatureDPP">;
|
||||||
|
|
||||||
class PredicateControl {
|
class PredicateControl {
|
||||||
Predicate SubtargetPredicate;
|
Predicate SubtargetPredicate;
|
||||||
Predicate SIAssemblerPredicate = isSICI;
|
Predicate SIAssemblerPredicate = isSICI;
|
||||||
|
|
|
@ -109,6 +109,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||||
HasMovrel(false),
|
HasMovrel(false),
|
||||||
HasVGPRIndexMode(false),
|
HasVGPRIndexMode(false),
|
||||||
HasScalarStores(false),
|
HasScalarStores(false),
|
||||||
|
HasSDWA(false),
|
||||||
|
HasDPP(false),
|
||||||
HasInv2PiInlineImm(false),
|
HasInv2PiInlineImm(false),
|
||||||
FlatAddressSpace(false),
|
FlatAddressSpace(false),
|
||||||
|
|
||||||
|
|
|
@ -114,6 +114,8 @@ protected:
|
||||||
bool HasVGPRIndexMode;
|
bool HasVGPRIndexMode;
|
||||||
bool HasScalarStores;
|
bool HasScalarStores;
|
||||||
bool HasInv2PiInlineImm;
|
bool HasInv2PiInlineImm;
|
||||||
|
bool HasSDWA;
|
||||||
|
bool HasDPP;
|
||||||
bool FlatAddressSpace;
|
bool FlatAddressSpace;
|
||||||
bool R600ALUInst;
|
bool R600ALUInst;
|
||||||
bool CaymanISA;
|
bool CaymanISA;
|
||||||
|
@ -552,6 +554,14 @@ public:
|
||||||
return HasInv2PiInlineImm;
|
return HasInv2PiInlineImm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool hasSDWA() const {
|
||||||
|
return HasSDWA;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hasDPP() const {
|
||||||
|
return HasDPP;
|
||||||
|
}
|
||||||
|
|
||||||
bool enableSIScheduler() const {
|
bool enableSIScheduler() const {
|
||||||
return EnableSIScheduler;
|
return EnableSIScheduler;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3442,7 +3442,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
|
||||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
||||||
// Add the register arguments
|
// Add the register arguments
|
||||||
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
|
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
|
||||||
// VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token.
|
// VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
|
||||||
// Skip it.
|
// Skip it.
|
||||||
continue;
|
continue;
|
||||||
} if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
|
} if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
|
||||||
|
|
|
@ -308,6 +308,14 @@ public:
|
||||||
return get(Opcode).TSFlags & SIInstrFlags::VOP3;
|
return get(Opcode).TSFlags & SIInstrFlags::VOP3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isSDWA(const MachineInstr &MI) {
|
||||||
|
return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isSDWA(uint16_t Opcode) const {
|
||||||
|
return get(Opcode).TSFlags & SIInstrFlags::SDWA;
|
||||||
|
}
|
||||||
|
|
||||||
static bool isVOPC(const MachineInstr &MI) {
|
static bool isVOPC(const MachineInstr &MI) {
|
||||||
return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
|
return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
|
||||||
}
|
}
|
||||||
|
|
|
@ -119,8 +119,7 @@ multiclass VOP2Inst <string opName,
|
||||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||||
|
|
||||||
def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
|
def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
|
||||||
Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst
|
// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst
|
||||||
|
@ -135,9 +134,9 @@ multiclass VOP2bInst <string opName,
|
||||||
def _e32 : VOP2_Pseudo <opName, P>,
|
def _e32 : VOP2_Pseudo <opName, P>,
|
||||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||||
|
|
||||||
def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
|
def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
|
||||||
Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||||
}
|
}
|
||||||
|
@ -154,6 +153,7 @@ multiclass VOP2eInst <string opName,
|
||||||
def _e32 : VOP2_Pseudo <opName, P>,
|
def _e32 : VOP2_Pseudo <opName, P>,
|
||||||
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
|
||||||
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -165,13 +165,11 @@ multiclass VOPC_Pseudos <string opName,
|
||||||
let isCommutable = 1;
|
let isCommutable = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
def _sdwa : VOPC_SDWA_Pseudo <opName, P>,
|
def _sdwa : VOPC_SDWA_Pseudo <opName, P> {
|
||||||
Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
|
|
||||||
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
|
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
|
||||||
let SchedRW = P.Schedule;
|
let SchedRW = P.Schedule;
|
||||||
let isConvergent = DefExec;
|
let isConvergent = DefExec;
|
||||||
let isCompare = 1;
|
let isCompare = 1;
|
||||||
let isCommutable = 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -267,8 +267,8 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
|
||||||
let SDWA = 1;
|
let SDWA = 1;
|
||||||
let Uses = [EXEC];
|
let Uses = [EXEC];
|
||||||
|
|
||||||
let SubtargetPredicate = isVI;
|
let SubtargetPredicate = HasSDWA;
|
||||||
let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
|
let AssemblerPredicate = !if(P.HasExt, HasSDWA, DisableInst);
|
||||||
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
|
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
|
||||||
AMDGPUAsmVariants.Disable);
|
AMDGPUAsmVariants.Disable);
|
||||||
let DecoderNamespace = "SDWA";
|
let DecoderNamespace = "SDWA";
|
||||||
|
@ -337,8 +337,8 @@ class VOP_DPP <string OpName, VOPProfile P> :
|
||||||
let Size = 8;
|
let Size = 8;
|
||||||
|
|
||||||
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
|
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
|
||||||
let SubtargetPredicate = isVI;
|
let SubtargetPredicate = HasDPP;
|
||||||
let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
|
let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
|
||||||
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
|
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
|
||||||
AMDGPUAsmVariants.Disable);
|
AMDGPUAsmVariants.Disable);
|
||||||
let DecoderNamespace = "DPP";
|
let DecoderNamespace = "DPP";
|
||||||
|
|
Loading…
Reference in New Issue