forked from OSchip/llvm-project
[AMDGPU] Separate feature dot-insts
Differential Revision: https://reviews.llvm.org/D56524 llvm-svn: 350793
This commit is contained in:
parent
afd6711287
commit
d3757d3f3a
|
@ -267,7 +267,13 @@ def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
|
|||
def FeatureDLInsts : SubtargetFeature<"dl-insts",
|
||||
"HasDLInsts",
|
||||
"true",
|
||||
"Has deep learning instructions"
|
||||
"Has v_fmac_f32 and v_xnor_b32 instructions"
|
||||
>;
|
||||
|
||||
def FeatureDotInsts : SubtargetFeature<"dot-insts",
|
||||
"HasDotInsts",
|
||||
"true",
|
||||
"Has v_dot* instructions"
|
||||
>;
|
||||
|
||||
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
|
||||
|
@ -558,6 +564,7 @@ def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
|
|||
FeatureFmaMixInsts,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureDotInsts,
|
||||
FeatureSRAMECC,
|
||||
FeatureCodeObjectV3]>;
|
||||
|
||||
|
@ -756,6 +763,9 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
|
|||
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
|
||||
AssemblerPredicate<"FeatureDLInsts">;
|
||||
|
||||
def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
|
||||
AssemblerPredicate<"FeatureDotInsts">;
|
||||
|
||||
|
||||
def EnableLateCFGStructurize : Predicate<
|
||||
"EnableLateStructurizeCFG">;
|
||||
|
|
|
@ -198,6 +198,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
|||
HasDPP(false),
|
||||
HasR128A16(false),
|
||||
HasDLInsts(false),
|
||||
HasDotInsts(false),
|
||||
EnableSRAMECC(false),
|
||||
FlatAddressSpace(false),
|
||||
FlatInstOffsets(false),
|
||||
|
|
|
@ -353,6 +353,7 @@ protected:
|
|||
bool HasDPP;
|
||||
bool HasR128A16;
|
||||
bool HasDLInsts;
|
||||
bool HasDotInsts;
|
||||
bool EnableSRAMECC;
|
||||
bool FlatAddressSpace;
|
||||
bool FlatInstOffsets;
|
||||
|
@ -680,6 +681,10 @@ public:
|
|||
return HasDLInsts;
|
||||
}
|
||||
|
||||
bool hasDotInsts() const {
|
||||
return HasDotInsts;
|
||||
}
|
||||
|
||||
bool isSRAMECCEnabled() const {
|
||||
return EnableSRAMECC;
|
||||
}
|
||||
|
|
|
@ -8385,7 +8385,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
|
|||
EVT VT = N->getValueType(0);
|
||||
SDLoc SL(N);
|
||||
|
||||
if (!Subtarget->hasDLInsts() || VT != MVT::f32)
|
||||
if (!Subtarget->hasDotInsts() || VT != MVT::f32)
|
||||
return SDValue();
|
||||
|
||||
// FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
|
||||
|
|
|
@ -250,7 +250,7 @@ class SDot2Pat<Instruction Inst> : GCNPat <
|
|||
(Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
|
||||
>;
|
||||
|
||||
let SubtargetPredicate = HasDLInsts in {
|
||||
let SubtargetPredicate = HasDotInsts in {
|
||||
|
||||
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
|
||||
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
|
||||
|
@ -302,7 +302,7 @@ foreach Type = ["U", "I"] in
|
|||
(NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
|
||||
(!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
|
||||
|
||||
} // End SubtargetPredicate = HasDLInsts
|
||||
} // End SubtargetPredicate = HasDotInsts
|
||||
|
||||
multiclass VOP3P_Real_vi<bits<10> op> {
|
||||
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
|
||||
|
@ -352,7 +352,7 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
|
|||
}
|
||||
|
||||
|
||||
let SubtargetPredicate = HasDLInsts in {
|
||||
let SubtargetPredicate = HasDotInsts in {
|
||||
|
||||
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
|
||||
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
|
||||
|
@ -362,4 +362,4 @@ defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
|
|||
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
|
||||
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
|
||||
|
||||
} // End SubtargetPredicate = HasDLInsts
|
||||
} // End SubtargetPredicate = HasDotInsts
|
||||
|
|
Loading…
Reference in New Issue