forked from OSchip/llvm-project
[AMDGPU] Simplify mad/mac patterns. NFC.
Simplify instruction selection patterns for mad/mac:
- Use any_fmad consistently to make it clear that all patterns treat fmad and AMDGPUfmad_ftz identically.
- For mad, put the patterns on the instruction definitions. For mac, the patterns are still out-of-line because we want to set AddedComplexity and to have special handling of the source modifiers.

Differential Revision: https://reviews.llvm.org/D133305
This commit is contained in:
parent
1427d55d70
commit
5291c3dd36
|
@ -984,20 +984,18 @@ def : GCNPat <
|
|||
|
||||
// NoMods pattern used for mac. If there are any source modifiers then it's
|
||||
// better to select mad instead of mac.
|
||||
class FMADPat <ValueType vt, Instruction inst, SDPatternOperator node>
|
||||
: GCNPat <(vt (node (vt (VOP3NoMods vt:$src0)),
|
||||
(vt (VOP3NoMods vt:$src1)),
|
||||
(vt (VOP3NoMods vt:$src2)))),
|
||||
class FMADPat <ValueType vt, Instruction inst>
|
||||
: GCNPat <(vt (any_fmad (vt (VOP3NoMods vt:$src0)),
|
||||
(vt (VOP3NoMods vt:$src1)),
|
||||
(vt (VOP3NoMods vt:$src2)))),
|
||||
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
|
||||
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
// Prefer mac form when there are no modifiers.
|
||||
let AddedComplexity = 9 in {
|
||||
let OtherPredicates = [HasMadMacF32Insts] in {
|
||||
def : FMADPat <f32, V_MAC_F32_e64, fmad>;
|
||||
def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
|
||||
} // OtherPredicates = [HasMadMacF32Insts]
|
||||
let OtherPredicates = [HasMadMacF32Insts] in
|
||||
def : FMADPat <f32, V_MAC_F32_e64>;
|
||||
|
||||
// Don't allow source modifiers. If there are any source modifiers then it's
|
||||
// better to select mad instead of mac.
|
||||
|
@ -1022,24 +1020,10 @@ def : GCNPat <
|
|||
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
let SubtargetPredicate = Has16BitInsts in {
|
||||
def : FMADPat <f16, V_MAC_F16_e64, fmad>;
|
||||
def : FMADPat <f16, V_MAC_F16_e64, AMDGPUfmad_ftz>;
|
||||
} // SubtargetPredicate = Has16BitInsts
|
||||
let SubtargetPredicate = Has16BitInsts in
|
||||
def : FMADPat <f16, V_MAC_F16_e64>;
|
||||
} // AddedComplexity = 9
|
||||
|
||||
class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
|
||||
: GCNPat<
|
||||
(Ty (mad_opr (Ty (VOP3Mods Ty:$src0, i32:$src0_mod)),
|
||||
(Ty (VOP3Mods Ty:$src1, i32:$src1_mod)),
|
||||
(Ty (VOP3Mods Ty:$src2, i32:$src2_mod)))),
|
||||
(inst $src0_mod, $src0, $src1_mod, $src1,
|
||||
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
let OtherPredicates = [HasMadMacF32Insts] in
|
||||
def : FMADModsPat<f32, V_MAD_F32_e64, AMDGPUfmad_ftz>;
|
||||
|
||||
let OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
|
||||
def : GCNPat <
|
||||
(f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
|
||||
|
@ -1049,9 +1033,6 @@ def : GCNPat <
|
|||
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
let SubtargetPredicate = Has16BitInsts in
|
||||
def : FMADModsPat<f16, V_MAD_F16_e64, AMDGPUfmad_ftz>;
|
||||
|
||||
class VOPSelectModsPat <ValueType vt> : GCNPat <
|
||||
(vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
|
||||
(VOP3Mods vt:$src2, i32:$src2_mods))),
|
||||
|
|
|
@ -123,7 +123,7 @@ let isReMaterializable = 1 in {
|
|||
let mayRaiseFPException = 0 in {
|
||||
let SubtargetPredicate = HasMadMacF32Insts in {
|
||||
defm V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
|
||||
defm V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
|
||||
defm V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fmad>;
|
||||
} // End SubtargetPredicate = HasMadMacF32Insts
|
||||
|
||||
let SubtargetPredicate = HasFmaLegacy32 in
|
||||
|
@ -318,7 +318,7 @@ let renamedInGFX9 = 1 in {
|
|||
defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
|
||||
defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
|
||||
let FPDPRounding = 1 in {
|
||||
defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
|
||||
defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fmad>;
|
||||
let Uses = [MODE, M0, EXEC] in {
|
||||
let OtherPredicates = [isNotGFX90APlus] in
|
||||
// For some reason the intrinsic operands are in a different order
|
||||
|
|
Loading…
Reference in New Issue