diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index a02555e6a7a5..6fa0ce1bcdee 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -31,6 +31,10 @@ def gi_vop3mods : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3_no_mods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_vop3mods_nnan : GIComplexOperandMatcher, GIComplexPatternEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 8ac944449d68..ae6d6489e92d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2218,6 +2218,18 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { }}; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const { + Register Reg = Root.getReg(); + const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI); + if (Def && (Def->getOpcode() == AMDGPU::G_FNEG || + Def->getOpcode() == AMDGPU::G_FABS)) + return {}; + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); }, + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const { Register Src; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index f946469a6985..2a0460c53374 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -142,6 +142,9 @@ private: selectVOP3OMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectVOP3Mods(MachineOperand &Root) const; + + ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVOP3Mods_nnan(MachineOperand &Root) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir index ec3f4449f60a..ffdef614b546 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir @@ -23,15 +23,15 @@ body: | ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]] + ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]] + ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -60,15 +60,15 @@ body: | ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]] + ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src0 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]] + ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -98,15 +98,15 @@ body: | ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]] + ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]] + ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir index fde4fba9c282..071e6072418c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir @@ -16,15 +16,15 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]] + ; GFX6: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] ; GFX10-LABEL: name: fmad_f32 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]] + ; GFX10: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = COPY $vgpr2