forked from OSchip/llvm-project
AMDGPU/GlobalISel: Handle VOP3NoMods
This commit is contained in:
parent
d309b4ebe4
commit
e60d658260
|
@ -31,6 +31,10 @@ def gi_vop3mods :
|
|||
GIComplexOperandMatcher<s32, "selectVOP3Mods">,
|
||||
GIComplexPatternEquiv<VOP3Mods>;
|
||||
|
||||
def gi_vop3_no_mods :
|
||||
GIComplexOperandMatcher<s32, "selectVOP3NoMods">,
|
||||
GIComplexPatternEquiv<VOP3NoMods>;
|
||||
|
||||
def gi_vop3mods_nnan :
|
||||
GIComplexOperandMatcher<s32, "selectVOP3Mods_nnan">,
|
||||
GIComplexPatternEquiv<VOP3Mods_nnan>;
|
||||
|
|
|
@ -2218,6 +2218,18 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
|
|||
}};
|
||||
}
|
||||
|
||||
InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
|
||||
Register Reg = Root.getReg();
|
||||
const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
|
||||
if (Def && (Def->getOpcode() == AMDGPU::G_FNEG ||
|
||||
Def->getOpcode() == AMDGPU::G_FABS))
|
||||
return {};
|
||||
return {{
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
|
||||
}};
|
||||
}
|
||||
|
||||
InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
|
|
|
@ -142,6 +142,9 @@ private:
|
|||
selectVOP3OMods(MachineOperand &Root) const;
|
||||
InstructionSelector::ComplexRendererFns
|
||||
selectVOP3Mods(MachineOperand &Root) const;
|
||||
|
||||
ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
|
||||
|
||||
InstructionSelector::ComplexRendererFns
|
||||
selectVOP3Mods_nnan(MachineOperand &Root) const;
|
||||
|
||||
|
|
|
@ -23,15 +23,15 @@ body: |
|
|||
; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
|
||||
; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
|
||||
; GFX10-LABEL: name: fma_f32
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
|
||||
; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = COPY $vgpr2
|
||||
|
@ -60,15 +60,15 @@ body: |
|
|||
; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
|
||||
; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
|
||||
; GFX10-LABEL: name: fma_f32_fneg_src0
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
|
||||
; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = COPY $vgpr2
|
||||
|
@ -98,15 +98,15 @@ body: |
|
|||
; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]]
|
||||
; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
|
||||
; GFX10-LABEL: name: fma_f32_fneg_src1
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]]
|
||||
; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = COPY $vgpr2
|
||||
|
|
|
@ -16,15 +16,15 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]]
|
||||
; GFX6: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX6: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]]
|
||||
; GFX10-LABEL: name: fmad_f32
|
||||
; GFX10: $vcc_hi = IMPLICIT_DEF
|
||||
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]]
|
||||
; GFX10: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
|
||||
; GFX10: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = COPY $vgpr2
|
||||
|
|
Loading…
Reference in New Issue