forked from OSchip/llvm-project
[AMDGPU] Take advantage of VOP3 literals in convertToThreeAddress
This improves a corner case where v_fmac can be converted to v_fma on GFX10+ even if it has a literal operand.

Differential Revision: https://reviews.llvm.org/D130992
This commit is contained in:
parent
5ec6ea3dfd
commit
c24d68fff1
|
@ -3416,9 +3416,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
|
||||||
}
|
}
|
||||||
|
|
||||||
// VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma
|
// VOP2 mac/fmac with a literal operand cannot be converted to VOP3 mad/fma
|
||||||
// because VOP3 does not allow a literal operand.
|
// if VOP3 does not allow a literal operand.
|
||||||
// TODO: Remove this restriction for GFX10.
|
if (Src0Literal && !ST.hasVOP3Literal())
|
||||||
if (Src0Literal)
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
|
unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
|
||||||
|
|
|
@ -34,17 +34,14 @@ define float @v_fma_imm(float %a, float %c) {
|
||||||
; GFX10: ; %bb.0:
|
; GFX10: ; %bb.0:
|
||||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX10-NEXT: v_fmac_legacy_f32_e32 v1, 0x41200000, v0
|
; GFX10-NEXT: v_fma_legacy_f32 v0, 0x41200000, v0, v1
|
||||||
; GFX10-NEXT: v_mov_b32_e32 v0, v1
|
|
||||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; GFX11-LABEL: v_fma_imm:
|
; GFX11-LABEL: v_fma_imm:
|
||||||
; GFX11: ; %bb.0:
|
; GFX11: ; %bb.0:
|
||||||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
|
||||||
; GFX11-NEXT: v_fmac_dx9_zero_f32_e32 v1, 0x41200000, v0
|
; GFX11-NEXT: v_fma_dx9_zero_f32 v0, 0x41200000, v0, v1
|
||||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
|
||||||
; GFX11-NEXT: v_mov_b32_e32 v0, v1
|
|
||||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||||
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c)
|
%fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c)
|
||||||
ret float %fma
|
ret float %fma
|
||||||
|
|
Loading…
Reference in New Issue