forked from OSchip/llvm-project
[PowerPC] Exploit vnmsubfp instruction
On PowerPC, we have the vnmsubfp Altivec instruction for the fnmsub operation on the v4f32 type. The default pattern for this instruction never works, since fneg is not legal for v4f32 when VSX is disabled. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D80617
This commit is contained in:
parent
f8ef7c99a0
commit
7315d221a2
|
@ -16287,8 +16287,7 @@ SDValue PPCTargetLowering::combineFMALike(SDNode *N,
|
|||
SDLoc Loc(N);
|
||||
|
||||
// TODO: QPX subtarget is deprecated. No transformation here.
|
||||
if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT) ||
|
||||
(VT.isVector() && !Subtarget.hasVSX()))
|
||||
if (Subtarget.hasQPX() || !isOperationLegal(ISD::FMA, VT))
|
||||
return SDValue();
|
||||
|
||||
// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
|
||||
|
|
|
@ -1024,6 +1024,9 @@ def : Pat<(fmul v4f32:$vA, v4f32:$vB),
|
|||
(VMADDFP $vA, $vB,
|
||||
(v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
|
||||
|
||||
def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
|
||||
(VNMSUBFP $A, $B, $C)>;
|
||||
|
||||
def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
|
||||
(VMADDFP $A, $B, $C)>;
|
||||
def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
|
||||
|
|
|
@ -304,10 +304,7 @@ define <4 x float> @test_fast_neg_fma_v4f32(<4 x float> %a, <4 x float> %b,
|
|||
;
|
||||
; NO-VSX-LABEL: test_fast_neg_fma_v4f32:
|
||||
; NO-VSX: # %bb.0: # %entry
|
||||
; NO-VSX-NEXT: vspltisb 5, -1
|
||||
; NO-VSX-NEXT: vslw 5, 5, 5
|
||||
; NO-VSX-NEXT: vsubfp 2, 5, 2
|
||||
; NO-VSX-NEXT: vmaddfp 2, 2, 3, 4
|
||||
; NO-VSX-NEXT: vnmsubfp 2, 2, 3, 4
|
||||
; NO-VSX-NEXT: blr
|
||||
<4 x float> %c) {
|
||||
entry:
|
||||
|
|
|
@ -679,12 +679,9 @@ define <4 x float> @hoo2_fmf(<4 x float> %a, <4 x float> %b) nounwind {
|
|||
; CHECK-P7: # %bb.0:
|
||||
; CHECK-P7-NEXT: vspltisw 4, -1
|
||||
; CHECK-P7-NEXT: vrefp 5, 3
|
||||
; CHECK-P7-NEXT: vspltisb 0, -1
|
||||
; CHECK-P7-NEXT: vslw 0, 0, 0
|
||||
; CHECK-P7-NEXT: vslw 4, 4, 4
|
||||
; CHECK-P7-NEXT: vsubfp 3, 0, 3
|
||||
; CHECK-P7-NEXT: vmaddfp 4, 2, 5, 4
|
||||
; CHECK-P7-NEXT: vmaddfp 2, 3, 4, 2
|
||||
; CHECK-P7-NEXT: vnmsubfp 2, 3, 4, 2
|
||||
; CHECK-P7-NEXT: vmaddfp 2, 5, 2, 4
|
||||
; CHECK-P7-NEXT: blr
|
||||
;
|
||||
|
|
Loading…
Reference in New Issue