forked from OSchip/llvm-project
[Power9] Legalize and emit code for quad-precision fma instructions
Legalize and emit code for the following quad-precision fma instructions: xsmaddqp, xsnmaddqp, xsmsubqp, and xsnmsubqp. Differential Revision: https://reviews.llvm.org/D44843 llvm-svn: 329206
This commit is contained in:
parent
fb6a4a7907
commit
09fda63af0
|
@ -798,6 +798,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::FMUL, MVT::f128, Legal);
|
||||
setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FMA, MVT::f128, Legal);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -13752,6 +13753,8 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
|||
case MVT::f32:
|
||||
case MVT::f64:
|
||||
return true;
|
||||
case MVT::f128:
|
||||
return (EnableQuadPrecision && Subtarget.hasP9Vector());
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -2382,6 +2382,18 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
list<dag> pattern>
|
||||
: X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT;
|
||||
|
||||
// [PO VRT VRA VRB XO /]
|
||||
// FMA-style variant of the three-register X-form: $vTi is an extra input
// operand that is tied to the output $vT (RegConstraint) and excluded from
// the encoding (NoEncode), so the assembly string prints only $vT, $vA, $vB.
class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
|
||||
list<dag> pattern>
|
||||
: XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
|
||||
!strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
|
||||
RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
|
||||
|
||||
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
|
||||
// Same operands and pattern plumbing as X_VT5_VA5_VB5_FMA, with isDOT mixed in.
// NOTE(review): presumably isDOT is what sets the trailing RO bit named in the
// encoding comment above - confirm against the isDOT definition.
class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
|
||||
list<dag> pattern>
|
||||
: X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isDOT;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Quad-Precision Scalar Move Instructions:
|
||||
|
||||
|
@ -2424,14 +2436,30 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", []>;
|
||||
|
||||
// (Negative) Multiply-{Add/Subtract}
|
||||
def XSMADDQP : X_VT5_VA5_VB5 <63, 388, "xsmaddqp" , []>;
|
||||
def XSMADDQPO : X_VT5_VA5_VB5_Ro<63, 388, "xsmaddqpo" , []>;
|
||||
def XSMSUBQP : X_VT5_VA5_VB5 <63, 420, "xsmsubqp" , []>;
|
||||
def XSMSUBQPO : X_VT5_VA5_VB5_Ro<63, 420, "xsmsubqpo" , []>;
|
||||
def XSNMADDQP : X_VT5_VA5_VB5 <63, 452, "xsnmaddqp" , []>;
|
||||
def XSNMADDQPO: X_VT5_VA5_VB5_Ro<63, 452, "xsnmaddqpo", []>;
|
||||
def XSNMSUBQP : X_VT5_VA5_VB5 <63, 484, "xsnmsubqp" , []>;
|
||||
def XSNMSUBQPO: X_VT5_VA5_VB5_Ro<63, 484, "xsnmsubqpo", []>;
|
||||
// Patterned quad-precision FMA definitions. In every pattern $vA and $vB are
// the two multiplicands and $vTi is the addend (third fma operand):
//   XSMADDQP  :   fma(vA, vB,  vTi)
//   XSMSUBQP  :   fma(vA, vB, -vTi)
//   XSNMADDQP : -(fma(vA, vB,  vTi))
//   XSNMSUBQP : -(fma(vA, vB, -vTi))
// The *QPO definitions are given empty pattern lists here.
def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
|
||||
[(set f128:$vT,
|
||||
(fma f128:$vA, f128:$vB,
|
||||
f128:$vTi))]>;
|
||||
def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo" , []>;
|
||||
def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
|
||||
[(set f128:$vT,
|
||||
(fma f128:$vA, f128:$vB,
|
||||
(fneg f128:$vTi)))]>;
|
||||
def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" , []>;
|
||||
def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
|
||||
[(set f128:$vT,
|
||||
(fneg (fma f128:$vA, f128:$vB,
|
||||
f128:$vTi)))]>;
|
||||
def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo", []>;
|
||||
def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
|
||||
[(set f128:$vT,
|
||||
(fneg (fma f128:$vA, f128:$vB,
|
||||
(fneg f128:$vTi))))]>;
|
||||
def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo", []>;
|
||||
|
||||
// Additional fnmsub patterns: -a*c + b == -(a*c - b)
// Either multiplicand may carry the fneg; in both cases the negation is
// dropped and $B is passed first, i.e. as the addend operand of XSNMSUBQP,
// with $C and $A as the multiplicands.
|
||||
def : Pat<(fma (fneg f128:$A), f128:$C, f128:$B), (XSNMSUBQP $B, $C, $A)>;
|
||||
def : Pat<(fma f128:$A, (fneg f128:$C), f128:$B), (XSNMSUBQP $B, $C, $A)>;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Quad/Double-Precision Compare Instructions:
|
||||
|
|
|
@ -0,0 +1,203 @@
|
|||
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
|
||||
; RUN: -enable-ppc-quad-precision -ppc-vsr-nums-as-vr < %s | FileCheck %s
|
||||
|
||||
; llvm.fmuladd.f128(a, b, c): expects xsmaddqp accumulating into REG5 (the
; value loaded from 0(5)) with REG3 and REG4 as multiplicands, and no call to
; fmal.
define void @qpFmadd(fp128* nocapture readonly %a, fp128* nocapture %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%madd = tail call fp128 @llvm.fmuladd.f128(fp128 %0, fp128 %1, fp128 %2)
|
||||
store fp128 %madd, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFmadd
|
||||
; CHECK-NOT: bl fmal
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsmaddqp [[REG5]], [[REG3]], [[REG4]]
|
||||
; CHECK-NEXT: stxv [[REG5]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; a + (b * c) built from contract-flagged fmul/fadd: expects the pair to be
; fused into xsmaddqp accumulating into REG3 (loaded from 0(3)), with no call
; to __multf3.
define void @qpFmadd_02(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%mul = fmul contract fp128 %1, %2
|
||||
%add = fadd contract fp128 %0, %mul
|
||||
store fp128 %add, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFmadd_02
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsmaddqp [[REG3]], [[REG4]], [[REG5]]
|
||||
; CHECK-NEXT: stxv [[REG3]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; (a * b) + c built from contract-flagged fmul/fadd: expects xsmaddqp
; accumulating into REG5 (loaded from 0(5)), with no call to __multf3.
define void @qpFmadd_03(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%mul = fmul contract fp128 %0, %1
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%add = fadd contract fp128 %mul, %2
|
||||
store fp128 %add, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFmadd_03
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsmaddqp [[REG5]], [[REG3]], [[REG4]]
|
||||
; CHECK-NEXT: stxv [[REG5]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; -(a + (b * c)), where the negation is the final fsub from the -0.0 constant:
; expects xsnmaddqp accumulating into REG3 (loaded from 0(3)), with no call to
; __multf3.
define void @qpFnmadd(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%mul = fmul contract fp128 %1, %2
|
||||
%add = fadd contract fp128 %0, %mul
|
||||
%sub = fsub fp128 0xL00000000000000008000000000000000, %add
|
||||
store fp128 %sub, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFnmadd
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsnmaddqp [[REG3]], [[REG4]], [[REG5]]
|
||||
; CHECK-NEXT: stxv [[REG3]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; -((a * b) + c), where the negation is the final fsub from the -0.0 constant:
; expects xsnmaddqp accumulating into REG5 (loaded from 0(5)), with no call to
; __multf3.
define void @qpFnmadd_02(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%mul = fmul contract fp128 %0, %1
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%add = fadd contract fp128 %mul, %2
|
||||
%sub = fsub fp128 0xL00000000000000008000000000000000, %add
|
||||
store fp128 %sub, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFnmadd_02
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsnmaddqp [[REG5]], [[REG3]], [[REG4]]
|
||||
; CHECK-NEXT: stxv [[REG5]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; a - (b * c) built from contract-flagged fmul/fsub: expects xsnmsubqp
; accumulating into REG3 (loaded from 0(3)) with REG5 and REG4 as
; multiplicands, and no call to __multf3.
define void @qpFmsub(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%mul = fmul contract fp128 %1, %2
|
||||
%sub = fsub contract fp128 %0, %mul
|
||||
store fp128 %sub, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFmsub
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsnmsubqp [[REG3]], [[REG5]], [[REG4]]
|
||||
; CHECK-NEXT: stxv [[REG3]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; (a * b) - c built from contract-flagged fmul/fsub: expects xsmsubqp
; accumulating into REG5 (loaded from 0(5)), with no call to __multf3.
define void @qpFmsub_02(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%mul = fmul contract fp128 %0, %1
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%sub = fsub contract fp128 %mul, %2
|
||||
store fp128 %sub, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFmsub_02
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsmsubqp [[REG5]], [[REG3]], [[REG4]]
|
||||
; CHECK-NEXT: stxv [[REG5]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; -(a - (b * c)), where the outer negation is the final fsub from the -0.0
; constant: expects an xsnegqp of REG4 (loaded from 0(4)) followed by
; xsnmaddqp accumulating into REG3, with no call to __multf3.
define void @qpFnmsub(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%mul = fmul contract fp128 %1, %2
|
||||
%sub = fsub contract fp128 %0, %mul
|
||||
%sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
|
||||
store fp128 %sub1, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFnmsub
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsnegqp [[REG4]], [[REG4]]
|
||||
; CHECK: xsnmaddqp [[REG3]], [[REG4]], [[REG5]]
|
||||
; CHECK-NEXT: stxv [[REG3]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
; -((a * b) - c), where the outer negation is the final fsub from the -0.0
; constant: expects xsnmsubqp accumulating into REG5 (loaded from 0(5)), with
; no call to __multf3.
define void @qpFnmsub_02(fp128* nocapture readonly %a,
|
||||
fp128* nocapture readonly %b,
|
||||
fp128* nocapture readonly %c, fp128* nocapture %res) {
|
||||
entry:
|
||||
%0 = load fp128, fp128* %a, align 16
|
||||
%1 = load fp128, fp128* %b, align 16
|
||||
%mul = fmul contract fp128 %0, %1
|
||||
%2 = load fp128, fp128* %c, align 16
|
||||
%sub = fsub contract fp128 %mul, %2
|
||||
%sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
|
||||
store fp128 %sub1, fp128* %res, align 16
|
||||
ret void
|
||||
; CHECK-LABEL: qpFnmsub_02
|
||||
; CHECK-NOT: bl __multf3
|
||||
; CHECK-DAG: lxv [[REG3:[0-9]+]], 0(3)
|
||||
; CHECK-DAG: lxv [[REG4:[0-9]+]], 0(4)
|
||||
; CHECK-DAG: lxv [[REG5:[0-9]+]], 0(5)
|
||||
; CHECK: xsnmsubqp [[REG5]], [[REG3]], [[REG4]]
|
||||
; CHECK-NEXT: stxv [[REG5]], 0(6)
|
||||
; CHECK-NEXT: blr
|
||||
}
|
Loading…
Reference in New Issue