forked from OSchip/llvm-project
[Hexagon] Generate HVX/FP arithmetic instructions
Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh@quicinc.com> Co-authored-by: Sumanth Gundapaneni <sgundapa@quicinc.com> Co-authored-by: Joshua Herrera <joshherr@quicinc.com>
This commit is contained in:
parent
ee8e81b40e
commit
db83e3e507
|
@ -93,6 +93,12 @@ HexagonTargetLowering::initializeHVXLowering() {
|
|||
Subtarget.useHVXFloatingPoint()) {
|
||||
setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
|
||||
setOperationAction(ISD::FADD, MVT::v64f16, Legal);
|
||||
setOperationAction(ISD::FSUB, MVT::v64f16, Legal);
|
||||
setOperationAction(ISD::FMUL, MVT::v64f16, Legal);
|
||||
setOperationAction(ISD::FADD, MVT::v32f32, Legal);
|
||||
setOperationAction(ISD::FSUB, MVT::v32f32, Legal);
|
||||
setOperationAction(ISD::FMUL, MVT::v32f32, Legal);
|
||||
setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
|
||||
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
|
||||
|
@ -126,6 +132,9 @@ HexagonTargetLowering::initializeHVXLowering() {
|
|||
|
||||
setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
|
||||
setOperationAction(ISD::STORE, MVT::v64f32, Custom);
|
||||
setOperationAction(ISD::FADD, MVT::v64f32, Custom);
|
||||
setOperationAction(ISD::FSUB, MVT::v64f32, Custom);
|
||||
setOperationAction(ISD::FMUL, MVT::v64f32, Custom);
|
||||
setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
|
||||
setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
|
||||
setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);
|
||||
|
@ -2291,6 +2300,9 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
case ISD::CTLZ:
|
||||
case ISD::CTTZ:
|
||||
case ISD::MUL:
|
||||
case ISD::FADD:
|
||||
case ISD::FSUB:
|
||||
case ISD::FMUL:
|
||||
case ISD::FMINNUM:
|
||||
case ISD::FMAXNUM:
|
||||
case ISD::MULHS:
|
||||
|
|
|
@ -453,6 +453,52 @@ let Predicates = [UseHVX] in {
|
|||
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
|
||||
}
|
||||
|
||||
// For now, we always deal with vector floating point in SF mode.
|
||||
// Selection pattern for a two-operand operation whose result must be converted
// before use: matches (ResType (Op $Rs, $Rt)), emits MI on the same two
// operands, and wraps MI's VecF32 result in V6_vconv_sf_qf32.
// RtPred defaults to RsPred, so one operand predicate covers both inputs
// unless a second one is supplied.
class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
|
||||
PatFrag RsPred, PatFrag RtPred = RsPred>
|
||||
: Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
|
||||
(V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>;
|
||||
|
||||
// Half-precision counterpart of OpR_RR_pat_conv: matches
// (ResType (Op $Rs, $Rt)), emits MI on the same two operands, and wraps MI's
// VecF16 result in V6_vconv_hf_qf16.
// RtPred defaults to RsPred, so one operand predicate covers both inputs
// unless a second one is supplied.
class OpR_RR_pat_conv_hf<InstHexagon MI, PatFrag Op, ValueType ResType,
|
||||
PatFrag RsPred, PatFrag RtPred = RsPred>
|
||||
: Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
|
||||
(V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>;
|
||||
|
||||
let Predicates = [UseHVXV68, UseHVXQFloat] in {
|
||||
def: OpR_RR_pat_conv_hf<V6_vsub_hf, pf2<fsub>, VecF16, HVF16>;
|
||||
def: OpR_RR_pat_conv_hf<V6_vadd_hf, pf2<fadd>, VecF16, HVF16>;
|
||||
def: OpR_RR_pat_conv_hf<V6_vmpy_qf16_hf, pf2<fmul>, VecF16, HVF16>;
|
||||
def: OpR_RR_pat_conv<V6_vsub_sf, pf2<fsub>, VecF32, HVF32>;
|
||||
def: OpR_RR_pat_conv<V6_vadd_sf, pf2<fadd>, VecF32, HVF32>;
|
||||
def: OpR_RR_pat_conv<V6_vmpy_qf32_sf, pf2<fmul>, VecF32, HVF32>;
|
||||
|
||||
// For now, we assume that the fp32 register is always coming in as IEEE float
|
||||
// since the qfloat arithmetic instructions above always generate the
|
||||
// accompanying conversions as part of their pattern.
|
||||
def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
|
||||
(V6_vdealh (V6_vconv_hf_qf32
|
||||
(VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)),
|
||||
(V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0))
|
||||
))))>;
|
||||
|
||||
}
|
||||
|
||||
// HVX IEEE arithmetic Instructions
|
||||
let Predicates = [UseHVXV68, UseHVXIEEEFP] in {
|
||||
def: Pat<(fadd HVF16:$Rs, HVF16:$Rt),
|
||||
(V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>;
|
||||
def: Pat<(fadd HVF32:$Rs, HVF32:$Rt),
|
||||
(V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>;
|
||||
def: Pat<(fsub HVF16:$Rs, HVF16:$Rt),
|
||||
(V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>;
|
||||
def: Pat<(fsub HVF32:$Rs, HVF32:$Rt),
|
||||
(V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>;
|
||||
def: Pat<(fmul HVF16:$Rs, HVF16:$Rt),
|
||||
(V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>;
|
||||
def: Pat<(fmul HVF32:$Rs, HVF32:$Rt),
|
||||
(V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
|
||||
def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
|
||||
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
define <64 x half> @f0(<64 x half> %a0, <64 x half> %a1) #0 {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.qf16 = vadd(v0.hf,v1.hf)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.hf = v0.qf16
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fadd <64 x half> %a0, %a1
|
||||
ret <64 x half> %v0
|
||||
}
|
||||
|
||||
define <32 x float> @f1(<32 x float> %a0, <32 x float> %a1) #0 {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v1.sf)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.sf = v0.qf32
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fadd <32 x float> %a0, %a1
|
||||
ret <32 x float> %v0
|
||||
}
|
||||
|
||||
define <64 x half> @f2(<64 x half> %a0, <64 x half> %a1) #0 {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.qf16 = vsub(v0.hf,v1.hf)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.hf = v0.qf16
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fsub <64 x half> %a0, %a1
|
||||
ret <64 x half> %v0
|
||||
}
|
||||
|
||||
define <32 x float> @f3(<32 x float> %a0, <32 x float> %a1) #0 {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.qf32 = vsub(v0.sf,v1.sf)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.sf = v0.qf32
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fsub <32 x float> %a0, %a1
|
||||
ret <32 x float> %v0
|
||||
}
|
||||
|
||||
define <64 x half> @f4(<64 x half> %a0, <64 x half> %a1) #0 {
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.qf16 = vmpy(v0.hf,v1.hf)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.hf = v0.qf16
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fmul <64 x half> %a0, %a1
|
||||
ret <64 x half> %v0
|
||||
}
|
||||
|
||||
define <32 x float> @f5(<32 x float> %a0, <32 x float> %a1) #0 {
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.qf32 = vmpy(v0.sf,v1.sf)
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.sf = v0.qf32
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fmul <32 x float> %a0, %a1
|
||||
ret <32 x float> %v0
|
||||
}
|
||||
|
||||
define <64 x half> @f6(<64 x half> %a0, <64 x half> %a1) #1 {
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.hf = vadd(v0.hf,v1.hf)
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fadd <64 x half> %a0, %a1
|
||||
ret <64 x half> %v0
|
||||
}
|
||||
|
||||
define <32 x float> @f7(<32 x float> %a0, <32 x float> %a1) #1 {
|
||||
; CHECK-LABEL: f7:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.sf = vadd(v0.sf,v1.sf)
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fadd <32 x float> %a0, %a1
|
||||
ret <32 x float> %v0
|
||||
}
|
||||
|
||||
define <64 x half> @f8(<64 x half> %a0, <64 x half> %a1) #1 {
|
||||
; CHECK-LABEL: f8:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.hf = vsub(v0.hf,v1.hf)
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fsub <64 x half> %a0, %a1
|
||||
ret <64 x half> %v0
|
||||
}
|
||||
|
||||
define <32 x float> @f9(<32 x float> %a0, <32 x float> %a1) #1 {
|
||||
; CHECK-LABEL: f9:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.sf = vsub(v0.sf,v1.sf)
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fsub <32 x float> %a0, %a1
|
||||
ret <32 x float> %v0
|
||||
}
|
||||
|
||||
define <64 x half> @f10(<64 x half> %a0, <64 x half> %a1) #1 {
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.hf = vmpy(v0.hf,v1.hf)
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fmul <64 x half> %a0, %a1
|
||||
ret <64 x half> %v0
|
||||
}
|
||||
|
||||
define <32 x float> @f11(<32 x float> %a0, <32 x float> %a1) #1 {
|
||||
; CHECK-LABEL: f11:
|
||||
; CHECK: // %bb.0: // %b0
|
||||
; CHECK-NEXT: {
|
||||
; CHECK-NEXT: v0.sf = vmpy(v0.sf,v1.sf)
|
||||
; CHECK-NEXT: jumpr r31
|
||||
; CHECK-NEXT: }
|
||||
b0:
|
||||
%v0 = fmul <32 x float> %a0, %a1
|
||||
ret <32 x float> %v0
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }
|
||||
attributes #1 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-ieee-fp" }
|
Loading…
Reference in New Issue