[Hexagon] Generate HVX/FP arithmetic instructions

Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh@quicinc.com>
Co-authored-by: Sumanth Gundapaneni <sgundapa@quicinc.com>
Co-authored-by: Joshua Herrera <joshherr@quicinc.com>
This commit is contained in:
Krzysztof Parzyszek 2021-12-30 12:38:36 -08:00
parent ee8e81b40e
commit db83e3e507
3 changed files with 225 additions and 0 deletions

View File

@ -93,6 +93,12 @@ HexagonTargetLowering::initializeHVXLowering() {
Subtarget.useHVXFloatingPoint()) {
setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
setOperationAction(ISD::FADD, MVT::v64f16, Legal);
setOperationAction(ISD::FSUB, MVT::v64f16, Legal);
setOperationAction(ISD::FMUL, MVT::v64f16, Legal);
setOperationAction(ISD::FADD, MVT::v32f32, Legal);
setOperationAction(ISD::FSUB, MVT::v32f32, Legal);
setOperationAction(ISD::FMUL, MVT::v32f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
@ -126,6 +132,9 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
setOperationAction(ISD::STORE, MVT::v64f32, Custom);
setOperationAction(ISD::FADD, MVT::v64f32, Custom);
setOperationAction(ISD::FSUB, MVT::v64f32, Custom);
setOperationAction(ISD::FMUL, MVT::v64f32, Custom);
setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);
@ -2291,6 +2300,9 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::MUL:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::MULHS:

View File

@ -453,6 +453,52 @@ let Predicates = [UseHVX] in {
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
// Selection pattern for a two-operand operation whose machine instruction
// result must be converted before use. It matches
//   (ResType (Op RsPred:$Rs, RtPred:$Rt))
// and emits MI on the same two operands, with MI's result (typed VecF32)
// wrapped in V6_vconv_sf_qf32. The selected code is therefore always
// "operate, then convert".
//   MI      - instruction that performs the arithmetic.
//   Op      - DAG operator fragment being matched.
//   ResType - value type of the matched node.
//   RsPred  - operand predicate for $Rs.
//   RtPred  - operand predicate for $Rt; defaults to RsPred.
// For now, we always deal with vector floating point in SF mode.
class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
PatFrag RsPred, PatFrag RtPred = RsPred>
: Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
(V6_vconv_sf_qf32 (VecF32 (MI RsPred:$Rs, RtPred:$Rt)))>;
// Half-precision counterpart of OpR_RR_pat_conv. It matches
//   (ResType (Op RsPred:$Rs, RtPred:$Rt))
// and emits MI on the same operands, with MI's result (typed VecF16)
// wrapped in V6_vconv_hf_qf16.
//   MI      - instruction that performs the arithmetic.
//   Op      - DAG operator fragment being matched.
//   ResType - value type of the matched node.
//   RsPred  - operand predicate for $Rs.
//   RtPred  - operand predicate for $Rt; defaults to RsPred.
class OpR_RR_pat_conv_hf<InstHexagon MI, PatFrag Op, ValueType ResType,
PatFrag RsPred, PatFrag RtPred = RsPred>
: Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
(V6_vconv_hf_qf16 (VecF16 (MI RsPred:$Rs, RtPred:$Rt)))>;
// HVX qfloat arithmetic patterns. They are active only when both the
// UseHVXV68 and UseHVXQFloat predicates hold.
let Predicates = [UseHVXV68, UseHVXQFloat] in {
// Half-precision fsub/fadd/fmul: the arithmetic instruction is followed by
// V6_vconv_hf_qf16 (supplied by OpR_RR_pat_conv_hf).
def: OpR_RR_pat_conv_hf<V6_vsub_hf, pf2<fsub>, VecF16, HVF16>;
def: OpR_RR_pat_conv_hf<V6_vadd_hf, pf2<fadd>, VecF16, HVF16>;
def: OpR_RR_pat_conv_hf<V6_vmpy_qf16_hf, pf2<fmul>, VecF16, HVF16>;
// Single-precision fsub/fadd/fmul: the arithmetic instruction is followed by
// V6_vconv_sf_qf32 (supplied by OpR_RR_pat_conv).
def: OpR_RR_pat_conv<V6_vsub_sf, pf2<fsub>, VecF32, HVF32>;
def: OpR_RR_pat_conv<V6_vadd_sf, pf2<fadd>, VecF32, HVF32>;
def: OpR_RR_pat_conv<V6_vmpy_qf32_sf, pf2<fmul>, VecF32, HVF32>;
// For now we assume that the fp32 register is always coming in as IEEE float,
// since the qfloat arithmetic instructions above always generate the
// accompanying conversions as part of their pattern.
//
// fpround of a vector pair of f32 (HWF32) to a single f16 vector: each half
// of the pair (HiVec, LoVec) is added to a zero vector (V6_vd0) with
// V6_vadd_sf, the two results are recombined with Combinev, converted with
// V6_vconv_hf_qf32, and the result is passed through V6_vdealh.
// NOTE(review): the add-with-zero presumably exists to turn the IEEE sf input
// into the qf32 form that V6_vconv_hf_qf32 consumes, and V6_vdealh presumably
// restores the element order after the conversion -- confirm against the
// HVX reference manual.
def: Pat<(VecF16 (pf1<fpround> HWF32:$Vuu)),
(V6_vdealh (V6_vconv_hf_qf32
(VecPF32 (Combinev (V6_vadd_sf (HiVec HvxWR:$Vuu), (V6_vd0)),
(V6_vadd_sf (LoVec HvxWR:$Vuu), (V6_vd0))
))))>;
}
// HVX IEEE arithmetic Instructions
// Active only when both the UseHVXV68 and UseHVXIEEEFP predicates hold.
// Each fadd/fsub/fmul on HVF16 or HVF32 operands maps one-to-one onto a
// single instruction taking the same two operands; no conversion
// instruction is attached to the result.
let Predicates = [UseHVXV68, UseHVXIEEEFP] in {
// fadd: half precision, then single precision.
def: Pat<(fadd HVF16:$Rs, HVF16:$Rt),
(V6_vadd_hf_hf HVF16:$Rs, HVF16:$Rt)>;
def: Pat<(fadd HVF32:$Rs, HVF32:$Rt),
(V6_vadd_sf_sf HVF32:$Rs, HVF32:$Rt)>;
// fsub: half precision, then single precision.
def: Pat<(fsub HVF16:$Rs, HVF16:$Rt),
(V6_vsub_hf_hf HVF16:$Rs, HVF16:$Rt)>;
def: Pat<(fsub HVF32:$Rs, HVF32:$Rt),
(V6_vsub_sf_sf HVF32:$Rs, HVF32:$Rt)>;
// fmul: half precision, then single precision.
def: Pat<(fmul HVF16:$Rs, HVF16:$Rt),
(V6_vmpy_hf_hf HVF16:$Rs, HVF16:$Rt)>;
def: Pat<(fmul HVF32:$Rs, HVF32:$Rt),
(V6_vmpy_sf_sf HVF32:$Rs, HVF32:$Rt)>;
}
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;

View File

@ -0,0 +1,167 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s
; qfloat mode (attributes #0): fadd on <64 x half> is expected to select a
; qf16 vadd followed by a conversion of the qf16 result back to hf.
define <64 x half> @f0(<64 x half> %a0, <64 x half> %a1) #0 {
; CHECK-LABEL: f0:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.qf16 = vadd(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = v0.qf16
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fadd <64 x half> %a0, %a1
ret <64 x half> %v0
}
; qfloat mode (attributes #0): fadd on <32 x float> is expected to select a
; qf32 vadd followed by a conversion of the qf32 result back to sf.
define <32 x float> @f1(<32 x float> %a0, <32 x float> %a1) #0 {
; CHECK-LABEL: f1:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.qf32 = vadd(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = v0.qf32
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fadd <32 x float> %a0, %a1
ret <32 x float> %v0
}
; qfloat mode (attributes #0): fsub on <64 x half> is expected to select a
; qf16 vsub followed by a conversion of the qf16 result back to hf.
define <64 x half> @f2(<64 x half> %a0, <64 x half> %a1) #0 {
; CHECK-LABEL: f2:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.qf16 = vsub(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = v0.qf16
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fsub <64 x half> %a0, %a1
ret <64 x half> %v0
}
; qfloat mode (attributes #0): fsub on <32 x float> is expected to select a
; qf32 vsub followed by a conversion of the qf32 result back to sf.
define <32 x float> @f3(<32 x float> %a0, <32 x float> %a1) #0 {
; CHECK-LABEL: f3:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.qf32 = vsub(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = v0.qf32
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fsub <32 x float> %a0, %a1
ret <32 x float> %v0
}
; qfloat mode (attributes #0): fmul on <64 x half> is expected to select a
; qf16 vmpy followed by a conversion of the qf16 result back to hf.
define <64 x half> @f4(<64 x half> %a0, <64 x half> %a1) #0 {
; CHECK-LABEL: f4:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.qf16 = vmpy(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = v0.qf16
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fmul <64 x half> %a0, %a1
ret <64 x half> %v0
}
; qfloat mode (attributes #0): fmul on <32 x float> is expected to select a
; qf32 vmpy followed by a conversion of the qf32 result back to sf.
define <32 x float> @f5(<32 x float> %a0, <32 x float> %a1) #0 {
; CHECK-LABEL: f5:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.qf32 = vmpy(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = v0.qf32
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fmul <32 x float> %a0, %a1
ret <32 x float> %v0
}
; IEEE FP mode (attributes #1): fadd on <64 x half> is expected to select a
; single hf vadd with no conversion instruction.
define <64 x half> @f6(<64 x half> %a0, <64 x half> %a1) #1 {
; CHECK-LABEL: f6:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = vadd(v0.hf,v1.hf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fadd <64 x half> %a0, %a1
ret <64 x half> %v0
}
; IEEE FP mode (attributes #1): fadd on <32 x float> is expected to select a
; single sf vadd with no conversion instruction.
define <32 x float> @f7(<32 x float> %a0, <32 x float> %a1) #1 {
; CHECK-LABEL: f7:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = vadd(v0.sf,v1.sf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fadd <32 x float> %a0, %a1
ret <32 x float> %v0
}
; IEEE FP mode (attributes #1): fsub on <64 x half> is expected to select a
; single hf vsub with no conversion instruction.
define <64 x half> @f8(<64 x half> %a0, <64 x half> %a1) #1 {
; CHECK-LABEL: f8:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = vsub(v0.hf,v1.hf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fsub <64 x half> %a0, %a1
ret <64 x half> %v0
}
; IEEE FP mode (attributes #1): fsub on <32 x float> is expected to select a
; single sf vsub with no conversion instruction.
define <32 x float> @f9(<32 x float> %a0, <32 x float> %a1) #1 {
; CHECK-LABEL: f9:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = vsub(v0.sf,v1.sf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fsub <32 x float> %a0, %a1
ret <32 x float> %v0
}
; IEEE FP mode (attributes #1): fmul on <64 x half> is expected to select a
; single hf vmpy with no conversion instruction.
define <64 x half> @f10(<64 x half> %a0, <64 x half> %a1) #1 {
; CHECK-LABEL: f10:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = vmpy(v0.hf,v1.hf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fmul <64 x half> %a0, %a1
ret <64 x half> %v0
}
; IEEE FP mode (attributes #1): fmul on <32 x float> is expected to select a
; single sf vmpy with no conversion instruction.
define <32 x float> @f11(<32 x float> %a0, <32 x float> %a1) #1 {
; CHECK-LABEL: f11:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = vmpy(v0.sf,v1.sf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
%v0 = fmul <32 x float> %a0, %a1
ret <32 x float> %v0
}
; Both attribute groups target hexagonv69 with HVX v69 and 128-byte vectors.
; They differ only in the floating-point feature: #0 enables +hvx-qfloat
; (used by f0-f5), #1 enables +hvx-ieee-fp (used by f6-f11).
attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }
attributes #1 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-ieee-fp" }