[Hexagon] Generate HVX/FP compare instructions

Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh@quicinc.com>
This commit is contained in:
Krzysztof Parzyszek 2021-12-30 11:44:48 -08:00
parent dabac5feec
commit 9e6afbedb0
4 changed files with 825 additions and 0 deletions

View File

@ -91,6 +91,10 @@ HexagonTargetLowering::initializeHVXLowering() {
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom);
@ -122,6 +126,9 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
setOperationAction(ISD::STORE, MVT::v64f32, Custom);
setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);
setOperationAction(ISD::MLOAD, MVT::v32f32, Custom);
setOperationAction(ISD::MSTORE, MVT::v32f32, Custom);
@ -248,6 +255,32 @@ HexagonTargetLowering::initializeHVXLowering() {
}
}
setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
// Boolean vectors.
for (MVT T : LegalW) {
@ -2258,6 +2291,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::MUL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::MULHS:
case ISD::MULHU:
case ISD::AND:

View File

@ -453,6 +453,44 @@ let Predicates = [UseHVX] in {
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
// Vector select on floating-point HVX vectors (f16 and f32 element types).
// A plain vselect maps to V6_vmux with the operands in source order; when the
// predicate is wrapped in qnot, the negation is absorbed by swapping the two
// data operands of V6_vmux instead of emitting a separate predicate-not.
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
// Floating-point min/max selection when the UseHVXQFloat predicate holds
// (V68+, 128-byte vectors). Uses the V6_vmin_*/V6_vmax_* instructions.
let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in {
// Compare+vselect idioms recognised as min/max, for both the don't-care
// (setgt) and ordered (setogt) greater-than condition codes.
// NOTE(review): AddedComplexity = 220 presumably makes these win over the
// separate compare and vmux patterns -- confirm against MinMax_pats, which is
// defined outside this view.
let AddedComplexity = 220 in {
defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setgt, VecQ16, HVF16>;
defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setogt, VecQ16, HVF16>;
defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setgt, VecQ32, HVF32>;
defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setogt, VecQ32, HVF32>;
}
// Direct selection of the fminnum/fmaxnum nodes for f16 and f32 vectors.
def: OpR_RR_pat<V6_vmin_hf, pf2<fminnum>, VecF16, HVF16>;
def: OpR_RR_pat<V6_vmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
def: OpR_RR_pat<V6_vmin_sf, pf2<fminnum>, VecF32, HVF32>;
def: OpR_RR_pat<V6_vmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
}
// Floating-point min/max selection when the UseHVXIEEEFP predicate holds
// (V68+, 128-byte vectors). Same pattern shapes as the QFloat variant, but
// selecting the V6_vfmin_*/V6_vfmax_* instructions.
let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in {
// Compare+vselect idioms recognised as min/max (setgt and setogt forms).
// NOTE(review): AddedComplexity = 220 presumably gives these priority over
// the separate compare and vmux patterns -- confirm against MinMax_pats.
let AddedComplexity = 220 in {
defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setgt, VecQ16, HVF16>;
defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setogt, VecQ16, HVF16>;
defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setgt, VecQ32, HVF32>;
defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setogt, VecQ32, HVF32>;
}
// Direct selection of the fminnum/fmaxnum nodes for f16 and f32 vectors.
def: OpR_RR_pat<V6_vfmin_hf, pf2<fminnum>, VecF16, HVF16>;
def: OpR_RR_pat<V6_vfmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
def: OpR_RR_pat<V6_vfmin_sf, pf2<fminnum>, VecF32, HVF32>;
def: OpR_RR_pat<V6_vfmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
}
let Predicates = [UseHVX] in {
// For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
// V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
@ -707,3 +745,63 @@ let Predicates = [UseHVX] in {
def: AccRRR_pat<V6_vgtuw_or, Or, setugt, HQ32, HVI32, HVI32>;
def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>;
}
// Floating-point vector compares for f16 (HVF16) and f32 (HVF32) vectors.
//
// Only "equal" and "greater than" have direct patterns here; the don't-care,
// ordered and unordered condition codes (e.g. seteq/setoeq/setueq) all select
// the same instruction.
// NOTE(review): because ordered and unordered variants share one instruction,
// NaN inputs are not distinguished by these patterns -- confirm this is the
// intended semantics.
// NOTE(review): equality is selected through V6_veqh / V6_veqw rather than an
// FP-specific compare; if these compare raw lane bits, +0.0 vs -0.0 and NaN
// operands would not follow IEEE equality -- confirm.
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
// Plain compares producing a predicate register.
def: OpR_RR_pat<V6_veqh, seteq, VecQ16, HVF16>;
def: OpR_RR_pat<V6_veqh, setoeq, VecQ16, HVF16>;
def: OpR_RR_pat<V6_veqh, setueq, VecQ16, HVF16>;
def: OpR_RR_pat<V6_vgthf, setgt, VecQ16, HVF16>;
def: OpR_RR_pat<V6_vgthf, setogt, VecQ16, HVF16>;
def: OpR_RR_pat<V6_vgthf, setugt, VecQ16, HVF16>;
def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVF32>;
def: OpR_RR_pat<V6_veqw, setoeq, VecQ32, HVF32>;
def: OpR_RR_pat<V6_veqw, setueq, VecQ32, HVF32>;
def: OpR_RR_pat<V6_vgtsf, setgt, VecQ32, HVF32>;
def: OpR_RR_pat<V6_vgtsf, setogt, VecQ32, HVF32>;
def: OpR_RR_pat<V6_vgtsf, setugt, VecQ32, HVF32>;
// Accumulating compares: the compare result is combined (And/Or/Xor) with an
// existing predicate, selecting the *_and / *_or / *_xor instruction forms.
// f16 equality.
def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_and, And, setoeq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_or, Or, setoeq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_xor, Xor, setoeq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_and, And, setueq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_or, Or, setueq, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_veqh_xor, Xor, setueq, HQ16, HVF16, HVF16>;
// f16 greater-than.
def: AccRRR_pat<V6_vgthf_and, And, setgt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_or, Or, setgt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_xor, Xor, setgt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_and, And, setogt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_or, Or, setogt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_xor, Xor, setogt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_and, And, setugt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_or, Or, setugt, HQ16, HVF16, HVF16>;
def: AccRRR_pat<V6_vgthf_xor, Xor, setugt, HQ16, HVF16, HVF16>;
// f32 equality.
def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_and, And, setoeq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_or, Or, setoeq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_xor, Xor, setoeq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_and, And, setueq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_or, Or, setueq, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_veqw_xor, Xor, setueq, HQ32, HVF32, HVF32>;
// f32 greater-than.
def: AccRRR_pat<V6_vgtsf_and, And, setgt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_or, Or, setgt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_xor, Xor, setgt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_and, And, setogt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_or, Or, setogt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_xor, Xor, setogt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_and, And, setugt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_or, Or, setugt, HQ32, HVF32, HVF32>;
def: AccRRR_pat<V6_vgtsf_xor, Xor, setugt, HQ32, HVF32, HVF32>;
// Ordered not-equal is selected as the negation of the equality compare.
def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)),
(V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>;
def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)),
(V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>;
}

View File

@ -0,0 +1,226 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s
; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s
; min
; Half min: (v0 olt v1) ? v0 : v1 is expected to fold into a single vmin.
define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_00:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = vmin(v1.hf,v0.hf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp olt <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half: (v0 ole v1) ? v0 : v1. The expected output is not a vmin but a
; vcmp.gt followed by a vmux with swapped data operands.
define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_01:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ole <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half min: (v0 ogt v1) ? v1 : v0 is expected to fold into a single vmin.
define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_02:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = vmin(v0.hf,v1.hf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ogt <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
ret <64 x half> %t1
}
; Half: (v0 oge v1) ? v1 : v0. The expected output is a vcmp.gt with swapped
; compare operands followed by a vmux, not a vmin.
define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_03:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oge <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
ret <64 x half> %t1
}
; Single min: (v0 olt v1) ? v0 : v1 is expected to fold into a single vmin.
define <32 x float> @test_10(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_10:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = vmin(v1.sf,v0.sf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp olt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single: (v0 ole v1) ? v0 : v1. The expected output is a vcmp.gt followed by
; a vmux with swapped data operands, not a vmin.
define <32 x float> @test_11(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_11:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ole <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single min: (v0 ogt v1) ? v1 : v0 is expected to fold into a single vmin.
define <32 x float> @test_12(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_12:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = vmin(v0.sf,v1.sf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ogt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
ret <32 x float> %t1
}
; Single: (v0 oge v1) ? v1 : v0. The expected output is a vcmp.gt with swapped
; compare operands followed by a vmux, not a vmin.
define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_13:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oge <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
ret <32 x float> %t1
}
; max
; Half max: (v0 olt v1) ? v1 : v0 is expected to fold into a single vmax.
define <64 x half> @test_20(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_20:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = vmax(v1.hf,v0.hf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp olt <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
ret <64 x half> %t1
}
; Half: (v0 ole v1) ? v1 : v0. The expected output is a vcmp.gt followed by a
; vmux, not a vmax.
define <64 x half> @test_21(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_21:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ole <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
ret <64 x half> %t1
}
; Half max: (v0 ogt v1) ? v0 : v1 is expected to fold into a single vmax.
define <64 x half> @test_22(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_22:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.hf = vmax(v0.hf,v1.hf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ogt <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half: (v0 oge v1) ? v0 : v1. The expected output is a vcmp.gt with swapped
; compare operands followed by a vmux, not a vmax.
define <64 x half> @test_23(<64 x half> %v0, <64 x half> %v1) #0 {
; CHECK-LABEL: test_23:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oge <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Single max: (v0 olt v1) ? v1 : v0 is expected to fold into a single vmax.
define <32 x float> @test_30(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_30:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = vmax(v1.sf,v0.sf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp olt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
ret <32 x float> %t1
}
; Single: (v0 ole v1) ? v1 : v0. The expected output is a vcmp.gt followed by
; a vmux, not a vmax.
define <32 x float> @test_31(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_31:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ole <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
ret <32 x float> %t1
}
; Single max: (v0 ogt v1) ? v0 : v1 is expected to fold into a single vmax.
define <32 x float> @test_32(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_32:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: v0.sf = vmax(v0.sf,v1.sf)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ogt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single: (v0 oge v1) ? v0 : v1. The expected output is a vcmp.gt with swapped
; compare operands followed by a vmux, not a vmax.
define <32 x float> @test_33(<32 x float> %v0, <32 x float> %v1) #0 {
; CHECK-LABEL: test_33:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oge <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
attributes #0 = { readnone nounwind "target-cpu"="hexagonv69" }

View File

@ -0,0 +1,466 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s
; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s
; --- Half
; Half, ordered equal feeding a select: expected as vcmp.eq on .h lanes
; followed by a vmux with the select operands in source order.
define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
; CHECK-LABEL: test_00:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.eq(v0.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v2)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oeq <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
ret <64 x half> %t1
}
; Half, ordered not-equal feeding a select: expected as vcmp.eq with the
; negation absorbed by swapping the vmux data operands.
define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
; CHECK-LABEL: test_01:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.eq(v0.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v2,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp one <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
ret <64 x half> %t1
}
; Half, ordered less-than feeding a select: expected as vcmp.gt with the
; compare operands swapped, then a vmux in source order.
define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
; CHECK-LABEL: test_02:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v2)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp olt <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
ret <64 x half> %t1
}
; Half, ordered less-or-equal feeding a select: expected as vcmp.gt with the
; negation absorbed by swapping the vmux data operands.
define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
; CHECK-LABEL: test_03:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v2,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ole <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
ret <64 x half> %t1
}
; Half, ordered greater-than feeding a select: expected as a direct vcmp.gt
; followed by a vmux in source order.
define <64 x half> @test_04(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
; CHECK-LABEL: test_04:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v2)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ogt <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
ret <64 x half> %t1
}
; Half, ordered greater-or-equal feeding a select: expected as vcmp.gt with
; swapped compare operands and swapped vmux data operands.
define <64 x half> @test_05(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
; CHECK-LABEL: test_05:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v2,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oge <64 x half> %v0, %v1
%t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
ret <64 x half> %t1
}
; Half, ordered equal AND-ed with a mask truncated from %v2: expected to use
; the accumulating "q0 &= vcmp.eq" form on the predicate built by vand.
define <64 x half> @test_0a(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
; CHECK-LABEL: test_0a:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 &= vcmp.eq(v0.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp oeq <64 x half> %v0, %v1
%q1 = trunc <64 x i16> %v2 to <64 x i1>
%q2 = and <64 x i1> %q0, %q1
%t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half, ordered equal OR-ed with a mask truncated from %v2: expected to use
; the accumulating "q0 |= vcmp.eq" form on the predicate built by vand.
define <64 x half> @test_0b(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
; CHECK-LABEL: test_0b:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 |= vcmp.eq(v0.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp oeq <64 x half> %v0, %v1
%q1 = trunc <64 x i16> %v2 to <64 x i1>
%q2 = or <64 x i1> %q0, %q1
%t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half, ordered equal XOR-ed with a mask truncated from %v2: expected to use
; the accumulating "q0 ^= vcmp.eq" form on the predicate built by vand.
define <64 x half> @test_0c(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
; CHECK-LABEL: test_0c:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 ^= vcmp.eq(v0.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp oeq <64 x half> %v0, %v1
%q1 = trunc <64 x i16> %v2 to <64 x i1>
%q2 = xor <64 x i1> %q0, %q1
%t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half, ordered greater-than AND-ed with a mask truncated from %v2: expected
; to use the accumulating "q0 &= vcmp.gt" form on the predicate built by vand.
define <64 x half> @test_0d(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
; CHECK-LABEL: test_0d:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 &= vcmp.gt(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp ogt <64 x half> %v0, %v1
%q1 = trunc <64 x i16> %v2 to <64 x i1>
%q2 = and <64 x i1> %q0, %q1
%t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half, ordered greater-than OR-ed with a mask truncated from %v2: expected
; to use the accumulating "q0 |= vcmp.gt" form on the predicate built by vand.
define <64 x half> @test_0e(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
; CHECK-LABEL: test_0e:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 |= vcmp.gt(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp ogt <64 x half> %v0, %v1
%q1 = trunc <64 x i16> %v2 to <64 x i1>
%q2 = or <64 x i1> %q0, %q1
%t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; Half, ordered greater-than XOR-ed with a mask truncated from %v2: expected
; to use the accumulating "q0 ^= vcmp.gt" form on the predicate built by vand.
define <64 x half> @test_0f(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
; CHECK-LABEL: test_0f:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 ^= vcmp.gt(v0.hf,v1.hf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp ogt <64 x half> %v0, %v1
%q1 = trunc <64 x i16> %v2 to <64 x i1>
%q2 = xor <64 x i1> %q0, %q1
%t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
ret <64 x half> %t1
}
; --- Single
; Single, ordered equal feeding a select: expected as vcmp.eq on .w lanes
; followed by a vmux with the select operands in source order.
define <32 x float> @test_10(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
; CHECK-LABEL: test_10:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.eq(v0.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v2)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oeq <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
ret <32 x float> %t1
}
; Single, ordered not-equal feeding a select: expected as vcmp.eq with the
; negation absorbed by swapping the vmux data operands.
define <32 x float> @test_11(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
; CHECK-LABEL: test_11:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.eq(v0.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v2,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp one <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
ret <32 x float> %t1
}
; Single, ordered less-than feeding a select: expected as vcmp.gt with the
; compare operands swapped, then a vmux in source order.
define <32 x float> @test_12(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
; CHECK-LABEL: test_12:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v2)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp olt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
ret <32 x float> %t1
}
; Single, ordered less-or-equal feeding a select: expected as vcmp.gt with
; the negation absorbed by swapping the vmux data operands.
define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
; CHECK-LABEL: test_13:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v2,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ole <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
ret <32 x float> %t1
}
; Single, ordered greater-than feeding a select: expected as a direct vcmp.gt
; followed by a vmux in source order.
define <32 x float> @test_14(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
; CHECK-LABEL: test_14:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v1,v2)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp ogt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
ret <32 x float> %t1
}
; Single, ordered greater-or-equal feeding a select: expected as vcmp.gt with
; swapped compare operands and swapped vmux data operands.
define <32 x float> @test_15(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
; CHECK-LABEL: test_15:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v2,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%t0 = fcmp oge <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
ret <32 x float> %t1
}
; Single, ordered equal AND-ed with a mask truncated from %v2: expected to
; use the accumulating "q0 &= vcmp.eq" form on the predicate built by vand.
define <32 x float> @test_1a(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
; CHECK-LABEL: test_1a:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 &= vcmp.eq(v0.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp oeq <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = and <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single, ordered equal OR-ed with a mask truncated from %v2: expected to
; use the accumulating "q0 |= vcmp.eq" form on the predicate built by vand.
define <32 x float> @test_1b(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
; CHECK-LABEL: test_1b:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 |= vcmp.eq(v0.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp oeq <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = or <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single, ordered equal XOR-ed with a mask truncated from %v2: expected to
; use the accumulating "q0 ^= vcmp.eq" form on the predicate built by vand.
define <32 x float> @test_1c(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
; CHECK-LABEL: test_1c:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 ^= vcmp.eq(v0.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp oeq <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = xor <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single, ordered greater-than AND-ed with a mask truncated from %v2: expected
; to use the accumulating "q0 &= vcmp.gt" form on the predicate built by vand.
define <32 x float> @test_1d(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
; CHECK-LABEL: test_1d:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 &= vcmp.gt(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp ogt <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = and <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single, ordered greater-than OR-ed with a mask truncated from %v2: expected
; to use the accumulating "q0 |= vcmp.gt" form on the predicate built by vand.
define <32 x float> @test_1e(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
; CHECK-LABEL: test_1e:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 |= vcmp.gt(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp ogt <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = or <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
; Single, ordered greater-than XOR-ed with a mask truncated from %v2: expected
; to use the accumulating "q0 ^= vcmp.gt" form on the predicate built by vand.
define <32 x float> @test_1f(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
; CHECK-LABEL: test_1f:
; CHECK: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r0 = ##16843009
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 = vand(v2,r0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q0 ^= vcmp.gt(v0.sf,v1.sf)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vmux(q0,v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
%q0 = fcmp ogt <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = xor <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}
attributes #0 = { nounwind readnone "target-cpu"="hexagonv69" }