[Hexagon] Generate HVX/FP compare instructions

Co-authored-by: Anirudh Sundar Subramaniam <quic_sanirudh@quicinc.com>
2021-12-30 11:44:48 -08:00 · 2021-12-30 11:44:48 -08:00 · 9e6afbedb0
parent dabac5feec
commit 9e6afbedb0
4 changed files with 825 additions and 0 deletions
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@ -91,6 +91,10 @@ HexagonTargetLowering::initializeHVXLowering() {

  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
      Subtarget.useHVXFloatingPoint()) {
+    setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
+    setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
    setOperationAction(ISD::INSERT_SUBVECTOR,  MVT::v64f16, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR,  MVT::v32f32, Custom);
@ -122,6 +126,9 @@ HexagonTargetLowering::initializeHVXLowering() {

    setOperationAction(ISD::LOAD,    MVT::v64f32, Custom);
    setOperationAction(ISD::STORE,   MVT::v64f32, Custom);
+    setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
+    setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
+    setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);

    setOperationAction(ISD::MLOAD, MVT::v32f32, Custom);
    setOperationAction(ISD::MSTORE, MVT::v32f32, Custom);
@ -248,6 +255,32 @@ HexagonTargetLowering::initializeHVXLowering() {
    }
  }

+  setCondCodeAction(ISD::SETNE,  MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETLE,  MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETGE,  MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETLT,  MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
+  setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+
+  setCondCodeAction(ISD::SETNE,  MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETLE,  MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETGE,  MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETLT,  MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
+  setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+
  // Boolean vectors.

  for (MVT T : LegalW) {
@ -2258,6 +2291,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
+      case ISD::FMINNUM:
+      case ISD::FMAXNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@ -453,6 +453,44 @@ let Predicates = [UseHVX] in {
           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
 }

+// Vector select on HVX floating-point vectors (f16 and f32 element types):
+// every form is selected to V6_vmux.
+let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
+  // f16: plain predicate keeps the operand order.
+  def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
+           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+  // f16: a negated predicate is absorbed by exchanging the two data operands
+  // of the vmux, so the un-negated $Qu is used directly.
+  def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt),
+           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+
+  // f32: same two forms as above.
+  def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt),
+           (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+  def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt),
+           (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+}
+
+// Floating-point min/max when the qfloat feature is enabled: selects the
+// V6_vmin_hf/V6_vmax_hf and V6_vmin_sf/V6_vmax_sf instructions.
+let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in {
+  // Select-of-compare forms, instantiated for both setgt and setogt.
+  // NOTE(review): AddedComplexity = 220 presumably makes these patterns win
+  // over the separate compare + vmux patterns -- confirm against the
+  // MinMax_pats definition (not visible in this hunk).
+  let AddedComplexity = 220 in {
+    defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect,  setgt, VecQ16, HVF16>;
+    defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setogt, VecQ16, HVF16>;
+    defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect,  setgt, VecQ32, HVF32>;
+    defm: MinMax_pats<V6_vmin_sf, V6_vmax_sf, vselect, setogt, VecQ32, HVF32>;
+  }
+  // Direct selection of the fminnum/fmaxnum nodes to the same instructions.
+  def: OpR_RR_pat<V6_vmin_hf, pf2<fminnum>, VecF16, HVF16>;
+  def: OpR_RR_pat<V6_vmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
+  def: OpR_RR_pat<V6_vmin_sf, pf2<fminnum>, VecF32, HVF32>;
+  def: OpR_RR_pat<V6_vmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
+}
+
+// Floating-point min/max when the IEEE FP feature is enabled: same set of
+// patterns as the qfloat block, but selecting the V6_vfmin_*/V6_vfmax_*
+// instructions.
+let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in {
+  // Select-of-compare forms, instantiated for both setgt and setogt.
+  // NOTE(review): AddedComplexity = 220 presumably makes these patterns win
+  // over the separate compare + vmux patterns -- confirm against the
+  // MinMax_pats definition (not visible in this hunk).
+  let AddedComplexity = 220 in {
+    defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect,  setgt, VecQ16, HVF16>;
+    defm: MinMax_pats<V6_vfmin_hf, V6_vfmax_hf, vselect, setogt, VecQ16, HVF16>;
+    defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect,  setgt, VecQ32, HVF32>;
+    defm: MinMax_pats<V6_vfmin_sf, V6_vfmax_sf, vselect, setogt, VecQ32, HVF32>;
+  }
+  // Direct selection of the fminnum/fmaxnum nodes to the same instructions.
+  def: OpR_RR_pat<V6_vfmin_hf, pf2<fminnum>, VecF16, HVF16>;
+  def: OpR_RR_pat<V6_vfmax_hf, pf2<fmaxnum>, VecF16, HVF16>;
+  def: OpR_RR_pat<V6_vfmin_sf, pf2<fminnum>, VecF32, HVF32>;
+  def: OpR_RR_pat<V6_vfmax_sf, pf2<fmaxnum>, VecF32, HVF32>;
+}
+
 let Predicates = [UseHVX] in {
  // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
  // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
@ -707,3 +745,63 @@ let Predicates = [UseHVX] in {
  def: AccRRR_pat<V6_vgtuw_or,     Or, setugt,   HQ32, HVI32, HVI32>;
  def: AccRRR_pat<V6_vgtuw_xor,   Xor, setugt,   HQ32, HVI32, HVI32>;
 }
+
+// Floating-point vector compares producing HVX predicate registers.
+// Only the EQ and GT families (and ONE, via a predicate-not) have patterns
+// here; the remaining FP condition codes are marked Expand for v64f16 and
+// v32f32 in HexagonISelLoweringHVX.cpp.
+// NOTE(review): the plain, ordered (seto*) and unordered (setu*) variants of
+// each condition all select the same instruction, and equality uses
+// V6_veqh/V6_veqw (no hf/sf suffix) -- confirm that the resulting behaviour
+// for NaN and +0.0/-0.0 inputs is intended.
+let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
+  // f16: equality and greater-than.
+  def: OpR_RR_pat<V6_veqh,              seteq,  VecQ16, HVF16>;
+  def: OpR_RR_pat<V6_veqh,             setoeq,  VecQ16, HVF16>;
+  def: OpR_RR_pat<V6_veqh,             setueq,  VecQ16, HVF16>;
+  def: OpR_RR_pat<V6_vgthf,             setgt,  VecQ16, HVF16>;
+  def: OpR_RR_pat<V6_vgthf,            setogt,  VecQ16, HVF16>;
+  def: OpR_RR_pat<V6_vgthf,            setugt,  VecQ16, HVF16>;
+
+  // f32: equality and greater-than.
+  def: OpR_RR_pat<V6_veqw,              seteq,  VecQ32, HVF32>;
+  def: OpR_RR_pat<V6_veqw,             setoeq,  VecQ32, HVF32>;
+  def: OpR_RR_pat<V6_veqw,             setueq,  VecQ32, HVF32>;
+  def: OpR_RR_pat<V6_vgtsf,             setgt,  VecQ32, HVF32>;
+  def: OpR_RR_pat<V6_vgtsf,            setogt,  VecQ32, HVF32>;
+  def: OpR_RR_pat<V6_vgtsf,            setugt,  VecQ32, HVF32>;
+
+  // f16: compare combined with And/Or/Xor of an existing predicate, selected
+  // to the _and/_or/_xor forms of the compare instruction.
+  def: AccRRR_pat<V6_veqh_and,    And,          seteq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_or,      Or,          seteq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_xor,    Xor,          seteq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_and,    And,         setoeq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_or,      Or,         setoeq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_xor,    Xor,         setoeq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_and,    And,         setueq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_or,      Or,         setueq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_veqh_xor,    Xor,         setueq,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_and,   And,          setgt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_or,     Or,          setgt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_xor,   Xor,          setgt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_and,   And,         setogt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_or,     Or,         setogt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_xor,   Xor,         setogt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_and,   And,         setugt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_or,     Or,         setugt,  HQ16, HVF16, HVF16>;
+  def: AccRRR_pat<V6_vgthf_xor,   Xor,         setugt,  HQ16, HVF16, HVF16>;
+
+  // f32: the same accumulate-compare forms.
+  def: AccRRR_pat<V6_veqw_and,    And,          seteq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_or,      Or,          seteq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_xor,    Xor,          seteq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_and,    And,         setoeq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_or,      Or,         setoeq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_xor,    Xor,         setoeq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_and,    And,         setueq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_or,      Or,         setueq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_veqw_xor,    Xor,         setueq,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_and,   And,          setgt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_or,     Or,          setgt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_xor,   Xor,          setgt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_and,   And,         setogt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_or,     Or,         setogt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_xor,   Xor,         setogt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_and,   And,         setugt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_or,     Or,         setugt,  HQ32, HVF32, HVF32>;
+  def: AccRRR_pat<V6_vgtsf_xor,   Xor,         setugt,  HQ32, HVF32, HVF32>;
+
+  // setone is selected as the negation of the equality compare.
+  def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)),
+           (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>;
+
+  def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)),
+           (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>;
+}
--- a/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll
@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s
+; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s
+
+; min
+
+; Half min: select(v0 olt v1, v0, v1); expected to become a single vmin on .hf lanes.
+define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_00:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.hf = vmin(v1.hf,v0.hf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp olt <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half, select(v0 ole v1, v0, v1): expected as vcmp.gt(v0,v1) feeding vmux(q0,v1,v0), not a vmin.
+define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_01:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.hf,v1.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ole <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half min written with ogt: select(v0 ogt v1, v1, v0); expected to become a single vmin.
+define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_02:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.hf = vmin(v0.hf,v1.hf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ogt <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
+  ret <64 x half> %t1
+}
+
+; Half, select(v0 oge v1, v1, v0): expected as vcmp.gt(v1,v0) feeding vmux(q0,v0,v1), not a vmin.
+define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_03:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.hf,v0.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oge <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
+  ret <64 x half> %t1
+}
+
+; Single min: select(v0 olt v1, v0, v1); expected to become a single vmin on .sf lanes.
+define <32 x float> @test_10(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.sf = vmin(v1.sf,v0.sf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp olt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single, select(v0 ole v1, v0, v1): expected as vcmp.gt(v0,v1) feeding vmux(q0,v1,v0), not a vmin.
+define <32 x float> @test_11(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_11:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.sf,v1.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ole <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single min written with ogt: select(v0 ogt v1, v1, v0); expected to become a single vmin.
+define <32 x float> @test_12(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.sf = vmin(v0.sf,v1.sf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ogt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
+  ret <32 x float> %t1
+}
+
+; Single, select(v0 oge v1, v1, v0): expected as vcmp.gt(v1,v0) feeding vmux(q0,v0,v1), not a vmin.
+define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_13:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.sf,v0.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oge <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
+  ret <32 x float> %t1
+}
+
+; max
+
+; Half max: select(v0 olt v1, v1, v0); expected to become a single vmax on .hf lanes.
+define <64 x half> @test_20(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_20:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.hf = vmax(v1.hf,v0.hf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp olt <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
+  ret <64 x half> %t1
+}
+
+; Half, select(v0 ole v1, v1, v0): expected as vcmp.gt(v0,v1) feeding vmux(q0,v0,v1), not a vmax.
+define <64 x half> @test_21(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_21:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.hf,v1.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ole <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0
+  ret <64 x half> %t1
+}
+
+; Half max written with ogt: select(v0 ogt v1, v0, v1); expected to become a single vmax.
+define <64 x half> @test_22(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_22:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.hf = vmax(v0.hf,v1.hf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ogt <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half, select(v0 oge v1, v0, v1): expected as vcmp.gt(v1,v0) feeding vmux(q0,v1,v0), not a vmax.
+define <64 x half> @test_23(<64 x half> %v0, <64 x half> %v1) #0 {
+; CHECK-LABEL: test_23:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.hf,v0.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oge <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Single max: select(v0 olt v1, v1, v0); expected to become a single vmax on .sf lanes.
+define <32 x float> @test_30(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_30:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.sf = vmax(v1.sf,v0.sf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp olt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
+  ret <32 x float> %t1
+}
+
+; Single, select(v0 ole v1, v1, v0): expected as vcmp.gt(v0,v1) feeding vmux(q0,v0,v1), not a vmax.
+define <32 x float> @test_31(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_31:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.sf,v1.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ole <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0
+  ret <32 x float> %t1
+}
+
+; Single max written with ogt: select(v0 ogt v1, v0, v1); expected to become a single vmax.
+define <32 x float> @test_32(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0.sf = vmax(v0.sf,v1.sf)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ogt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single, select(v0 oge v1, v0, v1): expected as vcmp.gt(v1,v0) feeding vmux(q0,v1,v0), not a vmax.
+define <32 x float> @test_33(<32 x float> %v0, <32 x float> %v1) #0 {
+; CHECK-LABEL: test_33:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.sf,v0.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oge <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+attributes #0 = { readnone nounwind "target-cpu"="hexagonv69" }
+
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll
@ -0,0 +1,466 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s
+; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s
+
+; --- Half
+
+; Half fcmp oeq feeding a select: expected as vcmp.eq on .h lanes followed by vmux.
+define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
+; CHECK-LABEL: test_00:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.eq(v0.h,v1.h)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v2)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oeq <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
+  ret <64 x half> %t1
+}
+
+; Half fcmp one: expected as the same vcmp.eq with the vmux inputs exchanged (no separate predicate negation).
+define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
+; CHECK-LABEL: test_01:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.eq(v0.h,v1.h)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v2,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp one <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
+  ret <64 x half> %t1
+}
+
+; Half fcmp olt: expected as vcmp.gt with the compare operands swapped (v1 > v0).
+define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
+; CHECK-LABEL: test_02:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.hf,v0.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v2)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp olt <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
+  ret <64 x half> %t1
+}
+
+; Half fcmp ole: expected as vcmp.gt(v0,v1) with the vmux inputs exchanged.
+define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
+; CHECK-LABEL: test_03:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.hf,v1.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v2,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ole <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
+  ret <64 x half> %t1
+}
+
+; Half fcmp ogt: expected to map directly to vcmp.gt(v0,v1) followed by vmux.
+define <64 x half> @test_04(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
+; CHECK-LABEL: test_04:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.hf,v1.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v2)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ogt <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
+  ret <64 x half> %t1
+}
+
+; Half fcmp oge: expected as vcmp.gt(v1,v0) with the vmux inputs exchanged.
+define <64 x half> @test_05(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 {
+; CHECK-LABEL: test_05:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.hf,v0.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v2,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oge <64 x half> %v0, %v1
+  %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2
+  ret <64 x half> %t1
+}
+
+; Half oeq AND-ed with a predicate truncated from v2: expected to use the accumulating form "q0 &= vcmp.eq".
+define <64 x half> @test_0a(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
+; CHECK-LABEL: test_0a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 &= vcmp.eq(v0.h,v1.h)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp oeq <64 x half> %v0, %v1
+  %q1 = trunc <64 x i16> %v2 to <64 x i1>
+  %q2 = and <64 x i1> %q0, %q1
+  %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half oeq OR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 |= vcmp.eq".
+define <64 x half> @test_0b(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
+; CHECK-LABEL: test_0b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 |= vcmp.eq(v0.h,v1.h)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp oeq <64 x half> %v0, %v1
+  %q1 = trunc <64 x i16> %v2 to <64 x i1>
+  %q2 = or <64 x i1> %q0, %q1
+  %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half oeq XOR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 ^= vcmp.eq".
+define <64 x half> @test_0c(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
+; CHECK-LABEL: test_0c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 ^= vcmp.eq(v0.h,v1.h)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp oeq <64 x half> %v0, %v1
+  %q1 = trunc <64 x i16> %v2 to <64 x i1>
+  %q2 = xor <64 x i1> %q0, %q1
+  %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half ogt AND-ed with a predicate truncated from v2: expected to use the accumulating form "q0 &= vcmp.gt".
+define <64 x half> @test_0d(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
+; CHECK-LABEL: test_0d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 &= vcmp.gt(v0.hf,v1.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp ogt <64 x half> %v0, %v1
+  %q1 = trunc <64 x i16> %v2 to <64 x i1>
+  %q2 = and <64 x i1> %q0, %q1
+  %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half ogt OR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 |= vcmp.gt".
+define <64 x half> @test_0e(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
+; CHECK-LABEL: test_0e:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 |= vcmp.gt(v0.hf,v1.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp ogt <64 x half> %v0, %v1
+  %q1 = trunc <64 x i16> %v2 to <64 x i1>
+  %q2 = or <64 x i1> %q0, %q1
+  %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+; Half ogt XOR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 ^= vcmp.gt".
+define <64 x half> @test_0f(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 {
+; CHECK-LABEL: test_0f:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 ^= vcmp.gt(v0.hf,v1.hf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp ogt <64 x half> %v0, %v1
+  %q1 = trunc <64 x i16> %v2 to <64 x i1>
+  %q2 = xor <64 x i1> %q0, %q1
+  %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1
+  ret <64 x half> %t1
+}
+
+
+; --- Single
+
+; Single fcmp oeq feeding a select: expected as vcmp.eq on .w lanes followed by vmux.
+define <32 x float> @test_10(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
+; CHECK-LABEL: test_10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.eq(v0.w,v1.w)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v2)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oeq <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
+  ret <32 x float> %t1
+}
+
+; Single fcmp one: expected as the same vcmp.eq with the vmux inputs exchanged (no separate predicate negation).
+define <32 x float> @test_11(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
+; CHECK-LABEL: test_11:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.eq(v0.w,v1.w)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v2,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp one <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
+  ret <32 x float> %t1
+}
+
+; Single fcmp olt: expected as vcmp.gt with the compare operands swapped (v1 > v0).
+define <32 x float> @test_12(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
+; CHECK-LABEL: test_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.sf,v0.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v2)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp olt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
+  ret <32 x float> %t1
+}
+
+; Single fcmp ole: expected as vcmp.gt(v0,v1) with the vmux inputs exchanged.
+define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
+; CHECK-LABEL: test_13:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.sf,v1.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v2,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ole <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
+  ret <32 x float> %t1
+}
+
+; Single fcmp ogt: expected to map directly to vcmp.gt(v0,v1) followed by vmux.
+define <32 x float> @test_14(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
+; CHECK-LABEL: test_14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v0.sf,v1.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v1,v2)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp ogt <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
+  ret <32 x float> %t1
+}
+
+; Single fcmp oge: expected as vcmp.gt(v1,v0) with the vmux inputs exchanged.
+define <32 x float> @test_15(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 {
+; CHECK-LABEL: test_15:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vcmp.gt(v1.sf,v0.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v2,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %t0 = fcmp oge <32 x float> %v0, %v1
+  %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2
+  ret <32 x float> %t1
+}
+
+; Single oeq AND-ed with a predicate truncated from v2: expected to use the accumulating form "q0 &= vcmp.eq".
+define <32 x float> @test_1a(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
+; CHECK-LABEL: test_1a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 &= vcmp.eq(v0.w,v1.w)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp oeq <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = and <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single oeq OR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 |= vcmp.eq".
+define <32 x float> @test_1b(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
+; CHECK-LABEL: test_1b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 |= vcmp.eq(v0.w,v1.w)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp oeq <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = or <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single oeq XOR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 ^= vcmp.eq".
+define <32 x float> @test_1c(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
+; CHECK-LABEL: test_1c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 ^= vcmp.eq(v0.w,v1.w)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp oeq <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = xor <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single ogt AND-ed with a predicate truncated from v2: expected to use the accumulating form "q0 &= vcmp.gt".
+define <32 x float> @test_1d(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
+; CHECK-LABEL: test_1d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 &= vcmp.gt(v0.sf,v1.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp ogt <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = and <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single ogt OR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 |= vcmp.gt".
+define <32 x float> @test_1e(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
+; CHECK-LABEL: test_1e:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 |= vcmp.gt(v0.sf,v1.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp ogt <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = or <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+; Single ogt XOR-ed with a predicate truncated from v2: expected to use the accumulating form "q0 ^= vcmp.gt".
+define <32 x float> @test_1f(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 {
+; CHECK-LABEL: test_1f:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ##16843009
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 = vand(v2,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     q0 ^= vcmp.gt(v0.sf,v1.sf)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     v0 = vmux(q0,v0,v1)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %q0 = fcmp ogt <32 x float> %v0, %v1
+  %q1 = trunc <32 x i32> %v2 to <32 x i1>
+  %q2 = xor <32 x i1> %q0, %q1
+  %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+  ret <32 x float> %t1
+}
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv69" }