[RISCV] Fix handling of nxvXi64 vmsgt(u).vx intrinsics on RV32.

We need to splat the scalar separately and use a .vv compare, but there
is no vmsgt(u).vv instruction, so add isel patterns to select
vmslt(u).vv with the operands swapped.

We also need to get the VT to use for the splat from an operand rather
than from the result, since the result VT is nxvXi1.

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D99704
Author: Craig Topper
Date:   2021-04-01 10:17:53 -07:00
Commit: d157e3f387 (parent 1addc231cd)
6 changed files with 643 additions and 6 deletions
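
As a rough overall model, the RV32 lowering of these intrinsics behaves like
the scalar sketch below (illustrative C++ only, not the actual SelectionDAG
code; the function name vmsgt_vx_model is made up): splat the i64 scalar into
a temporary vector, then express the greater-than compare as vmslt(u).vv with
the operands swapped.

    #include <cstdint>
    #include <vector>

    // Behavioural sketch of vmsgt.vx on nxvXi64 for RV32: splat the scalar
    // into a vector, then use "splat < element" in place of the missing
    // vmsgt.vv, since a > b is the same predicate as b < a.
    std::vector<bool> vmsgt_vx_model(const std::vector<int64_t> &Vs2,
                                     int64_t Scalar) {
      std::vector<int64_t> Splat(Vs2.size(), Scalar); // .vx operand becomes .vv
      std::vector<bool> Mask(Vs2.size());
      for (size_t i = 0; i < Vs2.size(); ++i)
        Mask[i] = Splat[i] < Vs2[i]; // vmslt.vv with the operands swapped
      return Mask;
    }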

File: llvm/lib/Target/RISCV/RISCVISelLowering.cpp

@@ -2720,7 +2720,6 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
SDValue &ScalarOp = Operands[SplatOp];
MVT OpVT = ScalarOp.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
// If this isn't a scalar, or its type is XLenVT we're done.
@@ -2739,6 +2738,15 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
// Use the previous operand to get the vXi64 VT. The result might be a mask
// VT for compares. Using the previous operand assumes that the previous
// operand will never have a smaller element size than a scalar operand and
// that a widening operation never uses SEW=64.
// NOTE: If this fails the below assert, we can probably just find the
// element count from any operand or result and use it to construct the VT.
assert(II->SplatOperand > 1 && "Unexpected splat operand!");
MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
// The more complex case is when the scalar is larger than XLenVT.
assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
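
The element type for the splat is taken from the operand just before the
scalar because the result of a compare intrinsic is a mask (nxvXi1). A toy
illustration of the operand layout this relies on (made-up C++ stand-ins, not
LLVM types; the indices follow the intrinsic signature shown in the tests,
with the intrinsic id at operand 0 and the scalar at operand 2):

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      // llvm.riscv.vmsgt.nxv1i64.i64(<vscale x 1 x i64>, i64, i32) as a node:
      std::vector<std::string> OperandTypes = {"intrinsic-id", "nxv1i64",
                                               "i64", "i32 (vl)"};
      std::string ResultType = "nxv1i1"; // mask: wrong element width for a splat
      unsigned SplatOp = 2;              // the i64 scalar operand
      // SplatOperand > 1 means the operand before the scalar is a real
      // operand (not the intrinsic id); the code further assumes it is a
      // vector whose elements are at least as wide as the scalar.
      assert(OperandTypes[SplatOp - 1] == "nxv1i64");
      assert(ResultType != OperandTypes[SplatOp - 1]);
      return 0;
    }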

File: llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

@@ -3891,13 +3891,60 @@ defm "" : VPatBinaryM_VV_VX_VI<"int_riscv_vmsle", "PseudoVMSLE", AllIntegerVecto
defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgtu", "PseudoVMSGTU", AllIntegerVectors>;
defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>;
// Match vmsgt with 2 vector operands to vmslt with the operands swapped.
// Occurs when legalizing vmsgt(u).vx intrinsics for i64 on RV32 since we need
// to use a more complex splat sequence. Add the pattern for all VTs for
// consistency.
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Mask (int_riscv_vmsgt (vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLT_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
vti.RegClass:$rs2,
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsgt_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLT_VV_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,
vti.RegClass:$rs2,
(vti.Mask V0),
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsgtu (vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLTU_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
vti.RegClass:$rs2,
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsgtu_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLTU_VV_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,
vti.RegClass:$rs2,
(vti.Mask V0),
GPR:$vl,
vti.SEW)>;
}
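
A rough elementwise model of what the masked patterns above select (scalar
C++ sketch only, function name made up; vl and tail handling are ignored):
only rs1 and rs2 are swapped for the compare, while the merge value and the
mask are passed through unchanged, so masked-off elements keep their merge
value.

    #include <cstdint>
    #include <vector>

    // int_riscv_vmsgt_mask(merge, rs2, rs1, mask, vl) selected as
    // PseudoVMSLT_VV_<LMUL>_MASK: rs1 < rs2 is the same predicate as
    // rs2 > rs1, and inactive elements take the merge value.
    std::vector<bool> vmsgt_vv_mask_model(const std::vector<bool> &Merge,
                                          const std::vector<int64_t> &Rs2,
                                          const std::vector<int64_t> &Rs1,
                                          const std::vector<bool> &Mask) {
      std::vector<bool> Result(Rs2.size());
      for (size_t i = 0; i < Rs2.size(); ++i)
        Result[i] = Mask[i] ? (Rs1[i] < Rs2[i]) // swapped operands
                            : Merge[i];
      return Result;
    }
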
// Match vmslt(u).vx intrinsics to vmsle(u).vi if the scalar is -15 to 16. This
// avoids the user needing to know that there is no vmslt(u).vi instruction.
// This is limited to vmslt(u).vx as there is no vmsge().vx intrinsic or
// instruction.
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))),
(vti.Scalar simm5_plus1:$rs2),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
GPR:$vl,

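The vmslt(u).vx-to-vmsle(u).vi patterns above rely on the identity
x < c  <=>  x <= c - 1 for signed integers, with c restricted to -15..16 so
that c - 1 still fits the simm5 immediate range of -16..15 (this is what the
DecImm operand transform computes). A standalone check of the signed case,
for illustration only:

    #include <cassert>
    #include <cstdint>

    // For every c that simm5_plus1 accepts, decrementing the immediate turns
    // a strict less-than into a less-than-or-equal with the same result.
    int main() {
      for (int64_t c = -15; c <= 16; ++c)
        for (int64_t x = -40; x <= 40; ++x)
          assert((x < c) == (x <= c - 1));
      return 0;
    }
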
File: llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i8.i8(
<vscale x 1 x i8>,
@@ -706,6 +706,192 @@ entry:
ret <vscale x 8 x i1> %a
}
declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
<vscale x 1 x i64>,
i64,
i32);
define <vscale x 1 x i1> @intrinsic_vmsgt_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_vx_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v25, v25, a1
; CHECK-NEXT: vmv.v.x v26, a0
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vsrl.vx v26, v26, a1
; CHECK-NEXT: vor.vv v25, v26, v25
; CHECK-NEXT: vmslt.vv v0, v25, v8
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
<vscale x 1 x i64> %0,
i64 %1,
i32 %2)
ret <vscale x 1 x i1> %a
}
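
The CHECK lines above rebuild the 64-bit scalar inside a vector register:
splat the high GPR and shift it left by 32, splat the low GPR and zero-extend
it with a shift-left/shift-right pair, then OR the two halves together before
the swapped vmslt.vv. A per-element model of that sequence (sketch only, the
function name is made up):

    #include <cstdint>

    // On RV32 the i64 scalar arrives in two GPRs: a0 holds the low 32 bits
    // and a1 the high 32 bits.
    int64_t splatElementFromGprPair(uint32_t A0Lo, uint32_t A1Hi) {
      uint64_t Hi = (uint64_t)A1Hi << 32;         // vmv.v.x v25, a1 ; vsll.vx 32
      uint64_t Lo = ((uint64_t)A0Lo << 32) >> 32; // vmv.v.x v26, a0 ; vsll/vsrl 32
      return (int64_t)(Hi | Lo);                  // vor.vv v25, v26, v25
    }
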
declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64.i64(
<vscale x 1 x i1>,
<vscale x 1 x i64>,
i64,
<vscale x 1 x i1>,
i32);
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v26, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vmv.v.x v27, a0
; CHECK-NEXT: vsll.vx v27, v27, a1
; CHECK-NEXT: vsrl.vx v27, v27, a1
; CHECK-NEXT: vor.vv v26, v27, v26
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmslt.vv v25, v26, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64.i64(
<vscale x 1 x i1> %0,
<vscale x 1 x i64> %1,
i64 %2,
<vscale x 1 x i1> %3,
i32 %4)
ret <vscale x 1 x i1> %a
}
declare <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64.i64(
<vscale x 2 x i64>,
i64,
i32);
define <vscale x 2 x i1> @intrinsic_vmsgt_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_vx_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
; CHECK-NEXT: vmv.v.x v26, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vmv.v.x v28, a0
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vsrl.vx v28, v28, a1
; CHECK-NEXT: vor.vv v26, v28, v26
; CHECK-NEXT: vmslt.vv v0, v26, v8
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64.i64(
<vscale x 2 x i64> %0,
i64 %1,
i32 %2)
ret <vscale x 2 x i1> %a
}
declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64(
<vscale x 2 x i1>,
<vscale x 2 x i64>,
i64,
<vscale x 2 x i1>,
i32);
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
; CHECK-NEXT: vmv.v.x v26, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vmv.v.x v28, a0
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vsrl.vx v28, v28, a1
; CHECK-NEXT: vor.vv v26, v28, v26
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmslt.vv v25, v26, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64(
<vscale x 2 x i1> %0,
<vscale x 2 x i64> %1,
i64 %2,
<vscale x 2 x i1> %3,
i32 %4)
ret <vscale x 2 x i1> %a
}
declare <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64.i64(
<vscale x 4 x i64>,
i64,
i32);
define <vscale x 4 x i1> @intrinsic_vmsgt_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_vx_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
; CHECK-NEXT: vmv.v.x v28, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsll.vx v12, v12, a1
; CHECK-NEXT: vsrl.vx v12, v12, a1
; CHECK-NEXT: vor.vv v28, v12, v28
; CHECK-NEXT: vmslt.vv v0, v28, v8
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64.i64(
<vscale x 4 x i64> %0,
i64 %1,
i32 %2)
ret <vscale x 4 x i1> %a
}
declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64(
<vscale x 4 x i1>,
<vscale x 4 x i64>,
i64,
<vscale x 4 x i1>,
i32);
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
; CHECK-NEXT: vmv.v.x v28, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vsll.vx v16, v16, a1
; CHECK-NEXT: vsrl.vx v16, v16, a1
; CHECK-NEXT: vor.vv v28, v16, v28
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmslt.vv v25, v28, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64(
<vscale x 4 x i1> %0,
<vscale x 4 x i64> %1,
i64 %2,
<vscale x 4 x i1> %3,
i32 %4)
ret <vscale x 4 x i1> %a
}
define <vscale x 1 x i1> @intrinsic_vmsgt_vi_nxv1i8_i8(<vscale x 1 x i8> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i8_i8:
; CHECK: # %bb.0: # %entry
@@ -1230,3 +1416,108 @@ entry:
ret <vscale x 8 x i1> %a
}
define <vscale x 1 x i1> @intrinsic_vmsgt_vi_nxv1i64_i64(<vscale x 1 x i64> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_vi_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vmsgt.vi v0, v8, 9
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i64.i64(
<vscale x 1 x i64> %0,
i64 9,
i32 %1)
ret <vscale x 1 x i1> %a
}
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vi_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmsgt.vi v25, v8, 9, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64.i64(
<vscale x 1 x i1> %0,
<vscale x 1 x i64> %1,
i64 9,
<vscale x 1 x i1> %2,
i32 %3)
ret <vscale x 1 x i1> %a
}
define <vscale x 2 x i1> @intrinsic_vmsgt_vi_nxv2i64_i64(<vscale x 2 x i64> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_vi_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
; CHECK-NEXT: vmsgt.vi v0, v8, 9
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64.i64(
<vscale x 2 x i64> %0,
i64 9,
i32 %1)
ret <vscale x 2 x i1> %a
}
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmsgt.vi v25, v8, 9, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64(
<vscale x 2 x i1> %0,
<vscale x 2 x i64> %1,
i64 9,
<vscale x 2 x i1> %2,
i32 %3)
ret <vscale x 2 x i1> %a
}
define <vscale x 4 x i1> @intrinsic_vmsgt_vi_nxv4i64_i64(<vscale x 4 x i64> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_vi_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
; CHECK-NEXT: vmsgt.vi v0, v8, 9
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64.i64(
<vscale x 4 x i64> %0,
i64 9,
i32 %1)
ret <vscale x 4 x i1> %a
}
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmsgt.vi v25, v8, 9, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64(
<vscale x 4 x i1> %0,
<vscale x 4 x i64> %1,
i64 9,
<vscale x 4 x i1> %2,
i32 %3)
ret <vscale x 4 x i1> %a
}

File: llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i8.i8(
<vscale x 1 x i8>,

File: llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i8.i8(
<vscale x 1 x i8>,
@@ -706,6 +706,192 @@ entry:
ret <vscale x 8 x i1> %a
}
declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i64.i64(
<vscale x 1 x i64>,
i64,
i32);
define <vscale x 1 x i1> @intrinsic_vmsgtu_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v25, v25, a1
; CHECK-NEXT: vmv.v.x v26, a0
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vsrl.vx v26, v26, a1
; CHECK-NEXT: vor.vv v25, v26, v25
; CHECK-NEXT: vmsltu.vv v0, v25, v8
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i64.i64(
<vscale x 1 x i64> %0,
i64 %1,
i32 %2)
ret <vscale x 1 x i1> %a
}
declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i64.i64(
<vscale x 1 x i1>,
<vscale x 1 x i64>,
i64,
<vscale x 1 x i1>,
i32);
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a3, a2, e64,m1,ta,mu
; CHECK-NEXT: vmv.v.x v26, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vmv.v.x v27, a0
; CHECK-NEXT: vsll.vx v27, v27, a1
; CHECK-NEXT: vsrl.vx v27, v27, a1
; CHECK-NEXT: vor.vv v26, v27, v26
; CHECK-NEXT: vsetvli a0, a2, e64,m1,tu,mu
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmsltu.vv v25, v26, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i64.i64(
<vscale x 1 x i1> %0,
<vscale x 1 x i64> %1,
i64 %2,
<vscale x 1 x i1> %3,
i32 %4)
ret <vscale x 1 x i1> %a
}
declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64.i64(
<vscale x 2 x i64>,
i64,
i32);
define <vscale x 2 x i1> @intrinsic_vmsgtu_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, a2, e64,m2,ta,mu
; CHECK-NEXT: vmv.v.x v26, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vmv.v.x v28, a0
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vsrl.vx v28, v28, a1
; CHECK-NEXT: vor.vv v26, v28, v26
; CHECK-NEXT: vmsltu.vv v0, v26, v8
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64.i64(
<vscale x 2 x i64> %0,
i64 %1,
i32 %2)
ret <vscale x 2 x i1> %a
}
declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64(
<vscale x 2 x i1>,
<vscale x 2 x i64>,
i64,
<vscale x 2 x i1>,
i32);
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a3, a2, e64,m2,ta,mu
; CHECK-NEXT: vmv.v.x v26, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v26, v26, a1
; CHECK-NEXT: vmv.v.x v28, a0
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vsrl.vx v28, v28, a1
; CHECK-NEXT: vor.vv v26, v28, v26
; CHECK-NEXT: vsetvli a0, a2, e64,m2,tu,mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmsltu.vv v25, v26, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64(
<vscale x 2 x i1> %0,
<vscale x 2 x i64> %1,
i64 %2,
<vscale x 2 x i1> %3,
i32 %4)
ret <vscale x 2 x i1> %a
}
declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64.i64(
<vscale x 4 x i64>,
i64,
i32);
define <vscale x 4 x i1> @intrinsic_vmsgtu_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_vx_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a2, a2, e64,m4,ta,mu
; CHECK-NEXT: vmv.v.x v28, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsll.vx v12, v12, a1
; CHECK-NEXT: vsrl.vx v12, v12, a1
; CHECK-NEXT: vor.vv v28, v12, v28
; CHECK-NEXT: vmsltu.vv v0, v28, v8
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64.i64(
<vscale x 4 x i64> %0,
i64 %1,
i32 %2)
ret <vscale x 4 x i1> %a
}
declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64(
<vscale x 4 x i1>,
<vscale x 4 x i64>,
i64,
<vscale x 4 x i1>,
i32);
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a3, a2, e64,m4,ta,mu
; CHECK-NEXT: vmv.v.x v28, a1
; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsll.vx v28, v28, a1
; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vsll.vx v16, v16, a1
; CHECK-NEXT: vsrl.vx v16, v16, a1
; CHECK-NEXT: vor.vv v28, v16, v28
; CHECK-NEXT: vsetvli a0, a2, e64,m4,tu,mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmsltu.vv v25, v28, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64(
<vscale x 4 x i1> %0,
<vscale x 4 x i64> %1,
i64 %2,
<vscale x 4 x i1> %3,
i32 %4)
ret <vscale x 4 x i1> %a
}
define <vscale x 1 x i1> @intrinsic_vmsgtu_vi_nxv1i8_i8(<vscale x 1 x i8> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i8_i8:
; CHECK: # %bb.0: # %entry
@@ -1230,3 +1416,108 @@ entry:
ret <vscale x 8 x i1> %a
}
define <vscale x 1 x i1> @intrinsic_vmsgtu_vi_nxv1i64_i64(<vscale x 1 x i64> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vmsgtu.vi v0, v8, 9
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i64.i64(
<vscale x 1 x i64> %0,
i64 9,
i32 %1)
ret <vscale x 1 x i1> %a
}
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vi_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmsgtu.vi v25, v8, 9, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i64.i64(
<vscale x 1 x i1> %0,
<vscale x 1 x i64> %1,
i64 9,
<vscale x 1 x i1> %2,
i32 %3)
ret <vscale x 1 x i1> %a
}
define <vscale x 2 x i1> @intrinsic_vmsgtu_vi_nxv2i64_i64(<vscale x 2 x i64> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
; CHECK-NEXT: vmsgtu.vi v0, v8, 9
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64.i64(
<vscale x 2 x i64> %0,
i64 9,
i32 %1)
ret <vscale x 2 x i1> %a
}
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmsgtu.vi v25, v8, 9, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64(
<vscale x 2 x i1> %0,
<vscale x 2 x i64> %1,
i64 9,
<vscale x 2 x i1> %2,
i32 %3)
ret <vscale x 2 x i1> %a
}
define <vscale x 4 x i1> @intrinsic_vmsgtu_vi_nxv4i64_i64(<vscale x 4 x i64> %0, i32 %1) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_vi_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
; CHECK-NEXT: vmsgtu.vi v0, v8, 9
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64.i64(
<vscale x 4 x i64> %0,
i64 9,
i32 %1)
ret <vscale x 4 x i1> %a
}
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v25, v0
; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vmsgtu.vi v25, v8, 9, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: jalr zero, 0(ra)
entry:
%a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64(
<vscale x 4 x i1> %0,
<vscale x 4 x i64> %1,
i64 9,
<vscale x 4 x i1> %2,
i32 %3)
ret <vscale x 4 x i1> %a
}

File: llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll

@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
; RUN: --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i8.i8(
<vscale x 1 x i8>,