[RISCV] Lower splats of non-constant i1s as SETCCs

This patch adds support for splatting i1 types to fixed-length or
scalable vector types. It does so by lowering the operation to a SETCC
of the equivalent i8 type.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D101465
This commit is contained in:
Fraser Cormack 2021-04-28 16:11:57 +01:00
parent 18883a3fec
commit 46fa214a6f
4 changed files with 305 additions and 1 deletions

View File

@ -428,7 +428,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : BoolVecVTs) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction(ISD::TRUNCATE, VT, Custom);
@ -1388,6 +1388,19 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return Vec;
}
// A splat can be lowered as a SETCC. For each fixed-length mask vector
// type, we have a legal equivalently-sized i8 type, so we can use that.
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
assert(Splat.getValueType() == XLenVT &&
"Unexpected type for i1 splat value");
MVT InterVT = VT.changeVectorElementType(MVT::i8);
Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
DAG.getConstant(1, DL, XLenVT));
Splat = DAG.getSplatBuildVector(InterVT, DL, Splat);
SDValue Zero = DAG.getConstant(0, DL, InterVT);
return DAG.getSetCC(DL, VT, Splat, Zero, ISD::SETNE);
}
return SDValue();
}
@ -2244,6 +2257,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
if (Op.getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskSplat(Op, DAG);
return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
@ -2802,6 +2817,27 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues(Parts, DL);
}
// Custom lowering for SPLAT_VECTOR nodes that produce a mask (i1-element)
// vector. The scalar operand is reduced to its low bit, splatted across the
// i8-element vector type with the same element count (every mask vector type
// has such a legal equivalently-sized i8 type), and that vector is compared
// against zero with SETNE to produce the mask.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Scalar = Op.getOperand(0);
  // Constant operands (all-zeros or all-ones splats) are handled specially:
  // return the node unchanged.
  if (isa<ConstantSDNode>(Scalar))
    return Op;

  SDLoc DL(Op);
  MVT MaskVT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();
  assert(Scalar.getValueType() == XLenVT &&
         "Unexpected type for i1 splat value");

  // Go-between type: same element count as the mask type, i8 elements.
  MVT ByteVT = MaskVT.changeVectorElementType(MVT::i8);

  // Keep only bit 0 of the XLEN-wide scalar before splatting it.
  SDValue One = DAG.getConstant(1, DL, XLenVT);
  SDValue LowBit = DAG.getNode(ISD::AND, DL, XLenVT, Scalar, One);

  // mask = (splat(LowBit) != 0)
  SDValue ByteSplat = DAG.getSplatVector(ByteVT, DL, LowBit);
  SDValue ByteZero = DAG.getConstant(0, DL, ByteVT);
  return DAG.getSetCC(DL, MaskVT, ByteSplat, ByteZero, ISD::SETNE);
}
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower

View File

@ -511,6 +511,7 @@ private:
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
// Lowers a splat of a non-constant i1 value as a SETCC (SETNE against zero)
// of an i8 splat; a splat of a constant operand is returned unchanged.
SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
int64_t ExtTrueVal) const;
SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;

View File

@ -42,6 +42,54 @@ define void @splat_zeros_v2i1(<2 x i1>* %x) {
ret void
}
; Splat of a non-constant i1 argument into <1 x i1>, stored through %x.
; The expected output masks %y to its low bit (andi), splats it as e8
; elements (vmv.v.x) and compares against zero (vmsne.vi) to form the mask.
; The remaining expected instructions place that 1-element result into a
; zeroed 8-element vector and re-form the mask before the vse1.v store.
define void @splat_v1i1(<1 x i1>* %x, i1 %y) {
; CHECK-LABEL: splat_v1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetivli a2, 1, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli a1, 1, e8,m1,tu,mu
; CHECK-NEXT: vslideup.vi v26, v25, 0
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <1 x i1> undef, i1 %y, i32 0
%b = shufflevector <1 x i1> %a, <1 x i1> undef, <1 x i32> zeroinitializer
store <1 x i1> %b, <1 x i1>* %x
ret void
}
; Splat into <1 x i1> where the scalar is the result of an icmp eq of two
; i32 arguments rather than an i1 argument. The expected output computes the
; scalar with xor/seqz and contains no andi before the vmv.v.x splat
; (presumably because the compare result is already known to be 0 or 1).
; The rest of the expected sequence places the 1-element result into a zeroed
; 8-element vector and re-forms the mask before the vse1.v store.
define void @splat_v1i1_icmp(<1 x i1>* %x, i32 signext %y, i32 signext %z) {
; CHECK-LABEL: splat_v1i1_icmp:
; CHECK: # %bb.0:
; CHECK-NEXT: xor a1, a1, a2
; CHECK-NEXT: seqz a1, a1
; CHECK-NEXT: vsetivli a2, 1, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli a1, 1, e8,m1,tu,mu
; CHECK-NEXT: vslideup.vi v26, v25, 0
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = icmp eq i32 %y, %z
%a = insertelement <1 x i1> undef, i1 %c, i32 0
%b = shufflevector <1 x i1> %a, <1 x i1> undef, <1 x i32> zeroinitializer
store <1 x i1> %b, <1 x i1>* %x
ret void
}
define void @splat_ones_v4i1(<4 x i1>* %x) {
; CHECK-LABEL: splat_ones_v4i1:
; CHECK: # %bb.0:
@ -61,6 +109,29 @@ define void @splat_ones_v4i1(<4 x i1>* %x) {
ret void
}
; Splat of a non-constant i1 argument into <4 x i1>, stored through %x.
; The expected output masks %y to its low bit, splats it over 4 e8 elements
; and compares against zero. As in the <1 x i1> case, the result is then
; placed into a zeroed 8-element vector and the mask is re-formed before the
; vse1.v store.
define void @splat_v4i1(<4 x i1>* %x, i1 %y) {
; CHECK-LABEL: splat_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetivli a2, 4, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli a1, 4, e8,m1,tu,mu
; CHECK-NEXT: vslideup.vi v26, v25, 0
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i1> undef, i1 %y, i32 0
%b = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> zeroinitializer
store <4 x i1> %b, <4 x i1>* %x
ret void
}
define void @splat_zeros_v8i1(<8 x i1>* %x) {
; CHECK-LABEL: splat_zeros_v8i1:
; CHECK: # %bb.0:
@ -72,6 +143,21 @@ define void @splat_zeros_v8i1(<8 x i1>* %x) {
ret void
}
; Splat of a non-constant i1 argument into <8 x i1>, stored through %x.
; The expected output is just the low-bit mask (andi), an 8-element e8 splat
; (vmv.v.x), the compare against zero (vmsne.vi) and the vse1.v store; no
; extra widening step appears at this element count.
define void @splat_v8i1(<8 x i1>* %x, i1 %y) {
; CHECK-LABEL: splat_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetivli a2, 8, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vmsne.vi v26, v25, 0
; CHECK-NEXT: vse1.v v26, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i1> undef, i1 %y, i32 0
%b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> zeroinitializer
store <8 x i1> %b, <8 x i1>* %x
ret void
}
define void @splat_ones_v16i1(<16 x i1>* %x) {
; CHECK-LABEL: splat_ones_v16i1:
; CHECK: # %bb.0:
@ -83,6 +169,21 @@ define void @splat_ones_v16i1(<16 x i1>* %x) {
ret void
}
; Splat of a non-constant i1 argument into <16 x i1>, stored through %x.
; The expected output is the low-bit mask (andi), a 16-element e8 splat
; (vmv.v.x), the compare against zero (vmsne.vi) and the vse1.v store.
define void @splat_v16i1(<16 x i1>* %x, i1 %y) {
; CHECK-LABEL: splat_v16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a1, a1, 1
; CHECK-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a1
; CHECK-NEXT: vmsne.vi v26, v25, 0
; CHECK-NEXT: vse1.v v26, (a0)
; CHECK-NEXT: ret
%a = insertelement <16 x i1> undef, i1 %y, i32 0
%b = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> zeroinitializer
store <16 x i1> %b, <16 x i1>* %x
ret void
}
define void @splat_zeros_v32i1(<32 x i1>* %x) {
; LMULMAX2-LABEL: splat_zeros_v32i1:
; LMULMAX2: # %bb.0:
@ -113,6 +214,44 @@ define void @splat_zeros_v32i1(<32 x i1>* %x) {
ret void
}
; Splat of a non-constant i1 argument into <32 x i1>, stored through %x.
; For the LMULMAX2 prefix the expected output performs one 32-element e8,m2
; splat and compare followed by a single vse1.v store. For the two LMULMAX1
; prefixes a single 16-element mask is computed once and stored twice, at
; %x + 2 and at %x.
define void @splat_v32i1(<32 x i1>* %x, i1 %y) {
; LMULMAX2-LABEL: splat_v32i1:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: andi a1, a1, 1
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
; LMULMAX2-NEXT: vse1.v v25, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_v32i1:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: andi a1, a1, 1
; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v25, a1
; LMULMAX1-RV32-NEXT: vmsne.vi v26, v25, 0
; LMULMAX1-RV32-NEXT: addi a1, a0, 2
; LMULMAX1-RV32-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV32-NEXT: vse1.v v26, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_v32i1:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: andi a1, a1, 1
; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1
; LMULMAX1-RV64-NEXT: vmsne.vi v26, v25, 0
; LMULMAX1-RV64-NEXT: addi a1, a0, 2
; LMULMAX1-RV64-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV64-NEXT: vse1.v v26, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = insertelement <32 x i1> undef, i1 %y, i32 0
%b = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> zeroinitializer
store <32 x i1> %b, <32 x i1>* %x
ret void
}
define void @splat_ones_v64i1(<64 x i1>* %x) {
; LMULMAX2-LABEL: splat_ones_v64i1:
; LMULMAX2: # %bb.0:
@ -152,3 +291,51 @@ define void @splat_ones_v64i1(<64 x i1>* %x) {
store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <64 x i1>* %x
ret void
}
; Splat of a non-constant i1 argument into <64 x i1>, stored through %x.
; For the LMULMAX2 prefix the expected output computes one 32-element mask
; and stores it twice, at %x + 4 and at %x. For the two LMULMAX1 prefixes one
; 16-element mask is computed and stored four times, at %x + 6, %x + 4,
; %x + 2 and %x.
define void @splat_v64i1(<64 x i1>* %x, i1 %y) {
; LMULMAX2-LABEL: splat_v64i1:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: andi a1, a1, 1
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
; LMULMAX2-NEXT: vmv.v.x v26, a1
; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
; LMULMAX2-NEXT: addi a1, a0, 4
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: vse1.v v25, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: splat_v64i1:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: andi a1, a1, 1
; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.v.x v25, a1
; LMULMAX1-RV32-NEXT: vmsne.vi v26, v25, 0
; LMULMAX1-RV32-NEXT: addi a1, a0, 6
; LMULMAX1-RV32-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV32-NEXT: addi a1, a0, 4
; LMULMAX1-RV32-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV32-NEXT: addi a1, a0, 2
; LMULMAX1-RV32-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV32-NEXT: vse1.v v26, (a0)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: splat_v64i1:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: andi a1, a1, 1
; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu
; LMULMAX1-RV64-NEXT: vmv.v.x v25, a1
; LMULMAX1-RV64-NEXT: vmsne.vi v26, v25, 0
; LMULMAX1-RV64-NEXT: addi a1, a0, 6
; LMULMAX1-RV64-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV64-NEXT: addi a1, a0, 4
; LMULMAX1-RV64-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV64-NEXT: addi a1, a0, 2
; LMULMAX1-RV64-NEXT: vse1.v v26, (a1)
; LMULMAX1-RV64-NEXT: vse1.v v26, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = insertelement <64 x i1> undef, i1 %y, i32 0
%b = shufflevector <64 x i1> %a, <64 x i1> undef, <64 x i32> zeroinitializer
store <64 x i1> %b, <64 x i1>* %x
ret void
}

View File

@ -24,6 +24,34 @@ define <vscale x 1 x i1> @vsplat_nxv1i1_1() {
ret <vscale x 1 x i1> %splat
}
; Splat of a non-constant i1 argument into <vscale x 1 x i1>, returned.
; The expected output masks %x to its low bit (andi), splats it at e8,mf8
; (vmv.v.x) and compares against zero (vmsne.vi), producing the mask in v0.
define <vscale x 1 x i1> @vsplat_nxv1i1_2(i1 %x) {
; CHECK-LABEL: vsplat_nxv1i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> undef, i1 %x, i32 0
%splat = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
ret <vscale x 1 x i1> %splat
}
; Splat into <vscale x 1 x i1> where the scalar is the result of an icmp ne
; of two i32 arguments. The expected output computes the scalar with xor/snez
; and contains no andi before the vmv.v.x splat (presumably because the
; compare result is already known to be 0 or 1); the mask is produced in v0.
define <vscale x 1 x i1> @vsplat_nxv1i1_3(i32 signext %x, i32 signext %y) {
; CHECK-LABEL: vsplat_nxv1i1_3:
; CHECK: # %bb.0:
; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: snez a0, a0
; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: ret
%c = icmp ne i32 %x, %y
%head = insertelement <vscale x 1 x i1> undef, i1 %c, i32 0
%splat = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
ret <vscale x 1 x i1> %splat
}
define <vscale x 2 x i1> @vsplat_nxv2i1_0() {
; CHECK-LABEL: vsplat_nxv2i1_0:
; CHECK: # %bb.0:
@ -46,6 +74,19 @@ define <vscale x 2 x i1> @vsplat_nxv2i1_1() {
ret <vscale x 2 x i1> %splat
}
; Splat of a non-constant i1 argument into <vscale x 2 x i1>, returned.
; The expected output masks %x to its low bit (andi), splats it at e8,mf4
; (vmv.v.x) and compares against zero (vmsne.vi), producing the mask in v0.
define <vscale x 2 x i1> @vsplat_nxv2i1_2(i1 %x) {
; CHECK-LABEL: vsplat_nxv2i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> undef, i1 %x, i32 0
%splat = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
ret <vscale x 2 x i1> %splat
}
define <vscale x 4 x i1> @vsplat_nxv4i1_0() {
; CHECK-LABEL: vsplat_nxv4i1_0:
; CHECK: # %bb.0:
@ -68,6 +109,19 @@ define <vscale x 4 x i1> @vsplat_nxv4i1_1() {
ret <vscale x 4 x i1> %splat
}
; Splat of a non-constant i1 argument into <vscale x 4 x i1>, returned.
; The expected output masks %x to its low bit (andi), splats it at e8,mf2
; (vmv.v.x) and compares against zero (vmsne.vi), producing the mask in v0.
define <vscale x 4 x i1> @vsplat_nxv4i1_2(i1 %x) {
; CHECK-LABEL: vsplat_nxv4i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> undef, i1 %x, i32 0
%splat = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i1> %splat
}
define <vscale x 8 x i1> @vsplat_nxv8i1_0() {
; CHECK-LABEL: vsplat_nxv8i1_0:
; CHECK: # %bb.0:
@ -90,6 +144,19 @@ define <vscale x 8 x i1> @vsplat_nxv8i1_1() {
ret <vscale x 8 x i1> %splat
}
; Splat of a non-constant i1 argument into <vscale x 8 x i1>, returned.
; The expected output masks %x to its low bit (andi), splats it at e8,m1
; (vmv.v.x) and compares against zero (vmsne.vi), producing the mask in v0.
define <vscale x 8 x i1> @vsplat_nxv8i1_2(i1 %x) {
; CHECK-LABEL: vsplat_nxv8i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu
; CHECK-NEXT: vmv.v.x v25, a0
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> undef, i1 %x, i32 0
%splat = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x i1> %splat
}
define <vscale x 16 x i1> @vsplat_nxv16i1_0() {
; CHECK-LABEL: vsplat_nxv16i1_0:
; CHECK: # %bb.0:
@ -111,3 +178,16 @@ define <vscale x 16 x i1> @vsplat_nxv16i1_1() {
%splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
ret <vscale x 16 x i1> %splat
}
; Splat of a non-constant i1 argument into <vscale x 16 x i1>, returned.
; The expected output masks %x to its low bit (andi), splats it at e8,m2
; (vmv.v.x) and compares against zero (vmsne.vi), producing the mask in v0.
define <vscale x 16 x i1> @vsplat_nxv16i1_2(i1 %x) {
; CHECK-LABEL: vsplat_nxv16i1_2:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu
; CHECK-NEXT: vmv.v.x v26, a0
; CHECK-NEXT: vmsne.vi v0, v26, 0
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> undef, i1 %x, i32 0
%splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
ret <vscale x 16 x i1> %splat
}