[AArch64][SVE] Zero other lanes when doing OR reduction on unpacked predicate using ptest.

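When the predicate vector is unpacked (any type other than nxv16i1), we
cannot assume anything about the values in the lanes that do not hold one
of its elements. The operand therefore cannot be used as its own governing
predicate for the ptest; we have to use a predicate in which those other
lanes are known to be zero.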

Reviewed By: RosieSumpter

Differential Revision: https://reviews.llvm.org/D129081
This commit is contained in:
Sander de Smalen 2022-07-06 15:59:24 +00:00
parent f5b5398ebf
commit 5d4f6ce229
3 changed files with 32 additions and 15 deletions

View File

@ -21094,7 +21094,7 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
default:
return SDValue();
case ISD::VECREDUCE_OR:
if (isAllActivePredicate(DAG, Pg))
if (isAllActivePredicate(DAG, Pg) && OpVT == MVT::nxv16i1)
// The predicate can be 'Op' because
// vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);

View File

@ -79,7 +79,8 @@ define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
@ -89,7 +90,8 @@ define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
@ -99,7 +101,8 @@ define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
@ -109,7 +112,9 @@ define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
define i1 @reduce_or_nxv1i1(<vscale x 1 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv1i1(<vscale x 1 x i1> %vec)
@ -252,7 +257,8 @@ define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
@ -262,7 +268,8 @@ define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
@ -272,7 +279,8 @@ define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
@ -282,7 +290,9 @@ define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
define i1 @reduce_smin_nxv1i1(<vscale x 1 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv1i1(<vscale x 1 x i1> %vec)
@ -304,7 +314,8 @@ define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
@ -314,7 +325,8 @@ define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
@ -324,7 +336,8 @@ define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
@ -334,7 +347,9 @@ define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
define i1 @reduce_umax_nxv1i1(<vscale x 1 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv1i1(<vscale x 1 x i1> %vec)

View File

@ -8,7 +8,8 @@
define i1 @reduce_or_insert_subvec_into_zero(<vscale x 4 x i1> %in) {
; CHECK-LABEL: reduce_or_insert_subvec_into_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %in, i64 0)
@ -19,7 +20,8 @@ define i1 @reduce_or_insert_subvec_into_zero(<vscale x 4 x i1> %in) {
define i1 @reduce_or_insert_subvec_into_poison(<vscale x 4 x i1> %in) {
; CHECK-LABEL: reduce_or_insert_subvec_into_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)