forked from OSchip/llvm-project
[AArch64][SVE] Zero other lanes when doing OR reduction on unpacked predicate using ptest.
When the predicate vector is unpacked, we cannot assume anything about the values in the other (unused) lanes. The ptest must therefore be governed by a predicate in which those lanes are known to be zero, rather than by the operand itself. Reviewed By: RosieSumpter. Differential Revision: https://reviews.llvm.org/D129081
This commit is contained in:
parent
f5b5398ebf
commit
5d4f6ce229
|
@ -21094,7 +21094,7 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
|
|||
default:
|
||||
return SDValue();
|
||||
case ISD::VECREDUCE_OR:
|
||||
if (isAllActivePredicate(DAG, Pg))
|
||||
if (isAllActivePredicate(DAG, Pg) && OpVT == MVT::nxv16i1)
|
||||
// The predicate can be 'Op' because
|
||||
// vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
|
||||
return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
|
||||
|
|
|
@ -79,7 +79,8 @@ define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
|
|||
define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_or_nxv8i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.h
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
|
||||
|
@ -89,7 +90,8 @@ define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
|
|||
define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_or_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.s
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
|
||||
|
@ -99,7 +101,8 @@ define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
|
|||
define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_or_nxv2i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
|
||||
|
@ -109,7 +112,9 @@ define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
|
|||
define i1 @reduce_or_nxv1i1(<vscale x 1 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_or_nxv1i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: punpklo p1.h, p1.b
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.or.i1.nxv1i1(<vscale x 1 x i1> %vec)
|
||||
|
@ -252,7 +257,8 @@ define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
|
|||
define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_smin_nxv8i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.h
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
|
||||
|
@ -262,7 +268,8 @@ define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
|
|||
define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_smin_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.s
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
|
||||
|
@ -272,7 +279,8 @@ define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
|
|||
define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_smin_nxv2i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
|
||||
|
@ -282,7 +290,9 @@ define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
|
|||
define i1 @reduce_smin_nxv1i1(<vscale x 1 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_smin_nxv1i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: punpklo p1.h, p1.b
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.smin.i1.nxv1i1(<vscale x 1 x i1> %vec)
|
||||
|
@ -304,7 +314,8 @@ define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
|
|||
define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_umax_nxv8i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.h
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
|
||||
|
@ -314,7 +325,8 @@ define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
|
|||
define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_umax_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.s
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
|
||||
|
@ -324,7 +336,8 @@ define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
|
|||
define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_umax_nxv2i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
|
||||
|
@ -334,7 +347,9 @@ define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
|
|||
define i1 @reduce_umax_nxv1i1(<vscale x 1 x i1> %vec) {
|
||||
; CHECK-LABEL: reduce_umax_nxv1i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: punpklo p1.h, p1.b
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%res = call i1 @llvm.vector.reduce.umax.i1.nxv1i1(<vscale x 1 x i1> %vec)
|
||||
|
|
|
@ -8,7 +8,8 @@
|
|||
define i1 @reduce_or_insert_subvec_into_zero(<vscale x 4 x i1> %in) {
|
||||
; CHECK-LABEL: reduce_or_insert_subvec_into_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.s
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %in, i64 0)
|
||||
|
@ -19,7 +20,8 @@ define i1 @reduce_or_insert_subvec_into_zero(<vscale x 4 x i1> %in) {
|
|||
define i1 @reduce_or_insert_subvec_into_poison(<vscale x 4 x i1> %in) {
|
||||
; CHECK-LABEL: reduce_or_insert_subvec_into_poison:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptest p0, p0.b
|
||||
; CHECK-NEXT: ptrue p1.s
|
||||
; CHECK-NEXT: ptest p1, p0.b
|
||||
; CHECK-NEXT: cset w0, ne
|
||||
; CHECK-NEXT: ret
|
||||
%t = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %in, i64 0)
|
||||
|
|
Loading…
Reference in New Issue