[SystemZ] Don't create PERMUTE nodes with an undef operand.

It's better to reuse the first source value than to use an undef second
operand, because more of the resulting VPERMs will then have identical
operands, which makes MachineCSE more successful.

Review: Ulrich Weigand
This commit is contained in:
Jonas Paulsson 2020-05-18 18:20:40 +02:00
parent 691980ebb4
commit 31ecef7627
3 changed files with 32 additions and 4 deletions

View File

@ -4474,7 +4474,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
else
IndexNodes[I] = DAG.getUNDEF(MVT::i32);
SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
(!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}
namespace {

View File

@ -0,0 +1,27 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
;
; Test that only one vperm of the vector compare is needed for both extracts.
define void @fun() {
; CHECK-LABEL: fun
; CHECK: vperm
; CHECK-NOT: vperm
; The directives above require one vperm after the function label and forbid
; any further vperm following it.
bb:
; Load a <4 x i8> vector (the pointer operand is undef) and compare every
; lane against zero, producing a <4 x i1> result.
%tmp = load <4 x i8>, <4 x i8>* undef
%tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
; First use of the compare result: extract lane 0 and branch on it.
%tmp2 = extractelement <4 x i1> %tmp1, i32 0
br i1 %tmp2, label %bb1, label %bb2
bb1:
unreachable
bb2:
; Second use of the same compare result, in a different basic block:
; extract lane 1 and branch on it.
%tmp3 = extractelement <4 x i1> %tmp1, i32 1
br i1 %tmp3, label %bb3, label %bb4
bb3:
unreachable
bb4:
unreachable
}

View File

@ -5377,12 +5377,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %
; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: vl %v1, 0(%r2), 4
; SZ13-NEXT: ld %f0, 16(%r2)
; SZ13-NEXT: vledb %v1, %v1, 0, 0
; SZ13-NEXT: larl %r1, .LCPI97_0
; SZ13-NEXT: ld %f0, 16(%r2)
; SZ13-NEXT: vl %v2, 0(%r1), 3
; SZ13-NEXT: vperm %v1, %v1, %v0, %v2
; SZ13-NEXT: ledbra %f0, 0, %f0, 0
; SZ13-NEXT: vl %v2, 0(%r1), 3
; SZ13-NEXT: vperm %v1, %v1, %v1, %v2
; SZ13-NEXT: ste %f0, 8(%r3)
; SZ13-NEXT: vsteg %v1, 0(%r3), 0
; SZ13-NEXT: br %r14