forked from OSchip/llvm-project
[SystemZ] Don't create PERMUTE nodes with an undef operand.
It's better to reuse the first source value than to use an undef second operand, because more of the resulting VPERMs will then have identical operands, which makes MachineCSE more successful. Review: Ulrich Weigand
This commit is contained in:
parent
691980ebb4
commit
31ecef7627
|
@ -4474,7 +4474,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
|
|||
else
|
||||
IndexNodes[I] = DAG.getUNDEF(MVT::i32);
|
||||
SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
|
||||
return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
|
||||
return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
|
||||
(!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
|
||||
;
|
||||
; Test that only one vperm of the vector compare is needed for both extracts.
|
||||
|
||||
define void @fun() {
|
||||
; CHECK-LABEL: fun
|
||||
; CHECK: vperm
|
||||
; CHECK-NOT: vperm
|
||||
bb:
|
||||
%tmp = load <4 x i8>, <4 x i8>* undef
|
||||
%tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
|
||||
%tmp2 = extractelement <4 x i1> %tmp1, i32 0
|
||||
br i1 %tmp2, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
unreachable
|
||||
|
||||
bb2:
|
||||
%tmp3 = extractelement <4 x i1> %tmp1, i32 1
|
||||
br i1 %tmp3, label %bb3, label %bb4
|
||||
|
||||
bb3:
|
||||
unreachable
|
||||
|
||||
bb4:
|
||||
unreachable
|
||||
}
|
|
@ -5377,12 +5377,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %
|
|||
; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
|
||||
; SZ13: # %bb.0: # %entry
|
||||
; SZ13-NEXT: vl %v1, 0(%r2), 4
|
||||
; SZ13-NEXT: ld %f0, 16(%r2)
|
||||
; SZ13-NEXT: vledb %v1, %v1, 0, 0
|
||||
; SZ13-NEXT: larl %r1, .LCPI97_0
|
||||
; SZ13-NEXT: ld %f0, 16(%r2)
|
||||
; SZ13-NEXT: vl %v2, 0(%r1), 3
|
||||
; SZ13-NEXT: vperm %v1, %v1, %v0, %v2
|
||||
; SZ13-NEXT: ledbra %f0, 0, %f0, 0
|
||||
; SZ13-NEXT: vl %v2, 0(%r1), 3
|
||||
; SZ13-NEXT: vperm %v1, %v1, %v1, %v2
|
||||
; SZ13-NEXT: ste %f0, 8(%r3)
|
||||
; SZ13-NEXT: vsteg %v1, 0(%r3), 0
|
||||
; SZ13-NEXT: br %r14
|
||||
|
|
Loading…
Reference in New Issue