diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 0ce6f3177224..7a8b5249255f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -4474,7 +4474,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, else IndexNodes[I] = DAG.getUNDEF(MVT::i32); SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); - return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); + return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], + (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); } namespace { diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll new file mode 100644 index 000000000000..0cf3c6ef7a06 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s +; +; Test that only one vperm of the vector compare is needed for both extracts. + +define void @fun() { +; CHECK-LABEL: fun +; CHECK: vperm +; CHECK-NOT: vperm +bb: + %tmp = load <4 x i8>, <4 x i8>* undef + %tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp + %tmp2 = extractelement <4 x i1> %tmp1, i32 0 + br i1 %tmp2, label %bb1, label %bb2 + +bb1: + unreachable + +bb2: + %tmp3 = extractelement <4 x i1> %tmp1, i32 1 + br i1 %tmp3, label %bb3, label %bb4 + +bb3: + unreachable + +bb4: + unreachable +} diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index 7cea2ff8eb9c..b7cbac89db31 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -5377,12 +5377,12 @@ define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* % ; SZ13-LABEL: constrained_vector_fptrunc_v3f64: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vl %v1, 0(%r2), 4 +; SZ13-NEXT: ld %f0, 16(%r2) ; SZ13-NEXT: vledb %v1, %v1, 0, 0 ; SZ13-NEXT: larl %r1, .LCPI97_0 -; SZ13-NEXT: ld %f0, 16(%r2) -; SZ13-NEXT: vl %v2, 0(%r1), 3 -; SZ13-NEXT: vperm %v1, %v1, %v0, %v2 ; SZ13-NEXT: ledbra %f0, 0, %f0, 0 +; SZ13-NEXT: vl %v2, 0(%r1), 3 +; SZ13-NEXT: vperm %v1, %v1, %v1, %v2 ; SZ13-NEXT: ste %f0, 8(%r3) ; SZ13-NEXT: vsteg %v1, 0(%r3), 0 ; SZ13-NEXT: br %r14