diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9ffce63501b9..82f9fa7a5e84 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21991,10 +21991,22 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, MVT RootVT = Root.getSimpleValueType(); SDLoc DL(Root); - // Just remove no-op shuffle masks. if (Mask.size() == 1) { - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input), - /*AddTo*/ true); + int Index = Mask[0]; + assert((Index >= 0 || Index == SM_SentinelUndef || + Index == SM_SentinelZero) && + "Invalid shuffle index found!"); + + // We may end up with an accumulated mask of size 1 as a result of + // widening of shuffle operands (see function canWidenShuffleElements). + // If the only shuffle index is equal to SM_SentinelZero then propagate + // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle + // mask, and therefore the entire chain of shuffles can be folded away. + if (Index == SM_SentinelZero) + DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL)); + else + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input), + /*AddTo*/ true); return true; } diff --git a/llvm/test/CodeGen/X86/pr24562.ll b/llvm/test/CodeGen/X86/pr24562.ll new file mode 100644 index 000000000000..f2e134bbd071 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr24562.ll @@ -0,0 +1,19 @@ +; RUN: llc -mattr=+ssse3 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s + +; The pshufb from function @pr24562 was wrongly folded into its first operand +; as a result of a late target shuffle combine on the legalized selection dag. +; +; Check that the pshufb is correctly folded to a zero vector. 
+ +define <2 x i64> @pr24562() { +; CHECK-LABEL: pr24562: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq +entry: + %0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> , <16 x i8> ) #2 + %1 = bitcast <16 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} + +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)