diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a3cd02f44b2d..f88d6f8b2fdb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19419,7 +19419,11 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask, // We fell out of the loop without finding a viable combining instruction. return false; - // Record the old value to use in RAUW-ing. + // Combine away the bottom node as its shuffle will be accumulated into + // a preceding shuffle. + DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true); + + // Record the old value. SDValue Old = V; // Merge this node's mask and our incoming mask (adjusted to account for all @@ -19430,12 +19434,13 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask, V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0), getV4X86ShuffleImm8ForMask(Mask, DAG)); - // Replace N with its operand as we're going to combine that shuffle away. - DAG.ReplaceAllUsesWith(N, N.getOperand(0)); + // Check that the shuffles didn't cancel each other out. If not, we need to + // combine to the new one. + if (Old != V) + // Replace the combinable shuffle with the combined one, updating all users + // so that we re-evaluate the chain here. + DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true); - // Replace the combinable shuffle with the combined one, updating all users - // so that we re-evaluate the chain here. 
- DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true); return true; } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index 1dc744af47d4..ffe537fc351f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -671,3 +671,28 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9> ret <8 x i16> %shuffle } + +define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { +; FIXME-SSE2-LABEL: @shuffle_v8i16_XX4X8acX +; FIXME-SSE2: # BB#0: +; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] +; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7] +; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3] +; FIXME-SSE2-NEXT: punpcklwd {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,1,0,2,4,5,6,7] +; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] +; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,1,3,4,5,6,7] +; FIXME-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,7] +; FIXME-SSE2-NEXT: retq +; +; FIXME-SSSE3-LABEL: @shuffle_v8i16_XX4X8acX +; FIXME-SSSE3: # BB#0: +; FIXME-SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] +; FIXME-SSSE3-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,5,7,6,7] +; FIXME-SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3] +; FIXME-SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; FIXME-SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+}},0,1,{{[0-9]+,[0-9]+}},2,3,6,7,10,11{{[0-9]+,[0-9]+}}] +; FIXME-SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef> + ret <8 x i16> %shuffle +}