From a746239be3092c7b27d7e9b3cc81bd612bf641d5 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 5 Aug 2014 18:45:49 +0000 Subject: [PATCH] [x86] Fix a crasher due to shuffles which cancel each other out and add a test case. We also miscompile this test case which is showing a serious flaw in the single-input v8i16 shuffle code. I've left the specific instruction checks FIXME-ed out until I can address the bug in the single-input code, but I wanted to separate out a significant functionality change to produce correct code from a very simple and targeted crasher fix. The miscompile problem stems from keeping track of inputs by value rather than by index. As a consequence of doing this, we can't reliably update those inputs because they might swap and we can't detect this without copying the mask. The blend code now uses indices for the input lists and this seems strictly better. It also should make it easier to sort things and do other cleanups. I think the time has come to simplify The Great Lambda here. llvm-svn: 214914 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 ++++++++----- .../test/CodeGen/X86/vector-shuffle-128-v8.ll | 25 +++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a3cd02f44b2d..f88d6f8b2fdb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19419,7 +19419,11 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, // We fell out of the loop without finding a viable combining instruction. return false; - // Record the old value to use in RAUW-ing. + // Combine away the bottom node as its shuffle will be accumulated into + // a preceding shuffle. + DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true); + + // Record the old value. SDValue Old = V; // Merge this node's mask and our incoming mask (adjusted to account for all @@ -19430,12 +19434,13 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0), getV4X86ShuffleImm8ForMask(Mask, DAG)); - // Replace N with its operand as we're going to combine that shuffle away. - DAG.ReplaceAllUsesWith(N, N.getOperand(0)); + // Check that the shuffles didn't cancel each other out. If not, we need to + // combine to the new one. + if (Old != V) + // Replace the combinable shuffle with the combined one, updating all users + // so that we re-evaluate the chain here. + DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true); - // Replace the combinable shuffle with the combined one, updating all users - // so that we re-evaluate the chain here. - DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true); return true; } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index 1dc744af47d4..ffe537fc351f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -671,3 +671,28 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) { %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %shuffle } + +define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { +; FIXME-SSE2-LABEL: @shuffle_v8i16_XX4X8acX +; FIXME-SSE2: # BB#0: +; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] +; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7] +; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3] +; FIXME-SSE2-NEXT: punpcklwd {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,1,0,2,4,5,6,7] +; FIXME-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1] +; FIXME-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,1,3,4,5,6,7] +; FIXME-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,7] +; FIXME-SSE2-NEXT: retq +; +; FIXME-SSSE3-LABEL: @shuffle_v8i16_XX4X8acX +; FIXME-SSSE3: # BB#0: +; FIXME-SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3] +; FIXME-SSSE3-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,5,7,6,7] +; FIXME-SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3] +; FIXME-SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; FIXME-SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+}},0,1,{{[0-9]+,[0-9]+}},2,3,6,7,10,11{{[0-9]+,[0-9]+}}] +; FIXME-SSSE3-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle +}