From fc33e1d99b40936321cbfb9a8c8e4841da9351aa Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 17 May 2013 14:48:34 +0000 Subject: [PATCH] X86: Make shuffle -> shift conversion more aggressive about undefs. Shuffles that only move an element into position 0 of the vector are common in the output of the loop vectorizer and often generate suboptimal code when SSSE3 is not available. Lower them to vector shifts if possible. We still prefer palignr over psrldq because it has higher throughput on sandybridge. llvm-svn: 182102 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 46 +++++++++++++++---------- llvm/test/CodeGen/X86/avx-vpermil.ll | 2 +- llvm/test/CodeGen/X86/vec_insert-5.ll | 26 +++++++++++++- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 45fe69aa2904..8a7faf60e63c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4756,19 +4756,27 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, /// getNumOfConsecutiveZeros - Return the number of elements of a vector /// shuffle operation which come from a consecutively from a zero. The /// search can start in two different directions, from left or right. -static -unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems, - bool ZerosFromLeft, SelectionDAG &DAG) { - unsigned i; - for (i = 0; i != NumElems; ++i) { - unsigned Index = ZerosFromLeft ? i : NumElems-i-1; +/// We count undefs as zeros until PreferredNum is reached. +static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, + unsigned NumElems, bool ZerosFromLeft, + SelectionDAG &DAG, + unsigned PreferredNum = -1U) { + unsigned NumZeros = 0; + for (unsigned i = 0; i != NumElems; ++i) { + unsigned Index = ZerosFromLeft ? i : NumElems - i - 1; SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0); - if (!(Elt.getNode() && - (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt)))) + if (!Elt.getNode()) + break; + + if (X86::isZeroNode(Elt)) + ++NumZeros; + else if (Elt.getOpcode() == ISD::UNDEF) // Undef as zero up to PreferredNum. + NumZeros = std::min(NumZeros + 1, PreferredNum); + else break; } - return i; + return NumZeros; } /// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE) @@ -4806,8 +4814,9 @@ bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); - unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, - false /* check zeros from right */, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros( + SVOp, NumElems, false /* check zeros from right */, DAG, + SVOp->getMaskElt(0)); unsigned OpSrc; if (!NumZeros) @@ -4839,8 +4848,9 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); - unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, - true /* check zeros from left */, DAG); + unsigned NumZeros = getNumOfConsecutiveZeros( + SVOp, NumElems, true /* check zeros from left */, DAG, + NumElems - SVOp->getMaskElt(NumElems - 1) - 1); unsigned OpSrc; if (!NumZeros) @@ -6871,6 +6881,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { TargetMask, DAG); } + if (isPALIGNRMask(M, VT, Subtarget)) + return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2, + getShufflePALIGNRImmediate(SVOp), + DAG); + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; @@ -6988,11 +7003,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // inlined here right now to enable us to directly emit target specific // nodes, and remove one by one until they don't return Op anymore. - if (isPALIGNRMask(M, VT, Subtarget)) - return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2, - getShufflePALIGNRImmediate(SVOp), - DAG); - if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { if (VT == MVT::v2f64 || VT == MVT::v2i64) diff --git a/llvm/test/CodeGen/X86/avx-vpermil.ll b/llvm/test/CodeGen/X86/avx-vpermil.ll index 7f2f9d821dd5..b7f8d72e58c9 100644 --- a/llvm/test/CodeGen/X86/avx-vpermil.ll +++ b/llvm/test/CodeGen/X86/avx-vpermil.ll @@ -46,7 +46,7 @@ entry: } ; CHECK: palignr $8 -; CHECK: psrldq $8 +; CHECK: palignr $8 define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp { entry: %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> diff --git a/llvm/test/CodeGen/X86/vec_insert-5.ll b/llvm/test/CodeGen/X86/vec_insert-5.ll index 9024216af1ad..bd4a06d500af 100644 --- a/llvm/test/CodeGen/X86/vec_insert-5.ll +++ b/llvm/test/CodeGen/X86/vec_insert-5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse2,+ssse3 | FileCheck %s ; There are no MMX operations in @t1 define void @t1(i32 %a, x86_mmx* %P) nounwind { @@ -41,3 +41,27 @@ define <4 x float> @t4(<4 x float>* %P) nounwind { ; CHECK: t4: ; CHECK: psrldq $12 } + +define <16 x i8> @t5(<16 x i8> %x) nounwind { + %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> + ret <16 x i8> %s + +; CHECK: t5: +; CHECK: psrldq $1 +} + +define <16 x i8> @t6(<16 x i8> %x) nounwind { + %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> + ret <16 x i8> %s + +; CHECK: t6: +; CHECK: palignr $1 +} + +define <16 x i8> @t7(<16 x i8> %x) nounwind { + %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> + ret <16 x i8> %s + +; CHECK: t7: +; CHECK: pslldq $13 +}