[X86][SSE] Add SimplifyDemandedVectorElts support for SSE splat-vector-shifts.

SSE vector shifts only use the bottom 64 bits of the shift-amount vector.

llvm-svn: 347173
Author: Simon Pilgrim
Date:   2018-11-18 20:21:52 +00:00
Commit: b31bdbd2e9 (parent 11d50948e2)
5 changed files with 59 additions and 33 deletions
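To spell out the claim in the message: the X86ISD::VSHL/VSRL/VSRA nodes correspond to the PSLLW/D/Q, PSRLW/D/Q and PSRAW/D family, whose per-element shift count is taken from the low 64 bits of the second XMM operand, so any splat or broadcast feeding the amount operand is dead weight. A small standalone sketch with SSE2 intrinsics (my own illustration, not part of this patch) shows the behaviour:

    // Illustration only: the count operand of _mm_sll_epi64 (PSLLQ) is read
    // from the low 64 bits of the vector, so splatting the amount first does
    // not change the result.
    #include <emmintrin.h>
    #include <cstdio>

    int main() {
      __m128i Val = _mm_set_epi64x(0x10, 0x20);
      __m128i Amt = _mm_set_epi64x(999, 4);         // upper half is ignored
      __m128i Splat = _mm_shuffle_epi32(Amt, 0x44); // duplicate the low 64 bits

      __m128i A = _mm_sll_epi64(Val, Amt);   // both lanes shifted left by 4
      __m128i B = _mm_sll_epi64(Val, Splat); // identical result
      printf("%d\n", _mm_movemask_epi8(_mm_cmpeq_epi8(A, B))); // prints 65535
      return 0;
    }

With the new SimplifyDemandedVectorElts case, the DAG combiner reaches the same conclusion and drops the pshufd/vpbroadcastq that used to materialise the splat, as the test diffs below show.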


@@ -32152,6 +32152,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
   // Handle special case opcodes.
   switch (Opc) {
+  case X86ISD::VSHL:
+  case X86ISD::VSRL:
+  case X86ISD::VSRA: {
+    // We only need the bottom 64-bits of the (128-bit) shift amount.
+    SDValue Amt = Op.getOperand(1);
+    EVT AmtVT = Amt.getSimpleValueType();
+    assert(AmtVT.is128BitVector() && "Unexpected value type");
+    APInt AmtUndef, AmtZero;
+    int NumAmtElts = AmtVT.getVectorNumElements();
+    APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
+    if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
+                                   Depth + 1))
+      return true;
+    break;
+  }
   case X86ISD::VBROADCAST: {
     SDValue Src = Op.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
@@ -35269,6 +35284,28 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const X86Subtarget &Subtarget) {
+  assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
+          X86ISD::VSRL == N->getOpcode()) &&
+         "Unexpected shift opcode");
+  EVT VT = N->getValueType(0);
+
+  // Shift zero -> zero.
+  if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+    return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+
+  APInt KnownUndef, KnownZero;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+  if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+                                     KnownZero, DCI))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
 static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
@@ -40834,6 +40871,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
   case X86ISD::PACKSS:
   case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
+  case X86ISD::VSHL:
+  case X86ISD::VSRA:
+  case X86ISD::VSRL:
+    return combineVectorShiftVar(N, DAG, DCI, Subtarget);
   case X86ISD::VSHLI:
   case X86ISD::VSRAI:
   case X86ISD::VSRLI:
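For reference, the demanded-elements mask built in the new VSHL/VSRL/VSRA case covers only the lower half of the amount vector. A minimal sketch of what APInt::getLowBitsSet produces for a v2i64 amount (my own illustration, assuming LLVM's ADT headers are on the include path; not part of the patch):

    // Sketch only: for a 128-bit v2i64 shift amount, NumAmtElts / 2 == 1, so
    // just element 0 (the bottom 64 bits) is demanded.
    #include "llvm/ADT/APInt.h"
    #include <cassert>

    int main() {
      unsigned NumAmtElts = 2; // v2i64 amount operand
      llvm::APInt AmtElts = llvm::APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
      assert(AmtElts.getBitWidth() == 2 && AmtElts == 1 &&
             "only element 0 is demanded; the upper element may be simplified");
      return 0;
    }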


@@ -664,34 +664,22 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE-LABEL: splatvar_rotate_v2i64:
 ; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; SSE-NEXT: movdqa {{.*#+}} xmm3 = [64,64]
-; SSE-NEXT: psubq %xmm2, %xmm3
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psllq %xmm1, %xmm2
-; SSE-NEXT: psrlq %xmm3, %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64,64]
+; SSE-NEXT: psubq %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psllq %xmm1, %xmm3
+; SSE-NEXT: psrlq %xmm2, %xmm0
+; SSE-NEXT: por %xmm3, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: splatvar_rotate_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_rotate_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_rotate_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v2i64:
 ; AVX512F: # %bb.0:


@@ -521,12 +521,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_rotate_v4i64:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm2
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v4i64:


@@ -29,7 +29,6 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 ; X32: # %bb.0: # %entry
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; X32-NEXT: psllq %xmm1, %xmm0
 ; X32-NEXT: movdqa %xmm0, (%eax)
 ; X32-NEXT: retl


@@ -29,7 +29,6 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 ; X32: # %bb.0: # %entry
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; X32-NEXT: psrlq %xmm1, %xmm0
 ; X32-NEXT: movdqa %xmm0, (%eax)
 ; X32-NEXT: retl