[X86][SSE] Add SimplifyDemandedVectorElts support for SSE splat-vector-shifts.

SSE vector shifts only use the bottom 64-bits of the shift amount vector.

llvm-svn: 347173

parent 11d50948e2
commit b31bdbd2e9
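For illustration only (not part of the commit): a minimal standalone C++ sketch of the demanded-lanes mask the patch builds with APInt::getLowBitsSet, assuming a hypothetical v4i32 shift-amount operand; it shows that only the low half of the lanes (the bottom 64 bits of the 128-bit amount vector) end up demanded.

// Illustration only -- the v4i32 lane count is an assumed example value.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  unsigned NumAmtElts = 4; // hypothetical v4i32 shift-amount vector
  // Same expression the patch uses to form the demanded-lanes mask.
  llvm::APInt AmtElts = llvm::APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
  for (unsigned I = 0; I != NumAmtElts; ++I)
    llvm::outs() << "lane " << I << ": "
                 << (AmtElts[I] ? "demanded" : "not demanded") << "\n";
  // Lanes 0 and 1 (the bottom 64 bits) are demanded; lanes 2 and 3 are not.
  return 0;
}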
@@ -32152,6 +32152,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
   // Handle special case opcodes.
   switch (Opc) {
+  case X86ISD::VSHL:
+  case X86ISD::VSRL:
+  case X86ISD::VSRA: {
+    // We only need the bottom 64-bits of the (128-bit) shift amount.
+    SDValue Amt = Op.getOperand(1);
+    EVT AmtVT = Amt.getSimpleValueType();
+    assert(AmtVT.is128BitVector() && "Unexpected value type");
+    APInt AmtUndef, AmtZero;
+    int NumAmtElts = AmtVT.getVectorNumElements();
+    APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
+    if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
+                                   Depth + 1))
+      return true;
+    break;
+  }
   case X86ISD::VBROADCAST: {
     SDValue Src = Op.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();

@@ -35269,6 +35284,28 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const X86Subtarget &Subtarget) {
+  assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
+          X86ISD::VSRL == N->getOpcode()) &&
+         "Unexpected shift opcode");
+  EVT VT = N->getValueType(0);
+
+  // Shift zero -> zero.
+  if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+    return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+
+  APInt KnownUndef, KnownZero;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+  if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+                                     KnownZero, DCI))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
 static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {

@@ -40834,6 +40871,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
   case X86ISD::PACKSS:
   case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
+  case X86ISD::VSHL:
+  case X86ISD::VSRA:
+  case X86ISD::VSRL:
+    return combineVectorShiftVar(N, DAG, DCI, Subtarget);
   case X86ISD::VSHLI:
   case X86ISD::VSRAI:
   case X86ISD::VSRLI:

@@ -664,34 +664,22 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE-LABEL: splatvar_rotate_v2i64:
 ; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; SSE-NEXT: movdqa {{.*#+}} xmm3 = [64,64]
-; SSE-NEXT: psubq %xmm2, %xmm3
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psllq %xmm1, %xmm2
-; SSE-NEXT: psrlq %xmm3, %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64,64]
+; SSE-NEXT: psubq %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psllq %xmm1, %xmm3
+; SSE-NEXT: psrlq %xmm2, %xmm0
+; SSE-NEXT: por %xmm3, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: splatvar_rotate_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_rotate_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_rotate_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v2i64:
 ; AVX512F: # %bb.0:

@@ -521,12 +521,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_rotate_v4i64:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm2
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v4i64:

@@ -29,7 +29,6 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 ; X32: # %bb.0: # %entry
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; X32-NEXT: psllq %xmm1, %xmm0
 ; X32-NEXT: movdqa %xmm0, (%eax)
 ; X32-NEXT: retl

@@ -29,7 +29,6 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 ; X32: # %bb.0: # %entry
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; X32-NEXT: psrlq %xmm1, %xmm0
 ; X32-NEXT: movdqa %xmm0, (%eax)
 ; X32-NEXT: retl