[DAGCombiner] Add vector support for (srl (trunc (srl x, c1)), c2) combine.

llvm-svn: 301305
Simon Pilgrim 2017-04-25 12:40:45 +00:00
parent 1606fc0bf9
commit 7d65b66962
2 changed files with 22 additions and 22 deletions
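
The combine being extended is easiest to see on a single lane: shifting a wide value right by c1, truncating it, and shifting the narrow result right by c2 is the same as shifting the wide value right by c1 + c2 and then truncating, provided c1 plus the narrow width equals the wide width (and the result is simply zero once c1 + c2 reaches that width). Below is a minimal standalone C++ sketch of that identity using the c1 = 32, c2 = 16 constants from the updated test; it is an illustration only, not LLVM code.

#include <cassert>
#include <cstdint>

int main() {
  // One lane of the vector test below: x is 64-bit, the inner lshr is by
  // c1 = 32, the value is truncated to 32 bits, then shifted right again
  // by c2 = 16. Since c1 + (truncated width 32) == inner width 64, the
  // truncated value holds exactly the top 32 bits of x, so the two shifts
  // merge into a single shift of x by c1 + c2 = 48 followed by truncation.
  uint64_t x = 0x123456789ABCDEF0ULL;
  uint32_t twoShifts = static_cast<uint32_t>(x >> 32) >> 16;
  uint32_t oneShift  = static_cast<uint32_t>(x >> 48);
  assert(twoShifts == oneShift);
  return 0;
}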

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -5615,23 +5615,24 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
-      N0.getOperand(0).getOpcode() == ISD::SRL &&
-      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
-    uint64_t c1 = N0.getOperand(0).getConstantOperandVal(1);
-    uint64_t c2 = N1C->getZExtValue();
-    EVT InnerShiftVT = N0.getOperand(0).getValueType();
-    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
-    uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
-    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
-    if (c1 + OpSizeInBits == InnerShiftSize) {
-      SDLoc DL(N0);
-      if (c1 + c2 >= InnerShiftSize)
-        return DAG.getConstant(0, DL, VT);
-      return DAG.getNode(ISD::TRUNCATE, DL, VT,
-                         DAG.getNode(ISD::SRL, DL, InnerShiftVT,
-                                     N0.getOperand(0)->getOperand(0),
-                                     DAG.getConstant(c1 + c2, DL,
-                                                     ShiftCountVT)));
+      N0.getOperand(0).getOpcode() == ISD::SRL) {
+    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
+      uint64_t c1 = N001C->getZExtValue();
+      uint64_t c2 = N1C->getZExtValue();
+      EVT InnerShiftVT = N0.getOperand(0).getValueType();
+      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
+      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+      if (c1 + OpSizeInBits == InnerShiftSize) {
+        SDLoc DL(N0);
+        if (c1 + c2 >= InnerShiftSize)
+          return DAG.getConstant(0, DL, VT);
+        return DAG.getNode(ISD::TRUNCATE, DL, VT,
+                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+                                       N0.getOperand(0).getOperand(0),
+                                       DAG.getConstant(c1 + c2, DL,
+                                                       ShiftCountVT)));
+      }
     }
   }
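
The functional change above is the predicate on the inner shift amount: isa<ConstantSDNode> only matches a scalar constant operand, while isConstOrConstSplat also hands back the constant when the shift amount is a vector whose lanes all hold the same value, which is what the splat shift amounts in the vector tests produce. Here is a rough standalone model of that idea, with a hypothetical helper name and plain C++ types rather than LLVM's SelectionDAG API:

#include <cstdint>
#include <optional>
#include <vector>

// Hypothetical helper: returns the common value if every lane agrees.
std::optional<uint64_t> constOrConstSplat(const std::vector<uint64_t> &lanes) {
  if (lanes.empty())
    return std::nullopt;
  for (uint64_t v : lanes)
    if (v != lanes.front())
      return std::nullopt; // non-splat shift amount: the combine must bail
  return lanes.front();
}

int main() {
  auto splat = constOrConstSplat({32, 32, 32, 32}); // usable, c1 = 32
  auto mixed = constOrConstSplat({32, 48, 32, 48}); // not usable
  return (splat && *splat == 32 && !mixed) ? 0 : 1;
}

Requiring a splat keeps the rest of the fold unchanged: a single c1 still describes every lane, so the existing scalar legality checks carry over directly.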

--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll

@@ -223,18 +223,17 @@ define <4 x i32> @combine_vec_lshr_lshr_zero1(<4 x i32> %x) {
 define <4 x i32> @combine_vec_lshr_trunc_lshr0(<4 x i64> %x) {
 ; SSE-LABEL: combine_vec_lshr_trunc_lshr0:
 ; SSE: # BB#0:
-; SSE-NEXT: psrlq $32, %xmm1
-; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: psrlq $48, %xmm1
+; SSE-NEXT: psrlq $48, %xmm0
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE-NEXT: psrld $16, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_lshr_trunc_lshr0:
 ; AVX: # BB#0:
-; AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX-NEXT: vpsrlq $48, %ymm0, %ymm0
 ; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
 ; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
   %1 = lshr <4 x i64> %x, <i64 32, i64 32, i64 32, i64 32>