forked from OSchip/llvm-project
[DAGCombiner] Add vector support for (srl (trunc (srl x, c1)), c2) combine.
llvm-svn: 301305
This commit is contained in:
parent
1606fc0bf9
commit
7d65b66962
|
@ -5615,23 +5615,24 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
|
|||
|
||||
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
|
||||
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
|
||||
N0.getOperand(0).getOpcode() == ISD::SRL &&
|
||||
isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
|
||||
uint64_t c1 = N0.getOperand(0).getConstantOperandVal(1);
|
||||
uint64_t c2 = N1C->getZExtValue();
|
||||
EVT InnerShiftVT = N0.getOperand(0).getValueType();
|
||||
EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
|
||||
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
|
||||
// This is only valid if OpSizeInBits + c1 equals the size of the inner shift.
|
||||
if (c1 + OpSizeInBits == InnerShiftSize) {
|
||||
SDLoc DL(N0);
|
||||
if (c1 + c2 >= InnerShiftSize)
|
||||
return DAG.getConstant(0, DL, VT);
|
||||
return DAG.getNode(ISD::TRUNCATE, DL, VT,
|
||||
DAG.getNode(ISD::SRL, DL, InnerShiftVT,
|
||||
N0.getOperand(0)->getOperand(0),
|
||||
DAG.getConstant(c1 + c2, DL,
|
||||
ShiftCountVT)));
|
||||
N0.getOperand(0).getOpcode() == ISD::SRL) {
|
||||
if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
|
||||
uint64_t c1 = N001C->getZExtValue();
|
||||
uint64_t c2 = N1C->getZExtValue();
|
||||
EVT InnerShiftVT = N0.getOperand(0).getValueType();
|
||||
EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
|
||||
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
|
||||
// This is only valid if OpSizeInBits + c1 equals the size of the inner shift.
|
||||
if (c1 + OpSizeInBits == InnerShiftSize) {
|
||||
SDLoc DL(N0);
|
||||
if (c1 + c2 >= InnerShiftSize)
|
||||
return DAG.getConstant(0, DL, VT);
|
||||
return DAG.getNode(ISD::TRUNCATE, DL, VT,
|
||||
DAG.getNode(ISD::SRL, DL, InnerShiftVT,
|
||||
N0.getOperand(0).getOperand(0),
|
||||
DAG.getConstant(c1 + c2, DL,
|
||||
ShiftCountVT)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -223,18 +223,17 @@ define <4 x i32> @combine_vec_lshr_lshr_zero1(<4 x i32> %x) {
|
|||
define <4 x i32> @combine_vec_lshr_trunc_lshr0(<4 x i64> %x) {
|
||||
; SSE-LABEL: combine_vec_lshr_trunc_lshr0:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: psrlq $32, %xmm1
|
||||
; SSE-NEXT: psrlq $32, %xmm0
|
||||
; SSE-NEXT: psrlq $48, %xmm1
|
||||
; SSE-NEXT: psrlq $48, %xmm0
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||
; SSE-NEXT: psrld $16, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_vec_lshr_trunc_lshr0:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpsrlq $32, %ymm0, %ymm0
|
||||
; AVX-NEXT: vpsrlq $48, %ymm0, %ymm0
|
||||
; AVX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
|
||||
; AVX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
%1 = lshr <4 x i64> %x, <i64 32, i64 32, i64 32, i64 32>
|
||||
|
|
Loading…
Reference in New Issue