forked from OSchip/llvm-project
[DAGCombiner] visitRotate patch to optimize a pair of ROTR/ROTL instructions into one with a combined shift operand.
For two ROTR operations with shifts C1 and C2, the combined shift operand will be (C1 + C2) % bitsize. Differential revision: https://reviews.llvm.org/D12833 llvm-svn: 307179
This commit is contained in:
parent
d7faa9165f
commit
45d192823e
|
@ -5279,6 +5279,25 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
|
|||
if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
|
||||
return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
|
||||
}
|
||||
|
||||
unsigned NextOp = N0.getOpcode();
|
||||
// fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
|
||||
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR)
|
||||
if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1))
|
||||
if (SDNode *C2 =
|
||||
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
|
||||
bool SameSide = (N->getOpcode() == NextOp);
|
||||
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
|
||||
if (SDValue CombinedShift =
|
||||
DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) {
|
||||
unsigned Bitsize = VT.getScalarSizeInBits();
|
||||
SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT);
|
||||
SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
|
||||
ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode());
|
||||
return DAG.getNode(
|
||||
N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm);
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -3,8 +3,7 @@
|
|||
; rotr (rotr x, 4), 6 -> rotr x, 10 -> ror r0, r0, #10
|
||||
define i32 @test1(i32 %x) nounwind readnone {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: ror r0, r0, #4
|
||||
; CHECK: ror r0, r0, #6
|
||||
; CHECK: ror r0, r0, #10
|
||||
; CHECK: bx lr
|
||||
entry:
|
||||
%high_part.i = shl i32 %x, 28
|
||||
|
@ -19,10 +18,8 @@ entry:
|
|||
; the same vector test
|
||||
define <2 x i32> @test2(<2 x i32> %x) nounwind readnone {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: ror r0, r0, #4
|
||||
; CHECK: ror r1, r1, #4
|
||||
; CHECK: ror r0, r0, #6
|
||||
; CHECK: ror r1, r1, #6
|
||||
; CHECK: ror r0, r0, #10
|
||||
; CHECK: ror r1, r1, #10
|
||||
; CHECK: bx lr
|
||||
entry:
|
||||
%high_part.i = shl <2 x i32> %x, <i32 28, i32 28>
|
||||
|
|
|
@ -35,8 +35,7 @@ define <4 x i32> @combine_vec_rot_rot(<4 x i32> %x) {
|
|||
define <4 x i32> @combine_vec_rot_rot_splat(<4 x i32> %x) {
|
||||
; XOP-LABEL: combine_vec_rot_rot_splat:
|
||||
; XOP: # BB#0:
|
||||
; XOP-NEXT: vprotd $29, %xmm0, %xmm0
|
||||
; XOP-NEXT: vprotd $10, %xmm0, %xmm0
|
||||
; XOP-NEXT: vprotd $7, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: combine_vec_rot_rot_splat:
|
||||
|
@ -60,8 +59,7 @@ define <4 x i32> @combine_vec_rot_rot_splat(<4 x i32> %x) {
|
|||
define <4 x i32> @combine_vec_rot_rot_splat_zero(<4 x i32> %x) {
|
||||
; XOP-LABEL: combine_vec_rot_rot_splat_zero:
|
||||
; XOP: # BB#0:
|
||||
; XOP-NEXT: vprotd $31, %xmm0, %xmm0
|
||||
; XOP-NEXT: vprotd $1, %xmm0, %xmm0
|
||||
; XOP-NEXT: vprotd $0, %xmm0, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: combine_vec_rot_rot_splat_zero:
|
||||
|
|
Loading…
Reference in New Issue