From 45d192823ec0bc5ea40cd6af8e1ac00393cc0e21 Mon Sep 17 00:00:00 2001
From: Andrew Zhogin
Date: Wed, 5 Jul 2017 17:55:42 +0000
Subject: [PATCH] [DAGCombiner] visitRotate patch to optimize pair of
 ROTR/ROTL instructions into one with combined shift operand.

For two ROTR operations with shifts C1, C2; combined shift operand will be
(C1 + C2) % bitsize.

Differential revision: https://reviews.llvm.org/D12833

llvm-svn: 307179
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 +++++++++++++++++++
 llvm/test/CodeGen/ARM/ror.ll                  |  9 +++------
 llvm/test/CodeGen/X86/combine-rotates.ll      |  6 ++----
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e40788eea1de..6fb5e3996e29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5279,6 +5279,25 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
   }
+
+  unsigned NextOp = N0.getOpcode();
+  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
+  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR)
+    if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1))
+      if (SDNode *C2 =
+              DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+        bool SameSide = (N->getOpcode() == NextOp);
+        unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
+        if (SDValue CombinedShift =
+                DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) {
+          unsigned Bitsize = VT.getScalarSizeInBits();
+          SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT);
+          SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+              ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode());
+          return DAG.getNode(
+              N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm);
+        }
+      }
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/ARM/ror.ll b/llvm/test/CodeGen/ARM/ror.ll
index a6cc415e8303..0f699a8dd29d 100644
--- a/llvm/test/CodeGen/ARM/ror.ll
+++ b/llvm/test/CodeGen/ARM/ror.ll
@@ -3,8 +3,7 @@
 ; rotr (rotr x, 4), 6 -> rotr x, 10 -> ror r0, r0, #10
 define i32 @test1(i32 %x) nounwind readnone {
 ; CHECK-LABEL: test1:
-; CHECK: ror r0, r0, #4
-; CHECK: ror r0, r0, #6
+; CHECK: ror r0, r0, #10
 ; CHECK: bx lr
 entry:
   %high_part.i = shl i32 %x, 28
@@ -19,10 +18,8 @@ entry:
 ; the same vector test
 define <2 x i32> @test2(<2 x i32> %x) nounwind readnone {
 ; CHECK-LABEL: test2:
-; CHECK: ror r0, r0, #4
-; CHECK: ror r1, r1, #4
-; CHECK: ror r0, r0, #6
-; CHECK: ror r1, r1, #6
+; CHECK: ror r0, r0, #10
+; CHECK: ror r1, r1, #10
 ; CHECK: bx lr
 entry:
   %high_part.i = shl <2 x i32> %x,
diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll
index 46a8b68bc087..1bce1cff326e 100644
--- a/llvm/test/CodeGen/X86/combine-rotates.ll
+++ b/llvm/test/CodeGen/X86/combine-rotates.ll
@@ -35,8 +35,7 @@ define <4 x i32> @combine_vec_rot_rot(<4 x i32> %x) {
 define <4 x i32> @combine_vec_rot_rot_splat(<4 x i32> %x) {
 ; XOP-LABEL: combine_vec_rot_rot_splat:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vprotd $29, %xmm0, %xmm0
-; XOP-NEXT:    vprotd $10, %xmm0, %xmm0
+; XOP-NEXT:    vprotd $7, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: combine_vec_rot_rot_splat:
@@ -60,8 +59,7 @@ define <4 x i32> @combine_vec_rot_rot_splat(<4 x i32> %x) {
 define <4 x i32> @combine_vec_rot_rot_splat_zero(<4 x i32> %x) {
 ; XOP-LABEL: combine_vec_rot_rot_splat_zero:
 ; XOP:       # BB#0:
-; XOP-NEXT:    vprotd $31, %xmm0, %xmm0
-; XOP-NEXT:    vprotd $1, %xmm0, %xmm0
+; XOP-NEXT:    vprotd $0, %xmm0, %xmm0
 ; XOP-NEXT:    retq
 ;
 ; AVX512-LABEL: combine_vec_rot_rot_splat_zero: