[PowerPC] Exploit the rldicl + rldicl when and with mask

If we AND a value with a constant like 0xFFFFFFC00000, we currently use several
instructions to materialize this 48-bit constant, followed by an "and". However,
we could achieve the same result with just two rotate instructions.

       MB          ME               MB+63-ME
+----------------------+     +----------------------+
|0000001111111111111000| ->  |0000000001111111111111|
+----------------------+     +----------------------+
 0                    63      0                    63
First rotate left by ME + 1 bits, then mask the result with (MB + 63 - ME, 63),
and finally rotate back. Note that the amounts have to be taken modulo 64 to
handle the wrapping case.

Reviewed by: ChenZheng, Nemanjai

Differential Revision: https://reviews.llvm.org/D71831
This commit is contained in:
QingShan Zhang 2020-04-17 05:24:00 +00:00
parent 5034df8600
commit 4bd186c0ff
6 changed files with 83 additions and 45 deletions

View File

@ -351,6 +351,7 @@ private:
bool tryAsSingleRLWINM(SDNode *N);
bool tryAsSingleRLWINM8(SDNode *N);
bool tryAsSingleRLWIMI(SDNode *N);
// Try to select an ISD::AND with a 64-bit constant mask as two RLDICL
// instructions; returns true if N was selected that way.
bool tryAsPairOfRLDICL(SDNode *N);
void PeepholePPC64();
void PeepholePPC64ZExt();
@ -4439,6 +4440,60 @@ bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
return false;
}
/// Try to select an ISD::AND with a 64-bit constant mask as a pair of RLDICL
/// instructions instead of materializing the mask and emitting an "and".
///
/// The mask qualifies when, after its leading zeros have been filled with
/// ones, it is accepted by isRunOfOnes64. The first RLDICL rotates the ones on
/// the left around to the right and clears the gap of zeros; the second
/// RLDICL rotates back and clears the bits that were leading zeros in the
/// original mask.
///
/// \param N the ISD::AND node to select.
/// \returns true if N was selected as the RLDICL pair; false (without creating
/// any node) otherwise.
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  // Only 64-bit immediate masks are handled here. Masks that fit in 16 bits
  // are left alone because the pattern in the .td file handles them well with
  // "andi.".
  uint64_t Mask;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Mask) || isUInt<16>(Mask))
    return false;

  // Add the missing bits on the left of the mask, i.e. turn
  //   |0001111100000011111111|
  // into
  //   |1111111100000011111111|
  // and check that the result is a wrapped run of ones.
  const unsigned LeadingZeros = countLeadingZeros(Mask);
  uint64_t FilledMask = Mask;
  if (LeadingZeros != 0)
    FilledMask |= maskLeadingOnes<uint64_t>(LeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(FilledMask, MB, ME))
    return false;

  SDLoc Loc(N);

  //         ME     MB                   MB-ME+63
  // +----------------------+     +----------------------+
  // |1111111100000011111111| ->  |0000001111111111111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // The filled mask has ME + 1 ones on the left, followed by a gap of
  // (MB - ME + 63) & 63 zeros.
  const unsigned RotateLeftAmt = ME + 1;
  const unsigned GapZeros = (MB - ME + 63) & 63;

  // First RLDICL: rotate left by the number of ones on the left, so that they
  // become trailing ones, and clear on the left the bits that are already
  // zero in the mask.
  SDNode *FirstRotate = CurDAG->getMachineNode(
      PPC::RLDICL, Loc, MVT::i64, N->getOperand(0),
      getI64Imm(RotateLeftAmt, Loc), getI64Imm(GapZeros, Loc));

  //         MB-ME+63                      ME     MB
  // +----------------------+     +----------------------+
  // |0000001111111111111111| ->  |0001111100000011111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // Second RLDICL: rotate by 64 - RotateLeftAmt to undo the first rotate, then
  // clear on the left the ones that were added to the mask above.
  SDValue SecondOps[] = {SDValue(FirstRotate, 0),
                         getI64Imm(64 - RotateLeftAmt, Loc),
                         getI64Imm(LeadingZeros, Loc)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, SecondOps);
  return true;
}
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
unsigned Imm;
@ -4766,7 +4821,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND:
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
return;
// Other cases are autogenerated.

View File

@ -7,8 +7,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
%typ = type { i32, i32 }
; On release builds, it doesn't crash, spewing nonsense instead.
; To make sure it works, check that and is still alive.
; CHECK: and
; To make sure it works, check that rldicl is still alive.
; CHECK: rldicl
; Also, in release, it emits a COPY from a 32-bit register to
; a 64-bit register, which happens to be emitted as cror [!]
; by the confused CodeGen. Just to be sure, check there isn't one.

View File

@ -43,15 +43,13 @@ define i32* @f1(i32 %n) nounwind {
; PPC64-LINUX-LABEL: f1
; PPC64-LINUX: std 31, -8(1)
; PPC64-LINUX-NEXT: stdu 1, -64(1)
; PPC64-LINUX-NEXT: lis 4, 32767
; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30
; PPC64-LINUX-NEXT: ori 4, 4, 65535
; PPC64-LINUX-NEXT: addi 3, 3, 15
; PPC64-LINUX-NEXT: sldi 4, 4, 4
; PPC64-LINUX-NEXT: mr 31, 1
; PPC64-LINUX-NEXT: and 3, 3, 4
; PPC64-LINUX-NEXT: neg 3, 3
; PPC64-LINUX-NEXT: addi 3, 3, 15
; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 4
; PPC64-LINUX-NEXT: addi 4, 31, 64
; PPC64-LINUX-NEXT: rldicl 3, 3, 4, 29
; PPC64-LINUX-NEXT: neg 3, 3
; PPC64-LINUX-NEXT: stdux 4, 1, 3
; The linkage area is always put on the top of the stack.
@ -82,14 +80,12 @@ define i32* @f1(i32 %n) nounwind {
; PPC64-AIX-LABEL: f1
; PPC64-AIX: std 31, -8(1)
; PPC64-AIX-NEXT: stdu 1, -64(1)
; PPC64-AIX-NEXT: lis 4, 32767
; PPC64-AIX-NEXT: rldic 3, 3, 2, 30
; PPC64-AIX-NEXT: ori 4, 4, 65535
; PPC64-AIX-NEXT: addi 3, 3, 15
; PPC64-AIX-NEXT: sldi 4, 4, 4
; PPC64-AIX-NEXT: mr 31, 1
; PPC64-AIX-NEXT: and 3, 3, 4
; PPC64-AIX-NEXT: addi 3, 3, 15
; PPC64-AIX-NEXT: addi 4, 31, 64
; PPC64-AIX-NEXT: rldicl 3, 3, 60, 4
; PPC64-AIX-NEXT: rldicl 3, 3, 4, 29
; PPC64-AIX-NEXT: neg 3, 3
; PPC64-AIX-NEXT: stdux 4, 1, 3

View File

@ -15,8 +15,8 @@ define i32 @test1(i32 %a) {
define i64 @test2(i64 %a) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, -7
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 61, 2
; CHECK-NEXT: rotldi 3, 3, 3
; CHECK-NEXT: blr
%and = and i64 %a, -7
ret i64 %and
@ -26,10 +26,8 @@ define i64 @test2(i64 %a) {
define i64 @test3(i64 %a) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: lis 4, 1023
; CHECK-NEXT: ori 4, 4, 65535
; CHECK-NEXT: sldi 4, 4, 22
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 42, 22
; CHECK-NEXT: rldicl 3, 3, 22, 16
; CHECK-NEXT: blr
%and = and i64 %a, 281474972516352
ret i64 %and
@ -39,10 +37,8 @@ define i64 @test3(i64 %a) {
define i64 @test4(i64 %a) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, 12
; CHECK-NEXT: sldi 4, 4, 32
; CHECK-NEXT: ori 4, 4, 255
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 30, 26
; CHECK-NEXT: rldicl 3, 3, 34, 28
; CHECK-NEXT: blr
%and = and i64 %a, 51539607807
ret i64 %and
@ -52,10 +48,8 @@ define i64 @test4(i64 %a) {
define i64 @test5(i64 %a) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, 0
; CHECK-NEXT: oris 4, 4, 65472
; CHECK-NEXT: ori 4, 4, 65535
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 42, 6
; CHECK-NEXT: rldicl 3, 3, 22, 32
; CHECK-NEXT: blr
%and = and i64 %a, 4290838527
ret i64 %and
@ -77,11 +71,8 @@ define i64 @test6(i64 %a) {
define i64 @test7(i64 %a) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, -32767
; CHECK-NEXT: sldi 4, 4, 32
; CHECK-NEXT: oris 4, 4, 65024
; CHECK-NEXT: rldicr 4, 4, 17, 63
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 22, 25
; CHECK-NEXT: rldicl 3, 3, 42, 14
; CHECK-NEXT: blr
%and = and i64 %a, 1121501860462591
ret i64 %and

View File

@ -123,11 +123,9 @@ entry:
ret i32 %or55
; CHECK-LABEL: @test32p1
; CHECK: li [[REG1:[0-9]+]], 0
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
; CHECK: and 3, [[REG4]], [[REG3]]
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5
; CHECK: rldicl 3, [[REG2]], 24, 32
; CHECK: blr
}
@ -147,11 +145,9 @@ entry:
ret i32 %or37
; CHECK-LABEL: @test32p2
; CHECK: li [[REG1:[0-9]+]], 0
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
; CHECK: and 3, [[REG4]], [[REG3]]
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8
; CHECK: rldicl 3, [[REG2]], 24, 32
; CHECK: blr
}

View File

@ -481,9 +481,9 @@ define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32>
define i1 @or_icmps_const_1bit_diff(i64 %x) {
; CHECK-LABEL: or_icmps_const_1bit_diff:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, -5
; CHECK-NEXT: addi 3, 3, -13
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 61, 1
; CHECK-NEXT: rotldi 3, 3, 3
; CHECK-NEXT: cntlzd 3, 3
; CHECK-NEXT: rldicl 3, 3, 58, 63
; CHECK-NEXT: blr