forked from OSchip/llvm-project
[PowerPC] Exploit rldicl + rldicl when ANDing with a mask
If we AND a value with a constant such as 0xFFFFFFC00000, we currently emit several instructions to materialize this 48-bit constant, followed by a final "and". However, we can implement the operation with two rotate instructions.

       MB          ME                  MB+63-ME
+----------------------+     +----------------------+
|0000001111111111111000| ->  |0000000001111111111111|
+----------------------+     +----------------------+
 0                    63      0                    63

Rotate left by ME + 1 bits first, then mask the result with (MB + 63 - ME, 63), and finally rotate back. Note that the amounts need to be wrapped modulo 64 to handle the wrapping case.

Reviewed by: ChenZheng, Nemanjai
Differential Revision: https://reviews.llvm.org/D71831
This commit is contained in:
parent
5034df8600
commit
4bd186c0ff
|
@ -351,6 +351,7 @@ private:
|
|||
bool tryAsSingleRLWINM(SDNode *N);
|
||||
bool tryAsSingleRLWINM8(SDNode *N);
|
||||
bool tryAsSingleRLWIMI(SDNode *N);
|
||||
bool tryAsPairOfRLDICL(SDNode *N);
|
||||
|
||||
void PeepholePPC64();
|
||||
void PeepholePPC64ZExt();
|
||||
|
@ -4439,6 +4440,60 @@ bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Try to select an i64 AND-with-immediate as a pair of RLDICL instructions.
///
/// The immediate's leading zero bits are first filled with ones; if the
/// result is a (possibly wrapped) run of ones according to isRunOfOnes64, the
/// AND is emitted as one RLDICL that rotates the value left and clears the
/// zero gap of the mask, followed by a second RLDICL that rotates the value
/// back and clears the bits that were originally leading zeros.
///
/// \param N the ISD::AND node to select.
/// \returns true if N was morphed into the second RLDICL of the pair; false
/// if operand 1 is not a 64-bit immediate, if the immediate fits in 16
/// unsigned bits, or if the filled mask is not a run of ones.
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  // Only immediate masks are handled here. Masks that fit in 16 unsigned bits
  // are skipped on purpose: the pattern in the .td file already handles them
  // well with "andi.".
  uint64_t Mask;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Mask) || isUInt<16>(Mask))
    return false;

  // Fill the leading zeros of the mask with ones, so that a pattern such as
  //   |0001111100000011111111|
  // becomes
  //   |1111111100000011111111|
  // which can then be checked for being a wrapped run of ones.
  const unsigned LeadingZeros = countLeadingZeros(Mask);
  if (LeadingZeros != 0)
    Mask |= maskLeadingOnes<uint64_t>(LeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(Mask, MB, ME))
    return false;

  //         ME     MB                   MB-ME+63
  // +----------------------+     +----------------------+
  // |1111111100000011111111| ->  |0000001111111111111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // The filled mask has ME + 1 ones on the left, followed by a gap of
  // (MB - ME + 63) & 63 zeros.
  const unsigned RotateAmt = ME + 1;
  const unsigned GapZeros = (MB - ME + 63) & 63;

  SDLoc Loc(N);
  SDValue Src = N->getOperand(0);

  // First RLDICL: rotate left by the number of ones on the left (turning the
  // leading ones into trailing ones) and clear, on the left, the bits that
  // are already zeros in the mask.
  SDNode *Rotated =
      CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Src,
                             getI64Imm(RotateAmt, Loc),
                             getI64Imm(GapZeros, Loc));

  //        MB-ME+63                      ME     MB
  // +----------------------+     +----------------------+
  // |0000001111111111111111| ->  |0001111100000011111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // Second RLDICL: rotate by 64 - RotateAmt to undo the first rotation, and
  // clear on the left exactly as many bits as ones were added to the mask.
  SDValue Ops[] = {SDValue(Rotated, 0), getI64Imm(64 - RotateAmt, Loc),
                   getI64Imm(LeadingZeros, Loc)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}
|
||||
|
||||
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
|
||||
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
|
||||
unsigned Imm;
|
||||
|
@ -4766,7 +4821,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
|
|||
case ISD::AND:
|
||||
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
|
||||
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
|
||||
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
|
||||
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
|
||||
return;
|
||||
|
||||
// Other cases are autogenerated.
|
||||
|
|
|
@ -7,8 +7,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
|
|||
%typ = type { i32, i32 }
|
||||
|
||||
; On release builds, it doesn't crash, spewing nonsense instead.
|
||||
; To make sure it works, check that and is still alive.
|
||||
; CHECK: and
|
||||
; To make sure it works, check that rldicl is still alive.
|
||||
; CHECK: rldicl
|
||||
; Also, in release, it emits a COPY from a 32-bit register to
|
||||
; a 64-bit register, which happens to be emitted as cror [!]
|
||||
; by the confused CodeGen. Just to be sure, check there isn't one.
|
||||
|
|
|
@ -43,15 +43,13 @@ define i32* @f1(i32 %n) nounwind {
|
|||
; PPC64-LINUX-LABEL: f1
|
||||
; PPC64-LINUX: std 31, -8(1)
|
||||
; PPC64-LINUX-NEXT: stdu 1, -64(1)
|
||||
; PPC64-LINUX-NEXT: lis 4, 32767
|
||||
; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30
|
||||
; PPC64-LINUX-NEXT: ori 4, 4, 65535
|
||||
; PPC64-LINUX-NEXT: addi 3, 3, 15
|
||||
; PPC64-LINUX-NEXT: sldi 4, 4, 4
|
||||
; PPC64-LINUX-NEXT: mr 31, 1
|
||||
; PPC64-LINUX-NEXT: and 3, 3, 4
|
||||
; PPC64-LINUX-NEXT: neg 3, 3
|
||||
; PPC64-LINUX-NEXT: addi 3, 3, 15
|
||||
; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 4
|
||||
; PPC64-LINUX-NEXT: addi 4, 31, 64
|
||||
; PPC64-LINUX-NEXT: rldicl 3, 3, 4, 29
|
||||
; PPC64-LINUX-NEXT: neg 3, 3
|
||||
; PPC64-LINUX-NEXT: stdux 4, 1, 3
|
||||
|
||||
; The linkage area is always put on the top of the stack.
|
||||
|
@ -82,14 +80,12 @@ define i32* @f1(i32 %n) nounwind {
|
|||
; PPC64-AIX-LABEL: f1
|
||||
; PPC64-AIX: std 31, -8(1)
|
||||
; PPC64-AIX-NEXT: stdu 1, -64(1)
|
||||
; PPC64-AIX-NEXT: lis 4, 32767
|
||||
; PPC64-AIX-NEXT: rldic 3, 3, 2, 30
|
||||
; PPC64-AIX-NEXT: ori 4, 4, 65535
|
||||
; PPC64-AIX-NEXT: addi 3, 3, 15
|
||||
; PPC64-AIX-NEXT: sldi 4, 4, 4
|
||||
; PPC64-AIX-NEXT: mr 31, 1
|
||||
; PPC64-AIX-NEXT: and 3, 3, 4
|
||||
; PPC64-AIX-NEXT: addi 3, 3, 15
|
||||
; PPC64-AIX-NEXT: addi 4, 31, 64
|
||||
; PPC64-AIX-NEXT: rldicl 3, 3, 60, 4
|
||||
; PPC64-AIX-NEXT: rldicl 3, 3, 4, 29
|
||||
; PPC64-AIX-NEXT: neg 3, 3
|
||||
; PPC64-AIX-NEXT: stdux 4, 1, 3
|
||||
|
||||
|
|
|
@ -15,8 +15,8 @@ define i32 @test1(i32 %a) {
|
|||
define i64 @test2(i64 %a) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li 4, -7
|
||||
; CHECK-NEXT: and 3, 3, 4
|
||||
; CHECK-NEXT: rldicl 3, 3, 61, 2
|
||||
; CHECK-NEXT: rotldi 3, 3, 3
|
||||
; CHECK-NEXT: blr
|
||||
%and = and i64 %a, -7
|
||||
ret i64 %and
|
||||
|
@ -26,10 +26,8 @@ define i64 @test2(i64 %a) {
|
|||
define i64 @test3(i64 %a) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lis 4, 1023
|
||||
; CHECK-NEXT: ori 4, 4, 65535
|
||||
; CHECK-NEXT: sldi 4, 4, 22
|
||||
; CHECK-NEXT: and 3, 3, 4
|
||||
; CHECK-NEXT: rldicl 3, 3, 42, 22
|
||||
; CHECK-NEXT: rldicl 3, 3, 22, 16
|
||||
; CHECK-NEXT: blr
|
||||
%and = and i64 %a, 281474972516352
|
||||
ret i64 %and
|
||||
|
@ -39,10 +37,8 @@ define i64 @test3(i64 %a) {
|
|||
define i64 @test4(i64 %a) {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li 4, 12
|
||||
; CHECK-NEXT: sldi 4, 4, 32
|
||||
; CHECK-NEXT: ori 4, 4, 255
|
||||
; CHECK-NEXT: and 3, 3, 4
|
||||
; CHECK-NEXT: rldicl 3, 3, 30, 26
|
||||
; CHECK-NEXT: rldicl 3, 3, 34, 28
|
||||
; CHECK-NEXT: blr
|
||||
%and = and i64 %a, 51539607807
|
||||
ret i64 %and
|
||||
|
@ -52,10 +48,8 @@ define i64 @test4(i64 %a) {
|
|||
define i64 @test5(i64 %a) {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li 4, 0
|
||||
; CHECK-NEXT: oris 4, 4, 65472
|
||||
; CHECK-NEXT: ori 4, 4, 65535
|
||||
; CHECK-NEXT: and 3, 3, 4
|
||||
; CHECK-NEXT: rldicl 3, 3, 42, 6
|
||||
; CHECK-NEXT: rldicl 3, 3, 22, 32
|
||||
; CHECK-NEXT: blr
|
||||
%and = and i64 %a, 4290838527
|
||||
ret i64 %and
|
||||
|
@ -77,11 +71,8 @@ define i64 @test6(i64 %a) {
|
|||
define i64 @test7(i64 %a) {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li 4, -32767
|
||||
; CHECK-NEXT: sldi 4, 4, 32
|
||||
; CHECK-NEXT: oris 4, 4, 65024
|
||||
; CHECK-NEXT: rldicr 4, 4, 17, 63
|
||||
; CHECK-NEXT: and 3, 3, 4
|
||||
; CHECK-NEXT: rldicl 3, 3, 22, 25
|
||||
; CHECK-NEXT: rldicl 3, 3, 42, 14
|
||||
; CHECK-NEXT: blr
|
||||
%and = and i64 %a, 1121501860462591
|
||||
ret i64 %and
|
||||
|
|
|
@ -123,11 +123,9 @@ entry:
|
|||
ret i32 %or55
|
||||
|
||||
; CHECK-LABEL: @test32p1
|
||||
; CHECK: li [[REG1:[0-9]+]], 0
|
||||
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
|
||||
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287
|
||||
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
|
||||
; CHECK: and 3, [[REG4]], [[REG3]]
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5
|
||||
; CHECK: rldicl 3, [[REG2]], 24, 32
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
|
@ -147,11 +145,9 @@ entry:
|
|||
ret i32 %or37
|
||||
|
||||
; CHECK-LABEL: @test32p2
|
||||
; CHECK: li [[REG1:[0-9]+]], 0
|
||||
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
|
||||
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280
|
||||
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
|
||||
; CHECK: and 3, [[REG4]], [[REG3]]
|
||||
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
|
||||
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8
|
||||
; CHECK: rldicl 3, [[REG2]], 24, 32
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
|
|
|
@ -481,9 +481,9 @@ define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32>
|
|||
define i1 @or_icmps_const_1bit_diff(i64 %x) {
|
||||
; CHECK-LABEL: or_icmps_const_1bit_diff:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li 4, -5
|
||||
; CHECK-NEXT: addi 3, 3, -13
|
||||
; CHECK-NEXT: and 3, 3, 4
|
||||
; CHECK-NEXT: rldicl 3, 3, 61, 1
|
||||
; CHECK-NEXT: rotldi 3, 3, 3
|
||||
; CHECK-NEXT: cntlzd 3, 3
|
||||
; CHECK-NEXT: rldicl 3, 3, 58, 63
|
||||
; CHECK-NEXT: blr
|
||||
|
|
Loading…
Reference in New Issue