[AArch64] Take advantage of variable shift/rotate amount implicit mod operation.

Summary:
Optimize code generated for variable shifts/rotates by taking advantage
of the implicit and/mod done on the variable shift amount register.

Resolves bug 27582 and bug 37421.

Reviewers: t.p.northover, qcolombet, MatzeB, javed.absar

Subscribers: rengolin, kristof.beyls, mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D46844

llvm-svn: 333214
This commit is contained in:
Geoff Berry 2018-05-24 18:29:42 +00:00
parent 383fe5c866
commit 98150e3a62
3 changed files with 155 additions and 16 deletions

View File

@@ -168,6 +168,7 @@ public:
bool tryBitfieldExtractOpFromSExt(SDNode *N);
bool tryBitfieldInsertOp(SDNode *N);
bool tryBitfieldInsertInZeroOp(SDNode *N);
bool tryShiftAmountMod(SDNode *N);
bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
@@ -2441,6 +2442,111 @@ bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
return true;
}
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions.
///
/// Per the commit summary, the AArch64 variable shift/rotate instructions
/// implicitly AND/mod the amount register by the register size, so an
/// explicit AND mask, or an ADD/SUB of a multiple of the size, feeding the
/// shift amount is redundant and can be dropped here during selection.
/// Returns true if this routine selected the node; false to let the normal
/// selection path handle it.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
EVT VT = N->getValueType(0);
// Map the generic shift/rotate opcode to the corresponding
// register-amount machine instruction for this value type.
unsigned Opc;
switch (N->getOpcode()) {
case ISD::ROTR:
Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
break;
case ISD::SHL:
Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
break;
case ISD::SRL:
Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
break;
case ISD::SRA:
Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
break;
default:
return false;
}
// Size is the modulus the instruction implicitly applies to the amount;
// Bits is the number of low amount bits actually consumed (log2(Size)).
uint64_t Size;
uint64_t Bits;
if (VT == MVT::i32) {
Bits = 5;
Size = 32;
} else if (VT == MVT::i64) {
Bits = 6;
Size = 64;
} else
return false;
SDValue ShiftAmt = N->getOperand(1);
SDLoc DL(N);
SDValue NewShiftAmt;
// Skip over an extend of the shift amount.
if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
ShiftAmt = ShiftAmt->getOperand(0);
if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
SDValue Add0 = ShiftAmt->getOperand(0);
SDValue Add1 = ShiftAmt->getOperand(1);
uint64_t Add0Imm;
uint64_t Add1Imm;
// If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
// to avoid the ADD/SUB.
if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
NewShiftAmt = Add0;
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
// generate a NEG instead of a SUB of a constant.
else if (ShiftAmt->getOpcode() == ISD::SUB &&
isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
(Add0Imm % Size == 0)) {
// Materialize -X as a subtraction from the zero register, i.e. the
// NEG alias, sized to match the SUB node's type.
unsigned NegOpc;
unsigned ZeroReg;
EVT SubVT = ShiftAmt->getValueType(0);
if (SubVT == MVT::i32) {
NegOpc = AArch64::SUBWrr;
ZeroReg = AArch64::WZR;
} else {
assert(SubVT == MVT::i64);
NegOpc = AArch64::SUBXrr;
ZeroReg = AArch64::XZR;
}
SDValue Zero =
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
MachineSDNode *Neg =
CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
NewShiftAmt = SDValue(Neg, 0);
} else
return false;
} else {
// If the shift amount is masked with an AND, check that the mask covers the
// bits that are implicitly ANDed off by the above opcodes and if so, skip
// the AND.
uint64_t MaskImm;
if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
return false;
// The mask must keep at least the low Bits bits; anything above them is
// discarded by the instruction anyway.
if (countTrailingOnes(MaskImm) < Bits)
return false;
NewShiftAmt = ShiftAmt->getOperand(0);
}
// Narrow/widen the shift amount to match the size of the shift operation.
if (VT == MVT::i32)
NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
// Widen an i32 amount to i64 via SUBREG_TO_REG into the low sub-register;
// the instruction ignores the bits above Bits, so the high half's
// contents do not matter.
SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
MachineSDNode *Ext = CurDAG->getMachineNode(
AArch64::SUBREG_TO_REG, DL, VT,
CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
NewShiftAmt = SDValue(Ext, 0);
}
// Replace N in place with the variable shift using the simplified amount.
SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
CurDAG->SelectNodeTo(N, Opc, VT, Ops);
return true;
}
bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
unsigned RegWidth) {
@@ -2707,6 +2813,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
if (tryBitfieldInsertInZeroOp(Node))
return;
LLVM_FALLTHROUGH;
case ISD::ROTR:
case ISD::SHL:
if (tryShiftAmountMod(Node))
return;
break;
case ISD::SIGN_EXTEND:

View File

@@ -2,19 +2,17 @@
define i128 @shl(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: shl:
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
; CHECK: cmp x2, #0
; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, x2
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]]
; CHECK: cmp [[EXTRA_SHIFT]], #0
; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge
; CHECK: csel x0, xzr, [[HI_BIG_SHIFT]], ge
; CHECK: ret
%shl = shl i128 %r, %s
@@ -23,20 +21,18 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone {
define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: ashr:
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
; CHECK: cmp x2, #0
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
; CHECK: cmp [[EXTRA_SHIFT]], #0
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
; CHECK: csel x1, [[BIGSHIFT_HI]], [[LO_BIG_SHIFT]], ge
; CHECK: ret
%shr = ashr i128 %r, %s
@@ -45,19 +41,16 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: lshr:
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
; CHECK: cmp x2, #0
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
; CHECK: cmp [[EXTRA_SHIFT]], #0
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge
; CHECK: csel x1, xzr, [[LO_BIG_SHIFT]], ge
; CHECK: ret
%shr = lshr i128 %r, %s

View File

@@ -0,0 +1,35 @@
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; Check that we optimize out AND instructions and ADD/SUB instructions
; modulo the shift size to take advantage of the implicit mod done on
; the shift amount value by the variable shift/rotate instructions.
; Shifting by a truncated i64 amount: the mask that legalization would
; introduce for the trunc is redundant because the variable shift only
; reads the low 5 bits of the amount, so no AND should be emitted.
define i32 @test1(i32 %x, i64 %y) {
; CHECK-LABEL: test1:
; CHECK-NOT: and
; CHECK: lsr
%sh_prom = trunc i64 %y to i32
%shr = lshr i32 %x, %sh_prom
ret i32 %shr
}
; Shifting by (64 - %x): since 64 == 0 mod 64, the amount is equivalent to
; -%x, so a single NEG should replace the constant materialization (ORR)
; and the SUB-from-constant.
define i64 @test2(i32 %x, i64 %y) {
; CHECK-LABEL: test2:
; CHECK-NOT: orr
; CHECK-NOT: sub
; CHECK: neg
; CHECK: asr
%sub9 = sub nsw i32 64, %x
%sh_prom12.i = zext i32 %sub9 to i64
%shr.i = ashr i64 %y, %sh_prom12.i
ret i64 %shr.i
}
; Shifting by (64 + %x): adding a multiple of the shift size is a no-op
; modulo 64, so the ADD should be dropped and %x used directly.
define i64 @test3(i64 %x, i64 %y) {
; CHECK-LABEL: test3:
; CHECK-NOT: add
; CHECK: lsl
%add = add nsw i64 64, %x
%shl = shl i64 %y, %add
ret i64 %shl
}