forked from OSchip/llvm-project
[AArch64] Take advantage of the variable shift/rotate amount's implicit mod operation.

Summary: Optimize the code generated for variable shifts and rotates by taking advantage of the implicit AND/mod that the hardware performs on the variable shift-amount register. Resolves bug 27582 and bug 37421.

Reviewers: t.p.northover, qcolombet, MatzeB, javed.absar

Subscribers: rengolin, kristof.beyls, mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D46844

llvm-svn: 333214
This commit is contained in:
parent
383fe5c866
commit
98150e3a62
|
@ -168,6 +168,7 @@ public:
|
|||
bool tryBitfieldExtractOpFromSExt(SDNode *N);
|
||||
bool tryBitfieldInsertOp(SDNode *N);
|
||||
bool tryBitfieldInsertInZeroOp(SDNode *N);
|
||||
bool tryShiftAmountMod(SDNode *N);
|
||||
|
||||
bool tryReadRegister(SDNode *N);
|
||||
bool tryWriteRegister(SDNode *N);
|
||||
|
@ -2441,6 +2442,111 @@ bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions.
///
/// The AArch64 variable shift/rotate instructions selected below (LSLV, LSRV,
/// ASRV, RORV) use only the low 5 (32-bit) or 6 (64-bit) bits of the shift
/// amount register — i.e. they implicitly reduce the amount modulo the
/// datasize. This lets us drop an explicit AND mask on the amount, and fold
/// away ADD/SUB of multiples of the datasize, before selecting the shift.
/// Returns true and replaces N if the transformation applied.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  // Map the generic shift/rotate opcode to the variable-amount machine
  // instruction for the result width. Any other opcode is not a candidate.
  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  // Size is the datasize the hardware reduces the amount modulo; Bits is
  // log2(Size), the number of amount bits the instruction actually reads.
  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  // Safe because only the low Bits bits of the amount matter, and zext/anyext
  // leave those bits unchanged.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
    // to avoid the ADD/SUB.
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
      NewShiftAmt = Add0;
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
    // generate a NEG instead of a SUB of a constant.
    else if (ShiftAmt->getOpcode() == ISD::SUB &&
             isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
             (Add0Imm % Size == 0)) {
      unsigned NegOpc;
      unsigned ZeroReg;
      // The negate is done at the width of the SUB's own result type, which
      // may differ from VT if we skipped an extend above.
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      // NEG X == SUB zero-register, X.
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers the
    // bits that are implicitly ANDed off by the above opcodes and if so, skip
    // the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
      return false;

    // The mask must keep at least the low Bits bits intact (a run of trailing
    // ones at least Bits long); otherwise the AND changes the effective amount.
    if (countTrailingOnes(MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    // Widen an i32 amount to i64 via SUBREG_TO_REG; the high bits are
    // irrelevant since the instruction reads only the low 6 bits.
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(
        AArch64::SUBREG_TO_REG, DL, VT,
        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  // Replace N in place with the variable shift/rotate machine node.
  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
|
||||
|
||||
bool
|
||||
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
|
||||
unsigned RegWidth) {
|
||||
|
@ -2707,6 +2813,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
|
|||
return;
|
||||
if (tryBitfieldInsertInZeroOp(Node))
|
||||
return;
|
||||
LLVM_FALLTHROUGH;
|
||||
case ISD::ROTR:
|
||||
case ISD::SHL:
|
||||
if (tryShiftAmountMod(Node))
|
||||
return;
|
||||
break;
|
||||
|
||||
case ISD::SIGN_EXTEND:
|
||||
|
|
|
@ -2,19 +2,17 @@
|
|||
|
||||
define i128 @shl(i128 %r, i128 %s) nounwind readnone {
|
||||
; CHECK-LABEL: shl:
|
||||
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
|
||||
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
|
||||
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
|
||||
; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
|
||||
; CHECK: cmp x2, #0
|
||||
; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
|
||||
; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
|
||||
; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
|
||||
; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, x2
|
||||
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
|
||||
; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]]
|
||||
; CHECK: cmp [[EXTRA_SHIFT]], #0
|
||||
; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
|
||||
; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
|
||||
; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge
|
||||
; CHECK: csel x0, xzr, [[HI_BIG_SHIFT]], ge
|
||||
; CHECK: ret
|
||||
|
||||
%shl = shl i128 %r, %s
|
||||
|
@ -23,20 +21,18 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone {
|
|||
|
||||
define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
|
||||
; CHECK-LABEL: ashr:
|
||||
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
|
||||
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
|
||||
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
|
||||
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
|
||||
; CHECK: cmp x2, #0
|
||||
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
|
||||
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
|
||||
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
|
||||
; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
|
||||
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
|
||||
; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
|
||||
; CHECK: cmp [[EXTRA_SHIFT]], #0
|
||||
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
|
||||
; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
|
||||
; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
|
||||
; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
|
||||
; CHECK: csel x1, [[BIGSHIFT_HI]], [[LO_BIG_SHIFT]], ge
|
||||
; CHECK: ret
|
||||
|
||||
%shr = ashr i128 %r, %s
|
||||
|
@ -45,19 +41,16 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
|
|||
|
||||
define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
|
||||
; CHECK-LABEL: lshr:
|
||||
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
|
||||
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
|
||||
; CHECK: neg [[REV_SHIFT:x[0-9]+]], x2
|
||||
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
|
||||
; CHECK: cmp x2, #0
|
||||
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
|
||||
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
|
||||
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
|
||||
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
|
||||
; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
|
||||
; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, x2
|
||||
; CHECK: cmp [[EXTRA_SHIFT]], #0
|
||||
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
|
||||
; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
|
||||
; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge
|
||||
; CHECK: csel x1, xzr, [[LO_BIG_SHIFT]], ge
|
||||
; CHECK: ret
|
||||
|
||||
%shr = lshr i128 %r, %s
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
|
||||
|
||||
; Check that we optimize out AND instructions and ADD/SUB instructions
|
||||
; modulo the shift size to take advantage of the implicit mod done on
|
||||
; the shift amount value by the variable shift/rotate instructions.
|
||||
|
||||
; A trunc of the i64 amount feeds the 32-bit shift; the variable shift
; instruction only reads the low 5 bits of the amount register, so no
; explicit AND masking of the amount should appear.
define i32 @test1(i32 %x, i64 %y) {
; CHECK-LABEL: test1:
; CHECK-NOT: and
; CHECK: lsr
  %sh_prom = trunc i64 %y to i32
  %shr = lshr i32 %x, %sh_prom
  ret i32 %shr
}
|
||||
|
||||
; Shift amount is (64 - %x); since 64 == 0 (mod 64), the constant can be
; dropped and the amount becomes a plain NEG of %x — no constant
; materialization (orr) and no SUB should be emitted.
define i64 @test2(i32 %x, i64 %y) {
; CHECK-LABEL: test2:
; CHECK-NOT: orr
; CHECK-NOT: sub
; CHECK: neg
; CHECK: asr
  %sub9 = sub nsw i32 64, %x
  %sh_prom12.i = zext i32 %sub9 to i64
  %shr.i = ashr i64 %y, %sh_prom12.i
  ret i64 %shr.i
}
|
||||
|
||||
; Shift amount is (64 + %x); since 64 == 0 (mod 64), the ADD is redundant
; and the shift can use %x directly — no add instruction should be emitted.
define i64 @test3(i64 %x, i64 %y) {
; CHECK-LABEL: test3:
; CHECK-NOT: add
; CHECK: lsl
  %add = add nsw i64 64, %x
  %shl = shl i64 %y, %add
  ret i64 %shl
}
|
Loading…
Reference in New Issue