forked from OSchip/llvm-project
[AArch64][GlobalISel] Fold in G_ANYEXT/G_ZEXT into TB(N)Z
This is similar to the code in getTestBitOperand in AArch64ISelLowering. Instead of implementing all of the TB(N)Z optimizations at once, this patch implements the simplest case first. The way that this is set up should make it fairly easy to add the rest as we go along. The idea here is that after determining that we can use a TB(N)Z, we can continue looking through instructions and perform further folding. In this case, when we have a G_ZEXT or G_ANYEXT where the extended bits are not used, we can fold it into the TB(N)Z. Differential Revision: https://reviews.llvm.org/D73673
This commit is contained in:
parent
6170272ab9
commit
c8c987d310
|
@ -990,6 +990,27 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
|
|||
}
|
||||
}
|
||||
|
||||
/// Return a register which can be used as a bit to test in a TB(N)Z.
|
||||
static Register getTestBitReg(Register Reg, MachineRegisterInfo &MRI) {
|
||||
assert(Reg.isValid() && "Expected valid register!");
|
||||
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
|
||||
unsigned Opc = MI->getOpcode();
|
||||
Register NextReg;
|
||||
|
||||
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
|
||||
if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT)
|
||||
NextReg = MI->getOperand(1).getReg();
|
||||
|
||||
// Did we find something worth folding?
|
||||
if (!NextReg.isValid() || !MRI.hasOneUse(NextReg))
|
||||
break;
|
||||
|
||||
// NextReg is worth folding. Keep looking.
|
||||
Reg = NextReg;
|
||||
}
|
||||
return Reg;
|
||||
}
|
||||
|
||||
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
|
||||
MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
|
||||
MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
|
||||
|
@ -1018,7 +1039,6 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
|
|||
return false;
|
||||
|
||||
MachineRegisterInfo &MRI = *MIB.getMRI();
|
||||
Register TestReg = AndInst->getOperand(1).getReg();
|
||||
|
||||
// Only support EQ and NE. If we have LT, then it *is* possible to fold, but
|
||||
// we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
|
||||
|
@ -1034,7 +1054,11 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
|
|||
getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
|
||||
if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
|
||||
return false;
|
||||
|
||||
// Try to optimize the TB(N)Z.
|
||||
uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
|
||||
Register TestReg = AndInst->getOperand(1).getReg();
|
||||
TestReg = getTestBitReg(TestReg, MRI);
|
||||
|
||||
// Choose the correct TB(N)Z opcode to use.
|
||||
unsigned Opc = 0;
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||
#
|
||||
# Check that we can continue matching when we are in a situation where we will
|
||||
# emit a TB(N)Z.
|
||||
...
|
||||
---
|
||||
name: fold_zext
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: fold_zext
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: TBNZW %copy, 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
liveins: $x0
|
||||
%copy:gpr(s32) = COPY $w0
|
||||
%bit:gpr(s64) = G_CONSTANT i64 8
|
||||
%zero:gpr(s64) = G_CONSTANT i64 0
|
||||
%fold_me:gpr(s64) = G_ZEXT %copy(s32)
|
||||
%and:gpr(s64) = G_AND %fold_me, %bit
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
|
||||
%cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
|
||||
G_BRCOND %cmp_trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: fold_anyext
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: fold_anyext
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: TBNZW %copy, 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
liveins: $x0
|
||||
%copy:gpr(s32) = COPY $w0
|
||||
%bit:gpr(s64) = G_CONSTANT i64 8
|
||||
%zero:gpr(s64) = G_CONSTANT i64 0
|
||||
%fold_me:gpr(s64) = G_ANYEXT %copy(s32)
|
||||
%and:gpr(s64) = G_AND %fold_me, %bit
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
|
||||
%cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
|
||||
G_BRCOND %cmp_trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: fold_multiple
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: fold_multiple
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: liveins: $h0
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
|
||||
; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
|
||||
; CHECK: TBNZW [[COPY]], 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
liveins: $h0
|
||||
%copy:gpr(s16) = COPY $h0
|
||||
%bit:gpr(s64) = G_CONSTANT i64 8
|
||||
%zero:gpr(s64) = G_CONSTANT i64 0
|
||||
%ext1:gpr(s32) = G_ZEXT %copy(s16)
|
||||
%ext2:gpr(s64) = G_ANYEXT %ext1(s32)
|
||||
%and:gpr(s64) = G_AND %ext2, %bit
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
|
||||
%cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
|
||||
G_BRCOND %cmp_trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: dont_fold_more_than_one_use
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: dont_fold_more_than_one_use
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %copy:gpr32 = COPY $w0
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
|
||||
; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
|
||||
; CHECK: TBNZW %copy, 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: $x0 = COPY %zext
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
liveins: $x0
|
||||
%copy:gpr(s32) = COPY $w0
|
||||
%bit:gpr(s64) = G_CONSTANT i64 8
|
||||
%zero:gpr(s64) = G_CONSTANT i64 0
|
||||
%zext:gpr(s64) = G_ZEXT %copy(s32)
|
||||
%and:gpr(s64) = G_AND %zext, %bit
|
||||
%cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
|
||||
%cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
|
||||
G_BRCOND %cmp_trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
$x0 = COPY %zext:gpr(s64)
|
||||
RET_ReallyLR implicit $x0
|
Loading…
Reference in New Issue