[AArch64][GlobalISel] Fold G_SHL into TB(N)Z bit calculation
This implements the following optimization:

```
(tbz (shl x, c), b) -> (tbz x, b-c)
```

which appears in `getTestBitOperand` in AArch64ISelLowering.cpp.

If we test bit `b` of `shl x, c`, we can fold away the `shl` by looking `c`
bits to the right of `b` in `x`, as long as `b - c` still fits in the type.
So we can simply test bit `b - c` instead.

Differential Revision: https://reviews.llvm.org/D73924
commit 37910fd0e1
parent 7d3aace3f5
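For intuition, here is a small standalone C++ check of the identity the fold relies on. It is an illustration only (plain C++, not LLVM code or part of the patch): for `b >= c`, bit `b` of `x << c` is exactly bit `b - c` of `x`.

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Returns the value of bit `Bit` of `Value`.
static bool testBit(uint64_t Value, unsigned Bit) { return (Value >> Bit) & 1; }

int main() {
  for (uint64_t X : {0x0ULL, 0x1ULL, 0xdeadbeefULL, ~0ULL}) {
    for (unsigned C = 0; C < 8; ++C) {
      for (unsigned B = C; B < 64; ++B) {
        // A tbz/tbnz on bit B of (X << C) sees the same value as bit B - C of
        // X, so the shift can be folded away by adjusting the bit index.
        assert(testBit(X << C, B) == testBit(X, B - C));
      }
    }
  }
  return 0;
}
```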
```diff
@@ -991,7 +991,7 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
 }
 
 /// Return a register which can be used as a bit to test in a TB(N)Z.
-static Register getTestBitReg(Register Reg, uint64_t Bit,
+static Register getTestBitReg(Register Reg, uint64_t &Bit,
                               MachineRegisterInfo &MRI) {
   assert(Reg.isValid() && "Expected valid register!");
   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
```
```diff
@@ -1031,6 +1031,15 @@ static Register getTestBitReg(Register Reg, uint64_t Bit,
       }
       if (VRegAndVal)
         C = VRegAndVal->Value;
       break;
     }
+    case TargetOpcode::G_SHL: {
+      TestReg = MI->getOperand(1).getReg();
+      auto VRegAndVal =
+          getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+      if (VRegAndVal)
+        C = VRegAndVal->Value;
+      break;
+    }
     }
```
```diff
@@ -1049,6 +1058,14 @@ static Register getTestBitReg(Register Reg, uint64_t Bit,
       if ((*C >> Bit) & 1)
         NextReg = TestReg;
       break;
+    case TargetOpcode::G_SHL:
+      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
+      // the type of the register.
+      if (*C <= Bit && (Bit - *C) < MRI.getType(TestReg).getSizeInBits()) {
+        NextReg = TestReg;
+        Bit = Bit - *C;
+      }
+      break;
     }
 
     // Check if we found anything worth folding.
```
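To make the guard above concrete, here is a hedged sketch in plain C++ rather than the LLVM API. The function name `foldShlIntoTestBit`, its parameters, and the use of `std::optional` are illustrative, not part of the patch.

```cpp
#include <cstdint>
#include <optional>

// Returns the adjusted bit index if the fold is legal, std::nullopt otherwise.
std::optional<uint64_t> foldShlIntoTestBit(uint64_t Bit, uint64_t C,
                                           unsigned RegSizeInBits) {
  // (tbz (shl x, c), b) -> (tbz x, b - c) only when b >= c and b - c is still
  // a valid bit index for x; otherwise bit b of the shifted value is a
  // shifted-in zero and there is no corresponding bit of x to test.
  if (C <= Bit && (Bit - C) < RegSizeInBits)
    return Bit - C;
  return std::nullopt; // Leave the G_SHL in place.
}
```

With the values used in the tests below: `Bit = 3, C = 1` folds to bit 2, while `Bit = 3, C = 5` is rejected.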
```diff
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check folding a G_SHL into a G_BRCOND which has been matched as a TB(N)Z.
+...
+---
+name:            fold_shl
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: fold_shl
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64all = COPY $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK:   TBNZW [[COPY1]], 2, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; tbnz (shl x, 1), 3 == tbnz x, 2
+    %fold_cst:gpr(s64) = G_CONSTANT i64 1
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_shl_1
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_shl_1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   %fold_me:gpr64 = UBFMXri %copy, 59, 58
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; 5 > 3, so we cannot do the transformation as above.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 5
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
+---
+name:            dont_fold_shl_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+body:             |
+  ; CHECK-LABEL: name: dont_fold_shl_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   %copy:gpr64 = COPY $x0
+  ; CHECK:   %fold_cst:gpr64 = MOVi64imm -5
+  ; CHECK:   %fold_me:gpr64 = LSLVXr %copy, %fold_cst
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64all = COPY %fold_me
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+  ; CHECK:   TBNZW [[COPY2]], 3, %bb.1
+  ; CHECK:   B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s64) = G_CONSTANT i64 8
+    %zero:gpr(s64) = G_CONSTANT i64 0
+
+    ; Same case as above, except we wrap around.
+    %fold_cst:gpr(s64) = G_CONSTANT i64 -5
+    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst
+
+    %and:gpr(s64) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    G_BRCOND %cmp_trunc(s1), %bb.1
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR
+...
```
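As a sanity check on the first two cases above (an illustration in plain C++, not part of the test): with `%bit = 8` the TB(N)Z tests bit 3. Shifting left by 1 moves bit 2 of `x` into that position, while shifting left by 5 always leaves a shifted-in zero there, so only the first case has a bit of `x` the test can be redirected to. The wrap-around case (shift by -5) is not modeled here because a negative shift amount is undefined in C++; it is likewise rejected by the `*C <= Bit` guard, so the shift survives selection as seen in its CHECK lines.

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint64_t X : {0x0ULL, 0x1ULL, 0x123456789abcdef0ULL, ~0ULL}) {
    // fold_shl: tbnz (shl x, 1), 3 is equivalent to tbnz x, 2.
    assert((((X << 1) >> 3) & 1) == ((X >> 2) & 1));
    // dont_fold_shl_1: bit 3 of (x << 5) is always a shifted-in zero, so no
    // bit of x can replace it and the shift must stay (hence the UBFMXri).
    assert((((X << 5) >> 3) & 1) == 0);
  }
  return 0;
}
```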