[AArch64][GlobalISel] Implement selection support for the new G_JUMP_TABLE and G_BRJT ops.

With this change, GlobalISel can fully code-generate jump tables on AArch64, which is important for code size.

Differential Revision: https://reviews.llvm.org/D63223

llvm-svn: 364086
This commit is contained in:
Amara Emerson 2019-06-21 18:10:41 +00:00
parent fe4625fb24
commit 6e71b34fe6
4 changed files with 178 additions and 1 deletions

View File

@ -105,6 +105,9 @@ private:
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
/// Select a G_JUMP_TABLE into a MOVaddrJT pseudo that materializes the
/// address of the jump table into the destination register.
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
/// Select a G_BRJT into a JumpTableDest32 pseudo followed by an indirect
/// branch (BR) to the computed target.
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
MachineIRBuilder &MIRBuilder) const;
@ -1159,6 +1162,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
case TargetOpcode::G_BRJT:
return selectBrJT(I, MRI);
case TargetOpcode::G_BSWAP: {
// Handle vector types for G_BSWAP directly.
unsigned DstReg = I.getOperand(0).getReg();
@ -2011,11 +2017,50 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return selectInsertElt(I, MRI);
case TargetOpcode::G_CONCAT_VECTORS:
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
}
return false;
}
/// Select a G_BRJT (branch through a jump table).
///
/// The generic instruction carries three operands: the register holding the
/// jump table address, the jump table index operand, and the register holding
/// the entry index. It is replaced by:
///   1. a JumpTableDest32 pseudo defining two fresh virtual registers (a
///      GPR64 branch target and a GPR64sp scratch register), and
///   2. a BR through the computed target register.
///
/// \param I   the G_BRJT instruction; it is erased on return.
/// \param MRI register info used to create the new virtual registers.
/// \returns true if the register operands of the emitted JumpTableDest32
///          could be constrained to the classes required by the pseudo.
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
  unsigned JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  unsigned Index = I.getOperand(2).getReg();

  MachineIRBuilder MIB(I);

  unsigned TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  unsigned ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);

  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();

  // JTAddr and Index come from other instructions and may not yet have been
  // assigned a concrete register class; constrain them (and the defs) to
  // whatever the pseudo requires instead of assuming they already match.
  return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
}
/// Select a G_JUMP_TABLE by materializing the address of the referenced jump
/// table into the instruction's destination register.
///
/// \param I   the G_JUMP_TABLE instruction; it is erased on return.
/// \param MRI machine register info for the current function.
/// \returns the result of constraining the register operands of the emitted
///          MOVaddrJT.
bool AArch64InstructionSelector::selectJumpTable(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  const unsigned TableIdx = I.getOperand(1).getIndex();
  const unsigned AddrReg = I.getOperand(0).getReg();

  // Emit a MOVaddrJT pseudo; it gets expanded to an ADRP + ADD later. The
  // jump table index is attached twice: once with the page flag and once
  // with the (no-check) page-offset flags.
  MachineIRBuilder Builder(I);
  auto AddrMI = Builder.buildInstr(AArch64::MOVaddrJT, {AddrReg}, {});
  AddrMI.addJumpTableIndex(TableIdx, AArch64II::MO_PAGE);
  AddrMI.addJumpTableIndex(TableIdx,
                           AArch64II::MO_NC | AArch64II::MO_PAGEOFF);

  // The generic instruction is dropped before the replacement is constrained.
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*AddrMI, TII, TRI, RBI);
}
bool AArch64InstructionSelector::selectIntrinsicTrunc(
MachineInstr &I, MachineRegisterInfo &MRI) const {
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

View File

@ -577,6 +577,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
getActionDefinitionsBuilder(G_JUMP_TABLE)
.legalFor({{p0}, {s64}});
getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
return Query.Types[0] == p0 && Query.Types[1] == s64;
});
computeTables();
verify(*ST.getInstrInfo());
}

View File

@ -322,7 +322,7 @@
# DEBUG: .. type index coverage check SKIPPED: no rules defined
#
# DEBUG-NEXT: G_BRJT (opcode {{[0-9]+}}): 2 type indices
# DEBUG: .. type index coverage check SKIPPED: no rules defined
# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected
#
# DEBUG-NEXT: G_INSERT_VECTOR_ELT (opcode {{[0-9]+}}): 3 type indices
# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected

View File

@ -0,0 +1,125 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s
--- |
define i32 @jt_test(i32 %x) {
entry:
switch i32 %x, label %return [
i32 75, label %sw.bb
i32 34, label %sw.bb
i32 56, label %sw.bb
i32 35, label %sw.bb
i32 40, label %sw.bb
i32 4, label %sw.bb1
i32 5, label %sw.bb1
i32 6, label %sw.bb1
]
sw.bb:
%add = add nsw i32 %x, 42
br label %return
sw.bb1:
%mul = mul nsw i32 %x, 3
br label %return
return:
%retval.0 = phi i32 [ %mul, %sw.bb1 ], [ %add, %sw.bb ], [ 0, %entry ]
ret i32 %retval.0
}
...
---
name: jt_test
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
jumpTable:
kind: block-address
entries:
- id: 0
blocks: [ '%bb.3', '%bb.3', '%bb.3', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.2', '%bb.2', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.2', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.2', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4',
'%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.4', '%bb.2' ]
body: |
; CHECK-LABEL: name: jt_test
; CHECK: bb.0.entry:
; CHECK: successors: %bb.4(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 71
; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 0
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 4, 0, implicit-def $nzcv
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBSWri]], %subreg.sub_32
; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
; CHECK: [[UBFMXri1:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG1]], 0, 31
; CHECK: $xzr = SUBSXrr [[UBFMXri]], [[UBFMXri1]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK: TBNZW [[CSINCWr]], 0, %bb.4
; CHECK: bb.1.entry:
; CHECK: successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab)
; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 0
; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
; CHECK: early-clobber %18:gpr64, early-clobber %19:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[UBFMXri]], %jump-table.0
; CHECK: BR %18
; CHECK: bb.2.sw.bb:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 42, 0
; CHECK: B %bb.4
; CHECK: bb.3.sw.bb1:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[MOVi32imm3:%[0-9]+]]:gpr32 = MOVi32imm 3
; CHECK: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[MOVi32imm3]], $wzr
; CHECK: bb.4.return:
; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[MADDWrrr]], %bb.3, [[ADDWri]], %bb.2, [[MOVi32imm1]], %bb.0, [[MOVi32imm2]], %bb.1
; CHECK: $w0 = COPY [[PHI]]
; CHECK: RET_ReallyLR implicit $w0
bb.1.entry:
liveins: $w0
%0:gpr(s32) = COPY $w0
%4:gpr(s32) = G_CONSTANT i32 71
%8:gpr(s32) = G_CONSTANT i32 3
%10:gpr(s32) = G_CONSTANT i32 42
%13:gpr(s32) = G_CONSTANT i32 0
%1:gpr(s32) = G_CONSTANT i32 4
%2:gpr(s32) = G_SUB %0, %1
%3:gpr(s64) = G_ZEXT %2(s32)
%5:gpr(s64) = G_ZEXT %4(s32)
%14:gpr(s32) = G_ICMP intpred(ugt), %3(s64), %5
%6:gpr(s1) = G_TRUNC %14(s32)
G_BRCOND %6(s1), %bb.4
bb.5.entry:
successors: %bb.3, %bb.4, %bb.2
%17:gpr(s32) = G_CONSTANT i32 0
%7:gpr(p0) = G_JUMP_TABLE %jump-table.0
G_BRJT %7(p0), %jump-table.0, %3(s64)
bb.2.sw.bb:
%16:gpr(s32) = G_CONSTANT i32 42
%11:gpr(s32) = nsw G_ADD %0, %16
G_BR %bb.4
bb.3.sw.bb1:
%15:gpr(s32) = G_CONSTANT i32 3
%9:gpr(s32) = nsw G_MUL %0, %15
bb.4.return:
%12:gpr(s32) = G_PHI %9(s32), %bb.3, %11(s32), %bb.2, %13(s32), %bb.1, %17(s32), %bb.5
$w0 = COPY %12(s32)
RET_ReallyLR implicit $w0
...