forked from OSchip/llvm-project
Change Thumb2 jumptable codegen to one that uses two-level jumps:
Before:
    adr r12, #LJTI3_0_0
    ldr pc, [r12, +r0, lsl #2]
  LJTI3_0_0:
    .long LBB3_24
    .long LBB3_30
    .long LBB3_31
    .long LBB3_32
After:
    adr r12, #LJTI3_0_0
    add pc, r12, +r0, lsl #2
  LJTI3_0_0:
    b.w LBB3_24
    b.w LBB3_30
    b.w LBB3_31
    b.w LBB3_32
This has several advantages.
1. This will make it easier to optimize this to a TBB / TBH instruction + (smaller) table.
2. This eliminates the need for an ugly asm printer hack to force the addresses into Thumb addresses (bit 0 is one).
3. Same codegen for PIC and non-PIC.
4. This eliminates the need to align the table, so the constant pool island pass won't have to over-estimate the size.
Based on my calculation, the latter is probably slightly faster as well, since ldr pc with a shifter address is very slow. That is, it should be a win as long as the HW implementation can do a reasonable job of branch-predicting the second branch.
llvm-svn: 77024
This commit is contained in:
parent
bcf2ae6aa5
commit
f3a1fce8ae
|
@ -252,9 +252,11 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
|
|||
// ...likewise if it ends with a branch table followed by an unconditional
|
||||
// branch. The branch folder can create these, and we must get rid of them for
|
||||
// correctness of Thumb constant islands.
|
||||
if (((SecondLastOpc == getOpcode(ARMII::BR_JTr)) ||
|
||||
(SecondLastOpc == getOpcode(ARMII::BR_JTm)) ||
|
||||
(SecondLastOpc == getOpcode(ARMII::BR_JTadd))) &&
|
||||
if ((SecondLastOpc == ARM::BR_JTr ||
|
||||
SecondLastOpc == ARM::BR_JTm ||
|
||||
SecondLastOpc == ARM::BR_JTadd ||
|
||||
SecondLastOpc == ARM::tBR_JTr ||
|
||||
SecondLastOpc == ARM::t2BR_JT) &&
|
||||
(LastOpc == getOpcode(ARMII::B))) {
|
||||
I = LastInst;
|
||||
if (AllowModify)
|
||||
|
@ -451,9 +453,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
|
|||
case ARM::BR_JTr:
|
||||
case ARM::BR_JTm:
|
||||
case ARM::BR_JTadd:
|
||||
case ARM::t2BR_JTr:
|
||||
case ARM::t2BR_JTm:
|
||||
case ARM::t2BR_JTadd: {
|
||||
case ARM::t2BR_JT: {
|
||||
// These are jumptable branches, i.e. a branch followed by an inlined
|
||||
// jumptable. The size is 4 + 4 * number of entries.
|
||||
unsigned NumOps = TID.getNumOperands();
|
||||
|
|
|
@ -167,9 +167,6 @@ namespace ARMII {
|
|||
ADDrr,
|
||||
B,
|
||||
Bcc,
|
||||
BR_JTr,
|
||||
BR_JTm,
|
||||
BR_JTadd,
|
||||
BX_RET,
|
||||
LDRrr,
|
||||
LDRri,
|
||||
|
|
|
@ -389,10 +389,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
|
|||
int UOpc = Opc;
|
||||
switch (Opc) {
|
||||
case ARM::tBR_JTr:
|
||||
case ARM::t2BR_JTr:
|
||||
case ARM::t2BR_JTm:
|
||||
case ARM::t2BR_JTadd:
|
||||
// A Thumb table jump may involve padding; for the offsets to
|
||||
// A Thumb1 table jump may involve padding; for the offsets to
|
||||
// be right, functions containing these must be 4-byte aligned.
|
||||
AFI->setAlign(2U);
|
||||
if ((Offset+MBBSize)%4 != 0)
|
||||
|
@ -787,10 +784,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
|
|||
// Thumb1 jump tables require padding. They should be at the end;
|
||||
// following unconditional branches are removed by AnalyzeBranch.
|
||||
MachineInstr *ThumbJTMI = prior(MBB->end());
|
||||
if (ThumbJTMI->getOpcode() == ARM::tBR_JTr ||
|
||||
ThumbJTMI->getOpcode() == ARM::t2BR_JTr ||
|
||||
ThumbJTMI->getOpcode() == ARM::t2BR_JTm ||
|
||||
ThumbJTMI->getOpcode() == ARM::t2BR_JTadd) {
|
||||
if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
|
||||
unsigned newMIOffset = GetOffsetOf(ThumbJTMI);
|
||||
unsigned oldMIOffset = newMIOffset - delta;
|
||||
if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) {
|
||||
|
|
|
@ -402,6 +402,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case ARMISD::tCALL: return "ARMISD::tCALL";
|
||||
case ARMISD::BRCOND: return "ARMISD::BRCOND";
|
||||
case ARMISD::BR_JT: return "ARMISD::BR_JT";
|
||||
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
|
||||
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
|
||||
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
|
||||
case ARMISD::CMP: return "ARMISD::CMP";
|
||||
|
@ -1704,15 +1705,27 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
|
|||
SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
|
||||
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
|
||||
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
|
||||
if (Subtarget->isThumb2()) {
|
||||
// Thumb2 uses a two-level jump. That is, it jumps into the jump table
|
||||
// which does another jump to the destination. This also makes it easier
|
||||
// to translate it to TBB / TBH later.
|
||||
// FIXME: This might not work if the function is extremely large.
|
||||
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Table, Index,
|
||||
JTI, UId);
|
||||
}
|
||||
|
||||
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
|
||||
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
|
||||
bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
|
||||
Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl,
|
||||
Chain, Addr, NULL, 0);
|
||||
Chain = Addr.getValue(1);
|
||||
if (isPIC)
|
||||
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
|
||||
Addr = DAG.getLoad((MVT)MVT::i32, dl, Chain, Addr, NULL, 0);
|
||||
Chain = Addr.getValue(1);
|
||||
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
|
||||
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
|
||||
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
|
||||
} else {
|
||||
Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0);
|
||||
Chain = Addr.getValue(1);
|
||||
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
|
||||
}
|
||||
}
|
||||
|
||||
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
|
||||
|
|
|
@ -40,6 +40,7 @@ namespace llvm {
|
|||
tCALL, // Thumb function call.
|
||||
BRCOND, // Conditional branch.
|
||||
BR_JT, // Jumptable branch.
|
||||
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
|
||||
RET_FLAG, // Return with a flag operand.
|
||||
|
||||
PIC_ADD, // Add with a PC operand and a PIC label.
|
||||
|
|
|
@ -70,9 +70,6 @@ getOpcode(ARMII::Op Op) const {
|
|||
case ARMII::ADDrr: return ARM::ADDrr;
|
||||
case ARMII::B: return ARM::B;
|
||||
case ARMII::Bcc: return ARM::Bcc;
|
||||
case ARMII::BR_JTr: return ARM::BR_JTr;
|
||||
case ARMII::BR_JTm: return ARM::BR_JTm;
|
||||
case ARMII::BR_JTadd: return ARM::BR_JTadd;
|
||||
case ARMII::BX_RET: return ARM::BX_RET;
|
||||
case ARMII::LDRrr: return ARM::LDR;
|
||||
case ARMII::LDRri: return 0;
|
||||
|
|
|
@ -33,6 +33,9 @@ def SDT_ARMBrcond : SDTypeProfile<0, 2,
|
|||
def SDT_ARMBrJT : SDTypeProfile<0, 3,
|
||||
[SDTCisPtrTy<0>, SDTCisVT<1, i32>,
|
||||
SDTCisVT<2, i32>]>;
|
||||
def SDT_ARMBr2JT : SDTypeProfile<0, 4,
|
||||
[SDTCisPtrTy<0>, SDTCisVT<1, i32>,
|
||||
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
|
||||
|
||||
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
|
||||
|
||||
|
@ -72,6 +75,9 @@ def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
|
|||
def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
|
||||
[SDNPHasChain]>;
|
||||
|
||||
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
|
||||
[SDNPOutFlag]>;
|
||||
|
||||
|
@ -205,6 +211,9 @@ def cpinst_operand : Operand<i32> {
|
|||
def jtblock_operand : Operand<i32> {
|
||||
let PrintMethod = "printJTBlockOperand";
|
||||
}
|
||||
def jt2block_operand : Operand<i32> {
|
||||
let PrintMethod = "printJT2BlockOperand";
|
||||
}
|
||||
|
||||
// Local PC labels.
|
||||
def pclabel : Operand<i32> {
|
||||
|
|
|
@ -1080,24 +1080,12 @@ def t2B : T2XI<(outs), (ins brtarget:$target),
|
|||
"b $target",
|
||||
[(br bb:$target)]>;
|
||||
|
||||
let isNotDuplicable = 1, isIndirectBranch = 1 in {
|
||||
def t2BR_JTr : T2JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
|
||||
"mov pc, $target \n\t.align\t2\n$jt",
|
||||
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
|
||||
|
||||
def t2BR_JTm :
|
||||
let isNotDuplicable = 1, isIndirectBranch = 1 in
|
||||
def t2BR_JT :
|
||||
T2JTI<(outs),
|
||||
(ins t2addrmode_so_reg:$target, jtblock_operand:$jt, i32imm:$id),
|
||||
"ldr pc, $target \n\t.align\t2\n$jt",
|
||||
[(ARMbrjt (i32 (load t2addrmode_so_reg:$target)), tjumptable:$jt,
|
||||
imm:$id)]>;
|
||||
|
||||
def t2BR_JTadd :
|
||||
T2JTI<(outs),
|
||||
(ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
|
||||
"add pc, $target, $idx \n\t.align\t2\n$jt",
|
||||
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>;
|
||||
} // isNotDuplicate, isIndirectBranch
|
||||
(ins GPR:$base, GPR:$idx, jt2block_operand:$jt, i32imm:$id),
|
||||
"add pc, $base, $idx, lsl #2\n$jt",
|
||||
[(ARMbr2jt GPR:$base, GPR:$idx, tjumptable:$jt, imm:$id)]>;
|
||||
} // isBranch, isTerminator, isBarrier
|
||||
|
||||
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
|
||||
|
|
|
@ -160,6 +160,7 @@ namespace {
|
|||
void printCPInstOperand(const MachineInstr *MI, int OpNum,
|
||||
const char *Modifier);
|
||||
void printJTBlockOperand(const MachineInstr *MI, int OpNum);
|
||||
void printJT2BlockOperand(const MachineInstr *MI, int OpNum);
|
||||
|
||||
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
unsigned AsmVariant, const char *ExtraCode);
|
||||
|
@ -907,6 +908,8 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum,
|
|||
}
|
||||
|
||||
void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
|
||||
assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!");
|
||||
|
||||
const MachineOperand &MO1 = MI->getOperand(OpNum);
|
||||
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
|
||||
unsigned JTI = MO1.getIndex();
|
||||
|
@ -922,23 +925,13 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
|
|||
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
|
||||
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
|
||||
bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
|
||||
bool NeedBit0 = Subtarget->isTargetDarwin() && Subtarget->isThumb2();
|
||||
SmallPtrSet<MachineBasicBlock*, 8> JTSets;
|
||||
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
|
||||
MachineBasicBlock *MBB = JTBBs[i];
|
||||
if (UseSet && JTSets.insert(MBB)) {
|
||||
// FIXME: Temporary workaround for an assembler bug. The assembler isn't
|
||||
// setting the bit zero to 1 even though it is a thumb address.
|
||||
if (NeedBit0) {
|
||||
O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
|
||||
<< getFunctionNumber() << '_' << JTI << '_' << MO2.getImm()
|
||||
<< "_set_" << MBB->getNumber() << ",(";
|
||||
printBasicBlockLabel(MBB, false, false, false);
|
||||
O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
|
||||
<< '_' << JTI << '_' << MO2.getImm() << "+1)\n";
|
||||
} else
|
||||
printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB);
|
||||
}
|
||||
bool isNew = JTSets.insert(MBB);
|
||||
|
||||
if (UseSet && isNew)
|
||||
printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB);
|
||||
|
||||
O << JTEntryDirective << ' ';
|
||||
if (UseSet)
|
||||
|
@ -952,19 +945,33 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
|
|||
O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
|
||||
<< getFunctionNumber() << '_' << JTI << '_' << MO2.getImm();
|
||||
} else {
|
||||
// FIXME: Temporary workaround for an assembler bug. The assembler isn't
|
||||
// setting the bit zero to 1 even though it is a thumb address.
|
||||
if (NeedBit0)
|
||||
O << '(';
|
||||
printBasicBlockLabel(MBB, false, false, false);
|
||||
if (NeedBit0)
|
||||
O << "+1)";
|
||||
}
|
||||
if (i != e-1)
|
||||
O << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) {
|
||||
const MachineOperand &MO1 = MI->getOperand(OpNum);
|
||||
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
|
||||
unsigned JTI = MO1.getIndex();
|
||||
O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
|
||||
<< '_' << JTI << '_' << MO2.getImm() << ":\n";
|
||||
|
||||
const MachineFunction *MF = MI->getParent()->getParent();
|
||||
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
|
||||
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
|
||||
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
|
||||
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
|
||||
MachineBasicBlock *MBB = JTBBs[i];
|
||||
O << "\tb.w ";
|
||||
printBasicBlockLabel(MBB, false, false, false);
|
||||
if (i != e-1)
|
||||
O << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
unsigned AsmVariant, const char *ExtraCode){
|
||||
|
|
|
@ -5,3 +5,7 @@
|
|||
* We should model IT instructions explicitly. We should introduce them (even if
|
||||
if-converter is not run, the function could still contain movcc's) before
|
||||
PEI since passes starting from PEI may require exact code size.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Make use of TBB and TBH for jumptables in small functions.
|
||||
|
|
|
@ -37,9 +37,6 @@ unsigned Thumb1InstrInfo::getOpcode(ARMII::Op Op) const {
|
|||
case ARMII::ADDrr: return ARM::tADDrr;
|
||||
case ARMII::B: return ARM::tB;
|
||||
case ARMII::Bcc: return ARM::tBcc;
|
||||
case ARMII::BR_JTr: return ARM::tBR_JTr;
|
||||
case ARMII::BR_JTm: return 0;
|
||||
case ARMII::BR_JTadd: return 0;
|
||||
case ARMII::BX_RET: return ARM::tBX_RET;
|
||||
case ARMII::LDRrr: return ARM::tLDR;
|
||||
case ARMII::LDRri: return 0;
|
||||
|
|
|
@ -38,9 +38,6 @@ unsigned Thumb2InstrInfo::getOpcode(ARMII::Op Op) const {
|
|||
case ARMII::ADDrr: return ARM::t2ADDrr;
|
||||
case ARMII::B: return ARM::t2B;
|
||||
case ARMII::Bcc: return ARM::t2Bcc;
|
||||
case ARMII::BR_JTr: return ARM::t2BR_JTr;
|
||||
case ARMII::BR_JTm: return ARM::t2BR_JTm;
|
||||
case ARMII::BR_JTadd: return ARM::t2BR_JTadd;
|
||||
case ARMII::BX_RET: return ARM::tBX_RET;
|
||||
case ARMII::LDRrr: return ARM::t2LDRs;
|
||||
case ARMII::LDRri: return ARM::t2LDRi12;
|
||||
|
@ -64,9 +61,7 @@ Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
|
|||
switch (MBB.back().getOpcode()) {
|
||||
case ARM::t2LDM_RET:
|
||||
case ARM::t2B: // Uncond branch.
|
||||
case ARM::t2BR_JTr: // Jumptable branch.
|
||||
case ARM::t2BR_JTm: // Jumptable branch through mem.
|
||||
case ARM::t2BR_JTadd: // Jumptable branch add to pc.
|
||||
case ARM::t2BR_JT: // Jumptable branch.
|
||||
case ARM::tBR_JTr: // Jumptable branch (16-bit version).
|
||||
case ARM::tBX_RET:
|
||||
case ARM::tBX_RET_vararg:
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin | FileCheck %s
|
||||
; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s
|
||||
|
||||
define void @bar(i32 %n.u) {
|
||||
entry:
|
||||
; CHECK: bar:
|
||||
; CHECK: add pc
|
||||
; CHECK: b.w LBB1_2
|
||||
|
||||
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
|
||||
bb:
|
||||
tail call void(...)* @foo1()
|
Loading…
Reference in New Issue