forked from OSchip/llvm-project
Rewrite the branch selector to be correct in the face of large functions.
The algorithm it used before wasn't 100% correct; we now use an iterative expansion model. This fixes assembler errors when compiling 403.gcc with tail merging enabled. Change the way the branch selector works overall: now, the isel generates PPC::BCC instructions (as it used to) directly, and these BCC instructions are emitted to the output or jitted directly if branches don't need expansion. Only if branches need expansion are instructions rewritten and created. This should make branch selection faster, and eliminates the Bxx instructions from the .td file. llvm-svn: 31837
This commit is contained in:
parent
33fc1d45e5
commit
542dfd5510
|
@ -24,6 +24,7 @@
|
|||
#include "llvm/Target/TargetAsmInfo.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
using namespace llvm;
|
||||
|
||||
static Statistic<> NumExpanded("ppc-branch-select",
|
||||
|
@ -31,13 +32,13 @@ static Statistic<> NumExpanded("ppc-branch-select",
|
|||
|
||||
namespace {
|
||||
struct VISIBILITY_HIDDEN PPCBSel : public MachineFunctionPass {
|
||||
/// OffsetMap - Mapping between BB # and byte offset from start of function.
|
||||
std::vector<unsigned> OffsetMap;
|
||||
/// BlockSizes - The sizes of the basic blocks in the function.
|
||||
std::vector<unsigned> BlockSizes;
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &Fn);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "PowerPC Branch Selection";
|
||||
return "PowerPC Branch Selector";
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -54,11 +55,6 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
|
|||
///
|
||||
static unsigned getNumBytesForInstruction(MachineInstr *MI) {
|
||||
switch (MI->getOpcode()) {
|
||||
case PPC::BCC:
|
||||
// while this will be 4 most of the time, if we emit 8 it is just a
|
||||
// minor pessimization that saves us from having to worry about
|
||||
// keeping the offsets up to date later when we emit long branch glue.
|
||||
return 8;
|
||||
case PPC::IMPLICIT_DEF_GPRC: // no asm emitted
|
||||
case PPC::IMPLICIT_DEF_G8RC: // no asm emitted
|
||||
case PPC::IMPLICIT_DEF_F4: // no asm emitted
|
||||
|
@ -77,98 +73,119 @@ static unsigned getNumBytesForInstruction(MachineInstr *MI) {
|
|||
|
||||
|
||||
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
|
||||
// Running total of instruction bytes encountered since beginning of function
|
||||
unsigned ByteCount = 0;
|
||||
|
||||
OffsetMap.resize(Fn.getNumBlockIDs());
|
||||
|
||||
// For each MBB, add its offset to the offset map, and count up its
|
||||
// instructions
|
||||
// Give the blocks of the function a dense, in-order, numbering.
|
||||
Fn.RenumberBlocks();
|
||||
BlockSizes.resize(Fn.getNumBlockIDs());
|
||||
|
||||
// Measure each MBB and compute a size for the entire function.
|
||||
unsigned FuncSize = 0;
|
||||
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
||||
++MFI) {
|
||||
MachineBasicBlock *MBB = MFI;
|
||||
OffsetMap[MBB->getNumber()] = ByteCount;
|
||||
|
||||
|
||||
unsigned BlockSize = 0;
|
||||
for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
|
||||
MBBI != EE; ++MBBI)
|
||||
ByteCount += getNumBytesForInstruction(MBBI);
|
||||
}
|
||||
|
||||
// We're about to run over the MBB's again, so reset the ByteCount
|
||||
ByteCount = 0;
|
||||
|
||||
// For each MBB, find the conditional branch pseudo instructions, and
|
||||
// calculate the difference between the target MBB and the current ICount
|
||||
// to decide whether or not to emit a short or long branch.
|
||||
//
|
||||
// short branch:
|
||||
// bCC .L_TARGET_MBB
|
||||
//
|
||||
// long branch:
|
||||
// bInverseCC $PC+8
|
||||
// b .L_TARGET_MBB
|
||||
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
||||
++MFI) {
|
||||
MachineBasicBlock *MBB = MFI;
|
||||
BlockSize += getNumBytesForInstruction(MBBI);
|
||||
|
||||
for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
|
||||
MBBI != EE; ++MBBI) {
|
||||
// We may end up deleting the MachineInstr that MBBI points to, so
|
||||
// remember its opcode now so we can refer to it after calling erase()
|
||||
unsigned ByteSize = getNumBytesForInstruction(MBBI);
|
||||
if (MBBI->getOpcode() != PPC::BCC) {
|
||||
ByteCount += ByteSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
// condbranch operands:
|
||||
// 0. CR register
|
||||
// 1. PPC branch opcode
|
||||
// 2. Target MBB
|
||||
MachineBasicBlock *DestMBB = MBBI->getOperand(2).getMachineBasicBlock();
|
||||
PPC::Predicate Pred = (PPC::Predicate)MBBI->getOperand(0).getImm();
|
||||
unsigned CRReg = MBBI->getOperand(1).getReg();
|
||||
int Displacement = OffsetMap[DestMBB->getNumber()] - ByteCount;
|
||||
|
||||
bool ShortBranchOk = Displacement >= -32768 && Displacement <= 32767;
|
||||
|
||||
// Branch on opposite condition if a short branch isn't ok.
|
||||
if (!ShortBranchOk)
|
||||
Pred = PPC::InvertPredicate(Pred);
|
||||
|
||||
unsigned Opcode;
|
||||
switch (Pred) {
|
||||
default: assert(0 && "Unknown cond branch predicate!");
|
||||
case PPC::PRED_LT: Opcode = PPC::BLT; break;
|
||||
case PPC::PRED_LE: Opcode = PPC::BLE; break;
|
||||
case PPC::PRED_EQ: Opcode = PPC::BEQ; break;
|
||||
case PPC::PRED_GE: Opcode = PPC::BGE; break;
|
||||
case PPC::PRED_GT: Opcode = PPC::BGT; break;
|
||||
case PPC::PRED_NE: Opcode = PPC::BNE; break;
|
||||
case PPC::PRED_UN: Opcode = PPC::BUN; break;
|
||||
case PPC::PRED_NU: Opcode = PPC::BNU; break;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator MBBJ;
|
||||
if (ShortBranchOk) {
|
||||
MBBJ = BuildMI(*MBB, MBBI, Opcode, 2).addReg(CRReg).addMBB(DestMBB);
|
||||
} else {
|
||||
// Long branch, skip next branch instruction (i.e. $PC+8).
|
||||
++NumExpanded;
|
||||
BuildMI(*MBB, MBBI, Opcode, 2).addReg(CRReg).addImm(2);
|
||||
MBBJ = BuildMI(*MBB, MBBI, PPC::B, 1).addMBB(DestMBB);
|
||||
}
|
||||
|
||||
// Erase the pseudo BCC instruction, and then back up the
|
||||
// iterator so that when the for loop increments it, we end up in
|
||||
// the correct place rather than iterating off the end.
|
||||
MBB->erase(MBBI);
|
||||
MBBI = MBBJ;
|
||||
ByteCount += ByteSize;
|
||||
}
|
||||
BlockSizes[MBB->getNumber()] = BlockSize;
|
||||
FuncSize += BlockSize;
|
||||
}
|
||||
|
||||
OffsetMap.clear();
|
||||
// If the entire function is smaller than the displacement of a branch field,
|
||||
// we know we don't need to expand any branches in this function. This is a
|
||||
// common case.
|
||||
if (FuncSize < (1 << 15)) {
|
||||
BlockSizes.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
// For each conditional branch, if the offset to its destination is larger
|
||||
// than the offset field allows, transform it into a long branch sequence
|
||||
// like this:
|
||||
// short branch:
|
||||
// bCC MBB
|
||||
// long branch:
|
||||
// b!CC $PC+8
|
||||
// b MBB
|
||||
//
|
||||
bool MadeChange = true;
|
||||
bool EverMadeChange = false;
|
||||
while (MadeChange) {
|
||||
// Iteratively expand branches until we reach a fixed point.
|
||||
MadeChange = false;
|
||||
|
||||
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
||||
++MFI) {
|
||||
MachineBasicBlock &MBB = *MFI;
|
||||
unsigned MBBStartOffset = 0;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
|
||||
MBBStartOffset += getNumBytesForInstruction(I);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Determine the offset from the current branch to the destination
|
||||
// block.
|
||||
MachineBasicBlock *Dest = I->getOperand(2).getMachineBasicBlock();
|
||||
|
||||
int BranchSize;
|
||||
if (Dest->getNumber() <= MBB.getNumber()) {
|
||||
// If this is a backwards branch, the delta is the offset from the
|
||||
// start of this block to this branch, plus the sizes of all blocks
|
||||
// from this block to the dest.
|
||||
BranchSize = MBBStartOffset;
|
||||
|
||||
for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
|
||||
BranchSize += BlockSizes[i];
|
||||
} else {
|
||||
// Otherwise, add the size of the blocks between this block and the
|
||||
// dest to the number of bytes left in this block.
|
||||
BranchSize = -MBBStartOffset;
|
||||
|
||||
for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
|
||||
BranchSize += BlockSizes[i];
|
||||
}
|
||||
|
||||
// If this branch is in range, ignore it.
|
||||
if (isInt16(BranchSize)) {
|
||||
MBBStartOffset += 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, we have to expand it to a long branch.
|
||||
// The BCC operands are:
|
||||
// 0. PPC branch predicate
|
||||
// 1. CR register
|
||||
// 2. Target MBB
|
||||
PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
|
||||
unsigned CRReg = I->getOperand(1).getReg();
|
||||
|
||||
MachineInstr *OldBranch = I;
|
||||
|
||||
// Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
|
||||
BuildMI(MBB, I, PPC::BCC, 3)
|
||||
.addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
|
||||
|
||||
// Uncond branch to the real destination.
|
||||
I = BuildMI(MBB, I, PPC::B, 1).addMBB(Dest);
|
||||
|
||||
// Remove the old branch from the function.
|
||||
OldBranch->eraseFromParent();
|
||||
|
||||
// Remember that this instruction is 8-bytes, increase the size of the
|
||||
// block by 4, remember to iterate.
|
||||
BlockSizes[MBB.getNumber()] += 4;
|
||||
MBBStartOffset += 8;
|
||||
++NumExpanded;
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
EverMadeChange |= MadeChange;
|
||||
}
|
||||
|
||||
BlockSizes.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -60,22 +60,8 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OL, string asmstr,
|
|||
}
|
||||
|
||||
// 1.7.2 B-Form
|
||||
class BForm<bits<6> opcode, bit aa, bit lk, bits<5> bo, bits<2> bicode, dag OL,
|
||||
string asmstr, InstrItinClass itin>
|
||||
: I<opcode, OL, asmstr, itin> {
|
||||
bits<3> CR;
|
||||
bits<14> BD;
|
||||
|
||||
let Inst{6-10} = bo;
|
||||
let Inst{11-13} = CR;
|
||||
let Inst{14-15} = bicode;
|
||||
let Inst{16-29} = BD;
|
||||
let Inst{30} = aa;
|
||||
let Inst{31} = lk;
|
||||
}
|
||||
|
||||
class CBForm<bits<6> opcode, bit aa, bit lk, dag OL,
|
||||
string asmstr> : I<opcode, OL, asmstr, BrB> {
|
||||
class BForm<bits<6> opcode, bit aa, bit lk, dag OL, string asmstr>
|
||||
: I<opcode, OL, asmstr, BrB> {
|
||||
bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
|
||||
bits<3> CR;
|
||||
bits<14> BD;
|
||||
|
|
|
@ -361,27 +361,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1,
|
|||
// BCC represents an arbitrary conditional branch on a predicate.
|
||||
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
|
||||
// a two-value operand where a dag node expects two operands. :(
|
||||
def BCC : CBForm<16, 0, 0, (ops pred:$cond, target:$dst),
|
||||
"b${cond:cc} ${cond:reg}, $dst"
|
||||
/*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
|
||||
|
||||
// REMOVE BForm when these go away.
|
||||
def BLT : BForm<16, 0, 0, 12, 0, (ops CRRC:$crS, target:$block),
|
||||
"blt $crS, $block", BrB>;
|
||||
def BLE : BForm<16, 0, 0, 4, 1, (ops CRRC:$crS, target:$block),
|
||||
"ble $crS, $block", BrB>;
|
||||
def BEQ : BForm<16, 0, 0, 12, 2, (ops CRRC:$crS, target:$block),
|
||||
"beq $crS, $block", BrB>;
|
||||
def BGE : BForm<16, 0, 0, 4, 0, (ops CRRC:$crS, target:$block),
|
||||
"bge $crS, $block", BrB>;
|
||||
def BGT : BForm<16, 0, 0, 12, 1, (ops CRRC:$crS, target:$block),
|
||||
"bgt $crS, $block", BrB>;
|
||||
def BNE : BForm<16, 0, 0, 4, 2, (ops CRRC:$crS, target:$block),
|
||||
"bne $crS, $block", BrB>;
|
||||
def BUN : BForm<16, 0, 0, 12, 3, (ops CRRC:$crS, target:$block),
|
||||
"bun $crS, $block", BrB>;
|
||||
def BNU : BForm<16, 0, 0, 4, 3, (ops CRRC:$crS, target:$block),
|
||||
"bnu $crS, $block", BrB>;
|
||||
def BCC : BForm<16, 0, 0, (ops pred:$cond, target:$dst),
|
||||
"b${cond:cc} ${cond:reg}, $dst"
|
||||
/*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
|
||||
}
|
||||
|
||||
let isCall = 1, noResults = 1, PPC970_Unit = 7,
|
||||
|
|
|
@ -88,11 +88,6 @@ This is effectively a simple form of predication.
|
|||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Teach the .td file to pattern match PPC::BR_COND to appropriate bc variant, so
|
||||
we don't have to always run the branch selector for small functions.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Lump the constant pool for each function into ONE pic object, and reference
|
||||
pieces of it as offsets from the start. For functions like this (contrived
|
||||
to have lots of constants obviously):
|
||||
|
|
Loading…
Reference in New Issue