[BOLT] Strip 'repz' prefix from 'repz retq'.

Summary:
Add a pass to strip the 'repz' prefix from the 'repz retq' sequence. As far
as I know, the prefix is not used by Intel CPUs. The pass is on by default.

(cherry picked from FBD4610329)
This commit is contained in:
Maksim Panchenko 2017-02-23 18:09:10 -08:00
parent 88a461014b
commit 2029458f34
6 changed files with 98 additions and 22 deletions

View File

@ -35,22 +35,23 @@ void BinaryBasicBlock::adjustNumPseudos(const MCInst &Inst, int Sign) {
NumPseudos += Sign;
}
MCInst *BinaryBasicBlock::getFirstNonPseudo() {
auto &BC = Function->getBinaryContext();
for (auto &Inst : Instructions) {
if (!BC.MII->get(Inst.getOpcode()).isPseudo())
return &Inst;
BinaryBasicBlock::iterator BinaryBasicBlock::getFirstNonPseudo() {
const auto &BC = Function->getBinaryContext();
for (auto II = Instructions.begin(), E = Instructions.end(); II != E; ++II) {
if (!BC.MII->get(II->getOpcode()).isPseudo())
return II;
}
return nullptr;
return end();
}
MCInst *BinaryBasicBlock::getLastNonPseudo() {
auto &BC = Function->getBinaryContext();
for (auto Itr = Instructions.rbegin(); Itr != Instructions.rend(); ++Itr) {
if (!BC.MII->get(Itr->getOpcode()).isPseudo())
return &*Itr;
BinaryBasicBlock::reverse_iterator BinaryBasicBlock::getLastNonPseudo() {
const auto &BC = Function->getBinaryContext();
for (auto RII = Instructions.rbegin(), E = Instructions.rend();
RII != E; ++RII) {
if (!BC.MII->get(RII->getOpcode()).isPseudo())
return RII;
}
return nullptr;
return rend();
}
bool BinaryBasicBlock::validateSuccessorInvariants() {

View File

@ -412,13 +412,27 @@ public:
return size() - getNumPseudos();
}
/// Return iterator to the first non-pseudo instruction or end()
/// if no such instruction was found.
iterator getFirstNonPseudo();
/// Return a pointer to the first non-pseudo instruction in this basic
/// block. Returns nullptr if none exists.
MCInst *getFirstNonPseudo();
/// Pointer-returning convenience wrapper around getFirstNonPseudo():
/// yields nullptr when that call returns the end of the instruction list,
/// otherwise the address of the instruction it refers to.
MCInst *getFirstNonPseudoInstr() {
  auto Pos = getFirstNonPseudo();
  if (Pos == Instructions.end())
    return nullptr;
  return &*Pos;
}
/// Return reverse iterator to the last non-pseudo instruction or rend()
/// if no such instruction was found.
reverse_iterator getLastNonPseudo();
/// Return a pointer to the last non-pseudo instruction in this basic
/// block. Returns nullptr if none exists.
MCInst *getLastNonPseudo();
/// Pointer-returning convenience wrapper around getLastNonPseudo():
/// yields nullptr when that call returns the reverse end of the instruction
/// list, otherwise the address of the instruction it refers to.
MCInst *getLastNonPseudoInstr() {
  auto RPos = getLastNonPseudo();
  if (RPos == Instructions.rend())
    return nullptr;
  return &*RPos;
}
/// Set minimum alignment for the basic block.
void setAlignment(uint64_t Align) {
@ -553,6 +567,11 @@ public:
return replaceInstruction(Inst, std::vector<MCInst>());
}
/// Remove the instruction at position \p II and hand back the iterator
/// produced by the underlying container erase. Meant for non-pseudo
/// instructions only: nothing in this method touches the pseudo count.
iterator eraseInstruction(iterator II) {
  auto Following = Instructions.erase(II);
  return Following;
}
/// Replace an instruction with a sequence of instructions. Returns true
/// if the instruction to be replaced was found and replaced.
template <typename Itr>

View File

@ -1617,7 +1617,7 @@ bool BinaryFunction::buildCFG() {
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
// Populate profile counts for the jump table.
auto *LastInstr = FromBB->getLastNonPseudo();
auto *LastInstr = FromBB->getLastNonPseudoInstr();
if (!LastInstr)
continue;
auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
@ -2711,7 +2711,7 @@ void BinaryFunction::dumpGraph(raw_ostream& OS) const {
CondBranch,
UncondBranch);
const auto *LastInstr = BB->getLastNonPseudo();
const auto *LastInstr = BB->getLastNonPseudoInstr();
const bool IsJumpTable = LastInstr && BC.MIA->getJumpTable(*LastInstr);
auto BI = BB->branch_info_begin();
@ -3717,7 +3717,7 @@ DynoStats BinaryFunction::getDynoStats() const {
Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
// Jump tables.
const auto *LastInstr = BB->getLastNonPseudo();
const auto *LastInstr = BB->getLastNonPseudoInstr();
if (BC.MIA->getJumpTable(*LastInstr)) {
Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
DEBUG(

View File

@ -68,6 +68,12 @@ SimplifyRODataLoads("simplify-rodata-loads",
"section"),
cl::ZeroOrMore);
// Command-line switch for stripping the 'repz' prefix from 'repz retq';
// initialised to true, so the behaviour is enabled unless overridden.
static cl::opt<bool> StripRepRet(
    "strip-rep-ret",
    cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
    cl::init(true), cl::ZeroOrMore);
static cl::opt<bool> OptimizeFrameAccesses(
"frame-opt", cl::desc("optimize stack frame accesses"), cl::ZeroOrMore);
@ -220,6 +226,9 @@ void BinaryFunctionPassManager::runAllPasses(
// Run this pass first to use stats for the original functions.
Manager.registerPass(llvm::make_unique<PrintSortedBy>(NeverPrint));
Manager.registerPass(llvm::make_unique<StripRepRet>(NeverPrint),
opts::StripRepRet);
Manager.registerPass(llvm::make_unique<IdenticalCodeFolding>(PrintICF));
Manager.registerPass(llvm::make_unique<IndirectCallPromotion>(PrintICP),

View File

@ -221,7 +221,7 @@ void OptimizeBodylessFunctions::analyze(
if (BF.size() != 1 || BF.front().getNumNonPseudos() != 1)
return;
const auto *FirstInstr = BF.front().getFirstNonPseudo();
const auto *FirstInstr = BF.front().getFirstNonPseudoInstr();
if (!FirstInstr)
return;
if (!BC.MIA->isTailCall(*FirstInstr))
@ -461,7 +461,7 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
if (BB->getNumNonPseudos() != 1)
continue;
auto *Instr = BB->getFirstNonPseudo();
auto *Instr = BB->getFirstNonPseudoInstr();
if (!MIA->isTailCall(*Instr))
continue;
@ -621,7 +621,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
} else {
// Succ will be null in the tail call case. In this case we
// need to explicitly add a tail call instruction.
auto *Branch = Pred->getLastNonPseudo();
auto *Branch = Pred->getLastNonPseudoInstr();
if (Branch && BC.MIA->isUnconditionalBranch(*Branch)) {
Pred->removeSuccessor(&BB);
Pred->eraseInstruction(Branch);
@ -641,7 +641,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
if (BB.getNumNonPseudos() != 1 || BB.isLandingPad())
continue;
auto *Inst = BB.getFirstNonPseudo();
auto *Inst = BB.getFirstNonPseudoInstr();
const bool IsTailCall = BC.MIA->isTailCall(*Inst);
if (!BC.MIA->isUnconditionalBranch(*Inst) && !IsTailCall)
@ -671,7 +671,7 @@ void Peepholes::fixDoubleJumps(BinaryContext &BC,
void Peepholes::addTailcallTraps(BinaryContext &BC,
BinaryFunction &Function) {
for (auto &BB : Function) {
auto *Inst = BB.getLastNonPseudo();
auto *Inst = BB.getLastNonPseudoInstr();
if (Inst && BC.MIA->isTailCall(*Inst) && BC.MIA->isIndirectBranch(*Inst)) {
MCInst Trap;
if (BC.MIA->createTrap(Trap)) {
@ -1577,5 +1577,37 @@ void InstructionLowering::runOnFunctions(
}
}
/// Walk every basic block of every function in \p BFs. Whenever the last
/// non-pseudo instruction of a block is a return and the instruction right
/// before it is reported as a prefix by the target, erase that prefix.
/// Afterwards, if anything was erased, print how many prefixes were removed
/// and the sum of the known execution counts of the affected blocks.
/// \p LargeFunctions is not consulted by this pass.
void StripRepRet::runOnFunctions(
    BinaryContext &BC,
    std::map<uint64_t, BinaryFunction> &BFs,
    std::set<uint64_t> &LargeFunctions) {
  uint64_t StrippedExecCount = 0; // summed execution counts of patched blocks
  uint64_t StrippedPrefixes = 0;  // number of prefixes erased

  for (auto &Entry : BFs) {
    auto &Function = Entry.second;
    for (auto &Block : Function) {
      // The candidate return has to be the final non-pseudo instruction.
      auto RetRIt = Block.getLastNonPseudo();
      if (RetRIt == Block.rend())
        continue;
      if (!BC.MIA->isReturn(*RetRIt))
        continue;

      // In reverse order, the next element is the instruction that
      // immediately precedes the return in the block.
      auto PrefixRIt = std::next(RetRIt);
      if (PrefixRIt == Block.rend())
        continue;
      if (!BC.MIA->isPrefix(*PrefixRIt))
        continue;

      // base() of a reverse iterator designates the element *after* the one
      // the reverse iterator refers to, hence the extra step before
      // converting to a forward iterator.
      auto PrefixIt = std::next(PrefixRIt).base();
      Block.eraseInstruction(PrefixIt);

      StrippedExecCount += Block.getKnownExecutionCount();
      ++StrippedPrefixes;
    }
  }

  // Stay silent when the pass changed nothing.
  if (StrippedPrefixes == 0)
    return;

  outs() << "BOLT-INFO: removed " << StrippedPrefixes << " 'repz' prefixes"
            " with estimated execution count of " << StrippedExecCount
         << " times.\n";
}
} // namespace bolt
} // namespace llvm

View File

@ -484,6 +484,21 @@ public:
std::set<uint64_t> &LargeFunctions) override;
};
/// Binary function pass that removes the 'repz' prefix from 'repz retq'
/// instruction sequences.
class StripRepRet : public BinaryFunctionPass {
public:
  /// \p PrintPass is forwarded unchanged to the BinaryFunctionPass base.
  explicit StripRepRet(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  /// Name this pass reports for itself.
  const char *getName() const override { return "strip-rep-ret"; }

  /// Entry point of the pass; the definition lives out of line.
  void runOnFunctions(BinaryContext &BC,
                      std::map<uint64_t, BinaryFunction> &BFs,
                      std::set<uint64_t> &LargeFunctions) override;
};
} // namespace bolt
} // namespace llvm