2021-12-22 02:21:41 +08:00
|
|
|
//===- bolt/Passes/FrameOptimizer.cpp -------------------------------------===//
|
2016-12-06 03:47:08 +08:00
|
|
|
//
|
2021-03-16 09:04:18 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2016-12-06 03:47:08 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2021-12-22 02:21:41 +08:00
|
|
|
// This file implements the FrameOptimizerPass class.
|
|
|
|
//
|
2016-12-06 03:47:08 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2021-10-09 02:47:10 +08:00
|
|
|
#include "bolt/Passes/FrameOptimizer.h"
|
|
|
|
#include "bolt/Core/ParallelUtilities.h"
|
|
|
|
#include "bolt/Passes/BinaryFunctionCallGraph.h"
|
|
|
|
#include "bolt/Passes/DataflowInfoManager.h"
|
|
|
|
#include "bolt/Passes/ShrinkWrapping.h"
|
|
|
|
#include "bolt/Passes/StackAvailableExpressions.h"
|
|
|
|
#include "bolt/Passes/StackReachingUses.h"
|
2017-05-02 07:52:54 +08:00
|
|
|
#include "llvm/Support/Timer.h"
|
2021-05-01 04:54:02 +08:00
|
|
|
#include <deque>
|
2016-12-06 03:47:08 +08:00
|
|
|
#include <unordered_map>
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "fop"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace opts {
|
|
|
|
extern cl::opt<unsigned> Verbosity;
|
2017-06-09 04:46:17 +08:00
|
|
|
extern cl::opt<bool> TimeOpts;
|
2017-05-02 07:52:54 +08:00
|
|
|
extern cl::OptionCategory BoltOptCategory;
|
2016-12-29 09:09:52 +08:00
|
|
|
|
2017-05-02 07:52:54 +08:00
|
|
|
using namespace bolt;
|
2016-12-29 09:09:52 +08:00
|
|
|
|
2017-05-02 07:52:54 +08:00
|
|
|
cl::opt<FrameOptimizationType>
|
|
|
|
FrameOptimization("frame-opt",
|
|
|
|
cl::init(FOP_NONE),
|
|
|
|
cl::desc("optimize stack frame accesses"),
|
|
|
|
cl::values(
|
|
|
|
clEnumValN(FOP_NONE, "none", "do not perform frame optimization"),
|
|
|
|
clEnumValN(FOP_HOT, "hot", "perform FOP on hot functions"),
|
[BOLT rebase] Rebase fixes on top of LLVM Feb2018
Summary:
This commit includes all code necessary to make BOLT working again
after the rebase. This includes a redesign of the EHFrame work,
cherry-pick of the 3dnow disassembly work, compilation error fixes,
and port of the debug_info work. The macroop fusion feature is not
ported yet.
The rebased version has minor changes to the "executed instructions"
dynostats counter because REP prefixes are considered a part of the
instruction it applies to. Also, some X86 instructions had the "mayLoad"
tablegen property removed, which BOLT uses to identify and account
for loads, thus reducing the total number of loads reported by
dynostats. This was observed in X86::MOVDQUmr. TRAP instructions are
not terminators anymore, changing our CFG. This commit adds compensation
to preserve this old behavior and minimize tests changes. debug_info
sections are now slightly larger. The discriminator field in the line
table is slightly different due to a change upstream. New profiles
generated with the other bolt are incompatible with this version
because of different hash values calculated for functions, so they will
be considered 100% stale. This commit changes the corresponding test
to XFAIL so it can be updated. The hash function changes because it
relies on raw opcode values, which change according to the opcodes
described in the X86 tablegen files. When processing HHVM, bolt was
observed to be using about 800MB more memory in the rebased version
and being about 5% slower.
(cherry picked from FBD7078072)
2018-02-07 07:00:23 +08:00
|
|
|
clEnumValN(FOP_ALL, "all", "perform FOP on all functions")),
|
2017-05-02 07:52:54 +08:00
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(BoltOptCategory));
|
2016-12-29 09:09:52 +08:00
|
|
|
|
2017-06-23 07:34:01 +08:00
|
|
|
cl::opt<bool>
|
|
|
|
RemoveStores("frame-opt-rm-stores",
|
|
|
|
cl::init(FOP_NONE),
|
|
|
|
cl::desc("apply additional analysis to remove stores (experimental)"),
|
|
|
|
cl::init(false),
|
|
|
|
cl::ZeroOrMore,
|
|
|
|
cl::cat(BoltOptCategory));
|
2019-07-03 01:48:43 +08:00
|
|
|
|
2017-05-02 07:52:54 +08:00
|
|
|
} // namespace opts
|
2016-12-29 09:09:52 +08:00
|
|
|
|
2017-05-02 07:52:54 +08:00
|
|
|
namespace llvm {
|
|
|
|
namespace bolt {
|
2016-12-06 03:47:08 +08:00
|
|
|
|
2017-06-03 07:57:22 +08:00
|
|
|
void FrameOptimizerPass::removeUnnecessaryLoads(const RegAnalysis &RA,
|
|
|
|
const FrameAnalysis &FA,
|
2017-05-02 07:52:54 +08:00
|
|
|
BinaryFunction &BF) {
|
2021-10-26 15:06:34 +08:00
|
|
|
StackAvailableExpressions SAE(RA, FA, BF);
|
2016-12-29 09:09:52 +08:00
|
|
|
SAE.run();
|
2016-12-06 03:47:08 +08:00
|
|
|
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Performing unnecessary loads removal\n");
|
2016-12-29 09:09:52 +08:00
|
|
|
std::deque<std::pair<BinaryBasicBlock *, MCInst *>> ToErase;
|
|
|
|
bool Changed = false;
|
|
|
|
const auto ExprEnd = SAE.expr_end();
|
2021-10-26 15:06:34 +08:00
|
|
|
MCPlusBuilder *MIB = BF.getBinaryContext().MIB.get();
|
2021-04-08 15:19:26 +08:00
|
|
|
for (BinaryBasicBlock &BB : BF) {
|
2021-12-15 08:52:51 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
|
2016-12-29 09:09:52 +08:00
|
|
|
const MCInst *Prev = nullptr;
|
2021-04-08 15:19:26 +08:00
|
|
|
for (MCInst &Inst : BB) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG({
|
2016-12-06 03:47:08 +08:00
|
|
|
dbgs() << "\t\tNow at ";
|
|
|
|
Inst.dump();
|
2016-12-29 09:09:52 +08:00
|
|
|
for (auto I = Prev ? SAE.expr_begin(*Prev) : SAE.expr_begin(BB);
|
|
|
|
I != ExprEnd; ++I) {
|
2016-12-06 03:47:08 +08:00
|
|
|
dbgs() << "\t\t\tReached by: ";
|
2016-12-29 09:09:52 +08:00
|
|
|
(*I)->dump();
|
2016-12-06 03:47:08 +08:00
|
|
|
}
|
|
|
|
});
|
|
|
|
// if Inst is a load from stack and the current available expressions show
|
|
|
|
// this value is available in a register or immediate, replace this load
|
|
|
|
// with move from register or from immediate.
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
|
2017-05-02 07:52:54 +08:00
|
|
|
if (!FIEX) {
|
2016-12-29 09:09:52 +08:00
|
|
|
Prev = &Inst;
|
2016-12-06 03:47:08 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// FIXME: Change to remove IsSimple == 0. We're being conservative here,
|
|
|
|
// but once replaceMemOperandWithReg is ready, we should feed it with all
|
|
|
|
// sorts of complex instructions.
|
2017-05-02 07:52:54 +08:00
|
|
|
if (FIEX->IsLoad == false || FIEX->IsSimple == false ||
|
|
|
|
FIEX->StackOffset >= 0) {
|
2016-12-29 09:09:52 +08:00
|
|
|
Prev = &Inst;
|
2016-12-06 03:47:08 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-12-29 09:09:52 +08:00
|
|
|
for (auto I = Prev ? SAE.expr_begin(*Prev) : SAE.expr_begin(BB);
|
|
|
|
I != ExprEnd; ++I) {
|
|
|
|
const MCInst *AvailableInst = *I;
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<const FrameIndexEntry &> FIEY = FA.getFIEFor(*AvailableInst);
|
2017-05-02 07:52:54 +08:00
|
|
|
if (!FIEY)
|
2016-12-06 03:47:08 +08:00
|
|
|
continue;
|
2017-05-02 07:52:54 +08:00
|
|
|
assert(FIEY->IsStore && FIEY->IsSimple);
|
|
|
|
if (FIEX->StackOffset != FIEY->StackOffset || FIEX->Size != FIEY->Size)
|
|
|
|
continue;
|
|
|
|
// TODO: Change push/pops to stack adjustment instruction
|
2021-10-26 15:06:34 +08:00
|
|
|
if (MIB->isPop(Inst))
|
2016-12-06 03:47:08 +08:00
|
|
|
continue;
|
2016-12-29 09:09:52 +08:00
|
|
|
|
2016-12-06 03:47:08 +08:00
|
|
|
++NumRedundantLoads;
|
2016-12-29 09:09:52 +08:00
|
|
|
Changed = true;
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Redundant load instruction: ");
|
|
|
|
LLVM_DEBUG(Inst.dump());
|
|
|
|
LLVM_DEBUG(dbgs() << "Related store instruction: ");
|
|
|
|
LLVM_DEBUG(AvailableInst->dump());
|
|
|
|
LLVM_DEBUG(dbgs() << "@BB: " << BB.getName() << "\n");
|
2016-12-06 03:47:08 +08:00
|
|
|
// Replace load
|
2017-05-02 07:52:54 +08:00
|
|
|
if (FIEY->IsStoreFromReg) {
|
2021-10-26 15:06:34 +08:00
|
|
|
if (!MIB->replaceMemOperandWithReg(Inst, FIEY->RegOrImm)) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "FAILED to change operand to a reg\n");
|
2016-12-06 03:47:08 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
++NumLoadsChangedToReg;
|
2021-10-26 15:06:34 +08:00
|
|
|
MIB->removeAnnotation(Inst, "FrameAccessEntry");
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Changed operand to a reg\n");
|
2021-10-26 15:06:34 +08:00
|
|
|
if (MIB->isRedundantMove(Inst)) {
|
2016-12-06 03:47:08 +08:00
|
|
|
++NumLoadsDeleted;
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Created a redundant move\n");
|
2016-12-06 03:47:08 +08:00
|
|
|
// Delete it!
|
2016-12-29 09:09:52 +08:00
|
|
|
ToErase.push_front(std::make_pair(&BB, &Inst));
|
2016-12-06 03:47:08 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
char Buf[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
2017-05-02 07:52:54 +08:00
|
|
|
support::ulittle64_t::ref(Buf + 0) = FIEY->RegOrImm;
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Changing operand to an imm... ");
|
2021-10-26 15:06:34 +08:00
|
|
|
if (!MIB->replaceMemOperandWithImm(Inst, StringRef(Buf, 8), 0)) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "FAILED\n");
|
2016-12-06 03:47:08 +08:00
|
|
|
} else {
|
|
|
|
++NumLoadsChangedToImm;
|
2021-10-26 15:06:34 +08:00
|
|
|
MIB->removeAnnotation(Inst, "FrameAccessEntry");
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Ok\n");
|
2016-12-06 03:47:08 +08:00
|
|
|
}
|
|
|
|
}
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Changed to: ");
|
|
|
|
LLVM_DEBUG(Inst.dump());
|
2016-12-06 03:47:08 +08:00
|
|
|
break;
|
|
|
|
}
|
2016-12-29 09:09:52 +08:00
|
|
|
Prev = &Inst;
|
2016-12-06 03:47:08 +08:00
|
|
|
}
|
|
|
|
}
|
2016-12-29 09:09:52 +08:00
|
|
|
if (Changed) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "FOP modified \"" << BF.getPrintName() << "\"\n");
|
2016-12-29 09:09:52 +08:00
|
|
|
}
|
2017-05-02 07:52:54 +08:00
|
|
|
// TODO: Implement an interface of eraseInstruction that works out the
|
|
|
|
// complete list of elements to remove.
|
2021-04-08 15:19:26 +08:00
|
|
|
for (std::pair<BinaryBasicBlock *, MCInst *> I : ToErase) {
|
2019-02-01 03:23:02 +08:00
|
|
|
I.first->eraseInstruction(I.first->findInstruction(I.second));
|
2016-12-06 03:47:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-02 07:52:54 +08:00
|
|
|
void FrameOptimizerPass::removeUnusedStores(const FrameAnalysis &FA,
|
|
|
|
BinaryFunction &BF) {
|
2021-10-26 15:06:34 +08:00
|
|
|
StackReachingUses SRU(FA, BF);
|
2017-05-02 07:52:54 +08:00
|
|
|
SRU.run();
|
|
|
|
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Performing unused stores removal\n");
|
2017-05-02 07:52:54 +08:00
|
|
|
std::vector<std::pair<BinaryBasicBlock *, MCInst *>> ToErase;
|
|
|
|
bool Changed = false;
|
2021-04-08 15:19:26 +08:00
|
|
|
for (BinaryBasicBlock &BB : BF) {
|
2021-12-15 08:52:51 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
|
2017-05-02 07:52:54 +08:00
|
|
|
const MCInst *Prev = nullptr;
|
|
|
|
for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
|
2021-04-08 15:19:26 +08:00
|
|
|
MCInst &Inst = *I;
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG({
|
2017-05-02 07:52:54 +08:00
|
|
|
dbgs() << "\t\tNow at ";
|
|
|
|
Inst.dump();
|
|
|
|
for (auto I = Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB);
|
|
|
|
I != SRU.expr_end(); ++I) {
|
|
|
|
dbgs() << "\t\t\tReached by: ";
|
|
|
|
(*I)->dump();
|
|
|
|
}
|
|
|
|
});
|
2021-04-08 15:19:26 +08:00
|
|
|
ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
|
2017-05-02 07:52:54 +08:00
|
|
|
if (!FIEX) {
|
|
|
|
Prev = &Inst;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (FIEX->IsLoad || !FIEX->IsSimple || FIEX->StackOffset >= 0) {
|
|
|
|
Prev = &Inst;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (SRU.isStoreUsed(*FIEX,
|
|
|
|
Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB))) {
|
|
|
|
Prev = &Inst;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// TODO: Change push/pops to stack adjustment instruction
|
2021-10-26 15:06:34 +08:00
|
|
|
if (BF.getBinaryContext().MIB->isPush(Inst))
|
2017-05-02 07:52:54 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
++NumRedundantStores;
|
|
|
|
Changed = true;
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Unused store instruction: ");
|
|
|
|
LLVM_DEBUG(Inst.dump());
|
|
|
|
LLVM_DEBUG(dbgs() << "@BB: " << BB.getName() << "\n");
|
|
|
|
LLVM_DEBUG(dbgs() << "FIE offset = " << FIEX->StackOffset
|
2021-12-15 08:52:51 +08:00
|
|
|
<< " size = " << (int)FIEX->Size << "\n");
|
2017-05-02 07:52:54 +08:00
|
|
|
// Delete it!
|
2021-05-08 09:43:25 +08:00
|
|
|
ToErase.emplace_back(&BB, &Inst);
|
2017-05-02 07:52:54 +08:00
|
|
|
Prev = &Inst;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-08 15:19:26 +08:00
|
|
|
for (std::pair<BinaryBasicBlock *, MCInst *> I : ToErase) {
|
2019-02-01 03:23:02 +08:00
|
|
|
I.first->eraseInstruction(I.first->findInstruction(I.second));
|
2017-05-02 07:52:54 +08:00
|
|
|
}
|
|
|
|
if (Changed) {
|
2020-12-02 08:29:39 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "FOP modified \"" << BF.getPrintName() << "\"\n");
|
2017-05-02 07:52:54 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-04 06:52:01 +08:00
|
|
|
/// Entry point of the frame optimizer pass.
///
/// Does nothing when -frame-opt is "none". Otherwise builds the call graph,
/// the frame analysis and the register analysis, runs redundant-load removal
/// (and, with -frame-opt-rm-stores, unused-store removal) on every eligible
/// function, then runs shrink wrapping and prints the pass statistics.
///
/// \param BC binary context whose functions are optimized in place.
void FrameOptimizerPass::runOnFunctions(BinaryContext &BC) {
  if (opts::FrameOptimization == FOP_NONE)
    return;

  std::unique_ptr<BinaryFunctionCallGraph> CG;
  std::unique_ptr<FrameAnalysis> FA;
  std::unique_ptr<RegAnalysis> RA;

  // Each analysis is constructed in its own scope so that its timer covers
  // just that construction.
  {
    NamedRegionTimer T1("callgraph", "create call graph", "FOP",
                        "FOP breakdown", opts::TimeOpts);
    CG = std::make_unique<BinaryFunctionCallGraph>(buildCallGraph(BC));
  }

  {
    NamedRegionTimer T1("frameanalysis", "frame analysis", "FOP",
                        "FOP breakdown", opts::TimeOpts);
    FA = std::make_unique<FrameAnalysis>(BC, *CG);
  }

  {
    NamedRegionTimer T1("reganalysis", "reg analysis", "FOP", "FOP breakdown",
                        opts::TimeOpts);
    RA = std::make_unique<RegAnalysis>(BC, &BC.getBinaryFunctions(), CG.get());
  }

  // Perform caller-saved register optimizations, then callee-saved register
  // optimizations (shrink wrapping)
  for (auto &I : BC.getBinaryFunctions()) {
    auto &BF = I.second;
    if (!FA->hasFrameInfo(BF))
      continue;
    // Restrict pass execution if user asked to only run on hot functions
    if (opts::FrameOptimization == FOP_HOT) {
      if (BF.getKnownExecutionCount() < BC.getHotThreshold())
        continue;
      LLVM_DEBUG(
          dbgs() << "Considering " << BF.getPrintName()
                 << " for frame optimizations because its execution count ( "
                 << BF.getKnownExecutionCount()
                 << " ) exceeds our hotness threshold ( "
                 << BC.getHotThreshold() << " )\n");
    }

    {
      NamedRegionTimer T1("removeloads", "remove loads", "FOP", "FOP breakdown",
                          opts::TimeOpts);
      removeUnnecessaryLoads(*RA, *FA, BF);
    }

    if (opts::RemoveStores) {
      NamedRegionTimer T1("removestores", "remove stores", "FOP",
                          "FOP breakdown", opts::TimeOpts);
      removeUnusedStores(*FA, BF);
    }
    // Functions without profile data are filtered out for shrink wrapping by
    // the skip predicate inside performShrinkWrapping(); no check is needed
    // here.
  }

  {
    NamedRegionTimer T1("shrinkwrapping", "shrink wrapping", "FOP",
                        "FOP breakdown", opts::TimeOpts);
    performShrinkWrapping(*RA, *FA, BC);
  }

  outs() << "BOLT-INFO: FOP optimized " << NumRedundantLoads
         << " redundant load(s) and " << NumRedundantStores
         << " unused store(s)\n";
  outs() << "BOLT-INFO: FOP changed " << NumLoadsChangedToReg
         << " load(s) to use a register instead of a stack access, and "
         << NumLoadsChangedToImm << " to use an immediate.\n"
         << "BOLT-INFO: FOP deleted " << NumLoadsDeleted << " load(s) and "
         << NumRedundantStores << " store(s).\n";
  FA->printStats();
  ShrinkWrapping::printStats();
}
|
|
|
|
|
2019-07-03 01:48:43 +08:00
|
|
|
void FrameOptimizerPass::performShrinkWrapping(const RegAnalysis &RA,
|
|
|
|
const FrameAnalysis &FA,
|
|
|
|
BinaryContext &BC) {
|
|
|
|
// Initialize necessary annotations to allow safe parallel accesses to
|
|
|
|
// annotation index in MIB
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex(CalleeSavedAnalysis::getSaveTagName());
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex(CalleeSavedAnalysis::getRestoreTagName());
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex(StackLayoutModifier::getTodoTagName());
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex(StackLayoutModifier::getSlotTagName());
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex(
|
|
|
|
StackLayoutModifier::getOffsetCFIRegTagName());
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("ReachingDefs");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("ReachingUses");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("LivenessAnalysis");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("StackReachingUses");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("PostDominatorAnalysis");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("DominatorAnalysis");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("StackPointerTracking");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("StackPointerTrackingForInternalCalls");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("StackAvailableExpressions");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("StackAllocationAnalysis");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("ShrinkWrap-Todo");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("PredictiveStackPointerTracking");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("ReachingInsnsBackward");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("ReachingInsns");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("AccessesDeletedPos");
|
|
|
|
BC.MIB->getOrCreateAnnotationIndex("DeleteMe");
|
|
|
|
|
|
|
|
ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
|
|
|
|
if (!FA.hasFrameInfo(BF))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (opts::FrameOptimization == FOP_HOT &&
|
|
|
|
(BF.getKnownExecutionCount() < BC.getHotThreshold()))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (BF.getKnownExecutionCount() == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
|
|
|
ParallelUtilities::WorkFuncWithAllocTy WorkFunction =
|
|
|
|
[&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) {
|
2021-10-26 15:06:34 +08:00
|
|
|
DataflowInfoManager Info(BF, &RA, &FA, AllocatorId);
|
|
|
|
ShrinkWrapping SW(FA, BF, Info, AllocatorId);
|
2019-07-03 01:48:43 +08:00
|
|
|
|
|
|
|
if (SW.perform()) {
|
|
|
|
std::lock_guard<std::mutex> Lock(FuncsChangedMutex);
|
|
|
|
FuncsChanged.insert(&BF);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
ParallelUtilities::runOnEachFunctionWithUniqueAllocId(
|
|
|
|
BC, ParallelUtilities::SchedulingPolicy::SP_INST_QUADRATIC, WorkFunction,
|
|
|
|
SkipPredicate, "shrink-wrapping");
|
|
|
|
}
|
|
|
|
|
2016-12-06 03:47:08 +08:00
|
|
|
} // namespace bolt
|
|
|
|
} // namespace llvm
|